From 3b054ccf10e91e0c6658bf27b7a64baaad5205c2 Mon Sep 17 00:00:00 2001
From: Ayushman <payushman72@gmail.com>
Date: Fri, 1 Nov 2024 22:26:43 +0530
Subject: [PATCH] Image Scrapper from a website

---
 .../Readme.md                                 |  6 ++
 .../requirements.txt                          |  1 +
 .../scrap_img.py                              | 59 +++++++++++++++++++
 3 files changed, 66 insertions(+)
 create mode 100644 PROJECTS/Download_images_from_websites.py/Readme.md
 create mode 100644 PROJECTS/Download_images_from_websites.py/requirements.txt
 create mode 100644 PROJECTS/Download_images_from_websites.py/scrap_img.py
diff --git a/PROJECTS/Download_images_from_websites.py/Readme.md b/PROJECTS/Download_images_from_websites.py/Readme.md
new file mode 100644
index 000000000..9e60dd938
--- /dev/null
+++ b/PROJECTS/Download_images_from_websites.py/Readme.md
@@ -0,0 +1,6 @@
+# Scrape images from a URL
+
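+1. Install the dependencies (`pip install -r requirements.txt requests beautifulsoup4 lxml`) and download the ChromeDriver that matches your installed Chrome version.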
+2. Run the script: `py scrap_img.py`
+3. `Enter Path : E:\webscraping\chromedriver_win32\chromedriver.exe` <br/>
+   `Enter URL : https://dribbble.com/`
\ No newline at end of file
diff --git a/PROJECTS/Download_images_from_websites.py/requirements.txt b/PROJECTS/Download_images_from_websites.py/requirements.txt
new file mode 100644
index 000000000..27bc3be5d
--- /dev/null
+++ b/PROJECTS/Download_images_from_websites.py/requirements.txt
@@ -0,0 +1 @@
+selenium==3.141.0
\ No newline at end of file
diff --git a/PROJECTS/Download_images_from_websites.py/scrap_img.py b/PROJECTS/Download_images_from_websites.py/scrap_img.py
new file mode 100644
index 000000000..a5d8f11f4
--- /dev/null
+++ b/PROJECTS/Download_images_from_websites.py/scrap_img.py
@@ -0,0 +1,59 @@
+from selenium import webdriver
+import requests as rq
+import os
+from bs4 import BeautifulSoup
+import time
+
+# Path to the chromedriver binary, e.g. E:\web scraping\chromedriver_win32\chromedriver.exe
+path = input("Enter Path : ")
+# Page whose <img> sources are downloaded.
+url = input("Enter URL : ")
+# Folder that receives the downloaded images (created by the script if missing).
+output = "output"
+
+
+def get_url(path, url):
+    """Open *url* in Chrome via the chromedriver at *path*; return the page HTML."""
+    # WebDriver is a context manager, so the browser is quit even if loading fails.
+    with webdriver.Chrome(executable_path=str(path)) as driver:
+        driver.get(url)
+        print("loading.....")
+        return driver.execute_script("return document.documentElement.outerHTML")
+
+
+def get_img_links(res):
+    """Return every <img> tag in the HTML string *res* that has a src attribute."""
+    parsed = BeautifulSoup(res, "lxml")
+    return parsed.find_all("img", src=True)
+
+
+def download_img(img_link, index):
+    """Fetch *img_link* and save it as ``<output>/<index + 1><ext>``.
+
+    The extension is the first of .jpeg/.jpg/.png/.gif found in the link,
+    defaulting to .jpg. Failures are reported and skipped (best effort).
+    """
+    known = (".jpeg", ".jpg", ".png", ".gif")
+    extension = next((ext for ext in known if ext in img_link), ".jpg")
+    try:
+        response = rq.get(img_link, timeout=30)
+        response.raise_for_status()  # do not save an error page as an image
+        # os.path.join keeps the path portable; a literal "\\" only works on Windows.
+        with open(os.path.join(output, str(index + 1) + extension), "wb") as f:
+            f.write(response.content)
+    except (rq.RequestException, OSError) as err:
+        print("Skipping {}: {}".format(img_link, err))
+
+
+# Script entry: snapshot the page, then save every <img> into the output folder.
+from urllib.parse import urljoin  # stdlib; imported here so this block stands alone
+result = get_url(path, url)
+# No pause here: the HTML is already captured, so waiting cannot change the result.
+img_links = get_img_links(result)
+os.makedirs(output, exist_ok=True)  # no error if the folder already exists
+for index, img_tag in enumerate(img_links):
+    src = img_tag["src"]
+    print("Downloading...")
+    if src:
+        download_img(urljoin(url, src), index)  # resolve relative src against the page
+print("Download Complete!!")
\ No newline at end of file