Image Scrapper from a website

fineanmol · Nov 1, 2024 · 3b054cc · 3b054cc
1 parent 7d18a27
commit 3b054cc
Show file tree

Hide file tree

Showing 3 changed files with 66 additions and 0 deletions.
diff --git a/PROJECTS/Download_images_from_websites.py/Readme.md b/PROJECTS/Download_images_from_websites.py/Readme.md
@@ -0,0 +1,6 @@
+# Scrape images from a URL
+
+1. Download ChromeDriver for Chrome.
+2. Run the scrap_img.py file: `py scrap_img.py`
+3. `Enter Path : E:\webscraping\chromedriver_win32\chromedriver.exe` <br/>
+   `Enter URL : https://dribbble.com/`
diff --git a/PROJECTS/Download_images_from_websites.py/requirements.txt b/PROJECTS/Download_images_from_websites.py/requirements.txt
@@ -0,0 +1 @@
+selenium==3.141.0
diff --git a/PROJECTS/Download_images_from_websites.py/scrap_img.py b/PROJECTS/Download_images_from_websites.py/scrap_img.py
@@ -0,0 +1,59 @@
+import os
+import time
+from urllib.parse import urljoin
+
+import requests as rq
+from bs4 import BeautifulSoup
+from selenium import webdriver
+
+# path= E:\web scraping\chromedriver_win32\chromedriver.exe
+path = input("Enter Path : ")
+
+url = input("Enter URL : ")
+
+output = "output"
+
+
+def get_url(path, url):
+    driver = webdriver.Chrome(executable_path=r"{}".format(path))
+    driver.get(url)
+    print("loading.....")
+    res = driver.execute_script("return document.documentElement.outerHTML")
+
+    return res
+
+
+def get_img_links(res):
+    soup = BeautifulSoup(res, "lxml")
+    imglinks = soup.find_all("img", src=True)
+    return imglinks
+
+
+def download_img(img_link, index):
+    try:
+        extensions = [".jpeg", ".jpg", ".png", ".gif"]
+        extension = ".jpg"
+        for exe in extensions:
+            if img_link.find(exe) > 0:
+                extension = exe
+                break
+
+        img_data = rq.get(img_link).content
+        with open(output + "\\" + str(index + 1) + extension, "wb+") as f:
+            f.write(img_data)
+
+        f.close()
+    except Exception:
+        pass
+
+
+result = get_url(path, url)
+time.sleep(60)
+img_links = get_img_links(result)
+if not os.path.isdir(output):
+    os.mkdir(output)
+
+for index, img_link in enumerate(img_links):
+    img_link = img_link["src"]
+    print("Downloading...")
+    if img_link:
+        download_img(img_link, index)
+print("Download Complete!!")