Skip to content

Commit

Permalink
Image Scrapper from a website
Browse files Browse the repository at this point in the history
  • Loading branch information
WannaCry016 committed Nov 1, 2024
1 parent 7d18a27 commit 3b054cc
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 0 deletions.
6 changes: 6 additions & 0 deletions PROJECTS/Download_images_from_websites.py/Readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Scrape images from a URL

1. Download ChromeDriver for your installed version of Chrome.
2. Run the script: `py scrap_img.py`
3. `Enter Path : E:\webscraping\chromedriver_win32\chromedriver.exe` <br/>
`Enter URL : https://dribbble.com/`
1 change: 1 addition & 0 deletions PROJECTS/Download_images_from_websites.py/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
selenium==3.141.0
59 changes: 59 additions & 0 deletions PROJECTS/Download_images_from_websites.py/scrap_img.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import os
import time
from urllib.parse import urljoin

import requests as rq
from bs4 import BeautifulSoup
from selenium import webdriver

# Location of the chromedriver executable; passed to get_url() further down.
# Example: E:\web scraping\chromedriver_win32\chromedriver.exe
path = input("Enter Path : ")

# Address of the page whose <img> elements will be downloaded.
url = input("Enter URL : ")

# Directory (relative to the working directory) that receives the downloaded
# images; the script creates it if it does not exist yet.
output = "output"


def get_url(path, url):
    """Open *url* in Chrome and return the page's HTML.

    Args:
        path: Filesystem path to the chromedriver executable.
        url: Address of the page to load.

    Returns:
        The ``outerHTML`` of the document element, read through JavaScript
        right after ``driver.get`` returns.
    """
    driver = webdriver.Chrome(executable_path=path)
    try:
        driver.get(url)
        print("loading.....")
        return driver.execute_script(
            "return document.documentElement.outerHTML"
        )
    finally:
        # Shut the browser down even when loading fails; otherwise every run
        # leaves a Chrome window and a chromedriver process behind.
        driver.quit()


def get_img_links(res):
    """Return every ``<img>`` tag in *res* that has a ``src`` attribute.

    *res* is HTML markup. It is parsed with BeautifulSoup's "lxml" parser and
    the matching tags are returned exactly as ``find_all`` yields them.
    """
    return BeautifulSoup(res, "lxml").find_all("img", src=True)


def download_img(img_link, index, timeout=30):
    """Download one image and store it in the ``output`` directory.

    The file is named ``<index + 1><extension>``. The extension is the first
    of .jpeg/.jpg/.png/.gif that appears anywhere in the URL (compared
    case-insensitively), falling back to ``.jpg``.

    Failures are reported on stdout and otherwise ignored, so one bad image
    does not abort the whole run.

    Args:
        img_link: Absolute URL of the image.
        index: Zero-based position of the image; the file name uses
            ``index + 1``.
        timeout: Seconds to wait for the server before giving up.
    """
    extensions = (".jpeg", ".jpg", ".png", ".gif")
    lowered = img_link.lower()
    extension = next((ext for ext in extensions if ext in lowered), ".jpg")

    # os.path.join keeps the path valid on every platform; a literal "\\"
    # only works as a separator on Windows.
    target = os.path.join(output, str(index + 1) + extension)

    try:
        response = rq.get(img_link, timeout=timeout)
        # Do not save an HTML error page under an image file name.
        response.raise_for_status()
        with open(target, "wb") as f:
            f.write(response.content)
    except (rq.RequestException, OSError) as exc:
        print("Skipping {}: {}".format(img_link, exc))


# Fetch the page HTML and collect every <img> tag that has a src attribute.
# No delay is needed here: get_url() has already returned the HTML as a
# string, so waiting afterwards cannot change what gets parsed.
result = get_url(path, url)
img_links = get_img_links(result)

# exist_ok makes this safe whether or not the directory is already there.
os.makedirs(output, exist_ok=True)

for index, img_tag in enumerate(img_links):
    src = img_tag["src"]
    if not src:
        continue
    print("Downloading...")
    # src is frequently relative ("/img/a.png") or protocol-relative
    # ("//cdn.example.com/a.png"); resolve it against the page URL so the
    # HTTP client receives an absolute address.
    download_img(urljoin(url, src), index)
print("Download Complete!!")

0 comments on commit 3b054cc

Please sign in to comment.