
The picture "requirements.jpg" has all the requirements for the python web scrapping assignment. The file "webscrapping.py" can serve as a source code, its a similar easier project I did in the past with web scrapping. The picture "SourceCodeReq.jpg" shows the guidelines of the source code provided so you know what is doing. Thank you


webscrapping.py

```python
# Python Standard Libraries
import requests  # Python Standard Library for URL requests
import os

# Python 3rd Party Libraries
from bs4 import BeautifulSoup  # 3rd Party BeautifulSoup Library - pip install beautifulsoup4

url = 'https://casl.website/'
base = 'https://casl.website/'
IMG_SAVE = "./IMAGES/"  # Directory to store images

# Create the directory if necessary
if not os.path.exists(IMG_SAVE):
    os.makedirs(IMG_SAVE)

page = requests.get(url)                        # retrieve a page from your favorite website
soup = BeautifulSoup(page.text, 'html.parser')  # convert the page into soup

# The title variable looks for the website's title tag
title = soup.find('title')
print("||Title Information||")
print(title)         # Prints the tag
print(title.string)  # Prints the tag string content

print("\n\n||Extracting Images Information||")
print("Extracting Images from: ", url)
print("Please Wait")

images = soup.findAll('img')  # Find the image tags

for eachImage in images:      # Process and save each image
    try:
        imgURL = eachImage['src']
        print("Processing Image:", imgURL, end="")
        if imgURL[0:4] != 'http':        # If the URL path is relative,
            imgURL = base + imgURL       # try prepending the base URL
        response = requests.get(imgURL)  # Get the image from the URL
        imageName = os.path.basename(imgURL)
        imgOutputPath = IMG_SAVE + imageName
        with open(imgOutputPath, 'wb') as outFile:
            outFile.write(response.content)  # Save the image
        print(" >> Saved Image:", imgOutputPath)
    except Exception as err:
        print(imgURL, err)
        continue

# The href variable finds the anchor tags
href = soup.findAll('a')
print("\n\n||URL Information||")

# The for loop goes through each anchor tag and prints its URL
for eachHref in href:
    try:
        url = eachHref['href']
        print(url)
    except KeyError:
        continue

print('\n\nScript Complete')
```
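One fragile spot in webscrapping.py is the relative-URL handling: prepending base works for paths like images/logo.png but can produce doubled slashes or the wrong host for paths like /images/logo.png or protocol-relative ones like //cdn.example.com/logo.png. A sturdier option is urllib.parse.urljoin from the standard library; the snippet below is a minimal sketch, assuming the same base value used above, with example paths chosen only for illustration.

```python
from urllib.parse import urljoin

base = 'https://casl.website/'

# urljoin resolves relative references against the base URL and leaves
# absolute URLs untouched, so no manual 'http' prefix check is needed.
print(urljoin(base, 'images/logo.png'))              # https://casl.website/images/logo.png
print(urljoin(base, '/images/logo.png'))             # https://casl.website/images/logo.png
print(urljoin(base, 'https://example.com/pic.jpg'))  # https://example.com/pic.jpg
```

Inside the image loop, imgURL = urljoin(base, eachImage['src']) would replace the manual prefix check.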

Rushendra answered on May 09, 2021
webscraping-ezsobsvc.py
```python
# Python Standard Libraries
import requests  # Python Standard Library for URL requests
import os

import nltk
from nltk.tag import pos_tag
from textblob import TextBlob
# nltk.download()

# Python 3rd Party Libraries
from bs4 import BeautifulSoup  # 3rd Party BeautifulSoup Library - pip install beautifulsoup4

url = 'https://casl.website/'
base = 'https://casl.website/'
IMG_SAVE = "./IMAGES/"  # Directory to store images

# Create the directory if necessary
if not os.path.exists(IMG_SAVE):
    os.makedirs(IMG_SAVE)

page = requests.get(url)                        # retrieve a page from your favorite website
soup = BeautifulSoup(page.text, 'html.parser')  # convert the page into soup

# The title variable looks for the website's title tag
title = soup.find('title')
print("\n\n||Title Information||")
print(title)         # Prints the tag
print(title.string)  # Prints the tag string content

print("\n\n||Extracting Images Information||")
print("Extracting...
```