Spaces:
Running
Running
File size: 1,295 Bytes
98b1ce4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
def scrapeCourse(url, timeout=10):
    """Collect the bullet-point texts from a course page's description list.

    Fetches *url*, parses the HTML, finds the ``<ul>`` matched by a fixed
    CSS selector and gathers the whitespace-stripped text of every ``<li>``
    found inside the first match.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait on the server, forwarded to
            ``requests.get``. Pass ``None`` to wait indefinitely.

    Returns:
        A list of strings, one per ``<li>`` in the first matched element.
        ``None`` when the response status is not 200 or when nothing
        matches the selector; a message is printed in both cases.

    Raises:
        requests.exceptions.RequestException: Network-level failures
            (including ``Timeout`` once *timeout* elapses) are not caught
            here and propagate to the caller.
    """
    # Imported locally so the function stays self-contained within the file.
    import requests
    from bs4 import BeautifulSoup

    # Without a timeout an unresponsive server would block the caller forever.
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    # NOTE(review): the "--AfVWV" class suffix looks build-generated and may
    # change when the site is redeployed -- confirm the selector still matches.
    selector = (
        "#main-content-anchor > div.paid-course-landing-page__body > div > "
        "div.ud-text-sm.component-margin.styles--description--AfVWV > "
        "div > div > div > div:nth-child(1) > ul"
    )
    content = soup.select(selector)
    if not content:
        print("No content found for the selector.")
        return None

    # Only the first matched list is read; any further matches are ignored.
    return [item.get_text(strip=True) for item in content[0].find_all('li')]
|