Spaces:
Running
Running
def scrapeCourse(url, timeout=10):
    """Scrape the list items from a course landing page.

    Fetches ``url``, looks up the first ``<ul>`` matched by a fixed CSS
    selector, and collects the whitespace-stripped text of every ``<li>``
    found inside it.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` can block forever on a stalled
            connection.

    Returns:
        A list of strings (one per ``<li>``) when the page is returned with
        status 200 and the selector matches. ``None`` when the status code
        is not 200 or the selector matches nothing; a message is printed in
        both of those cases.

    Raises:
        Any exception raised by ``requests.get`` (connection failure,
        timeout, invalid URL, ...) propagates to the caller unchanged.
    """
    # Imported locally so that loading this module does not require the
    # scraping dependencies until the function is actually called.
    import requests
    from bs4 import BeautifulSoup

    # The selector is tied to the page's current markup (it includes a
    # generated-looking class name), so it will stop matching if the site
    # changes its layout.
    selector = "#main-content-anchor > div.paid-course-landing-page__body > div > div.ud-text-sm.component-margin.styles--description--AfVWV > div > div > div > div:nth-child(1) > ul"

    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    matches = soup.select(selector)
    if not matches:
        print("No content found for the selector.")
        return None

    # Only the first matching <ul> is used; its <li> elements carry the text.
    return [item.get_text(strip=True) for item in matches[0].find_all('li')]