Spaces:

Nattyboi
/

resume-api

Running

resume-api / scraper.py

added stuff

98b1ce4 4 days ago

1.3 kB



	def scrapeCourse(url, timeout=10):
	    """Fetch a course page and return the text of its bullet-point list.

	    The page at ``url`` is downloaded, parsed with BeautifulSoup, and the
	    first ``<ul>`` matching a fixed CSS selector is located. The stripped
	    text of every ``<li>`` inside that list is returned.

	    Args:
	        url: Address of the page to scrape.
	        timeout: Seconds to wait for the server before giving up. Passed
	            straight to ``requests.get``; without it a stalled connection
	            would block the caller forever.

	    Returns:
	        A list of strings, one per ``<li>`` found (possibly empty), or
	        ``None`` when the response status is not 200 or the selector
	        matches nothing. In both ``None`` cases a message is printed.

	    Raises:
	        requests.exceptions.RequestException: Network-level failures
	            (including timeouts) are not caught here and propagate to the
	            caller.
	    """
	    # Kept as function-local imports, as in the original.
	    import requests
	    from bs4 import BeautifulSoup

	    # NOTE(review): this selector is tied to one specific page layout and
	    # contains what looks like a build-generated class suffix ("AfVWV"), so
	    # it will presumably stop matching when the site is redeployed - confirm.
	    selector = "#main-content-anchor > div.paid-course-landing-page__body > div > div.ud-text-sm.component-margin.styles--description--AfVWV > div > div > div > div:nth-child(1) > ul"

	    response = requests.get(url, timeout=timeout)

	    # Guard clause: anything other than 200 is reported and yields None.
	    if response.status_code != 200:
	        print(f"Failed to retrieve the page. Status code: {response.status_code}")
	        return None

	    soup = BeautifulSoup(response.text, 'html.parser')
	    matches = soup.select(selector)

	    # Guard clause: the selector found no element on this page.
	    if not matches:
	        print("No content found for the selector.")
	        return None

	    # Only the first matching <ul> is used; each <li> becomes one entry.
	    return [item.get_text(strip=True) for item in matches[0].find_all('li')]