Spaces:
Sleeping
Sleeping
from selenium import webdriver | |
from selenium.webdriver.chrome.service import Service as ChromeService | |
from selenium.webdriver.firefox.service import Service as FirefoxService | |
from webdriver_manager.chrome import ChromeDriverManager | |
from webdriver_manager.firefox import GeckoDriverManager | |
from selenium.webdriver.common.by import By | |
from selenium.webdriver.support.ui import WebDriverWait | |
from selenium.webdriver.support import expected_conditions as EC | |
from selenium.webdriver.chrome.options import Options as ChromeOptions | |
from selenium.webdriver.firefox.options import Options as FirefoxOptions | |
class WebScraper:
    """Thin wrapper around a Selenium WebDriver for fetching rendered HTML.

    Supported ``browser`` values (case-insensitive):

    * ``'chrome'``      -- Chrome, driver binary resolved by webdriver_manager.
    * ``'firefox'``     -- Firefox, driver binary resolved by webdriver_manager.
    * ``'huggingface'`` -- Chrome started with the sandbox-free flags used in
      containerised environments; always headless and relies on Selenium's
      default driver lookup instead of webdriver_manager.

    The instance can be used as a context manager so the browser is always
    shut down::

        with WebScraper() as scraper:
            scraper.get(url)
            html = scraper.get_html()
    """

    SUPPORTED_BROWSERS = ('chrome', 'firefox', 'huggingface')

    def __init__(self, browser='chrome', hidden=True, *, window_size=(1920, 1200)):
        """Start a browser session.

        Args:
            browser: One of ``SUPPORTED_BROWSERS`` (case-insensitive).
            hidden: Run headless. Ignored for ``'huggingface'``, which is
                always headless.
            window_size: ``(width, height)`` of the browser window in pixels.

        Raises:
            ValueError: If ``browser`` is not a supported value.
        """
        name = browser.lower()
        # Validate first so an unsupported name fails before any driver work.
        if name not in self.SUPPORTED_BROWSERS:
            raise ValueError(
                'Unsupported browser. Supported values are '
                '"chrome", "firefox" and "huggingface".'
            )

        width, height = window_size

        if name == 'chrome':
            options = ChromeOptions()
            if hidden:
                options.add_argument('--headless')
            options.add_argument(f'--window-size={width},{height}')
            self.driver = webdriver.Chrome(
                service=ChromeService(ChromeDriverManager().install()),
                options=options,
            )
        elif name == 'firefox':
            options = FirefoxOptions()
            if hidden:
                options.add_argument('--headless')
            # Firefox does not understand Chromium's --window-size switch;
            # its command line takes separate --width / --height arguments.
            options.add_argument(f'--width={width}')
            options.add_argument(f'--height={height}')
            self.driver = webdriver.Firefox(
                service=FirefoxService(GeckoDriverManager().install()),
                options=options,
            )
        else:  # 'huggingface'
            options = webdriver.ChromeOptions()
            options.add_argument('--headless')
            options.add_argument('--no-sandbox')
            options.add_argument('--disable-dev-shm-usage')
            options.add_argument(f'--window-size={width},{height}')
            self.driver = webdriver.Chrome(options=options)

    def __enter__(self):
        """Return the scraper itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Quit the browser on leaving the ``with`` block; never suppresses errors."""
        self.close_browser()

    def get(self, url, wait_time=10):
        """Navigate to ``url`` and block until a ``<body>`` element is present.

        Args:
            url: Address to load.
            wait_time: Maximum number of seconds to wait for ``<body>``.
                ``WebDriverWait.until`` raises Selenium's ``TimeoutException``
                if the element does not appear in time.
        """
        self.driver.get(url)
        WebDriverWait(self.driver, wait_time).until(
            EC.presence_of_element_located((By.TAG_NAME, 'body'))
        )

    def get_html(self):
        """Return the page source of the currently loaded page."""
        return self.driver.page_source

    def close_browser(self):
        """Quit the underlying WebDriver session."""
        self.driver.quit()