from selenium import webdriver
from selenium.webdriver.chrome.options import Options as ChromeOptions
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from selenium.webdriver.firefox.service import Service as FirefoxService
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.firefox import GeckoDriverManager


class WebScraper:
    """Thin wrapper around a Selenium WebDriver for fetching rendered HTML.

    Supported ``browser`` values (case-insensitive):

    * ``'chrome'`` - Chrome, with the driver binary installed through
      ``ChromeDriverManager``.
    * ``'firefox'`` - Firefox, with the driver binary installed through
      ``GeckoDriverManager``.
    * ``'huggingface'`` - always-headless Chrome started with
      ``--no-sandbox`` and ``--disable-dev-shm-usage`` and without an
      explicit driver service (presumably meant for containerised hosts;
      confirm with the deployment setup).

    The instance can be used as a context manager so the browser is closed
    even when scraping raises::

        with WebScraper('chrome') as scraper:
            scraper.get('https://example.com')
            html = scraper.get_html()
    """

    # Viewport requested for headless sessions.
    _WIDTH = 1920
    _HEIGHT = 1200

    def __init__(self, browser: str = 'chrome', hidden: bool = True):
        """Start the requested browser.

        Args:
            browser: One of ``'chrome'``, ``'firefox'`` or ``'huggingface'``
                (case-insensitive).
            hidden: Run headless with a fixed 1920x1200 window. Ignored for
                ``'huggingface'``, which is always headless.

        Raises:
            ValueError: If ``browser`` is not one of the supported names.
        """
        kind = browser.lower()  # normalise once instead of per comparison

        if kind == 'chrome':
            options = ChromeOptions()
            if hidden:
                options.add_argument('--headless')
                options.add_argument(
                    f'--window-size={self._WIDTH},{self._HEIGHT}'
                )
            self.driver = webdriver.Chrome(
                service=ChromeService(ChromeDriverManager().install()),
                options=options,
            )
        elif kind == 'firefox':
            options = FirefoxOptions()
            if hidden:
                options.add_argument('--headless')
                # Firefox does not size its window from the Chromium-style
                # ``--window-size`` flag; it takes separate width/height
                # arguments instead.
                options.add_argument(f'--width={self._WIDTH}')
                options.add_argument(f'--height={self._HEIGHT}')
            self.driver = webdriver.Firefox(
                service=FirefoxService(GeckoDriverManager().install()),
                options=options,
            )
        elif kind == 'huggingface':
            # Always headless; ``hidden`` is intentionally not consulted here.
            options = ChromeOptions()
            options.add_argument('--headless')
            options.add_argument('--no-sandbox')
            options.add_argument('--disable-dev-shm-usage')
            options.add_argument(
                f'--window-size={self._WIDTH},{self._HEIGHT}'
            )
            # No explicit service: Selenium resolves the driver on its own.
            self.driver = webdriver.Chrome(options=options)
        else:
            raise ValueError(
                'Unsupported browser. Only "chrome", "firefox" and '
                '"huggingface" are supported.'
            )

    def __enter__(self):
        """Return the scraper itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Quit the browser on leaving the ``with`` block.

        Returns ``None`` so any in-flight exception keeps propagating.
        """
        self.close_browser()

    def get(self, url: str, wait_time: float = 10) -> None:
        """Navigate to ``url`` and block until a ``<body>`` element exists.

        Args:
            url: Address to load.
            wait_time: Maximum number of seconds to wait for ``<body>``.

        Raises:
            selenium.common.exceptions.TimeoutException: If ``<body>`` is
                not present within ``wait_time`` seconds.
        """
        self.driver.get(url)
        body_present = EC.presence_of_element_located((By.TAG_NAME, 'body'))
        WebDriverWait(self.driver, wait_time).until(body_present)

    def get_html(self) -> str:
        """Return the page source of the currently loaded page."""
        return self.driver.page_source

    def close_browser(self) -> None:
        """Quit the WebDriver session, closing the browser."""
        self.driver.quit()