edumail-scrapper/browser.py
2020-11-04 01:13:41 +09:00

60 lines
2.2 KiB
Python

import time
from selenium import webdriver
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from selenium.common.exceptions import TimeoutException, StaleElementReferenceException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
class Browser:
    """Thin convenience wrapper around a Selenium Firefox WebDriver.

    Every lookup helper takes an XPath plus an optional, already-located
    ``element``.  When ``element`` is None the helper waits (up to ``timeout``
    seconds) on the whole page via ``WebDriverWait``; when an element is
    given it is used directly and no wait is performed.  Helpers report a
    wait timeout through their return value (``None`` / ``False``) rather
    than raising ``TimeoutException``.
    """

    # Path handed to webdriver.Firefox as the geckodriver executable
    # (relative to the process working directory).
    DRIVER_PATH = r'./geckodriver'
    # Default wait, in seconds, used by every helper below.
    TIMEOUT = 5
    # Pause, in seconds, between retries after a stale-element error in find().
    RETRY_INTERVAL = 0.1

    def __init__(self, headless=True):
        """Start a Firefox session.

        Args:
            headless: when true, Firefox is started without a visible window.
        """
        # Initialised here but not updated anywhere in this class.
        self.uri = None
        self.headless = headless
        self.options = FirefoxOptions()
        # NOTE(review): `Options.headless` and `executable_path` are
        # Selenium 3-era APIs -- confirm they exist in the installed version.
        self.options.headless = headless
        self.driver = webdriver.Firefox(executable_path=self.DRIVER_PATH, options=self.options)

    def open_uri(self, uri, allow_refresh=False):
        """Navigate to ``uri`` unless the browser is already there.

        Args:
            uri: address to load.
            allow_refresh: load the page even if it is the current URL.

        Returns:
            True if a navigation was issued, False if it was skipped.  The
            result of waiting for the ``/html`` element is not checked, so
            True does not guarantee the document appeared within TIMEOUT.
        """
        if self.driver.current_url != uri or allow_refresh:
            self.driver.get(uri)
            self.find('/html', None, self.TIMEOUT)
            return True
        return False

    def find(self, xpath, element=None, timeout=TIMEOUT):
        """Locate the first node matching ``xpath``.

        Args:
            xpath: XPath expression to evaluate.
            element: optional parent element to search under; when None the
                page is searched, waiting up to ``timeout`` seconds for the
                node to be present.
            timeout: wait budget in seconds; also bounds stale-element retries.

        Returns:
            The matching element, or None if the wait timed out or the lookup
            kept hitting a stale element until ``timeout`` elapsed.  Other
            Selenium exceptions raised by the lookup propagate unchanged.
        """
        # A stale `element` argument never recovers on its own, so retries
        # must be bounded or this loop would spin forever.
        deadline = time.monotonic() + (timeout or 0)
        while True:
            try:
                if element is None:
                    return WebDriverWait(self.driver, timeout).until(
                        EC.presence_of_element_located((By.XPATH, xpath)))
                # find_element(By.XPATH, ...) replaces find_element_by_xpath,
                # which is no longer available in Selenium 4.
                return element.find_element(By.XPATH, xpath)
            except TimeoutException:
                return None
            except StaleElementReferenceException:
                if time.monotonic() >= deadline:
                    return None
                time.sleep(self.RETRY_INTERVAL)

    def click(self, xpath, element=None, timeout=TIMEOUT):
        """Click ``element``, or the node matching ``xpath`` once clickable.

        Args:
            xpath: XPath of the node to click; ignored when ``element`` is given.
            element: optional already-located element to click directly.
            timeout: seconds to wait for the node to become clickable.

        Returns:
            True after the click was issued, False if the wait timed out.
        """
        try:
            target = element
            if target is None:
                target = WebDriverWait(self.driver, timeout).until(
                    EC.element_to_be_clickable((By.XPATH, xpath)))
            target.click()
            return True
        except TimeoutException:
            return False

    def get_attribute(self, xpath, name, element=None, timeout=TIMEOUT):
        """Read attribute ``name`` from ``element`` or the node at ``xpath``.

        Args:
            xpath: XPath of the node; ignored when ``element`` is given.
            name: attribute name to read.
            element: optional already-located element to read from.
            timeout: seconds to wait for the node to become visible.

        Returns:
            Whatever the element's ``get_attribute`` returns, or None if the
            wait timed out.
        """
        try:
            if element is None:
                # Unlike find()/get_css_value(), this waits for visibility,
                # not mere presence in the DOM.
                element = WebDriverWait(self.driver, timeout).until(
                    EC.visibility_of_element_located((By.XPATH, xpath)))
            return element.get_attribute(name)
        except TimeoutException:
            return None

    def get_css_value(self, xpath, name, element=None, timeout=TIMEOUT):
        """Read CSS property ``name`` from ``element`` or the node at ``xpath``.

        Args:
            xpath: XPath of the node; ignored when ``element`` is given.
            name: CSS property name to read.
            element: optional already-located element to read from.
            timeout: seconds to wait for the node to be present.

        Returns:
            Whatever the element's ``value_of_css_property`` returns, or None
            if the wait timed out.
        """
        try:
            if element is None:
                element = WebDriverWait(self.driver, timeout).until(
                    EC.presence_of_element_located((By.XPATH, xpath)))
            return element.value_of_css_property(name)
        except TimeoutException:
            return None