edumail-scrapper/contacts.py
2020-11-04 01:13:41 +09:00

94 lines
3.2 KiB
Python

import base64
import csv
import datetime
import time
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from conf import Conf
from browser import Browser
# Read the settings file: `auth` holds the credentials, `xpath` holds the
# URLs and element locators used below
conf = Conf('./conf.json').data
auth = conf['auth']
xpath = conf['path']

# Start a non-headless browser and load the login page
browser = Browser(headless=False)
browser.open_uri(xpath['login_url'])

# Fill in the username and password fields, clearing each one first
for locator_key, auth_key in (
    ('username_txt', 'username'),
    ('password_txt', 'password'),
):
    field = browser.find(xpath[locator_key])
    field.clear()
    field.send_keys(auth[auth_key])

# Sign in: press the login button, then the 'stay_signed_btn' button
for button_key in ('login_btn', 'stay_signed_btn'):
    browser.click(xpath[button_key])

# Navigate to the contacts page and open the contact directory
browser.open_uri(xpath['contact_url'])
browser.click(xpath['contact_dir_btn'])

# Put focus on the first entry of the contact list
first_contact = browser.find(xpath['first_contact'])
first_contact.click()

# TODO: To automatically locate the first contact on a headless browser,
# use driver.execute_script() with window.scrollBy
screenshot_filename = input("Enter filname of screenshot: ")
output_filename = input("Enter filename of output csv file: ")
input("Click on the first contact and press Enter to continue...")
# Walk the contact list from the focused entry downwards, appending one csv
# row per contact to the output file.
# newline='' leaves line endings to the csv writer.
with open(output_filename, 'a+', newline='') as f:
    writer = csv.writer(f)

    # Write the header only when the output file is empty. Iterating a file
    # opened in 'a+' mode yields nothing (the position starts at
    # end-of-file), so test the end offset instead of counting lines.
    f.seek(0, 2)  # 2 == os.SEEK_END
    if f.tell() == 0:
        f.write(conf['output_header'] + '\r\n')

    # Keys into `xpath` for the text fields, in output-column order
    text_fields = (
        'name',
        'email',
        'chat',
        'mobile',
        'work_phone',
        'job_title',
        'department',
        'office_location',
        'company',
    )

    # NOTE(review): this loop has no exit condition; the script has to be
    # interrupted manually. Each row is flushed as soon as it is written.
    while True:
        # Pause before reading (presumably to let the selected contact's
        # details finish loading -- confirm against Browser.TIMEOUT)
        time.sleep(browser.TIMEOUT)

        # Current contact has :focus
        curr_contact = browser.driver.switch_to.active_element

        # Scrape current contact; a missing field is treated as None
        elements = [browser.find(xpath[key]) for key in text_fields]

        profile_pic = browser.find(xpath['profile_picture'])
        profile_pic_b64 = ''
        if profile_pic is not None:
            # Save profile picture to local file and convert to base64
            profile_pic.screenshot(screenshot_filename)
            with open(screenshot_filename, 'rb') as p:
                profile_pic_b64 = base64.b64encode(p.read()).decode('utf-8')

        # Build the row: missing fields become empty strings
        data = ['' if el is None else el.text for el in elements]
        data.append(profile_pic_b64)

        # Write current contact to output file
        writer.writerow(data)
        f.flush()

        # Print current contact (name and email); use the normalised row so
        # a missing name or email cannot raise here
        print(datetime.datetime.now(), data[0], data[1])

        # Set :focus on the next contact
        curr_contact.send_keys(Keys.DOWN)