knowledge_base:programming:selenium

#!/usr/bin/python

# This was written and tested under Python 2.7, and
# it should be adaptable to Python 3 with minimal changes.
# Chrome browser is required
# The script was tested on a Mac machine. Please change the driver if
# your environment is Linux or Windows. Driver can be found at
#       https://chromedriver.storage.googleapis.com/index.html?path=2.43/

# Usage:
#       python test.py username password 100
# where 100 is the number of entries you want to parse. Any integer is OK.

from selenium import webdriver
import datetime
import time
import os
import csv
import sys

# The "driver" and "Result" directories sit next to the directory that
# contains this script, i.e. one level above the script's own folder.
_project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
parent_diretory_path = os.path.join(_project_root, "driver")
Result_directory_path = os.path.join(_project_root, "Result")

# The chromedriver binary used here was tested on a Mac. Replace it with the
# build that matches your operating system. Drivers are located at:
#        https://chromedriver.storage.googleapis.com/index.html?path=2.43/
driver = webdriver.Chrome(os.path.join(parent_diretory_path, "chromedriver"))

# Load the Alexa web app; login() works on whatever page is open afterwards.
driver.get("https://alexa.amazon.com")

# Name of the CSV file written inside each run's result folder.
result_file_name = 'praseresult.csv'

# One result folder per run, named after the start time. The time fields are
# separated with '-' rather than ':' because ':' is not allowed in Windows
# file and directory names, and the script is meant to run there as well.
result_folder = datetime.datetime.now().strftime("%Y-%m-%d %H-%M-%S.%f")

def write_result_csv(row):
    """Append one row to this run's result CSV file.

    Args:
        row: Sequence of cell values; it is passed to ``csv.writer.writerow``
            unchanged.

    The file is reopened in append mode on every call and closed again before
    returning.
    """
    result_path = os.path.join(Result_directory_path, result_folder, result_file_name)
    # The context manager closes the file even if writing the row raises.
    with open(result_path, 'a') as result_file:
        csv.writer(result_file).writerow(row)


def createresultfolder():
    """Create this run's result folder and start a fresh CSV with a header row.

    An already-existing folder is tolerated (a notice is printed); any other
    failure to create it is re-raised instead of being silently ignored. The
    CSV file is opened in write mode, so an existing file is truncated.

    Raises:
        OSError: If the folder cannot be created and does not already exist.
    """
    folder_path = os.path.join(Result_directory_path, result_folder)
    try:
        os.makedirs(folder_path)
    except OSError:
        # makedirs also raises when the folder is already there; only that
        # case is harmless. Permission or disk errors must not be hidden.
        if not os.path.isdir(folder_path):
            raise
        print("directory avaible")

    # The context manager closes the file even if writing the header raises.
    with open(os.path.join(folder_path, result_file_name), 'w') as result_file:
        csv.writer(result_file).writerow(['Timestamp', 'Question', 'TTS Reponse'])


def login(username, password):
    """Fill in and submit the sign-in form on the page currently loaded.

    Args:
        username: Value typed into the element with id ``ap_email``.
        password: Value typed into the element with id ``ap_password``.

    This is best effort: if any step fails, a message is printed and the
    function returns normally so the caller can carry on.
    """
    try:
        driver.find_element_by_xpath('//*[@id="ap_email"]').send_keys(username)
        driver.find_element_by_xpath('//*[@id="ap_password"]').send_keys(password)
        time.sleep(1)  # short fixed pause before submitting
        driver.find_element_by_xpath('//*[@id="signInSubmit"]').submit()
        time.sleep(1)  # short fixed pause after submitting
    except Exception:
        # Catch Exception rather than using a bare except so that Ctrl-C
        # (KeyboardInterrupt) and SystemExit are not swallowed here.
        print("error while entering password")

def _to_csv_text(text):
    """Return *text* in the string type the csv module expects.

    Python 2's csv writer works on byte strings, so the text is UTF-8 encoded
    there. On Python 3 the writer takes text directly; encoding it would put
    ``b'...'`` reprs into the file.
    """
    if sys.version_info[0] < 3:
        return text.encode('utf-8')
    return text


def parseHistoryData():
    """Read the history entry currently open and append it to the result CSV.

    Collects the timestamp, the question title and the response text from the
    page, prints them, and writes them as one row via ``write_result_csv``.
    The response is looked up under two alternative containers; if neither
    lookup succeeds, the string "None" is recorded instead.
    """
    # Fixed pause, presumably to let the entry view finish loading.
    time.sleep(3)
    datetimestamp = _to_csv_text(
        driver.find_element_by_xpath("//div[@class='d-header-timestamp']").text)
    question = _to_csv_text(
        driver.find_element_by_xpath('//span[@class="dd-title"]').text)

    ttsreponse = "None"
    response_xpaths = (
        '//div[@class="center-single space"]',
        '//div[@class="d-dialog-non-device-directed"]',
    )
    for response_xpath in response_xpaths:
        try:
            ttsreponse = _to_csv_text(
                driver.find_element_by_xpath(response_xpath).text)
        except Exception:
            # This container is not usable; fall through to the next
            # candidate. Exception (not a bare except) keeps Ctrl-C working.
            continue
        break

    print(datetimestamp, question, ttsreponse)
    write_result_csv([datetimestamp, question, ttsreponse])


def toHistoryPage(count):
    """Open the first *count* history entries one by one and record each.

    For every entry the history list is loaded, the entry is clicked,
    ``parseHistoryData`` records it, and the list is reloaded for the next
    round.

    Args:
        count: Number of history entries to visit.
    """
    history_url = 'https://alexa.amazon.com/spa/index.html#settings/dialogs'
    entry_xpath = "//div[@class='inf-container']/dl[@class='d-list-highlight-hover'][{0}]/dd[{1}]"
    scroll_xpath = "//div[@class='inf-container']/dl[@class='d-list-highlight-hover'][{0}]//dd[{1}]"
    scroll_script = "return arguments[0].scrollIntoView(true);"

    time.sleep(3)

    # Open the history page.
    driver.get(history_url)
    time.sleep(3)

    for i in range(count):
        # Entries are addressed as 25 <dd> items per <dl> group, both 1-based.
        # Floor division keeps the indices integral on Python 3 as well; with
        # "/" the group index becomes a float such as 1.04 and the XPath
        # position predicate no longer matches any element.
        group, offset = divmod(i, 25)
        dl = group + 1
        dd = offset + 1

        element = driver.find_element_by_xpath(entry_xpath.format(dl, dd))
        driver.execute_script(scroll_script, element)
        element.click()
        time.sleep(2)

        parseHistoryData()

        # Go back to the list by reloading the history page.
        driver.get(history_url)
        time.sleep(3)

        # Scroll the leading entries of the first group into view, two more
        # than the current group number -- presumably to make the page load
        # further entries before the next lookup (TODO confirm).
        for line in range(1, dl + 3):
            element = driver.find_element_by_xpath(scroll_xpath.format(1, line))
            driver.execute_script(scroll_script, element)
            time.sleep(1)

if __name__ == "__main__":
    # Script entry point: python <script> username password count
    import traceback

    try:
        if len(sys.argv) < 4:
            # SystemExit is not an Exception subclass, so it skips the
            # handler below but still runs the finally clause.
            sys.exit("Usage: python {0} username password count".format(sys.argv[0]))

        username = str(sys.argv[1])
        password = str(sys.argv[2])
        count = int(sys.argv[3])

        createresultfolder()
        login(username, password)
        toHistoryPage(count)
        print("The parse result can be found in under Result folder under subdirectory " + str(result_folder))
    except Exception:
        # Report the failure together with its traceback instead of hiding
        # the cause behind a generic message.
        print("Prasing failed")
        traceback.print_exc()
    finally:
        # Always close the browser session held by the module-level driver,
        # including on errors and on the usage exit above.
        driver.quit()
  • Last modified: 2022/12/02 15:58
  • by George Wayne