#!/usr/bin/python
# This is written and tested under python2.7 and
# it should be able to adapt to python3 with minimum changes.
# Chrome browser is required.
# The script was tested on a Mac machine. Please change the driver if
# your environment is Linux or Windows. Drivers can be found at
# https://chromedriver.storage.googleapis.com/index.html?path=2.43/
# Usage:
#   python test.py username password 100
# where 100 is the number of entries you want to parse. Any integer is OK.

from selenium import webdriver
import datetime
import time
import os
import csv
import sys
import traceback

parent_diretory_path = os.path.abspath(__file__+"/../../driver")
Result_directory_path = os.path.abspath(__file__+"/../../Result")

# This is tested on a Mac machine. Change the following driver to match your
# operating system. Drivers are located at:
# https://chromedriver.storage.googleapis.com/index.html?path=2.43/
driver = webdriver.Chrome(parent_diretory_path+'/chromedriver')
driver.get("https://alexa.amazon.com")

result_file_name = 'praseresult.csv'
# One result sub-folder per run, named after the start time of the run.
result_folder = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")

HISTORY_URL = 'https://alexa.amazon.com/spa/index.html#settings/dialogs'


def _result_file_path():
    """Return the full path of the CSV result file for this run."""
    return Result_directory_path + '/' + result_folder + '/' + result_file_name


def _csv_text(text):
    """Return *text* in the string form the csv module expects.

    On Python 2 the csv module works on byte strings, so the text is encoded
    as UTF-8. On Python 3 it works on ``str`` and the text is returned as is
    (encoding there would end up as a literal ``b'...'`` in the CSV).
    """
    if sys.version_info[0] < 3:
        return text.encode('utf-8')
    return text


def write_result_csv(row):
    """Append one row (a list of cell values) to the run's CSV result file."""
    with open(_result_file_path(), 'a') as result_file:
        csv.writer(result_file).writerow(row)


def createresultfolder():
    """Create the run's result folder and start the CSV file with a header row."""
    try:
        os.makedirs(Result_directory_path + '/' + result_folder)
    except OSError:
        # Best effort, as before: if the directory cannot be created the
        # open() below reports the real problem.
        print("directory avaible")
    with open(_result_file_path(), 'w') as result_file:
        csv.writer(result_file).writerow(
            ['Timestamp', 'Question', 'TTS Reponse'])


def login(username, password):
    """Fill in the sign-in form on the current page and submit it.

    Failures are reported on stdout and otherwise ignored, so the caller
    continues even if the form could not be filled in.
    """
    try:
        driver.find_element_by_xpath('//*[@id="ap_email"]').send_keys(username)
        driver.find_element_by_xpath('//*[@id="ap_password"]').send_keys(password)
        time.sleep(1)
        driver.find_element_by_xpath('//*[@id="signInSubmit"]').submit()
        time.sleep(1)
    except Exception:
        print("error while entering password")


def parseHistoryData():
    """Read timestamp, question and response from the open page and log them.

    The response is looked up under two alternative elements; if neither can
    be read the literal string "None" is recorded. The values are printed and
    appended to the CSV result file.
    """
    time.sleep(3)
    datetimestamp = _csv_text(
        driver.find_element_by_xpath("//div[@class='d-header-timestamp']").text)
    question = _csv_text(
        driver.find_element_by_xpath('//span[@class="dd-title"]').text)
    try:
        ttsreponse = _csv_text(driver.find_element_by_xpath(
            '//div[@class="center-single space"]').text)
    except Exception:
        try:
            ttsreponse = _csv_text(driver.find_element_by_xpath(
                '//div[@class="d-dialog-non-device-directed"]').text)
        except Exception:
            ttsreponse = "None"
    print(datetimestamp, question, ttsreponse)
    write_result_csv([str(datetimestamp), str(question), str(ttsreponse)])


def toHistoryPage(count):
    """Open the history page and record the first *count* entries.

    Each entry is clicked, parsed with parseHistoryData(), and then the
    history page is reloaded before moving on to the next entry.
    """
    time.sleep(3)
    # opening the history page url
    driver.get(HISTORY_URL)
    time.sleep(3)
    for i in range(count):
        # Entry i is addressed as the dd-th item of the dl-th list, 25 items
        # per list. Floor division keeps the XPath index an integer on both
        # Python 2 and Python 3 (true division would yield e.g. 1.04).
        dl = i // 25 + 1
        dd = i % 25 + 1
        xpath = "//div[@class='inf-container']/dl[@class='d-list-highlight-hover'][{0}]/dd[{1}]".format(dl, dd)
        element = driver.find_element_by_xpath(xpath)
        driver.execute_script("return arguments[0].scrollIntoView(true);", element)
        element.click()
        time.sleep(2)
        parseHistoryData()
        driver.get(HISTORY_URL)
        time.sleep(3)
        # Scroll a few items into view after the reload, presumably so the
        # page loads further entries before the next lookup.
        # NOTE(review): the nesting of this loop inside the per-entry loop was
        # reconstructed from whitespace-collapsed source - confirm.
        for line in range(1, int(dl)+3):
            xpath = "//div[@class='inf-container']/dl[@class='d-list-highlight-hover'][{0}]//dd[{1}]".format(1, line)
            element = driver.find_element_by_xpath(xpath)
            driver.execute_script("return arguments[0].scrollIntoView(true);", element)
            time.sleep(1)


if __name__ == "__main__":
    if len(sys.argv) < 4:
        # The browser was already started at import time; do not leave it open.
        driver.quit()
        sys.exit("Usage: python test.py username password count")
    username = str(sys.argv[1])
    password = str(sys.argv[2])
    try:
        count = int(sys.argv[3])
        createresultfolder()
        login(username, password)
        toHistoryPage(count)
        print("The parse result can be found in under Result folder under subdirectory " + str(result_folder))
    except Exception:
        print("Prasing failed")
        # Show the cause instead of hiding it behind the generic message.
        traceback.print_exc()
    finally:
        # Always close the browser, whether parsing succeeded or not.
        driver.quit()