Skip to content

Commit

Permalink
feat: logged task execution results
Browse files Browse the repository at this point in the history
  • Loading branch information
lynchee-owo committed May 16, 2024
1 parent c9c773b commit 77e1223
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 48 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,6 @@
utils/reload/*.js
utils/reload/injections/*.js
public/manifest.json

# test results
/results
45 changes: 36 additions & 9 deletions benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ def setup_driver():
chrome_options.add_argument("--window-size=1600,900")
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=chrome_options)
# Set script timeout to 240 seconds
driver.set_script_timeout(240)
# Set script timeout to 120 seconds
driver.set_script_timeout(120)
return driver

def dispatch_event(driver, event_name, event):
Expand All @@ -46,7 +46,7 @@ def dispatch_event(driver, event_name, event):
driver.execute_script(script)

def add_task_listener(driver, task_id, max_retries=3):
logging.info(f'Adding task listener for task {task_id}')
logging.info(f'Adding task listeners for task {task_id}')
"""
Add event listeners for task history and screenshot events. Both events include task status.
Then process those events as they are captured.
Expand Down Expand Up @@ -79,16 +79,25 @@ def add_task_listener(driver, task_id, max_retries=3):
"""

attempts = 0
result = ""

def handle_event(event_data):
nonlocal attempts
nonlocal result
if not event_data:
logging.info("No event data received")
return
if event_data['type'] == 'history':
# Record history when task stops
logging.info(f'Task {task_id} status: {event_data["status"]}')
write_history(task_id, event_data['data'])
result = event_data["status"]
# Determine the last action status
history = event_data['data']
last_action = history[-1]["action"]["operation"]["name"]
if last_action == "finish":
result = "success"
else:
result = "fail"
write_history(task_id, history)
attempts = 0
return
if event_data['type'] == 'screenshot':
Expand All @@ -98,7 +107,8 @@ def handle_event(event_data):
handle_event(driver.execute_async_script(script))
else:
logging.error(f"Unhandled event data type: {event_data['type']}")
raise ValueError(f"Unhandled event data type: {event_data['type']}")
result = "script-error"
return

while attempts < max_retries:
try:
Expand All @@ -108,15 +118,25 @@ def handle_event(event_data):
if "javascript error: document unloaded while waiting for result" in str(e):
attempts += 1
logging.warning(f'Document unloaded error during task {task_id} attempt {attempts}: {str(e)}')
logging.info("Retrying...")
if attempts == max_retries:
logging.error(f'Maximum retry attempts reached for task {task_id}.')
result = 'doc-unload-max-retry'
break
else:
logging.info("Retrying...")
elif "script timeout" in str(e):
logging.error(f'Script timeout for task {task_id}: {str(e)}')
result = 'js-script-timeout'
break
else:
logging.error(f'WebDriver exception for task {task_id}: {str(e)}')
result = 'webdriver-error'
break
except Exception as e:
logging.error(f'Unhandled error for task {task_id}: {str(e)}')
result = 'python-script-error'
break
return result

def write_history(task_id, task_history):
task_dir = os.path.join(results_dir, f"test{task_id}")
Expand All @@ -143,9 +163,10 @@ def run_webwand_task(driver, task_id, task_description):
dispatch_event(driver, 'SetAPIKey', {"value": api_key})
dispatch_event(driver, 'SetTask', {"value": task_description})
dispatch_event(driver, 'RunTask', {})
add_task_listener(driver, task_id)
result = add_task_listener(driver, task_id)
end = time.time()
logging.info(f'Task {task_id} took {end - start} seconds to complete.')
return result

def click_extensions_icon(driver):
# Simulate click to open side panel
Expand Down Expand Up @@ -173,7 +194,13 @@ def main():
click_extensions_icon(driver)
initial_load = False

run_webwand_task(driver, task_id, task['ques'])
result = run_webwand_task(driver, task_id, task['ques'])
logging.info(f'Task {task_id} status: {result}')
# Optional: if this task timed out, reset the driver before the next task so it starts from a clean state
# if result == "js-script-timeout":
# driver.quit()
# driver = setup_driver()
# initial_load = True
driver.quit()

if __name__ == "__main__":
Expand Down
16 changes: 8 additions & 8 deletions results/testGoogle--0/interact_messages.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
},
"name": "setValueAndEnter"
},
"thought": "I will enter 'bananas' into the search bar and press Enter to search."
"thought": "I will enter 'bananas' in the search bar and press Enter to search."
},
"prompt": "The user requests the following task:\n\n Search for bananas\n\n\nCurrent time: 5/16/2024, 2:57:41 PM\nCurrent URL: https://www.google.com/\nCurrent page scrolling position: 100.0%\n\n\nUse the following data as a reference of the annotated elements (using `===` as a delimiter between each annotation):\n\nlabel = 1\nname = About\ntagName = A\n===\nlabel = 2\nname = Store\ntagName = A\n===\nlabel = 3\nname = Gmail\ntagName = A\n===\nlabel = 4\nname = Search for Images\ntagName = A\n===\nlabel = 5\nname = Google apps\ntagName = A\nrole = button\n===\nlabel = 6\nname = Sign in\ntagName = A\n===\nlabel = 7\nname = Celebrating Hank Adams\ntagName = A\n===\nlabel = 8\nname = Search\ntagName = TEXTAREA\ncurrentValue = \nplaceholder = \nrole = combobox\n===\nlabel = 9\nname = Search by voice\ntagName = DIV\nrole = button\n===\nlabel = 10\nname = Search by image\ntagName = DIV\nrole = button\n===\nlabel = 11\nname = Google Search\ntagName = INPUT\ncurrentValue = Google Search\nplaceholder = \nrole = button\n===\nlabel = 12\nname = I'm Feeling Lucky\ntagName = INPUT\ncurrentValue = I'm Feeling Lucky\nplaceholder = \nrole = button\n===\nlabel = 13\nname = Advertising\ntagName = A\n===\nlabel = 14\nname = Business\ntagName = A\n===\nlabel = 15\nname = How Search works\ntagName = A\n===\nlabel = 16\nname = Our third decade of climate action: join us\ntagName = A\n===\nlabel = 17\nname = Privacy\ntagName = A\n===\nlabel = 18\nname = Terms\ntagName = A\n===\nlabel = 19\nname = Settings\ntagName = DIV\nrole = button\n\nThis textarea currently has focus:\nlabel = 8\nname = Search\ntagName = TEXTAREA\ncurrentValue = \nplaceholder = \nrole = combobox\n",
"response": "{\n \"thought\": \"I will enter 'bananas' into the search bar and press Enter to search.\",\n \"action\": {\n \"name\": \"setValueAndEnter\",\n \"args\": {\n \"label\": \"8\",\n \"value\": \"bananas\"\n }\n }\n}",
"prompt": "The user requests the following task:\n\n Search for bananas\n\n\nCurrent time: 5/16/2024, 3:32:45 PM\nCurrent URL: https://www.google.com/\nCurrent page scrolling position: 100.0%\n\n\nUse the following data as a reference of the annotated elements (using `===` as a delimiter between each annotation):\n\nlabel = 1\nname = About\ntagName = A\n===\nlabel = 2\nname = Store\ntagName = A\n===\nlabel = 3\nname = Gmail\ntagName = A\n===\nlabel = 4\nname = Search for Images\ntagName = A\n===\nlabel = 5\nname = Google apps\ntagName = A\nrole = button\n===\nlabel = 6\nname = Sign in\ntagName = A\n===\nlabel = 7\nname = Celebrating Hank Adams\ntagName = A\n===\nlabel = 8\nname = Search\ntagName = TEXTAREA\ncurrentValue = \nplaceholder = \nrole = combobox\n===\nlabel = 9\nname = Search by voice\ntagName = DIV\nrole = button\n===\nlabel = 10\nname = Search by image\ntagName = DIV\nrole = button\n===\nlabel = 11\nname = Google Search\ntagName = INPUT\ncurrentValue = Google Search\nplaceholder = \nrole = button\n===\nlabel = 12\nname = I'm Feeling Lucky\ntagName = INPUT\ncurrentValue = I'm Feeling Lucky\nplaceholder = \nrole = button\n===\nlabel = 13\nname = Advertising\ntagName = A\n===\nlabel = 14\nname = Business\ntagName = A\n===\nlabel = 15\nname = How Search works\ntagName = A\n===\nlabel = 16\nname = Our third decade of climate action: join us\ntagName = A\n===\nlabel = 17\nname = Privacy\ntagName = A\n===\nlabel = 18\nname = Terms\ntagName = A\n===\nlabel = 19\nname = Settings\ntagName = DIV\nrole = button\n\nThis textarea currently has focus:\nlabel = 8\nname = Search\ntagName = TEXTAREA\ncurrentValue = \nplaceholder = \nrole = combobox\n",
"response": "{\n \"thought\": \"I will enter 'bananas' in the search bar and press Enter to search.\",\n \"action\": {\n \"name\": \"setValueAndEnter\",\n \"args\": {\n \"label\": \"8\",\n \"value\": \"bananas\"\n }\n }\n}",
"usage": {
"completion_tokens": 63,
"prompt_tokens": 2599,
Expand All @@ -23,14 +23,14 @@
"operation": {
"name": "finish"
},
"thought": "The search results for 'bananas' are displayed. The first result is a Wikipedia link about bananas, followed by related questions and nutritional information."
"thought": "The search results for 'bananas' are displayed. The top result is a Wikipedia link about bananas, followed by a 'People also ask' section with questions about the benefits, daily consumption, nutritional content, and best time to eat bananas."
},
"prompt": "The user requests the following task:\n\n Search for bananas\nYou have already taken the following actions: \nThought: I will enter 'bananas' into the search bar and press Enter to search.\nAction:{\"name\":\"setValueAndEnter\",\"args\":{\"label\":\"8\",\"value\":\"bananas\"}}\n\n\n\nCurrent time: 5/16/2024, 2:57:49 PM\nCurrent URL: https://www.google.com/search\nCurrent page scrolling position: 22.1%\n\n\nUse the following data as a reference of the annotated elements (using `===` as a delimiter between each annotation):\n\nlabel = 1\nname = Celebrating Hank Adams\ntagName = A\n===\nlabel = 2\nname = Search\ntagName = TEXTAREA\ncurrentValue = bananas\nplaceholder = \nrole = combobox\n===\nlabel = 3\nname = Clear\ntagName = DIV\nrole = button\n===\nlabel = 4\nname = Search by voice\ntagName = DIV\nrole = button\n===\nlabel = 5\nname = Search by image\ntagName = DIV\nrole = button\n===\nlabel = 6\nname = Search\ntagName = BUTTON\n===\nlabel = 7\nname = Settings\ntagName = DIV\nrole = button\n===\nlabel = 8\nname = Google apps\ntagName = A\nrole = button\n===\nlabel = 9\nname = Sign in\ntagName = A\n===\nlabel = 10\nname = Images\ntagName = A\nrole = link\n===\nlabel = 11\nname = Shopping\ntagName = A\nrole = link\n===\nlabel = 12\nname = Videos\ntagName = A\nrole = link\n===\nlabel = 13\nname = Forums\ntagName = A\nrole = link\n===\nlabel = 14\nname = More\ntagName = DIV\nrole = button\n===\nlabel = 15\nname = Tools\ntagName = DIV\nrole = button\n===\nlabel = 16\nname = Add Nutrition facts\ntagName = A\nrole = link\n===\nlabel = 17\nname = Add Benefits\ntagName = A\nrole = link\n===\nlabel = 18\nname = Add Recipes\ntagName = A\nrole = link\n===\nlabel = 19\nname = Add In stock\ntagName = A\nrole = link\n===\nlabel = 20\nname = Add Types\ntagName = A\nrole = link\n===\nlabel = 21\nname = Add Open now\ntagName = A\nrole = link\n===\nlabel = 22\nname = Add Plant\ntagName = A\nrole = link\n===\nlabel = 23\nname = Add Tree\ntagName = A\nrole = link\n===\nlabel 
= 24\nname = Add Who should avoid\ntagName = A\nrole = link\n===\nlabel = 25\nname = SafeSearch\ntagName = DIV\nrole = button\n===\nlabel = 26\nname = Banana Wikipedia https://en.wikipedia.org \u203a wiki \u203a Banana\ntagName = A\n===\nlabel = 27\nname = About this result\ntagName = DIV\nrole = button\n===\nlabel = 28\nname = List of banana cultivars\ntagName = A\n===\nlabel = 29\nname = Cavendish banana\ntagName = A\n===\nlabel = 30\nname = Gros Michel\ntagName = A\n===\nlabel = 31\nname = Cooking banana\ntagName = A\n===\nlabel = 32\nname = About this result\ntagName = DIV\nrole = button\n===\nlabel = 33\nname = What are the benefits of eating bananas?\ntagName = DIV\nrole = button\n===\nlabel = 34\nname = Is it good to eat a banana every day?\ntagName = DIV\nrole = button\n===\nlabel = 35\nname = What are bananas high in?\ntagName = DIV\nrole = button\n===\nlabel = 36\nname = What is the best time to eat bananas?\ntagName = DIV\nrole = button\n===\nlabel = 37\nname = Feedback\ntagName = DIV\nrole = button\n===\nlabel = 38\nname = Food Nutrition\u2026Banana nutrition\ntagName = DIV\nrole = button\n===\nlabel = 39\nname = Protein Amount\u2026protein in banana\ntagName = DIV\nrole = button\n===\nlabel = 40\nname = Equivalent\u2026what fruit is equivalent to a banana\ntagName = DIV\nrole = button\n===\nlabel = 41\nname = Sugar Amount\u2026how much sugar in a banana\ntagName = DIV\nrole = button\n===\nlabel = 42\nname = 6 more\ntagName = DIV\nrole = button\n===\nlabel = 43\nname = Feedback\ntagName = DIV\nrole = button\n",
"response": "{\n \"thought\": \"The search results for 'bananas' are displayed. The first result is a Wikipedia link about bananas, followed by related questions and nutritional information.\",\n \"action\": {\n \"name\": \"finish\"\n }\n}",
"prompt": "The user requests the following task:\n\n Search for bananas\nYou have already taken the following actions: \nThought: I will enter 'bananas' in the search bar and press Enter to search.\nAction:{\"name\":\"setValueAndEnter\",\"args\":{\"label\":\"8\",\"value\":\"bananas\"}}\n\n\n\nCurrent time: 5/16/2024, 3:32:54 PM\nCurrent URL: https://www.google.com/search\nCurrent page scrolling position: 20.3%\n\n\nUse the following data as a reference of the annotated elements (using `===` as a delimiter between each annotation):\n\nlabel = 1\nname = Celebrating Hank Adams\ntagName = A\n===\nlabel = 2\nname = Search\ntagName = TEXTAREA\ncurrentValue = bananas\nplaceholder = \nrole = combobox\n===\nlabel = 3\nname = Clear\ntagName = DIV\nrole = button\n===\nlabel = 4\nname = Search by voice\ntagName = DIV\nrole = button\n===\nlabel = 5\nname = Search by image\ntagName = DIV\nrole = button\n===\nlabel = 6\nname = Search\ntagName = BUTTON\n===\nlabel = 7\nname = Settings\ntagName = DIV\nrole = button\n===\nlabel = 8\nname = Google apps\ntagName = A\nrole = button\n===\nlabel = 9\nname = Sign in\ntagName = A\n===\nlabel = 10\nname = Images\ntagName = A\nrole = link\n===\nlabel = 11\nname = Shopping\ntagName = A\nrole = link\n===\nlabel = 12\nname = Videos\ntagName = A\nrole = link\n===\nlabel = 13\nname = Forums\ntagName = A\nrole = link\n===\nlabel = 14\nname = More\ntagName = DIV\nrole = button\n===\nlabel = 15\nname = Tools\ntagName = DIV\nrole = button\n===\nlabel = 16\nname = Add Nutrition facts\ntagName = A\nrole = link\n===\nlabel = 17\nname = Add Benefits\ntagName = A\nrole = link\n===\nlabel = 18\nname = Add Recipes\ntagName = A\nrole = link\n===\nlabel = 19\nname = Add In stock\ntagName = A\nrole = link\n===\nlabel = 20\nname = Add Types\ntagName = A\nrole = link\n===\nlabel = 21\nname = Add Open now\ntagName = A\nrole = link\n===\nlabel = 22\nname = Add Tree\ntagName = A\nrole = link\n===\nlabel = 23\nname = Add Plant\ntagName = A\nrole = link\n===\nlabel = 
24\nname = Add Who should avoid\ntagName = A\nrole = link\n===\nlabel = 25\nname = SafeSearch\ntagName = DIV\nrole = button\n===\nlabel = 26\nname = Banana Wikipedia https://en.wikipedia.org \u203a wiki \u203a Banana\ntagName = A\n===\nlabel = 27\nname = About this result\ntagName = DIV\nrole = button\n===\nlabel = 28\nname = List of banana cultivars\ntagName = A\n===\nlabel = 29\nname = Cavendish banana\ntagName = A\n===\nlabel = 30\nname = Gros Michel\ntagName = A\n===\nlabel = 31\nname = Cooking banana\ntagName = A\n===\nlabel = 32\nname = About this result\ntagName = DIV\nrole = button\n===\nlabel = 33\nname = What are the benefits of eating bananas?\ntagName = DIV\nrole = button\n===\nlabel = 34\nname = Is it good to eat a banana every day?\ntagName = DIV\nrole = button\n===\nlabel = 35\nname = What are bananas high in?\ntagName = DIV\nrole = button\n===\nlabel = 36\nname = What is the best time to eat bananas?\ntagName = DIV\nrole = button\n===\nlabel = 37\nname = Feedback\ntagName = DIV\nrole = button\n===\nlabel = 38\nname = Food Nutrition\u2026Banana nutrition\ntagName = DIV\nrole = button\n===\nlabel = 39\nname = Protein Amount\u2026protein in banana\ntagName = DIV\nrole = button\n===\nlabel = 40\nname = Equivalent\u2026what fruit is equivalent to a banana\ntagName = DIV\nrole = button\n===\nlabel = 41\nname = Sugar Amount\u2026how much sugar in a banana\ntagName = DIV\nrole = button\n===\nlabel = 42\nname = 6 more\ntagName = DIV\nrole = button\n===\nlabel = 43\nname = Feedback\ntagName = DIV\nrole = button\n",
"response": "{\n \"thought\": \"The search results for 'bananas' are displayed. The top result is a Wikipedia link about bananas, followed by a 'People also ask' section with questions about the benefits, daily consumption, nutritional content, and best time to eat bananas.\",\n \"action\": {\n \"name\": \"finish\"\n }\n}",
"usage": {
"completion_tokens": 50,
"completion_tokens": 70,
"prompt_tokens": 3147,
"total_tokens": 3197
"total_tokens": 3217
}
}
]
Loading

0 comments on commit 77e1223

Please sign in to comment.