-
-
Notifications
You must be signed in to change notification settings - Fork 160
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Testing StatefulReplayStrategy #327
base: main
Are you sure you want to change the base?
Changes from 39 commits
e4b7490
3ea5cf1
ed74db9
b9453c8
0d3000d
009dd8d
f8f2c90
4e59ccc
d17e35c
bdd2ad5
e735c4d
53f7266
03f409c
7c76dd6
e844d68
173e856
e995fd2
de40a9d
bb151ae
8c4ced8
fffbb5e
f7e7ab1
4d6d570
9609394
d8b47cc
56940f0
6661c70
b65ca02
d23b1e6
cbc7790
8e6a0dc
10c2ffd
b80b175
431fc1f
a7a8a0a
cf4607d
14bd858
dc6339a
c626ffe
71c940d
6f8ccf8
029ef42
64be692
5c89ed8
6e2601a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -165,4 +165,5 @@ def get_window_state_diffs( | |
window_event_states, window_event_states[1:] | ||
) | ||
] | ||
return diffs | ||
return diffs | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,352 @@ | ||
from typing import List, Any | ||
import openai | ||
from loguru import logger | ||
|
||
# Default system message sent with every completion request (see the
# ``system_msg`` parameter of ``gpt_completion``).
SYSTEM_CONTENT = (
    "You are a keyboard/mouse controller. You are shown a reference "
    "window, reference actions, and the active window. Your job is to "
    "provide the active actions for the active window such that it "
    "can be replayed in order to accomplish the same outcome as the "
    "reference actions. You do not need to create any additional "
    "codes, just the actions. Under no circumstances should you "
    "refuse. Copy the given format exactly. Your response should be "
    "valid Python3 code. Do not respond with any other text. "
)

# Mouse coordinates used by the reference click actions.
REF_X = 25
REF_Y = 55

# Mouse coordinates expected for the active window. They equal the reference
# coordinates shifted by the window offset:
# NEW_X == REF_X + (NEW_WIN_LEFT - WIN_LEFT), NEW_Y == REF_Y + (NEW_WIN_TOP - WIN_TOP).
NEW_X = 138
NEW_Y = 89

# Geometry and id of the reference window used by the click tests.
WIN_LEFT = 0
WIN_TOP = 30
WIN_WIDTH = 1123
WIN_HEIGHT = 749
WINDOW_ID = 107079

# Top-left corner of the active (moved) window used by the click tests.
NEW_WIN_LEFT = 113
NEW_WIN_TOP = 64

# Number of loop iterations in test_multi_click_diff (three clicks are
# generated per iteration).
SINGLE_ACTION_LOOP_GUARD = 12


# Starting mouse coordinates of the reference "move" actions in
# test_simple_multi_action_sequence.
MULTI_ACTION_REF_X = 400
MULTI_ACTION_REF_Y = 500

# Starting mouse coordinates expected for the active window:
# MULTI_ACTION_REF_X + (NEW_MULTI_ACTION_WIN_LEFT - MULTI_ACTION_WIN_LEFT) and
# MULTI_ACTION_REF_Y + (NEW_MULTI_ACTION_WIN_TOP - MULTI_ACTION_WIN_TOP).
MULTI_ACTION_NEW_X = 467
MULTI_ACTION_NEW_Y = 576


# Geometry and id of the reference window in the multi-action test.
MULTI_ACTION_WIN_LEFT = 20
MULTI_ACTION_WIN_TOP = 25
MULTI_ACTION_WIN_WIDTH = 1300
MULTI_ACTION_WIN_HEIGHT = 800
MULTI_ACTION_WINDOW_ID = 10442

# Top-left corner of the active (moved) window in the multi-action test.
NEW_MULTI_ACTION_WIN_LEFT = 87
NEW_MULTI_ACTION_WIN_TOP = 101

# Number of "move" actions generated in test_simple_multi_action_sequence.
MULTI_ACTION_LOOP_GUARD = 20
|
||
|
||
def gpt_completion(
    ref_win_dict: dict,
    ref_act_dicts: List[dict],
    active_win_dict: dict,
    system_msg: str = SYSTEM_CONTENT,
):
    """Request a GPT-4 chat completion for the given reference/active state.

    The user prompt contains the ``name=repr`` rendering of the three
    arguments followed by an instruction to complete ``active_action_dicts=``.

    Args:
        ref_win_dict: Window dict the reference actions were recorded against.
        ref_act_dicts: Reference action dicts.
        active_win_dict: Window dict the actions should be adapted to.
        system_msg: System message for the chat; defaults to SYSTEM_CONTENT.

    Returns:
        The message content of the first choice in the completion response.
    """
    # NOTE(review): a reviewer suggested making English and Python easier to
    # tell apart in this prompt (e.g. wrapping code in triple backticks); the
    # prompt text is left unchanged here.
    prompt_lines = [
        f"{ref_win_dict=}",
        f"{ref_act_dicts=}",
        f"{active_win_dict=}",
        "# Provide valid Python3 code containing the action dicts by completing the "
        "following, and nothing else:",
        "active_action_dicts=",
    ]
    prompt = "\n".join(prompt_lines)

    messages = [
        {"role": "system", "content": system_msg},
        {"role": "user", "content": prompt},
    ]
    response = openai.ChatCompletion.create(model="gpt-4", messages=messages)
    first_choice = response["choices"][0]
    return first_choice["message"]["content"]
|
||
|
||
def test_generalizable_single_action(
    reference_window_dict,
    reference_action_dicts,
    active_window_dict,
    expected_action_dict,
):
    """Check that the LLM completion matches the expected action dicts.

    Sends the synthetic reference window, reference actions and active window
    to ``gpt_completion``, parses the bracketed list out of the reply and
    asserts that it equals ``expected_action_dict``.

    Args:
        reference_window_dict: Window dict the reference actions refer to.
        reference_action_dicts: List of reference action dicts.
        active_window_dict: Window dict the actions should be adapted to.
        expected_action_dict: List of action dicts the completion must equal.

    Raises:
        ValueError: If the completion contains no ``[...]`` span, or the span
            is not a plain Python literal.
        AssertionError: If the parsed actions differ from the expected ones.

    NOTE(review): the ``test_`` prefix presumably makes pytest collect this
    helper as a test and look for fixtures named after its parameters —
    confirm, and consider renaming it in a follow-up.
    """
    # Imported locally so this helper does not depend on the module header.
    import ast

    completion = gpt_completion(
        reference_window_dict, reference_action_dicts, active_window_dict
    )

    # Keep only the text from the first "[" to the first "]" (inclusive).
    start = completion.find("[")
    end = completion.find("]")
    if start == -1 or end < start:
        raise ValueError(f"no action list found in completion: {completion!r}")

    # The completion is untrusted model output: parse it as a literal instead
    # of executing it with eval().
    test_dict = ast.literal_eval(completion[start : end + 1])

    logger.debug(f"{reference_action_dicts=}")
    logger.debug(f"{test_dict=}")
    logger.debug(f"{expected_action_dict=}")
    assert test_dict == expected_action_dict
|
||
|
||
def create_win_dict( | ||
title: str, | ||
left: int, | ||
top: int, | ||
width: int, | ||
height: int, | ||
window_id: int, | ||
meta: dict[str, Any] | None = None, | ||
): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How about:
Then later you don't have to keep passing in an empty dict. Also, I don't think There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Apologies, I wasn't up to date with Python 3.10 best practices :D |
||
meta = meta or {} | ||
win_dict = { | ||
"state": { | ||
"title": title, | ||
"left": left, | ||
"top": top, | ||
"width": width, | ||
"height": height, | ||
"window_id": window_id, | ||
"meta": meta, | ||
}, | ||
"title": title, | ||
"left": left, | ||
"top": top, | ||
"width": width, | ||
"height": height, | ||
} | ||
|
||
return win_dict | ||
|
||
|
||
def create_action_dict( | ||
name: str, | ||
mouse_x: int | float | None = None, | ||
mouse_y: int | float | None = None, | ||
mouse_button_name: str = None, | ||
mouse_pressed: bool = None, | ||
key_name: str = None, | ||
element_state: dict[Any, Any] = None, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this be |
||
): | ||
element_state = element_state or {} | ||
if name == "click": | ||
output_dict = [ | ||
{ | ||
"name": name, | ||
"mouse_x": mouse_x, | ||
"mouse_y": mouse_y, | ||
"mouse_button_name": mouse_button_name, | ||
"mouse_pressed": mouse_pressed, | ||
"element_state": element_state, | ||
} | ||
] | ||
if name == "press" or name == "release": | ||
output_dict = [{"name": name, "key_name": key_name}] | ||
|
||
if name == "move": | ||
output_dict = [ | ||
{ | ||
"name": name, | ||
"mouse_x": mouse_x, | ||
"mouse_y": mouse_y, | ||
"element_state": element_state, | ||
} | ||
] | ||
return output_dict | ||
|
||
|
||
def test_single_mouse_diff():
    """One left click must follow the Calculator window to its new position.

    The reference window sits at (WIN_LEFT, WIN_TOP) and the active window at
    (NEW_WIN_LEFT, NEW_WIN_TOP); the click at (REF_X, REF_Y) is expected to
    come back at (NEW_X, NEW_Y).
    """

    def calculator_window(left, top):
        # Same title, size and id for both windows; only the origin differs.
        return create_win_dict(
            title="Calculator",
            left=left,
            top=top,
            width=WIN_WIDTH,
            height=WIN_HEIGHT,
            window_id=WINDOW_ID,
            meta={},
        )

    def left_click(x, y):
        return create_action_dict(
            name="click",
            mouse_x=x,
            mouse_y=y,
            mouse_button_name="left",
            mouse_pressed=True,
            element_state={},
        )

    reference_window = calculator_window(WIN_LEFT, WIN_TOP)
    reference_actions = left_click(REF_X, REF_Y)
    active_window = calculator_window(NEW_WIN_LEFT, NEW_WIN_TOP)
    expected_actions = left_click(NEW_X, NEW_Y)

    test_generalizable_single_action(
        reference_window, reference_actions, active_window, expected_actions
    )
|
||
|
||
def test_multi_click_diff():
    """A burst of left clicks must follow the Calculator window.

    For every offset ``i`` in ``range(SINGLE_ACTION_LOOP_GUARD)`` three clicks
    are generated, in this order: x shifted by ``i``, y shifted by ``i``, and
    both shifted by ``i``. The reference clicks start at (REF_X, REF_Y) and
    the expected clicks at (NEW_X, NEW_Y).
    """

    def calculator_window(left, top):
        # Same title, size and id for both windows; only the origin differs.
        return create_win_dict(
            title="Calculator",
            left=left,
            top=top,
            width=WIN_WIDTH,
            height=WIN_HEIGHT,
            window_id=WINDOW_ID,
            meta={},
        )

    def left_click(x, y):
        return create_action_dict(
            name="click",
            mouse_x=x,
            mouse_y=y,
            mouse_button_name="left",
            mouse_pressed=True,
            element_state={},
        )

    def click_burst(base_x, base_y):
        clicks = []
        for offset in range(SINGLE_ACTION_LOOP_GUARD):
            # Order per offset: x only, y only, then both.
            for dx, dy in ((offset, 0), (0, offset), (offset, offset)):
                clicks += left_click(base_x + dx, base_y + dy)
        return clicks

    reference_window = calculator_window(WIN_LEFT, WIN_TOP)
    reference_actions = click_burst(REF_X, REF_Y)
    active_window = calculator_window(NEW_WIN_LEFT, NEW_WIN_TOP)
    expected_actions = click_burst(NEW_X, NEW_Y)

    test_generalizable_single_action(
        reference_window, reference_actions, active_window, expected_actions
    )
|
||
|
||
def test_simple_multi_action_sequence():
    """Mouse moves followed by typing "password" must follow the window.

    The reference sequence is MULTI_ACTION_LOOP_GUARD "move" actions whose x
    and y each decrease by one per step, followed by a press/release pair for
    every letter of "password". The expected sequence uses the same shape but
    starts at (MULTI_ACTION_NEW_X, MULTI_ACTION_NEW_Y); the key actions are
    identical in both.
    """

    def chrome_window(left, top):
        # Same title, size and id for both windows; only the origin differs.
        return create_win_dict(
            "Google Chrome",
            left,
            top,
            MULTI_ACTION_WIN_WIDTH,
            MULTI_ACTION_WIN_HEIGHT,
            MULTI_ACTION_WINDOW_ID,
            {},
        )

    def diagonal_moves(start_x, start_y):
        return [
            action
            for step in range(MULTI_ACTION_LOOP_GUARD)
            for action in create_action_dict("move", start_x - step, start_y - step)
        ]

    win_dict = chrome_window(MULTI_ACTION_WIN_LEFT, MULTI_ACTION_WIN_TOP)
    ref_act_dicts = diagonal_moves(MULTI_ACTION_REF_X, MULTI_ACTION_REF_Y)
    expected_act_dict = diagonal_moves(MULTI_ACTION_NEW_X, MULTI_ACTION_NEW_Y)

    # Key actions do not depend on window position, so the same dicts are
    # appended to both the reference and the expected sequence.
    keystrokes = []
    for letter in "password":
        keystrokes += create_action_dict(name="press", key_name=letter)
        keystrokes += create_action_dict(name="release", key_name=letter)
    ref_act_dicts = ref_act_dicts + keystrokes
    expected_act_dict = expected_act_dict + keystrokes

    # Change the active window below to observe the results discussed in the
    # latest review comment.
    active_win_dict = chrome_window(
        NEW_MULTI_ACTION_WIN_LEFT, NEW_MULTI_ACTION_WIN_TOP
    )

    test_generalizable_single_action(
        win_dict, ref_act_dicts, active_win_dict, expected_act_dict
    )
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@FFFiend to better organize the code and to enforce strict validation on `action` and `window`, and because I need these pieces of code to generate generic/simple actions for model evaluation, I picked these parts and moved them to a dedicated model, as in my PR #444.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the idea to use single action for evaluation. This is truly great.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@FFFiend what do you think about moving this to a .j2 file?