# NOTE(review): reconstructed from a newline-mangled diff of examples/crafter/baseline.py.
# A plain LLM baseline for the Crafter environment: each step, the LLM is shown the
# manual + a textual observation, asked to reason, then asked to name an action.

import os
os.environ["MINEDOJO_HEADLESS"] = "1"
import argparse
import numpy as np
from tqdm import tqdm
import gym
import crafter
from crafter_description import describe_frame, action_list, match_act
from functools import partial
from utils import get_ctxt, describe_achievements

# Static instruction manual shown to the LLM every step.
MANUAL = get_ctxt()

parser = argparse.ArgumentParser()
parser.add_argument('--llm_name', type=str, default='yintat-all-gpt-4', help='Name of the LLM')

args = parser.parse_args()

LLM_name = args.llm_name

env = crafter.Env(area=(256, 256))
action_space = env.action_space

# Replace with your own LLM API.
# Note: query_model takes two arguments: 1) message in openai chat completion form
# (list of dictionaries), 2) an index to indicate where the message should be
# truncated if the length exceeds LLM context length.
from llm_api import get_query
query_model = partial(get_query(LLM_name), max_gen=2048)


def compose_ingame_prompt(info, question, past_qa=None):
    """Build an OpenAI-style chat message list for one in-game question.

    Args:
        info: dict with at least 'manual' and 'obs' text fields.
        question: the user question to append last.
        past_qa: optional list of (question, answer) pairs replayed as
            user/assistant turns.

    Returns:
        (messages, shrink_idx): the message list and the index from which the
        history may be truncated if it exceeds the LLM context length.
    """
    # Fix: the original used a mutable default (past_qa=[]), which is shared
    # across calls; None-sentinel preserves the interface without the hazard.
    if past_qa is None:
        past_qa = []

    messages = [
        {"role": "system", "content": "You’re a player trying to play the game."}
    ]

    if len(info['manual']) > 0:
        messages.append({"role": "system", "content": info['manual']})

    messages.append({"role": "system", "content": "{}".format(info['obs'])})

    for q, a in past_qa:
        messages.append({"role": "user", "content": q})
        messages.append({"role": "assistant", "content": a})

    messages.append({"role": "user", "content": question})

    return messages, 1  # 1 = truncation index for over-long histories


# Two-stage prompting: free-form reasoning first, then an exact action choice.
questions = [
    "What is the best action to take? Let's think step by step, ",
    "Choose the best executable action from the list of all actions. Write the exact chosen action."
]


def run():
    """Roll out `num_iter` episodes, querying the LLM each step and logging to wandb."""
    import wandb  # hoisted out of the episode loop (was re-imported per episode)

    env = crafter.Env(area=(256, 256))
    env_steps = 1000000
    num_iter = 2

    rewards = []
    progresses = []
    for eps in tqdm(range(num_iter), desc="Evaluating LLM {}".format(LLM_name)):
        wandb.init(project="Crafter_baseline", config={"LLM": LLM_name, "eps": eps, "num_iter": num_iter, "env_steps": env_steps})
        step = 0
        trajectories = []
        qa_history = []
        # NOTE(review): `progress` is never appended to, so np.max(progress)
        # below is always 0 — TODO confirm intended.
        progress = [0]
        reward = 0
        rewards = []  # NOTE(review): shadows/resets the outer `rewards` each episode
        done = False

        columns = ["Context", "Step", "OBS", "Score", "Reward", "Total Reward"] + questions + ["Action"]
        wandb_table = wandb.Table(columns=columns)

        env.reset()
        a = action_list.index("noop")
        obs, reward, done, info = env.step(a)

        while step < env_steps:
            last_act_desc, desc = describe_frame(info, 1)
            if len(trajectories) > 0:
                # Back-fill the action description for the previous step.
                trajectories[-1][1] = last_act_desc
            trajectories.append([step, None, desc])
            # Show the LLM the last two gamesteps, marking the current one.
            text_obs = "\n\n".join(["== Gamestep {}{} ==\n\n".format(i, "" if i != trajectories[-1][0] else " (current)",) + "{}{}".format(d, "\n\nAction:\n{}".format(a) if a is not None else "") for i, a, d in trajectories[-2:]])
            info['obs'] = text_obs
            info['manual'] = describe_achievements(info, MANUAL)
            info['reward'] = reward
            info['score'] = sum(rewards)
            new_row = [info['manual'], step, info['obs'], info['score'], reward, sum(rewards)]
            # Fix: the keys were '"metric/x".format(eps)' — a no-op format on a
            # static string; dropped the dead .format calls (identical keys).
            wandb.log({"metric/total_reward": sum(rewards),
                       "metric/score": info['score'],
                       "metric/reward": reward,
                       })

            if done:
                break

            qa_history = []
            for question in questions:
                prompt = compose_ingame_prompt(info, question, qa_history)
                answer, _ = query_model(*prompt)
                qa_history.append((question, answer))
                new_row.append(answer)
                answer_act = answer

            a, _, _ = match_act(answer_act)
            if a is None:
                a = action_list.index("noop")  # fall back when the answer matches no action
            new_row.append(action_list[a])
            obs, reward, done, info = env.step(a)
            rewards.append(reward)

            step += 1
            wandb_table.add_data(*new_row)

        progresses.append(np.max(progress))
        wandb.log({"rollout/rollout-{}".format(eps): wandb_table,
                   "final/total_reward": sum(rewards),
                   "final/episodic_step": step,
                   "final/eps": eps,
                   })
        del wandb_table
        wandb.finish()


if __name__ == "__main__":
    # Fix: guarded the bare top-level run() so importing this module no longer
    # launches a full evaluation.
    run()
+Output a Json list in the following format: +[ +{"object":$type, "direction":$precise_direction, "distance":$distance$, "facing":$[yes/no], 'coordinate':"precise coordinates", "requirements":"requirements to interact from instruction manual, put 'NA' if not applicable"}, +... +] + +Second, in one sentence, describe what the player sees further to the front. +""", +'dep':[], +'shorthand': "Current observation/surroundings", +'compose': ComposeObservationPrompt(), +'query': query_fast_accurate, +}, + +'obs_inv':{ +'prompt':"""Describe the player's inventory as a json dictionary, with the item_names as key and item_amount as value. Write '{}' if inventory is empty.""", +'dep':[], +'shorthand': "Current inventory", +'compose': ComposeObservationPrompt(), +'query': query_fast, +}, + +'obs_vit':{ +'prompt':"""Describe the player's vitals as a json dictionary, in the format of {vital_name: "value/max"}.""", +'dep':[], +'shorthand': "Current vitals", +'compose': ComposeObservationPrompt(), +'query': query_fast, +}, + +'obs_chg':{ +'prompt':"""Using a list, document any changes to the player observation, inventory, and status in the most recent step. +Be precise about changes to numbers and details about the vitals, inventory, and the distance to objects around the player. +Be concise and only list the changes. Output 'NA' if there is no change""", +'dep':[], +'shorthand': "Changes to the observation in the current step", +'compose': ComposeObservationReasoningPrompt(), +'query': query_reason, +}, + +'obs_last_act':{ +'prompt':"""First, consider how the most recent action should have changed the player's observation/status/inventory, be specific with the action and the target. +Then, deduce if the last action succeeded. +If yes, end the answer here. +If not, identify potential cause for the failure, by focusing on the player's surrounding/observation, inventory, instruction manual, and missing information. 
+Be concise with the language, but precise with the details.""", +'dep':['obs_chg',], +'compose': ComposeActionReflectionPrompt(), +'query': query_spatial, +}, + + +'s-obs-obs':{ +'prompt':"""In two sentences, describe the observation and surroundings, including all object types and what the player is facing. +Make sure to include the location details of objects relevant to the current plan '$db.action_summary.plan-sketch$' and target '$db.action_summary.target$'.""", +'dep':['obs_obj'], +'shorthand': "Player observation", +'compose': ComposeSummaryPrompt(), +'query': query_fast, +}, + +'s-obs-vitals':{ +'prompt':"In one sentence, describe all inventory items and the current vitals.", +'dep':['obs_inv', 'obs_vit'], +'shorthand': "Player vitals", +'compose': ComposeSummaryPrompt(), +'query': query_fast, +}, + +'s-action':{ +'prompt':"""Output a Json dictionary of the following format: +``` +{ +"action": $action, # The most recent action, including the direction if it's a movement action +"repeats": $repeats # The number of times the action was repeated +"target": $target # The target of the action. Write 'NA' if the action is a movement/sleep action. +"success": $success # [yes/no] If the action succeeded +"causes_of_failure": $causes_of_failure # In one sentence, write the cause(s) of the failure. Write 'NA' if the action succeeded. +} +``` +""", +'dep':['obs_last_act',], +'shorthand': "Most recent action and result", +'compose': ComposeSummaryPrompt(), +'query': query_fast, +'after_query': SummaryAfterQuery(), +}, + +'obs_current_actions':{ +'prompt':"""For each action in the numbered list of all actions, reason with the current observation/surroundings and identify if the action is allowed at the current gamestep according to the requirements. +Then predict the target the action will interact with. 
+Format the output as a Json dictionary of the following format: +{ +"$action": {"requirements": $requirements of the action, "related observation": $what in the observation may affect/block the action?, "reasoning": $precise but very concise reason of why the action is blocked, "allowed": $yes/no, "target": $inferred target, "unlock new achievement": $yes/no, will the success of this action unlock an unaccomplished achievement?}, # do not output white spaces +... +} +""", +'dep':['s-action', 'obs_obj'], +'shorthand': "List of all actions", +'compose': ComposeObservationActionReasoningPrompt(), +'query': query_plan_accurate, +'after_query': ListActionAfterQuery(), +}, + +'reflect':{ +'prompt':"""Consider how the player reached the current state, concisely summarize the player's high-level trajectory in the gameplay history. Focus on the present and the past and do not make future predictions. +Has the player been making effective progress towards the top subgoal '$db.subgoals.subgoal$'? Has the player been effectively addressing all the mistakes/obstacles encountered? +Output 'NA' if there is no history.""", +'dep': ['obs_obj', 'obs_inv', 'obs_vit', 'obs_chg', 'obs_last_act'], +'after': ['s-obs-obs', 's-obs-vitals', 's-action',], +'shorthand': "Gameplay history", +'compose': ComposePlannerPrompt(), +'query': query_reason, +}, + +'planner_unexpected':{ +'prompt':"""Did the player encounter any obstacles or unexpected scenarios? Also, did player status change unexpectedly? +Write \"NA\" and end answer here if no expectancy. +Otherwise, should the player modify the subgoals to react?""", +'dep':['s-obs-obs', 's-obs-vitals', 'obs_obj', 'obs_inv', 'obs_chg', 'obs_last_act'], +'after': ['s-action',], +'short': "Past Unexpected Scenarios", +'compose': ComposePlannerReflectionPrompt(), +'query': query_reason, +}, + +'planner_mistake':{ +'prompt':"""Did the player make any mistakes approaching the top subgoal '$db.subgoals.subgoal$'? 
Write \"NA\" and stop the answer here if there's no mistake. +Otherwise, identify the potential causes of the mistake and find the most probable cause by analyzing the current situation. +Then, suggest a precise and executable modification to the subgoals to address the mistake.""", +'dep':['s-obs-obs', 's-obs-vitals', 'obs_chg', 'obs_current_actions'], +'after': ['s-action',], +'short': "Past Mistakes", +'compose': ComposePlannerReflectionPrompt(), +'query': query_reason, +}, + +'challenge':{ +'prompt':"""Identify three high level challenges. +Based on the current situation, score their urgency out of 5 (5 is the highest urgency) and score their convenience out of 5 (5 being most convenient). +Then choose a challenge to address based on the urgency, convenience, and overall objective of the game. +Be concise.""", +'dep':['reflect', 'planner_mistake', 'obs_obj', 'obs_inv', 'obs_vit'], +'after': ['s-action',], +'compose': ComposePlannerPrompt(), +'query': query_reason, +}, + +'achievements':{ +'prompt':"""Reason with the current situation and the requirements of the *current unaccomplished achievements*. +Identify a list of unaccomplished achievements that are easily unlockable and relevant to the current situation. +For each achievement, write a one-sentence explanation of how the achievement may be easily unlocked, and how it may benefit.""", +'dep':['reflect', 'obs_obj', 'obs_inv', 'obs_vit', 'obs_current_actions'], +'shorthand': "Relevant unaccomplished achievements", +'after': ['s-action',], +'compose': ComposePlannerPrompt(), +'query': query_reason, +}, + +'gate-plan_sketch':{ +'prompt':"""Reason with the instruction manual and knowledge base. Explain concisely how the choosen challenge may be approached given the player's current situation, with actions permitted by the game. +Then, +1. Confirm if the subgoal '$db.subgoals.subgoal$' is still accuracte for the choosen challenge. +2. 
Confirm if the subgoal '$db.subgoals.subgoal$' is incomplete and up-to-date according to the completion criteria '$db.subgoals.completion_criteria$'. + +If yes to both, end the answer here. +Then, sketch an updated high-level plan for addressing the choosen challenge. Do not plan to go back to something if you cannot provide it's location. +If situation allows, the plan-sketch should aim for achieving relevant unaccomplished achievements in the order of difficulty, without impacting player safety.""", +'dep':['reflect', 'planner_unexpected', 'planner_mistake', 'obs_obj', 'obs_inv', 'obs_vit', 'obs_current_actions', 'achievements', 'challenge'], +'compose': ComposePlannerPrompt(), +'query': query_reason, +}, + +'gate':{ +'prompt':"""Reason with the previous conversation and context, and output a Json dictionary with answers to the following questions: +``` +{ +"unexpected_encounters": $ANSWER, # [yes/no] Did the player encounter any unexpected scenarios or obstacles? +"mistake": $ANSWER, # [yes/no] Did the player make any mistakes? +"correction_planned": $ANSWER, # [yes/no] Was correction planned at current step for the mistake? +"confused": $ANSWER, # [yes/no] Does the player seem confused? +"top_subgoal_completed": $ANSWER, # [yes/no] Is the most recent top subgoal complete according to the completion criteria? +"top_subgoal_changed": $ANSWER, # [yes/no] Has the most recent top subgoal been changed? +"replan": $ANSWER # [yes/no] Was there a re-plan/change for the plan sketch? +} +``` +""", +'dep':['reflect', 'planner_unexpected', 'planner_mistake', 'gate-plan_sketch'], +'compose': ComposePlannerPrompt(), +'query': query_reason, +'after_query': ReflectionAfterQuery(), +}, + +'subgoals':{ +'prompt':"""List the top 3 subgoals for the player according to the plan sketch. The subgoals should be quantifiable and actionable with in-game actions. Put subgoals of highest priority first. 
+For each subgoal, specify how the completion may be precisely quantified in at most one sentence (include the numbers and details). +Do not include additional information other than the subgoals and quantification.""", +'dep':['reflect', 'obs_obj', 'obs_inv', 'obs_vit', 'gate-plan_sketch'], +'after': ['gate'], +'shorthand': "Current subgoals (decreasing priority)", +'compose': ComposePlannerPrompt(), +'query': query_reason, +}, + +'top-subgoal':{ +'prompt':"""Write the highest priority subgoal and completion criteria as a Json dictionary of the following format: +``` +{ +"subgoal": $subgoal, # The highest priority subgoal +"completion_criteria": $completion_criteria # The completion criteria for the subgoal +"guide": $guide # A brief high-level guide for completing the subgoal +} +``` +""", +'dep':['reflect', 's-action', 'obs_current_actions', 'subgoals'], +'shorthand': "Current top subgoal and completion criteria", +'compose': ComposePlannerPrompt(), +'query': query_reason, +'after_query': SubgoalAfterQuery(), +}, + +'subgoal_analysis':{ +'prompt':"""Check the instruction manual for requirements to complete the top subgoal. +Identify a list of unknown/missing information/details and do not make any assumptions. +Pay close attention to the details such as the numbers and specifications.""", +'dep':['reflect', 'obs_current_actions', 'top-subgoal'], +'shorthand': "Requirement and Missing Info Analysis for the Top Subgoal", +'compose': ComposePlannerPrompt(), +'query': query_reason, +}, + +'skill':{ +'prompt':"""First, try to find an existing skill from the 'skill library' that could be used to represent the current top subgoal. + +If none of the existing skills could be applied, create a new skill. + +The skill, parameters, and guide should be general enough to be reusable for tasks of the same class. 
+ +Format the choosen skill as a Json object of the following format: +``` +{$skill_name: [$1_line_skill_desciption, $supported_parameters, $skill_guide]} +``` +""", +'dep':['top-subgoal'], +'compose': ComposeSkillPrompt(), +'query': query_reason, +'after_query': SkillAfterQuery(), +}, + +'planner-adaptive':{ +'prompt':"""List at most 3 questions to help guide the agent on the subgoal: '$db.subgoals.subgoal$'. The questions should prompt the player to think concretely and precisely. +Example questions could ask the player to: + - identify goal-relevant details from the observation, history, and instruction manual + - recall related historical encounters + - potential obstacles and how to overcome them + - specify concrete ways to improve efficiency + +Output only the list of questions and nothing else. Write \"NA\" if there's no question to ask.""", +'dep':['reflect', 'planner_unexpected', 'planner_mistake', 'top-subgoal', 'subgoal_analysis'], +'compose': ComposePlannerPrompt(), +'query': query_reason, +'after_query': AdaptiveAfterQuery(), +}, + +'kb-add':{ +'prompt':"""Rewrite the unknown information and details list into a Json dictionary. +Give each item a concise but precise name as the key, and a dictionary of answers to the following inquiries as the value: +``` +"item_name":{ +"discovered": $ANSWER, # Can this unknown information be precisely uncovered or determined at the current gamestep? [yes/no] +"discovery": $ANSWER, # In concise but precise terms, write what has been uncovered [If the information is not uncovered, write 'NA'.] +"discovery_short": $ANSWER, # Condensed version of 'discovery', only containing precisely the discovered info [If the information is not uncovered, write "NA".] +"general": $ANSWER, # Confirm that this uncovered information remain unchanged in subsequent steps of this game. [yes/no] +"unknown": $ANSWER, # Confirm that this uncovered information is missing from instruction manual. 
[yes/no] +"concrete_and_precise": $ANSWER, # Is the uncovered information concrete and precise enough to add to instruction manual? [yes/no] +"solid": $ANSWER, # Confirm that this information is not speculative or deduced based on assumption. [yes/no] +} +``` + +Include answers with a one-sentence justification or detail. Do not repeat the inquiries. +Write '{}' if there is nothing in the list of 'unknown information and details'.""", +'dep':['planner_unexpected', 'planner_mistake', 'top-subgoal', 'subgoal_analysis', 'obs_obj', 'obs_inv', 'obs_vit', 'obs_chg', 'obs_last_act'], +'shorthand': 'discovered information and details', +'compose': ComposeKBAddPrompt(), +'query': query_reason, +'after_query': KBAddAfterQuery(), +}, + +'unknown':{ +'prompt':"""Merge the 'unknown/missing information' in the previous answer and the 'Previous unknown information and details' into a single Json dictionary. +Give each item a concise but precise name as the key, and a dictionary of answers to the following inquiries as the value: +``` +"item_name": { # if applicable, use the same item name as in the previous answer or knowledge base +"info": $ANSWER, # In concise but precise language, describe what exactly is missing. [If the information is not missing, write 'NA'.] +"knowledge": $ANSWER, # What do you already know about the current requested info? [If nothing is known, write 'NA'.] +"unknown": $ANSWER, # Confirm that this requested info is missing from the instruction manual. [yes/no] +"novel": $ANSWER, # Confirm that the knowledge base does not already contain precise answer to this requested info. [yes/no] +"general": $ANSWER, # Is the requested info expected to remain unchanged in future game steps? [yes/no] +"relevant": $ANSWER, # Is this requested info helpful for succeeding the game? [yes/no] +"correct": $ANSWER # Confirm that this request does not disagree with the instruction manual. [yes/no] followed by a one-sentence justification. 
+} +``` +Only include the answers, not the inquires. Remove duplicates and arrange multi-target inquiries into separate items.""", +'dep':['subgoal_analysis'], +'after': ['kb-add'], +'shorthand': 'Unknown information and details', +'compose': ComposeKBReasonPrompt(), +'query': query_reason, +'after_query': KBReasonAfterQuery(), +}, + +'actor-reflect':{ +'prompt':"""First, identify up to 2 types of information other than observation and action (from the gameplay history) that may be important for the subgoal '$db.subgoals.subgoal$', and output them in a concise list. + +Secondly, summarize the gameplay history using a table. +Use 'TBD' for the action corresponding to the current step, and include the observations, action (including number of steps and result), and the identified information in the table. + +Finally, analyze and evaluate each action in '$db.allowed_actions$'. Utilize spatial and temporal reasoning with the table to determine if the action is safe, if it leads to an unexplored state. +Format the analysis as a Json dictionary of the following format, and write "NA" for not applicable fields: +``` +{ +"$action": { # The action name +"spatial_relation_with_current_obs": $answer, # How does this action spatially relate to objects in the observation? Focus on important objects (1 sentence) +"alignment_with_history":{ +"temporal": $answer, # How does this action relate with the most recent historical trajectory temporally? Explain your reasoning by connecting with the history table. (1 sentence) +"spatial": $answer, # How does this action relate to the most recent historical path spatially? Explain your reasoning by connecting with the history table. (1 sentence) +}, +"risk": $answer, # list potential risks or hazards associated with the action as concisely as possible +"benefit": $answer, # list potential benefits or advantages associated with the action as concisely as possible +}, +... 
+} +""", +'dep':['top-subgoal', 'subgoal_analysis', 'obs_current_actions', 'obs_obj', 'obs_inv', 'obs_vit'], +'after': ['unknown',], +'shorthand': "Trajectory summary and action analysis", +'compose': ComposeActorEfficiencyPrompt(), +'query': query_spatial, +}, + +'actor-plan-sketch':{ +'prompt':"""First, describe the current observation/surroundings in with a focus on things related to the subgoal '$db.subgoals.subgoal$' and answer the following questions (no more than 1 sentence per-answer): +Out of the goal-relevant objects, which ones are easily reachable and which ones are harder to reach? +Are there direct obstacles in the way? +Are there risks in the way? Are they addressable? + +Then, determine if the previous plan '$db.action_summary.plan-sketch$' still applies for the subgoal based on the criteiras and the expiration condition. +Relevance criteria: $db.action_summary.relevance-crieria$ +Expiration condition: $db.action_summary.expiration-condition$ + +If the plan still applies, examine the current observation for any obstacles, hazarads, or unexpected scenarios and reason spatially how they may be addressed. +If necessary, update only the 'details' of the previous plan to achieve the target '$db.action_summary.target$' of the plan. + +If the plan does not apply, explain your reasoning, and write an new plan-sketch. +Reason spatially and temporally with the current observation, the gameplay history, and the action analysis. + +Finally, output the updated plan-sketch. The plan-sketch details should concretely describe a procedure or a specific direction to follow, and must be a Json dictionary of the following format: +``` +{ +"plan-sketch": $plan-sketch, # A 1-line summary of the plan-sketch +"detials", # Concrete description of what procedure or a specific direction to follow. Do not offer multiple options or possibilities. 
+"target", # Concisely write the target of the plan +"relevance-crieria": $relevance_criteria, # The criteria that the plan is relevant to the current situation +"expiration-condition": $expiration_condition, # The condition that may cause the plan to expire, like specific changes in the observation or the inventory, or after a certain number of steps. +"notes": $notes # Anything about the current situation or reasoning that may be meaningful to remember for the future. +} +``` +""", +'dep':['planner_unexpected', 'planner_mistake', 'top-subgoal', 'subgoal_analysis', 'obs_obj', 'obs_inv', 'obs_vit', 's-action', 'obs_current_actions', 'actor-reflect'], +'shorthand': "Plan-sketch", +'compose': ComposeActorPlannerPrompt(), +'query': query_spatial, +'after_query': ActionSummaryAfterQuery(), +}, + +'actor-actions':{ +'prompt':"""Given the target: $db.action_summary.target$ +First, describe the current observation/surroundings and identify any hazards. +Examine the observation/surroundings any obstacles that may interfere with achieving the target. + +- Plan-sketch: $db.action_summary.plan-sketch$ +- Plan details: $db.action_summary.details$ + +Discuss how to spatially address or evade the hazards and obstacles based on analysis of the observation, target, and the plan-sketch. + +Then, reason with the instruction manual and the current observation, and identify a sequence of actions to achieve the target. + +The sequence of identified actions should start from current step and only include actions for up to the first 6 steps, without skipping any steps. +Think step by step and explain your reasoning before writing down the sequence of actions. + +Finally, group repeated actions together to speed up the game, and explicitly state the number of repeats. + +Note: The player's reach is 1 step to the front. 
+""", +'dep':['obs_inv', 'obs_vit', 's-action', 'actor-reflect', 'obs_current_actions', 'obs_obj'], +'after':['actor-plan-sketch'], +'shorthand': "Actor plan and reasoning", +'compose': ComposeActorReasoningPrompt(), +'query': query_spatial, +}, + +'actor-final':{ +'prompt':"""Examine the actor plan and reasoning and identify the first action (only the exact action name from the list of all actions in the manual) in the plan. +Then, if the identified action is *explicitly stated as a repeat*, write the number of times this action should be repeated. +Finally, identify if there are any observed/nearby hazards or obstacles, no matter if they interfere with the plan. +Format the output as a Json dictionary of the following format: +``` +{ +"action": $action, # Write only the exact action name from the list of all actions in the manual. +"repeats": $repeats # The number of times this action should be repeated, write 1 if the action is not stated as repeated. +"obstacles": $hazard # [yes/no] Presence of obstacles in the course of the action. +"hazards": $hazard # [yes/no] Presence of hazards in the observation/surroundings, no matter if they interfere with the plan. +} +``` +""", +'dep':['obs_obj', 'actor-actions'], +'compose': ComposeActorBarePrompt(), +'query': query_reason, +'after_query': ActionAfterQuery(), +}, + +'s-plan':{ +'prompt':"In one sentence, describe the high-level subgoals and the reasoning behind the subgoals.", +'dep':['reflect', 'planner_unexpected', 'planner_mistake', 'top-subgoal', 'subgoal_analysis', 'actor-reflect'], +'shorthand': "Subgoals", +'compose': ComposeSummaryPrompt(), +'query': query_fast, +}, + +'s-mistakes':{ +'prompt':"In one sentence, concisely explain any unexpected situations or mistakes encountered recently. 
Output \"NA\" and end the answer if there's nothing unexpected and no mistakes.", +'dep':['reflect', 'planner_unexpected', 'planner_mistake', 'top-subgoal', 'subgoal_analysis'], +'shorthand': "Mistakes", +'compose': ComposeSummaryPrompt(), +'query': query_fast, +}, + +} + + +gamestep_questions = { + prompts['s-obs-obs']['prompt']:"Observation and inventory", + prompts['s-obs-vitals']['prompt']:"Vitals", + prompts['s-action']['prompt']:"Action", +} +delayed_gamestep_questions = { + prompts['s-action']['prompt'] +} + +strategy_questions_desc = { + prompts['subgoals']['prompt']: "Most recent top 3 subgoals", + prompts['top-subgoal']['prompt']: "Most recent top subgoal and completion criteria", + # strategy_questions_list[12]: "Unknown information and details", +} +attention_question = prompts['gate']['prompt'] +adaptive_strategy_questions = [ + prompts['gate-plan_sketch']['prompt'], + prompts['top-subgoal']['prompt'], +] +goal_question = prompts['top-subgoal']['prompt'] +adaptive_question = prompts['planner-adaptive']['prompt'] +reflection_skip_questions = [ + 'subgoals', 'top-subgoal', 'subgoal_analysis', 'skill', 'planner-adaptive', 'kb-add', 'unknown', 's-mistakes', +] +reflection_skip_questions = [prompts[k]['prompt'] for k in reflection_skip_questions] + +kb_questions_desc = { + prompts['unknown']['prompt']: "Unknown information and details", +} +kb_question = prompts['kb-add']['prompt'] +adaptive_kb_questions = [ +] + +q_act = prompts['actor-final']['prompt'] +adaptive_actor_questions = [prompts['actor-reflect']['prompt'], prompts['actor-plan-sketch']['prompt'], prompts['actor-actions']['prompt']] +adaptive_dependencies = ['obs_obj', 'obs_inv', 'obs_vit', 'obs_chg', 'obs_last_act'] +adaptive_dependencies = [prompts[k]['prompt'] for k in adaptive_dependencies] + + + +gameplan_questions = { + prompts['s-mistakes']['prompt']:"Mistakes and Unexpectancies", + prompts['s-plan']['prompt']:"Subgoals", +} + +feedback_questions = [ + # "Concisely make a list of historical 
observations or findings that could benefit the player. Only include the most important 5 items and do not include anything you already know.", +"""Concisely list up to a total number of 3 significant mistakes or dangerous situations encountered on the current skill. +If there are less than 3, list all of them. If there are no significant encounters, write 'NA'. + +For each encounter, offer a brief one-sentence advice to address it using information from the instruction manual. +You advice should be precise and actionable, but should retain generality to be applicable to similar situations/problems in the future. + +Do not include anything unrelated to the current skill.""", +] + +feedback_questions = [ + # (feedback_questions[0], gamestep_questions, "Past findings", False), + (feedback_questions[0], gameplan_questions, "Past Notes", True), +] + +gameplay_questions = { + # "Summarize learnings from the gameplay into a list. Do not include anything you already know.": ["How did the player die at the final step?",], + "Base your answers on the gameplay, create a list of up to 5 most important aspects the player should pay more attention to next time. For each aspect, write a 1-line specification on how the aspected may be addressed/quantified.": ["What did the player accomplish?", "How did the player die at the final step?", "What did the player fail to accomplish?", "What mistakes did the player make?", ], +} +gameplay_shorthands = { + # "Summarize learnings from the gameplay into a list. Do not include anything you already know.": "Learnings", + "Base your answers on the gameplay, create a list of up to 5 most important aspects the player should pay more attention to next time. 
def build_graph(llm_functions, database=None):
    """Assemble the AgentKit prompt graph from the module-level `prompts` table.

    Each entry of `prompts` becomes a SimpleDBNode keyed by its prompt text;
    'dep' entries become graph edges and 'after' entries become ordering
    constraints. The node shorthand names and the question tables are written
    into `database` for use by the compose/after-query callbacks.

    Args:
        llm_functions: mapping from query-type name to a dict with
            'query_model' and 'token_counter'.
        database: optional shared database dict, mutated in place.

    Returns:
        The populated Graph.
    """
    # Fix: the original default was `database={}` — a mutable default shared
    # across calls, so two build_graph() calls would silently share state.
    if database is None:
        database = {}

    database['shorthands'] = {}

    # Create graph
    graph = Graph()
    edge_list = []
    order_list = []

    for _, node_info in prompts.items():
        # The prompt text doubles as the node's unique key.
        key = node_prompt = node_info['prompt']
        node = SimpleDBNode(
            key,
            node_prompt,
            graph,
            llm_functions[node_info['query']]['query_model'],
            node_info['compose'],
            database,
            after_query=node_info.get('after_query'),
            verbose=True,
            token_counter=llm_functions[node_info['query']]['token_counter'],
        )
        graph.add_node(node)

        if node_info.get('shorthand') is not None:
            database['shorthands'][key] = node_info['shorthand']

        # 'dep' -> data-dependency edges (dependency answers feed this node).
        for dependency in node_info['dep']:
            edge_list.append((prompts[dependency]['prompt'], key))

        # 'after' -> pure ordering constraints, no data flow.
        for dependency in node_info.get('after', []):
            order_list.append((prompts[dependency]['prompt'], key))

    for edge in edge_list:
        graph.add_edge(*edge)
    for order in order_list:
        graph.add_order(*order)

    # Expose the module-level question tables to the prompt-compose callbacks.
    database['prompts'] = {
        'gamestep_questions': gamestep_questions,
        'delayed_gamestep_questions': delayed_gamestep_questions,
        'strategy_questions_desc': strategy_questions_desc,
        'attention_question': attention_question,
        'adaptive_strategy_questions': adaptive_strategy_questions,
        'goal_question': goal_question,
        'adaptive_question': adaptive_question,
        'kb_questions_desc': kb_questions_desc,
        'kb_question': kb_question,
        'adaptive_kb_questions': adaptive_kb_questions,
        'q_act': q_act,
        'adaptive_actor_questions': adaptive_actor_questions,
        'gameplan_questions': gameplan_questions,
        'feedback_questions': feedback_questions,
        'gameplay_questions': gameplay_questions,
        'gameplay_shorthands': gameplay_shorthands,
        'adaptive_dependencies': adaptive_dependencies,
        'reflection_skip_questions': reflection_skip_questions,
        # 'action_summary': action_summary,
    }

    return graph
+Finally, make sure that each planned items is both quantifiable and achievable with the game actions, and the requirements are well-addressed.""" + self.shrink_idx = 2 + + def before_dependencies(self, messages, db): + skill_feedback = db['feedback']['skill_feedback'][db['skills']['skill']] if db['skills']['skill'] in db['feedback']['skill_feedback'].keys() else "" + qa_history = db['history']['qa_history_stream'][-db['history']['qa_history_planner_length']:] + step_offset = db['environment']['step'] - len(qa_history) + + messages.append({"role": "system", "content": "Instruction manual:\n\n{}".format(db['environment']['manual'])}) + + if len(db['kb']['knowledge_base'])>0: + messages.append({"role": "system", "content": "Knowledge base:\n\n{}".format(json.dumps(db['kb']['knowledge_base'], indent=0))}) + + text_desc = self.compose_gamestep_prompt(qa_history, db, step_offset, skip_last=False) + + messages.append({"role": "system", "content": "Gameplay history for the previous {} steps:\n\n{}".format(len(qa_history), text_desc)}) + + if len(db['feedback']['feedback']) > 0: + messages.append({"role": "system", "content": "General guidance:\n\n{}".format(db['feedback']['feedback'])}) + + if len(db['history']['qa_history'])>0: + strategy_desc = "\n\n".join(["## {}\n{}".format(d, db['history']['qa_history'][-1][q]) for q,d in db['prompts']['strategy_questions_desc'].items()]) + kb_desc = "\n\n".join(["## {}\n{}".format(d, db['kb']['unknowns'][q]) for q,d in db['prompts']['kb_questions_desc'].items()]) + else: + strategy_desc = "Most recent subgoals: None available. Re-plan necessary." 
+ kb_desc = "" + + messages.append({"role": "system", "content": "{}".format(strategy_desc)}) + + if len(skill_feedback) > 0: + messages.append({"role": "system", "content": "Subgoal guidance:\n\n{}".format(skill_feedback)}) + + return messages + +class ComposePlannerReflectionPrompt(ComposeBasePrompt): + + def __init__(self): + super().__init__() + self.system_prompt = """Analyze the observation, inventory, status, the manual, the gameplay history. The game does not contain bugs or glitches, and does not offer additional cues or patterns. +Be vigilant for information or details missing from the manual.""" + self.shrink_idx = 2 + + def before_dependencies(self, messages, db): + qa_history = db['history']['qa_history_stream'][-db['history']['qa_history_planner_reflection_length']:] + step_offset = db['environment']['step'] - len(qa_history) + + messages.append({"role": "system", "content": "Instruction manual:\n\n{}".format(db['environment']['manual'])}) + + text_desc = self.compose_gamestep_prompt(qa_history, db, step_offset, skip_last=False) + + messages.append({"role": "system", "content": "Gameplay history for the previous {} steps:\n\n{}".format(len(qa_history), text_desc)}) + + if len(db["history"]["qa_history"])>0: + strategy_desc = "\n\n".join(["## {}\n{}".format(d, db["history"]["qa_history"][-1][q]) for q,d in db['prompts']['strategy_questions_desc'].items()]) + kb_desc = "\n\n".join(["## {}\n{}".format(d, db['kb']['unknowns'][q]) for q,d in db['prompts']['kb_questions_desc'].items()]) + else: + strategy_desc = "Most recent subgoals: NA" + kb_desc = "" + + messages.append({"role": "system", "content": "{}".format(strategy_desc)}) + + return messages + +def print_skill_library(skill_library): + output = { + k: v['skill_desc'] for k,v in skill_library.items() + } + return json.dumps(output, indent=0) + +class ComposeSkillPrompt(ComposeBasePrompt): + + def __init__(self): + super().__init__() + self.system_prompt = """Build a skill library to help the player 
ace the game. The skills should exmplifies high simplicity, granularity, and reusability. Pay close attetion to actions allowed by the game. All skills must be actionable and executable.""" + + def before_dependencies(self, messages, db): + messages.append({"role": "system", "content": "Instruction manual:\n\n{}".format(db['environment']['manual'])}) + return messages + + def after_dependencies(self, messages, db): + messages.append({"role": "system", "content": "Skill library\n\n```{}```".format(print_skill_library(db['skills']['skill_library']))}) + return messages + +class ComposeKBAddPrompt(ComposeBasePrompt): + + def __init__(self): + super().__init__() + self.system_prompt = """Reason with the instruction manual, the observation, the gameplay history. +Gather accuracte information about the 'unknown information and details'. Pay attention to past failures and make sure to separate assumptions from facts. +Reason carefully and precisely.""" + + def before_dependencies(self, messages, db): + messages.append({"role": "system", "content": "Instruction manual:\n\n{}".format(db['environment']['manual'])}) + return messages + + def after_dependencies(self, messages, db): + if 'unknowns' in db['kb'] and len(db['kb']['unknowns'])>0: + kb_desc = "\n\n".join(["{}:\n\n{}".format(d, db['kb']['unknowns'][q]) for q,d in db['prompts']['kb_questions_desc'].items()]) + else: + kb_desc = "Unknown information and details: NA" + + messages.append({"role": "system", "content": "{}".format(kb_desc)}) + return messages + +class ComposeRewritePrompt(ComposeBasePrompt): + + def __init__(self): + super().__init__() + self.system_prompt = """Rewrite information accurately and precisely according to the user instructions. 
Pay close attention to the numbers, the details, and the format of the output.""" + +class ComposeKBReasonPrompt(ComposeBasePrompt): + + def __init__(self): + super().__init__() + self.system_prompt = """Uncover previous unknown information from the player's gameplay and observation. Be vigilant for information or details missing from the manual. Focus on the details such as the numbers and requirement specifications.""" + self.shrink_idx = 2 + + def before_dependencies(self, messages, db): + messages.append({"role": "system", "content": "Instruction manual:\n\n{}".format(db['environment']['manual'])}) + + if len(db['kb']['knowledge_base'])>0: + messages.append({"role": "system", "content": "Knowledge base:\n\n{}".format(json.dumps(db['kb']['knowledge_base'], indent=0))}) + + if 'unknowns' in db['kb'] and len(db['kb']['unknowns'])>0: + kb_desc = "\n\n".join(["{}:\n\n{}".format(d, db['kb']['unknowns'][q]) for q,d in db['prompts']['kb_questions_desc'].items()]) + else: + kb_desc = "Unknown information and details: NA" + + messages.append({"role": "system", "content": "{}".format(kb_desc)}) + return messages + +class ComposeActorPlannerPrompt(ComposeBasePrompt): + + def __init__(self): + super().__init__() + self.system_prompt = """Your task is to assist the player to accomplish the current top subgoal for a game. 
Find the best action by reasoning with the previous plan, the manual, the gameplay history, and guidances.""" + + def before_dependencies(self, messages, db): + skill_feedback = db['feedback']['skill_feedback'][db['skills']['skill']] if db['skills']['skill'] in db['feedback']['skill_feedback'].keys() else "" + feedback = db['feedback']['feedback'] + knowledge_base = db['kb']['knowledge_base'] + + messages.append({"role": "system", "content": "Instruction manual:\n\n{}".format(db['environment']['manual'])}) + if len(feedback) > 0: + messages.append({"role": "system", "content": "General guidance:\n\n{}".format(feedback)}) + + if len(skill_feedback) > 0: + messages.append({"role": "system", "content": "Past mistakes:\n\n{}".format(skill_feedback)}) + + if len(knowledge_base)>0: + messages.append({"role": "system", "content": "Knowledge base:\n\n{}".format(json.dumps(knowledge_base, indent=0))}) + + messages.append({"role": "system", "content": "Most recent two steps of in-game observation\n\n{}".format(db['environment']['observation_2step_with_action'])}) + self.shrink_idx = len(messages) + return messages + + def after_dependencies(self, messages, db): + if db['skills']['skill'] is not None and db['skills']['skill'] == db['skills']['skill_old']: + messages.append({"role": "system", "content": "Previous plan:\n\n{}".format(json.dumps(db['action_summary'], indent=0))}) + messages.append({"role": "system", "content": "Notes from the last step:\n\n{}".format(db['action_notes'])}) + else: + messages.append({"role": "system", "content": "Previous plan: Not available, please re-plan."}) + messages.append({"role": "system", "content": "Notes from the last step: NA"}) + db['action_summary'] = { + "plan-sketch": "NA", + "details": "NA", + "target": "NA", + "relevance-criteria": "NA", + "expiration-condition": "NA", + } + db["action_notes"] = "NA" + return messages + +class ComposeActorEfficiencyPrompt(ComposeBasePrompt): + + def __init__(self): + super().__init__() + 
self.system_prompt = """Your task is to assist the player to accomplish the current top subgoal for a game. +Base your answer on the gameplay history, instruction manual, and knowledge base. +Think spatially and temporally, and keep track relative/absolute positions and timestep. +Do not make use of or assume anything not in the game, the manual, or the knowledge base. +The game does not contain bugs or glitches, and does not offer additional cues or patterns.""" + self.shrink_idx = 2 + + def before_dependencies(self, messages, db): + qa_history = db['history']['qa_history_stream'][-db['history']['qa_history_actor_length']:] + step_offset = db['environment']['step'] - len(qa_history) + + messages.append({"role": "system", "content": "Instruction manual:\n\n{}".format(db['environment']['manual'])}) + + text_desc = self.compose_gamestep_prompt(qa_history, db, step_offset, skip_last=False) + + messages.append({"role": "system", "content": "Gameplay history for the previous {} steps:\n\n{}".format(len(qa_history), text_desc)}) + + if len(db['kb']['knowledge_base'])>0: + messages.append({"role": "system", "content": "Knowledge base:\n\n{}".format(json.dumps(db['kb']['knowledge_base'], indent=0))}) + + return messages + +class ComposeActorReasoningPrompt(ComposeBasePrompt): + + def __init__(self): + super().__init__() + self.system_prompt = """Your task is to assist the player to accomplish the current top subgoal for a game. Start with the current state, and identify what actions to take by reasoning with the previous plan, the manual, the gameplay history, and guidance. Do not make use of or assume anything not in the game, the manual, or the knowledge base. 
The game does not contain bugs or glitches, and does not offer additional cues or patterns.""" + self.shrink_idx = 2 + + def before_dependencies(self, messages, db): + knowledge_base = db['kb']['knowledge_base'] + + messages.append({"role": "system", "content": "Instruction manual:\n\n{}".format(db['environment']['manual'])}) + + if len(knowledge_base)>0: + messages.append({"role": "system", "content": "Knowledge base:\n\n{}".format(json.dumps(knowledge_base, indent=0))}) + + messages.append({"role": "system", "content": "Most recent two steps of in-game observation\n\n{}".format(db['environment']['observation_2step_with_action'])}) + return messages + +class ComposeActorBarePrompt(ComposeBasePrompt): + + def __init__(self): + super().__init__() + self.system_prompt = """Precisely answer the following questions based on the context provided.""" + + def before_dependencies(self, messages, db): + messages.append({"role": "system", "content": "Instruction manual:\n\n{}".format(db['environment']['manual'])}) + return messages + + +def compose_filtered_gamestep_prompt(qa_history, attention_rounds, game_questions, plan_questions=None, db=None, step_offset=0, skip_last=False): + text_desc = "" + for i,step in enumerate(qa_history): + if (skip_last and i==len(qa_history)-1) or (i not in attention_rounds): + continue + text_desc+=describe_gamestep(db, qa_history, i, step, step_offset) + text_desc += "\n\n" + + if len(text_desc.strip())==0: + text_desc = "NA" + return text_desc.strip() + +def compose_feedback_prompt(CTXT_dict, qa_history, current_objective, questions): + + CTXT = CTXT_dict['CTXT'] + db = CTXT_dict['db'] + attention_rounds = CTXT_dict['attention_rounds'] + step_offset = CTXT_dict['step_offset'] - len(CTXT_dict['qa_history']) + + messages_feedback = [ + {"role": "system", "content" : "Provide concrete feedback for the player using concise language. 
The feedback must be on the current skill."} + ] + + messages_feedback.append({"role": "system", "content": "Instruction manual:\n\n{}".format(CTXT)}) + + messages_feedback.append({"role": "system", "content": ""}) + # messages_feedback.append({"role": "system", "content": "Your gameplay:\n\n{}".format(text_desc)}) + + messages_feedback.append({"role": "system", "content": "Current skill:\n{}".format(current_objective)}) + + messages_feedback.append({"role": "user", "content": ""}) + + messages = {} + for question, dependencies, shorthand, filtered in questions: + if filtered: + text_desc = compose_filtered_gamestep_prompt(qa_history, attention_rounds, dependencies, plan_questions=None, db=db) + else: + raise Exception("Not implemented") + # text_desc = compose_gamestep_prompt(qa_history, db, step_offset) + msg = copy.deepcopy(messages_feedback) + msg[2]['content'] = "Gameplay history:\n\n{}".format(text_desc) + msg[-1]['content'] = question + messages[shorthand]=msg + + return messages, 2 + + + +def compose_gameplay_prompt(CTXT_dict, qa_history, Q_CTXT, question): + CTXT = CTXT_dict['CTXT'] + db = CTXT_dict['db'] + attention_rounds = CTXT_dict['attention_rounds'] + + messages = [ + {"role": "system", "content" : "The player just finished playing the game of crafter. 
Provide concrete feedback for the player's gameplay using very concise language."} + ] + + messages.append({"role": "system", "content": "You already know:\n\n{}".format(CTXT)}) + + # text_desc = "\n\n".join(["Step {}:\n{}".format((i+1)*int(granularity), "\n".join(["{}: {}".format(h, step[q]) for q,h in gamestep_questions.items()])) for i,step in enumerate(qa_history)]) + text_desc = compose_filtered_gamestep_prompt(qa_history, attention_rounds, db['prompts']['gamestep_questions'], plan_questions=db['prompts']['gameplan_questions'], db=db) + + messages.append({"role": "system", "content": "Gameplay:\n\n{}".format(text_desc)}) + + if len(Q_CTXT)>0: + for q,a in Q_CTXT: + messages.append({"role": "user", "content": q}) + messages.append({"role": "assistant", "content": a}) + + messages.append({"role": "user", "content": question}) + + return messages, 2 \ No newline at end of file diff --git a/examples/crafter/crafter/__init__.py b/examples/crafter/crafter/__init__.py new file mode 100644 index 0000000..0fd476b --- /dev/null +++ b/examples/crafter/crafter/__init__.py @@ -0,0 +1,17 @@ +from .env import Env +from .recorder import Recorder + +# try: +# import gym +# gym.register( +# id='CrafterReward-v1', +# entry_point='crafter:Env', +# max_episode_steps=10000, +# kwargs={'reward': True}) +# gym.register( +# id='CrafterNoReward-v1', +# entry_point='crafter:Env', +# max_episode_steps=10000, +# kwargs={'reward': False}) +# except ImportError: +# pass diff --git a/examples/crafter/crafter/assets/1.png b/examples/crafter/crafter/assets/1.png new file mode 100644 index 0000000..a659963 Binary files /dev/null and b/examples/crafter/crafter/assets/1.png differ diff --git a/examples/crafter/crafter/assets/2.png b/examples/crafter/crafter/assets/2.png new file mode 100644 index 0000000..9b1d210 Binary files /dev/null and b/examples/crafter/crafter/assets/2.png differ diff --git a/examples/crafter/crafter/assets/3.png b/examples/crafter/crafter/assets/3.png new file mode 100644 
index 0000000..5519f1c Binary files /dev/null and b/examples/crafter/crafter/assets/3.png differ diff --git a/examples/crafter/crafter/assets/4.png b/examples/crafter/crafter/assets/4.png new file mode 100644 index 0000000..0604e1c Binary files /dev/null and b/examples/crafter/crafter/assets/4.png differ diff --git a/examples/crafter/crafter/assets/5.png b/examples/crafter/crafter/assets/5.png new file mode 100644 index 0000000..355950b Binary files /dev/null and b/examples/crafter/crafter/assets/5.png differ diff --git a/examples/crafter/crafter/assets/6.png b/examples/crafter/crafter/assets/6.png new file mode 100644 index 0000000..466735a Binary files /dev/null and b/examples/crafter/crafter/assets/6.png differ diff --git a/examples/crafter/crafter/assets/7.png b/examples/crafter/crafter/assets/7.png new file mode 100644 index 0000000..32c67f0 Binary files /dev/null and b/examples/crafter/crafter/assets/7.png differ diff --git a/examples/crafter/crafter/assets/8.png b/examples/crafter/crafter/assets/8.png new file mode 100644 index 0000000..2f4c4cc Binary files /dev/null and b/examples/crafter/crafter/assets/8.png differ diff --git a/examples/crafter/crafter/assets/9.png b/examples/crafter/crafter/assets/9.png new file mode 100644 index 0000000..bda0269 Binary files /dev/null and b/examples/crafter/crafter/assets/9.png differ diff --git a/examples/crafter/crafter/assets/arrow-down.png b/examples/crafter/crafter/assets/arrow-down.png new file mode 100644 index 0000000..426ebf6 Binary files /dev/null and b/examples/crafter/crafter/assets/arrow-down.png differ diff --git a/examples/crafter/crafter/assets/arrow-left.png b/examples/crafter/crafter/assets/arrow-left.png new file mode 100644 index 0000000..66b18ef Binary files /dev/null and b/examples/crafter/crafter/assets/arrow-left.png differ diff --git a/examples/crafter/crafter/assets/arrow-right.png b/examples/crafter/crafter/assets/arrow-right.png new file mode 100644 index 0000000..2c7e8dd Binary files 
/dev/null and b/examples/crafter/crafter/assets/arrow-right.png differ diff --git a/examples/crafter/crafter/assets/arrow-up.png b/examples/crafter/crafter/assets/arrow-up.png new file mode 100644 index 0000000..73992e2 Binary files /dev/null and b/examples/crafter/crafter/assets/arrow-up.png differ diff --git a/examples/crafter/crafter/assets/coal.png b/examples/crafter/crafter/assets/coal.png new file mode 100644 index 0000000..467a44d Binary files /dev/null and b/examples/crafter/crafter/assets/coal.png differ diff --git a/examples/crafter/crafter/assets/cow.png b/examples/crafter/crafter/assets/cow.png new file mode 100644 index 0000000..5c346ac Binary files /dev/null and b/examples/crafter/crafter/assets/cow.png differ diff --git a/examples/crafter/crafter/assets/debug-2.png b/examples/crafter/crafter/assets/debug-2.png new file mode 100644 index 0000000..c7cd531 Binary files /dev/null and b/examples/crafter/crafter/assets/debug-2.png differ diff --git a/examples/crafter/crafter/assets/debug-3.png b/examples/crafter/crafter/assets/debug-3.png new file mode 100644 index 0000000..b7d00a2 Binary files /dev/null and b/examples/crafter/crafter/assets/debug-3.png differ diff --git a/examples/crafter/crafter/assets/debug.png b/examples/crafter/crafter/assets/debug.png new file mode 100644 index 0000000..01948f5 Binary files /dev/null and b/examples/crafter/crafter/assets/debug.png differ diff --git a/examples/crafter/crafter/assets/diamond.png b/examples/crafter/crafter/assets/diamond.png new file mode 100644 index 0000000..fe74e39 Binary files /dev/null and b/examples/crafter/crafter/assets/diamond.png differ diff --git a/examples/crafter/crafter/assets/drink.png b/examples/crafter/crafter/assets/drink.png new file mode 100644 index 0000000..b875879 Binary files /dev/null and b/examples/crafter/crafter/assets/drink.png differ diff --git a/examples/crafter/crafter/assets/energy.png b/examples/crafter/crafter/assets/energy.png new file mode 100644 index 
0000000..a77aaf5 Binary files /dev/null and b/examples/crafter/crafter/assets/energy.png differ diff --git a/examples/crafter/crafter/assets/fence.png b/examples/crafter/crafter/assets/fence.png new file mode 100644 index 0000000..4814616 Binary files /dev/null and b/examples/crafter/crafter/assets/fence.png differ diff --git a/examples/crafter/crafter/assets/food.png b/examples/crafter/crafter/assets/food.png new file mode 100644 index 0000000..a408a52 Binary files /dev/null and b/examples/crafter/crafter/assets/food.png differ diff --git a/examples/crafter/crafter/assets/furnace.png b/examples/crafter/crafter/assets/furnace.png new file mode 100644 index 0000000..fafc893 Binary files /dev/null and b/examples/crafter/crafter/assets/furnace.png differ diff --git a/examples/crafter/crafter/assets/grass.png b/examples/crafter/crafter/assets/grass.png new file mode 100644 index 0000000..08360bb Binary files /dev/null and b/examples/crafter/crafter/assets/grass.png differ diff --git a/examples/crafter/crafter/assets/health.png b/examples/crafter/crafter/assets/health.png new file mode 100644 index 0000000..88a8597 Binary files /dev/null and b/examples/crafter/crafter/assets/health.png differ diff --git a/examples/crafter/crafter/assets/iron.png b/examples/crafter/crafter/assets/iron.png new file mode 100644 index 0000000..eafc7fe Binary files /dev/null and b/examples/crafter/crafter/assets/iron.png differ diff --git a/examples/crafter/crafter/assets/iron_pickaxe.png b/examples/crafter/crafter/assets/iron_pickaxe.png new file mode 100644 index 0000000..52c45c2 Binary files /dev/null and b/examples/crafter/crafter/assets/iron_pickaxe.png differ diff --git a/examples/crafter/crafter/assets/iron_sword.png b/examples/crafter/crafter/assets/iron_sword.png new file mode 100644 index 0000000..080b821 Binary files /dev/null and b/examples/crafter/crafter/assets/iron_sword.png differ diff --git a/examples/crafter/crafter/assets/lava.png b/examples/crafter/crafter/assets/lava.png 
new file mode 100644 index 0000000..13c45f9 Binary files /dev/null and b/examples/crafter/crafter/assets/lava.png differ diff --git a/examples/crafter/crafter/assets/leaves.png b/examples/crafter/crafter/assets/leaves.png new file mode 100644 index 0000000..97a8cef Binary files /dev/null and b/examples/crafter/crafter/assets/leaves.png differ diff --git a/examples/crafter/crafter/assets/log.png b/examples/crafter/crafter/assets/log.png new file mode 100644 index 0000000..6ce2985 Binary files /dev/null and b/examples/crafter/crafter/assets/log.png differ diff --git a/examples/crafter/crafter/assets/path.png b/examples/crafter/crafter/assets/path.png new file mode 100644 index 0000000..9b6ffe2 Binary files /dev/null and b/examples/crafter/crafter/assets/path.png differ diff --git a/examples/crafter/crafter/assets/plant-ripe.png b/examples/crafter/crafter/assets/plant-ripe.png new file mode 100644 index 0000000..ed7394e Binary files /dev/null and b/examples/crafter/crafter/assets/plant-ripe.png differ diff --git a/examples/crafter/crafter/assets/plant-young.png b/examples/crafter/crafter/assets/plant-young.png new file mode 100644 index 0000000..4e7fda2 Binary files /dev/null and b/examples/crafter/crafter/assets/plant-young.png differ diff --git a/examples/crafter/crafter/assets/plant.png b/examples/crafter/crafter/assets/plant.png new file mode 100644 index 0000000..4c793c8 Binary files /dev/null and b/examples/crafter/crafter/assets/plant.png differ diff --git a/examples/crafter/crafter/assets/player-down.png b/examples/crafter/crafter/assets/player-down.png new file mode 100644 index 0000000..d499ce7 Binary files /dev/null and b/examples/crafter/crafter/assets/player-down.png differ diff --git a/examples/crafter/crafter/assets/player-left.png b/examples/crafter/crafter/assets/player-left.png new file mode 100644 index 0000000..0b8b3b1 Binary files /dev/null and b/examples/crafter/crafter/assets/player-left.png differ diff --git 
a/examples/crafter/crafter/assets/player-right.png b/examples/crafter/crafter/assets/player-right.png new file mode 100644 index 0000000..6c3efdc Binary files /dev/null and b/examples/crafter/crafter/assets/player-right.png differ diff --git a/examples/crafter/crafter/assets/player-sleep.png b/examples/crafter/crafter/assets/player-sleep.png new file mode 100644 index 0000000..592435d Binary files /dev/null and b/examples/crafter/crafter/assets/player-sleep.png differ diff --git a/examples/crafter/crafter/assets/player-up.png b/examples/crafter/crafter/assets/player-up.png new file mode 100644 index 0000000..b4c971b Binary files /dev/null and b/examples/crafter/crafter/assets/player-up.png differ diff --git a/examples/crafter/crafter/assets/player.png b/examples/crafter/crafter/assets/player.png new file mode 100644 index 0000000..82eaa0e Binary files /dev/null and b/examples/crafter/crafter/assets/player.png differ diff --git a/examples/crafter/crafter/assets/sand.png b/examples/crafter/crafter/assets/sand.png new file mode 100644 index 0000000..8948044 Binary files /dev/null and b/examples/crafter/crafter/assets/sand.png differ diff --git a/examples/crafter/crafter/assets/sapling.png b/examples/crafter/crafter/assets/sapling.png new file mode 100644 index 0000000..f712216 Binary files /dev/null and b/examples/crafter/crafter/assets/sapling.png differ diff --git a/examples/crafter/crafter/assets/skeleton.png b/examples/crafter/crafter/assets/skeleton.png new file mode 100644 index 0000000..f4dc90b Binary files /dev/null and b/examples/crafter/crafter/assets/skeleton.png differ diff --git a/examples/crafter/crafter/assets/stone.png b/examples/crafter/crafter/assets/stone.png new file mode 100644 index 0000000..d5d37d4 Binary files /dev/null and b/examples/crafter/crafter/assets/stone.png differ diff --git a/examples/crafter/crafter/assets/stone_pickaxe.png b/examples/crafter/crafter/assets/stone_pickaxe.png new file mode 100644 index 0000000..fb813f8 Binary files 
/dev/null and b/examples/crafter/crafter/assets/stone_pickaxe.png differ diff --git a/examples/crafter/crafter/assets/stone_sword.png b/examples/crafter/crafter/assets/stone_sword.png new file mode 100644 index 0000000..89bb46b Binary files /dev/null and b/examples/crafter/crafter/assets/stone_sword.png differ diff --git a/examples/crafter/crafter/assets/table.png b/examples/crafter/crafter/assets/table.png new file mode 100644 index 0000000..7b41597 Binary files /dev/null and b/examples/crafter/crafter/assets/table.png differ diff --git a/examples/crafter/crafter/assets/tree.png b/examples/crafter/crafter/assets/tree.png new file mode 100644 index 0000000..68e06c4 Binary files /dev/null and b/examples/crafter/crafter/assets/tree.png differ diff --git a/examples/crafter/crafter/assets/unknown.png b/examples/crafter/crafter/assets/unknown.png new file mode 100644 index 0000000..6df29cd Binary files /dev/null and b/examples/crafter/crafter/assets/unknown.png differ diff --git a/examples/crafter/crafter/assets/water.png b/examples/crafter/crafter/assets/water.png new file mode 100644 index 0000000..2dffced Binary files /dev/null and b/examples/crafter/crafter/assets/water.png differ diff --git a/examples/crafter/crafter/assets/wood.png b/examples/crafter/crafter/assets/wood.png new file mode 100644 index 0000000..edaf0ff Binary files /dev/null and b/examples/crafter/crafter/assets/wood.png differ diff --git a/examples/crafter/crafter/assets/wood_pickaxe.png b/examples/crafter/crafter/assets/wood_pickaxe.png new file mode 100644 index 0000000..69e9362 Binary files /dev/null and b/examples/crafter/crafter/assets/wood_pickaxe.png differ diff --git a/examples/crafter/crafter/assets/wood_sword.png b/examples/crafter/crafter/assets/wood_sword.png new file mode 100644 index 0000000..09819ae Binary files /dev/null and b/examples/crafter/crafter/assets/wood_sword.png differ diff --git a/examples/crafter/crafter/assets/zombie.png b/examples/crafter/crafter/assets/zombie.png new 
file mode 100644 index 0000000..f154bed Binary files /dev/null and b/examples/crafter/crafter/assets/zombie.png differ diff --git a/examples/crafter/crafter/constants.py b/examples/crafter/crafter/constants.py new file mode 100644 index 0000000..868446b --- /dev/null +++ b/examples/crafter/crafter/constants.py @@ -0,0 +1,7 @@ +import pathlib + +import ruamel.yaml as yaml + +root = pathlib.Path(__file__).parent +for key, value in yaml.safe_load((root / 'data.yaml').read_text()).items(): + globals()[key] = value diff --git a/examples/crafter/crafter/data.yaml b/examples/crafter/crafter/data.yaml new file mode 100644 index 0000000..4bcd365 --- /dev/null +++ b/examples/crafter/crafter/data.yaml @@ -0,0 +1,102 @@ +actions: + - noop + - move_left + - move_right + - move_up + - move_down + - do + - sleep + - place_stone + - place_table + - place_furnace + - place_plant + - make_wood_pickaxe + - make_stone_pickaxe + - make_iron_pickaxe + - make_wood_sword + - make_stone_sword + - make_iron_sword + +materials: + - water + - grass + - stone + - path + - sand + - tree + - lava + - coal + - iron + - diamond + - table + - furnace + +walkable: + - grass + - path + - sand + +items: + health: {max: 9, initial: 9} + food: {max: 9, initial: 9} + drink: {max: 9, initial: 9} + energy: {max: 9, initial: 9} + sapling: {max: 9, initial: 0} + wood: {max: 9, initial: 0} + stone: {max: 9, initial: 0} + coal: {max: 9, initial: 0} + iron: {max: 9, initial: 0} + diamond: {max: 9, initial: 0} + wood_pickaxe: {max: 9, initial: 0} + stone_pickaxe: {max: 9, initial: 0} + iron_pickaxe: {max: 9, initial: 0} + wood_sword: {max: 9, initial: 0} + stone_sword: {max: 9, initial: 0} + iron_sword: {max: 9, initial: 0} + +collect: + tree: {require: {}, receive: {wood: 1}, leaves: grass} + stone: {require: {wood_pickaxe: 1}, receive: {stone: 1}, leaves: path} + coal: {require: {wood_pickaxe: 1}, receive: {coal: 1}, leaves: path} + iron: {require: {stone_pickaxe: 1}, receive: {iron: 1}, leaves: path} + 
diamond: {require: {iron_pickaxe: 1}, receive: {diamond: 1}, leaves: path} + water: {require: {}, receive: {drink: 1}, leaves: water} + grass: {require: {}, receive: {sapling: 1}, probability: 0.1, leaves: grass} + +place: + stone: {uses: {stone: 1}, where: [grass, sand, path, water, lava], type: material} + table: {uses: {wood: 2}, where: [grass, sand, path], type: material} + furnace: {uses: {stone: 4}, where: [grass, sand, path], type: material} + plant: {uses: {sapling: 1}, where: [grass], type: object} + +make: + wood_pickaxe: {uses: {wood: 1}, nearby: [table], gives: 1} + stone_pickaxe: {uses: {wood: 1, stone: 1}, nearby: [table], gives: 1} + iron_pickaxe: {uses: {wood: 1, coal: 1, iron: 1}, nearby: [table, furnace], gives: 1} + wood_sword: {uses: {wood: 1}, nearby: [table], gives: 1} + stone_sword: {uses: {wood: 1, stone: 1}, nearby: [table], gives: 1} + iron_sword: {uses: {wood: 1, coal: 1, iron: 1}, nearby: [table, furnace], gives: 1} + +achievements: + - collect_coal + - collect_diamond + - collect_drink + - collect_iron + - collect_sapling + - collect_stone + - collect_wood + - defeat_skeleton + - defeat_zombie + - eat_cow + - eat_plant + - make_iron_pickaxe + - make_iron_sword + - make_stone_pickaxe + - make_stone_sword + - make_wood_pickaxe + - make_wood_sword + - place_furnace + - place_plant + - place_stone + - place_table + - wake_up diff --git a/examples/crafter/crafter/engine.py b/examples/crafter/crafter/engine.py new file mode 100644 index 0000000..2ba960a --- /dev/null +++ b/examples/crafter/crafter/engine.py @@ -0,0 +1,284 @@ +import collections +import functools +import pathlib + +import imageio +import numpy as np +from PIL import Image, ImageEnhance + + +class AttrDict(dict): + + __getattr__ = dict.__getitem__ + + +class staticproperty: + + def __init__(self, function): + self.function = function + + def __get__(self, instance, owner=None): + return self.function() + + +class World: + + def __init__(self, area, materials, chunk_size): + 
  def move(self, obj, pos):
    """Relocate `obj` to grid cell `pos`, keeping the object map and the
    spatial chunk index in sync. The target cell must be empty."""
    if obj.removed:
      return
    pos = np.array(pos)
    # Target cell must not already hold an object (index 0 means empty).
    assert self._obj_map[tuple(pos)] == 0
    index = self._obj_map[tuple(obj.pos)]
    self._obj_map[tuple(pos)] = index
    self._obj_map[tuple(obj.pos)] = 0
    # Re-home the object in the chunk index if it crossed a chunk boundary.
    old_chunk = self.chunk_key(obj.pos)
    new_chunk = self.chunk_key(pos)
    if old_chunk != new_chunk:
      self._chunks[old_chunk].remove(obj)
      self._chunks[new_chunk].add(obj)
    obj.pos = pos
  def chunk_key(self, pos):
    """Return the (xmin, xmax, ymin, ymax) bounds of the chunk containing
    `pos`. Chunks are fixed-size tiles; those at the map edge are clipped
    to the world area, so the key doubles as the chunk's exact extent."""
    (x, y), (csx, csy) = pos, self._chunk_size
    xmin, ymin = (x // csx) * csx, (y // csy) * csy
    xmax = min(xmin + csx, self.area[0])
    ymax = min(ymin + csy, self.area[1])
    return (xmin, xmax, ymin, ymax)
np.array(player.pos) + canvas = np.zeros(tuple(self._grid * unit) + (3,), np.uint8) + 127 + for x in range(self._grid[0]): + for y in range(self._grid[1]): + pos = self._center + np.array([x, y]) - self._offset + if not _inside((0, 0), pos, self._area): + continue + texture = self._textures.get(self._world[pos][0], unit) + _draw(canvas, np.array([x, y]) * unit, texture) + for obj in self._world.objects: + pos = obj.pos - self._center + self._offset + if not _inside((0, 0), pos, self._grid): + continue + texture = self._textures.get(obj.texture, unit) + _draw_alpha(canvas, pos * unit, texture) + canvas = self._light(canvas, self._world.daylight) + if player.sleeping: + canvas = self._sleep(canvas) + # if player.health < 1: + # canvas = self._tint(canvas, (128, 0, 0), 0.6) + return canvas + + def _light(self, canvas, daylight): + night = canvas + if daylight < 0.5: + night = self._noise(night, 2 * (0.5 - daylight), 0.5) + night = np.array(ImageEnhance.Color( + Image.fromarray(night.astype(np.uint8))).enhance(0.4)) + night = self._tint(night, (0, 16, 64), 0.5) + return daylight * canvas + (1 - daylight) * night + + def _sleep(self, canvas): + canvas = np.array(ImageEnhance.Color( + Image.fromarray(canvas.astype(np.uint8))).enhance(0.0)) + canvas = self._tint(canvas, (0, 0, 16), 0.5) + return canvas + + def _tint(self, canvas, color, amount): + color = np.array(color) + return (1 - amount) * canvas + amount * color + + def _noise(self, canvas, amount, stddev): + noise = self._world.random.uniform(32, 127, canvas.shape[:2])[..., None] + mask = amount * self._vignette(canvas.shape, stddev)[..., None] + return (1 - mask) * canvas + mask * noise + + @functools.lru_cache(10) + def _vignette(self, shape, stddev): + xs, ys = np.meshgrid( + np.linspace(-1, 1, shape[0]), + np.linspace(-1, 1, shape[1])) + return 1 - np.exp(-0.5 * (xs ** 2 + ys ** 2) / (stddev ** 2)).T + + +class ItemView: + + def __init__(self, textures, grid): + self._textures = textures + self._grid = 
np.array(grid) + + def __call__(self, inventory, unit): + unit = np.array(unit) + canvas = np.zeros(tuple(self._grid * unit) + (3,), np.uint8) + for index, (item, amount) in enumerate(inventory.items()): + if amount < 1: + continue + self._item(canvas, index, item, unit) + self._amount(canvas, index, amount, unit) + return canvas + + def _item(self, canvas, index, item, unit): + pos = index % self._grid[0], index // self._grid[0] + pos = (pos * unit + 0.1 * unit).astype(np.int32) + texture = self._textures.get(item, 0.8 * unit) + _draw_alpha(canvas, pos, texture) + + def _amount(self, canvas, index, amount, unit): + pos = index % self._grid[0], index // self._grid[0] + pos = (pos * unit + 0.4 * unit).astype(np.int32) + text = str(amount) if amount in list(range(10)) else 'unknown' + texture = self._textures.get(text, 0.6 * unit) + _draw_alpha(canvas, pos, texture) + + +class SemanticView: + + def __init__(self, world, obj_types): + self._world = world + self._mat_ids = world._mat_ids.copy() + self._obj_ids = { + c: len(self._mat_ids) + i + for i, c in enumerate(obj_types)} + + def __call__(self): + canvas = self._world._mat_map.copy() + for obj in self._world.objects: + canvas[tuple(obj.pos)] = self._obj_ids[type(obj)] + return canvas + + +def _inside(lhs, mid, rhs): + return (lhs[0] <= mid[0] < rhs[0]) and (lhs[1] <= mid[1] < rhs[1]) + +def _draw(canvas, pos, texture): + (x, y), (w, h) = pos, texture.shape[:2] + if texture.shape[-1] == 4: + texture = texture[..., :3] + canvas[x: x + w, y: y + h] = texture + +def _draw_alpha(canvas, pos, texture): + (x, y), (w, h) = pos, texture.shape[:2] + if texture.shape[-1] == 4: + alpha = texture[..., 3:].astype(np.float32) / 255 + texture = texture[..., :3].astype(np.float32) / 255 + current = canvas[x: x + w, y: y + h].astype(np.float32) / 255 + blended = alpha * texture + (1 - alpha) * current + texture = (255 * blended).astype(np.uint8) + canvas[x: x + w, y: y + h] = texture diff --git a/examples/crafter/crafter/env.py 
  def __init__(
      self, area=(64, 64), view=(9, 9), size=(64, 64),
      reward=True, length=10000, seed=None):
    """Create a Crafter environment.

    Args:
      area: world size in grid cells (scalar or (w, h) pair).
      view: observed grid cells around the player (scalar or pair); the
        bottom rows of the view are reused for the inventory strip.
      size: rendered observation size in pixels (scalar or pair).
      reward: if False, step() reports reward 0.0 (info still carries it).
      length: episode step limit (see step() for how it is used).
      seed: world-generation seed; random when None.
    """
    # Accept scalars or pairs for view/size.
    view = np.array(view if hasattr(view, '__len__') else (view, view))
    size = np.array(size if hasattr(size, '__len__') else (size, size))
    seed = np.random.randint(0, 2**31 - 1) if seed is None else seed
    self._area = area
    self._view = view
    self._size = size
    self._reward = reward
    self._length = length
    self._seed = seed
    self._episode = 0
    self._world = engine.World(area, constants.materials, (12, 12))
    self._textures = engine.Textures(constants.root / 'assets')
    # Reserve enough view rows at the bottom to show all inventory items.
    item_rows = int(np.ceil(len(constants.items) / view[0]))
    self._local_view = engine.LocalView(
        self._world, self._textures, [view[0], view[1] - item_rows])
    self._item_view = engine.ItemView(
        self._textures, [view[0], item_rows])
    self._sem_view = engine.SemanticView(self._world, [
        objects.Player, objects.Cow, objects.Zombie,
        objects.Skeleton, objects.Arrow, objects.Plant])
    # Per-episode state; populated by reset().
    self._step = None
    self._player = None
    self._last_health = None
    self._unlocked = None
    # Some libraries expect these attributes to be set.
    self.reward_range = None
    self.metadata = None
  def _update_time(self):
    # https://www.desmos.com/calculator/grfbc6rs3h
    # Day/night cycle with a full period every 300 steps, phase-shifted by
    # 0.3 so episodes start in daylight. daylight is ~1 at midday and dips
    # toward 0 at night; mob spawning and rendering read it from the world.
    progress = (self._step / 300) % 1 + 0.3
    daylight = 1 - np.abs(np.cos(np.pi * progress)) ** 3
    self._world.daylight = daylight
  def distance(self, target):
    """Manhattan (L1) grid distance from this object to `target`, which may
    be another object or a raw (x, y) position."""
    if hasattr(target, 'pos'):
      target = target.pos
    return np.abs(target - self.pos).sum()
  def __init__(self, world, pos):
    """Create the player at `pos` with the initial inventory and no
    achievements unlocked."""
    super().__init__(world, pos)
    self.facing = (0, 1)  # direction the player looks; starts facing down
    # Inventory starts from the per-item initial counts in data.yaml.
    self.inventory = {
        name: info['initial'] for name, info in constants.items.items()}
    self.achievements = {name: 0 for name in constants.achievements}
    self.action = 'noop'  # action name applied on the next update()
    self.sleeping = False
    self._last_health = self.health  # to detect damage (see wake-up logic)
    # Internal meters advanced each tick; crossing a threshold consumes or
    # restores the matching inventory resource.
    self._hunger = 0
    self._thirst = 0
    self._fatigue = 0
    self._recover = 0
  def _update_life_stats(self):
    """Advance the hunger/thirst/fatigue meters by one tick; sleeping halves
    hunger and thirst growth and recovers energy instead of draining it."""
    self._hunger += 0.5 if self.sleeping else 1
    if self._hunger > 25:  # every ~25 awake ticks costs one food
      self._hunger = 0
      self.inventory['food'] -= 1
    self._thirst += 0.5 if self.sleeping else 1
    if self._thirst > 20:  # every ~20 awake ticks costs one drink
      self._thirst = 0
      self.inventory['drink'] -= 1
    if self.sleeping:
      # Fatigue counts down (capped at 0) while asleep.
      self._fatigue = min(self._fatigue - 1, 0)
    else:
      self._fatigue += 1
    if self._fatigue < -10:  # every 10 sleeping ticks restores one energy
      self._fatigue = 0
      self.inventory['energy'] += 1
    if self._fatigue > 30:  # every 30 awake ticks costs one energy
      self._fatigue = 0
      self.inventory['energy'] -= 1
  def _make(self, name):
    """Craft item `name` if the required stations (per data.yaml `nearby`)
    are within one cell and the ingredients are in the inventory; no-op
    otherwise. Unlocks the matching make_* achievement."""
    nearby, _ = self.world.nearby(self.pos, 1)
    info = constants.make[name]
    if not all(util in nearby for util in info['nearby']):
      return
    if any(self.inventory[k] < v for k, v in info['uses'].items()):
      return
    # Consume ingredients, then grant the crafted item(s).
    for item, amount in info['uses'].items():
      self.inventory[item] -= amount
    self.inventory[name] += info['gives']
    self.achievements[f'make_{name}'] += 1
class Zombie(Object):
  """Hostile mob that chases the player when within distance 8 and melees
  at range 1; sleeping players take much heavier damage."""

  def __init__(self, world, pos, player):
    super().__init__(world, pos)
    self.player = player
    self.health = 5
    self.cooldown = 0  # ticks until the next melee attack is allowed

  @property
  def texture(self):
    return 'zombie'

  def update(self):
    if self.health <= 0:
      self.world.remove(self)
      # NOTE(review): no early return here, so a just-killed zombie still
      # runs the move/attack logic this tick (movement is guarded by the
      # removed flag, the attack below is not) — confirm this is intended.
    dist = self.distance(self.player)
    # Mostly chase the player when close, otherwise wander randomly.
    if dist <= 8 and self.random.uniform() < 0.9:
      self.move(self.toward(self.player, self.random.uniform() < 0.8))
    else:
      self.move(self.random_dir())
    dist = self.distance(self.player)
    if dist <= 1:
      if self.cooldown:
        self.cooldown -= 1
      else:
        if self.player.sleeping:
          damage = 7
        else:
          damage = 2
        self.player.health -= damage
        self.cooldown = 5
class Arrow(Object):
  """Projectile fired by skeletons; flies one cell per tick in a straight
  line until it hits an object or non-walkable material."""

  def __init__(self, world, pos, facing):
    super().__init__(world, pos)
    self.facing = facing  # unit direction of travel, fixed at creation

  @property
  def texture(self):
    return {
        (-1, 0): 'arrow-left',
        (+1, 0): 'arrow-right',
        (0, -1): 'arrow-up',
        (0, +1): 'arrow-down',
    }[tuple(self.facing)]

  @engine.staticproperty
  def walkable():
    # Unlike ground creatures, arrows also fly over water and lava.
    return constants.walkable + ['water', 'lava']

  def update(self):
    target = self.pos + self.facing
    material, obj = self.world[target]
    if obj:
      # Hit a creature/object: deal fixed damage and despawn.
      obj.health -= 2
      self.world.remove(self)
    elif material not in self.walkable:
      # Hit solid terrain: despawn; crafting stations are smashed to path.
      self.world.remove(self)
      if material in ['table', 'furnace']:
        self.world[target] = 'path'
    else:
      self.move(self.facing)
class StatsRecorder:
  """Env wrapper that appends one JSON line per finished episode (length,
  total reward, achievement counts) to <directory>/stats.jsonl."""

  def __init__(self, env, directory):
    self._env = env
    self._directory = pathlib.Path(directory).expanduser()
    self._directory.mkdir(exist_ok=True, parents=True)
    # Append mode so stats from repeated runs accumulate in one file.
    # NOTE(review): the handle is never explicitly closed; rows are flushed
    # after each episode instead — confirm that is acceptable here.
    self._file = (self._directory / 'stats.jsonl').open('a')
    # Per-episode accumulators; initialized by reset().
    self._length = None
    self._reward = None
    self._unlocked = None
    self._stats = None

  def __getattr__(self, name):
    # Delegate everything else to the wrapped env; refuse dunder names so
    # special-method lookups fail fast instead of silently delegating.
    if name.startswith('__'):
      raise AttributeError(name)
    return getattr(self._env, name)

  def reset(self):
    obs = self._env.reset()
    self._length = 0
    self._reward = 0
    self._unlocked = None
    self._stats = None
    return obs

  def step(self, action):
    obs, reward, done, info = self._env.step(action)
    self._length += 1
    # Accumulate the env-reported reward from info, not the `reward`
    # return value, which the env zeroes when constructed with reward=False.
    self._reward += info['reward']
    if done:
      self._stats = {'length': self._length, 'reward': round(self._reward, 1)}
      for key, value in info['achievements'].items():
        self._stats[f'achievement_{key}'] = value
      self._save()
    return obs, reward, done, info

  def _save(self):
    self._file.write(json.dumps(self._stats) + '\n')
    self._file.flush()
  def step(self, action):
    """Step the wrapped env and record the resulting transition; saves the
    episode to disk when it ends."""
    # Transitions are defined from the environment perspective, meaning that a
    # transition contains the action and the resulting reward and next
    # observation produced by the environment in response to said action.
    obs, reward, done, info = self._env.step(action)
    transition = {
        'action': action, 'image': obs, 'reward': reward, 'done': done,
    }
    # Flatten the nested info dicts into prefixed scalar keys.
    for key, value in info.items():
      if key in ('inventory', 'achievements'):
        continue
      transition[key] = value
    for key, value in info['achievements'].items():
      transition[f'achievement_{key}'] = value
    for key, value in info['inventory'].items():
      # NOTE(review): 'ainventory_' looks like a typo for 'inventory_', but
      # renaming it would change the saved .npz schema — confirm before fixing.
      transition[f'ainventory_{key}'] = value
    self._episode.append(transition)
    if done:
      self._save()
    return obs, reward, done, info
class EpisodeName:
  """Env wrapper exposing `episode_name`, a filename-friendly identifier
  built from the end-of-episode timestamp, the number of achievements
  unlocked, and the episode length."""

  def __init__(self, env):
    self._env = env
    # Name components; filled in when an episode finishes.
    self._timestamp = None
    self._unlocked = None
    self._length = None

  def __getattr__(self, name):
    # Delegate everything else to the wrapped env; refuse dunder names so
    # special-method lookups fail fast instead of silently delegating.
    if name.startswith('__'):
      raise AttributeError(name)
    return getattr(self._env, name)

  def reset(self):
    obs = self._env.reset()
    self._timestamp = None
    self._unlocked = None
    self._length = 0
    return obs

  def step(self, action):
    obs, reward, done, info = self._env.step(action)
    self._length += 1
    if done:
      # Stamp the name components only once, at episode end.
      self._timestamp = datetime.datetime.now().strftime('%Y%m%dT%H%M%S')
      self._unlocked = sum(int(v >= 1) for v in info['achievements'].values())
    return obs, reward, done, info

  @property
  def episode_name(self):
    return f'{self._timestamp}-ach{self._unlocked}-len{self._length}'
default=(0, 0)) + parser.add_argument('--record', type=str, default=None) + parser.add_argument('--fps', type=int, default=5) + parser.add_argument('--wait', type=boolean, default=False) + parser.add_argument('--death', type=str, default='reset', choices=[ + 'continue', 'reset', 'quit']) + args = parser.parse_args() + + keymap = { + pygame.K_a: 'move_left', + pygame.K_d: 'move_right', + pygame.K_w: 'move_up', + pygame.K_s: 'move_down', + pygame.K_SPACE: 'do', + pygame.K_TAB: 'sleep', + + pygame.K_r: 'place_stone', + pygame.K_t: 'place_table', + pygame.K_f: 'place_furnace', + pygame.K_p: 'place_plant', + + pygame.K_1: 'make_wood_pickaxe', + pygame.K_2: 'make_stone_pickaxe', + pygame.K_3: 'make_iron_pickaxe', + pygame.K_4: 'make_wood_sword', + pygame.K_5: 'make_stone_sword', + pygame.K_6: 'make_iron_sword', + } + print('Actions:') + for key, action in keymap.items(): + print(f' {pygame.key.name(key)}: {action}') + + crafter.constants.items['health']['max'] = args.health + crafter.constants.items['health']['initial'] = args.health + + size = list(args.size) + size[0] = size[0] or args.window[0] + size[1] = size[1] or args.window[1] + + env = crafter.Env( + area=args.area, view=args.view, length=args.length, seed=args.seed) + env = crafter.Recorder(env, args.record) + env.reset() + achievements = set() + duration = 0 + return_ = 0 + was_done = False + print('Diamonds exist:', env._world.count('diamond')) + + pygame.init() + screen = pygame.display.set_mode(args.window) + clock = pygame.time.Clock() + running = True + while running: + + # Rendering. + image = env.render(size) + if size != args.window: + image = Image.fromarray(image) + image = image.resize(args.window, resample=Image.NEAREST) + image = np.array(image) + surface = pygame.surfarray.make_surface(image.transpose((1, 0, 2))) + screen.blit(surface, (0, 0)) + pygame.display.flip() + clock.tick(args.fps) + + # Keyboard input. 
+    action = None
+    pygame.event.pump()
+    for event in pygame.event.get():
+      if event.type == pygame.QUIT:
+        running = False
+      elif event.type == pygame.KEYDOWN and event.key == pygame.K_ESCAPE:
+        running = False
+      elif event.type == pygame.KEYDOWN and event.key in keymap.keys():
+        action = keymap[event.key]
+    if action is None:
+      # No fresh keydown: fall back to any currently held key.
+      pressed = pygame.key.get_pressed()
+      for key, action in keymap.items():
+        if pressed[key]:
+          break
+      else:
+        # No key held either: either wait for input or take a noop.
+        if args.wait and not env._player.sleeping:
+          continue
+        else:
+          action = 'noop'
+
+    # Environment step.
+    _, reward, done, _ = env.step(env.action_names.index(action))
+    duration += 1
+
+    # Achievements.
+    unlocked = {
+        name for name, count in env._player.achievements.items()
+        if count > 0 and name not in achievements}
+    for name in unlocked:
+      # FIX: was `achievements |= unlocked`, which merged the whole set on
+      # the first iteration and made the printed (k/total) counter jump to
+      # its final value for every newly printed achievement. Adding one
+      # name per iteration prints an accurate incrementing count; the
+      # final contents of `achievements` are identical.
+      achievements.add(name)
+      total = len(env._player.achievements.keys())
+      print(f'Achievement ({len(achievements)}/{total}): {name}')
+    if env._step > 0 and env._step % 100 == 0:
+      print(f'Time step: {env._step}')
+    if reward:
+      print(f'Reward: {reward}')
+      return_ += reward
+
+    # Episode end. 
+    if done and not was_done:
+      was_done = True
+      print('Episode done!')
+      print('Duration:', duration)
+      print('Return:', return_)
+      # --death controls what happens after the player dies.
+      if args.death == 'quit':
+        running = False
+      if args.death == 'reset':
+        print('\nStarting a new episode.')
+        env.reset()
+        achievements = set()
+        was_done = False
+        duration = 0
+        return_ = 0
+      if args.death == 'continue':
+        pass
+
+  pygame.quit()
+
+
+if __name__ == '__main__':
+  main()
diff --git a/examples/crafter/crafter/run_random.py b/examples/crafter/crafter/run_random.py
new file mode 100644
index 0000000..8ab1367
--- /dev/null
+++ b/examples/crafter/crafter/run_random.py
@@ -0,0 +1,48 @@
+import argparse
+import pathlib
+import time
+
+import numpy as np
+
+import crafter
+
+
+def main():
+  # Run uniformly random actions and report reset/step timing statistics.
+  parser = argparse.ArgumentParser()
+  parser.add_argument('--seed', type=int, default=None)
+  parser.add_argument('--area', nargs=2, type=int, default=(64, 64))
+  parser.add_argument('--length', type=int, default=10000)
+  parser.add_argument('--health', type=int, default=9)
+  parser.add_argument('--record', type=pathlib.Path, default=None)
+  parser.add_argument('--episodes', type=int, default=1)
+  args = parser.parse_args()
+
+  random = np.random.RandomState(args.seed)
+  crafter.constants.items['health']['max'] = args.health
+  crafter.constants.items['health']['initial'] = args.health
+  env = crafter.Env(area=args.area, length=args.length, seed=args.seed)
+  env = crafter.Recorder(env, args.record)
+
+  for _ in range(args.episodes):
+
+    start = time.time()
+    obs = env.reset()
+    print('')
+    print(f'Reset time: {1000*(time.time()-start):.2f}ms')
+    print('Coal exist: ', env._world.count('coal'))
+    print('Iron exist: ', env._world.count('iron'))
+    print('Diamonds exist:', env._world.count('diamond'))
+
+    start = time.time()
+    done = False
+    while not done:
+      action = random.randint(0, env.action_space.n)
+      obs, reward, done, info = env.step(action)
+    duration = time.time() - start
+    step = env._step
+    print(f'Step time: {1000*duration/step:.2f}ms ({int(step/duration)} FPS)')
+    print('Episode length:', step)
+
+
+if __name__ == '__main__':
+  main()
diff --git a/examples/crafter/crafter/run_terrain.py b/examples/crafter/crafter/run_terrain.py
new file mode 100644
index 0000000..009955b
--- /dev/null
+++ b/examples/crafter/crafter/run_terrain.py
@@ -0,0 +1,43 @@
+import argparse
+
+import imageio
+import numpy as np
+
+import crafter
+
+
+def main():
+  # Generate several world maps and tile them into one preview image.
+  parser = argparse.ArgumentParser()
+  parser.add_argument('--seed', type=int, default=None)
+  parser.add_argument('--amount', type=int, default=4)
+  parser.add_argument('--cols', type=int, default=4)
+  parser.add_argument('--area', nargs=2, type=int, default=(64, 64))
+  parser.add_argument('--size', type=int, default=1024)
+  parser.add_argument('--filename', type=str, default='terrain.png')
+  args = parser.parse_args()
+
+  env = crafter.Env(args.area, args.area, args.size, seed=args.seed)
+  images = []
+  for index in range(args.amount):
+    images.append(env.reset())
+    diamonds = env._world.count('diamond')
+    print(f'Map: {index:>2}, diamonds: {diamonds:>2}')
+
+  # NOTE(review): floor division drops trailing images when --amount is not
+  # a multiple of --cols; confirm whether that is intended.
+  rows = len(images) // args.cols
+  strips = []
+  for row in range(rows):
+    strip = []
+    for col in range(args.cols):
+      try:
+        strip.append(images[row * args.cols + col])
+      except IndexError:
+        # Pad incomplete rows with black tiles of the same shape.
+        strip.append(np.zeros_like(strip[-1]))
+    strips.append(np.concatenate(strip, 1))
+  grid = np.concatenate(strips, 0)
+
+  imageio.imsave(args.filename, grid)
+  print('Saved', args.filename)
+
+
+if __name__ == '__main__':
+  main()
diff --git a/examples/crafter/crafter/worldgen.py b/examples/crafter/crafter/worldgen.py
new file mode 100644
index 0000000..257d5e7
--- /dev/null
+++ b/examples/crafter/crafter/worldgen.py
@@ -0,0 +1,91 @@
+import functools
+
+import numpy as np
+import opensimplex
+
+from . import constants
+from . 
import objects
+
+
+def generate_world(world, player):
+  """Fill the world grid: first terrain materials, then spawned objects."""
+  simplex = opensimplex.OpenSimplex(seed=world.random.randint(0, 2 ** 31 - 1))
+  # Marks cells carved out as tunnels; skeletons spawn only there.
+  tunnels = np.zeros(world.area, bool)
+  for x in range(world.area[0]):
+    for y in range(world.area[1]):
+      _set_material(world, (x, y), player, tunnels, simplex)
+  for x in range(world.area[0]):
+    for y in range(world.area[1]):
+      _set_object(world, (x, y), player, tunnels)
+
+
+def _set_material(world, pos, player, tunnels, simplex):
+  # Choose the terrain material at `pos` from layered simplex noise fields.
+  x, y = pos
+  simplex = functools.partial(_simplex, simplex)
+  uniform = world.random.uniform
+  # `start` is high near the player spawn, guaranteeing a grass clearing.
+  start = 4 - np.sqrt((x - player.pos[0]) ** 2 + (y - player.pos[1]) ** 2)
+  start += 2 * simplex(x, y, 8, 3)
+  start = 1 / (1 + np.exp(-start))
+  water = simplex(x, y, 3, {15: 1, 5: 0.15}, False) + 0.1
+  water -= 2 * start
+  mountain = simplex(x, y, 0, {15: 1, 5: 0.3})
+  mountain -= 4 * start + 0.3 * water
+  if start > 0.5:
+    world[x, y] = 'grass'
+  elif mountain > 0.15:
+    if (simplex(x, y, 6, 7) > 0.15 and mountain > 0.3):  # cave
+      world[x, y] = 'path'
+    elif simplex(2 * x, y / 5, 7, 3) > 0.4:  # horizontal tunnel
+      world[x, y] = 'path'
+      tunnels[x, y] = True
+    elif simplex(x / 5, 2 * y, 7, 3) > 0.4:  # vertical tunnel
+      world[x, y] = 'path'
+      tunnels[x, y] = True
+    elif simplex(x, y, 1, 8) > 0 and uniform() > 0.85:
+      world[x, y] = 'coal'
+    elif simplex(x, y, 2, 6) > 0.4 and uniform() > 0.75:
+      world[x, y] = 'iron'
+    elif mountain > 0.18 and uniform() > 0.994:
+      world[x, y] = 'diamond'
+    elif mountain > 0.3 and simplex(x, y, 6, 5) > 0.35:
+      world[x, y] = 'lava'
+    else:
+      world[x, y] = 'stone'
+  elif 0.25 < water <= 0.35 and simplex(x, y, 4, 9) > -0.2:
+    world[x, y] = 'sand'
+  elif 0.3 < water:
+    world[x, y] = 'water'
+  else:  # grassland
+    if simplex(x, y, 5, 7) > 0 and uniform() > 0.8:
+      world[x, y] = 'tree'
+    else:
+      world[x, y] = 'grass'
+
+
+def _set_object(world, pos, player, tunnels):
+  # Randomly spawn creatures on walkable terrain, away from the player.
+  x, y = pos
+  uniform = world.random.uniform
+  dist = np.sqrt((x - player.pos[0]) ** 2 + (y - player.pos[1]) ** 2)
+  material, _ = world[x, y]
+  if material not in constants.walkable:
+    pass
+  elif dist > 3 and material == 'grass' and uniform() > 0.985:
+    world.add(objects.Cow(world, (x, y)))
+  elif dist > 10 and uniform() > 0.993:
+    world.add(objects.Zombie(world, (x, y), player))
+  elif material == 'path' and tunnels[x, y] and uniform() > 0.95:
+    world.add(objects.Skeleton(world, (x, y), player))
+
+
+def _simplex(simplex, x, y, z, sizes, normalize=True):
+  # Weighted sum of 3D simplex noise at one or more length scales.
+  # `sizes` is either a single scale or a {scale: weight} mapping.
+  if not isinstance(sizes, dict):
+    sizes = {sizes: 1}
+  value = 0
+  for size, weight in sizes.items():
+    # opensimplex renamed noise3d -> noise3 across versions; support both.
+    if hasattr(simplex, 'noise3d'):
+      noise = simplex.noise3d(x / size, y / size, z)
+    else:
+      noise = simplex.noise3(x / size, y / size, z)
+    value += weight * noise
+  if normalize:
+    value /= sum(sizes.values())
+  return value
diff --git a/examples/crafter/crafter_description.py b/examples/crafter/crafter_description.py
new file mode 100644
index 0000000..ab4c71c
--- /dev/null
+++ b/examples/crafter/crafter_description.py
@@ -0,0 +1,192 @@
+import numpy as np
+import crafter
+
+# A throwaway env instance, used only to read the material/object id tables.
+env = crafter.Env(size=(224, 224))
+action_space = env.action_space
+
+vitals = ["health","food","drink","energy",]
+
+rot = np.array([[0,-1],[1,0]])
+directions = ['front', 'right', 'back', 'left']
+
+id_to_item = [0]*19
+import itertools
+import difflib
+for name, ind in itertools.chain(env._world._mat_ids.items(), env._sem_view._obj_ids.items()):
+    name = str(name)[str(name).find('objects.')+len('objects.'):-2].lower() if 'objects.' 
in str(name) else str(name)
+    id_to_item[ind] = name
+player_idx = id_to_item.index('player')
+print(id_to_item)
+
+def describe_inventory(info):
+    """Render the player's vitals and inventory as a short text block."""
+    result = ""
+
+    status_str = "* Vitals:\n{}".format("\n".join([" - {}: {}/9".format(v, info['inventory'][v]) for v in vitals]))
+    result += status_str + "\n\n"
+
+    # Non-vital items with a non-zero count only.
+    inventory_str = "\n".join([" - {}: {}".format(i, num) for i,num in info['inventory'].items() if i not in vitals and num!=0])
+    inventory_str = "* Inventory:\n{}".format(inventory_str) if inventory_str else "Inventory: empty"
+    result += inventory_str
+
+    return result.strip()
+
+
+REF = np.array([0, 1])
+
+def rotation_matrix(v1, v2):
+    # 2D rotation matrix taking direction v1 onto v2.
+    dot = np.dot(v1,v2)
+    cross = np.cross(v1,v2)
+    rotation_matrix = np.array([[dot, -cross],[cross, dot]])
+    return rotation_matrix
+
+def describe_dir(ref, P):
+    """Describe P relative to ref as compass words plus step counts.
+
+    Returns e.g. ("north-west", "2N 1W"). Note: smaller y is north.
+    """
+    desc = []
+    desc_detailed = []
+    if ref[1] > P[1]:
+        desc.append("north")
+        desc_detailed.append("{}N".format(abs(ref[1]-P[1])))
+    elif ref[1] < P[1]:
+        desc.append("south")
+        desc_detailed.append("{}S".format(abs(ref[1]-P[1])))
+    if ref[0] > P[0]:
+        desc.append("west")
+        desc_detailed.append("{}W".format(abs(ref[0]-P[0])))
+    elif ref[0] < P[0]:
+        desc.append("east")
+        desc_detailed.append("{}E".format(abs(ref[0]-P[0])))
+
+    result = "-".join(desc)
+
+    return result, " ".join(desc_detailed)
+
+def describe_loc(ref, P, target_facing):
+    # Like describe_dir, but tags the cell the player is facing.
+    direction, desc_detailed = describe_dir(ref, P)
+
+    if P[0]==target_facing[0] and P[1]==target_facing[1]:
+        direction += ", {} (facing)".format(desc_detailed)
+    else:
+        direction += ", {}".format(desc_detailed)
+
+    return direction
+
+
+def describe_env(info):
+    """Describe the local semantic view around the player in text.
+
+    Returns (description, front_unblocked) where front_unblocked is True
+    when the faced cell is 'path' or 'grass'.
+    """
+    assert(info['semantic'][info['player_pos'][0],info['player_pos'][1]] == player_idx)
+    # Crop the semantic map to the player's view window, centered on the player.
+    semantic = info['semantic'][info['player_pos'][0]-info['view'][0]//2:info['player_pos'][0]+info['view'][0]//2+1, info['player_pos'][1]-info['view'][1]//2+1:info['player_pos'][1]+info['view'][1]//2]
+    center = np.array([info['view'][0]//2,info['view'][1]//2-1])
+    result = ""
+    x = np.arange(semantic.shape[1])
+    y = np.arange(semantic.shape[0])
+    x1, y1 = np.meshgrid(x,y)
+    loc = np.stack((y1, x1),axis=-1)
+    # Manhattan distance of every cell from the player.
+    dist = np.absolute(center-loc).sum(axis=-1)
+    obj_info_list = []
+    grass_idx = id_to_item.index('grass')
+
+    facing = info['player_facing']
+    target_facing = (center[0] + facing[0], center[1] + facing[1])
+    target = id_to_item[semantic[target_facing[0],target_facing[1]]]
+    around = {}
+
+    obs = "* Observation (1-step):\n"
+    # The four immediate neighbors (N/S/E/W of the player).
+    for d in [[center[0]-1,center[1]],[center[0]+1,center[1]],[center[0],center[1]-1],[center[0],center[1]+1]]:
+        around[describe_loc(np.array([0,0]), np.array(d) - center, facing)] = id_to_item[semantic[d[0], d[1]]]
+
+    obs = "* Observation (1-step):\n"+"\n".join([" - {}: {}".format(o,d) for d,o in around.items()])
+
+    # For each item type in view (except player and grass), report the
+    # two closest instances.
+    for idx in np.unique(semantic):
+        if idx == player_idx or idx == grass_idx:
+            continue
+
+        distances = np.where(semantic == idx, dist, np.inf)
+        smallest_indices = np.unravel_index(np.argsort(distances, axis=None), distances.shape)
+        smallest_indices = [(smallest_indices[0][i], smallest_indices[1][i]) for i in range(min(2, np.count_nonzero(semantic == idx)))]
+
+        for i in range(len(smallest_indices)):
+            smallest = smallest_indices[i]
+            obj_info_list.append((id_to_item[idx], dist[smallest], describe_loc(np.array([0, 0]), smallest - center, facing)))
+
+    if len(obj_info_list)>0:
+        status_str = "* Near-by objects (7x9 grid):\n{}".format("\n".join([" - {} {} steps to {}".format(name.replace("arrow", "flying-arrow"), dist, loc) for name, dist, loc in obj_info_list]))
+    else:
+        status_str = "* Near-by objects (7x9 grid): nothing other than grass"
+
+    # get the player direction and filter semantic to only the front half of the player's facing direction
+    if facing[0] == 1:
+        front = semantic[center[0]+1:, :]
+    elif facing[0] == -1:
+        front = semantic[:center[0], :]
+    elif facing[1] == 1:
+        front = semantic[:, center[1]+1:]
+    else:
+        front = semantic[:, :center[1]]
+
+    # get a list of counts for each item in the front
+    counts = np.bincount(front.flatten())
+    # get a sorted named list of counts for each item in the front
+    counts = sorted([(id_to_item[i], counts[i]) for i in range(len(counts)) if id_to_item[i] not in {'player', 'None'} and counts[i]>0], key=lambda x: x[1], reverse=True)
+    # find the top 3 items in the front with non-zero counts
+    counts = counts[:3]
+
+    general_desc = ", ".join(["{} {}(s)".format(count, name) for name, count in counts])
+
+    general_obs = "* Further to the {}: {}.".format(describe_dir(np.array([0,0]), facing)[0], general_desc)
+
+    result += obs.strip() + "\n\n" + status_str + "\n\n" + general_obs
+
+    return result.strip(), target in {'path', 'grass'}
+
+
+def describe_act(info, repeats):
+    """Describe the last action (with repeat count) in compass wording."""
+    result = ""
+
+    action_str = info['action'].replace('do_', 'interact_')
+    if 'move' in action_str:
+        action_str = action_str.replace('move_up', 'move_north {} step(s)'.format(repeats))
+        action_str = action_str.replace('move_down', 'move_south {} step(s)'.format(repeats))
+        action_str = action_str.replace('move_left', 'move_west {} step(s)'.format(repeats))
+        action_str = action_str.replace('move_right', 'move_east {} step(s)'.format(repeats))
+    else:
+        action_str = action_str + " {} time(s)".format(repeats)
+
+    return action_str.strip()
+
+
+def describe_status(info):
+    # One-line status prefix for sleeping/dead states, empty otherwise.
+    if info['sleeping']:
+        return "Player is sleeping, and will not be able take actions until energy is full.\n\n"
+    elif info['dead']:
+        return "Player died.\n\n"
+    else:
+        return ""
+
+
+def describe_frame(info, repeats):
+    """Return (action description, full frame description, front_unblocked)."""
+    result = ""
+
+    result+=describe_status(info)
+
+    env_description, front_unblocked = describe_env(info)
+
+    result+=env_description
+
+    result+="\n\n"
+
+    result+=describe_inventory(info)
+
+    return describe_act(info, repeats).strip(), result.strip(), front_unblocked
+
+# Canonical action names; index order must match the crafter action space.
+action_list = ["Noop", "Move West", "Move East", "Move North", "Move South", "Do", \
+    "Sleep", "Place Stone", "Place Table", "Place Furnace", "Place Plant", \
+    "Make Wood Pickaxe", "Make Stone 
Pickaxe", "Make Iron Pickaxe", "Make Wood Sword", \
+    "Make Stone Sword", "Make Iron Sword"]
+action_list = [a.lower() for a in action_list]
+
+def match_act(string):
+    """Fuzzy-match an LLM-produced action string against action_list.
+
+    Returns (index, matched_name, "") on success, or (None, None, error)
+    when no action is close enough (difflib cutoff 0.85).
+    """
+    matches = difflib.get_close_matches(string.lower(), action_list, n=1, cutoff=0.85)
+    if matches:
+        print("Action matched \"{}\" to \"{}\"".format(string, matches[0]))
+        return action_list.index(matches[0]), matches[0], ""
+    else:
+        # FIX: corrected typo "elemet" -> "element" in the error message
+        # that is fed back to the LLM.
+        return None, None, "'{}' does not seem to match any element in the list: {}".format(string, action_list)
diff --git a/examples/crafter/crafter_initial_QA.pkl b/examples/crafter/crafter_initial_QA.pkl
new file mode 100644
index 0000000..bc2b4d1
Binary files /dev/null and b/examples/crafter/crafter_initial_QA.pkl differ
diff --git a/examples/crafter/main.py b/examples/crafter/main.py
new file mode 100644
index 0000000..07e0084
--- /dev/null
+++ b/examples/crafter/main.py
@@ -0,0 +1,534 @@
+import argparse, os
+
+import crafter
+import tqdm
+import numpy as np
+import pandas as pd
+import copy
+import json
+import datetime
+import pickle
+from colorama import Fore, Back, Style
+from colorama import init
+import agentkit
+import agentkit.utils as utils
+init(autoreset=True)
+
+parser = argparse.ArgumentParser(description='Process some integers.')
+parser.add_argument('--outdir', type=str, default='logs/test_env', help='output directory')
+parser.add_argument('--Eps', type=int, default=1, help='Total number of episodes to run')
+parser.add_argument('--temperature', type=float, default=0., help='LLM Temperature')
+parser.add_argument('--wandb_log_interval', type=int, default=25, help='some integer')
+parser.add_argument('--granularities', type=int, nargs='+', default=[500], help='list of integers')
+parser.add_argument('--feedback_granularity', type=int, default=3, help='some integer')
+parser.add_argument('--planner_reflection_granularity', type=int, default=25, help='some integer')
+parser.add_argument('--actor_reflection_granularity', type=int, default=5, help='some integer')
+parser.add_argument('--kb_refine_granularity', type=int, default=50, help='some integer')
+parser.add_argument('--llm_plan_accurate', type=str, default="gpt-4-turbo-2024-04-09", help='LLM')
+parser.add_argument('--llm_plan', type=str, default="gpt-4-turbo-2024-04-09", help='LLM')
+parser.add_argument('--llm_spatial', type=str, default="gpt-4-0613", help='LLM')
+parser.add_argument('--llm_fast', type=str, default="all-gpt-3.5-turbo", help='LLM')
+parser.add_argument('--llm_fast_accurate', type=str, default="gpt-3.5-turbo-0125", help='LLM')
+parser.add_argument('-q', '--quiet', action='store_true', help='Quiet mode')
+parser.add_argument('--resume_id', type=str, default=None, help='resume wandb run id')
+
+args = parser.parse_args()
+args.verbose = not args.quiet
+
+# make the saves folder if it doesn't exist
+os.makedirs('saves', exist_ok=True)
+os.makedirs('prints', exist_ok=True)
+
+# When resuming, replace the command-line args with the saved ones.
+if args.resume_id is not None:
+    with open("saves/{}.pkl".format(args.resume_id), 'rb') as f:
+        args = pickle.load(f)['args']
+
+from crafter_description import describe_frame, action_list
+
+env = crafter.Env(area=(256, 256))
+action_space = env.action_space
+
+
+from utils import get_ctxt, describe_achievements
+MANUAL = get_ctxt()
+
+from build_graph_new import build_graph
+from compose_prompt import compose_feedback_prompt, compose_gameplay_prompt
+from llm_api import get_query, get_token_counts # use agentkit.llm_api.get_query instead
+from functools import partial
+
+import wandb
+from wandb.sdk.data_types.trace_tree import Trace
+if args.resume_id is not None:
+    wandb.init(id=args.resume_id, resume="allow", project='AgentKit', config=args.__dict__)
+else:
+    wandb.init(project='AgentKit', config=args.__dict__)
+    wandb.run.log_code(".") # save the code in the wandb run
+
+# qprint: print that is silenced by --quiet.
+if args.verbose:
+    def qprint(*args, **kwargs):
+        print(*args, **kwargs)
+else:
+    def qprint(*args, **kwargs):
+        pass
+
+# Column order for the per-step achievements table.
+achievements = [
+    'collect_coal',
+    'collect_diamond',
+    'collect_drink',
+    'collect_iron',
+    'collect_sapling',
+    'collect_stone',
+    'collect_wood',
+    'defeat_skeleton',
+    'defeat_zombie',
+    'eat_cow',
+    'eat_plant',
+    'make_iron_pickaxe',
+    'make_iron_sword',
+    'make_stone_pickaxe',
+    'make_stone_sword',
+    'make_wood_pickaxe',
+    'make_wood_sword',
+    'place_furnace',
+    'place_plant',
+    'place_stone',
+    'place_table',
+    'wake_up',
+]
+
+# `database` is the shared blackboard read and written by all graph nodes.
+if args.resume_id is not None:
+    with open("saves/{}.pkl".format(args.resume_id), 'rb') as f:
+        pkl_save = pickle.load(f)
+    eps = pkl_save['eps']
+    database = pkl_save['database']
+else:
+    eps = 0
+    database = {}
+    database['kb'] = {
+        'unknowns': {},
+        'knowledge_base': {},
+    }
+    database['subgoals'] = {
+        'subgoal': "NA",
+        'guide': "NA",
+    }
+    database['reflection'] = {
+        "unexpected": [],
+        "mistake": [],
+        "correction": [],
+        "confusion": [],
+        "all": [],
+    }
+    database['history'] = {}
+    database['skills'] = {
+        "skill_library": {},
+        "skill": None,
+        "skill_old": None,
+    }
+    database['feedback'] = {
+        'skill_feedback': {},
+        'feedback': "",
+    }
+
+query_fast = get_query(args.llm_fast)
+query_fast_accurate = get_query(args.llm_fast_accurate)
+query_reason = get_query(args.llm_plan)
+query_spatial = get_query(args.llm_spatial)
+query_plan_accurate = get_query(args.llm_plan_accurate)
+
+llm_functions = {
+    'query_fast': {'query_model':query_fast, 'token_counter':query_fast.count_tokens},
+    'query_fast_accurate': {'query_model':query_fast_accurate, 'token_counter':query_fast_accurate.count_tokens},
+    'query_reason': {'query_model':partial(query_reason, max_gen=2048, temp=args.temperature), 'token_counter':query_reason.count_tokens},
+    'query_plan_accurate': {'query_model':partial(query_plan_accurate, max_gen=2048, temp=args.temperature), 'token_counter':query_plan_accurate.count_tokens},
+    'query_spatial': {'query_model':partial(query_spatial, max_gen=1500, temp=args.temperature), 'token_counter':query_spatial.count_tokens},
+}
+graph = build_graph(llm_functions, database)
+
+table_questions = list(graph.nodes.keys())
+
+columns=["Step", "OBS", "Reward", "Return"] + ["Action", "Repeats", "Skills", "Knowledge Base", "Env step"] + table_questions
+
+# NOTE(review): the following line appears truncated in this patch — it reads
+# "while(eps0:" and the episode/step loop headers plus the per-step
+# describe_frame call that should precede the trajectory bookkeeping are
+# missing. Confirm against the upstream AgentKit examples/crafter/main.py.
+while(eps0:
+            trajectories[-1][1] = last_act_desc
+        trajectories.append([step, None, desc])
+        # Two-step textual observation, with and without the taken actions.
+        text_obs_no_act = "\n\n".join(["== Gamestep {}{} ==\n\n".format(i, "" if i!=trajectories[-1][0] else " (current)",) + "{}".format(d) for i, _, d in trajectories[-2:]])
+        text_obs = "\n\n".join(["== Gamestep {}{} ==\n\n".format(i, "" if i!=trajectories[-1][0] else " (current)",) + "{}{}".format(d, "\n\nAction:\n{}".format(a) if a is not None else "") for i, a, d in trajectories[-2:]])
+        qprint(text_obs)
+
+        database['environment'] = {
+            'manual': describe_achievements(info, MANUAL),
+            'observation_2step': text_obs_no_act,
+            'observation_2step_with_action': text_obs,
+            'observation_current': desc,
+            'step': step,
+        }
+        qa_history_stream = copy.copy(qa_history)
+        qa_history_stream.append(graph.get_streaming_history())
+        database['history'] = {
+            'qa_history': qa_history,
+            'qa_history_stream': qa_history_stream,
+            'qa_history_actor_length': min(args.actor_reflection_granularity, max(3, skill_length)),
+            'qa_history_planner_length': args.planner_reflection_granularity,
+            'qa_history_planner_reflection_length': 3,
+        }
+
+        # Printing
+        qprint("\n" + Fore.BLACK + Back.GREEN + "--"*10 + " Actor Plans " + "--"*10 + Style.RESET_ALL)
+        if 'action_summary' in database.keys():
+            qprint(Style.DIM+json.dumps(database['action_summary'], indent=2) + Style.RESET_ALL)
+        qprint("Skill: {} -> {}".format(database['skills']['skill_old'], database['skills']['skill']))
+        qprint("Past actions:", " -> ".join(past_actions))
+        qprint("\n" + Fore.BLACK + Back.GREEN + "--"*10 + " Subgoal " + "--"*10 + Style.RESET_ALL)
+        if len(qa_history)>0:
+            strategy_desc = "\n\n".join(["## {}\n{}".format(d, qa_history[-1][q]) for q,d in database['prompts']['strategy_questions_desc'].items()])
+            qprint(Style.DIM+ strategy_desc + Style.RESET_ALL)
+        qprint("\n" + 
Fore.BLACK + Back.GREEN + "--"*10 + " Knowledge Base " + "--"*10 + Style.RESET_ALL)
+        qprint(Style.DIM+json.dumps(database['kb']['knowledge_base'], indent=2) + Style.RESET_ALL)
+        if 'unknowns_json' in database['kb'].keys():
+            qprint("\n" + Fore.BLACK + Back.GREEN + "--"*10 + " Unknowns " + "--"*10 + Style.RESET_ALL)
+            qprint(Style.DIM+json.dumps(list(database['kb']['unknowns_json'].values())[0], indent=2) + Style.RESET_ALL)
+        qprint("\n" + Fore.BLACK + Back.GREEN + "--"*10 + " Skills " + "--"*10 + Style.RESET_ALL)
+        qprint(Style.DIM+json.dumps({k: v['skill_desc'] for k,v in database['skills']['skill_library'].items()}, indent=2) + Style.RESET_ALL)
+        qprint("\n" + Fore.BLACK + Back.GREEN + "--"*10 + " Skill Feedback " + "--"*10 + Style.RESET_ALL)
+        qprint(Style.DIM + json.dumps(database['feedback']['skill_feedback'], indent=2) + Style.RESET_ALL)
+        # qprint("\n" + Fore.BLACK + Back.GREEN + "--"*10 + " Achievements " + "--"*10 + Style.RESET_ALL)
+        # qprint(Style.DIM + describe_achievements(info, MANUAL) + Style.RESET_ALL)
+
+        # Reasoning
+        qprint("\n" + Fore.BLACK + Back.GREEN + "--"*10 + " Reasoning " + "--"*10 + Style.RESET_ALL)
+        database['skills']['skill_old'] = database['skills']['skill']
+        # One full traversal of the AgentKit graph produces this step's answers.
+        qa_results = graph.evaluate()
+        qa_history.append(qa_results)
+
+        skill = database['skills']['skill']
+        skill_old = database['skills']['skill_old']
+        attention_rounds = database['reflection']
+
+        if skill is not None:
+            if skill not in skill_history.keys():
+                skill_history[skill] = []
+            skill_history[skill].append(step)
+            # Ask the reasoning LLM for skill feedback every
+            # feedback_granularity mistake/confusion rounds of this skill.
+            if (len(set(skill_history[skill]) & set(attention_rounds['mistake']).union(set(attention_rounds["confusion"])))+1) % args.feedback_granularity == 0:
+                CTXT_dict = {
+                    "CTXT": describe_achievements(info, MANUAL),
+                    "attention_rounds": list(set(skill_history[skill]) & set(attention_rounds["mistake"]).union(set(attention_rounds["confusion"]))),
+                    'step_offset': step,
+                    'qa_history': qa_history,
+                    'db': database,
+                }
+                messages, shrink_idx = compose_feedback_prompt(CTXT_dict, qa_history, "{}:{}".format(skill, database['skills']['skill_library'][skill]['skill_guide']), database['prompts']['feedback_questions'])
+                database['feedback']['skill_feedback'][skill] = ""
+                for shorthand, msg in messages.items():
+                    answer, _ = llm_functions['query_reason']['query_model'](msg, shrink_idx)
+                    # database['feedback']['skill_feedback'][skill] += "{}:\n{}\n\n".format(shorthand, answer)
+                    database['feedback']['skill_feedback'][skill] = "{}".format(answer)
+                database['feedback']['skill_feedback'][skill] = database['feedback']['skill_feedback'][skill].strip()
+                qprint(Fore.MAGENTA + "Feedback for {}:".format(skill) + Style.RESET_ALL)
+                qprint(Style.DIM+database['feedback']['skill_feedback'][skill] + Style.RESET_ALL)
+        qprint()
+
+        reward = 0
+        if info['sleeping']:
+            # Skip LLM reasoning while asleep; noop until the player wakes.
+            qprint(Fore.RED + "Player is sleeping. We manually take noop until the player's awake to save LLM calls:" + Style.RESET_ALL)
+            a = action_list.index("noop")
+            database['action'] = a
+            rep = 0
+            while info['sleeping']:
+                obs, rr, done, info = env.step(a)
+                qprint(Style.DIM + "====Sleeping: {} Reward: {}====".format(rep+1, rr) + Style.RESET_ALL)
+                qprint(Style.DIM + describe_frame(info, 1)[1])
+                reward += rr
+                env_step += 1
+                rep += 1
+            database['action_repeats'] = rep
+        else:
+            # Execute the chosen action, repeated as requested by the actor.
+            a = database['action']
+            for _ in range(database['action_repeats']):
+                obs, rr, done, info = env.step(a)
+                reward += rr
+                env_step += 1
+
+                # if the player is blocked, we stop repeating the action
+                if 'move' in action_list[a] and not describe_frame(info, 1)[-1]:
+                    break
+
+        new_row.append(action_list[database['action']])
+        new_row.append(database['action_repeats'])
+        new_row.append(json.dumps(database['skills']['skill_library'], indent=2))
+        new_row.append(json.dumps(database['kb']['knowledge_base'], indent=2))
+        new_row.append(env_step)
+
+        for q in table_questions:
+            new_row.append(qa_results[q])
+
+        R += reward
+        OBS.append(obs.copy())
+
+        step += 1
+        # Reset per-skill action history whenever the active skill changes.
+        if skill_old != skill:
+            past_actions = []
+            skill_length = 1
+        else:
+            past_actions.append(action_list[a])
+            skill_length += 1
+        achievement_table.add_data(*[info['achievements'][k] for k in achievements])
+
+        rollout_history.append(new_row)
+
+
+        # Knowledge Base Refinement
+        if step % args.kb_refine_granularity == 0 and len(database['kb']['knowledge_base']) > 0:
+            messages = [
+                {"role": "system", "content" : "Improve the knowledge base. Note that items in the knowledge base should augment the instruction manual, not duplicate it or contradict it. In addition, the knowledge base should not contain duplicate items."}
+            ]
+            messages.append({"role": "system", "content": "Instruction manual:\n\n{}".format(MANUAL)})
+            messages.append({"role": "system", "content": "Knowledge base:\n\n{}".format(json.dumps(database['kb']['knowledge_base'], indent=0))})
+
+            messages.append({"role": "user", "content": """
+For each item in the knowledge base, provide a 1-sentence summary of the related manual information if applicable, and determine whether the item should be included or removed from the knowledge base.
+Format the output as a JSON dictionary in the following format:
+```
+{
+"item_key": {
+    "item_value": $ANSWER,
+    "duplicate": $ANSWER, # Is this item a duplicate? [yes/no]
+    "manual_summary": $ANSWER, # 1-sentence summary of related manual information. Write "NA" if there's no related manual information.
+    "addition": $ANSWER, # Does this item offer additional information to the manual? [yes/no]
+    "contradiction": $ANSWER, # Does this item directly contradict the manual_summary? 
[yes/no]
+    }
+}
+```
+""".strip()})
+            # Retry up to 10 times until the LLM emits well-formed JSON.
+            for _ in range(10):
+                result, _ = llm_functions['query_reason']['query_model'](messages, 1)
+                parsed_answer, error_msg = utils.extract_json_objects(result)
+                if parsed_answer is None or type(parsed_answer[-1]) != dict:
+                    messages.append({"role": "assistant", "content": result})
+                    messages.append({"role": "user", "content": "Invalid Type: Expecting the last Json object to be dictionary"})
+                    continue
+                problem = False
+                for k, v in parsed_answer[-1].items():
+                    if len(v) != 5 or type(v) != dict:
+                        messages.append({"role": "assistant", "content": result})
+                        messages.append({"role": "user", "content": "Invalid Type: Expecting each value to be a dictionary with 5 keys"})
+                        problem = True
+                        break
+                if problem:
+                    continue
+                qprint(Fore.MAGENTA + "Refining Knowledge Base:" + Style.RESET_ALL)
+                qprint(Style.DIM+json.dumps(parsed_answer[-1], indent=2) + Style.RESET_ALL)
+                # Drop KB items the critique marks as duplicate/contradictory
+                # or as adding nothing beyond the manual.
+                for k, v in parsed_answer[-1].items():
+                    if "no" in [v['addition'].strip(), ] or 'yes' in [v['duplicate'].strip(), v['contradiction'].strip()]:
+                        del database['kb']['knowledge_base'][k]
+                break
+
+
+
+        qprint()
+        # Periodic logging and checkpointing.
+        if step % args.wandb_log_interval == 0 or done:
+            if root_span is not None:
+                root_span._span.end_time_ms = round(datetime.datetime.now().timestamp() * 1000)
+                root_span.log(name="eps-{}-trace".format(eps))
+                root_span = None
+                graph.set_wandb_root_span(root_span)
+            for skill in skill_history.keys():
+                feedback_table.add_data(*([skill, "NA", len(set(skill_history[skill]) & set(attention_rounds["mistake"]).union(set(attention_rounds["confusion"])))] if skill not in database['feedback']['skill_feedback'].keys() else [skill, database['feedback']['skill_feedback'][skill], len(set(skill_history[skill]) & set(attention_rounds["mistake"]))]))
+            rollouts = wandb.Table(columns=columns, data=copy.deepcopy(rollout_history))
+            wandb.log({"eps-{}-rollout/rollout {}~{}".format(eps, last_log, step-1): rollouts,
+                       "eps-{}-achievements/achievements {}~{}".format(eps, last_log, step-1): achievement_table,
+                       "eps-{}-feedback/feedback {}~{}".format(eps, last_log, step-1): feedback_table,
+                       "eps-{}-feedback/feedback-current".format(eps): feedback_table,
+                       "eps-{}-current/rollout-current".format(eps): rollouts,
+                       "eps-{}-current/achievements-current".format(eps): achievement_table,
+                       })
+            achievement_table = wandb.Table(columns=achievements)
+            feedback_table = wandb.Table(columns=["Skill", "feedback", "attention_rounds"])
+            last_log = step
+
+            # Rolling checkpoint used by --resume_id.
+            with open("saves/{}.pkl".format(wandb.run.id), 'wb') as f:
+                pickle.dump({
+                    'eps': eps,
+                    'done': done,
+                    'step': step,
+                    'env_step': env_step,
+                    'env': env,
+                    'trajectories': trajectories,
+                    'qa_history': qa_history,
+                    'gameplay_history': gameplay_history,
+                    'R': R,
+                    'OBS': OBS,
+                    'a': a,
+                    'obs': obs,
+                    'reward': reward,
+                    'info': info,
+                    'last_log': last_log,
+                    'rollout_history': rollout_history,
+                    'skill_length': skill_length,
+                    'skill_history': skill_history,
+                    'achievement_table': achievement_table,
+                    'feedback_table': feedback_table,
+                    'adaptive_answers': adaptive_answers,
+                    'database': database,
+                    'past_actions': past_actions,
+                }, f)
+
+        if done:
+            # Per-episode snapshot kept alongside the rolling checkpoint.
+            with open("saves/{}_eps{}.pkl".format(wandb.run.id, eps), 'wb') as f:
+                pickle.dump({
+                    'eps': eps,
+                    'done': done,
+                    'step': step,
+                    'env_step': env_step,
+                    'env': env,
+                    'trajectories': trajectories,
+                    'qa_history': qa_history,
+                    'gameplay_history': gameplay_history,
+                    'R': R,
+                    'OBS': OBS,
+                    'a': a,
+                    'obs': obs,
+                    'reward': reward,
+                    'info': info,
+                    'last_log': last_log,
+                    'rollout_history': rollout_history,
+                    'skill_length': skill_length,
+                    'skill_history': skill_history,
+                    'achievement_table': achievement_table,
+                    'feedback_table': feedback_table,
+                    'adaptive_answers': adaptive_answers,
+                    'database': database,
+                    'past_actions': past_actions,
+                }, f)
+            break
+
+    # End-of-episode logging.
+    wandb.log({"eps-{}-achievements/achievements {}~{}".format(eps, last_log, step-1): achievement_table,
+               "eps-{}-feedback/feedback {}~{}".format(eps, last_log, step-1): feedback_table,
+               "eps-{}-ALL/rollout-ALL".format(eps): wandb.Table(columns=columns, data=rollout_history),
+               "eps-{}-current/achievements-current".format(eps): achievement_table,
+               "eps-{}-feedback/feedback-current".format(eps): feedback_table,
+               })
+    achievement_table = wandb.Table(columns=achievements)
+    last_log = step
+
+    # Developer: This part does not seem to help the agent learn better. It's commented out for now.
+    # I wrote this part to collect feedback at the end of each round of game, but it seems to be unnecessary.
+    #
+    #
+    # CTXT_dict = {
+    #     "CTXT": MANUAL,
+    #     "db": database,
+    #     "attention_rounds": database['reflection'],
+    # }
+    # qa_results = topological_traverse(CTXT_dict, qa_history, database['prompts']['gameplay_questions'], compose_gameplay_prompt, max_gen=1024)
+    # end_of_round_table = wandb.Table(columns=[s for s in database['prompts']['gameplay_shorthands'].values()])
+    # row = []
+    # database['feedback']['feedback'] = ""
+    # for q,s in database['prompts']['gameplay_shorthands'].items():
+    #     database['feedback']['feedback'] += "{}:\n{}\n\n".format(s, qa_results[q].strip())
+    #     row.append(qa_results[q].strip())
+    # end_of_round_table.add_data(*row)
+    # qprint(Fore.YELLOW + "End of Round FEEDBACK:" + Style.RESET_ALL)
+    # qprint(Fore.YELLOW + Style.DIM + database['feedback']['feedback'] + Style.RESET_ALL)
+    # wandb.log({"eps-{}-end-feedback/feedback-end-of-round".format(eps): end_of_round_table,})
+
+
+    eps+=1
+
+
+
+wandb.finish()
\ No newline at end of file
diff --git a/examples/crafter/post_processing.py b/examples/crafter/post_processing.py
new file mode 100644
index 0000000..4803c2e
--- /dev/null
+++ b/examples/crafter/post_processing.py
@@ -0,0 +1,257 @@
+import json
+from utils import parse_tuple
+from agentkit import exceptions as ex
+from agentkit import SimpleDBNode
+from compose_prompt import ComposePlannerPrompt
+from colorama import Fore
+import traceback
+from crafter_description import match_act
+from agentkit import after_query as aq
+import 
class SubgoalAfterQuery(aq.JsonAfterQuery):
    """Parse the planner's subgoal JSON and store its fields in the database.

    Expects the last JSON object in the LLM output to be a dict with exactly
    the keys 'subgoal', 'completion_criteria', and 'guide'.
    """

    def __init__(self):
        super().__init__()
        self.type = dict
        self.required_keys = ['subgoal', 'completion_criteria', 'guide']
        self.length = 3

    def post_process(self):
        """Copy the three validated subgoal fields into db['subgoals']."""
        parsed_answer = self.parse_json()
        subgoals = self.node.db['subgoals']
        for key in self.required_keys:
            subgoals[key] = parsed_answer[-1][key]


class SkillAfterQuery(aq.BaseAfterQuery):
    """Validate a single identified skill and register it in the skill library.

    The LLM answer must end with a JSON dict of exactly one entry:
    {skill_name: [description, supported-parameters tuple, usage_guide]}.

    Raises:
        ex.AfterQueryError: when the answer cannot be parsed or does not match
            the expected single-skill shape; the message is fed back to the LLM.
    """

    def post_process(self):
        parsed_answer, error_msg = utils.extract_json_objects(self.node.result)

        error = None
        if parsed_answer is None:
            error = ex.AfterQueryError("Failed to parse answer", error_msg)
        elif parsed_answer[-1] is None or len(parsed_answer[-1]) == 0:
            error = ex.AfterQueryError("No answer", "Invalid Json: It seems that the last Json object in the output above is either invalid or empty.")
        elif type(parsed_answer[-1]) != dict:
            # BUG FIX: message said "got length {}" but was given the *type*; reworded to match.
            error = ex.AfterQueryError("Invalid answer", "Invalid Type: Expecting the last Json object to be dictionary, got type {} instead.".format(type(parsed_answer[-1])))
        elif len(parsed_answer[-1]) != 1:
            error = ex.AfterQueryError("Invalid answer", "Invalid Length: Expecting only one identified skill in the dictionary, got {} instead.".format(len(parsed_answer[-1])))
        elif list(parsed_answer[-1].values())[0] is None or len(list(parsed_answer[-1].values())[0]) != 3:
            # BUG FIX: message said "got length {}" but formatted the raw value; label it correctly.
            error = ex.AfterQueryError("Invalid answer", "Invalid Value: Expecting the value in the last Json dictionary to be `[description, supported parameters tuple, usage_guide]`, got {} instead.".format(list(parsed_answer[-1].values())[0]))

        if error is not None:
            raise error

        skill_type = list(parsed_answer[-1].keys())[0]
        skill_desc, skill_param, skill_guide = parsed_answer[-1][skill_type]
        self.node.result_raw = self.node.result
        # BUG FIX: the original format string had 3 placeholders for 4 arguments,
        # silently dropping skill_guide from the rendered result.
        self.node.result = "[{},{},{},{}]".format(skill_type, skill_desc, skill_param, skill_guide)
        self.node.db['skills']['skill_library'][skill_type] = {
            'skill_desc': skill_desc,
            'skill_param': skill_param,
            'skill_guide': skill_guide,
        }
        self.node.db['skills']['skill'] = skill_type


class AdaptiveAfterQuery(aq.JsonAfterQuery):
    """Turn LLM-proposed follow-up questions into a temporary graph node.

    If the model answered "N/A" there is nothing to ask; otherwise a
    question-answering node is inserted between the configured dependency
    nodes and the adaptive actor questions.
    """

    def post_process(self):
        # "N/A" means the model had nothing to ask this step.
        if self.node.result.strip() == "N/A":
            self.node.result = "N/A"
            self.node.db['adaptive_questions'] = None
            return

        questions = """Answer the current questions based on the observation, gameplay history, knowledge base, and instruction manual.
In your answer, explicitly state 'missing' if something is missing from the instruction manual and the knowledge base. Do not make assumptions.

Questions:
{}""".format(self.node.result)

        # NOTE(review): unlike ListActionAfterQuery, this node is created without
        # verbose=self.node.verbose -- confirm whether that asymmetry is intended.
        self.node.graph.add_temporary_node(SimpleDBNode(questions, questions, self.node.graph, self.node.query_llm, ComposePlannerPrompt(), self.node.db))
        for node in self.node.db['prompts']['adaptive_dependencies']:
            self.node.graph.add_edge_temporary(node, questions)

        for node in self.node.db['prompts']['adaptive_actor_questions']:
            self.node.graph.add_edge_temporary(questions, node, prepend=True)

        self.node.db['adaptive_questions'] = questions


class KBAddAfterQuery(aq.JsonAfterQuery):
    """Filter verified discoveries from the answer JSON into the knowledge base.

    An entry is accepted only when the LLM answered 'yes' on every quality
    gate listed in `features`; the short discovery text is then merged into
    db['kb']['knowledge_base'].

    Raises:
        ex.AfterQueryError: when an entry is missing a gate key or a gate
            value is not a string (surfaced with a traceback for the LLM).
    """

    def __init__(self):
        super().__init__()
        self.type = dict
        self.required_keys = []
        self.length = None

    def post_process(self):
        parsed_answer = self.parse_json()
        json_dict = parsed_answer[-1]
        new_knowledge = {}
        try:
            # Keep an entry only if every gate was answered 'yes'.
            features = ['discovered', 'general', 'unknown', 'concrete_and_precise', 'solid']
            for k, v in json_dict.items():
                if False not in ['yes' in v[f].lower() for f in features]:
                    new_knowledge[k] = v['discovery_short']
        except Exception as e:
            raise ex.AfterQueryError("Invalid answer", "{}: {}".format(e, traceback.format_exc()))
        self.node.result_raw = self.node.result
        self.node.result = json.dumps(json_dict, sort_keys=True, indent=0)
        self.node.db['kb']['knowledge_base'].update(new_knowledge)
class KBReasonAfterQuery(aq.JsonAfterQuery):
    # Collects "unknowns" -- open questions the LLM flagged about the game --
    # from the answer JSON and stores them in the knowledge-base section of the db,
    # keyed by the prompt that produced them.

    def __init__(self):
        super().__init__()
        self.type = dict
        self.required_keys = []
        self.length = None

    def post_process(self):
        # Last JSON object maps item -> {gate flags + 'info'}.
        parsed_answer = self.parse_json()
        json_dict = parsed_answer[-1]
        unknowns = {}
        try:
            # Keep an item only when the LLM answered 'yes' to every gate below.
            features = ['unknown', 'novel', 'general', 'relevant', 'correct']
            for k, v in json_dict.items():
                if False not in ['yes' in v[f].lower() for f in features]:
                    unknowns[k] = v['info']
        except Exception as e:
            # Missing gate keys / non-string values are surfaced back to the LLM.
            raise ex.AfterQueryError("Invalid answer", "{}: {}".format(e, traceback.format_exc()))
        # Both the rendered JSON string and the raw dict are stored, keyed by prompt.
        self.node.db['kb']['unknowns']={self.node.prompt: json.dumps(unknowns, sort_keys=True, indent=0)}
        self.node.db['kb']['unknowns_json']={self.node.prompt: unknowns}

class ReflectionAfterQuery(aq.JsonAfterQuery):
    # Records which kinds of reflection events (surprise, mistake, planned
    # correction, confusion) occurred at the current environment step, and
    # skips the expensive follow-up reflection questions when nothing happened.

    def __init__(self):
        super().__init__()
        self.type = dict
        self.required_keys = []
        self.length = 7

    def post_process(self):
        parsed_answer = self.parse_json()
        json_dict = parsed_answer[-1]
        # Each 'yes' answer appends the current step index to the matching event list.
        if 'yes' in json_dict['unexpected_encounters'].lower():
            self.node.db["reflection"]["unexpected"].append(self.node.db["environment"]["step"])
        if 'yes' in json_dict['mistake'].lower():
            self.node.db["reflection"]["mistake"].append(self.node.db["environment"]["step"])
        if 'yes' in json_dict['correction_planned'].lower():
            self.node.db["reflection"]["correction"].append(self.node.db["environment"]["step"])
        if 'yes' in json_dict['confused'].lower():
            self.node.db["reflection"]["confusion"].append(self.node.db["environment"]["step"])
        # "all" aggregates the four tracked events above.
        if True in ['yes' in json_dict[k].lower() for k in ['unexpected_encounters', 'mistake', 'correction_planned', 'confused']]:
            self.node.db["reflection"]["all"].append(self.node.db["environment"]["step"])
        # If *every* answer was negative and we already have history, the follow-up
        # reflection questions cannot add anything -- skip them for this step.
        if True not in ['yes' in v.lower() for v in json_dict.values()] and len(self.node.db["history"]["qa_history"]) > 0:
            print(Fore.BLUE + "Skipping a bunch of reflection questions..." + Fore.RESET)
            self.node.graph.skip_nodes_temporary(self.node.db["prompts"]["reflection_skip_questions"])

class ListActionAfterQuery(aq.JsonAfterQuery):
    # Filters the per-action feasibility JSON: records which actions are
    # currently allowed, compacts the result string, and (optionally) wires
    # previously generated adaptive questions into the strategy nodes.

    def __init__(self):
        super().__init__()
        self.type = dict
        self.required_keys = []
        self.length = None

    def post_process(self):
        parsed_answer = self.parse_json()
        filtered_result = {}
        try:
            self.node.db['allowed_actions'] = []
            # Allowed actions keep their achievement note; disallowed ones keep the reasoning.
            keys_to_keep_yes = ['target', 'allowed', 'unlock new achievement']
            keys_to_keep_no = ['target', 'allowed', 'reasoning']
            for action,v in parsed_answer[-1].items():
                if action.strip().lower() == "noop": # Skip noop. This doesn't change the behavior of the LLM experimentally but saves quite a bit of tokens.
                    continue
                if "yes" in v['allowed'].lower():
                    self.node.db['allowed_actions'].append(action)
                    filtered_result[action] = {k:v[k] for k in keys_to_keep_yes}
                else:
                    filtered_result[action] = {k:v[k] for k in keys_to_keep_no}
        except Exception as e:
            raise ex.AfterQueryError("Invalid answer", "{}: {}".format(e, traceback.format_exc()))
        # Two-pass render: first map each action to a placeholder index, then splice
        # each filtered dict in place of its quoted index. The placeholder "0", "1", ...
        # strings are what get replaced, so replacement order matters here.
        self.node.result = json.dumps({k:str(i) for i,k in enumerate(filtered_result.keys())}, indent=0).strip()
        for i, v in enumerate(filtered_result.values()):
            self.node.result = self.node.result.replace('"{}"'.format(i), json.dumps(v))

        # Adaptive Questions
        if 'adaptive_questions' not in self.node.db or self.node.db['adaptive_questions'] is None:
            return

        questions = self.node.db['adaptive_questions']

        # Re-insert the adaptive question node so the strategy questions depend on it.
        self.node.graph.add_temporary_node(SimpleDBNode(questions, questions, self.node.graph, self.node.query_llm, ComposePlannerPrompt(), self.node.db, verbose=self.node.verbose))
        for node in self.node.db['prompts']['adaptive_dependencies']:
            self.node.graph.add_edge_temporary(node, questions)

        for node in self.node.db['prompts']['adaptive_strategy_questions']:
            self.node.graph.add_edge_temporary(questions, node, prepend=True)
class ActionSummaryAfterQuery(aq.JsonAfterQuery):
    """Store the planner's action summary, splitting 'notes' into its own db slot."""

    def __init__(self):
        super().__init__()
        self.type = dict
        self.required_keys = ['plan-sketch', 'details', 'target', 'relevance-criteria', 'expiration-condition', 'notes']
        # self.length = 6

    def post_process(self):
        parsed_answer = self.parse_json()
        self.node.db['action_summary'] = parsed_answer[-1]
        self.node.db['action_notes'] = parsed_answer[-1]['notes']
        # 'notes' is surfaced separately via db['action_notes']; drop it from
        # the summary so the same text is not rendered twice downstream.
        del self.node.db['action_summary']['notes']


class ActionAfterQuery(aq.JsonAfterQuery):
    """Validate the chosen action, clamp its repeat count, and record it in the db.

    Raises:
        ex.AfterQueryError: when the action does not match any known action,
            or 'repeats' is a non-numeric string.
    """

    def __init__(self):
        super().__init__()
        self.type = dict
        self.required_keys = ['action', 'repeats', 'hazards', 'obstacles']
        # self.length = 3

    def post_process(self):
        parsed_answer = self.parse_json()
        act, action_name, error_msg = match_act(parsed_answer[-1]['action'].replace("(", "").replace("_", " "))
        if act is None:
            # BUG FIX: the original format string had one placeholder for two
            # arguments, silently dropping error_msg from the LLM feedback.
            raise ex.AfterQueryError("Invalid answer", "Invalid action: {}. {}".format(parsed_answer[-1]['action'], error_msg))
        if type(parsed_answer[-1]['repeats']) == str and not (parsed_answer[-1]['repeats']).isnumeric():
            raise ex.AfterQueryError("Invalid answer", "Invalid repeats: '{}'. Expecting an integer.".format(parsed_answer[-1]['repeats']))
        self.node.db['action'] = act
        # Repeat policy: movement may repeat up to 4x only when neither hazards
        # nor obstacles were flagged; 'do' up to 3x; everything else runs once.
        # NOTE(review): a negative 'repeats' passes min() unclamped -- confirm
        # the environment treats non-positive repeats as a no-op.
        if "move" in action_name.lower() and "yes" not in parsed_answer[-1]['hazards'].lower() and "yes" not in parsed_answer[-1]['obstacles'].lower():
            self.node.db['action_repeats'] = min(4, int(parsed_answer[-1]['repeats']))
        elif "do" in action_name.lower():
            self.node.db['action_repeats'] = min(3, int(parsed_answer[-1]['repeats']))
        else:
            self.node.db['action_repeats'] = 1

        self.node.result = json.dumps({
            'action': action_name,
            'repeats': self.node.db['action_repeats']
        }, indent=2)


class SummaryAfterQuery(aq.JsonAfterQuery):
    """Render a one-line natural-language description of the executed action."""

    def __init__(self):
        super().__init__()
        self.type = dict
        self.required_keys = ['action', 'repeats', 'target', 'success', 'causes_of_failure']
        # self.length = 5

    def post_process(self):
        parsed_answer = self.parse_json()

        action_desc = ""
        # Movement has no meaningful target; everything else reports one.
        if 'move' in parsed_answer[-1]['action'].lower():
            action_desc += "{}, {} steps".format(parsed_answer[-1]['action'], parsed_answer[-1]['repeats'])
        else:
            action_desc += "{}, {} steps, target: {}".format(parsed_answer[-1]['action'], parsed_answer[-1]['repeats'], parsed_answer[-1]['target'])
        if 'no' in parsed_answer[-1]['success'].lower():
            action_desc += " (failed, causes of failure: {})".format(parsed_answer[-1]['causes_of_failure'])
        else:
            action_desc += " (succeeded)"

        self.node.result = action_desc
feature: +# allow user to specify a query_model function for each prompt. +# Run query_model on all '''prompts''' according to topological order defined by '''edges''' +# expected return value: a dictionary mapping prompt to result. + +# level 2 feature: +# allow user to specify a compose_prompt function for each prompt. +# Run compose_prompt before query_model on each prompt. +# +# compose_prompt takes the following arguments: +# dependencies: list of (prompt, result) pairs. We want to query LLM for all the dependencies before querying LLM for the current prompt. +# prompt: the current prompt we want to query LLM for. +# database: a database that supports the following operations (can be accessed by the user as well): +# database.get(key) -> value +# database.put(key, value) +# database.delete(key) +# database.clear() +# database.keys() -> list of keys +# database.values() -> list of values +# database.items() -> list of (key, value) pairs +# +# return value: a tuple (prompt, idx) directly passed to query_model. + +# level 3 feature: +# add a history argument to compose_prompt. +# +# history stores a list of dictionary (see expected return value of level 1). +# history.clear() clears the history. +# adding to history should be automatic. +# +# This allows compose_prompt to access the results of previous queries. + +# level 4 feature: +# allow user to specify a after_query function for each prompt. +# Run after_query after query_model on each prompt. +# +# after_query takes the following arguments: +# prompt: the current prompt we queried LLM for. +# result: the result returned by LLM. +# database: see level 2 +# history: see level 3 +# +# after_query can modify the database, and change how the graph is traversed. +# For example, one can add a new node to the graph, or remove nodes from the graph. 
def parse_tuple(answer):
    """Extract the first parenthesized tuple from an LLM answer string.

    Tries a strict literal parse of the outermost "(...)" span first, then
    falls back to a hand-rolled splitter that treats bare words as strings
    and maps "True"/"False" to booleans.

    Returns:
        (tuple, None) on success, or (None, error_message) on failure.
    """
    try:
        start = answer.index("(")
        end = answer.rindex(")")
        # SECURITY FIX: `answer` is untrusted LLM output -- use ast.literal_eval
        # instead of eval so it can never execute arbitrary code. Literal tuples
        # parse identically; non-literal content falls through to the manual parser.
        import ast
        result = ast.literal_eval(answer[start:end + 1])
        return result, None
    except Exception:
        pass
    try:
        # Manual scan: split on top-level commas, tracking bracket depth and
        # quoting with a single counter `depth` (quotes bump the depth so commas
        # inside strings are not treated as separators).
        start_index = answer.index("(")
        tup = []
        depth = 1
        tracking = None  # the quote character currently open, if any
        start = start_index + 1
        for i in range(start_index + 1, len(answer)):
            if answer[i] in {'(', '{'}:
                depth += 1
            elif answer[i] in {')', '}'}:
                depth -= 1
            elif answer[i] in {"'", '"'} and tracking is None:
                depth += 1
                tracking = answer[i]
            elif answer[i] == tracking:
                depth -= 1
                tracking = None
            elif depth == 1 and answer[i] == ',':
                item = answer[start:i].replace("\"", "").replace("'", "").strip()
                if item == "False":
                    item = False
                elif item == "True":
                    item = True
                tup.append(item)
                start = i + 1
            if depth == 0:
                # Closing parenthesis of the tuple: flush the last element.
                item = answer[start:i].replace("\"", "").replace("'", "").strip()
                if item == "False":
                    item = False
                elif item == "True":
                    item = True
                tup.append(item)
                break
        if len(tup) == 0:
            return None, "Error: could not evaluate answer as a tuple"
        return tuple(tup), None
    except Exception:
        return None, "Error: {}".format(traceback.format_exc())

def get_ctxt():
    """Build (and cache) the instruction-manual context string for the agent.

    Loads pre-collected Q/A data, deduplicates the chosen answers via an LLM
    on first run (cached to cache/ctxt.pkl afterwards), then applies a series
    of wording fixes and appends hand-written gameplay notes.

    Returns:
        The final manual text (also printed to stdout).
    """
    with open("./crafter_initial_QA.pkl", "rb") as f:
        QA_data = pickle.load(f)

    # Indices of the question/answer pairs selected for the manual.
    choosen_idx = {
        "gameplay":[1, 3,],
        "objective":[1,],
        "actions":[1,],
    }

    # Debug print: show every question, marking the chosen ones with '->'.
    for k, v in QA_data.items():
        print("=="*10)
        print(k)
        print()
        print("\n".join(["{}{}. {}".format("-> " if i in choosen_idx[k] else "   ", i, x) for i,x in enumerate(v['questions'])]))

    if os.path.exists("cache/ctxt.pkl"):
        with open("cache/ctxt.pkl", 'rb') as f:
            CTXT = pickle.load(f)
    else:
        import itertools
        from llm_api import get_query
        query_model = get_query("gpt-3.5-turbo-1106")

        def get_list(L, idx):
            # Flatten nested answer lists, picking element idx at each leaf level.
            if L == []:
                return []
            if type(L[0]) == str:
                return [L[idx]]
            else:
                return list(itertools.chain.from_iterable([get_list(ll, idx) for ll in L]))

        CTXT = ""
        for k, ll in choosen_idx.items():
            for idx in ll:
                ans_list = get_list(QA_data[k]['answers'], idx)
                CTXT += QA_data[k]["questions"][idx] + "\n"
                # Ask the LLM to merge the duplicate answers into one.
                prompt = "Question: {}\n".format(QA_data[k]["questions"][idx]) + "\n".join(ans_list) + "\n\nRemove duplicate items. New Answer:\n"
                answer = query_model(prompt, 0)
                CTXT += answer
                CTXT += "\n\n"
        CTXT = CTXT.strip()
        with open("cache/ctxt.pkl", 'wb') as f:
            pickle.dump(CTXT, f)
    # NOTE: the cache stores the raw merged text, so every fix-up below must run
    # on both the cached and the freshly built path.
    CTXT = CTXT.replace("DO NOT answer in LaTeX.", "")

    # Rephrase Up/Down/Left/Right as compass directions to match the game's frame descriptions.
    CTXT = CTXT.replace("Move Up: Flat ground above the agent.", "Move North: Flat ground to the north of the agent.")
    CTXT = CTXT.replace("Move Down: Flat ground below the agent.", "Move South: Flat ground to the south of the agent.")
    CTXT = CTXT.replace("Move Left: Flat ground left to the agent.", "Move West: Flat ground to the west of the agent.")
    CTXT = CTXT.replace("Move Right: Flat ground right to the agent.", "Move East: Flat ground to the east of the agent.")
    # CTXT = CTXT.replace("8. Place Table: Wood in inventory.", "8. Place Table: 2 Wood in inventory.")
    # CTXT = CTXT.replace("9. Place Furnace: Stone in inventory.", "9. Place Furnace: 4 Stone in inventory.")
    CTXT += "\n\nHealth restores automatically over time, independent from food and hydration."
    notes = [
        "Diagonal actions are not supported, only use the four cardinal directions.",
        "The game world is infinitely large and procedurally generated from a fixed random seed.",
        "If you are within close proximity to a zombie, it will chase you. You must kill the zombie to survive.",
        "When sleeping, the player will not be able to take any actions until energy is full and will take triple damage from zombies. Therefore, do not sleep when threats are nearby.",
    ]
    CTXT += "\n\nNotes:\n" + '\n'.join([" - " + x for x in notes])

    # Replace the verbose prompt instructions with compact section headers.
    CTXT = CTXT.replace("In plain text. List all objects I need to interact/avoid to survive in the game. Use \"I would like to X object Y\" in each step. Replace Y by the actual object, X by the actual interaction.", "List of desired interactions:")
    CTXT = CTXT.replace("I would like to ", " - ")

    CTXT = CTXT.replace("Write all information helpful for the game in a numbered list.", "List of helpful information:")
    CTXT = CTXT.replace("Write all game objectives numbered list. For each objective, list its requirements.", "List of game achievements and their requirements:")
    CTXT = CTXT.replace("Write all actions as a numbered list. For each action, list its requirements.", "List of all actions and their requirements:")

    print(CTXT)
    return CTXT

def describe_achievements(info, CTXT):
    """Append the game objective and achievement progress to the manual text.

    Args:
        info: environment info dict; reads info['achievements'] mapping
            achievement name -> completion count.
        CTXT: base manual text to extend (not modified in place).

    Returns:
        CTXT plus objective line and accomplished/unaccomplished lists
        (underscores in achievement names rendered as spaces).
    """
    new_CTXT = CTXT
    new_CTXT += "\n\nGame Objective: Survive and accomplish as many of the achievements as possible, and always be prepared for threats in the game."
    unaccomplished_list = [k.replace("_", " ") for k,v in info['achievements'].items() if v<1]
    accomplished_list = [k.replace("_", " ") for k,v in info['achievements'].items() if v>0]
    new_CTXT += "\nCurrent *accomplished* achievements: " + ", ".join(accomplished_list)
    new_CTXT += "\nCurrent *unaccomplished* achievements: " + ", ".join(unaccomplished_list)
    return new_CTXT