From 530e405adb5bccf3ec0b213c57d0e65fc1ebe103 Mon Sep 17 00:00:00 2001 From: Aymeric Roucher <69208727+aymeric-roucher@users.noreply.github.com> Date: Mon, 17 Feb 2025 17:24:23 +0100 Subject: [PATCH] Documentation: manage agent's memory (#675) Co-authored-by: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> --- docs/source/en/_toctree.yml | 2 + docs/source/en/guided_tour.md | 11 +++ docs/source/en/tutorials/memory.md | 148 +++++++++++++++++++++++++++++ src/smolagents/memory.py | 16 ++-- tests/test_agents.py | 14 +++ 5 files changed, 183 insertions(+), 8 deletions(-) create mode 100644 docs/source/en/tutorials/memory.md diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml index 9859ccd5a..c1efd31dc 100644 --- a/docs/source/en/_toctree.yml +++ b/docs/source/en/_toctree.yml @@ -14,6 +14,8 @@ title: 🛠️ Tools - in-depth guide - local: tutorials/secure_code_execution title: 🛡️ Secure your code execution with E2B + - local: tutorials/memory + title: 📚 Manage your agent's memory - title: Conceptual guides sections: - local: conceptual_guides/intro_agents diff --git a/docs/source/en/guided_tour.md b/docs/source/en/guided_tour.md index 371611023..5eca7fc21 100644 --- a/docs/source/en/guided_tour.md +++ b/docs/source/en/guided_tour.md @@ -416,6 +416,17 @@ You can also use this `reset=False` argument to keep the conversation going in a ## Next steps +Finally, when you've configured your agent to your needs, you can share it to the Hub! + +```py +agent.push_to_hub("m-ric/my_agent") +``` + +Similarly, to load an agent that has been pushed to hub, if you trust the code from its tools, use: +```py +agent.from_hub("m-ric/my_agent", trust_remote_code=True) +``` + For more in-depth usage, you will then want to check out our tutorials: - [the explanation of how our code agents work](./tutorials/secure_code_execution) - [this guide on how to build good agents](./tutorials/building_good_agents). 
diff --git a/docs/source/en/tutorials/memory.md b/docs/source/en/tutorials/memory.md new file mode 100644 index 000000000..0732d9596 --- /dev/null +++ b/docs/source/en/tutorials/memory.md @@ -0,0 +1,148 @@ + +# 📚 Manage your agent's memory + +[[open-in-colab]] + +In the end, an agent can be defined by simple components: it has tools and prompts. +And most importantly, it has a memory of past steps, drawing a history of planning, execution, and errors. + +### Replay your agent's memory + +We offer several features to inspect a past agent run. + +You can instrument the agent's run to display it in a great UI that lets you zoom in/out on specific steps, as highlighted in the [instrumentation guide](./inspect_runs). + +You can also use `agent.replay()`, as follows: + +After the agent has run: +```py +from smolagents import HfApiModel, CodeAgent + +agent = CodeAgent(tools=[], model=HfApiModel(), verbosity_level=0) + +result = agent.run("What's the 20th Fibonacci number?") +``` + +If you want to replay this last run, just use: +```py +agent.replay() +``` + +### Dynamically change the agent's memory + +Many advanced use cases require dynamic modification of the agent's memory. + +You can access the agent's memory using: + +```py +from smolagents import ActionStep + +system_prompt_step = agent.memory.system_prompt +print("The system prompt given to the agent was:") +print(system_prompt_step.system_prompt) + +task_step = agent.memory.steps[0] +print("\n\nThe first task step was:") +print(task_step.task) + +for step in agent.memory.steps: + if isinstance(step, ActionStep): + if step.error is not None: + print(f"\nStep {step.step_number} got this error:\n{step.error}\n") + else: + print(f"\nStep {step.step_number} got these observations:\n{step.observations}\n") +``` + +Use `agent.memory.get_full_steps()` to get full steps as dictionaries. + +You can also use step callbacks to dynamically change the agent's memory. 
+ +Step callbacks can access the `agent` itself in their arguments, so they can access any memory step as highlighted above, and change it if needed. For instance, let's say you are observing screenshots of each step performed by a web browser agent. You want to log the newest screenshot, and remove the images from older steps to save on token costs. + +You could run something like the following. +_Note: this code is incomplete, some imports and object definitions have been removed for the sake of concision, visit [the original script](https://github.com/huggingface/smolagents/blob/main/src/smolagents/vision_web_browser.py) to get the full working code._ + +```py +import helium +from PIL import Image +from io import BytesIO +from time import sleep + +def update_screenshot(memory_step: ActionStep, agent: CodeAgent) -> None: + sleep(1.0) # Let JavaScript animations happen before taking the screenshot + driver = helium.get_driver() + latest_step = memory_step.step_number + for previous_memory_step in agent.memory.steps: # Remove previous screenshots from logs for lean processing + if isinstance(previous_memory_step, ActionStep) and previous_memory_step.step_number <= latest_step - 2: + previous_memory_step.observations_images = None + png_bytes = driver.get_screenshot_as_png() + image = Image.open(BytesIO(png_bytes)) + memory_step.observations_images = [image.copy()] +``` + +Then you should pass this function in the `step_callbacks` argument upon initialization of your agent: + +```py +CodeAgent( + tools=[DuckDuckGoSearchTool(), go_back, close_popups, search_item_ctrl_f], + model=model, + additional_authorized_imports=["helium"], + step_callbacks=[update_screenshot], + max_steps=20, + verbosity_level=2, +) +``` + +Head to our [vision web browser code](https://github.com/huggingface/smolagents/blob/main/src/smolagents/vision_web_browser.py) to see the full working example. 
+ +### Run agents one step at a time + +This can be useful in case you have tool calls that take days: you can just run your agents step by step. +This will also let you update the memory on each step. + +```py +from smolagents import HfApiModel, CodeAgent, ActionStep, TaskStep + +agent = CodeAgent(tools=[], model=HfApiModel(), verbosity_level=1) +print(agent.memory.system_prompt) + +task = "What is the 20th Fibonacci number?" + +# You could modify the memory as needed here by inputting the memory of another agent. +# agent.memory.steps = previous_agent.memory.steps + +# Let's start a new task! +agent.memory.steps.append(TaskStep(task=task, task_images=[])) + +final_answer = None +step_number = 1 +while final_answer is None and step_number <= 10: + memory_step = ActionStep( + step_number=step_number, + observations_images=[], + ) + # Run one step. + final_answer = agent.step(memory_step) + agent.memory.steps.append(memory_step) + step_number += 1 + + # Change the memory as you please! + # For instance to update the latest step: + # agent.memory.steps[-1] = ... 
+ +print("The final answer is:", final_answer) +``` \ No newline at end of file diff --git a/src/smolagents/memory.py b/src/smolagents/memory.py index 5bd1b0b69..5875db596 100644 --- a/src/smolagents/memory.py +++ b/src/smolagents/memory.py @@ -3,7 +3,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, TypedDict, Union from smolagents.models import ChatMessage, MessageRole -from smolagents.monitoring import AgentLogger +from smolagents.monitoring import AgentLogger, LogLevel from smolagents.utils import AgentError, make_json_serializable @@ -216,19 +216,19 @@ def replay(self, logger: AgentLogger, detailed: bool = False): logger.console.log("Replaying the agent's steps:") for step in self.steps: if isinstance(step, SystemPromptStep) and detailed: - logger.log_markdown(title="System prompt", content=step.system_prompt) + logger.log_markdown(title="System prompt", content=step.system_prompt, level=LogLevel.ERROR) elif isinstance(step, TaskStep): - logger.log_task(step.task, "", 2) + logger.log_task(step.task, "", level=LogLevel.ERROR) elif isinstance(step, ActionStep): - logger.log_rule(f"Step {step.step_number}") + logger.log_rule(f"Step {step.step_number}", level=LogLevel.ERROR) if detailed: logger.log_messages(step.model_input_messages) - logger.log_markdown(title="Agent output:", content=step.model_output) + logger.log_markdown(title="Agent output:", content=step.model_output, level=LogLevel.ERROR) elif isinstance(step, PlanningStep): - logger.log_rule("Planning step") + logger.log_rule("Planning step", level=LogLevel.ERROR) if detailed: - logger.log_messages(step.model_input_messages) - logger.log_markdown(title="Agent output:", content=step.facts + "\n" + step.plan) + logger.log_messages(step.model_input_messages, level=LogLevel.ERROR) + logger.log_markdown(title="Agent output:", content=step.facts + "\n" + step.plan, level=LogLevel.ERROR) __all__ = ["AgentMemory"] diff --git a/tests/test_agents.py b/tests/test_agents.py index e5f490658..376cc0869 100644 --- 
a/tests/test_agents.py +++ b/tests/test_agents.py @@ -490,6 +490,20 @@ def test_code_agent_missing_import_triggers_advice_in_error_log(self): str_output = capture.get() assert "`additional_authorized_imports`" in str_output.replace("\n", "") + def test_replay_shows_logs(self): + agent = CodeAgent( + tools=[], model=fake_code_model_import, verbosity_level=0, additional_authorized_imports=["numpy"] + ) + agent.run("Count to 3") + + with agent.logger.console.capture() as capture: + agent.replay() + str_output = capture.get().replace("\n", "") + assert "New run" in str_output + assert "Agent output:" in str_output + assert 'final_answer("got' in str_output + assert "```" in str_output + def test_code_nontrivial_final_answer_works(self): def fake_code_model_final_answer(messages, stop_sequences=None, grammar=None): return ChatMessage(