diff --git a/.gitignore b/.gitignore index 4db649aab..d6ab56734 100644 --- a/.gitignore +++ b/.gitignore @@ -164,4 +164,6 @@ cython_debug/ .DS_Store agentops_time_travel.json -.agentops_time_travel.yaml \ No newline at end of file +.agentops_time_travel.yaml + +node_modules \ No newline at end of file diff --git a/README.md b/README.md index 264c5bc4a..e87981dfa 100644 --- a/README.md +++ b/README.md @@ -574,6 +574,14 @@ Check out the [LlamaIndex docs](https://docs.llamaindex.ai/en/stable/module_guid +### Llama Stack 🦙🥞 + +AgentOps provides support for the Llama Stack Python Client (>=0.0.53), allowing you to monitor your agentic applications. + +- [AgentOps integration example 1](https://github.com/AgentOps-AI/agentops/pull/530/files/65a5ab4fdcf310326f191d4b870d4f553591e3ea#diff-fdddf65549f3714f8f007ce7dfd1cde720329fe54155d54389dd50fbd81813cb) +- [AgentOps integration example 2](https://github.com/AgentOps-AI/agentops/pull/530/files/65a5ab4fdcf310326f191d4b870d4f553591e3ea#diff-6688ff4fb7ab1ce7b1cc9b8362ca27264a3060c16737fb1d850305787a6e3699) +- [Official Llama Stack Python Client](https://github.com/meta-llama/llama-stack-client-python) + ## Time travel debugging 🔮
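For orientation, the following is a minimal sketch of the instrumented flow, adapted from the canary tests and example notebook added later in this diff. It assumes a Llama Stack server is already running locally on port 5001 and that `AGENTOPS_API_KEY` is set in the environment.

```python
# Minimal sketch (assumes a local Llama Stack server on port 5001 and a valid
# AGENTOPS_API_KEY); adapted from the inference canary added in this PR.
import os

import agentops
from llama_stack_client import LlamaStackClient
from llama_stack_client.types import UserMessage

agentops.init(os.getenv("AGENTOPS_API_KEY"), auto_start_session=False)
agentops.start_session()

client = LlamaStackClient(base_url="http://0.0.0.0:5001")

# chat_completion calls are recorded as LLM events once agentops.init() has run
response = client.inference.chat_completion(
    messages=[UserMessage(content="write me a 3 word poem about the moon", role="user")],
    model_id="meta-llama/Llama-3.2-1B-Instruct",
    stream=False,
)
print(response.completion_message.content)

agentops.end_session("Success")
```

Streaming completions and `Agent.create_turn` calls are handled the same way by the provider added below.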
diff --git a/agentops/llms/__init__.py b/agentops/llms/__init__.py index a5852d8cd..b26cd1233 100644 --- a/agentops/llms/__init__.py +++ b/agentops/llms/__init__.py @@ -5,6 +5,8 @@ from packaging.version import Version, parse +from agentops.llms.llama_stack_client import LlamaStackClientProvider + from ..log_config import logger from .cohere import CohereProvider @@ -35,6 +37,9 @@ class LlmTracker: "5.4.0": ("chat", "chat_stream"), }, "ollama": {"0.0.1": ("chat", "Client.chat", "AsyncClient.chat")}, + "llama_stack_client": { + "0.0.53": ("resources.InferenceResource.chat_completion", "lib.agents.agent.Agent.create_turn"), + }, "groq": { "0.9.0": ("Client.chat", "AsyncClient.chat"), }, @@ -151,6 +156,15 @@ def override_api(self): else: logger.warning(f"Only AI21>=2.0.0 supported. v{module_version} found.") + if api == "llama_stack_client": + module_version = version(api) + + if Version(module_version) >= parse("0.0.53"): + provider = LlamaStackClientProvider(self.client) + provider.override() + else: + logger.warning(f"Only LlamaStackClient>=0.0.53 supported. v{module_version} found.") + def stop_instrumenting(self): OpenAiProvider(self.client).undo_override() GroqProvider(self.client).undo_override() @@ -160,3 +174,4 @@ def stop_instrumenting(self): AnthropicProvider(self.client).undo_override() MistralProvider(self.client).undo_override() AI21Provider(self.client).undo_override() + LlamaStackClientProvider(self.client).undo_override() diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py new file mode 100644 index 000000000..8379a6fef --- /dev/null +++ b/agentops/llms/llama_stack_client.py @@ -0,0 +1,297 @@ +import inspect +import pprint +import sys +from typing import Any, AsyncGenerator, Dict, Optional, List +import logging +from typing import Union + +from agentops.event import LLMEvent, ErrorEvent, ToolEvent +from agentops.session import Session +from agentops.log_config import logger +from agentops.helpers import get_ISO_time, check_call_stack_for_agent_id +from agentops.llms.instrumented_provider import InstrumentedProvider + + +class LlamaStackClientProvider(InstrumentedProvider): + original_complete = None + original_create_turn = None + + def __init__(self, client): + super().__init__(client) + self._provider_name = "LlamaStack" + + def handle_response( + self, response, kwargs, init_timestamp, session: Optional[Session] = None, metadata: Optional[Dict] = {} + ) -> dict: + """Handle responses for LlamaStack""" + + try: + stack = [] + accum_delta = None + accum_tool_delta = None + # tool_event = None + # llm_event = None + + def handle_stream_chunk(chunk: dict): + nonlocal stack + + # NOTE: prompt/completion usage not returned in response when streaming + + try: + nonlocal accum_delta + + if chunk.event.event_type == "start": + llm_event = LLMEvent(init_timestamp=get_ISO_time(), params=kwargs) + stack.append({"event_type": "start", "event": llm_event}) + accum_delta = chunk.event.delta + elif chunk.event.event_type == "progress": + accum_delta += chunk.event.delta + elif chunk.event.event_type == "complete": + if ( + stack[-1]["event_type"] == "start" + ): # check if the last event in the stack is a step start event + llm_event = stack.pop().get("event") + llm_event.prompt = [ + {"content": message.content, "role": message.role} for message in kwargs["messages"] + ] + llm_event.agent_id = check_call_stack_for_agent_id() + llm_event.model = kwargs["model_id"] + llm_event.prompt_tokens = None + llm_event.completion = accum_delta or 
kwargs["completion"] + llm_event.completion_tokens = None + llm_event.end_timestamp = get_ISO_time() + self._safe_record(session, llm_event) + + except Exception as e: + llm_event = LLMEvent(init_timestamp=init_timestamp, end_timestamp=get_ISO_time(), params=kwargs) + self._safe_record(session, ErrorEvent(trigger_event=llm_event, exception=e)) + + kwargs_str = pprint.pformat(kwargs) + chunk = pprint.pformat(chunk) + logger.warning( + f"Unable to parse a chunk for LLM call. Skipping upload to AgentOps\n" + f"chunk:\n {chunk}\n" + f"kwargs:\n {kwargs_str}\n" + ) + + def handle_stream_agent(chunk: dict): + # NOTE: prompt/completion usage not returned in response when streaming + + # nonlocal llm_event + nonlocal stack + + if session is not None: + llm_event.session_id = session.session_id + + try: + if chunk.event.payload.event_type == "turn_start": + logger.debug("turn_start") + stack.append({"event_type": chunk.event.payload.event_type, "event": None}) + elif chunk.event.payload.event_type == "step_start": + logger.debug("step_start") + llm_event = LLMEvent(init_timestamp=get_ISO_time(), params=kwargs) + stack.append({"event_type": chunk.event.payload.event_type, "event": llm_event}) + elif chunk.event.payload.event_type == "step_progress": + if ( + chunk.event.payload.step_type == "inference" + and chunk.event.payload.text_delta_model_response + ): + nonlocal accum_delta + delta = chunk.event.payload.text_delta_model_response + + if accum_delta: + accum_delta += delta + else: + accum_delta = delta + elif chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta: + if chunk.event.payload.tool_call_delta.parse_status == "started": + logger.debug("tool_started") + tool_event = ToolEvent(init_timestamp=get_ISO_time(), params=kwargs) + tool_event.name = "tool_started" + + stack.append({"event_type": "tool_started", "event": tool_event}) + + elif chunk.event.payload.tool_call_delta.parse_status == "in_progress": + nonlocal accum_tool_delta + delta = chunk.event.payload.tool_call_delta.content + if accum_tool_delta: + accum_tool_delta += delta + else: + accum_tool_delta = delta + elif chunk.event.payload.tool_call_delta.parse_status == "success": + logger.debug("ToolExecution - success") + if ( + stack[-1]["event_type"] == "tool_started" + ): # check if the last event in the stack is a tool execution event + tool_event = stack.pop().get("event") + tool_event.end_timestamp = get_ISO_time() + tool_event.params["completion"] = accum_tool_delta + self._safe_record(session, tool_event) + elif chunk.event.payload.tool_call_delta.parse_status == "failure": + logger.warning("ToolExecution - failure") + if stack[-1]["event_type"] == "ToolExecution - started": + tool_event = stack.pop().get("event") + tool_event.end_timestamp = get_ISO_time() + tool_event.params["completion"] = accum_tool_delta + self._safe_record( + session, + ErrorEvent( + trigger_event=tool_event, exception=Exception("ToolExecution - failure") + ), + ) + + elif chunk.event.payload.event_type == "step_complete": + logger.debug("Step complete event received") + + if chunk.event.payload.step_type == "inference": + logger.debug("Step complete inference") + + if stack[-1]["event_type"] == "step_start": + llm_event = stack.pop().get("event") + llm_event.prompt = [ + {"content": message["content"], "role": message["role"]} + for message in kwargs["messages"] + ] + llm_event.agent_id = check_call_stack_for_agent_id() + llm_event.model = metadata.get("model_id", "Unable to identify model") + llm_event.prompt_tokens = 
None + llm_event.completion = accum_delta or kwargs["completion"] + llm_event.completion_tokens = None + llm_event.end_timestamp = get_ISO_time() + self._safe_record(session, llm_event) + else: + logger.warning("Unexpected event stack state for inference step complete") + elif chunk.event.payload.step_type == "tool_execution": + if stack[-1]["event_type"] == "tool_started": + logger.debug("tool_complete") + tool_event = stack.pop().get("event") + tool_event.name = "tool_complete" + tool_event.params["completion"] = accum_tool_delta + self._safe_record(session, tool_event) + elif chunk.event.payload.event_type == "turn_complete": + if stack[-1]["event_type"] == "turn_start": + logger.debug("turn_start") + pass + + except Exception as e: + llm_event = LLMEvent(init_timestamp=init_timestamp, end_timestamp=get_ISO_time(), params=kwargs) + + self._safe_record(session, ErrorEvent(trigger_event=llm_event, exception=e)) + + kwargs_str = pprint.pformat(kwargs) + chunk = pprint.pformat(chunk) + logger.warning( + f"Unable to parse a chunk for LLM call. Skipping upload to AgentOps\n" + f"chunk:\n {chunk}\n" + f"kwargs:\n {kwargs_str}\n" + ) + + if kwargs.get("stream", False): + + def generator(): + for chunk in response: + handle_stream_chunk(chunk) + yield chunk + + return generator() + elif inspect.isasyncgen(response): + + async def agent_generator(): + async for chunk in response: + handle_stream_agent(chunk) + yield chunk + + return agent_generator() + elif inspect.isgenerator(response): + + def agent_generator(): + for chunk in response: + handle_stream_agent(chunk) + yield chunk + + return agent_generator() + else: + llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) + if session is not None: + llm_event.session_id = session.session_id + + llm_event.returns = response + llm_event.agent_id = check_call_stack_for_agent_id() + llm_event.model = kwargs["model_id"] + llm_event.prompt = [ + {"content": message.content, "role": message.role} for message in kwargs["messages"] + ] + llm_event.prompt_tokens = None + llm_event.completion = response.completion_message.content + llm_event.completion_tokens = None + llm_event.end_timestamp = get_ISO_time() + + self._safe_record(session, llm_event) + except Exception as e: + self._safe_record(session, ErrorEvent(trigger_event=llm_event, exception=e)) + kwargs_str = pprint.pformat(kwargs) + response = pprint.pformat(response) + logger.warning( + f"Unable to parse response for LLM call. 
Skipping upload to AgentOps\n" + f"response:\n {response}\n" + f"kwargs:\n {kwargs_str}\n" + ) + + return response + + def _override_complete(self): + from llama_stack_client.resources import InferenceResource + + global original_complete + original_complete = InferenceResource.chat_completion + + def patched_function(*args, **kwargs): + # Call the original function with its original arguments + init_timestamp = get_ISO_time() + session = kwargs.get("session", None) + if "session" in kwargs.keys(): + del kwargs["session"] + result = original_complete(*args, **kwargs) + return self.handle_response(result, kwargs, init_timestamp, session=session) + + # Override the original method with the patched one + InferenceResource.chat_completion = patched_function + + def _override_create_turn(self): + from llama_stack_client.lib.agents.agent import Agent + + self.original_create_turn = Agent.create_turn + + def patched_function(*args, **kwargs): + # Call the original function with its original arguments + init_timestamp = get_ISO_time() + session = kwargs.get("session", None) + if "session" in kwargs.keys(): + del kwargs["session"] + + result = self.original_create_turn(*args, **kwargs) + return self.handle_response( + result, + kwargs, + init_timestamp, + session=session, + metadata={"model_id": args[0].agent_config.get("model")}, + ) + + # Override the original method with the patched one + Agent.create_turn = patched_function + + def override(self): + self._override_complete() + self._override_create_turn() + + def undo_override(self): + if self.original_complete is not None: + from llama_stack_client.resources import InferenceResource + + InferenceResource.chat_completion = self.original_complete + + if self.original_create_turn is not None: + from llama_stack_client.lib.agents.agent import Agent + + Agent.create_turn = self.original_create_turn diff --git a/docs/mint.json b/docs/mint.json index 9f6ae7ad3..ea3cc2684 100644 --- a/docs/mint.json +++ b/docs/mint.json @@ -94,6 +94,7 @@ "v1/integrations/crewai", "v1/integrations/groq", "v1/integrations/langchain", + "v1/integrations/llama_stack", "v1/integrations/litellm", "v1/integrations/mistral", "v1/integrations/multion", diff --git a/docs/v1/examples/examples.mdx b/docs/v1/examples/examples.mdx index df6651884..c148e6728 100644 --- a/docs/v1/examples/examples.mdx +++ b/docs/v1/examples/examples.mdx @@ -42,6 +42,9 @@ mode: "wide" Jupyter Notebook with a sample LangChain integration + + Create an agent to search the web using Brave Search and find the winner of NBA western conference semifinals 2014 + Unified interface for multiple LLM providers diff --git a/docs/v1/integrations/llama_stack.mdx b/docs/v1/integrations/llama_stack.mdx new file mode 100644 index 000000000..bb0f9a83c --- /dev/null +++ b/docs/v1/integrations/llama_stack.mdx @@ -0,0 +1,73 @@ +--- +title: 'Llama Stack' +description: 'Llama Stack is a framework from Meta AI for building Agentic applications.' +--- + +import CodeTooltip from '/snippets/add-code-tooltip.mdx' +import EnvTooltip from '/snippets/add-env-tooltip.mdx' + +AgentOps integrates with Llama Stack via its python [client](https://github.com/meta-llama/llama-stack-client-python) to provide observability into applications that leverage it. + +Llama Stack has comprehensive [documentation](https://llama-stack.readthedocs.io/) available as well as a great [quickstart](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) guide. 
You can use this guide to set up the Llama Stack server and client, or alternatively use our Docker [compose](https://github.com/AgentOps-AI/agentops/blob/main/examples/llama_stack_client_examples/compose.yaml) file. + +## Adding AgentOps to Llama Stack applications + + + + ```bash pip + pip install agentops + ``` + ```bash poetry + poetry add agentops + ``` + + + + + ```bash pip + pip install llama-stack-client + ``` + ```bash poetry + poetry add llama-stack-client + ``` + + + + + + + ```python python + import agentops + agentops.init() + ``` + + + + + + ```python .env + AGENTOPS_API_KEY= + ``` + + Read more about environment variables in [Advanced Configuration](/v1/usage/advanced-configuration) + + + + Execute your program and visit [app.agentops.ai/drilldown](https://app.agentops.ai/drilldown) to observe your waterfall! 🕵️ + + After your run, AgentOps prints a clickable URL to the console linking directly to your session in the Dashboard. + + + + +## Examples + +An example notebook is available [here](https://github.com/AgentOps-AI/agentops/blob/main/examples/llama_stack_client_examples/llama_stack_example.ipynb) that showcases how to use the Llama Stack client with AgentOps. + + + + + + + diff --git a/examples/llama_stack_client_examples/.env.tpl b/examples/llama_stack_client_examples/.env.tpl new file mode 100644 index 000000000..5099720e1 --- /dev/null +++ b/examples/llama_stack_client_examples/.env.tpl @@ -0,0 +1,5 @@ +INFERENCE_MODEL=meta-llama/Llama-3.2-1B-Instruct +OLLAMA_MODEL=llama3.2:1b-instruct-fp16 + + + diff --git a/examples/llama_stack_client_examples/README.md b/examples/llama_stack_client_examples/README.md new file mode 100644 index 000000000..c838096d5 --- /dev/null +++ b/examples/llama_stack_client_examples/README.md @@ -0,0 +1,45 @@ +# Llama Stack Client Examples + +The example notebook demonstrates how to monitor an agentic application built with the Llama Stack Client using AgentOps. We have also provided a `compose.yaml` file to run Ollama in a container. + +## Quick Start + +First, run the following command to start Ollama and the Llama Stack server: + +```bash +docker compose up +``` + +Next, run the [notebook](./llama_stack_example.ipynb) to see the waterfall visualization in the [AgentOps](https://app.agentops.ai) dashboard. + +## Environment Variables + +| Variable | Description | Default | |----------|-------------|---------| | `LLAMA_STACK_PORT` | Server port | 5001 | | `INFERENCE_MODEL` | Model ID (must match Llama Stack format) | meta-llama/Llama-3.2-1B-Instruct | | `OLLAMA_MODEL` | Ollama model ID (must match Ollama format) | llama3.2:1b-instruct-fp16 | | `SAFETY_MODEL` | Optional safety model | - | | `NETWORK_MODE` | Docker network mode | auto-configured | | `OLLAMA_URL` | Ollama API URL | auto-configured | + +## Common Gotchas + +1. Model naming conventions differ between Ollama and Llama Stack: the same model is referenced by different identifiers. For instance, `meta-llama/Llama-3.2-1B-Instruct` in Llama Stack corresponds to `llama3.2:1b-instruct-fp16` in Ollama (see the sketch after this list). + +2. Ensure Docker is allocated sufficient memory for the containers to run properly. 
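To make gotcha 1 concrete, here is a small illustrative sketch (not part of the shipped examples) of how you might confirm that the running server has registered the model under its Llama Stack identifier before launching the notebook. It reuses the `client.models.list()` call from the agent example and assumes the compose stack is up on the default port 5001.

```python
# Illustrative check only: verify the Llama Stack model ID (not the Ollama tag)
# is what the server reports. Assumes the compose stack is running on port 5001.
import os

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://0.0.0.0:5001")

available = [model.identifier for model in client.models.list()]
expected = os.getenv("INFERENCE_MODEL", "meta-llama/Llama-3.2-1B-Instruct")

if expected not in available:
    raise ValueError(f"Expected {expected!r}, but the server reports {available}")
print(f"Using model: {expected}")
```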
+ + +## References + +- [Download Ollama](https://ollama.com/) +- [Llama Stack Fireworks](./llama_stack_fireworks/README.fireworks.md) +- [Llama Stack Docs](https://llama-stack.readthedocs.io) +- [Ollama Run YAML Template](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/ollama/run.yaml) +- [Llama Stack Documentation](https://llama-stack.readthedocs.io) +- [Llama Stack Client Python](https://github.com/meta-llama/llama-stack-client-python) +- [Llama Stack Repository](https://github.com/meta-llama/llama-stack) +- [Meta Models Documentation](https://www.llama.com/docs/getting_the_models/meta/) +- [Getting Started Guide](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) +- [Agents Example](https://github.com/meta-llama/llama-stack-apps/blob/main/examples/agents/hello.py) +- [Model Download Reference](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) diff --git a/examples/llama_stack_client_examples/compose.yaml b/examples/llama_stack_client_examples/compose.yaml new file mode 100644 index 000000000..2139a6620 --- /dev/null +++ b/examples/llama_stack_client_examples/compose.yaml @@ -0,0 +1,73 @@ +services: + ollama: + hostname: ollama + extra_hosts: + - "host.docker.internal:host-gateway" + image: ollama/ollama:latest + volumes: + - ~/.ollama:/root/.ollama + environment: + OLLAMA_DEBUG: 1 + command: [] + deploy: + resources: + limits: + memory: 8G + reservations: + memory: 4G + healthcheck: + test: ["CMD", "bash", "-c", " + python -m llama_stack.distribution.server.server --yaml-config /root/run.yaml --port ${LLAMA_STACK_PORT:-5001} + deploy: + restart_policy: + condition: on-failure + delay: 10s + max_attempts: 3 + window: 60s + networks: + - ollama-network + +networks: + ollama-network: + driver: bridge +volumes: + ollama-init: + llamastack: diff --git a/examples/llama_stack_client_examples/llama_stack_example.ipynb b/examples/llama_stack_client_examples/llama_stack_example.ipynb new file mode 100644 index 000000000..42297557c --- /dev/null +++ b/examples/llama_stack_client_examples/llama_stack_example.ipynb @@ -0,0 +1,250 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Llama Stack Client Examples\n", + "Use the llama_stack_client library to interact with a Llama Stack server" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First let's install the required packages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install -U llama-stack-client\n", + "%pip install -U llama-stack\n", + "%pip install -U agentops\n", + "%pip install -U python-dotenv\n", + "%pip install -U fastapi\n", + "%pip install opentelemetry-api\n", + "%pip install opentelemetry-sdk\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from llama_stack_client import LlamaStackClient\n", + "from llama_stack_client import LlamaStackClient\n", + "from llama_stack_client.lib.inference.event_logger import EventLogger\n", + "from llama_stack_client.types import UserMessage\n", + "from llama_stack_client.types.agent_create_params import AgentConfig\n", + "from llama_stack_client.lib.agents.agent import Agent\n", + "from dotenv import load_dotenv\n", + "import os\n", + "import agentops\n", + "\n", + "load_dotenv()\n", + "AGENTOPS_API_KEY = os.getenv(\"AGENTOPS_API_KEY\") or \"\"\n", + "\n", + "agentops.init(AGENTOPS_API_KEY, 
default_tags=[\"llama-stack-client-example\"], auto_start_session=False)\n", + "\n", + "host = \"0.0.0.0\" # LLAMA_STACK_HOST\n", + "port = 5001 # LLAMA_STACK_PORT\n", + "\n", + "full_host = f\"http://{host}:{port}\"\n", + "\n", + "client = LlamaStackClient(\n", + " base_url=f\"{full_host}\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Inference Canary 1 - Completion with Streaming" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agentops.start_session()\n", + "response = client.inference.chat_completion(\n", + " messages=[\n", + " UserMessage(\n", + " content=\"hello world, write me a 3 word poem about the moon\",\n", + " role=\"user\",\n", + " ),\n", + " ],\n", + " model_id=\"meta-llama/Llama-3.2-1B-Instruct\",\n", + " stream=True\n", + ")\n", + "\n", + "async for log in EventLogger().log(response):\n", + " log.print()\n", + "\n", + "agentops.end_session(\"Success\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Inference Canary Example 2 - Completion without Streaming" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agentops.start_session()\n", + "response = client.inference.chat_completion(\n", + " messages=[\n", + " UserMessage(\n", + " content=\"write me a 3 word poem about the moon\",\n", + " role=\"user\",\n", + " ),\n", + " ],\n", + " model_id=\"meta-llama/Llama-3.2-1B-Instruct\",\n", + " stream=False\n", + ")\n", + "\n", + "print(f\"> Response: {response.completion_message.content}\")\n", + "agentops.end_session(\"Success\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Agent Canary Example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from llama_stack_client import LlamaStackClient\n", + "from llama_stack_client.lib.agents.agent import Agent\n", + "from llama_stack_client.lib.agents.event_logger import EventLogger\n", + "from llama_stack_client.types.agent_create_params import AgentConfig\n", + "\n", + "agentops.start_session()\n", + "\n", + "LLAMA_STACK_PORT = 5001\n", + "\n", + "# Replace with actual API keys for functionality\n", + "BRAVE_SEARCH_API_KEY = os.getenv(\"BRAVE_SEARCH_API_KEY\") or \"your-brave-search-api-key\"\n", + "\n", + "async def agent_test():\n", + " client = LlamaStackClient(\n", + " base_url=f\"http://0.0.0.0:{LLAMA_STACK_PORT}\",\n", + " )\n", + "\n", + " available_shields = [shield.identifier for shield in client.shields.list()]\n", + " if not available_shields:\n", + " print(\"No available shields. Disable safety.\")\n", + " else:\n", + " print(f\"Available shields found: {available_shields}\")\n", + " available_models = [model.identifier for model in client.models.list()]\n", + " if not available_models:\n", + " raise ValueError(\"No available models\")\n", + " else:\n", + " selected_model = available_models[0]\n", + " print(f\"Using model: {selected_model}\")\n", + "\n", + " agent_config = AgentConfig(\n", + " model=selected_model,\n", + " instructions=\"You are a helpful assistant. 
Just say hello as a greeting.\",\n", + " sampling_params={\n", + " \"strategy\": \"greedy\",\n", + " \"temperature\": 1.0,\n", + " \"top_p\": 0.9,\n", + " },\n", + " tools=[\n", + " {\n", + " \"type\": \"brave_search\",\n", + " \"engine\": \"brave\",\n", + " \"api_key\": BRAVE_SEARCH_API_KEY,\n", + " }\n", + " ],\n", + " tool_choice=\"auto\",\n", + " tool_prompt_format=\"json\",\n", + " input_shields=available_shields if available_shields else [],\n", + " output_shields=available_shields if available_shields else [],\n", + " enable_session_persistence=False,\n", + " )\n", + " agent = Agent(client, agent_config)\n", + " user_prompts = [\n", + " \"Hello\",\n", + " \"Which players played in the winning team of the NBA western conference semifinals of 2014, please use tools\",\n", + " ]\n", + "\n", + " session_id = agent.create_session(\"test-session\")\n", + "\n", + " for prompt in user_prompts:\n", + " response = agent.create_turn(\n", + " messages=[\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": prompt,\n", + " }\n", + " ],\n", + " session_id=session_id,\n", + " )\n", + "\n", + " print(f\"{response=}\")\n", + "\n", + " for log in EventLogger().log(response):\n", + " log.print()\n", + "\n", + "agentops.start_session()\n", + "\n", + "await agent_test()\n", + "\n", + "agentops.end_session(\"Success\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agentops.end_all_sessions()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/llama_stack_client_examples/llama_stack_example_for_ci.ipynb b/examples/llama_stack_client_examples/llama_stack_example_for_ci.ipynb new file mode 100644 index 000000000..7249e04ea --- /dev/null +++ b/examples/llama_stack_client_examples/llama_stack_example_for_ci.ipynb @@ -0,0 +1,207 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Llama Stack Client Examples\n", + "Use the llama_stack_client library to interact with a Llama Stack server" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First let's install the required packages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install -U llama-stack-client\n", + "%pip install -U llama-stack\n", + "%pip install -U agentops\n", + "%pip install -U python-dotenv\n", + "%pip install -U fastapi\n", + "%pip install opentelemetry-api\n", + "%pip install opentelemetry-sdk\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from llama_stack_client import LlamaStackClient\n", + "from llama_stack_client import LlamaStackClient\n", + "from llama_stack_client.lib.inference.event_logger import EventLogger\n", + "from llama_stack_client.types import UserMessage\n", + "from llama_stack_client.types.agent_create_params import AgentConfig\n", + "from llama_stack_client.lib.agents.agent import Agent\n", + "from dotenv import load_dotenv\n", + "import os\n", + "import agentops\n", + "\n", + "load_dotenv()\n", + "AGENTOPS_API_KEY = os.getenv(\"AGENTOPS_API_KEY\") or \"\"\n", + 
"\n", + "agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"], auto_start_session=False)\n", + "\n", + "host = \"0.0.0.0\" # LLAMA_STACK_HOST\n", + "port = 5001 # LLAMA_STACK_PORT\n", + "\n", + "full_host = f\"http://{host}:{port}\"\n", + "\n", + "client = LlamaStackClient(\n", + " base_url=f\"{full_host}\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Inference Canary + Agent Canary" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "### Inference Canary\n", + "\n", + "agentops.start_session() # AgentOps start session\n", + "\n", + "response = client.inference.chat_completion(\n", + " messages=[\n", + " UserMessage(\n", + " content=\"hello world, write me a 3 word poem about the moon\",\n", + " role=\"user\",\n", + " ),\n", + " ],\n", + " model_id=\"meta-llama/Llama-3.2-1B-Instruct\",\n", + " stream=True\n", + ")\n", + "\n", + "async for log in EventLogger().log(response):\n", + " log.print()\n", + "\n", + "\n", + "### Agent Canary\n", + "\n", + "import os\n", + "from llama_stack_client import LlamaStackClient\n", + "from llama_stack_client.lib.agents.agent import Agent\n", + "from llama_stack_client.lib.agents.event_logger import EventLogger\n", + "from llama_stack_client.types.agent_create_params import AgentConfig\n", + "\n", + "LLAMA_STACK_PORT = 5001\n", + "\n", + "# Replace with actual API keys for functionality\n", + "BRAVE_SEARCH_API_KEY = os.getenv(\"BRAVE_SEARCH_API_KEY\") or \"your-brave-search-api-key\"\n", + "\n", + "async def agent_test():\n", + " client = LlamaStackClient(\n", + " base_url=f\"http://0.0.0.0:{LLAMA_STACK_PORT}\",\n", + " )\n", + "\n", + " available_shields = [shield.identifier for shield in client.shields.list()]\n", + " if not available_shields:\n", + " print(\"No available shields. Disable safety.\")\n", + " else:\n", + " print(f\"Available shields found: {available_shields}\")\n", + " available_models = [model.identifier for model in client.models.list()]\n", + " if not available_models:\n", + " raise ValueError(\"No available models\")\n", + " else:\n", + " selected_model = available_models[0]\n", + " print(f\"Using model: {selected_model}\")\n", + "\n", + " agent_config = AgentConfig(\n", + " model=selected_model,\n", + " instructions=\"You are a helpful assistant. 
Just say hello as a greeting.\",\n", + " sampling_params={\n", + " \"strategy\": \"greedy\",\n", + " \"temperature\": 1.0,\n", + " \"top_p\": 0.9,\n", + " },\n", + " tools=[\n", + " {\n", + " \"type\": \"brave_search\",\n", + " \"engine\": \"brave\",\n", + " \"api_key\": BRAVE_SEARCH_API_KEY,\n", + " }\n", + " ],\n", + " tool_choice=\"auto\",\n", + " tool_prompt_format=\"json\",\n", + " input_shields=available_shields if available_shields else [],\n", + " output_shields=available_shields if available_shields else [],\n", + " enable_session_persistence=False,\n", + " )\n", + " agent = Agent(client, agent_config)\n", + " user_prompts = [\n", + " \"Hello\",\n", + " \"Which players played in the winning team of the NBA western conference semifinals of 2014, please use tools\",\n", + " ]\n", + "\n", + " session_id = agent.create_session(\"test-session\")\n", + "\n", + " for prompt in user_prompts:\n", + " response = agent.create_turn(\n", + " messages=[\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": prompt,\n", + " }\n", + " ],\n", + " session_id=session_id,\n", + " )\n", + "\n", + " print(f\"{response=}\")\n", + "\n", + " for log in EventLogger().log(response):\n", + " log.print()\n", + "\n", + "await agent_test()\n", + "\n", + "agentops.end_session(\"Success\") # AgentOps end session" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "agentops.end_all_sessions()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/llama_stack_client_examples/pull-models.sh b/examples/llama_stack_client_examples/pull-models.sh new file mode 100755 index 000000000..cd0690290 --- /dev/null +++ b/examples/llama_stack_client_examples/pull-models.sh @@ -0,0 +1,18 @@ +#!/bin/sh + +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +echo "Preloading (${INFERENCE_MODEL}, ${SAFETY_MODEL})..." +for model in ${INFERENCE_MODEL} ${SAFETY_MODEL}; do + echo "Preloading $model..." + if ! 
ollama run "$model"; then + echo "Failed to pull and run $model" + exit 1 + fi +done + +echo "All models pulled successfully" \ No newline at end of file diff --git a/examples/llama_stack_client_examples/run-safety-shield.yaml b/examples/llama_stack_client_examples/run-safety-shield.yaml new file mode 100644 index 000000000..2e4f6ac8a --- /dev/null +++ b/examples/llama_stack_client_examples/run-safety-shield.yaml @@ -0,0 +1,62 @@ +version: '2' +image_name: ollama +docker_image: null +conda_env: ollama +apis: +- agents +- inference +- memory +- safety +- telemetry +providers: + inference: + - provider_id: ollama + provider_type: remote::ollama + config: + url: ${env.OLLAMA_URL:http://localhost:11434} + memory: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: {} + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: {} +metadata_store: + namespace: null + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db +models: +- metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: ollama + provider_model_id: null +- metadata: {} + model_id: ${env.SAFETY_MODEL} + provider_id: ollama + provider_model_id: null +shields: +- params: null + shield_id: ${env.SAFETY_MODEL} + provider_id: null + provider_shield_id: null +memory_banks: [] +datasets: [] +scoring_fns: [] +eval_tasks: [] \ No newline at end of file diff --git a/examples/llama_stack_client_examples/run.yaml b/examples/llama_stack_client_examples/run.yaml new file mode 100644 index 000000000..4d148ad95 --- /dev/null +++ b/examples/llama_stack_client_examples/run.yaml @@ -0,0 +1,53 @@ +version: '2' +image_name: ollama +docker_image: null +conda_env: ollama +apis: +- agents +- inference +- memory +- safety +- telemetry +providers: + inference: + - provider_id: ollama + provider_type: remote::ollama + config: + url: ${env.OLLAMA_URL:http://ollama:11434} + memory: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: {} + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: {} +metadata_store: + namespace: null + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db +models: +- metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: ollama +shields: [] +memory_banks: [] +datasets: [] +scoring_fns: [] +eval_tasks: [] diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py new file mode 100644 index 000000000..1060627db --- /dev/null +++ 
b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py @@ -0,0 +1,83 @@ +import agentops +import asyncio +import os +from dotenv import load_dotenv + +from llama_stack_client import LlamaStackClient +from llama_stack_client.lib.agents.agent import Agent +from llama_stack_client.lib.agents.event_logger import EventLogger +from llama_stack_client.types.agent_create_params import AgentConfig + +load_dotenv() + +agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) + +LLAMA_STACK_HOST = "0.0.0.0" +LLAMA_STACK_PORT = 5001 +INFERENCE_MODEL = "meta-llama/Llama-3.2-1B-Instruct" + + +async def agent_test(): + client = LlamaStackClient( + base_url=f"http://{LLAMA_STACK_HOST}:{LLAMA_STACK_PORT}", + ) + + available_shields = [shield.identifier for shield in client.shields.list()] + if not available_shields: + print("No available shields. Disable safety.") + else: + print(f"Available shields found: {available_shields}") + available_models = [model.identifier for model in client.models.list()] + if not available_models: + raise ValueError("No available models") + else: + selected_model = available_models[0] + print(f"Using model: {selected_model}") + + agent_config = AgentConfig( + model=selected_model, + instructions="You are a helpful assistant. Just say hello as a greeting.", + sampling_params={ + "strategy": "greedy", + "temperature": 1.0, + "top_p": 0.9, + }, + tools=[ + { + "type": "brave_search", + "engine": "brave", + "api_key": os.getenv("BRAVE_SEARCH_API_KEY"), + } + ], + tool_choice="auto", + tool_prompt_format="json", + input_shields=available_shields if available_shields else [], + output_shields=available_shields if available_shields else [], + enable_session_persistence=False, + ) + agent = Agent(client, agent_config) + user_prompts = [ + "Hello", + "Which players played in the winning team of the NBA western conference semifinals of 2014, please use tools", + ] + + session_id = agent.create_session("test-session") + + for prompt in user_prompts: + response = agent.create_turn( + messages=[ + { + "role": "user", + "content": prompt, + } + ], + session_id=session_id, + ) + + for log in EventLogger().log(response): + log.print() + + +agentops.start_session() +asyncio.run(agent_test()) +agentops.end_session(end_state="Success") diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py new file mode 100644 index 000000000..c88dfa48c --- /dev/null +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py @@ -0,0 +1,45 @@ +import asyncio +import agentops +import os +from dotenv import load_dotenv +from llama_stack_client import LlamaStackClient +from llama_stack_client.types import UserMessage +from llama_stack_client.lib.inference.event_logger import EventLogger + +load_dotenv() + +agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) + +host = "0.0.0.0" # LLAMA_STACK_HOST +port = 5001 # LLAMA_STACK_PORT + +full_host = f"http://{host}:{port}" + +client = LlamaStackClient( + base_url=f"{full_host}", +) + + +async def stream_test(): + response = client.inference.chat_completion( + messages=[ + UserMessage( + content="hello world, write me a 3 word poem about the moon", + role="user", + ), + ], + model_id="meta-llama/Llama-3.2-1B-Instruct", + stream=True, + ) + + async for log in 
EventLogger().log(response): + log.print() + + +def main(): + agentops.start_session() + asyncio.run(stream_test()) + agentops.end_session(end_state="Success") + + +main() diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py new file mode 100644 index 000000000..7c43ce510 --- /dev/null +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py @@ -0,0 +1,57 @@ +import agentops +import os +from dotenv import load_dotenv +from llama_stack_client import LlamaStackClient +from llama_stack_client.types import UserMessage +from llama_stack_client.lib.inference.event_logger import EventLogger + +load_dotenv() + +agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) + +host = "0.0.0.0" # LLAMA_STACK_HOST +port = 5001 # LLAMA_STACK_PORT + +full_host = f"http://{host}:{port}" + +client = LlamaStackClient( + base_url=f"{full_host}", +) + + +async def stream_test(): + response = client.inference.chat_completion( + messages=[ + UserMessage( + content="hello world, write me a 3 word poem about the moon", + role="user", + ), + ], + model_id="meta-llama/Llama-3.2-1B-Instruct", + stream=True, + ) + + async for log in EventLogger().log(response): + log.print() + + +def main(): + agentops.start_session() + + response = client.inference.chat_completion( + messages=[ + UserMessage( + content="hello world, write me a 3 word poem about the moon", + role="user", + ), + ], + model_id="meta-llama/Llama-3.2-1B-Instruct", + stream=False, + ) + + print(response.completion_message.content) + + agentops.end_session(end_state="Success") + + +main() diff --git a/tests/test_host_env.py b/tests/test_host_env.py index e6194d3ac..c22796f3f 100644 --- a/tests/test_host_env.py +++ b/tests/test_host_env.py @@ -7,18 +7,8 @@ def mock_partitions(): return [ - sdiskpart( - device="/dev/sda1", - mountpoint="/", - fstype="ext4", - opts="rw,relatime" - ), - sdiskpart( - device="z:\\", - mountpoint="z:\\", - fstype="ntfs", - opts="rw,relatime" - ), + sdiskpart(device="/dev/sda1", mountpoint="/", fstype="ext4", opts="rw,relatime"), + sdiskpart(device="z:\\", mountpoint="z:\\", fstype="ntfs", opts="rw,relatime"), ]