diff --git a/.gitignore b/.gitignore index 4db649aab..d6ab56734 100644 --- a/.gitignore +++ b/.gitignore @@ -164,4 +164,6 @@ cython_debug/ .DS_Store agentops_time_travel.json -.agentops_time_travel.yaml \ No newline at end of file +.agentops_time_travel.yaml + +node_modules \ No newline at end of file diff --git a/README.md b/README.md index 264c5bc4a..e87981dfa 100644 --- a/README.md +++ b/README.md @@ -574,6 +574,14 @@ Check out the [LlamaIndex docs](https://docs.llamaindex.ai/en/stable/module_guid +### Llama Stack 🦙🥞 + +AgentOps provides support for the Llama Stack Python Client (>=0.0.53), allowing you to monitor your agentic applications. + +- [AgentOps integration example 1](https://github.com/AgentOps-AI/agentops/pull/530/files/65a5ab4fdcf310326f191d4b870d4f553591e3ea#diff-fdddf65549f3714f8f007ce7dfd1cde720329fe54155d54389dd50fbd81813cb) +- [AgentOps integration example 2](https://github.com/AgentOps-AI/agentops/pull/530/files/65a5ab4fdcf310326f191d4b870d4f553591e3ea#diff-6688ff4fb7ab1ce7b1cc9b8362ca27264a3060c16737fb1d850305787a6e3699) +- [Official Llama Stack Python Client](https://github.com/meta-llama/llama-stack-client-python) + ## Time travel debugging 🔮
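For orientation, the following is a minimal sketch of the instrumented flow, adapted from the canary tests and example notebook added later in this diff. It assumes a Llama Stack server is already running locally on port 5001 and that `AGENTOPS_API_KEY` is set in the environment.

```python
# Minimal sketch (assumes a local Llama Stack server on port 5001 and a valid
# AGENTOPS_API_KEY); adapted from the inference canary added in this PR.
import os

import agentops
from llama_stack_client import LlamaStackClient
from llama_stack_client.types import UserMessage

agentops.init(os.getenv("AGENTOPS_API_KEY"), auto_start_session=False)
agentops.start_session()

client = LlamaStackClient(base_url="http://0.0.0.0:5001")

# chat_completion calls are recorded as LLM events once agentops.init() has run
response = client.inference.chat_completion(
    messages=[UserMessage(content="write me a 3 word poem about the moon", role="user")],
    model_id="meta-llama/Llama-3.2-1B-Instruct",
    stream=False,
)
print(response.completion_message.content)

agentops.end_session("Success")
```

Streaming completions and `Agent.create_turn` calls are handled the same way by the provider added below.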
diff --git a/agentops/llms/__init__.py b/agentops/llms/__init__.py index a5852d8cd..b26cd1233 100644 --- a/agentops/llms/__init__.py +++ b/agentops/llms/__init__.py @@ -5,6 +5,8 @@ from packaging.version import Version, parse +from agentops.llms.llama_stack_client import LlamaStackClientProvider + from ..log_config import logger from .cohere import CohereProvider @@ -35,6 +37,9 @@ class LlmTracker: "5.4.0": ("chat", "chat_stream"), }, "ollama": {"0.0.1": ("chat", "Client.chat", "AsyncClient.chat")}, + "llama_stack_client": { + "0.0.53": ("resources.InferenceResource.chat_completion", "lib.agents.agent.Agent.create_turn"), + }, "groq": { "0.9.0": ("Client.chat", "AsyncClient.chat"), }, @@ -151,6 +156,15 @@ def override_api(self): else: logger.warning(f"Only AI21>=2.0.0 supported. v{module_version} found.") + if api == "llama_stack_client": + module_version = version(api) + + if Version(module_version) >= parse("0.0.53"): + provider = LlamaStackClientProvider(self.client) + provider.override() + else: + logger.warning(f"Only LlamaStackClient>=0.0.53 supported. v{module_version} found.") + def stop_instrumenting(self): OpenAiProvider(self.client).undo_override() GroqProvider(self.client).undo_override() @@ -160,3 +174,4 @@ def stop_instrumenting(self): AnthropicProvider(self.client).undo_override() MistralProvider(self.client).undo_override() AI21Provider(self.client).undo_override() + LlamaStackClientProvider(self.client).undo_override() diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py new file mode 100644 index 000000000..8379a6fef --- /dev/null +++ b/agentops/llms/llama_stack_client.py @@ -0,0 +1,297 @@ +import inspect +import pprint +import sys +from typing import Any, AsyncGenerator, Dict, Optional, List +import logging +from typing import Union + +from agentops.event import LLMEvent, ErrorEvent, ToolEvent +from agentops.session import Session +from agentops.log_config import logger +from agentops.helpers import get_ISO_time, check_call_stack_for_agent_id +from agentops.llms.instrumented_provider import InstrumentedProvider + + +class LlamaStackClientProvider(InstrumentedProvider): + original_complete = None + original_create_turn = None + + def __init__(self, client): + super().__init__(client) + self._provider_name = "LlamaStack" + + def handle_response( + self, response, kwargs, init_timestamp, session: Optional[Session] = None, metadata: Optional[Dict] = {} + ) -> dict: + """Handle responses for LlamaStack""" + + try: + stack = [] + accum_delta = None + accum_tool_delta = None + # tool_event = None + # llm_event = None + + def handle_stream_chunk(chunk: dict): + nonlocal stack + + # NOTE: prompt/completion usage not returned in response when streaming + + try: + nonlocal accum_delta + + if chunk.event.event_type == "start": + llm_event = LLMEvent(init_timestamp=get_ISO_time(), params=kwargs) + stack.append({"event_type": "start", "event": llm_event}) + accum_delta = chunk.event.delta + elif chunk.event.event_type == "progress": + accum_delta += chunk.event.delta + elif chunk.event.event_type == "complete": + if ( + stack[-1]["event_type"] == "start" + ): # check if the last event in the stack is a step start event + llm_event = stack.pop().get("event") + llm_event.prompt = [ + {"content": message.content, "role": message.role} for message in kwargs["messages"] + ] + llm_event.agent_id = check_call_stack_for_agent_id() + llm_event.model = kwargs["model_id"] + llm_event.prompt_tokens = None + llm_event.completion = accum_delta or 
kwargs["completion"] + llm_event.completion_tokens = None + llm_event.end_timestamp = get_ISO_time() + self._safe_record(session, llm_event) + + except Exception as e: + llm_event = LLMEvent(init_timestamp=init_timestamp, end_timestamp=get_ISO_time(), params=kwargs) + self._safe_record(session, ErrorEvent(trigger_event=llm_event, exception=e)) + + kwargs_str = pprint.pformat(kwargs) + chunk = pprint.pformat(chunk) + logger.warning( + f"Unable to parse a chunk for LLM call. Skipping upload to AgentOps\n" + f"chunk:\n {chunk}\n" + f"kwargs:\n {kwargs_str}\n" + ) + + def handle_stream_agent(chunk: dict): + # NOTE: prompt/completion usage not returned in response when streaming + + # nonlocal llm_event + nonlocal stack + + if session is not None: + llm_event.session_id = session.session_id + + try: + if chunk.event.payload.event_type == "turn_start": + logger.debug("turn_start") + stack.append({"event_type": chunk.event.payload.event_type, "event": None}) + elif chunk.event.payload.event_type == "step_start": + logger.debug("step_start") + llm_event = LLMEvent(init_timestamp=get_ISO_time(), params=kwargs) + stack.append({"event_type": chunk.event.payload.event_type, "event": llm_event}) + elif chunk.event.payload.event_type == "step_progress": + if ( + chunk.event.payload.step_type == "inference" + and chunk.event.payload.text_delta_model_response + ): + nonlocal accum_delta + delta = chunk.event.payload.text_delta_model_response + + if accum_delta: + accum_delta += delta + else: + accum_delta = delta + elif chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta: + if chunk.event.payload.tool_call_delta.parse_status == "started": + logger.debug("tool_started") + tool_event = ToolEvent(init_timestamp=get_ISO_time(), params=kwargs) + tool_event.name = "tool_started" + + stack.append({"event_type": "tool_started", "event": tool_event}) + + elif chunk.event.payload.tool_call_delta.parse_status == "in_progress": + nonlocal accum_tool_delta + delta = chunk.event.payload.tool_call_delta.content + if accum_tool_delta: + accum_tool_delta += delta + else: + accum_tool_delta = delta + elif chunk.event.payload.tool_call_delta.parse_status == "success": + logger.debug("ToolExecution - success") + if ( + stack[-1]["event_type"] == "tool_started" + ): # check if the last event in the stack is a tool execution event + tool_event = stack.pop().get("event") + tool_event.end_timestamp = get_ISO_time() + tool_event.params["completion"] = accum_tool_delta + self._safe_record(session, tool_event) + elif chunk.event.payload.tool_call_delta.parse_status == "failure": + logger.warning("ToolExecution - failure") + if stack[-1]["event_type"] == "ToolExecution - started": + tool_event = stack.pop().get("event") + tool_event.end_timestamp = get_ISO_time() + tool_event.params["completion"] = accum_tool_delta + self._safe_record( + session, + ErrorEvent( + trigger_event=tool_event, exception=Exception("ToolExecution - failure") + ), + ) + + elif chunk.event.payload.event_type == "step_complete": + logger.debug("Step complete event received") + + if chunk.event.payload.step_type == "inference": + logger.debug("Step complete inference") + + if stack[-1]["event_type"] == "step_start": + llm_event = stack.pop().get("event") + llm_event.prompt = [ + {"content": message["content"], "role": message["role"]} + for message in kwargs["messages"] + ] + llm_event.agent_id = check_call_stack_for_agent_id() + llm_event.model = metadata.get("model_id", "Unable to identify model") + llm_event.prompt_tokens = 
None + llm_event.completion = accum_delta or kwargs["completion"] + llm_event.completion_tokens = None + llm_event.end_timestamp = get_ISO_time() + self._safe_record(session, llm_event) + else: + logger.warning("Unexpected event stack state for inference step complete") + elif chunk.event.payload.step_type == "tool_execution": + if stack[-1]["event_type"] == "tool_started": + logger.debug("tool_complete") + tool_event = stack.pop().get("event") + tool_event.name = "tool_complete" + tool_event.params["completion"] = accum_tool_delta + self._safe_record(session, tool_event) + elif chunk.event.payload.event_type == "turn_complete": + if stack[-1]["event_type"] == "turn_start": + logger.debug("turn_start") + pass + + except Exception as e: + llm_event = LLMEvent(init_timestamp=init_timestamp, end_timestamp=get_ISO_time(), params=kwargs) + + self._safe_record(session, ErrorEvent(trigger_event=llm_event, exception=e)) + + kwargs_str = pprint.pformat(kwargs) + chunk = pprint.pformat(chunk) + logger.warning( + f"Unable to parse a chunk for LLM call. Skipping upload to AgentOps\n" + f"chunk:\n {chunk}\n" + f"kwargs:\n {kwargs_str}\n" + ) + + if kwargs.get("stream", False): + + def generator(): + for chunk in response: + handle_stream_chunk(chunk) + yield chunk + + return generator() + elif inspect.isasyncgen(response): + + async def agent_generator(): + async for chunk in response: + handle_stream_agent(chunk) + yield chunk + + return agent_generator() + elif inspect.isgenerator(response): + + def agent_generator(): + for chunk in response: + handle_stream_agent(chunk) + yield chunk + + return agent_generator() + else: + llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) + if session is not None: + llm_event.session_id = session.session_id + + llm_event.returns = response + llm_event.agent_id = check_call_stack_for_agent_id() + llm_event.model = kwargs["model_id"] + llm_event.prompt = [ + {"content": message.content, "role": message.role} for message in kwargs["messages"] + ] + llm_event.prompt_tokens = None + llm_event.completion = response.completion_message.content + llm_event.completion_tokens = None + llm_event.end_timestamp = get_ISO_time() + + self._safe_record(session, llm_event) + except Exception as e: + self._safe_record(session, ErrorEvent(trigger_event=llm_event, exception=e)) + kwargs_str = pprint.pformat(kwargs) + response = pprint.pformat(response) + logger.warning( + f"Unable to parse response for LLM call. 
Skipping upload to AgentOps\n" + f"response:\n {response}\n" + f"kwargs:\n {kwargs_str}\n" + ) + + return response + + def _override_complete(self): + from llama_stack_client.resources import InferenceResource + + global original_complete + original_complete = InferenceResource.chat_completion + + def patched_function(*args, **kwargs): + # Call the original function with its original arguments + init_timestamp = get_ISO_time() + session = kwargs.get("session", None) + if "session" in kwargs.keys(): + del kwargs["session"] + result = original_complete(*args, **kwargs) + return self.handle_response(result, kwargs, init_timestamp, session=session) + + # Override the original method with the patched one + InferenceResource.chat_completion = patched_function + + def _override_create_turn(self): + from llama_stack_client.lib.agents.agent import Agent + + self.original_create_turn = Agent.create_turn + + def patched_function(*args, **kwargs): + # Call the original function with its original arguments + init_timestamp = get_ISO_time() + session = kwargs.get("session", None) + if "session" in kwargs.keys(): + del kwargs["session"] + + result = self.original_create_turn(*args, **kwargs) + return self.handle_response( + result, + kwargs, + init_timestamp, + session=session, + metadata={"model_id": args[0].agent_config.get("model")}, + ) + + # Override the original method with the patched one + Agent.create_turn = patched_function + + def override(self): + self._override_complete() + self._override_create_turn() + + def undo_override(self): + if self.original_complete is not None: + from llama_stack_client.resources import InferenceResource + + InferenceResource.chat_completion = self.original_complete + + if self.original_create_turn is not None: + from llama_stack_client.lib.agents.agent import Agent + + Agent.create_turn = self.original_create_turn diff --git a/docs/mint.json b/docs/mint.json index 9f6ae7ad3..ea3cc2684 100644 --- a/docs/mint.json +++ b/docs/mint.json @@ -94,6 +94,7 @@ "v1/integrations/crewai", "v1/integrations/groq", "v1/integrations/langchain", + "v1/integrations/llama_stack", "v1/integrations/litellm", "v1/integrations/mistral", "v1/integrations/multion", diff --git a/docs/v1/examples/examples.mdx b/docs/v1/examples/examples.mdx index df6651884..c148e6728 100644 --- a/docs/v1/examples/examples.mdx +++ b/docs/v1/examples/examples.mdx @@ -42,6 +42,9 @@ mode: "wide" Jupyter Notebook with a sample LangChain integration + + Create an agent to search the web using Brave Search and find the winner of NBA western conference semifinals 2014 + Unified interface for multiple LLM providers diff --git a/docs/v1/integrations/llama_stack.mdx b/docs/v1/integrations/llama_stack.mdx new file mode 100644 index 000000000..bb0f9a83c --- /dev/null +++ b/docs/v1/integrations/llama_stack.mdx @@ -0,0 +1,73 @@ +--- +title: 'Llama Stack' +description: 'Llama Stack is a framework from Meta AI for building Agentic applications.' +--- + +import CodeTooltip from '/snippets/add-code-tooltip.mdx' +import EnvTooltip from '/snippets/add-env-tooltip.mdx' + +AgentOps integrates with Llama Stack via its python [client](https://github.com/meta-llama/llama-stack-client-python) to provide observability into applications that leverage it. + +Llama Stack has comprehensive [documentation](https://llama-stack.readthedocs.io/) available as well as a great [quickstart](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) guide. 
You can use this guide to set up the Llama Stack server and client, or alternatively use our Docker [compose](https://github.com/AgentOps-AI/agentops/blob/main/examples/llama_stack_client_examples/compose.yaml) file. + +## Adding AgentOps to Llama Stack applications + + + + ```bash pip + pip install agentops + ``` + ```bash poetry + poetry add agentops + ``` + + + + + ```bash pip + pip install llama-stack-client + ``` + ```bash poetry + poetry add llama-stack-client + ``` + + + + + + + ```python python + import agentops + agentops.init() + ``` + + + + + + ```python .env + AGENTOPS_API_KEY= + ``` + + Read more about environment variables in [Advanced Configuration](/v1/usage/advanced-configuration) + + + + Execute your program and visit [app.agentops.ai/drilldown](https://app.agentops.ai/drilldown) to observe your waterfall! 🕵️ + + After your run, AgentOps prints a clickable URL to the console linking directly to your session in the Dashboard. + + + + +## Examples + +An example notebook is available [here](https://github.com/AgentOps-AI/agentops/blob/main/examples/llama_stack_client_examples/llama_stack_example.ipynb) that showcases how to use the Llama Stack client with AgentOps. + + + + + + + diff --git a/examples/llama_stack_client_examples/.env.tpl b/examples/llama_stack_client_examples/.env.tpl new file mode 100644 index 000000000..5099720e1 --- /dev/null +++ b/examples/llama_stack_client_examples/.env.tpl @@ -0,0 +1,5 @@ +INFERENCE_MODEL=meta-llama/Llama-3.2-1B-Instruct +OLLAMA_MODEL=llama3.2:1b-instruct-fp16 + + + diff --git a/examples/llama_stack_client_examples/README.md b/examples/llama_stack_client_examples/README.md new file mode 100644 index 000000000..c838096d5 --- /dev/null +++ b/examples/llama_stack_client_examples/README.md @@ -0,0 +1,45 @@ +# Llama Stack Client Examples + +The example notebook demonstrates how to monitor an agentic application built with the Llama Stack Client using AgentOps. We have also provided a `compose.yaml` file to run Ollama in a container. + +## Quick Start + +First, run the following command to start Ollama and the Llama Stack server: + +```bash +docker compose up +``` + +Next, run the [notebook](./llama_stack_example.ipynb) to see the waterfall visualization in the [AgentOps](https://app.agentops.ai) dashboard. + +## Environment Variables + +| Variable | Description | Default | |----------|-------------|---------| | `LLAMA_STACK_PORT` | Server port | 5001 | | `INFERENCE_MODEL` | Model ID (must match Llama Stack format) | meta-llama/Llama-3.2-1B-Instruct | | `OLLAMA_MODEL` | Ollama model ID (must match Ollama format) | llama3.2:1b-instruct-fp16 | | `SAFETY_MODEL` | Optional safety model | - | | `NETWORK_MODE` | Docker network mode | auto-configured | | `OLLAMA_URL` | Ollama API URL | auto-configured | + +## Common Gotchas + +1. Model naming conventions differ between Ollama and Llama Stack: the same model is referenced by different identifiers. For instance, `meta-llama/Llama-3.2-1B-Instruct` in Llama Stack corresponds to `llama3.2:1b-instruct-fp16` in Ollama (see the sketch after this list). + +2. Ensure Docker is allocated sufficient memory for the containers to run properly. 
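To make gotcha 1 concrete, here is a small illustrative sketch (not part of the shipped examples) of how you might confirm that the running server has registered the model under its Llama Stack identifier before launching the notebook. It reuses the `client.models.list()` call from the agent example and assumes the compose stack is up on the default port 5001.

```python
# Illustrative check only: verify the Llama Stack model ID (not the Ollama tag)
# is what the server reports. Assumes the compose stack is running on port 5001.
import os

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://0.0.0.0:5001")

available = [model.identifier for model in client.models.list()]
expected = os.getenv("INFERENCE_MODEL", "meta-llama/Llama-3.2-1B-Instruct")

if expected not in available:
    raise ValueError(f"Expected {expected!r}, but the server reports {available}")
print(f"Using model: {expected}")
```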
+ + +## References + +- [Download Ollama](https://ollama.com/) +- [Llama Stack Fireworks](./llama_stack_fireworks/README.fireworks.md) +- [Llama Stack Docs](https://llama-stack.readthedocs.io) +- [Ollama Run YAML Template](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/ollama/run.yaml) +- [Llama Stack Documentation](https://llama-stack.readthedocs.io) +- [Llama Stack Client Python](https://github.com/meta-llama/llama-stack-client-python) +- [Llama Stack Repository](https://github.com/meta-llama/llama-stack) +- [Meta Models Documentation](https://www.llama.com/docs/getting_the_models/meta/) +- [Getting Started Guide](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) +- [Agents Example](https://github.com/meta-llama/llama-stack-apps/blob/main/examples/agents/hello.py) +- [Model Download Reference](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) diff --git a/examples/llama_stack_client_examples/compose.yaml b/examples/llama_stack_client_examples/compose.yaml new file mode 100644 index 000000000..2139a6620 --- /dev/null +++ b/examples/llama_stack_client_examples/compose.yaml @@ -0,0 +1,73 @@ +services: + ollama: + hostname: ollama + extra_hosts: + - "host.docker.internal:host-gateway" + image: ollama/ollama:latest + volumes: + - ~/.ollama:/root/.ollama + environment: + OLLAMA_DEBUG: 1 + command: [] + deploy: + resources: + limits: + memory: 8G + reservations: + memory: 4G + healthcheck: + test: ["CMD", "bash", "-c", " + python -m llama_stack.distribution.server.server --yaml-config /root/run.yaml --port ${LLAMA_STACK_PORT:-5001} + deploy: + restart_policy: + condition: on-failure + delay: 10s + max_attempts: 3 + window: 60s + networks: + - ollama-network + +networks: + ollama-network: + driver: bridge +volumes: + ollama-init: + llamastack: diff --git a/examples/llama_stack_client_examples/llama_stack_example.ipynb b/examples/llama_stack_client_examples/llama_stack_example.ipynb new file mode 100644 index 000000000..42297557c --- /dev/null +++ b/examples/llama_stack_client_examples/llama_stack_example.ipynb @@ -0,0 +1,250 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Llama Stack Client Examples\n", + "Use the llama_stack_client library to interact with a Llama Stack server" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First let's install the required packages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install -U llama-stack-client\n", + "%pip install -U llama-stack\n", + "%pip install -U agentops\n", + "%pip install -U python-dotenv\n", + "%pip install -U fastapi\n", + "%pip install opentelemetry-api\n", + "%pip install opentelemetry-sdk\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from llama_stack_client import LlamaStackClient\n", + "from llama_stack_client import LlamaStackClient\n", + "from llama_stack_client.lib.inference.event_logger import EventLogger\n", + "from llama_stack_client.types import UserMessage\n", + "from llama_stack_client.types.agent_create_params import AgentConfig\n", + "from llama_stack_client.lib.agents.agent import Agent\n", + "from dotenv import load_dotenv\n", + "import os\n", + "import agentops\n", + "\n", + "load_dotenv()\n", + "AGENTOPS_API_KEY = os.getenv(\"AGENTOPS_API_KEY\") or \"\"\n", + "\n", + "agentops.init(AGENTOPS_API_KEY, 
default_tags=[\"llama-stack-client-example\"], auto_start_session=False)\n", + "\n", + "host = \"0.0.0.0\" # LLAMA_STACK_HOST\n", + "port = 5001 # LLAMA_STACK_PORT\n", + "\n", + "full_host = f\"http://{host}:{port}\"\n", + "\n", + "client = LlamaStackClient(\n", + " base_url=f\"{full_host}\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Inference Canary 1 - Completion with Streaming" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agentops.start_session()\n", + "response = client.inference.chat_completion(\n", + " messages=[\n", + " UserMessage(\n", + " content=\"hello world, write me a 3 word poem about the moon\",\n", + " role=\"user\",\n", + " ),\n", + " ],\n", + " model_id=\"meta-llama/Llama-3.2-1B-Instruct\",\n", + " stream=True\n", + ")\n", + "\n", + "async for log in EventLogger().log(response):\n", + " log.print()\n", + "\n", + "agentops.end_session(\"Success\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Inference Canary Example 2 - Completion without Streaming" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agentops.start_session()\n", + "response = client.inference.chat_completion(\n", + " messages=[\n", + " UserMessage(\n", + " content=\"write me a 3 word poem about the moon\",\n", + " role=\"user\",\n", + " ),\n", + " ],\n", + " model_id=\"meta-llama/Llama-3.2-1B-Instruct\",\n", + " stream=False\n", + ")\n", + "\n", + "print(f\"> Response: {response.completion_message.content}\")\n", + "agentops.end_session(\"Success\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Agent Canary Example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from llama_stack_client import LlamaStackClient\n", + "from llama_stack_client.lib.agents.agent import Agent\n", + "from llama_stack_client.lib.agents.event_logger import EventLogger\n", + "from llama_stack_client.types.agent_create_params import AgentConfig\n", + "\n", + "agentops.start_session()\n", + "\n", + "LLAMA_STACK_PORT = 5001\n", + "\n", + "# Replace with actual API keys for functionality\n", + "BRAVE_SEARCH_API_KEY = os.getenv(\"BRAVE_SEARCH_API_KEY\") or \"your-brave-search-api-key\"\n", + "\n", + "async def agent_test():\n", + " client = LlamaStackClient(\n", + " base_url=f\"http://0.0.0.0:{LLAMA_STACK_PORT}\",\n", + " )\n", + "\n", + " available_shields = [shield.identifier for shield in client.shields.list()]\n", + " if not available_shields:\n", + " print(\"No available shields. Disable safety.\")\n", + " else:\n", + " print(f\"Available shields found: {available_shields}\")\n", + " available_models = [model.identifier for model in client.models.list()]\n", + " if not available_models:\n", + " raise ValueError(\"No available models\")\n", + " else:\n", + " selected_model = available_models[0]\n", + " print(f\"Using model: {selected_model}\")\n", + "\n", + " agent_config = AgentConfig(\n", + " model=selected_model,\n", + " instructions=\"You are a helpful assistant. 
Just say hello as a greeting.\",\n", + " sampling_params={\n", + " \"strategy\": \"greedy\",\n", + " \"temperature\": 1.0,\n", + " \"top_p\": 0.9,\n", + " },\n", + " tools=[\n", + " {\n", + " \"type\": \"brave_search\",\n", + " \"engine\": \"brave\",\n", + " \"api_key\": BRAVE_SEARCH_API_KEY,\n", + " }\n", + " ],\n", + " tool_choice=\"auto\",\n", + " tool_prompt_format=\"json\",\n", + " input_shields=available_shields if available_shields else [],\n", + " output_shields=available_shields if available_shields else [],\n", + " enable_session_persistence=False,\n", + " )\n", + " agent = Agent(client, agent_config)\n", + " user_prompts = [\n", + " \"Hello\",\n", + " \"Which players played in the winning team of the NBA western conference semifinals of 2014, please use tools\",\n", + " ]\n", + "\n", + " session_id = agent.create_session(\"test-session\")\n", + "\n", + " for prompt in user_prompts:\n", + " response = agent.create_turn(\n", + " messages=[\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": prompt,\n", + " }\n", + " ],\n", + " session_id=session_id,\n", + " )\n", + "\n", + " print(f\"{response=}\")\n", + "\n", + " for log in EventLogger().log(response):\n", + " log.print()\n", + "\n", + "agentops.start_session()\n", + "\n", + "await agent_test()\n", + "\n", + "agentops.end_session(\"Success\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agentops.end_all_sessions()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/llama_stack_client_examples/llama_stack_example_for_ci.ipynb b/examples/llama_stack_client_examples/llama_stack_example_for_ci.ipynb new file mode 100644 index 000000000..7249e04ea --- /dev/null +++ b/examples/llama_stack_client_examples/llama_stack_example_for_ci.ipynb @@ -0,0 +1,207 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Llama Stack Client Examples\n", + "Use the llama_stack_client library to interact with a Llama Stack server" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First let's install the required packages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install -U llama-stack-client\n", + "%pip install -U llama-stack\n", + "%pip install -U agentops\n", + "%pip install -U python-dotenv\n", + "%pip install -U fastapi\n", + "%pip install opentelemetry-api\n", + "%pip install opentelemetry-sdk\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from llama_stack_client import LlamaStackClient\n", + "from llama_stack_client import LlamaStackClient\n", + "from llama_stack_client.lib.inference.event_logger import EventLogger\n", + "from llama_stack_client.types import UserMessage\n", + "from llama_stack_client.types.agent_create_params import AgentConfig\n", + "from llama_stack_client.lib.agents.agent import Agent\n", + "from dotenv import load_dotenv\n", + "import os\n", + "import agentops\n", + "\n", + "load_dotenv()\n", + "AGENTOPS_API_KEY = os.getenv(\"AGENTOPS_API_KEY\") or \"\"\n", + 
"\n", + "agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"], auto_start_session=False)\n", + "\n", + "host = \"0.0.0.0\" # LLAMA_STACK_HOST\n", + "port = 5001 # LLAMA_STACK_PORT\n", + "\n", + "full_host = f\"http://{host}:{port}\"\n", + "\n", + "client = LlamaStackClient(\n", + " base_url=f\"{full_host}\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Inference Canary + Agent Canary" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "### Inference Canary\n", + "\n", + "agentops.start_session() # AgentOps start session\n", + "\n", + "response = client.inference.chat_completion(\n", + " messages=[\n", + " UserMessage(\n", + " content=\"hello world, write me a 3 word poem about the moon\",\n", + " role=\"user\",\n", + " ),\n", + " ],\n", + " model_id=\"meta-llama/Llama-3.2-1B-Instruct\",\n", + " stream=True\n", + ")\n", + "\n", + "async for log in EventLogger().log(response):\n", + " log.print()\n", + "\n", + "\n", + "### Agent Canary\n", + "\n", + "import os\n", + "from llama_stack_client import LlamaStackClient\n", + "from llama_stack_client.lib.agents.agent import Agent\n", + "from llama_stack_client.lib.agents.event_logger import EventLogger\n", + "from llama_stack_client.types.agent_create_params import AgentConfig\n", + "\n", + "LLAMA_STACK_PORT = 5001\n", + "\n", + "# Replace with actual API keys for functionality\n", + "BRAVE_SEARCH_API_KEY = os.getenv(\"BRAVE_SEARCH_API_KEY\") or \"your-brave-search-api-key\"\n", + "\n", + "async def agent_test():\n", + " client = LlamaStackClient(\n", + " base_url=f\"http://0.0.0.0:{LLAMA_STACK_PORT}\",\n", + " )\n", + "\n", + " available_shields = [shield.identifier for shield in client.shields.list()]\n", + " if not available_shields:\n", + " print(\"No available shields. Disable safety.\")\n", + " else:\n", + " print(f\"Available shields found: {available_shields}\")\n", + " available_models = [model.identifier for model in client.models.list()]\n", + " if not available_models:\n", + " raise ValueError(\"No available models\")\n", + " else:\n", + " selected_model = available_models[0]\n", + " print(f\"Using model: {selected_model}\")\n", + "\n", + " agent_config = AgentConfig(\n", + " model=selected_model,\n", + " instructions=\"You are a helpful assistant. 
Just say hello as a greeting.\",\n", + " sampling_params={\n", + " \"strategy\": \"greedy\",\n", + " \"temperature\": 1.0,\n", + " \"top_p\": 0.9,\n", + " },\n", + " tools=[\n", + " {\n", + " \"type\": \"brave_search\",\n", + " \"engine\": \"brave\",\n", + " \"api_key\": BRAVE_SEARCH_API_KEY,\n", + " }\n", + " ],\n", + " tool_choice=\"auto\",\n", + " tool_prompt_format=\"json\",\n", + " input_shields=available_shields if available_shields else [],\n", + " output_shields=available_shields if available_shields else [],\n", + " enable_session_persistence=False,\n", + " )\n", + " agent = Agent(client, agent_config)\n", + " user_prompts = [\n", + " \"Hello\",\n", + " \"Which players played in the winning team of the NBA western conference semifinals of 2014, please use tools\",\n", + " ]\n", + "\n", + " session_id = agent.create_session(\"test-session\")\n", + "\n", + " for prompt in user_prompts:\n", + " response = agent.create_turn(\n", + " messages=[\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": prompt,\n", + " }\n", + " ],\n", + " session_id=session_id,\n", + " )\n", + "\n", + " print(f\"{response=}\")\n", + "\n", + " for log in EventLogger().log(response):\n", + " log.print()\n", + "\n", + "await agent_test()\n", + "\n", + "agentops.end_session(\"Success\") # AgentOps end session" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "agentops.end_all_sessions()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/llama_stack_client_examples/pull-models.sh b/examples/llama_stack_client_examples/pull-models.sh new file mode 100755 index 000000000..cd0690290 --- /dev/null +++ b/examples/llama_stack_client_examples/pull-models.sh @@ -0,0 +1,18 @@ +#!/bin/sh + +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +echo "Preloading (${INFERENCE_MODEL}, ${SAFETY_MODEL})..." +for model in ${INFERENCE_MODEL} ${SAFETY_MODEL}; do + echo "Preloading $model..." + if ! 
ollama run "$model"; then + echo "Failed to pull and run $model" + exit 1 + fi +done + +echo "All models pulled successfully" \ No newline at end of file diff --git a/examples/llama_stack_client_examples/run-safety-shield.yaml b/examples/llama_stack_client_examples/run-safety-shield.yaml new file mode 100644 index 000000000..2e4f6ac8a --- /dev/null +++ b/examples/llama_stack_client_examples/run-safety-shield.yaml @@ -0,0 +1,62 @@ +version: '2' +image_name: ollama +docker_image: null +conda_env: ollama +apis: +- agents +- inference +- memory +- safety +- telemetry +providers: + inference: + - provider_id: ollama + provider_type: remote::ollama + config: + url: ${env.OLLAMA_URL:http://localhost:11434} + memory: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: {} + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: {} +metadata_store: + namespace: null + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db +models: +- metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: ollama + provider_model_id: null +- metadata: {} + model_id: ${env.SAFETY_MODEL} + provider_id: ollama + provider_model_id: null +shields: +- params: null + shield_id: ${env.SAFETY_MODEL} + provider_id: null + provider_shield_id: null +memory_banks: [] +datasets: [] +scoring_fns: [] +eval_tasks: [] \ No newline at end of file diff --git a/examples/llama_stack_client_examples/run.yaml b/examples/llama_stack_client_examples/run.yaml new file mode 100644 index 000000000..4d148ad95 --- /dev/null +++ b/examples/llama_stack_client_examples/run.yaml @@ -0,0 +1,53 @@ +version: '2' +image_name: ollama +docker_image: null +conda_env: ollama +apis: +- agents +- inference +- memory +- safety +- telemetry +providers: + inference: + - provider_id: ollama + provider_type: remote::ollama + config: + url: ${env.OLLAMA_URL:http://ollama:11434} + memory: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: {} + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: {} +metadata_store: + namespace: null + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db +models: +- metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: ollama +shields: [] +memory_banks: [] +datasets: [] +scoring_fns: [] +eval_tasks: [] diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py new file mode 100644 index 000000000..1060627db --- /dev/null +++ 
b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py @@ -0,0 +1,83 @@ +import agentops +import asyncio +import os +from dotenv import load_dotenv + +from llama_stack_client import LlamaStackClient +from llama_stack_client.lib.agents.agent import Agent +from llama_stack_client.lib.agents.event_logger import EventLogger +from llama_stack_client.types.agent_create_params import AgentConfig + +load_dotenv() + +agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) + +LLAMA_STACK_HOST = "0.0.0.0" +LLAMA_STACK_PORT = 5001 +INFERENCE_MODEL = "meta-llama/Llama-3.2-1B-Instruct" + + +async def agent_test(): + client = LlamaStackClient( + base_url=f"http://{LLAMA_STACK_HOST}:{LLAMA_STACK_PORT}", + ) + + available_shields = [shield.identifier for shield in client.shields.list()] + if not available_shields: + print("No available shields. Disable safety.") + else: + print(f"Available shields found: {available_shields}") + available_models = [model.identifier for model in client.models.list()] + if not available_models: + raise ValueError("No available models") + else: + selected_model = available_models[0] + print(f"Using model: {selected_model}") + + agent_config = AgentConfig( + model=selected_model, + instructions="You are a helpful assistant. Just say hello as a greeting.", + sampling_params={ + "strategy": "greedy", + "temperature": 1.0, + "top_p": 0.9, + }, + tools=[ + { + "type": "brave_search", + "engine": "brave", + "api_key": os.getenv("BRAVE_SEARCH_API_KEY"), + } + ], + tool_choice="auto", + tool_prompt_format="json", + input_shields=available_shields if available_shields else [], + output_shields=available_shields if available_shields else [], + enable_session_persistence=False, + ) + agent = Agent(client, agent_config) + user_prompts = [ + "Hello", + "Which players played in the winning team of the NBA western conference semifinals of 2014, please use tools", + ] + + session_id = agent.create_session("test-session") + + for prompt in user_prompts: + response = agent.create_turn( + messages=[ + { + "role": "user", + "content": prompt, + } + ], + session_id=session_id, + ) + + for log in EventLogger().log(response): + log.print() + + +agentops.start_session() +asyncio.run(agent_test()) +agentops.end_session(end_state="Success") diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py new file mode 100644 index 000000000..c88dfa48c --- /dev/null +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py @@ -0,0 +1,45 @@ +import asyncio +import agentops +import os +from dotenv import load_dotenv +from llama_stack_client import LlamaStackClient +from llama_stack_client.types import UserMessage +from llama_stack_client.lib.inference.event_logger import EventLogger + +load_dotenv() + +agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) + +host = "0.0.0.0" # LLAMA_STACK_HOST +port = 5001 # LLAMA_STACK_PORT + +full_host = f"http://{host}:{port}" + +client = LlamaStackClient( + base_url=f"{full_host}", +) + + +async def stream_test(): + response = client.inference.chat_completion( + messages=[ + UserMessage( + content="hello world, write me a 3 word poem about the moon", + role="user", + ), + ], + model_id="meta-llama/Llama-3.2-1B-Instruct", + stream=True, + ) + + async for log in 
EventLogger().log(response): + log.print() + + +def main(): + agentops.start_session() + asyncio.run(stream_test()) + agentops.end_session(end_state="Success") + + +main() diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py new file mode 100644 index 000000000..7c43ce510 --- /dev/null +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py @@ -0,0 +1,57 @@ +import agentops +import os +from dotenv import load_dotenv +from llama_stack_client import LlamaStackClient +from llama_stack_client.types import UserMessage +from llama_stack_client.lib.inference.event_logger import EventLogger + +load_dotenv() + +agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) + +host = "0.0.0.0" # LLAMA_STACK_HOST +port = 5001 # LLAMA_STACK_PORT + +full_host = f"http://{host}:{port}" + +client = LlamaStackClient( + base_url=f"{full_host}", +) + + +async def stream_test(): + response = client.inference.chat_completion( + messages=[ + UserMessage( + content="hello world, write me a 3 word poem about the moon", + role="user", + ), + ], + model_id="meta-llama/Llama-3.2-1B-Instruct", + stream=True, + ) + + async for log in EventLogger().log(response): + log.print() + + +def main(): + agentops.start_session() + + response = client.inference.chat_completion( + messages=[ + UserMessage( + content="hello world, write me a 3 word poem about the moon", + role="user", + ), + ], + model_id="meta-llama/Llama-3.2-1B-Instruct", + stream=False, + ) + + print(response.completion_message.content) + + agentops.end_session(end_state="Success") + + +main() diff --git a/tests/test_host_env.py b/tests/test_host_env.py index e6194d3ac..c22796f3f 100644 --- a/tests/test_host_env.py +++ b/tests/test_host_env.py @@ -7,18 +7,8 @@ def mock_partitions(): return [ - sdiskpart( - device="/dev/sda1", - mountpoint="/", - fstype="ext4", - opts="rw,relatime" - ), - sdiskpart( - device="z:\\", - mountpoint="z:\\", - fstype="ntfs", - opts="rw,relatime" - ), + sdiskpart(device="/dev/sda1", mountpoint="/", fstype="ext4", opts="rw,relatime"), + sdiskpart(device="z:\\", mountpoint="z:\\", fstype="ntfs", opts="rw,relatime"), ]