From 787d6703c06161c943c45da251c0422647cca26e Mon Sep 17 00:00:00 2001 From: Gabriel Fior Date: Wed, 10 Apr 2024 19:20:24 -0300 Subject: [PATCH] Executed benchmark for 50 markets --- .../crewai_subsequential_agent/benchmark.py | 12 +++------ .../crewai_agent_subquestions.py | 27 +++++++++++++++---- .../crewai_subsequential_agent/deploy.py | 7 +++-- prediction_market_agent/tools/crewai_tools.py | 3 +-- 4 files changed, 30 insertions(+), 19 deletions(-) diff --git a/prediction_market_agent/agents/crewai_subsequential_agent/benchmark.py b/prediction_market_agent/agents/crewai_subsequential_agent/benchmark.py index 4d11494f..c6108219 100644 --- a/prediction_market_agent/agents/crewai_subsequential_agent/benchmark.py +++ b/prediction_market_agent/agents/crewai_subsequential_agent/benchmark.py @@ -2,7 +2,6 @@ from datetime import datetime import typer -from dotenv import load_dotenv from prediction_market_agent_tooling.benchmark.agents import ( AbstractBenchmarkedAgent, FixedAgent, @@ -46,9 +45,8 @@ def build_binary_agent_market_from_question(question: str) -> AgentMarket: class CrewAIAgentSubquestionsBenchmark(AbstractBenchmarkedAgent): def __init__( self, - agent_name: str, max_workers: int, - model: str, + agent_name: str, max_tries: int, ) -> None: self.max_tries = max_tries @@ -65,8 +63,8 @@ def predict(self, market_question: str) -> Prediction: def main( - n: int = 5, - output: str = "./benchmark_report.md", + n: int = 50, + output: str = "./benchmark_report_50markets.md", reference: MarketType = MarketType.MANIFOLD, filter: FilterBy = FilterBy.OPEN, sort: SortBy = SortBy.NONE, @@ -78,7 +76,6 @@ def main( Polymarket usually contains higher quality questions, but on Manifold, additionally to filtering by MarketFilter.resolved, you can sort by MarketSort.newest. """ - load_dotenv() markets = get_binary_markets(n, reference, filter_by=filter, sort_by=sort) markets_deduplicated = list(({m.question: m for m in markets}.values())) if len(markets) != len(markets_deduplicated): @@ -92,10 +89,9 @@ def main( markets=markets_deduplicated, agents=[ CrewAIAgentSubquestionsBenchmark( - "subsequential-questions-crewai", + agent_name="subsequential-questions-crewai", max_workers=max_workers, max_tries=1, - model="gpt-3.5-turbo-0125", ), RandomAgent(agent_name="random", max_workers=max_workers), FixedAgent( diff --git a/prediction_market_agent/agents/crewai_subsequential_agent/crewai_agent_subquestions.py b/prediction_market_agent/agents/crewai_subsequential_agent/crewai_agent_subquestions.py index 795b4b83..ebaf1a60 100644 --- a/prediction_market_agent/agents/crewai_subsequential_agent/crewai_agent_subquestions.py +++ b/prediction_market_agent/agents/crewai_subsequential_agent/crewai_agent_subquestions.py @@ -1,18 +1,21 @@ import typing as t from crewai import Agent, Crew, Process, Task +from langchain_core.language_models import BaseChatModel +from langchain_openai import ChatOpenAI from pydantic import BaseModel from prediction_market_agent.agents.crewai_subsequential_agent.prompts import ( - PROBABILITY_CLASS_OUTPUT, + CREATE_OUTCOMES_FROM_SCENARIO_OUTPUT, + CREATE_OUTCOMES_FROM_SCENARIO_PROMPT, FINAL_DECISION_PROMPT, + PROBABILITY_CLASS_OUTPUT, PROBABILITY_FOR_ONE_OUTCOME_PROMPT, RESEARCH_OUTCOME_OUTPUT, RESEARCH_OUTCOME_PROMPT, - CREATE_OUTCOMES_FROM_SCENARIO_OUTPUT, - CREATE_OUTCOMES_FROM_SCENARIO_PROMPT, ) from prediction_market_agent.tools.crewai_tools import TavilyDevTool +from prediction_market_agent.utils import APIKeys tavily_search = TavilyDevTool() @@ -29,8 +32,8 @@ class ProbabilityOutput(BaseModel): class CrewAIAgentSubquestions: - def __init__(self, openai_model_name: str | None) -> None: - # openai_model_name as str automatically interpreted by CrewAI, else create LLM object. + def __init__(self) -> None: + llm = self._build_llm() self.researcher = Agent( role="Research Analyst", goal="Research and report on some future event, giving high quality and nuanced analysis", @@ -38,6 +41,7 @@ def __init__(self, openai_model_name: str | None) -> None: verbose=True, allow_delegation=False, tools=[tavily_search], + llm=llm, ) self.predictor = Agent( @@ -46,7 +50,20 @@ def __init__(self, openai_model_name: str | None) -> None: backstory="You are a professional gambler who is adept at predicting and betting on the outcomes of future events.", verbose=True, allow_delegation=False, + llm=llm, + ) + + def _build_llm(self) -> BaseChatModel: + keys = APIKeys() + llm = ChatOpenAI( + model="gpt-3.5-turbo-0125", + openai_api_key=keys.openai_api_key.get_secret_value(), # type: ignore ) + # llm = OpenAI( + # openai_api_key=keys.openai_api_key.get_secret_value(), # type: ignore + # model_name="gpt-4-turbo-preview", + # ) + return llm def split_research_into_outcomes(self, question: str) -> Outcomes: create_outcomes_task = Task( diff --git a/prediction_market_agent/agents/crewai_subsequential_agent/deploy.py b/prediction_market_agent/agents/crewai_subsequential_agent/deploy.py index 70d025e9..0b6e6e86 100644 --- a/prediction_market_agent/agents/crewai_subsequential_agent/deploy.py +++ b/prediction_market_agent/agents/crewai_subsequential_agent/deploy.py @@ -1,4 +1,3 @@ -import os import random from decimal import Decimal @@ -15,6 +14,8 @@ class DeployableThinkThoroughlyAgent(DeployableAgent): # For cheaper credits at this experimental stage + def __init__(self) -> None: + super().__init__() def pick_markets(self, markets: list[AgentMarket]) -> list[AgentMarket]: # We simply pick 5 random markets to bet on @@ -34,9 +35,7 @@ def pick_markets(self, markets: list[AgentMarket]) -> list[AgentMarket]: def answer_binary_market(self, market: AgentMarket) -> bool: # The answer has already been determined in `pick_markets` so we just # return it here. - os.environ["OPENAI_MODEL_NAME"] = "gpt-4-turbo-preview" - agent = CrewAIAgentSubquestions() - result = agent.answer_binary_market(market.question) + result = CrewAIAgentSubquestions().answer_binary_market(market.question) return True if result.decision == "y" else False def calculate_bet_amount(self, answer: bool, market: AgentMarket) -> BetAmount: diff --git a/prediction_market_agent/tools/crewai_tools.py b/prediction_market_agent/tools/crewai_tools.py index d1571df6..42893290 100644 --- a/prediction_market_agent/tools/crewai_tools.py +++ b/prediction_market_agent/tools/crewai_tools.py @@ -1,4 +1,3 @@ -import os from typing import Any, Type from crewai_tools.tools.base_tool import BaseTool @@ -35,5 +34,5 @@ def _run( ) -> Any: keys = APIKeys() return TavilySearchAPIWrapper( - tavily_api_key=SecretStr(os.environ["TAVILY_API_KEY"]) + tavily_api_key=SecretStr(keys.tavily_api_key.get_secret_value()) ).results(query=search_query)