From 502b3fe1cad05f3487b345e8e68c60fc0a462b30 Mon Sep 17 00:00:00 2001 From: samgozman Date: Sat, 30 Dec 2023 22:42:58 +0200 Subject: [PATCH 1/9] add together AI client --- app.go | 2 +- composer/clients.go | 100 +++++++++++++++++++++++++++++++++++++++++++ composer/composer.go | 20 +++++---- config.go | 1 + main.go | 1 + 5 files changed, 115 insertions(+), 9 deletions(-) create mode 100644 composer/clients.go diff --git a/app.go b/app.go index 90cd8cf..20f0243 100644 --- a/app.go +++ b/app.go @@ -30,7 +30,7 @@ func (a *App) start() { panic(err) } - composer := NewComposer(a.cnf.env.OpenAiToken) + composer := NewComposer(a.cnf.env.OpenAiToken, a.cnf.env.TogetherAIToken) marketJournalist := NewJournalist("MarketNews", []NewsProvider{ NewRssProvider("benzinga:large-cap", "https://www.benzinga.com/news/large-cap/feed"), diff --git a/composer/clients.go b/composer/clients.go new file mode 100644 index 0000000..b16bfee --- /dev/null +++ b/composer/clients.go @@ -0,0 +1,100 @@ +package composer + +import ( + "bytes" + "context" + "encoding/json" + "github.com/sashabaranov/go-openai" + "io" + "net/http" +) + +// OpenAiClientInterface is an interface for OpenAI API client +type OpenAiClientInterface interface { + CreateChatCompletion(ctx context.Context, req openai.ChatCompletionRequest) (response openai.ChatCompletionResponse, error error) +} + +// TogetherAIClientInterface is an interface for TogetherAI API client +type TogetherAIClientInterface interface { + CreateChatCompletion(ctx context.Context, options TogetherAIRequest) (TogetherAIResponse, error) +} + +// TogetherAIRequest is a struct that contains options for TogetherAI API requests +type TogetherAIRequest struct { + Model string `json:"model"` + Prompt string `json:"prompt"` + MaxTokens int `json:"max_tokens"` + Temperature float64 `json:"temperature"` + TopP float64 `json:"top_p"` + TopK int `json:"top_k"` + RepetitionPenalty float64 `json:"repetition_penalty"` +} + +// TogetherAIResponse is a struct that contains response from TogetherAI API +type TogetherAIResponse struct { + ID string `json:"id"` + Choices []struct { + Text string `json:"text"` + } `json:"choices"` + Usage struct { + PromptTokens int `json:"prompt_tokens"` + CompletionTokens int `json:"completion_tokens"` + TotalTokens int `json:"total_tokens"` + } + Created int64 `json:"created"` + Model string `json:"model"` + Object string `json:"object"` +} + +// TogetherAI client to interact with TogetherAI API (replacement for OpenAI API in some cases) +type TogetherAI struct { + APIKey string + URL string +} + +// CreateChatCompletion creates a new chat completion request to TogetherAI API +func (t *TogetherAI) CreateChatCompletion(ctx context.Context, options TogetherAIRequest) (TogetherAIResponse, error) { + var response TogetherAIResponse + + bodyJSON, err := json.Marshal(options) + if err != nil { + return response, err + } + + req, err := http.NewRequest("POST", t.URL, bytes.NewBuffer(bodyJSON)) + if err != nil { + return response, err + } + + req.Header.Set("Authorization", "Bearer "+t.APIKey) + req.Header.Set("Content-Type", "application/json") + req.WithContext(ctx) + + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + return response, err + } + + defer func(Body io.ReadCloser) { + err := Body.Close() + if err != nil { + return + } + }(resp.Body) + + err = json.NewDecoder(resp.Body).Decode(&response) + if err != nil { + return response, err + } + + return response, nil +} + +// NewTogetherAI creates new TogetherAI client +func NewTogetherAI(apiKey string) *TogetherAI { + return &TogetherAI{ + APIKey: apiKey, + URL: "https://api.together.xyz/completions", + } +} diff --git a/composer/composer.go b/composer/composer.go index 0667e04..d5a8611 100644 --- a/composer/composer.go +++ b/composer/composer.go @@ -12,17 +12,21 @@ import ( "github.com/sashabaranov/go-openai" ) -type OpenAiClientInterface interface { - CreateChatCompletion(ctx context.Context, req openai.ChatCompletionRequest) (response openai.ChatCompletionResponse, error error) -} - +// Composer is used to compose (rephrase) news and events, find some meta information about them, +// filter out some unnecessary stuff, summarise them and so on. type Composer struct { - OpenAiClient OpenAiClientInterface - Config *PromptConfig + OpenAiClient OpenAiClientInterface + TogetherAIClient TogetherAIClientInterface + Config *PromptConfig } -func NewComposer(oaiToken string) *Composer { - return &Composer{OpenAiClient: openai.NewClient(oaiToken), Config: DefaultPromptConfig()} +// NewComposer creates a new Composer instance with OpenAI and TogetherAI clients and default config +func NewComposer(oaiToken, tgrAiToken string) *Composer { + return &Composer{ + OpenAiClient: openai.NewClient(oaiToken), + TogetherAIClient: NewTogetherAI(tgrAiToken), + Config: DefaultPromptConfig(), + } } func (c *Composer) Compose(ctx context.Context, news journalist.NewsList) ([]*ComposedNews, error) { diff --git a/config.go b/config.go index 4cec8c0..4d44f25 100644 --- a/config.go +++ b/config.go @@ -5,6 +5,7 @@ type Env struct { TelegramChannelID string `mapstructure:"TELEGRAM_CHANNEL_ID"` TelegramBotToken string `mapstructure:"TELEGRAM_BOT_TOKEN"` OpenAiToken string `mapstructure:"OPENAI_TOKEN"` + TogetherAIToken string `mapstructure:"TOGETHER_AI_TOKEN"` PostgresDSN string `mapstructure:"POSTGRES_DSN"` SentryDSN string `mapstructure:"SENTRY_DSN"` } diff --git a/main.go b/main.go index ef309c7..dccb81a 100644 --- a/main.go +++ b/main.go @@ -25,6 +25,7 @@ func main() { TelegramChannelID: os.Getenv("TELEGRAM_CHANNEL_ID"), TelegramBotToken: os.Getenv("TELEGRAM_BOT_TOKEN"), OpenAiToken: os.Getenv("OPENAI_TOKEN"), + TogetherAIToken: os.Getenv("TOGETHER_AI_TOKEN"), PostgresDSN: os.Getenv("POSTGRES_DSN"), SentryDSN: os.Getenv("SENTRY_DSN"), } From bf646746e29d6e9fa9ca5bb42ca97e0b9a32c637 Mon Sep 17 00:00:00 2001 From: samgozman Date: Sat, 30 Dec 2023 22:43:42 +0200 Subject: [PATCH 2/9] go mod tidy --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 208ed2f..f76beef 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/samgozman/fin-thread go 1.21.5 require ( + github.com/avast/retry-go v3.0.0+incompatible github.com/cenkalti/backoff/v4 v4.2.1 github.com/getsentry/sentry-go v0.25.0 github.com/go-co-op/gocron/v2 v2.0.1 @@ -23,7 +24,6 @@ require ( require ( github.com/PuerkitoBio/goquery v1.8.1 // indirect github.com/andybalholm/cascadia v1.3.2 // indirect - github.com/avast/retry-go v3.0.0+incompatible // indirect github.com/aymerick/douceur v0.2.0 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/fsnotify/fsnotify v1.7.0 // indirect From 78eae1bf61d4acdd086f2b48aad47cad1a12876c Mon Sep 17 00:00:00 2001 From: samgozman Date: Sun, 31 Dec 2023 10:28:14 +0200 Subject: [PATCH 3/9] update gocron & viper --- go.mod | 6 +++--- go.sum | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/go.mod b/go.mod index f76beef..0687ace 100644 --- a/go.mod +++ b/go.mod @@ -6,14 +6,14 @@ require ( github.com/avast/retry-go v3.0.0+incompatible github.com/cenkalti/backoff/v4 v4.2.1 github.com/getsentry/sentry-go v0.25.0 - github.com/go-co-op/gocron/v2 v2.0.1 + github.com/go-co-op/gocron/v2 v2.1.1 github.com/go-telegram-bot-api/telegram-bot-api v4.6.4+incompatible github.com/google/uuid v1.5.0 github.com/microcosm-cc/bluemonday v1.0.26 github.com/mmcdole/gofeed v1.2.1 github.com/samber/lo v1.39.0 github.com/sashabaranov/go-openai v1.17.9 - github.com/spf13/viper v1.18.1 + github.com/spf13/viper v1.18.2 github.com/stretchr/testify v1.8.4 golang.org/x/sync v0.5.0 gorm.io/datatypes v1.2.0 @@ -57,7 +57,7 @@ require ( github.com/technoweenie/multipartstreamer v1.0.1 // indirect go.uber.org/multierr v1.11.0 // indirect golang.org/x/crypto v0.16.0 // indirect - golang.org/x/exp v0.0.0-20231206192017-f3f8817b8deb // indirect + golang.org/x/exp v0.0.0-20231219180239-dc181d75b848 // indirect golang.org/x/net v0.19.0 // indirect golang.org/x/sys v0.15.0 // indirect golang.org/x/text v0.14.0 // indirect diff --git a/go.sum b/go.sum index 936cf14..59270d5 100644 --- a/go.sum +++ b/go.sum @@ -19,8 +19,8 @@ github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nos github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= github.com/getsentry/sentry-go v0.25.0 h1:q6Eo+hS+yoJlTO3uu/azhQadsD8V+jQn2D8VvX1eOyI= github.com/getsentry/sentry-go v0.25.0/go.mod h1:lc76E2QywIyW8WuBnwl8Lc4bkmQH4+w1gwTf25trprY= -github.com/go-co-op/gocron/v2 v2.0.1 h1:SnaPYOZ/FZJ+m6i0OLVEc+LUq9i/8fqiDIgImO2Zyxk= -github.com/go-co-op/gocron/v2 v2.0.1/go.mod h1:DodDqurAedt8cj/dbFM8obVSgPv0Vch80eF7neNVwmg= +github.com/go-co-op/gocron/v2 v2.1.1 h1:vQPaVzCFUbfNTKjLYPCUiLlgE3mJ78XfYCo+CTfutHs= +github.com/go-co-op/gocron/v2 v2.1.1/go.mod h1:0MfNAXEchzeSH1vtkZrTAcSMWqyL435kL6CA4b0bjrg= github.com/go-errors/errors v1.4.2 h1:J6MZopCL4uSllY1OfXM374weqZFFItUbrImctkmUxIA= github.com/go-errors/errors v1.4.2/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3BopGUQ5a5Og= github.com/go-sql-driver/mysql v1.7.0/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI= @@ -109,8 +109,8 @@ github.com/spf13/cast v1.6.0 h1:GEiTHELF+vaR5dhz3VqZfFSzZjYbgeKDpBxQVS4GYJ0= github.com/spf13/cast v1.6.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/spf13/viper v1.18.1 h1:rmuU42rScKWlhhJDyXZRKJQHXFX02chSVW1IvkPGiVM= -github.com/spf13/viper v1.18.1/go.mod h1:EKmWIqdnk5lOcmR72yw6hS+8OPYcwD0jteitLMVB+yk= +github.com/spf13/viper v1.18.2 h1:LUXCnvUvSM6FXAsj6nnfc8Q2tp1dIgUfY9Kc8GsSOiQ= +github.com/spf13/viper v1.18.2/go.mod h1:EKmWIqdnk5lOcmR72yw6hS+8OPYcwD0jteitLMVB+yk= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= @@ -136,8 +136,8 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.16.0 h1:mMMrFzRSCF0GvB7Ne27XVtVAaXLrPmgPC7/v0tkwHaY= golang.org/x/crypto v0.16.0/go.mod h1:gCAAfMLgwOJRpTjQ2zCCt2OcSfYMTeZVSRtQlPC7Nq4= -golang.org/x/exp v0.0.0-20231206192017-f3f8817b8deb h1:c0vyKkb6yr3KR7jEfJaOSv4lG7xPkbN6r52aJz1d8a8= -golang.org/x/exp v0.0.0-20231206192017-f3f8817b8deb/go.mod h1:iRJReGqOEeBhDZGkGbynYwcHlctCvnjTYIamk7uXpHI= +golang.org/x/exp v0.0.0-20231219180239-dc181d75b848 h1:+iq7lrkxmFNBM7xx+Rae2W6uyPfhPeDWD+n+JgppptE= +golang.org/x/exp v0.0.0-20231219180239-dc181d75b848/go.mod h1:iRJReGqOEeBhDZGkGbynYwcHlctCvnjTYIamk7uXpHI= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= From 857f7393c8fda833256bfbe1e3def709bdfad690 Mon Sep 17 00:00:00 2001 From: samgozman Date: Sun, 31 Dec 2023 10:45:32 +0200 Subject: [PATCH 4/9] add env to samples --- .env_sample | 1 + 1 file changed, 1 insertion(+) diff --git a/.env_sample b/.env_sample index 5c468b8..4cff2a7 100644 --- a/.env_sample +++ b/.env_sample @@ -2,6 +2,7 @@ TELEGRAM_CHANNEL_ID= TELEGRAM_BOT_TOKEN= OPENAI_TOKEN= +TOGETHER_AI_TOKEN= # DSN in gorm format POSTGRES_DSN="host=postgres user=postgres password=postgres dbname=finfeed port=5432 sslmode=disable" SENTRY_DSN=https://public@sentry.example.com/1 From 2d68f6e23342da3d6ad5bb8f83db48c3c141fb37 Mon Sep 17 00:00:00 2001 From: samgozman Date: Mon, 1 Jan 2024 12:16:42 +0200 Subject: [PATCH 5/9] add Filter method --- composer/composer.go | 43 ++++++++++++ composer/composer_test.go | 135 +++++++++++++++++++++++++++++++++++++- composer/prompt.go | 29 ++++---- 3 files changed, 195 insertions(+), 12 deletions(-) diff --git a/composer/composer.go b/composer/composer.go index d5a8611..521be7f 100644 --- a/composer/composer.go +++ b/composer/composer.go @@ -29,6 +29,8 @@ func NewComposer(oaiToken, tgrAiToken string) *Composer { } } +// Compose creates a new AI-composed news from the given news list. +// It will also find some meta information about the news and events (markets, tickers, hashtags). func (c *Composer) Compose(ctx context.Context, news journalist.NewsList) ([]*ComposedNews, error) { // RemoveDuplicates out news that are not from today var todayNews journalist.NewsList = lo.Filter(news, func(n *journalist.News, _ int) bool { @@ -149,6 +151,47 @@ func (c *Composer) Summarise(ctx context.Context, headlines []*Headline, headlin return h, nil } +// Filter removes unnecessary news from the given news list using TogetherAI API. +func (c *Composer) Filter(ctx context.Context, news journalist.NewsList) (journalist.NewsList, error) { + if len(news) == 0 { + return nil, nil + } + + jsonNews, err := news.ToJSON() + if err != nil { + return nil, newErr(err, "Filter", "json.Marshal news").WithValue(fmt.Sprintf("%+v", news)) + } + + resp, err := c.TogetherAIClient.CreateChatCompletion( + ctx, + TogetherAIRequest{ + Model: "mistralai/Mistral-7B-Instruct-v0.2", + Prompt: c.Config.FilterPromptInstruct(jsonNews), + MaxTokens: 2048, + Temperature: 0.7, + TopP: 0.7, + TopK: 50, + RepetitionPenalty: 1, + }, + ) + if err != nil { + return nil, newErr(err, "Filter", "TogetherAIClient.CreateChatCompletion") + } + + matches, err := openaiJSONStringFixer(resp.Choices[0].Text) + if err != nil { + return nil, newErr(err, "Filter", "openaiJSONStringFixer") + } + + var filtered journalist.NewsList + err = json.Unmarshal([]byte(matches), &filtered) + if err != nil { + return nil, newErr(err, "Filter", "json.Unmarshal").WithValue(resp.Choices[0].Text) + } + + return filtered, nil +} + // Headline is the base data structure for the data to summarise type Headline struct { ID string `json:"id"` diff --git a/composer/composer_test.go b/composer/composer_test.go index 37bc459..001a114 100644 --- a/composer/composer_test.go +++ b/composer/composer_test.go @@ -23,6 +23,15 @@ func (m *MockOpenAiClient) CreateChatCompletion(ctx context.Context, req openai. return args.Get(0).(openai.ChatCompletionResponse), args.Error(1) } +type MockTogetherAIClient struct { + mock.Mock +} + +func (m *MockTogetherAIClient) CreateChatCompletion(ctx context.Context, options TogetherAIRequest) (TogetherAIResponse, error) { + args := m.Called(ctx, options) + return args.Get(0).(TogetherAIResponse), args.Error(1) +} + func TestComposer_Compose(t *testing.T) { news := journalist.NewsList{ { @@ -46,7 +55,7 @@ func TestComposer_Compose(t *testing.T) { Title: "Wholesale prices fell 0.5% in October for biggest monthly drop since April 2020", Description: "Wholesale prices fell 0.5% in October for biggest monthly drop since April 2020", Link: "https://www.cnbc.com/", - Date: time.Now().Add(-24 * time.Hour * 2).UTC(), // Should be filtered out + Date: time.Now().UTC(), ProviderName: "cnbc", }, } @@ -84,6 +93,13 @@ func TestComposer_Compose(t *testing.T) { Markets: []string{}, Hashtags: []string{"interestrates"}, }, + { + ID: "3", + Text: "Wholesale prices fell 0.5% in October for biggest monthly drop since April 2020", + Tickers: []string{}, + Markets: []string{}, + Hashtags: []string{}, + }, }, wantErr: false, }, @@ -291,3 +307,120 @@ func TestComposer_Summarise(t *testing.T) { }) } } + +func TestComposer_Filter(t *testing.T) { + type args struct { + ctx context.Context + news journalist.NewsList + } + tests := []struct { + name string + args args + want journalist.NewsList + wantErr bool + }{ + { + name: "Should pass and return correct filtered news", + args: args{ + ctx: context.Background(), + news: journalist.NewsList{ + { + ID: "1", + Title: "Ray Dalio says U.S. reaching an inflection point where the debt problem quickly gets even worse", + Description: "Soaring U.S. government debt is reaching a point where it will begin creating larger problems, the hedge fund titan said Friday.", + Link: "https://www.cnbc.com/", + Date: time.Now().UTC(), + ProviderName: "cnbc", + }, + { + ID: "2", + Title: "The market thinks the Fed is going to start cutting rates aggressively. Investors could be in for a letdown", + Description: "Markets may be at least a tad optimistic, particularly considering the cautious approach central bank officials have taken.", + Link: "https://www.cnbc.com/", + Date: time.Now().UTC(), + ProviderName: "cnbc", + }, + { + ID: "3", + Title: "Wholesale prices fell 0.5% in October for biggest monthly drop since April 2020", + Description: "Wholesale prices fell 0.5% in October for biggest monthly drop since April 2020", + Link: "https://www.cnbc.com/", + Date: time.Now().UTC(), + ProviderName: "cnbc", + }, + }, + }, + want: journalist.NewsList{ + { + ID: "1", + Title: "Ray Dalio says U.S. reaching an inflection point where the debt problem quickly gets even worse", + Description: "Soaring U.S. government debt is reaching a point where it will begin creating larger problems, the hedge fund titan said Friday.", + Link: "https://www.cnbc.com/", + Date: time.Now().UTC(), + ProviderName: "cnbc", + }, + { + ID: "3", + Title: "Wholesale prices fell 0.5% in October for biggest monthly drop since April 2020", + Description: "Wholesale prices fell 0.5% in October for biggest monthly drop since April 2020", + Link: "https://www.cnbc.com/", + Date: time.Now().UTC(), + ProviderName: "cnbc", + }, + }, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + mockClient := new(MockTogetherAIClient) + defConf := DefaultPromptConfig() + + // Set expectations for the mock client + if tt.wantErr { + mockError := errors.New("some error") + mockClient.On("CreateChatCompletion", mock.Anything, mock.Anything).Return(TogetherAIResponse{}, mockError) + } else { + jsonNews, _ := tt.args.news.ToJSON() + expectedJsonNews, _ := tt.want.ToJSON() + + mockClient.On("CreateChatCompletion", + mock.Anything, + TogetherAIRequest{ + Model: "mistralai/Mistral-7B-Instruct-v0.2", + Prompt: defConf.FilterPromptInstruct(jsonNews), + MaxTokens: 2048, + Temperature: 0.7, + TopP: 0.7, + TopK: 50, + RepetitionPenalty: 1, + }, + ).Return(TogetherAIResponse{ + Choices: []struct { + Text string `json:"text"` + }{ + { + Text: expectedJsonNews, + }, + }, + }, nil) + } + + c := &Composer{ + TogetherAIClient: mockClient, + Config: DefaultPromptConfig(), + } + got, err := c.Filter(tt.args.ctx, tt.args.news) + if (err != nil) != tt.wantErr { + t.Errorf("Filter() error = %v, wantErr %v", err, tt.wantErr) + return + } + + for i, n := range got { + if !reflect.DeepEqual(n, tt.want[i]) { + t.Errorf("Filter() = %v, want %v", n, tt.want[i]) + } + } + }) + } +} diff --git a/composer/prompt.go b/composer/prompt.go index 9ed56d7..a43831e 100644 --- a/composer/prompt.go +++ b/composer/prompt.go @@ -3,8 +3,9 @@ package composer import "fmt" type PromptConfig struct { - ComposePrompt string - SummarisePrompt SummarisePromptFunc + ComposePrompt string + SummarisePrompt SummarisePromptFunc + FilterPromptInstruct FilterPromptFunc } const ( @@ -14,10 +15,7 @@ const ( func DefaultPromptConfig() *PromptConfig { return &PromptConfig{ ComposePrompt: `You will be answering only in JSON array format: [{id:"", text:"", tickers:[], markets:[], hashtags:[]}] - You need to remove from array blank, spam, purposeless, clickbait, tabloid, advertising, unspecified, anonymous or non-financial news. - Most important news right know is inflation, interest rates, war, elections, crisis, unemployment index, regulations. - If none of the news are important, return empty array []. - Next you need to fill some (or none) tickers, markets and hashtags arrays for each news. + You need to fill some (or none) tickers, markets and hashtags arrays for each news. If news are mentioning some companies and stocks you need to find appropriate stocks 'tickers'. If news are about some market events you need to fill 'markets' with some index tickers (like SPY, QQQ, or RUT etc.) based on the context. News context can be also related to some popular topics, we call it 'hashtags'. @@ -28,16 +26,25 @@ func DefaultPromptConfig() *PromptConfig { `, SummarisePrompt: func(headlinesLimit int) string { return fmt.Sprintf(`You will receive a JSON array of news with IDs. -You need to create a short (%v words max) summary for the %v most important financial, -economical, stock market news what happened from the start of the day. -Find the main verb in the string and put it into the result JSON. -Response in JSON array format: -[{summary:"", verb:"", id:"", link:""}]`, + You need to create a short (%v words max) summary for the %v most important financial, + economical, stock market news what happened from the start of the day. + Find the main verb in the string and put it into the result JSON. + Response in JSON array format: + [{summary:"", verb:"", id:"", link:""}]`, MaxWordsPerSentence, headlinesLimit, ) }, + FilterPromptInstruct: func(newsJson string) string { + return fmt.Sprintf(`[INST]You will be given a JSON array of financial news. + You need to remove from array blank, purposeless, clickbait, advertising or non-financial news. + Most important news right know is inflation, interest rates, war, elections, crisis, unemployment index etc. + Return the response in the same JSON format. If none of the news are important, return empty array []. + Do not add any additional text or explanation, just plain JSON response.\n%s[/INST]`, newsJson) + }, } } type SummarisePromptFunc = func(headlinesLimit int) string + +type FilterPromptFunc = func(newsJson string) string From df75937799d02a489360e31bdafe84461cbcd26b Mon Sep 17 00:00:00 2001 From: samgozman Date: Mon, 1 Jan 2024 12:25:47 +0200 Subject: [PATCH 6/9] Add comment --- composer/composer.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/composer/composer.go b/composer/composer.go index 521be7f..975a089 100644 --- a/composer/composer.go +++ b/composer/composer.go @@ -157,6 +157,9 @@ func (c *Composer) Filter(ctx context.Context, news journalist.NewsList) (journa return nil, nil } + // TODO: This can be optimised by using ToContentJSON() method. + // But it will require to map the response back to the original news list. + // Also prompt can be optimised to return only IDs of the news to reduce tokens count. jsonNews, err := news.ToJSON() if err != nil { return nil, newErr(err, "Filter", "json.Marshal news").WithValue(fmt.Sprintf("%+v", news)) From 8d5ab7c12cb7339b305299b8d2ab6555f4631022 Mon Sep 17 00:00:00 2001 From: samgozman Date: Mon, 1 Jan 2024 12:30:38 +0200 Subject: [PATCH 7/9] add filter for the job --- jobs/job.go | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/jobs/job.go b/jobs/job.go index 2fc9aa8..93aad80 100644 --- a/jobs/job.go +++ b/jobs/job.go @@ -175,6 +175,23 @@ func (job *Job) Run() JobFunc { return } + span = tx.StartChild("filter") + jobData.News, err = job.composer.Filter(ctx, jobData.News) + span.Finish() + if err != nil { + job.logger.Info(fmt.Sprintf("[%s][filter]", jobName), "error", err) + hub.CaptureException(err) + return + } + hub.AddBreadcrumb(&sentry.Breadcrumb{ + Category: "successful", + Message: fmt.Sprintf("filter returned %d news", len(jobData.News)), + Level: sentry.LevelInfo, + }, nil) + if len(jobData.News) == 0 { + return + } + span = tx.StartChild("composeNews") jobData.ComposedNews, err = job.composeNews(ctx, jobData.News) span.Finish() From 8b16d4a62319f257078dd86b101442d461184698 Mon Sep 17 00:00:00 2001 From: samgozman Date: Mon, 1 Jan 2024 12:47:04 +0200 Subject: [PATCH 8/9] increase context deadline --- jobs/job.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jobs/job.go b/jobs/job.go index 93aad80..7a4f32d 100644 --- a/jobs/job.go +++ b/jobs/job.go @@ -116,7 +116,7 @@ func (job *Job) SaveToDB() *Job { // Run return job function that will be executed by the scheduler func (job *Job) Run() JobFunc { return func() { - ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), 25*time.Second) defer cancel() jobName := fmt.Sprintf("Run.%s", job.journalist.Name) From 20639965332d7569e3fb821d3f1dfc142aaf89f9 Mon Sep 17 00:00:00 2001 From: samgozman Date: Mon, 1 Jan 2024 12:54:55 +0200 Subject: [PATCH 9/9] fix test --- composer/composer_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer/composer_test.go b/composer/composer_test.go index 001a114..375c3b2 100644 --- a/composer/composer_test.go +++ b/composer/composer_test.go @@ -77,7 +77,7 @@ func TestComposer_Compose(t *testing.T) { ctx: context.Background(), news: news, }, - expectedFilteredNews: journalist.NewsList{news[0], news[1]}, + expectedFilteredNews: journalist.NewsList{news[0], news[1], news[2]}, want: []*ComposedNews{ { ID: "1",