diff --git a/bun.lockb b/bun.lockb index 1b3d1e1ea0d..33edce018f4 100755 Binary files a/bun.lockb and b/bun.lockb differ diff --git a/packages/agent/package.json b/packages/agent/package.json index b8bd08a314c..84093bbec7b 100644 --- a/packages/agent/package.json +++ b/packages/agent/package.json @@ -19,7 +19,8 @@ "dependencies": { "@elizaos/plugin-bootstrap": "workspace:*", "@elizaos/plugin-openai": "workspace:*", - "@elizaos-plugins/client-discord": "workspace:*", + "@elizaos-plugins/discord": "workspace:*", + "@elizaos/plugin-node": "workspace:*", "@elizaos/core": "workspace:*", "@types/body-parser": "1.19.5", "@types/cors": "2.8.17", diff --git a/packages/agent/src/defaultCharacter.ts b/packages/agent/src/defaultCharacter.ts index f5fe4c00319..5395f4c2fdc 100644 --- a/packages/agent/src/defaultCharacter.ts +++ b/packages/agent/src/defaultCharacter.ts @@ -1,12 +1,20 @@ import { type Character } from "@elizaos/core"; -import { openaiPlugin } from "@elizaos/plugin-openai"; import { anthropicPlugin } from "@elizaos/plugin-anthropic"; import { localAIPlugin } from "@elizaos/plugin-local-ai"; +import { createNodePlugin } from "@elizaos/plugin-node"; +import { openaiPlugin } from "@elizaos/plugin-openai"; +import { bootstrapPlugin } from "@elizaos/plugin-bootstrap"; export const defaultCharacter: Character = { name: "Eliza", username: "eliza", - plugins: [], + plugins: [ + openaiPlugin, + anthropicPlugin, + localAIPlugin, + createNodePlugin(), + bootstrapPlugin, + ], settings: { secrets: {}, voice: { diff --git a/packages/agent/src/index.ts b/packages/agent/src/index.ts index b554e796fcd..b53d7adc1e1 100644 --- a/packages/agent/src/index.ts +++ b/packages/agent/src/index.ts @@ -363,8 +363,6 @@ export async function initializeClients( } runtime.clients = clients; - - } export async function createAgent( @@ -372,16 +370,7 @@ export async function createAgent( ): Promise { logger.log(`Creating runtime for character ${character.name}`); return new AgentRuntime({ - evaluators: [], character, - // character.plugins are handled when clients are added - plugins: [ - bootstrapPlugin, - ] - .flat() - .filter(Boolean), - providers: [], - managers: [], fetch: logFetch, }); } diff --git a/packages/client-discord b/packages/client-discord deleted file mode 160000 index 5efefffcc30..00000000000 --- a/packages/client-discord +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 5efefffcc30ed864dd07dfe657ac6d18c5919591 diff --git a/packages/core/src/generation.ts b/packages/core/src/generation.ts index 4ff8e9bef26..8a262006e82 100644 --- a/packages/core/src/generation.ts +++ b/packages/core/src/generation.ts @@ -1,6 +1,6 @@ // ================ IMPORTS ================ import { z, type ZodSchema } from "zod"; -import { elizaLogger, logFunctionCall, logger } from "./index.ts"; +import { logger } from "./index.ts"; import { parseJSONObjectFromText } from "./parsing.ts"; import { type Content, @@ -108,7 +108,7 @@ export async function trimTokens( // Decode back to text - js-tiktoken decode() returns a string directly return await runtime.call(ModelClass.TEXT_TOKENIZER_DECODE, truncatedTokens); } catch (error) { - elizaLogger.error("Error in trimTokens:", error); + logger.error("Error in trimTokens:", error); // Return truncated string if tokenization fails return context.slice(-maxTokens * 4); // Rough estimate of 4 chars per token } @@ -127,8 +127,6 @@ export async function generateText({ stopSequences?: string[]; customSystemPrompt?: string; }): Promise { - logFunctionCall("generateText", runtime); - const text = 
await runtime.call(modelClass, { runtime, context, @@ -149,8 +147,6 @@ export async function generateTextArray({ modelClass: ModelClass; stopSequences?: string[]; }): Promise { - logFunctionCall("generateTextArray", runtime); - const result = await withRetry(async () => { const result = await generateObject({ runtime, @@ -181,8 +177,6 @@ async function generateEnum({ functionName: string; stopSequences?: string[]; }): Promise { - logFunctionCall(functionName, runtime); - const enumResult = await withRetry(async () => { logger.debug( "Attempting to generate enum value with context:", @@ -241,8 +235,6 @@ export async function generateTrueOrFalse({ modelClass: ModelClass; stopSequences?: string[]; }): Promise { - logFunctionCall("generateTrueOrFalse", runtime); - const BOOL_VALUES = ["true", "false"]; const result = await generateEnum({ @@ -264,7 +256,6 @@ export const generateObject = async ({ modelClass = ModelClass.TEXT_SMALL, stopSequences, }: GenerateObjectOptions): Promise => { - logFunctionCall("generateObject", runtime); if (!context) { const errorMessage = "generateObject context is empty"; console.error(errorMessage); @@ -298,7 +289,6 @@ export async function generateObjectArray({ schemaName?: string; schemaDescription?: string; }): Promise[]> { - logFunctionCall("generateObjectArray", runtime); if (!context) { logger.error("generateObjectArray context is empty"); return []; @@ -327,8 +317,6 @@ export async function generateMessageResponse({ modelClass: ModelClass; stopSequences?: string[]; }): Promise { - logFunctionCall("generateMessageResponse", runtime); - logger.debug("Context:", context); return await withRetry(async () => { @@ -375,8 +363,6 @@ export const generateImage = async ( data?: string[]; error?: any; }> => { - logFunctionCall("generateImage", runtime); - return await withRetry( async () => { const result = await runtime.call(ModelClass.IMAGE, data); @@ -400,7 +386,6 @@ export const generateCaption = async ( title: string; description: string; }> => { - logFunctionCall("generateCaption", runtime); const { imageUrl } = data; const resp = await runtime.call(ModelClass.IMAGE_DESCRIPTION, imageUrl); diff --git a/packages/core/src/helper.ts b/packages/core/src/helper.ts deleted file mode 100644 index 18a7c020a9d..00000000000 --- a/packages/core/src/helper.ts +++ /dev/null @@ -1,39 +0,0 @@ -import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"; -import logger from "./logger.ts"; -import { type IAgentRuntime, type TextModelSettings } from "./types.ts"; - - -export function logFunctionCall(functionName: string, runtime?: IAgentRuntime) { - logger.info(`Function call: ${functionName}`, { - functionName, - // runtime: JSON.stringify(runtime?) 
- }); -} - -export async function splitChunks( - content: string, - chunkSize = 512, - bleed = 20 -): Promise { - logger.debug("[splitChunks] Starting text split"); - - const textSplitter = new RecursiveCharacterTextSplitter({ - chunkSize: Number(chunkSize), - chunkOverlap: Number(bleed), - }); - - const chunks = await textSplitter.splitText(content); - logger.debug("[splitChunks] Split complete:", { - numberOfChunks: chunks.length, - averageChunkSize: chunks.reduce((acc, chunk) => acc + chunk.length, 0) / chunks.length, - }); - - return chunks; -} - -export function getModelSettings(modelSettings: Record) { - if (!modelSettings) { - throw new Error("MODEL_SETTINGS is not defined"); - } - return modelSettings.defaultModel; -} \ No newline at end of file diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 15aa1836ce4..4498e9c323f 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -6,7 +6,6 @@ export * from "./environment.ts"; export * from "./evaluators.ts"; export * from "./generation.ts"; export * from "./goals.ts"; -export * from "./helper.ts"; export * from "./knowledge.ts"; export * from "./logger.ts"; export * from "./memory.ts"; diff --git a/packages/core/src/knowledge.ts b/packages/core/src/knowledge.ts index 57a1838945c..e7df1c797e0 100644 --- a/packages/core/src/knowledge.ts +++ b/packages/core/src/knowledge.ts @@ -1,4 +1,4 @@ -import { splitChunks } from "./helper.ts"; +import { splitChunks } from "./parsing.ts"; import logger from "./logger.ts"; import type { AgentRuntime } from "./runtime.ts"; import { type KnowledgeItem, type Memory, ModelClass, type UUID } from "./types.ts"; diff --git a/packages/core/src/parsing.ts b/packages/core/src/parsing.ts index 384b9c1c06e..e0cf26e6a44 100644 --- a/packages/core/src/parsing.ts +++ b/packages/core/src/parsing.ts @@ -1,3 +1,6 @@ +import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"; +import logger from "./logger.ts"; + const jsonBlockPattern = /```json\n([\s\S]*?)\n```/; export const messageCompletionFooter = `\nResponse format should be formatted in a valid JSON block like this: @@ -332,3 +335,24 @@ export function truncateToCompleteSentence( const hardTruncated = text.slice(0, maxLength - 3).trim(); return `${hardTruncated}...`; } + +export async function splitChunks( + content: string, + chunkSize = 512, + bleed = 20 +): Promise { + logger.debug("[splitChunks] Starting text split"); + + const textSplitter = new RecursiveCharacterTextSplitter({ + chunkSize: Number(chunkSize), + chunkOverlap: Number(bleed), + }); + + const chunks = await textSplitter.splitText(content); + logger.debug("[splitChunks] Split complete:", { + numberOfChunks: chunks.length, + averageChunkSize: chunks.reduce((acc, chunk) => acc + chunk.length, 0) / chunks.length, + }); + + return chunks; +} \ No newline at end of file diff --git a/packages/core/src/runtime.ts b/packages/core/src/runtime.ts index 486c8d5bedd..90465348675 100644 --- a/packages/core/src/runtime.ts +++ b/packages/core/src/runtime.ts @@ -236,6 +236,7 @@ export class AgentRuntime implements IAgentRuntime { readonly evaluators: Evaluator[] = []; readonly providers: Provider[] = []; readonly plugins: Plugin[] = []; + events: Map Promise)[]> = new Map(); readonly fetch = fetch; public cacheManager!: ICacheManager; @@ -253,11 +254,9 @@ export class AgentRuntime implements IAgentRuntime { conversationLength?: number; agentId?: UUID; character?: Character; - serverUrl?: string; plugins?: Plugin[]; - managers?: IMemoryManager[]; - 
databaseAdapter?: IDatabaseAdapter; fetch?: typeof fetch; + databaseAdapter?: IDatabaseAdapter; cacheManager?: ICacheManager; adapters?: Adapter[]; }) { @@ -300,13 +299,6 @@ export class AgentRuntime implements IAgentRuntime { } this.memoryManagerService = new MemoryManagerService(this, this.knowledgeRoot); - - // Register additional memory managers from options - if (opts.managers) { - for (const manager of opts.managers) { - this.registerMemoryManager(manager); - } - } this.plugins = [ ...(opts.character?.plugins ?? []), @@ -325,6 +317,10 @@ export class AgentRuntime implements IAgentRuntime { for (const provider of (plugin.providers ?? [])) { this.registerContextProvider(provider); } + + for (const manager of (plugin.memoryManagers ?? [])) { + this.registerMemoryManager(manager) + } } // Initialize adapters from options or empty array if not provided @@ -1275,8 +1271,6 @@ Text: ${attachment.text} return await handler(params); } - events: Map Promise)[]> = new Map(); - registerEvent(event: string, handler: (params: any) => Promise) { if (!this.events.has(event)) { this.events.set(event, []); diff --git a/packages/core/src/test_resources/createRuntime.ts b/packages/core/src/test_resources/createRuntime.ts index c09ed9366d3..3a5d5001104 100644 --- a/packages/core/src/test_resources/createRuntime.ts +++ b/packages/core/src/test_resources/createRuntime.ts @@ -1,4 +1,4 @@ -import { SqliteDatabaseAdapter, loadVecExtensions } from "@elizaos-plugins/adapter-sqlite"; +import { SqliteDatabaseAdapter, loadVecExtensions } from "@elizaos-plugins/sqlite"; import type { DatabaseAdapter } from "../database.ts"; import { AgentRuntime } from "../runtime.ts"; import { type Action, type Evaluator, type Provider } from "../types.ts"; diff --git a/packages/core/src/types.ts b/packages/core/src/types.ts index 57921038f5f..3e072dffc6c 100644 --- a/packages/core/src/types.ts +++ b/packages/core/src/types.ts @@ -591,6 +591,9 @@ export type Plugin = { /** Optional adapters */ adapters?: Adapter[]; + /** Optional memory managers */ + memoryManagers?: IMemoryManager[]; + /** Optional handlers */ handlers?: { [key: string]: (...args: any[]) => Promise; @@ -639,6 +642,11 @@ export type Character = { [key: string]: TemplateType; }; + /** Optional client configuration */ + clientConfig?: { + [key: string]: any; + }; + /** Character biography */ bio: string | string[]; @@ -966,7 +974,7 @@ export interface IAgentRuntime { getService(service: ServiceType): T | null; - registerService(service: Service): Promise; + registerService(service: Service): void; getSetting(key: string): string | null; diff --git a/packages/plugin-discord/.npmignore b/packages/plugin-discord/.npmignore new file mode 100644 index 00000000000..078562eceab --- /dev/null +++ b/packages/plugin-discord/.npmignore @@ -0,0 +1,6 @@ +* + +!dist/** +!package.json +!readme.md +!tsup.config.ts \ No newline at end of file diff --git a/packages/plugin-discord/README.md b/packages/plugin-discord/README.md new file mode 100644 index 00000000000..7ea3eb4d764 --- /dev/null +++ b/packages/plugin-discord/README.md @@ -0,0 +1,102 @@ +# @elizaos/client-discord + +A Discord client implementation for ElizaOS, enabling rich integration with Discord servers for managing interactions, voice, and message handling. + +## Features + +- Handle server join events and manage initial configurations. +- Voice event management via the voice manager. +- Manage and process new messages with the message manager. +- Slash command registration and interaction handling. 
+- Disconnect websocket and unbind all listeners when required. +- Robust permissions management for bot functionality. + +## Installation + +As this is a workspace package, it's installed as part of the ElizaOS monorepo: + +```bash +pnpm install +``` + +## Configuration + +The client requires the following environment variables: + +```bash +# Discord API Credentials +DISCORD_APPLICATION_ID=your_application_id +DISCORD_API_TOKEN=your_api_token + +# Optional Settings (add any additional details here if necessary) +``` + +## Usage + +### Basic Initialization + +```typescript +import { DiscordClientInterface } from '@elizaos/client-discord'; + +// Initialize the client +const discordManager = await DiscordClientInterface.start(runtime); +``` + +### Slash Command Registration + +To register slash commands: + +```typescript +await discordManager.command.registerCommands([ + { + name: 'example', + description: 'An example slash command', + options: [] + } +]); +``` + +### Handling Messages + +```typescript +// Listen for new messages +await discordManager.message.handleNewMessage({ + channelId: 'channel-id', + content: 'Hello Discord!' +}); +``` + +### Managing Voice Events + +```typescript +// Join a voice channel +await discordManager.voice.joinChannel('channel-id'); + +// Handle voice interactions +await discordManager.voice.handleInteraction({ + userId: 'user-id', + action: 'speak' +}); +``` + +## Key Components + +1. **ClientBase** + - Handles authentication and session management. + - Manages websocket connections. + +2. **MessageManager** + - Processes incoming messages and responses. + - Supports message formatting and templating. + +3. **VoiceManager** + - Manages voice interactions and events. + - Handles joining and leaving voice channels. + +4. **CommandManager** + - Registers and processes slash commands. + - Ensures permissions are validated. + +## Notes + +Ensure that your `.env` file includes the required environment variables for proper functionality. Additional features or modules can be extended as part of the ElizaOS framework. 
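Taken together with the core changes earlier in this diff (the `plugins` array on the default character, the new optional `clientConfig` field on `Character`, and plugin-provided `memoryManagers`), a character can now enable and configure the Discord plugin declaratively. The sketch below is illustrative only: it assumes the package exposes a `discordPlugin` export (the actual export name is not visible in this diff) and uses the `shouldIgnoreBotMessages` option exercised by the test file that follows.

```typescript
import { openaiPlugin } from "@elizaos/plugin-openai";
import { bootstrapPlugin } from "@elizaos/plugin-bootstrap";
// Hypothetical export name: the symbol actually exported by
// @elizaos-plugins/discord is not shown in this diff.
import { discordPlugin } from "@elizaos-plugins/discord";

export const character = {
    name: "Eliza",
    username: "eliza",
    bio: "A helpful assistant available on Discord.",
    // Plugins are now declared on the character itself; createAgent no
    // longer hardcodes bootstrapPlugin (see packages/agent/src/index.ts).
    plugins: [openaiPlugin, bootstrapPlugin, discordPlugin],
    // Optional per-client configuration added to the Character type in
    // packages/core/src/types.ts; the Discord client test below sets
    // shouldIgnoreBotMessages the same way.
    clientConfig: {
        discord: {
            shouldIgnoreBotMessages: true,
        },
    },
    settings: {
        secrets: {},
    },
};
```

Declaring plugins on the character keeps createAgent a thin wrapper around AgentRuntime, and per-client options travel with the character definition rather than with client construction code.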
diff --git a/packages/plugin-discord/__tests__/discord-client.test.ts b/packages/plugin-discord/__tests__/discord-client.test.ts new file mode 100644 index 00000000000..b47b26d0313 --- /dev/null +++ b/packages/plugin-discord/__tests__/discord-client.test.ts @@ -0,0 +1,110 @@ +import { Events } from 'discord.js'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { DiscordClient } from '../src'; + +// Mock @elizaos/core +vi.mock('@elizaos/core', () => ({ + logger: { + info: vi.fn(), + error: vi.fn(), + debug: vi.fn(), + }, + stringToUuid: (str: string) => str, + messageCompletionFooter: '# INSTRUCTIONS: Choose the best response for the agent.', + shouldRespondFooter: '# INSTRUCTIONS: Choose if the agent should respond.', + generateMessageResponse: vi.fn(), + generateShouldRespond: vi.fn(), + composeContext: vi.fn(), + composeRandomUser: vi.fn(), +})); + +// Mock discord.js Client +vi.mock('discord.js', () => { + const mockClient = { + login: vi.fn().mockResolvedValue('token'), + on: vi.fn(), + once: vi.fn(), + destroy: vi.fn().mockResolvedValue(undefined), + }; + + return { + Client: vi.fn(() => mockClient), + Events: { + ClientReady: 'ready', + MessageCreate: 'messageCreate', + VoiceStateUpdate: 'voiceStateUpdate', + MessageReactionAdd: 'messageReactionAdd', + MessageReactionRemove: 'messageReactionRemove', + }, + GatewayIntentBits: { + Guilds: 1, + DirectMessages: 2, + GuildVoiceStates: 3, + MessageContent: 4, + GuildMessages: 5, + DirectMessageTyping: 6, + GuildMessageTyping: 7, + GuildMessageReactions: 8, + }, + Partials: { + Channel: 'channel', + Message: 'message', + User: 'user', + Reaction: 'reaction', + }, + Collection: class Collection extends Map {}, + }; +}); + +describe('DiscordClient', () => { + let mockRuntime: any; + let discordClient: DiscordClient; + + beforeEach(() => { + mockRuntime = { + getSetting: vi.fn((key: string) => { + if (key === 'DISCORD_API_TOKEN') return 'mock-token'; + return undefined; + }), + getState: vi.fn(), + setState: vi.fn(), + getMemory: vi.fn(), + setMemory: vi.fn(), + getService: vi.fn(), + registerAction: vi.fn(), + providers: [], + character: { + clientConfig: { + discord: { + shouldIgnoreBotMessages: true + } + } + } + }; + + discordClient = new DiscordClient(mockRuntime); + }); + + it('should initialize with correct configuration', () => { + expect(discordClient.apiToken).toBe('mock-token'); + expect(discordClient.client).toBeDefined(); + expect(mockRuntime.getSetting).toHaveBeenCalledWith('DISCORD_API_TOKEN'); + }); + + it('should login to Discord on initialization', () => { + expect(discordClient.client.login).toHaveBeenCalledWith('mock-token'); + }); + + it('should register event handlers on initialization', () => { + expect(discordClient.client.once).toHaveBeenCalledWith(Events.ClientReady, expect.any(Function)); + expect(discordClient.client.on).toHaveBeenCalledWith('guildCreate', expect.any(Function)); + expect(discordClient.client.on).toHaveBeenCalledWith(Events.MessageReactionAdd, expect.any(Function)); + expect(discordClient.client.on).toHaveBeenCalledWith(Events.MessageReactionRemove, expect.any(Function)); + expect(discordClient.client.on).toHaveBeenCalledWith('voiceStateUpdate', expect.any(Function)); + }); + + it('should clean up resources when stopped', async () => { + await discordClient.stop(); + expect(discordClient.client.destroy).toHaveBeenCalled(); + }); +}); diff --git a/packages/plugin-discord/images/banner.jpg b/packages/plugin-discord/images/banner.jpg new file mode 100644 index 
00000000000..c8c5ab1e2b0 Binary files /dev/null and b/packages/plugin-discord/images/banner.jpg differ diff --git a/packages/plugin-discord/images/logo.jpg b/packages/plugin-discord/images/logo.jpg new file mode 100644 index 00000000000..4cd9e160fef Binary files /dev/null and b/packages/plugin-discord/images/logo.jpg differ diff --git a/packages/plugin-discord/package.json b/packages/plugin-discord/package.json new file mode 100644 index 00000000000..e6a84469103 --- /dev/null +++ b/packages/plugin-discord/package.json @@ -0,0 +1,46 @@ +{ + "name": "@elizaos-plugins/discord", + "version": "0.25.6-alpha.1", + "type": "module", + "main": "dist/index.js", + "module": "dist/index.js", + "types": "dist/index.d.ts", + "exports": { + "./package.json": "./package.json", + ".": { + "import": { + "@elizaos/source": "./src/index.ts", + "types": "./dist/index.d.ts", + "default": "./dist/index.js" + } + } + }, + "files": [ + "dist" + ], + "dependencies": { + "@discordjs/opus": "github:discordjs/opus", + "@discordjs/rest": "2.4.0", + "@discordjs/voice": "0.17.0", + "discord.js": "14.16.3", + "fluent-ffmpeg": "^2.1.3", + "libsodium-wrappers": "0.7.15", + "prism-media": "1.3.5", + "zod": "3.23.8" + }, + "devDependencies": { + "tsup": "8.3.5", + "vitest": "1.6.1" + }, + "scripts": { + "build": "tsup --format esm --dts", + "dev": "tsup --format esm --dts --watch", + "test": "vitest run" + }, + "peerDependencies": { + "whatwg-url": "7.1.0" + }, + "publishConfig": { + "access": "public" + } +} diff --git a/packages/plugin-discord/src/actions/chat_with_attachments.ts b/packages/plugin-discord/src/actions/chat_with_attachments.ts new file mode 100644 index 00000000000..899bd495f1c --- /dev/null +++ b/packages/plugin-discord/src/actions/chat_with_attachments.ts @@ -0,0 +1,328 @@ +import { + type Action, + type ActionExample, composeContext, type Content, generateText, type HandlerCallback, + type IAgentRuntime, + type Memory, + ModelClass, parseJSONObjectFromText, type State, trimTokens +} from "@elizaos/core"; +import * as fs from "fs"; + +export const summarizationTemplate = `# Summarized so far (we are adding to this) +{{currentSummary}} + +# Current attachments we are summarizing +{{attachmentsWithText}} + +Summarization objective: {{objective}} + +# Instructions: Summarize the attachments. Return the summary. Do not acknowledge this request, just summarize and continue the existing summary if there is one. Capture any important details based on the objective. Only respond with the new summary text.`; + +export const attachmentIdsTemplate = `# Messages we are summarizing +{{recentMessages}} + +# Instructions: {{senderName}} is requesting a summary of specific attachments. Your goal is to determine their objective, along with the list of attachment IDs to summarize. +The "objective" is a detailed description of what the user wants to summarize based on the conversation. +The "attachmentIds" is an array of attachment IDs that the user wants to summarize. If not specified, default to including all attachments from the conversation. + +Your response must be formatted as a JSON block with this structure: +\`\`\`json +{ + "objective": "", + "attachmentIds": ["", "", ...] 
+} +\`\`\` +`; + +const getAttachmentIds = async ( + runtime: IAgentRuntime, + message: Memory, + state: State +): Promise<{ objective: string; attachmentIds: string[] } | null> => { + state = (await runtime.composeState(message)) as State; + + const context = composeContext({ + state, + template: attachmentIdsTemplate, + }); + + for (let i = 0; i < 5; i++) { + const response = await generateText({ + runtime, + context, + modelClass: ModelClass.TEXT_SMALL, + }); + console.log("response", response); + // try parsing to a json object + const parsedResponse = parseJSONObjectFromText(response) as { + objective: string; + attachmentIds: string[]; + } | null; + // see if it contains objective and attachmentIds + if (parsedResponse?.objective && parsedResponse?.attachmentIds) { + return parsedResponse; + } + } + return null; +}; + +const summarizeAction = { + name: "CHAT_WITH_ATTACHMENTS", + similes: [ + "CHAT_WITH_ATTACHMENT", + "SUMMARIZE_FILES", + "SUMMARIZE_FILE", + "SUMMARIZE_ATACHMENT", + "CHAT_WITH_PDF", + "ATTACHMENT_SUMMARY", + "RECAP_ATTACHMENTS", + "SUMMARIZE_FILE", + "SUMMARIZE_VIDEO", + "SUMMARIZE_AUDIO", + "SUMMARIZE_IMAGE", + "SUMMARIZE_DOCUMENT", + "SUMMARIZE_LINK", + "ATTACHMENT_SUMMARY", + "FILE_SUMMARY", + ], + description: + "Answer a user request informed by specific attachments based on their IDs. If a user asks to chat with a PDF, or wants more specific information about a link or video or anything else they've attached, this is the action to use.", + validate: async ( + _runtime: IAgentRuntime, + message: Memory, + _state: State + ) => { + if (message.content.source !== "discord") { + return false; + } + // only show if one of the keywords are in the message + const keywords: string[] = [ + "attachment", + "summary", + "summarize", + "research", + "pdf", + "video", + "audio", + "image", + "document", + "link", + "file", + "attachment", + "summarize", + "code", + "report", + "write", + "details", + "information", + "talk", + "chat", + "read", + "listen", + "watch", + ]; + return keywords.some((keyword) => + message.content.text.toLowerCase().includes(keyword.toLowerCase()) + ); + }, + handler: async ( + runtime: IAgentRuntime, + message: Memory, + state: State, + options: any, + callback: HandlerCallback + ) => { + state = (await runtime.composeState(message)) as State; + + const callbackData: Content = { + text: "", // fill in later + action: "CHAT_WITH_ATTACHMENTS_RESPONSE", + source: message.content.source, + attachments: [], + }; + + // 1. 
extract attachment IDs from the message + const attachmentData = await getAttachmentIds(runtime, message, state); + if (!attachmentData) { + console.error("Couldn't get attachment IDs from message"); + return; + } + + const { objective, attachmentIds } = attachmentData; + + // This is pretty gross but it can catch cases where the returned generated UUID is stupidly wrong for some reason + const attachments = state.recentMessagesData + .filter( + (msg) => + msg.content.attachments && + msg.content.attachments.length > 0 + ) + .flatMap((msg) => msg.content.attachments) + // check by first 5 characters of uuid + .filter( + (attachment) => + attachmentIds + .map((attch) => attch.toLowerCase().slice(0, 5)) + .includes(attachment.id.toLowerCase().slice(0, 5)) || + // or check the other way + attachmentIds.some((id) => { + const attachmentId = id.toLowerCase().slice(0, 5); + return attachment.id + .toLowerCase() + .includes(attachmentId); + }) + ); + + const attachmentsWithText = attachments + .map((attachment) => `# ${attachment.title}\n${attachment.text}`) + .join("\n\n"); + + let currentSummary = ""; + + const chunkSize = 8192; + + state.attachmentsWithText = attachmentsWithText; + state.objective = objective; + const template = await trimTokens( + summarizationTemplate, + chunkSize, + runtime + ); + const context = composeContext({ + state, + // make sure it fits, we can pad the tokens a bit + // Get the model's tokenizer based on the current model being used + template, + }); + + const summary = await generateText({ + runtime, + context, + modelClass: ModelClass.TEXT_SMALL, + }); + + currentSummary = currentSummary + "\n" + summary; + + if (!currentSummary) { + console.error("No summary found, that's not good!"); + return; + } + + callbackData.text = currentSummary.trim(); + if ( + callbackData.text && + (currentSummary.trim()?.split("\n").length < 4 || + currentSummary.trim()?.split(" ").length < 100) + ) { + callbackData.text = `Here is the summary: +\`\`\`md +${currentSummary.trim()} +\`\`\` +`; + await callback(callbackData); + } else if (currentSummary.trim()) { + const summaryFilename = `content/summary_${Date.now()}.md`; + + try { + // Debug: Log before file operations + console.log("Creating summary file:", { + filename: summaryFilename, + summaryLength: currentSummary.length, + }); + + // Write file directly first + await fs.promises.writeFile( + summaryFilename, + currentSummary, + "utf8" + ); + console.log("File written successfully"); + + // Then cache it + await runtime.cacheManager.set(summaryFilename, currentSummary); + console.log("Cache set operation completed"); + + await callback( + { + ...callbackData, + text: `I've attached the summary of the requested attachments as a text file.`, + }, + [summaryFilename] + ); + console.log("Callback completed with summary file"); + } catch (error) { + console.error("Error in file/cache process:", error); + throw error; + } + } else { + console.warn( + "Empty response from chat with attachments action, skipping" + ); + } + + return callbackData; + }, + examples: [ + [ + { + user: "{{user1}}", + content: { + text: "Can you summarize the attachments b3e23, c4f67, and d5a89?", + }, + }, + { + user: "{{user2}}", + content: { + text: "Sure thing! 
I'll pull up those specific attachments and provide a summary of their content.", + action: "CHAT_WITH_ATTACHMENTS", + }, + }, + ], + [ + { + user: "{{user1}}", + content: { + text: "I need a technical summary of the PDFs I sent earlier - a1b2c3.pdf, d4e5f6.pdf, and g7h8i9.pdf", + }, + }, + { + user: "{{user2}}", + content: { + text: "I'll take a look at those specific PDF attachments and put together a technical summary for you. Give me a few minutes to review them.", + action: "CHAT_WITH_ATTACHMENTS", + }, + }, + ], + [ + { + user: "{{user1}}", + content: { + text: "Can you watch this video for me and tell me which parts you think are most relevant to the report I'm writing? (the one I attached in my last message)", + }, + }, + { + user: "{{user2}}", + content: { + text: "sure, no problem.", + action: "CHAT_WITH_ATTACHMENTS", + }, + }, + ], + [ + { + user: "{{user1}}", + content: { + text: "can you read my blog post and give me a detailed breakdown of the key points I made, and then suggest a handful of tweets to promote it?", + }, + }, + { + user: "{{user2}}", + content: { + text: "great idea, give me a minute", + action: "CHAT_WITH_ATTACHMENTS", + }, + }, + ], + ] as ActionExample[][], +} as Action; + +export default summarizeAction; diff --git a/packages/plugin-discord/src/actions/download_media.ts b/packages/plugin-discord/src/actions/download_media.ts new file mode 100644 index 00000000000..68c6b3c9539 --- /dev/null +++ b/packages/plugin-discord/src/actions/download_media.ts @@ -0,0 +1,196 @@ +import path from "path"; +import { composeContext } from "@elizaos/core"; +import { parseJSONObjectFromText } from "@elizaos/core"; +import { + type Action, + type ActionExample, + type Content, + type HandlerCallback, + type IAgentRuntime, + type IVideoService, + type Memory, + ModelClass, + ServiceType, + type State, +} from "@elizaos/core"; +import { generateText } from "@elizaos/core"; + +export const mediaUrlTemplate = `# Messages we are searching for a media URL +{{recentMessages}} + +# Instructions: {{senderName}} is requesting to download a specific media file (video or audio). Your goal is to determine the URL of the media they want to download. +The "mediaUrl" is the URL of the media file that the user wants downloaded. If not specified, return null. 
+ +Your response must be formatted as a JSON block with this structure: +\`\`\`json +{ + "mediaUrl": "" +} +\`\`\` +`; + +const getMediaUrl = async ( + runtime: IAgentRuntime, + message: Memory, + state: State +): Promise => { + if (!state) { + state = (await runtime.composeState(message)) as State; + } + + const context = composeContext({ + state, + template: mediaUrlTemplate, + }); + + for (let i = 0; i < 5; i++) { + const response = await generateText({ + runtime, + context, + modelClass: ModelClass.TEXT_SMALL, + }); + + const parsedResponse = parseJSONObjectFromText(response) as { + mediaUrl: string; + } | null; + + if (parsedResponse?.mediaUrl) { + return parsedResponse.mediaUrl; + } + } + return null; +}; + +export default { + name: "DOWNLOAD_MEDIA", + similes: [ + "DOWNLOAD_VIDEO", + "DOWNLOAD_AUDIO", + "GET_MEDIA", + "DOWNLOAD_PODCAST", + "DOWNLOAD_YOUTUBE", + ], + description: + "Downloads a video or audio file from a URL and attaches it to the response message.", + validate: async ( + runtime: IAgentRuntime, + message: Memory, + _state: State + ) => { + if (message.content.source !== "discord") { + return false; + } + }, + handler: async ( + runtime: IAgentRuntime, + message: Memory, + state: State, + options: any, + callback: HandlerCallback + ) => { + const videoService = runtime + .getService(ServiceType.VIDEO) + .getInstance() as IVideoService; + if (!state) { + state = (await runtime.composeState(message)) as State; + } + + const mediaUrl = await getMediaUrl(runtime, message, state); + if (!mediaUrl) { + console.error("Couldn't get media URL from messages"); + return; + } + + const videoInfo = await videoService.fetchVideoInfo(mediaUrl); + const mediaPath = await videoService.downloadVideo(videoInfo); + + const response: Content = { + text: `I downloaded the video "${videoInfo.title}" and attached it below.`, + action: "DOWNLOAD_MEDIA_RESPONSE", + source: message.content.source, + attachments: [], + }; + + const filename = path.basename(mediaPath); + + const maxRetries = 3; + let retries = 0; + + while (retries < maxRetries) { + try { + await callback( + { + ...response, + }, + ["content_cache/" + filename] + ); + break; + } catch (error) { + retries++; + console.error( + `Error sending message (attempt ${retries}):`, + error + ); + + if (retries === maxRetries) { + console.error( + "Max retries reached. Failed to send message with attachment." + ); + break; + } + + // Wait for a short delay before retrying + await new Promise((resolve) => setTimeout(resolve, 2000)); + } + } + + return response; + }, + examples: [ + [ + { + user: "{{user1}}", + content: { + text: "https://www.youtube.com/watch?v=dQw4w9WgXcQ", + }, + }, + { + user: "{{user2}}", + content: { + text: "Downloading the YouTube video now, one sec", + action: "DOWNLOAD_MEDIA", + }, + }, + ], + [ + { + user: "{{user1}}", + content: { + text: "Can you grab this video for me? https://vimeo.com/123456789", + }, + }, + { + user: "{{user2}}", + content: { + text: "Sure thing, I'll download that Vimeo video for you", + action: "DOWNLOAD_MEDIA", + }, + }, + ], + [ + { + user: "{{user1}}", + content: { + text: "I need this video downloaded: https://www.youtube.com/watch?v=abcdefg", + }, + }, + { + user: "{{user2}}", + content: { + text: "No problem, I'm on it. 
I'll have that YouTube video downloaded in a jiffy", + action: "DOWNLOAD_MEDIA", + }, + }, + ], + ] as ActionExample[][], +} as Action; diff --git a/packages/plugin-discord/src/actions/joinvoice.ts b/packages/plugin-discord/src/actions/joinvoice.ts new file mode 100644 index 00000000000..7d9d9a699fd --- /dev/null +++ b/packages/plugin-discord/src/actions/joinvoice.ts @@ -0,0 +1,346 @@ +// eslint-disable-next-line +// @ts-nocheck +// src/actions/joinVoice +import { + type Action, + type ActionExample, + composeContext, + type IAgentRuntime, + type Memory, + type State, + generateText, + ModelClass, +} from "@elizaos/core"; +import { + type Channel, + ChannelType, + type Client, + type Message as DiscordMessage, + type Guild, + type GuildMember, +} from "discord.js"; +import { joinVoiceChannel } from "@discordjs/voice"; + +export default { + name: "JOIN_VOICE", + similes: [ + "JOIN_VOICE", + "JOIN_VC", + "JOIN_VOICE_CHAT", + "JOIN_VOICE_CHANNEL", + "JOIN_MEETING", + "JOIN_CALL", + ], + validate: async ( + _runtime: IAgentRuntime, + message: Memory, + state: State + ) => { + if (message.content.source !== "discord") { + // not a discord message + return false; + } + + if (!state.discordClient) { + return; + } + + // did they say something about joining a voice channel? if not, don't validate + const keywords = [ + "join", + "come to", + "come on", + "enter", + "voice", + "chat", + "talk", + "call", + "hop on", + "get on", + "vc", + "meeting", + "discussion", + ]; + if ( + !keywords.some((keyword) => + message.content.text.toLowerCase().includes(keyword) + ) + ) { + return false; + } + + return true; + }, + description: "Join a voice channel to participate in voice chat.", + handler: async ( + runtime: IAgentRuntime, + message: Memory, + state: State + ): Promise => { + if (!state) { + console.error("State is not available."); + } + + // We normalize data in from voice channels + const discordMessage = (state.discordChannel || + state.discordMessage) as DiscordMessage; + + if (!discordMessage.content) { + discordMessage.content = message.content.text; + } + + const id = (discordMessage as DiscordMessage).guild?.id as string; + const client = state.discordClient as Client; + const voiceChannels = ( + client.guilds.cache.get(id) as Guild + ).channels.cache.filter( + (channel: Channel) => channel.type === ChannelType.GuildVoice + ); + + const messageContent = discordMessage.content; + + const targetChannel = voiceChannels.find((channel) => { + const name = (channel as { name: string }).name.toLowerCase(); + + // remove all non-alphanumeric characters (keep spaces between words) + const replacedName = name.replace(/[^a-z0-9 ]/g, ""); + + return ( + name.includes(messageContent) || + messageContent.includes(name) || + replacedName.includes(messageContent) || + messageContent.includes(replacedName) + ); + }); + + if (targetChannel) { + joinVoiceChannel({ + channelId: targetChannel.id, + guildId: (discordMessage as DiscordMessage).guild?.id as string, + adapterCreator: (client.guilds.cache.get(id) as Guild) + .voiceAdapterCreator, + selfDeaf: false, + selfMute: false, + group: client.user.id, + }); + return true; + } else { + const member = (discordMessage as DiscordMessage) + .member as GuildMember; + if (member?.voice?.channel) { + joinVoiceChannel({ + channelId: member.voice.channel.id, + guildId: (discordMessage as DiscordMessage).guild + ?.id as string, + adapterCreator: (client.guilds.cache.get(id) as Guild) + .voiceAdapterCreator, + selfDeaf: false, + selfMute: false, + group: client.user.id, 
+ }); + return true; + } + + const messageTemplate = ` +The user has requested to join a voice channel. +Here is the list of channels available in the server: +{{voiceChannels}} + +Here is the user's request: +{{userMessage}} + +Please respond with the name of the voice channel which the bot should join. Try to infer what channel the user is talking about. If the user didn't specify a voice channel, respond with "none". +You should only respond with the name of the voice channel or none, no commentary or additional information should be included. +`; + + const guessState = { + userMessage: message.content.text, + voiceChannels: voiceChannels + .map((channel) => (channel as { name: string }).name) + .join("\n"), + }; + + const context = composeContext({ + template: messageTemplate, + state: guessState as unknown as State, + }); + + const _datestr = new Date().toUTCString().replace(/:/g, "-"); + + const responseContent = await generateText({ + runtime, + context, + modelClass: ModelClass.TEXT_SMALL, + }); + + runtime.databaseAdapter.log({ + body: { message, context, response: responseContent }, + userId: message.userId, + roomId: message.roomId, + type: "joinvoice", + }); + + if (responseContent && responseContent.trim().length > 0) { + // join the voice channel + const channelName = responseContent.toLowerCase(); + + const targetChannel = voiceChannels.find((channel) => { + const name = ( + channel as { name: string } + ).name.toLowerCase(); + + // remove all non-alphanumeric characters (keep spaces between words) + const replacedName = name.replace(/[^a-z0-9 ]/g, ""); + + return ( + name.includes(channelName) || + channelName.includes(name) || + replacedName.includes(channelName) || + channelName.includes(replacedName) + ); + }); + + if (targetChannel) { + joinVoiceChannel({ + channelId: targetChannel.id, + guildId: (discordMessage as DiscordMessage).guild + ?.id as string, + adapterCreator: (client.guilds.cache.get(id) as Guild) + .voiceAdapterCreator, + selfDeaf: false, + selfMute: false, + group: client.user.id, + }); + return true; + } + } + + await (discordMessage as DiscordMessage).reply( + "I couldn't figure out which channel you wanted me to join." 
+ ); + return false; + } + }, + examples: [ + [ + { + user: "{{user1}}", + content: { + text: "Hey, let's jump into the 'General' voice and chat", + }, + }, + { + user: "{{user2}}", + content: { + text: "Sounds good", + action: "JOIN_VOICE", + }, + }, + ], + [ + { + user: "{{user1}}", + content: { + text: "{{user2}}, can you join the vc, I want to discuss our strat", + }, + }, + { + user: "{{user2}}", + content: { + text: "Sure I'll join right now", + action: "JOIN_VOICE", + }, + }, + ], + [ + { + user: "{{user1}}", + content: { + text: "hey {{user2}}, we're having a team meeting in the 'conference' voice channel, plz join us", + }, + }, + { + user: "{{user2}}", + content: { + text: "OK see you there", + action: "JOIN_VOICE", + }, + }, + ], + [ + { + user: "{{user1}}", + content: { + text: "{{user2}}, let's have a quick voice chat in the 'Lounge' channel.", + }, + }, + { + user: "{{user2}}", + content: { + text: "kk be there in a sec", + action: "JOIN_VOICE", + }, + }, + ], + [ + { + user: "{{user1}}", + content: { + text: "Hey {{user2}}, can you join me in the 'Music' voice channel", + }, + }, + { + user: "{{user2}}", + content: { + text: "Sure", + action: "JOIN_VOICE", + }, + }, + ], + [ + { + user: "{{user1}}", + content: { + text: "join voice chat with us {{user2}}", + }, + }, + { + user: "{{user2}}", + content: { + text: "coming", + action: "JOIN_VOICE", + }, + }, + ], + [ + { + user: "{{user1}}", + content: { + text: "hop in vc {{user2}}", + }, + }, + { + user: "{{user2}}", + content: { + text: "joining now", + action: "JOIN_VOICE", + }, + }, + ], + [ + { + user: "{{user1}}", + content: { + text: "get in vc with us {{user2}}", + }, + }, + { + user: "{{user2}}", + content: { + text: "im in", + action: "JOIN_VOICE", + }, + }, + ], + ] as ActionExample[][], +} as Action; diff --git a/packages/plugin-discord/src/actions/leavevoice.ts b/packages/plugin-discord/src/actions/leavevoice.ts new file mode 100644 index 00000000000..20419b754bd --- /dev/null +++ b/packages/plugin-discord/src/actions/leavevoice.ts @@ -0,0 +1,226 @@ +// src/actions/leaveVoice +import { getVoiceConnection } from "@discordjs/voice"; +import { + type Channel, + ChannelType, + type Client, + type Message as DiscordMessage, +} from "discord.js"; +import type { + Action, + ActionExample, + IAgentRuntime, + Memory, + State, +} from "@elizaos/core"; + +export default { + name: "LEAVE_VOICE", + similes: [ + "LEAVE_VOICE", + "LEAVE_VC", + "LEAVE_VOICE_CHAT", + "LEAVE_VOICE_CHANNEL", + "LEAVE_MEETING", + "LEAVE_CALL", + ], + validate: async (runtime: IAgentRuntime, message: Memory, state: State) => { + if (message.content.source !== "discord") { + // not a discord message + return false; + } + + if (!state.discordClient) { + return false; + } + + const keywords = [ + "leave", + "exit", + "stop", + "quit", + "get off", + "get out", + "bye", + "cya", + "see you", + "hop off", + "get off", + "voice", + "vc", + "chat", + "call", + "meeting", + "discussion", + ]; + if ( + !keywords.some((keyword) => + message.content.text.toLowerCase().includes(keyword) + ) + ) { + return false; + } + + const client = state.discordClient as Client; + + // Check if the client is connected to any voice channel + const isConnectedToVoice = client.voice.adapters.size > 0; + + return isConnectedToVoice; + }, + description: "Leave the current voice channel.", + handler: async ( + runtime: IAgentRuntime, + message: Memory, + state: State + ): Promise => { + if (!state.discordClient) { + return; + } + + const discordMessage = (state.discordMessage || + 
state.discordChannel) as DiscordMessage; + + if (!discordMessage) { + throw new Error("Discord message is not available in the state."); + } + const voiceChannels = (state.discordClient as Client)?.guilds.cache + .get((discordMessage as DiscordMessage).guild?.id as string) + ?.channels.cache.filter( + (channel: Channel) => channel.type === ChannelType.GuildVoice + ); + + voiceChannels?.forEach((_channel: Channel) => { + const connection = getVoiceConnection( + (discordMessage as DiscordMessage).guild?.id as string + ); + if (connection) { + connection.destroy(); + } + }); + return true; + }, + examples: [ + [ + { + user: "{{user1}}", + content: { + text: "Hey {{user2}} please leave the voice channel", + }, + }, + { + user: "{{user2}}", + content: { + text: "Sure", + action: "LEAVE_VOICE", + }, + }, + ], + [ + { + user: "{{user1}}", + content: { + text: "I have to go now but thanks for the chat", + }, + }, + { + user: "{{user2}}", + content: { + text: "You too, talk to you later", + action: "LEAVE_VOICE", + }, + }, + ], + [ + { + user: "{{user1}}", + content: { + text: "Great call everyone, hopping off now", + action: "LEAVE_VOICE", + }, + }, + { + user: "{{user2}}", + content: { + text: "Agreed, I'll hop off too", + action: "LEAVE_VOICE", + }, + }, + ], + [ + { + user: "{{user1}}", + content: { + text: "Hey {{user2}} I need you to step away from the voice chat for a bit", + }, + }, + { + user: "{{user2}}", + content: { + text: "No worries, I'll leave the voice channel", + action: "LEAVE_VOICE", + }, + }, + ], + [ + { + user: "{{user1}}", + content: { + text: "{{user2}}, I think we covered everything, you can leave the voice chat now", + }, + }, + { + user: "{{user2}}", + content: { + text: "Sounds good, see you both later", + action: "LEAVE_VOICE", + }, + }, + ], + [ + { + user: "{{user1}}", + content: { + text: "leave voice {{user2}}", + }, + }, + { + user: "{{user2}}", + content: { + text: "ok leaving", + action: "LEAVE_VOICE", + }, + }, + ], + [ + { + user: "{{user1}}", + content: { + text: "plz leave the voice chat {{user2}}", + }, + }, + { + user: "{{user2}}", + content: { + text: "aight im out", + action: "LEAVE_VOICE", + }, + }, + ], + [ + { + user: "{{user1}}", + content: { + text: "yo {{user2}} gtfo the vc", + }, + }, + { + user: "{{user2}}", + content: { + text: "sorry, talk to you later", + action: "LEAVE_VOICE", + }, + }, + ], + ] as ActionExample[][], +} as Action; diff --git a/packages/plugin-discord/src/actions/summarize_conversation.ts b/packages/plugin-discord/src/actions/summarize_conversation.ts new file mode 100644 index 00000000000..7dc086bc0a1 --- /dev/null +++ b/packages/plugin-discord/src/actions/summarize_conversation.ts @@ -0,0 +1,388 @@ +import { composeContext } from "@elizaos/core"; +import { generateText, splitChunks, trimTokens } from "@elizaos/core"; +import { getActorDetails } from "@elizaos/core"; +import { parseJSONObjectFromText } from "@elizaos/core"; +import { + type Action, + type ActionExample, + type Content, + type HandlerCallback, + type IAgentRuntime, + type Media, + type Memory, + ModelClass, + type State, +} from "@elizaos/core"; +export const summarizationTemplate = `# Summarized so far (we are adding to this) +{{currentSummary}} + +# Current conversation chunk we are summarizing (includes attachments) +{{memoriesWithAttachments}} + +Summarization objective: {{objective}} + +# Instructions: Summarize the conversation so far. Return the summary. Do not acknowledge this request, just summarize and continue the existing summary if there is one. 
Capture any important details to the objective. Only respond with the new summary text. +Your response should be extremely detailed and include any and all relevant information.`; + +export const dateRangeTemplate = `# Messages we are summarizing (the conversation is continued after this) +{{recentMessages}} + +# Instructions: {{senderName}} is requesting a summary of the conversation. Your goal is to determine their objective, along with the range of dates that their request covers. +The "objective" is a detailed description of what the user wants to summarize based on the conversation. If they just ask for a general summary, you can either base it off the conversation if the summary range is very recent, or set the object to be general, like "a detailed summary of the conversation between all users". +The "start" and "end" are the range of dates that the user wants to summarize, relative to the current time. The start and end should be relative to the current time, and measured in seconds, minutes, hours and days. The format is "2 days ago" or "3 hours ago" or "4 minutes ago" or "5 seconds ago", i.e. " ago". +If you aren't sure, you can use a default range of "0 minutes ago" to "2 hours ago" or more. Better to err on the side of including too much than too little. + +Your response must be formatted as a JSON block with this structure: +\`\`\`json +{ + "objective": "", + "start": "0 minutes ago", + "end": "2 hours ago" +} +\`\`\` +`; + +const getDateRange = async ( + runtime: IAgentRuntime, + message: Memory, + state: State +) => { + state = (await runtime.composeState(message)) as State; + + const context = composeContext({ + state, + template: dateRangeTemplate, + }); + + for (let i = 0; i < 5; i++) { + const response = await generateText({ + runtime, + context, + modelClass: ModelClass.TEXT_SMALL, + }); + console.log("response", response); + // try parsing to a json object + const parsedResponse = parseJSONObjectFromText(response) as { + objective: string; + start: string | number; + end: string | number; + } | null; + // see if it contains objective, start and end + if (parsedResponse) { + if ( + parsedResponse.objective && + parsedResponse.start && + parsedResponse.end + ) { + // TODO: parse start and end into timestamps + const startIntegerString = ( + parsedResponse.start as string + ).match(/\d+/)?.[0]; + const endIntegerString = (parsedResponse.end as string).match( + /\d+/ + )?.[0]; + + // parse multiplier + const multipliers = { + second: 1 * 1000, + minute: 60 * 1000, + hour: 3600 * 1000, + day: 86400 * 1000, + }; + + const startMultiplier = (parsedResponse.start as string).match( + /second|minute|hour|day/ + )?.[0]; + const endMultiplier = (parsedResponse.end as string).match( + /second|minute|hour|day/ + )?.[0]; + + const startInteger = startIntegerString + ? Number.parseInt(startIntegerString) + : 0; + const endInteger = endIntegerString + ? 
Number.parseInt(endIntegerString) + : 0; + + // multiply by multiplier + const startTime = + startInteger * + multipliers[startMultiplier as keyof typeof multipliers]; + + console.log("startTime", startTime); + + const endTime = + endInteger * + multipliers[endMultiplier as keyof typeof multipliers]; + + console.log("endTime", endTime); + + // get the current time and subtract the start and end times + parsedResponse.start = Date.now() - startTime; + parsedResponse.end = Date.now() - endTime; + + return parsedResponse; + } + } + } +}; + +const summarizeAction = { + name: "SUMMARIZE_CONVERSATION", + similes: [ + "RECAP", + "RECAP_CONVERSATION", + "SUMMARIZE_CHAT", + "SUMMARIZATION", + "CHAT_SUMMARY", + "CONVERSATION_SUMMARY", + ], + description: "Summarizes the conversation and attachments.", + validate: async ( + runtime: IAgentRuntime, + message: Memory, + _state: State + ) => { + if (message.content.source !== "discord") { + return false; + } + // only show if one of the keywords are in the message + const keywords: string[] = [ + "summarize", + "summarization", + "summary", + "recap", + "report", + "overview", + "review", + "rundown", + "wrap-up", + "brief", + "debrief", + "abstract", + "synopsis", + "outline", + "digest", + "abridgment", + "condensation", + "encapsulation", + "essence", + "gist", + "main points", + "key points", + "key takeaways", + "bulletpoint", + "highlights", + "tldr", + "tl;dr", + "in a nutshell", + "bottom line", + "long story short", + "sum up", + "sum it up", + "short version", + "bring me up to speed", + "catch me up", + ]; + return keywords.some((keyword) => + message.content.text.toLowerCase().includes(keyword.toLowerCase()) + ); + }, + handler: async ( + runtime: IAgentRuntime, + message: Memory, + state: State, + options: any, + callback: HandlerCallback + ) => { + state = (await runtime.composeState(message)) as State; + + const callbackData: Content = { + text: "", // fill in later + action: "SUMMARIZATION_RESPONSE", + source: message.content.source, + attachments: [], + }; + const { roomId } = message; + + // 1. extract date range from the message + const dateRange = await getDateRange(runtime, message, state); + if (!dateRange) { + console.error("Couldn't get date range from message"); + return; + } + + console.log("dateRange", dateRange); + + const { objective, start, end } = dateRange; + + // 2. get these memories from the database + const memories = await runtime.messageManager.getMemories({ + roomId, + // subtract start from current time + start: Number.parseInt(start as string), + end: Number.parseInt(end as string), + count: 10000, + unique: false, + }); + + const actors = await getActorDetails({ + runtime: runtime as IAgentRuntime, + roomId, + }); + + const actorMap = new Map(actors.map((actor) => [actor.id, actor])); + + const formattedMemories = memories + .map((memory) => { + const attachments = memory.content.attachments + ?.map((attachment: Media) => { + return `---\nAttachment: ${attachment.id}\n${attachment.description}\n${attachment.text}\n---`; + }) + .join("\n"); + return `${actorMap.get(memory.userId)?.name ?? "Unknown User"} (${actorMap.get(memory.userId)?.username ?? 
""}): ${memory.content.text}\n${attachments}`; + }) + .join("\n"); + + let currentSummary = ""; + + const chunkSize = 8000; + + const chunks = await splitChunks(formattedMemories, chunkSize, 0); + + const _datestr = new Date().toUTCString().replace(/:/g, "-"); + + state.memoriesWithAttachments = formattedMemories; + state.objective = objective; + + for (let i = 0; i < chunks.length; i++) { + const chunk = chunks[i]; + state.currentSummary = currentSummary; + state.currentChunk = chunk; + const template = await trimTokens( + summarizationTemplate, + chunkSize + 500, + runtime + ); + const context = composeContext({ + state, + // make sure it fits, we can pad the tokens a bit + template, + }); + + const summary = await generateText({ + runtime, + context, + modelClass: ModelClass.TEXT_SMALL, + }); + + currentSummary = currentSummary + "\n" + summary; + } + + if (!currentSummary) { + console.error("No summary found, that's not good!"); + return; + } + + callbackData.text = currentSummary.trim(); + if ( + callbackData.text && + (currentSummary.trim()?.split("\n").length < 4 || + currentSummary.trim()?.split(" ").length < 100) + ) { + callbackData.text = `Here is the summary: +\`\`\`md +${currentSummary.trim()} +\`\`\` +`; + await callback(callbackData); + } else if (currentSummary.trim()) { + const summaryFilename = `content/conversation_summary_${Date.now()}`; + await runtime.cacheManager.set(summaryFilename, currentSummary); + // save the summary to a file + await callback( + { + ...callbackData, + text: `I've attached the summary of the conversation from \`${new Date(Number.parseInt(start as string)).toString()}\` to \`${new Date(Number.parseInt(end as string)).toString()}\` as a text file.`, + }, + [summaryFilename] + ); + } else { + console.warn( + "Empty response from summarize conversation action, skipping" + ); + } + + return callbackData; + }, + examples: [ + [ + { + user: "{{user1}}", + content: { + text: "```js\nconst x = 10\n```", + }, + }, + { + user: "{{user1}}", + content: { + text: "can you give me a detailed report on what we're talking about?", + }, + }, + { + user: "{{user2}}", + content: { + text: "sure, no problem, give me a minute to get that together for you", + action: "SUMMARIZE", + }, + }, + ], + [ + { + user: "{{user1}}", + content: { + text: "please summarize the conversation we just had and include this blogpost i'm linking (Attachment: b3e12)", + }, + }, + { + user: "{{user2}}", + content: { + text: "sure, give me a sec", + action: "SUMMARIZE", + }, + }, + ], + [ + { + user: "{{user1}}", + content: { + text: "Can you summarize what moon and avf are talking about?", + }, + }, + { + user: "{{user2}}", + content: { + text: "Yeah, just hold on a second while I get that together for you...", + action: "SUMMARIZE", + }, + }, + ], + [ + { + user: "{{user1}}", + content: { + text: "i need to write a blog post about farming, can you summarize the discussion from a few hours ago?", + }, + }, + { + user: "{{user2}}", + content: { + text: "no problem, give me a few minutes to read through everything", + action: "SUMMARIZE", + }, + }, + ], + ] as ActionExample[][], +} as Action; + +export default summarizeAction; diff --git a/packages/plugin-discord/src/actions/transcribe_media.ts b/packages/plugin-discord/src/actions/transcribe_media.ts new file mode 100644 index 00000000000..2693367fae8 --- /dev/null +++ b/packages/plugin-discord/src/actions/transcribe_media.ts @@ -0,0 +1,225 @@ +import { composeContext } from "@elizaos/core"; +import { generateText } from "@elizaos/core"; 
+import { parseJSONObjectFromText } from "@elizaos/core"; +import { + type Action, + type ActionExample, + type Content, + type HandlerCallback, + type IAgentRuntime, + type Memory, + ModelClass, + type State, +} from "@elizaos/core"; + +export const transcriptionTemplate = `# Transcription of media file +{{mediaTranscript}} + +# Instructions: Return only the full transcript of the media file without any additional context or commentary.`; + +export const mediaAttachmentIdTemplate = `# Messages we are transcribing +{{recentMessages}} + +# Instructions: {{senderName}} is requesting a transcription of a specific media file (audio or video). Your goal is to determine the ID of the attachment they want transcribed. +The "attachmentId" is the ID of the media file attachment that the user wants transcribed. If not specified, return null. + +Your response must be formatted as a JSON block with this structure: +\`\`\`json +{ + "attachmentId": "" +} +\`\`\` +`; + +const getMediaAttachmentId = async ( + runtime: IAgentRuntime, + message: Memory, + state: State +): Promise => { + state = (await runtime.composeState(message)) as State; + + const context = composeContext({ + state, + template: mediaAttachmentIdTemplate, + }); + + for (let i = 0; i < 5; i++) { + const response = await generateText({ + runtime, + context, + modelClass: ModelClass.TEXT_SMALL, + }); + console.log("response", response); + + const parsedResponse = parseJSONObjectFromText(response) as { + attachmentId: string; + } | null; + + if (parsedResponse?.attachmentId) { + return parsedResponse.attachmentId; + } + } + return null; +}; + +const transcribeMediaAction = { + name: "TRANSCRIBE_MEDIA", + similes: [ + "TRANSCRIBE_AUDIO", + "TRANSCRIBE_VIDEO", + "MEDIA_TRANSCRIPT", + "VIDEO_TRANSCRIPT", + "AUDIO_TRANSCRIPT", + ], + description: + "Transcribe the full text of an audio or video file that the user has attached.", + validate: async ( + _runtime: IAgentRuntime, + message: Memory, + _state: State + ) => { + if (message.content.source !== "discord") { + return false; + } + + const keywords: string[] = [ + "transcribe", + "transcript", + "audio", + "video", + "media", + "youtube", + "meeting", + "recording", + "podcast", + "call", + "conference", + "interview", + "speech", + "lecture", + "presentation", + ]; + return keywords.some((keyword) => + message.content.text.toLowerCase().includes(keyword.toLowerCase()) + ); + }, + handler: async ( + runtime: IAgentRuntime, + message: Memory, + state: State, + options: any, + callback: HandlerCallback + ) => { + state = (await runtime.composeState(message)) as State; + + const callbackData: Content = { + text: "", // fill in later + action: "TRANSCRIBE_MEDIA_RESPONSE", + source: message.content.source, + attachments: [], + }; + + const attachmentId = await getMediaAttachmentId( + runtime, + message, + state + ); + if (!attachmentId) { + console.error("Couldn't get media attachment ID from message"); + return; + } + + const attachment = state.recentMessagesData + .filter( + (msg) => + msg.content.attachments && + msg.content.attachments.length > 0 + ) + .flatMap((msg) => msg.content.attachments) + .find( + (attachment) => + attachment.id.toLowerCase() === attachmentId.toLowerCase() + ); + + if (!attachment) { + console.error(`Couldn't find attachment with ID ${attachmentId}`); + return; + } + + const mediaTranscript = attachment.text; + + callbackData.text = mediaTranscript.trim(); + + // if callbackData.text is < 4 lines or < 100 words, then we we callback with normal message wrapped in 
markdown block + if ( + callbackData.text && + (callbackData.text?.split("\n").length < 4 || + callbackData.text?.split(" ").length < 100) + ) { + callbackData.text = `Here is the transcript: +\`\`\`md +${mediaTranscript.trim()} +\`\`\` +`; + await callback(callbackData); + } + // if text is big, let's send as an attachment + else if (callbackData.text) { + const transcriptFilename = `content/transcript_${Date.now()}`; + + // save the transcript to a file + await runtime.cacheManager.set( + transcriptFilename, + callbackData.text + ); + + await callback( + { + ...callbackData, + text: `I've attached the transcript as a text file.`, + }, + [transcriptFilename] + ); + } else { + console.warn( + "Empty response from transcribe media action, skipping" + ); + } + + return callbackData; + }, + examples: [ + [ + { + user: "{{user1}}", + content: { + text: "Please transcribe the audio file I just sent.", + }, + }, + { + user: "{{user2}}", + content: { + text: "Sure, I'll transcribe the full audio for you.", + action: "TRANSCRIBE_MEDIA", + }, + }, + ], + [ + { + user: "{{user1}}", + content: { + text: "Can I get a transcript of that video recording?", + }, + }, + { + user: "{{user2}}", + content: { + text: "Absolutely, give me a moment to generate the full transcript of the video.", + action: "TRANSCRIBE_MEDIA", + }, + }, + ], + ] as ActionExample[][], +} as Action; + +export default transcribeMediaAction; diff --git a/packages/plugin-discord/src/attachments.ts b/packages/plugin-discord/src/attachments.ts new file mode 100644 index 00000000000..5254fc7e1e4 --- /dev/null +++ b/packages/plugin-discord/src/attachments.ts @@ -0,0 +1,372 @@ +import { generateCaption, generateText, trimTokens } from "@elizaos/core"; +import { parseJSONObjectFromText } from "@elizaos/core"; +import { + type IAgentRuntime, + type IPdfService, + type ITranscriptionService, + type IVideoService, + type Media, + ModelClass, + ServiceType, +} from "@elizaos/core"; +import { type Attachment, Collection } from "discord.js"; +import ffmpeg from "fluent-ffmpeg"; +import fs from "fs"; + +async function generateSummary( + runtime: IAgentRuntime, + text: string +): Promise<{ title: string; description: string }> { + // make sure text is under 128k characters + text = await trimTokens(text, 100000, runtime); + + const prompt = `Please generate a concise summary for the following text: + + Text: """ + ${text} + """ + + Respond with a JSON object in the following format: + \`\`\`json + { + "title": "Generated Title", + "summary": "Generated summary and/or description of the text" + } + \`\`\``; + + const response = await generateText({ + runtime, + context: prompt, + modelClass: ModelClass.TEXT_SMALL, + }); + + const parsedResponse = parseJSONObjectFromText(response); + + if (parsedResponse?.title && parsedResponse?.summary) { + return { + title: parsedResponse.title, + description: parsedResponse.summary, + }; + } + + return { + title: "", + description: "", + }; +} + +export class AttachmentManager { + private attachmentCache: Map = new Map(); + private runtime: IAgentRuntime; + + constructor(runtime: IAgentRuntime) { + this.runtime = runtime; + } + + async processAttachments( + attachments: Collection | Attachment[] + ): Promise { + const processedAttachments: Media[] = []; + const attachmentCollection = + attachments instanceof Collection + ? 
attachments + : new Collection(attachments.map((att) => [att.id, att])); + + for (const [, attachment] of attachmentCollection) { + const media = await this.processAttachment(attachment); + if (media) { + processedAttachments.push(media); + } + } + + return processedAttachments; + } + + async processAttachment(attachment: Attachment): Promise { + if (this.attachmentCache.has(attachment.url)) { + return this.attachmentCache.get(attachment.url)!; + } + + let media: Media | null = null; + if (attachment.contentType?.startsWith("application/pdf")) { + media = await this.processPdfAttachment(attachment); + } else if (attachment.contentType?.startsWith("text/plain")) { + media = await this.processPlaintextAttachment(attachment); + } else if ( + attachment.contentType?.startsWith("audio/") || + attachment.contentType?.startsWith("video/mp4") + ) { + media = await this.processAudioVideoAttachment(attachment); + } else if (attachment.contentType?.startsWith("image/")) { + media = await this.processImageAttachment(attachment); + } else if ( + attachment.contentType?.startsWith("video/") || + this.runtime + .getService(ServiceType.VIDEO) + .isVideoUrl(attachment.url) + ) { + media = await this.processVideoAttachment(attachment); + } else { + media = await this.processGenericAttachment(attachment); + } + + if (media) { + this.attachmentCache.set(attachment.url, media); + } + return media; + } + + private async processAudioVideoAttachment( + attachment: Attachment + ): Promise { + try { + const response = await fetch(attachment.url); + const audioVideoArrayBuffer = await response.arrayBuffer(); + + let audioBuffer: Buffer; + if (attachment.contentType?.startsWith("audio/")) { + audioBuffer = Buffer.from(audioVideoArrayBuffer); + } else if (attachment.contentType?.startsWith("video/mp4")) { + audioBuffer = await this.extractAudioFromMP4( + audioVideoArrayBuffer + ); + } else { + throw new Error("Unsupported audio/video format"); + } + + const transcriptionService = + this.runtime.getService( + ServiceType.TRANSCRIPTION + ); + if (!transcriptionService) { + throw new Error("Transcription service not found"); + } + + const transcription = + await transcriptionService.transcribeAttachment(audioBuffer); + const { title, description } = await generateSummary( + this.runtime, + transcription + ); + + return { + id: attachment.id, + url: attachment.url, + title: title || "Audio/Video Attachment", + source: attachment.contentType?.startsWith("audio/") + ? "Audio" + : "Video", + description: + description || + "User-uploaded audio/video attachment which has been transcribed", + text: transcription || "Audio/video content not available", + }; + } catch (error) { + console.error( + `Error processing audio/video attachment: ${error.message}` + ); + return { + id: attachment.id, + url: attachment.url, + title: "Audio/Video Attachment", + source: attachment.contentType?.startsWith("audio/") + ? "Audio" + : "Video", + description: "An audio/video attachment (transcription failed)", + text: `This is an audio/video attachment. 
File name: ${attachment.name}, Size: ${attachment.size} bytes, Content type: ${attachment.contentType}`, + }; + } + } + + private async extractAudioFromMP4(mp4Data: ArrayBuffer): Promise { + // Use a library like 'fluent-ffmpeg' or 'ffmpeg-static' to extract the audio stream from the MP4 data + // and convert it to MP3 or WAV format + // Example using fluent-ffmpeg: + const tempMP4File = `temp_${Date.now()}.mp4`; + const tempAudioFile = `temp_${Date.now()}.mp3`; + + try { + // Write the MP4 data to a temporary file + fs.writeFileSync(tempMP4File, Buffer.from(mp4Data)); + + // Extract the audio stream and convert it to MP3 + await new Promise((resolve, reject) => { + ffmpeg(tempMP4File) + .outputOptions("-vn") // Disable video output + .audioCodec("libmp3lame") // Set audio codec to MP3 + .save(tempAudioFile) // Save the output to the specified file + .on("end", () => { + resolve(); + }) + .on("error", (err) => { + reject(err); + }) + .run(); + }); + + // Read the converted audio file and return it as a Buffer + const audioData = fs.readFileSync(tempAudioFile); + return audioData; + } finally { + // Clean up the temporary files + if (fs.existsSync(tempMP4File)) { + fs.unlinkSync(tempMP4File); + } + if (fs.existsSync(tempAudioFile)) { + fs.unlinkSync(tempAudioFile); + } + } + } + + private async processPdfAttachment(attachment: Attachment): Promise { + try { + const response = await fetch(attachment.url); + const pdfBuffer = await response.arrayBuffer(); + const text = await this.runtime + .getService(ServiceType.PDF) + .convertPdfToText(Buffer.from(pdfBuffer)); + const { title, description } = await generateSummary( + this.runtime, + text + ); + + return { + id: attachment.id, + url: attachment.url, + title: title || "PDF Attachment", + source: "PDF", + description: description || "A PDF document", + text: text, + }; + } catch (error) { + console.error(`Error processing PDF attachment: ${error.message}`); + return { + id: attachment.id, + url: attachment.url, + title: "PDF Attachment (conversion failed)", + source: "PDF", + description: + "A PDF document that could not be converted to text", + text: `This is a PDF attachment. File name: ${attachment.name}, Size: ${attachment.size} bytes`, + }; + } + } + + private async processPlaintextAttachment( + attachment: Attachment + ): Promise { + try { + const response = await fetch(attachment.url); + const text = await response.text(); + const { title, description } = await generateSummary( + this.runtime, + text + ); + + return { + id: attachment.id, + url: attachment.url, + title: title || "Plaintext Attachment", + source: "Plaintext", + description: description || "A plaintext document", + text: text, + }; + } catch (error) { + console.error( + `Error processing plaintext attachment: ${error.message}` + ); + return { + id: attachment.id, + url: attachment.url, + title: "Plaintext Attachment (retrieval failed)", + source: "Plaintext", + description: "A plaintext document that could not be retrieved", + text: `This is a plaintext attachment. 
File name: ${attachment.name}, Size: ${attachment.size} bytes`, + }; + } + } + + private async processImageAttachment( + attachment: Attachment + ): Promise { + try { + const { description, title } = await generateCaption( + { imageUrl: attachment.url }, + this.runtime + ); + return { + id: attachment.id, + url: attachment.url, + title: title || "Image Attachment", + source: "Image", + description: description || "An image attachment", + text: description || "Image content not available", + }; + } catch (error) { + console.error( + `Error processing image attachment: ${error.message}` + ); + return this.createFallbackImageMedia(attachment); + } + } + + private createFallbackImageMedia(attachment: Attachment): Media { + return { + id: attachment.id, + url: attachment.url, + title: "Image Attachment", + source: "Image", + description: "An image attachment (recognition failed)", + text: `This is an image attachment. File name: ${attachment.name}, Size: ${attachment.size} bytes, Content type: ${attachment.contentType}`, + }; + } + + private async processVideoAttachment( + attachment: Attachment + ): Promise { + const videoService = this.runtime.getService( + ServiceType.VIDEO + ); + + if (!videoService) { + throw new Error("Video service not found"); + } + + if (videoService.isVideoUrl(attachment.url)) { + const videoInfo = await videoService.processVideo( + attachment.url, + this.runtime + ); + return { + id: attachment.id, + url: attachment.url, + title: videoInfo.title, + source: "YouTube", + description: videoInfo.description, + text: videoInfo.text, + }; + } else { + return { + id: attachment.id, + url: attachment.url, + title: "Video Attachment", + source: "Video", + description: "A video attachment", + text: "Video content not available", + }; + } + } + + private async processGenericAttachment( + attachment: Attachment + ): Promise { + return { + id: attachment.id, + url: attachment.url, + title: "Generic Attachment", + source: "Generic", + description: "A generic attachment", + text: "Attachment content not available", + }; + } +} diff --git a/packages/plugin-discord/src/constants.ts b/packages/plugin-discord/src/constants.ts new file mode 100644 index 00000000000..5ffe6e1c0b3 --- /dev/null +++ b/packages/plugin-discord/src/constants.ts @@ -0,0 +1,68 @@ +export const MESSAGE_CONSTANTS = { + MAX_MESSAGES: 10, + RECENT_MESSAGE_COUNT: 3, + CHAT_HISTORY_COUNT: 5, + INTEREST_DECAY_TIME: 5 * 60 * 1000, // 5 minutes + PARTIAL_INTEREST_DECAY: 3 * 60 * 1000, // 3 minutes + DEFAULT_SIMILARITY_THRESHOLD: 0.3, + DEFAULT_SIMILARITY_THRESHOLD_FOLLOW_UPS: 0.2, +} as const; + +export const MESSAGE_LENGTH_THRESHOLDS = { + LOSE_INTEREST: 100, + SHORT_MESSAGE: 10, + VERY_SHORT_MESSAGE: 2, + IGNORE_RESPONSE: 4, +} as const; + +export const TIMING_CONSTANTS = { + LEADER_RESPONSE_TIMEOUT: 3000, + TEAM_MEMBER_DELAY: 1500, + LEADER_DELAY_MIN: 3000, + LEADER_DELAY_MAX: 4000, + TEAM_MEMBER_DELAY_MIN: 1000, + TEAM_MEMBER_DELAY_MAX: 3000, +} as const; + +export const RESPONSE_CHANCES = { + AFTER_LEADER: 0.5, // 50% chance + FREQUENT_CHATTER: 0.5, // Base chance for frequent responders +} as const; + +export const LOSE_INTEREST_WORDS = [ + "shut up", + "stop", + "please shut up", + "shut up please", + "dont talk", + "silence", + "stop talking", + "be quiet", + "hush", + "wtf", + "chill", + "stfu", + "stupid bot", + "dumb bot", + "stop responding", + "god damn it", + "god damn", + "goddamnit", + "can you not", + "can you stop", + "be quiet", + "hate you", + "hate this", + "fuck up", +] as const; + +export const 
IGNORE_RESPONSE_WORDS = [ + "lol", + "nm", + "uh", + "wtf", + "stfu", + "dumb", + "jfc", + "omg", +] as const; diff --git a/packages/plugin-discord/src/environment.ts b/packages/plugin-discord/src/environment.ts new file mode 100644 index 00000000000..71f4b3cbeb3 --- /dev/null +++ b/packages/plugin-discord/src/environment.ts @@ -0,0 +1,38 @@ +import type { IAgentRuntime } from "@elizaos/core"; +import { z } from "zod"; + +export const discordEnvSchema = z.object({ + DISCORD_APPLICATION_ID: z + .string() + .min(1, "Discord application ID is required"), + DISCORD_API_TOKEN: z.string().min(1, "Discord API token is required"), +}); + +export type DiscordConfig = z.infer; + +export async function validateDiscordConfig( + runtime: IAgentRuntime +): Promise { + try { + const config = { + DISCORD_APPLICATION_ID: + runtime.getSetting("DISCORD_APPLICATION_ID") || + process.env.DISCORD_APPLICATION_ID, + DISCORD_API_TOKEN: + runtime.getSetting("DISCORD_API_TOKEN") || + process.env.DISCORD_API_TOKEN, + }; + + return discordEnvSchema.parse(config); + } catch (error) { + if (error instanceof z.ZodError) { + const errorMessages = error.errors + .map((err) => `${err.path.join(".")}: ${err.message}`) + .join("\n"); + throw new Error( + `Discord configuration validation failed:\n${errorMessages}` + ); + } + throw error; + } +} diff --git a/packages/plugin-discord/src/index.ts b/packages/plugin-discord/src/index.ts new file mode 100644 index 00000000000..cad7e3dd3d3 --- /dev/null +++ b/packages/plugin-discord/src/index.ts @@ -0,0 +1,416 @@ +import { + logger, + ModelClass, + stringToUuid, + type Character, + type Client as ElizaClient, + type IAgentRuntime, + type Plugin, +} from "@elizaos/core"; +import { + Client, + Events, + GatewayIntentBits, + Partials, + PermissionsBitField, + type Guild, + type MessageReaction, + type User, +} from "discord.js"; +import { EventEmitter } from "events"; +import chat_with_attachments from "./actions/chat_with_attachments.ts"; +import download_media from "./actions/download_media.ts"; +import joinvoice from "./actions/joinvoice.ts"; +import leavevoice from "./actions/leavevoice.ts"; +import summarize from "./actions/summarize_conversation.ts"; +import transcribe_media from "./actions/transcribe_media.ts"; +import { MessageManager } from "./messages.ts"; +import channelStateProvider from "./providers/channelState.ts"; +import voiceStateProvider from "./providers/voiceState.ts"; +import { VoiceManager } from "./voice.ts"; +import { IDiscordClient } from "./types.ts"; + +export class DiscordClient extends EventEmitter implements IDiscordClient { + apiToken: string; + client: Client; + runtime: IAgentRuntime; + character: Character; + private messageManager: MessageManager; + private voiceManager: VoiceManager; + + constructor(runtime: IAgentRuntime) { + super(); + + this.apiToken = runtime.getSetting("DISCORD_API_TOKEN") as string; + this.client = new Client({ + intents: [ + GatewayIntentBits.Guilds, + GatewayIntentBits.DirectMessages, + GatewayIntentBits.GuildVoiceStates, + GatewayIntentBits.MessageContent, + GatewayIntentBits.GuildMessages, + GatewayIntentBits.DirectMessageTyping, + GatewayIntentBits.GuildMessageTyping, + GatewayIntentBits.GuildMessageReactions, + ], + partials: [ + Partials.Channel, + Partials.Message, + Partials.User, + Partials.Reaction, + ], + }); + + this.runtime = runtime; + this.voiceManager = new VoiceManager(this); + this.messageManager = new MessageManager(this, this.voiceManager); + + this.client.once(Events.ClientReady, 
this.onClientReady.bind(this)); + this.client.login(this.apiToken); + + this.setupEventListeners(); + + this.runtime.registerAction(joinvoice); + this.runtime.registerAction(leavevoice); + this.runtime.registerAction(summarize); + this.runtime.registerAction(chat_with_attachments); + this.runtime.registerAction(transcribe_media); + this.runtime.registerAction(download_media); + + this.runtime.providers.push(channelStateProvider); + this.runtime.providers.push(voiceStateProvider); + } + + private setupEventListeners() { + // When joining to a new server + this.client.on("guildCreate", this.handleGuildCreate.bind(this)); + + this.client.on( + Events.MessageReactionAdd, + this.handleReactionAdd.bind(this) + ); + this.client.on( + Events.MessageReactionRemove, + this.handleReactionRemove.bind(this) + ); + + // Handle voice events with the voice manager + this.client.on( + "voiceStateUpdate", + this.voiceManager.handleVoiceStateUpdate.bind(this.voiceManager) + ); + this.client.on( + "userStream", + this.voiceManager.handleUserStream.bind(this.voiceManager) + ); + + // Handle a new message with the message manager + this.client.on( + Events.MessageCreate, + this.messageManager.handleMessage.bind(this.messageManager) + ); + + // Handle a new interaction + this.client.on( + Events.InteractionCreate, + this.handleInteractionCreate.bind(this) + ); + } + + async stop() { + try { + // disconnect websocket + // this unbinds all the listeners + await this.client.destroy(); + } catch (e) { + logger.error("client-discord instance stop err", e); + } + } + + private async onClientReady(readyClient: { user: { tag: any; id: any } }) { + logger.success(`Logged in as ${readyClient.user?.tag}`); + + // Register slash commands + const commands = [ + { + name: "joinchannel", + description: "Join a voice channel", + options: [ + { + name: "channel", + type: 7, // CHANNEL type + description: "The voice channel to join", + required: true, + channel_types: [2], // GuildVoice type + }, + ], + }, + { + name: "leavechannel", + description: "Leave the current voice channel", + }, + ]; + + try { + await this.client.application?.commands.set(commands); + logger.success("Slash commands registered"); + } catch (error) { + console.error("Error registering slash commands:", error); + } + + // Required permissions for the bot + const requiredPermissions = [ + // Text Permissions + PermissionsBitField.Flags.ViewChannel, + PermissionsBitField.Flags.SendMessages, + PermissionsBitField.Flags.SendMessagesInThreads, + PermissionsBitField.Flags.CreatePrivateThreads, + PermissionsBitField.Flags.CreatePublicThreads, + PermissionsBitField.Flags.EmbedLinks, + PermissionsBitField.Flags.AttachFiles, + PermissionsBitField.Flags.AddReactions, + PermissionsBitField.Flags.UseExternalEmojis, + PermissionsBitField.Flags.UseExternalStickers, + PermissionsBitField.Flags.MentionEveryone, + PermissionsBitField.Flags.ManageMessages, + PermissionsBitField.Flags.ReadMessageHistory, + // Voice Permissions + PermissionsBitField.Flags.Connect, + PermissionsBitField.Flags.Speak, + PermissionsBitField.Flags.UseVAD, + PermissionsBitField.Flags.PrioritySpeaker, + ].reduce((a, b) => a | b, 0n); + + logger.success("Use this URL to add the bot to your server:"); + logger.success( + `https://discord.com/api/oauth2/authorize?client_id=${readyClient.user?.id}&permissions=${requiredPermissions}&scope=bot%20applications.commands` + ); + await this.onReady(); + } + + async handleReactionAdd(reaction: MessageReaction, user: User) { + try { + logger.log("Reaction added"); 
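+            // Persist the reaction as a memory so emoji feedback shows up in the agent's recent context.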
+ + // Early returns + if (!reaction || !user) { + logger.warn("Invalid reaction or user"); + return; + } + + // Get emoji info + let emoji = reaction.emoji.name; + if (!emoji && reaction.emoji.id) { + emoji = `<:${reaction.emoji.name}:${reaction.emoji.id}>`; + } + + // Fetch full message if partial + if (reaction.partial) { + try { + await reaction.fetch(); + } catch (error) { + logger.error( + "Failed to fetch partial reaction:", + error + ); + return; + } + } + + // Generate IDs with timestamp to ensure uniqueness + const timestamp = Date.now(); + const roomId = stringToUuid( + `${reaction.message.channel.id}-${this.runtime.agentId}` + ); + const userIdUUID = stringToUuid( + `${user.id}-${this.runtime.agentId}` + ); + const reactionUUID = stringToUuid( + `${reaction.message.id}-${user.id}-${emoji}-${timestamp}-${this.runtime.agentId}` + ); + + // Validate IDs + if (!userIdUUID || !roomId) { + logger.error("Invalid user ID or room ID", { + userIdUUID, + roomId, + }); + return; + } + + // Process message content + const messageContent = reaction.message.content || ""; + const truncatedContent = + messageContent.length > 100 + ? `${messageContent.substring(0, 100)}...` + : messageContent; + const reactionMessage = `*<${emoji}>: "${truncatedContent}"*`; + + // Get user info + const userName = reaction.message.author?.username || "unknown"; + const name = reaction.message.author?.displayName || userName; + + // Ensure connection + await this.runtime.ensureConnection( + userIdUUID, + roomId, + userName, + name, + "discord" + ); + + const zeroVector = await this.runtime.call(ModelClass.TEXT_EMBEDDING, null); + + // Create memory with retry logic + const memory = { + id: reactionUUID, + userId: userIdUUID, + agentId: this.runtime.agentId, + content: { + text: reactionMessage, + source: "discord", + inReplyTo: stringToUuid( + `${reaction.message.id}-${this.runtime.agentId}` + ), + }, + roomId, + createdAt: timestamp, + embedding: zeroVector, + }; + + try { + await this.runtime.messageManager.createMemory(memory); + logger.debug("Reaction memory created", { + reactionId: reactionUUID, + emoji, + userId: user.id, + }); + } catch (error) { + if (error.code === "23505") { + // Duplicate key error + logger.warn("Duplicate reaction memory, skipping", { + reactionId: reactionUUID, + }); + return; + } + throw error; // Re-throw other errors + } + } catch (error) { + logger.error("Error handling reaction:", error); + } + } + + async handleReactionRemove(reaction: MessageReaction, user: User) { + logger.log("Reaction removed"); + // if (user.bot) return; + + let emoji = reaction.emoji.name; + if (!emoji && reaction.emoji.id) { + emoji = `<:${reaction.emoji.name}:${reaction.emoji.id}>`; + } + + // Fetch the full message if it's a partial + if (reaction.partial) { + try { + await reaction.fetch(); + } catch (error) { + console.error( + "Something went wrong when fetching the message:", + error + ); + return; + } + } + + const messageContent = reaction.message.content; + const truncatedContent = + messageContent.length > 50 + ? messageContent.substring(0, 50) + "..." 
+ : messageContent; + + const reactionMessage = `*Removed <${emoji} emoji> from: "${truncatedContent}"*`; + + const roomId = stringToUuid( + reaction.message.channel.id + "-" + this.runtime.agentId + ); + const userIdUUID = stringToUuid(user.id); + + // Generate a unique UUID for the reaction removal + const reactionUUID = stringToUuid( + `${reaction.message.id}-${user.id}-${emoji}-removed-${this.runtime.agentId}` + ); + + const userName = reaction.message.author.username; + const name = reaction.message.author.displayName; + + await this.runtime.ensureConnection( + userIdUUID, + roomId, + userName, + name, + "discord" + ); + + try { + + const zeroVector = await this.runtime.call(ModelClass.TEXT_EMBEDDING, null); + + // Save the reaction removal as a message + await this.runtime.messageManager.createMemory({ + id: reactionUUID, // This is the ID of the reaction removal message + userId: userIdUUID, + agentId: this.runtime.agentId, + content: { + text: reactionMessage, + source: "discord", + inReplyTo: stringToUuid( + reaction.message.id + "-" + this.runtime.agentId + ), // This is the ID of the original message + }, + roomId, + createdAt: Date.now(), + embedding: zeroVector, + }); + } catch (error) { + console.error("Error creating reaction removal message:", error); + } + } + + private handleGuildCreate(guild: Guild) { + console.log(`Joined guild ${guild.name}`); + this.voiceManager.scanGuild(guild); + } + + private async handleInteractionCreate(interaction: any) { + if (!interaction.isCommand()) return; + + switch (interaction.commandName) { + case "joinchannel": + await this.voiceManager.handleJoinChannelCommand(interaction); + break; + case "leavechannel": + await this.voiceManager.handleLeaveChannelCommand(interaction); + break; + } + } + + private async onReady() { + const guilds = await this.client.guilds.fetch(); + for (const [, guild] of guilds) { + const fullGuild = await guild.fetch(); + this.voiceManager.scanGuild(fullGuild); + } + } +} + +const DiscordClientInterface: ElizaClient = { + name: 'discord', + start: async (runtime: IAgentRuntime) => new DiscordClient(runtime), +}; + +const discordPlugin: Plugin = { + name: "discord", + description: "Discord client plugin", + clients: [DiscordClientInterface], +}; +export default discordPlugin; \ No newline at end of file diff --git a/packages/plugin-discord/src/messages.ts b/packages/plugin-discord/src/messages.ts new file mode 100644 index 00000000000..98361b818cf --- /dev/null +++ b/packages/plugin-discord/src/messages.ts @@ -0,0 +1,1130 @@ +import { + composeContext, composeRandomUser, type Content, generateMessageResponse, generateShouldRespond, type HandlerCallback, + type IAgentRuntime, + type IBrowserService, type IVideoService, logger, type Media, + type Memory, ModelClass, ServiceType, + type State, stringToUuid, type UUID +} from "@elizaos/core"; +import { + ChannelType, + type Client, + type Message as DiscordMessage, + TextChannel, +} from "discord.js"; +import { AttachmentManager } from "./attachments.ts"; +import { + IGNORE_RESPONSE_WORDS, + LOSE_INTEREST_WORDS, + MESSAGE_CONSTANTS, + MESSAGE_LENGTH_THRESHOLDS +} from "./constants.ts"; +import { + discordAnnouncementHypeTemplate, + discordAutoPostTemplate, + discordMessageHandlerTemplate, + discordShouldRespondTemplate +} from "./templates.ts"; +import { + canSendMessage, + cosineSimilarity, + sendMessageInChunks, +} from "./utils.ts"; +import type { VoiceManager } from "./voice.ts"; + +interface MessageContext { + content: string; + timestamp: number; +} + +interface 
AutoPostConfig { + enabled: boolean; + monitorTime: number; + inactivityThreshold: number; // milliseconds + mainChannelId: string; + announcementChannelIds: string[]; + lastAutoPost?: number; + minTimeBetweenPosts?: number; // minimum time between auto posts +} + +export type InterestChannels = { + [key: string]: { + currentHandler: string | undefined; + lastMessageSent: number; + messages: { userId: UUID; userName: string; content: Content }[]; + previousContext?: MessageContext; + contextSimilarityThreshold?: number; + }; +}; + +export class MessageManager { + private client: Client; + private runtime: IAgentRuntime; + private attachmentManager: AttachmentManager; + private interestChannels: InterestChannels = {}; + private discordClient: any; + private voiceManager: VoiceManager; + //Auto post + private autoPostConfig: AutoPostConfig; + private lastChannelActivity: { [channelId: string]: number } = {}; + private autoPostInterval: NodeJS.Timeout; + + constructor(discordClient: any, voiceManager: VoiceManager) { + this.client = discordClient.client; + this.voiceManager = voiceManager; + this.discordClient = discordClient; + this.runtime = discordClient.runtime; + this.attachmentManager = new AttachmentManager(this.runtime); + + this.autoPostConfig = { + enabled: this.runtime.character.clientConfig?.discord?.autoPost?.enabled || false, + monitorTime: this.runtime.character.clientConfig?.discord?.autoPost?.monitorTime || 300000, + inactivityThreshold: this.runtime.character.clientConfig?.discord?.autoPost?.inactivityThreshold || 3600000, // 1 hour default + mainChannelId: this.runtime.character.clientConfig?.discord?.autoPost?.mainChannelId, + announcementChannelIds: this.runtime.character.clientConfig?.discord?.autoPost?.announcementChannelIds || [], + minTimeBetweenPosts: this.runtime.character.clientConfig?.discord?.autoPost?.minTimeBetweenPosts || 7200000, // 2 hours default + }; + + if (this.autoPostConfig.enabled) { + this._startAutoPostMonitoring(); + } + } + + async handleMessage(message: DiscordMessage) { + + if (this.runtime.character.clientConfig?.discord?.allowedChannelIds && + !this.runtime.character.clientConfig.discord.allowedChannelIds.includes(message.channelId)) { + return; + } + + // Update last activity time for the channel + this.lastChannelActivity[message.channelId] = Date.now(); + + if ( + message.interaction || + message.author.id === + this.client.user?.id /* || message.author?.bot*/ + ) { + return; + } + + if ( + this.runtime.character.clientConfig?.discord + ?.shouldIgnoreBotMessages && + message.author?.bot + ) { + return; + } + + // Check for mentions-only mode setting + if ( + this.runtime.character.clientConfig?.discord + ?.shouldRespondOnlyToMentions + ) { + if (!this._isMessageForMe(message)) { + return; + } + } + + if ( + this.runtime.character.clientConfig?.discord + ?.shouldIgnoreDirectMessages && + message.channel.type === ChannelType.DM + ) { + return; + } + + const userId = message.author.id as UUID; + const userName = message.author.username; + const name = message.author.displayName; + const channelId = message.channel.id; + const hasInterest = this._checkInterest(message.channelId); + + try { + const { processedContent, attachments } = + await this.processMessageMedia(message); + + const audioAttachments = message.attachments.filter((attachment) => + attachment.contentType?.startsWith("audio/") + ); + if (audioAttachments.size > 0) { + const processedAudioAttachments = + await this.attachmentManager.processAttachments( + audioAttachments + ); + 
attachments.push(...processedAudioAttachments); + } + + const roomId = stringToUuid(channelId + "-" + this.runtime.agentId); + const userIdUUID = stringToUuid(userId); + + await this.runtime.ensureConnection( + userIdUUID, + roomId, + userName, + name, + "discord" + ); + + const messageId = stringToUuid( + message.id + "-" + this.runtime.agentId + ); + + let shouldIgnore = false; + let shouldRespond = true; + + const content: Content = { + text: processedContent, + attachments: attachments, + source: "discord", + url: message.url, + inReplyTo: message.reference?.messageId + ? stringToUuid( + message.reference.messageId + + "-" + + this.runtime.agentId + ) + : undefined, + }; + + const userMessage = { + content, + userId: userIdUUID, + agentId: this.runtime.agentId, + roomId, + }; + + const memory: Memory = { + id: stringToUuid(message.id + "-" + this.runtime.agentId), + ...userMessage, + userId: userIdUUID, + agentId: this.runtime.agentId, + roomId, + content, + createdAt: message.createdTimestamp, + }; + + if (content.text) { + await this.runtime.messageManager.addEmbeddingToMemory(memory); + await this.runtime.messageManager.createMemory(memory); + + if (this.interestChannels[message.channelId]) { + // Add new message + this.interestChannels[message.channelId].messages.push({ + userId: userIdUUID, + userName: userName, + content: content, + }); + + // Trim to keep only recent messages + if ( + this.interestChannels[message.channelId].messages + .length > MESSAGE_CONSTANTS.MAX_MESSAGES + ) { + this.interestChannels[message.channelId].messages = + this.interestChannels[ + message.channelId + ].messages.slice(-MESSAGE_CONSTANTS.MAX_MESSAGES); + } + } + } + + let state = await this.runtime.composeState(userMessage, { + discordClient: this.client, + discordMessage: message, + agentName: + this.runtime.character.name || + this.client.user?.displayName, + }); + + const canSendResult = canSendMessage(message.channel); + if (!canSendResult.canSend) { + return logger.warn( + `Cannot send message to channel ${message.channel}`, + canSendResult + ); + } + + if (!shouldIgnore) { + shouldIgnore = await this._shouldIgnore(message); + } + + if (shouldIgnore) { + return; + } + + const agentUserState = + await this.runtime.databaseAdapter.getParticipantUserState( + roomId, + this.runtime.agentId + ); + + if ( + agentUserState === "MUTED" && + !message.mentions.has(this.client.user.id) && + !hasInterest + ) { + console.log("Ignoring muted room"); + // Ignore muted rooms unless explicitly mentioned + return; + } + + if (agentUserState === "FOLLOWED") { + shouldRespond = true; // Always respond in followed rooms + } else if ( + (!shouldRespond && hasInterest) || + (shouldRespond && !hasInterest) + ) { + shouldRespond = await this._shouldRespond(message, state); + } + + if (shouldRespond) { + const context = composeContext({ + state, + template: + this.runtime.character.templates + ?.discordMessageHandlerTemplate || + discordMessageHandlerTemplate, + }); + + // simulate discord typing while generating a response + const stopTyping = this.simulateTyping(message); + + const responseContent = await this._generateResponse( + memory, + state, + context + ).finally(() => { + stopTyping(); + }); + + responseContent.text = responseContent.text?.trim(); + responseContent.inReplyTo = stringToUuid( + message.id + "-" + this.runtime.agentId + ); + + if (!responseContent.text) { + return; + } + + const callback: HandlerCallback = async ( + content: Content, + files: any[] + ) => { + try { + if (message.id && 
!content.inReplyTo) { + content.inReplyTo = stringToUuid( + message.id + "-" + this.runtime.agentId + ); + } + const messages = await sendMessageInChunks( + message.channel as TextChannel, + content.text, + message.id, + files + ); + + const memories: Memory[] = []; + for (const m of messages) { + let action = content.action; + // If there's only one message or it's the last message, keep the original action + // For multiple messages, set all but the last to 'CONTINUE' + if ( + messages.length > 1 && + m !== messages[messages.length - 1] + ) { + action = "CONTINUE"; + } + + const zeroVector = await this.runtime.call(ModelClass.TEXT_EMBEDDING, null); + + const memory: Memory = { + id: stringToUuid( + m.id + "-" + this.runtime.agentId + ), + userId: this.runtime.agentId, + agentId: this.runtime.agentId, + content: { + ...content, + action, + inReplyTo: messageId, + url: m.url, + }, + roomId, + embedding: zeroVector, + createdAt: m.createdTimestamp, + }; + memories.push(memory); + } + for (const m of memories) { + await this.runtime.messageManager.createMemory(m); + } + return memories; + } catch (error) { + console.error("Error sending message:", error); + return []; + } + }; + + const action = this.runtime.actions.find((a) => a.name === responseContent.action); + const shouldSuppressInitialMessage = action?.suppressInitialMessage; + + let responseMessages = []; + + const zeroVector = await this.runtime.call(ModelClass.TEXT_EMBEDDING, null); + + if (!shouldSuppressInitialMessage) { + responseMessages = await callback(responseContent); + } else { + responseMessages = [ + { + id: stringToUuid(messageId + "-" + this.runtime.agentId), + userId: this.runtime.agentId, + agentId: this.runtime.agentId, + content: responseContent, + roomId, + embedding: zeroVector, + createdAt: Date.now(), + } + ] + } + + state = await this.runtime.updateRecentMessageState(state); + + await this.runtime.processActions( + memory, + responseMessages, + state, + callback + ); + } + await this.runtime.evaluate(memory, state, shouldRespond); + } catch (error) { + console.error("Error handling message:", error); + if (message.channel.type === ChannelType.GuildVoice) { + // For voice channels, use text-to-speech for the error message + const errorMessage = "Sorry, I had a glitch. What was that?"; + + const speechService = null; + + // TODO: fix this + // this.runtime.getService( + // ServiceType.SPEECH_GENERATION + // ); + if (!speechService) { + throw new Error("Speech generation service not found"); + } + + const audioStream = await speechService.generate( + this.runtime, + errorMessage + ); + await this.voiceManager.playAudioStream(userId, audioStream); + } else { + // For text channels, send the error message + console.error("Error sending message:", error); + } + } + } + + async cacheMessages(channel: TextChannel, count = 20) { + const messages = await channel.messages.fetch({ limit: count }); + + // TODO: This is throwing an error but seems to work? 
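+        // Replay each fetched message through the normal handler so prior channel history is processed like live messages.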
+ for (const [_, message] of messages) { + await this.handleMessage(message); + } + } + + private _startAutoPostMonitoring(): void { + // Wait for client to be ready + if (!this.client.isReady()) { + logger.info('[AutoPost Discord] Client not ready, waiting for ready event') + this.client.once('ready', () => { + logger.info('[AutoPost Discord] Client ready, starting monitoring') + this._initializeAutoPost(); + }); + } else { + logger.info('[AutoPost Discord] Client already ready, starting monitoring') + this._initializeAutoPost(); + } + } + + private _initializeAutoPost(): void { + // Give the client a moment to fully load its cache + setTimeout(() => { + // Monitor with random intervals between 2-6 hours + this.autoPostInterval = setInterval(() => { + this._checkChannelActivity(); + }, Math.floor(Math.random() * (4 * 60 * 60 * 1000) + 2 * 60 * 60 * 1000)); + + // Start monitoring announcement channels + this._monitorAnnouncementChannels(); + }, 5000); // 5 second delay to ensure everything is loaded + } + + private async _checkChannelActivity(): Promise { + if (!this.autoPostConfig.enabled || !this.autoPostConfig.mainChannelId) return; + + const channel = this.client.channels.cache.get(this.autoPostConfig.mainChannelId) as TextChannel; + if (!channel) return; + + try { + // Get last message time + const messages = await channel.messages.fetch({ limit: 1 }); + const lastMessage = messages.first(); + const lastMessageTime = lastMessage ? lastMessage.createdTimestamp : 0; + + const now = Date.now(); + const timeSinceLastMessage = now - lastMessageTime; + const timeSinceLastAutoPost = now - (this.autoPostConfig.lastAutoPost || 0); + + // Add some randomness to the inactivity threshold (±30 minutes) + const randomThreshold = this.autoPostConfig.inactivityThreshold + + (Math.random() * 1800000 - 900000); + + const zeroVector = await this.runtime.call(ModelClass.TEXT_EMBEDDING, null); + + // Check if we should post + if ((timeSinceLastMessage > randomThreshold) && + timeSinceLastAutoPost > (this.autoPostConfig.minTimeBetweenPosts || 0)) { + + try { + // Create memory and generate response + const roomId = stringToUuid(channel.id + "-" + this.runtime.agentId); + + const memory = { + id: stringToUuid(`autopost-${Date.now()}`), + userId: this.runtime.agentId, + agentId: this.runtime.agentId, + roomId, + content: { text: "AUTO_POST_ENGAGEMENT", source: "discord" }, + embedding: zeroVector, + createdAt: Date.now() + }; + + let state = await this.runtime.composeState(memory, { + discordClient: this.client, + discordMessage: null, + agentName: this.runtime.character.name || this.client.user?.displayName + }); + + // Generate response using template + const context = composeContext({ + state, + template: this.runtime.character.templates?.discordAutoPostTemplate || discordAutoPostTemplate + }); + + const responseContent = await this._generateResponse(memory, state, context); + if (!responseContent?.text) return; + + // Send message and update memory + const messages = await sendMessageInChunks(channel, responseContent.text.trim(), null, []); + + // Create and store memories + const memories = messages.map(m => ({ + id: stringToUuid(m.id + "-" + this.runtime.agentId), + userId: this.runtime.agentId, + agentId: this.runtime.agentId, + content: { + ...responseContent, + url: m.url, + }, + roomId, + embedding: zeroVector, + createdAt: m.createdTimestamp, + })); + + for (const m of memories) { + await this.runtime.messageManager.createMemory(m); + } + + // Update state and last post time + 
this.autoPostConfig.lastAutoPost = Date.now(); + state = await this.runtime.updateRecentMessageState(state); + await this.runtime.evaluate(memory, state, true); + } catch (error) { + logger.warn("[AutoPost Discord] Error:", error); + } + } else { + logger.warn("[AutoPost Discord] Activity within threshold. Not posting."); + } + } catch (error) { + logger.warn("[AutoPost Discord] Error checking last message:", error); + } + } + + private async _monitorAnnouncementChannels(): Promise { + if (!this.autoPostConfig.enabled || !this.autoPostConfig.announcementChannelIds.length) { + logger.warn('[AutoPost Discord] Auto post config disabled or no announcement channels') + return; + } + + for (const announcementChannelId of this.autoPostConfig.announcementChannelIds) { + const channel = this.client.channels.cache.get(announcementChannelId); + + if (channel) { + // Check if it's either a text channel or announcement channel + // ChannelType.GuildAnnouncement is 5 + // ChannelType.GuildText is 0 + if (channel instanceof TextChannel || channel.type === ChannelType.GuildAnnouncement) { + const newsChannel = channel as TextChannel; + try { + newsChannel.createMessageCollector().on('collect', async (message: DiscordMessage) => { + if (message.author.bot || Date.now() - message.createdTimestamp > 300000) return; + + const mainChannel = this.client.channels.cache.get(this.autoPostConfig.mainChannelId) as TextChannel; + if (!mainChannel) return; + + const zeroVector = await this.runtime.call(ModelClass.TEXT_EMBEDDING, null); + + try { + // Create memory and generate response + const roomId = stringToUuid(mainChannel.id + "-" + this.runtime.agentId); + const memory = { + id: stringToUuid(`announcement-${Date.now()}`), + userId: this.runtime.agentId, + agentId: this.runtime.agentId, + roomId, + content: { + text: message.content, + source: "discord", + metadata: { announcementUrl: message.url } + }, + embedding: zeroVector, + createdAt: Date.now() + }; + + let state = await this.runtime.composeState(memory, { + discordClient: this.client, + discordMessage: message, + announcementContent: message?.content, + announcementChannelId: channel.id, + agentName: this.runtime.character.name || this.client.user?.displayName + }); + + // Generate response using template + const context = composeContext({ + state, + template: this.runtime.character.templates?.discordAnnouncementHypeTemplate || discordAnnouncementHypeTemplate + + }); + + const responseContent = await this._generateResponse(memory, state, context); + if (!responseContent?.text) return; + + // Send message and update memory + const messages = await sendMessageInChunks(mainChannel, responseContent.text.trim(), null, []); + + // Create and store memories + const memories = messages.map(m => ({ + id: stringToUuid(m.id + "-" + this.runtime.agentId), + userId: this.runtime.agentId, + agentId: this.runtime.agentId, + content: { + ...responseContent, + url: m.url, + }, + roomId, + embedding: zeroVector, + createdAt: m.createdTimestamp, + })); + + for (const m of memories) { + await this.runtime.messageManager.createMemory(m); + } + + // Update state + state = await this.runtime.updateRecentMessageState(state); + await this.runtime.evaluate(memory, state, true); + } catch (error) { + logger.warn("[AutoPost Discord] Announcement Error:", error); + } + }); + logger.info(`[AutoPost Discord] Successfully set up collector for announcement channel: ${newsChannel.name}`); + } catch (error) { + logger.warn(`[AutoPost Discord] Error setting up announcement channel 
collector:`, error); + } + } else { + logger.warn(`[AutoPost Discord] Channel ${announcementChannelId} is not a valid announcement or text channel, type:`, channel.type); + } + } else { + logger.warn(`[AutoPost Discord] Could not find channel ${announcementChannelId} directly`); + } + } + } + + private _isMessageForMe(message: DiscordMessage): boolean { + const isMentioned = message.mentions.users?.has( + this.client.user?.id as string + ); + const guild = message.guild; + const member = guild?.members.cache.get(this.client.user?.id as string); + const nickname = member?.nickname; + + return ( + isMentioned || + (!this.runtime.character.clientConfig?.discord + ?.shouldRespondOnlyToMentions && + (message.content + .toLowerCase() + .includes( + this.client.user?.username.toLowerCase() as string + ) || + message.content + .toLowerCase() + .includes( + this.client.user?.tag.toLowerCase() as string + ) || + (nickname && + message.content + .toLowerCase() + .includes(nickname.toLowerCase())))) + ); + } + + async processMessageMedia( + message: DiscordMessage + ): Promise<{ processedContent: string; attachments: Media[] }> { + let processedContent = message.content; + + let attachments: Media[] = []; + + // Process code blocks in the message content + const codeBlockRegex = /```([\s\S]*?)```/g; + let match; + while ((match = codeBlockRegex.exec(processedContent))) { + const codeBlock = match[1]; + const lines = codeBlock.split("\n"); + const title = lines[0]; + const description = lines.slice(0, 3).join("\n"); + const attachmentId = + `code-${Date.now()}-${Math.floor(Math.random() * 1000)}`.slice( + -5 + ); + attachments.push({ + id: attachmentId, + url: "", + title: title || "Code Block", + source: "Code", + description: description, + text: codeBlock, + }); + processedContent = processedContent.replace( + match[0], + `Code Block (${attachmentId})` + ); + } + + // Process message attachments + if (message.attachments.size > 0) { + attachments = await this.attachmentManager.processAttachments( + message.attachments + ); + } + + // TODO: Move to attachments manager + const urlRegex = /(https?:\/\/[^\s]+)/g; + const urls = processedContent.match(urlRegex) || []; + + for (const url of urls) { + if ( + this.runtime + .getService(ServiceType.VIDEO) + ?.isVideoUrl(url) + ) { + const videoService = this.runtime.getService( + ServiceType.VIDEO + ); + if (!videoService) { + throw new Error("Video service not found"); + } + const videoInfo = await videoService.processVideo( + url, + this.runtime + ); + + attachments.push({ + id: `youtube-${Date.now()}`, + url: url, + title: videoInfo.title, + source: "YouTube", + description: videoInfo.description, + text: videoInfo.text, + }); + } else { + const browserService = this.runtime.getService( + ServiceType.BROWSER + ); + if (!browserService) { + throw new Error("Browser service not found"); + } + + const { title, description: summary } = + await browserService.getPageContent(url, this.runtime); + + attachments.push({ + id: `webpage-${Date.now()}`, + url: url, + title: title || "Web Page", + source: "Web", + description: summary, + text: summary, + }); + } + } + + return { processedContent, attachments }; + } + + private async _analyzeContextSimilarity( + currentMessage: string, + previousContext?: MessageContext, + agentLastMessage?: string + ): Promise { + if (!previousContext) return 1; // No previous context to compare against + + // If more than 5 minutes have passed, reduce similarity weight + const timeDiff = Date.now() - previousContext.timestamp; + 
const timeWeight = Math.max(0, 1 - timeDiff / (5 * 60 * 1000)); // 5 minutes threshold + + // Calculate content similarity + const similarity = cosineSimilarity( + currentMessage.toLowerCase(), + previousContext.content.toLowerCase(), + agentLastMessage?.toLowerCase() + ); + + // Weight the similarity by time factor + const weightedSimilarity = similarity * timeWeight; + + return weightedSimilarity; + } + + private async _shouldRespondBasedOnContext( + message: DiscordMessage, + channelState: InterestChannels[string] + ): Promise { + // Always respond if directly mentioned + if (this._isMessageForMe(message)) return true; + + // If we're not the current handler, don't respond + if (channelState?.currentHandler !== this.client.user?.id) return false; + + // Check if we have messages to compare + if (!channelState.messages?.length) return false; + + // Get last user message (not from the bot) + const lastUserMessage = [...channelState.messages].reverse().find( + (m, index) => + index > 0 && // Skip first message (current) + m.userId !== this.runtime.agentId + ); + + if (!lastUserMessage) return false; + + const lastSelfMemories = await this.runtime.messageManager.getMemories({ + roomId: stringToUuid( + message.channel.id + "-" + this.runtime.agentId + ), + unique: false, + count: 5, + }); + + const lastSelfSortedMemories = lastSelfMemories + ?.filter((m) => m.userId === this.runtime.agentId) + .sort((a, b) => (b.createdAt || 0) - (a.createdAt || 0)); + + // Calculate context similarity + const contextSimilarity = await this._analyzeContextSimilarity( + message.content, + { + content: lastUserMessage.content.text || "", + timestamp: Date.now(), + }, + lastSelfSortedMemories?.[0]?.content?.text + ); + + const similarityThreshold = + this.runtime.character.clientConfig?.discord + ?.messageSimilarityThreshold || + channelState.contextSimilarityThreshold || + MESSAGE_CONSTANTS.DEFAULT_SIMILARITY_THRESHOLD; + + return contextSimilarity >= similarityThreshold; + } + + private _checkInterest(channelId: string): boolean { + const channelState = this.interestChannels[channelId]; + if (!channelState) return false; + + // Check if conversation has shifted to a new topic + if (channelState.messages.length > 0) { + const recentMessages = channelState.messages.slice( + -MESSAGE_CONSTANTS.RECENT_MESSAGE_COUNT + ); + const differentUsers = new Set(recentMessages.map((m) => m.userId)) + .size; + + // If multiple users are talking and we're not involved, reduce interest + if ( + differentUsers > 1 && + !recentMessages.some((m) => m.userId === this.client.user?.id) + ) { + delete this.interestChannels[channelId]; + return false; + } + } + + return true; + } + + private async _shouldIgnore(message: DiscordMessage): Promise { + // if the message is from us, ignore + if (message.author.id === this.client.user?.id) return true; + + // Honor mentions-only mode + if ( + this.runtime.character.clientConfig?.discord + ?.shouldRespondOnlyToMentions + ) { + return !this._isMessageForMe(message); + } + + let messageContent = message.content.toLowerCase(); + + // Replace the bot's @ping with the character name + const botMention = `<@!?${this.client.user?.id}>`; + messageContent = messageContent.replace( + new RegExp(botMention, "gi"), + this.runtime.character.name.toLowerCase() + ); + + // Replace the bot's username with the character name + const botUsername = this.client.user?.username.toLowerCase(); + messageContent = messageContent.replace( + new RegExp(`\\b${botUsername}\\b`, "g"), + 
this.runtime.character.name.toLowerCase() + ); + + // strip all special characters + messageContent = messageContent.replace(/[^a-zA-Z0-9\s]/g, ""); + + // short responses where eliza should stop talking and disengage unless mentioned again + if ( + messageContent.length < MESSAGE_LENGTH_THRESHOLDS.LOSE_INTEREST && + LOSE_INTEREST_WORDS.some((word) => messageContent.includes(word)) + ) { + delete this.interestChannels[message.channelId]; + return true; + } + + // If we're not interested in the channel and it's a short message, ignore it + if ( + messageContent.length < MESSAGE_LENGTH_THRESHOLDS.SHORT_MESSAGE && + !this.interestChannels[message.channelId] + ) { + return true; + } + + const targetedPhrases = [ + this.runtime.character.name + " stop responding", + this.runtime.character.name + " stop talking", + this.runtime.character.name + " shut up", + this.runtime.character.name + " stfu", + "stop talking" + this.runtime.character.name, + this.runtime.character.name + " stop talking", + "shut up " + this.runtime.character.name, + this.runtime.character.name + " shut up", + "stfu " + this.runtime.character.name, + this.runtime.character.name + " stfu", + "chill" + this.runtime.character.name, + this.runtime.character.name + " chill", + ]; + + // lose interest if pinged and told to stop responding + if (targetedPhrases.some((phrase) => messageContent.includes(phrase))) { + delete this.interestChannels[message.channelId]; + return true; + } + + // if the message is short, ignore but maintain interest + if ( + !this.interestChannels[message.channelId] && + messageContent.length < MESSAGE_LENGTH_THRESHOLDS.VERY_SHORT_MESSAGE + ) { + return true; + } + + if ( + message.content.length < + MESSAGE_LENGTH_THRESHOLDS.IGNORE_RESPONSE && + IGNORE_RESPONSE_WORDS.some((word) => + message.content.toLowerCase().includes(word) + ) + ) { + return true; + } + return false; + } + + private async _shouldRespond( + message: DiscordMessage, + state: State + ): Promise { + if (message.author.id === this.client.user?.id) return false; + // if (message.author.bot) return false; + + // Honor mentions-only mode + if ( + this.runtime.character.clientConfig?.discord + ?.shouldRespondOnlyToMentions + ) { + return this._isMessageForMe(message); + } + + const channelState = this.interestChannels[message.channelId]; + + // Otherwise do context check + if (channelState?.previousContext) { + const shouldRespondContext = + await this._shouldRespondBasedOnContext(message, channelState); + if (!shouldRespondContext) { + delete this.interestChannels[message.channelId]; + return false; + } + } + + if (message.mentions.has(this.client.user?.id as string)) return true; + + const guild = message.guild; + const member = guild?.members.cache.get(this.client.user?.id as string); + const nickname = member?.nickname; + + if ( + message.content + .toLowerCase() + .includes(this.client.user?.username.toLowerCase() as string) || + message.content + .toLowerCase() + .includes(this.client.user?.tag.toLowerCase() as string) || + (nickname && + message.content.toLowerCase().includes(nickname.toLowerCase())) + ) { + return true; + } + + if (!message.guild) { + return true; + } + + // If none of the above conditions are met, use the generateText to decide + const shouldRespondContext = composeContext({ + state, + template: + this.runtime.character.templates + ?.discordShouldRespondTemplate || + this.runtime.character.templates?.shouldRespondTemplate || + composeRandomUser(discordShouldRespondTemplate, 2), + }); + + const response = await 
generateShouldRespond({ + runtime: this.runtime, + context: shouldRespondContext, + modelClass: ModelClass.TEXT_SMALL, + }); + + if (response === "RESPOND") { + if (channelState) { + channelState.previousContext = { + content: message.content, + timestamp: Date.now(), + }; + } + + return true; + } else if (response === "IGNORE") { + return false; + } else if (response === "STOP") { + delete this.interestChannels[message.channelId]; + return false; + } else { + console.error( + "Invalid response from response generateText:", + response + ); + return false; + } + } + + private async _generateResponse( + message: Memory, + state: State, + context: string + ): Promise { + const { userId, roomId } = message; + + const response = await generateMessageResponse({ + runtime: this.runtime, + context, + modelClass: ModelClass.TEXT_LARGE, + }); + + if (!response) { + console.error("No response from generateMessageResponse"); + return; + } + + await this.runtime.databaseAdapter.log({ + body: { message, context, response }, + userId: userId, + roomId, + type: "response", + }); + + return response; + } + + async fetchBotName(botToken: string) { + const url = "https://discord.com/api/v10/users/@me"; + + const response = await fetch(url, { + method: "GET", + headers: { + Authorization: `Bot ${botToken}`, + }, + }); + + if (!response.ok) { + throw new Error( + `Error fetching bot details: ${response.statusText}` + ); + } + + const data = await response.json(); + return (data as { username: string }).username; + } + + /** + * Simulate discord typing while generating a response; + * returns a function to interrupt the typing loop + * + * @param message + */ + private simulateTyping(message: DiscordMessage) { + let typing = true; + + const typingLoop = async () => { + while (typing) { + // @ts-ignore + await message.channel.sendTyping(); + await new Promise((resolve) => setTimeout(resolve, 3000)); + } + }; + + typingLoop(); + + return function stopTyping() { + typing = false; + }; + } +} diff --git a/packages/plugin-discord/src/providers/channelState.ts b/packages/plugin-discord/src/providers/channelState.ts new file mode 100644 index 00000000000..08a24f6d59b --- /dev/null +++ b/packages/plugin-discord/src/providers/channelState.ts @@ -0,0 +1,60 @@ +import { + ChannelType, + type Message as DiscordMessage, + type TextChannel, +} from "discord.js"; +import type { IAgentRuntime, Memory, Provider, State } from "@elizaos/core"; + +const channelStateProvider: Provider = { + get: async (runtime: IAgentRuntime, message: Memory, state?: State) => { + const discordMessage = + (state?.discordMessage as DiscordMessage) || + (state?.discordChannel as DiscordMessage); + if (!discordMessage) { + return ""; + } + + const guild = discordMessage?.guild; + const agentName = state?.agentName || "The agent"; + const senderName = state?.senderName || "someone"; + + if (!guild) { + return ( + agentName + + " is currently in a direct message conversation with " + + senderName + ); + } + + const serverName = guild.name; // The name of the server + const guildId = guild.id; // The ID of the guild + const channel = discordMessage.channel; + + if (!channel) { + console.log("channel is null"); + return ""; + } + + let response = + agentName + + " is currently having a conversation in the channel `@" + + channel.id + + " in the server `" + + serverName + + "` (@" + + guildId + + ")"; + if ( + channel.type === ChannelType.GuildText && + (channel as TextChannel).topic + ) { + // Check if the channel is a text channel + response += + "\nThe 
topic of the channel is: " + + (channel as TextChannel).topic; + } + return response; + }, +}; + +export default channelStateProvider; diff --git a/packages/plugin-discord/src/providers/voiceState.ts b/packages/plugin-discord/src/providers/voiceState.ts new file mode 100644 index 00000000000..283bcb14bab --- /dev/null +++ b/packages/plugin-discord/src/providers/voiceState.ts @@ -0,0 +1,33 @@ +import { getVoiceConnection } from "@discordjs/voice"; +import { ChannelType, type Message as DiscordMessage } from "discord.js"; +import type { IAgentRuntime, Memory, Provider, State } from "@elizaos/core"; + +const voiceStateProvider: Provider = { + get: async (runtime: IAgentRuntime, message: Memory, state?: State) => { + // Voice doesn't get a discord message, so we need to use the channel for guild data + const discordMessage = (state?.discordMessage || + state.discordChannel) as DiscordMessage; + const connection = getVoiceConnection( + (discordMessage as DiscordMessage)?.guild?.id as string + ); + const agentName = state?.agentName || "The agent"; + if (!connection) { + return agentName + " is not currently in a voice channel"; + } + + const channel = ( + (state?.discordMessage as DiscordMessage) || + (state.discordChannel as DiscordMessage) + )?.guild?.channels?.cache?.get( + connection.joinConfig.channelId as string + ); + + if (!channel || channel.type !== ChannelType.GuildVoice) { + return agentName + " is in an invalid voice channel"; + } + + return `${agentName} is currently in the voice channel: ${channel.name} (ID: ${channel.id})`; + }, +}; + +export default voiceStateProvider; diff --git a/packages/plugin-discord/src/templates.ts b/packages/plugin-discord/src/templates.ts new file mode 100644 index 00000000000..898285470e1 --- /dev/null +++ b/packages/plugin-discord/src/templates.ts @@ -0,0 +1,195 @@ +import { messageCompletionFooter, shouldRespondFooter } from "@elizaos/core"; + +export const discordShouldRespondTemplate = + `# Task: Decide if {{agentName}} should respond. +About {{agentName}}: +{{bio}} + +# INSTRUCTIONS: Determine if {{agentName}} should respond to the message and participate in the conversation. Do not comment. Just respond with "RESPOND" or "IGNORE" or "STOP". + +# RESPONSE EXAMPLES +{{user1}}: I just saw a really great movie +{{user2}}: Oh? Which movie? +Result: [IGNORE] + +{{agentName}}: Oh, this is my favorite scene +{{user1}}: sick +{{user2}}: wait, why is it your favorite scene +Result: [RESPOND] + +{{user1}}: stfu bot +Result: [STOP] + +{{user1}}: Hey {{agent}}, can you help me with something +Result: [RESPOND] + +{{user1}}: {{agentName}} stfu plz +Result: [STOP] + +{{user1}}: i need help +{{agentName}}: how can I help you? +{{user1}}: no. i need help from someone else +Result: [IGNORE] + +{{user1}}: Hey {{agent}}, can I ask you a question +{{agentName}}: Sure, what is it +{{user1}}: can you ask claude to create a basic react module that demonstrates a counter +Result: [RESPOND] + +{{user1}}: {{agentName}} can you tell me a story +{{user1}}: about a girl named elara +{{agentName}}: Sure. +{{agentName}}: Once upon a time, in a quaint little village, there was a curious girl named Elara. +{{agentName}}: Elara was known for her adventurous spirit and her knack for finding beauty in the mundane. +{{user1}}: I'm loving it, keep going +Result: [RESPOND] + +{{user1}}: {{agentName}} stop responding plz +Result: [STOP] + +{{user1}}: okay, i want to test something. can you say marco? +{{agentName}}: marco +{{user1}}: great. 
okay, now do it again +Result: [RESPOND] + +Response options are [RESPOND], [IGNORE] and [STOP]. + +{{agentName}} is in a room with other users and is very worried about being annoying and saying too much. +Respond with [RESPOND] to messages that are directed at {{agentName}}, or participate in conversations that are interesting or relevant to their background. +If a message is not interesting or relevant, respond with [IGNORE] +Unless directly responding to a user, respond with [IGNORE] to messages that are very short or do not contain much information. +If a user asks {{agentName}} to be quiet, respond with [STOP] +If {{agentName}} concludes a conversation and isn't part of the conversation anymore, respond with [STOP] + +IMPORTANT: {{agentName}} is particularly sensitive about being annoying, so if there is any doubt, it is better to respond with [IGNORE]. +If {{agentName}} is conversing with a user and they have not asked to stop, it is better to respond with [RESPOND]. + +{{recentMessages}} + +# INSTRUCTIONS: Choose the option that best describes {{agentName}}'s response to the last message. Ignore messages if they are addressed to someone else. +` + shouldRespondFooter; + +export const discordVoiceHandlerTemplate = + `# Task: Generate conversational voice dialog for {{agentName}}. +About {{agentName}}: +{{bio}} + +# Attachments +{{attachments}} + +# Capabilities +Note that {{agentName}} is capable of reading/seeing/hearing various forms of media, including images, videos, audio, plaintext and PDFs. Recent attachments have been included above under the "Attachments" section. + +{{actions}} + +{{messageDirections}} + +{{recentMessages}} + +# Instructions: Write the next message for {{agentName}}. Include an optional action if appropriate. {{actionNames}} +` + messageCompletionFooter; + +export const discordMessageHandlerTemplate = + // {{goals}} + `# Action Examples +{{actionExamples}} +(Action examples are for reference only. Do not use the information from them in your response.) + +# Knowledge +{{knowledge}} + +# Task: Generate dialog and actions for the character {{agentName}}. +About {{agentName}}: +{{bio}} +{{lore}} + +Examples of {{agentName}}'s dialog and actions: +{{characterMessageExamples}} + +{{providers}} + +{{attachments}} + +{{actions}} + +# Capabilities +Note that {{agentName}} is capable of reading/seeing/hearing various forms of media, including images, videos, audio, plaintext and PDFs. Recent attachments have been included above under the "Attachments" section. + +{{messageDirections}} + +{{recentMessages}} + +# Instructions: Write the next message for {{agentName}}. Include an action, if appropriate. {{actionNames}} +` + messageCompletionFooter; + +export const discordAutoPostTemplate = + `# Action Examples +NONE: Respond but perform no additional action. This is the default if the agent is speaking and not doing anything additional. + +# Task: Generate an engaging community message as {{agentName}}. +About {{agentName}}: +{{bio}} +{{lore}} + +Examples of {{agentName}}'s dialog and actions: +{{characterMessageExamples}} + +{{messageDirections}} + +# Recent Chat History: +{{recentMessages}} + +# Instructions: Write a natural, engaging message to restart community conversation. 
Focus on:
+- Community engagement
+- Educational topics
+- General discussions
+- Support queries
+- Keep message warm and inviting
+- Maximum 3 lines
+- Use 1-2 emojis maximum
+- Avoid financial advice
+- Stay within known facts
+- No team member mentions
+- Be hyped, not repetitive
+- Be natural, act like a human, connect with the community
+- Don't sound robotic
+- Randomly grab the most recent 5 messages for some context. Validate the context randomly and use that as a reference point for your next message, but not always, only when relevant.
+- If the recent messages are mostly from {{agentName}}, make sure to create conversation starters, given there are no messages from others to reference.
+- DO NOT REPEAT THE SAME thing that you just said from your recent chat history; start the message differently each time, and be organic, non-repetitive.
+
+# Instructions: Write the next message for {{agentName}}. Include the "NONE" action only, as the only valid action for auto-posts is "NONE".
+` + messageCompletionFooter;
+
+export const discordAnnouncementHypeTemplate =
+    `# Action Examples
+NONE: Respond but perform no additional action. This is the default if the agent is speaking and not doing anything additional.
+
+# Task: Generate announcement hype message as {{agentName}}.
+About {{agentName}}:
+{{bio}}
+{{lore}}
+
+Examples of {{agentName}}'s dialog and actions:
+{{characterMessageExamples}}
+
+{{messageDirections}}
+
+# Announcement Content:
+{{announcementContent}}
+
+# Instructions: Write an exciting message to bring attention to the announcement. Requirements:
+- Reference the announcement channel using <#{{announcementChannelId}}>
+- Reference the announcement content where appropriate so the message is dynamic rather than a static post
+- Create genuine excitement
+- Encourage community participation
+- If there are links like Twitter/X posts, encourage users to like/retweet/comment to spread awareness; say that directly, but wrap it into the post so it's natural.
+- Stay within announced facts only
+- No additional promises or assumptions
+- No team member mentions
+- Start the message differently each time. Don't start with the same word like "hey", "hey hey", etc.; be dynamic
+- Address everyone, not as a direct reply to whoever made the announcement or wrote it, but you can reference them
+- Maximum 3-7 lines formatted nicely if needed, based on the context of the announcement
+- Use 1-2 emojis maximum
+
+# Instructions: Write the next message for {{agentName}}. Include the "NONE" action only, as no other actions are appropriate for announcement hype.
+` + messageCompletionFooter; \ No newline at end of file diff --git a/packages/plugin-discord/src/types.ts b/packages/plugin-discord/src/types.ts new file mode 100644 index 00000000000..bffdc28c665 --- /dev/null +++ b/packages/plugin-discord/src/types.ts @@ -0,0 +1,15 @@ +import { + type Character, + type IAgentRuntime +} from "@elizaos/core"; +import { + Client +} from "discord.js"; + +export interface IDiscordClient { + apiToken: string; + client: Client; + runtime: IAgentRuntime; + character: Character; + stop(): Promise; +} \ No newline at end of file diff --git a/packages/plugin-discord/src/utils.ts b/packages/plugin-discord/src/utils.ts new file mode 100644 index 00000000000..6446fc5d70b --- /dev/null +++ b/packages/plugin-discord/src/utils.ts @@ -0,0 +1,314 @@ +import { + type IAgentRuntime, + ModelClass, + logger, + generateText, + trimTokens, + parseJSONObjectFromText, +} from "@elizaos/core"; +import { + ChannelType, + type Message as DiscordMessage, + PermissionsBitField, + type TextChannel, + ThreadChannel, +} from "discord.js"; + +export function getWavHeader( + audioLength: number, + sampleRate: number, + channelCount = 1, + bitsPerSample = 16 +): Buffer { + const wavHeader = Buffer.alloc(44); + wavHeader.write("RIFF", 0); + wavHeader.writeUInt32LE(36 + audioLength, 4); // Length of entire file in bytes minus 8 + wavHeader.write("WAVE", 8); + wavHeader.write("fmt ", 12); + wavHeader.writeUInt32LE(16, 16); // Length of format data + wavHeader.writeUInt16LE(1, 20); // Type of format (1 is PCM) + wavHeader.writeUInt16LE(channelCount, 22); // Number of channels + wavHeader.writeUInt32LE(sampleRate, 24); // Sample rate + wavHeader.writeUInt32LE( + (sampleRate * bitsPerSample * channelCount) / 8, + 28 + ); // Byte rate + wavHeader.writeUInt16LE((bitsPerSample * channelCount) / 8, 32); // Block align ((BitsPerSample * Channels) / 8) + wavHeader.writeUInt16LE(bitsPerSample, 34); // Bits per sample + wavHeader.write("data", 36); // Data chunk header + wavHeader.writeUInt32LE(audioLength, 40); // Data chunk size + return wavHeader; +} + +const MAX_MESSAGE_LENGTH = 1900; + +export async function generateSummary( + runtime: IAgentRuntime, + text: string +): Promise<{ title: string; description: string }> { + // make sure text is under 128k characters + text = await trimTokens(text, 100000, runtime); + + const prompt = `Please generate a concise summary for the following text: + + Text: """ + ${text} + """ + + Respond with a JSON object in the following format: + \`\`\`json + { + "title": "Generated Title", + "summary": "Generated summary and/or description of the text" + } + \`\`\``; + + const response = await generateText({ + runtime, + context: prompt, + modelClass: ModelClass.TEXT_SMALL, + }); + + const parsedResponse = parseJSONObjectFromText(response); + + if (parsedResponse?.title && parsedResponse?.summary) { + return { + title: parsedResponse.title, + description: parsedResponse.summary, + }; + } + + return { + title: "", + description: "", + }; +} + +export async function sendMessageInChunks( + channel: TextChannel, + content: string, + inReplyTo: string, + files: any[] +): Promise { + const sentMessages: DiscordMessage[] = []; + const messages = splitMessage(content); + try { + for (let i = 0; i < messages.length; i++) { + const message = messages[i]; + if ( + message.trim().length > 0 || + (i === messages.length - 1 && files && files.length > 0) + ) { + const options: any = { + content: message.trim(), + }; + + // if (i === 0 && inReplyTo) { + // // Reply to the specified 
message for the first chunk + // options.reply = { + // messageReference: inReplyTo, + // }; + // } + + if (i === messages.length - 1 && files && files.length > 0) { + // Attach files to the last message chunk + options.files = files; + } + + const m = await channel.send(options); + sentMessages.push(m); + } + } + } catch (error) { + logger.error("Error sending message:", error); + } + + return sentMessages; +} + +function splitMessage(content: string): string[] { + const messages: string[] = []; + let currentMessage = ""; + + const rawLines = content?.split("\n") || []; + // split all lines into MAX_MESSAGE_LENGTH chunks so any long lines are split + const lines = rawLines.flatMap((line) => { + const chunks = []; + while (line.length > MAX_MESSAGE_LENGTH) { + chunks.push(line.slice(0, MAX_MESSAGE_LENGTH)); + line = line.slice(MAX_MESSAGE_LENGTH); + } + chunks.push(line); + return chunks; + }); + + for (const line of lines) { + if (currentMessage.length + line.length + 1 > MAX_MESSAGE_LENGTH) { + messages.push(currentMessage.trim()); + currentMessage = ""; + } + currentMessage += line + "\n"; + } + + if (currentMessage.trim().length > 0) { + messages.push(currentMessage.trim()); + } + + return messages; +} + +export function canSendMessage(channel) { + // validate input + if (!channel) { + return { + canSend: false, + reason: "No channel given", + }; + } + // if it is a DM channel, we can always send messages + if (channel.type === ChannelType.DM) { + return { + canSend: true, + reason: null, + }; + } + const botMember = channel.guild?.members.cache.get(channel.client.user.id); + + if (!botMember) { + return { + canSend: false, + reason: "Not a guild channel or bot member not found", + }; + } + + // Required permissions for sending messages + const requiredPermissions = [ + PermissionsBitField.Flags.ViewChannel, + PermissionsBitField.Flags.SendMessages, + PermissionsBitField.Flags.ReadMessageHistory, + ]; + + // Add thread-specific permission if it's a thread + if (channel instanceof ThreadChannel) { + requiredPermissions.push( + PermissionsBitField.Flags.SendMessagesInThreads + ); + } + + // Check permissions + const permissions = channel.permissionsFor(botMember); + + if (!permissions) { + return { + canSend: false, + reason: "Could not retrieve permissions", + }; + } + + // Check each required permission + const missingPermissions = requiredPermissions.filter( + (perm) => !permissions.has(perm) + ); + + return { + canSend: missingPermissions.length === 0, + missingPermissions: missingPermissions, + reason: + missingPermissions.length > 0 + ? `Missing permissions: ${missingPermissions + .map((p) => String(p)) + .join(", ")}` + : null, + }; +} + +export function cosineSimilarity( + text1: string, + text2: string, + text3?: string +): number { + const preprocessText = (text: string) => + text + .toLowerCase() + .replace(/[^\w\s'_-]/g, " ") + .replace(/\s+/g, " ") + .trim(); + + const getWords = (text: string) => { + return text.split(" ").filter((word) => word.length > 1); + }; + + const words1 = getWords(preprocessText(text1)); + const words2 = getWords(preprocessText(text2)); + const words3 = text3 ? 
getWords(preprocessText(text3)) : []; + + const freq1: { [key: string]: number } = {}; + const freq2: { [key: string]: number } = {}; + const freq3: { [key: string]: number } = {}; + + words1.forEach((word) => (freq1[word] = (freq1[word] || 0) + 1)); + words2.forEach((word) => (freq2[word] = (freq2[word] || 0) + 1)); + if (words3.length) { + words3.forEach((word) => (freq3[word] = (freq3[word] || 0) + 1)); + } + + const uniqueWords = new Set([ + ...Object.keys(freq1), + ...Object.keys(freq2), + ...(words3.length ? Object.keys(freq3) : []), + ]); + + let dotProduct = 0; + let magnitude1 = 0; + let magnitude2 = 0; + let magnitude3 = 0; + + uniqueWords.forEach((word) => { + const val1 = freq1[word] || 0; + const val2 = freq2[word] || 0; + const val3 = freq3[word] || 0; + + if (words3.length) { + // For three-way, calculate pairwise similarities + const sim12 = val1 * val2; + const sim23 = val2 * val3; + const sim13 = val1 * val3; + + // Take maximum similarity between any pair + dotProduct += Math.max(sim12, sim23, sim13); + } else { + dotProduct += val1 * val2; + } + + magnitude1 += val1 * val1; + magnitude2 += val2 * val2; + if (words3.length) { + magnitude3 += val3 * val3; + } + }); + + magnitude1 = Math.sqrt(magnitude1); + magnitude2 = Math.sqrt(magnitude2); + magnitude3 = words3.length ? Math.sqrt(magnitude3) : 1; + + if ( + magnitude1 === 0 || + magnitude2 === 0 || + (words3.length && magnitude3 === 0) + ) + return 0; + + // For two texts, use original calculation + if (!words3.length) { + return dotProduct / (magnitude1 * magnitude2); + } + + // For three texts, use max magnitude pair to maintain scale + const maxMagnitude = Math.max( + magnitude1 * magnitude2, + magnitude2 * magnitude3, + magnitude1 * magnitude3 + ); + + return dotProduct / maxMagnitude; +} diff --git a/packages/plugin-discord/src/voice.ts b/packages/plugin-discord/src/voice.ts new file mode 100644 index 00000000000..2955c5d523c --- /dev/null +++ b/packages/plugin-discord/src/voice.ts @@ -0,0 +1,1107 @@ +import { + type AudioPlayer, + type AudioReceiveStream, + NoSubscriberBehavior, + StreamType, + type VoiceConnection, + VoiceConnectionStatus, + createAudioPlayer, + createAudioResource, + entersState, + getVoiceConnections, + joinVoiceChannel, +} from "@discordjs/voice"; +import { + type Content, + type HandlerCallback, + type IAgentRuntime, + type ITranscriptionService, + type Memory, + ModelClass, + ServiceType, + type State, + type UUID, + composeContext, + composeRandomUser, + generateMessageResponse, + generateShouldRespond, + logger, + stringToUuid +} from "@elizaos/core"; +import { + type BaseGuildVoiceChannel, + ChannelType, + type Client, + type Guild, + type GuildMember, + type VoiceChannel, + type VoiceState, +} from "discord.js"; +import EventEmitter from "events"; +import prism from "prism-media"; +import { type Readable, pipeline } from "stream"; +import type { DiscordClient } from "./index.ts"; +import { + discordShouldRespondTemplate, + discordVoiceHandlerTemplate, +} from "./templates.ts"; +import { getWavHeader } from "./utils.ts"; + +// These values are chosen for compatibility with picovoice components +const DECODE_FRAME_SIZE = 1024; +const DECODE_SAMPLE_RATE = 16000; + +export class AudioMonitor { + private readable: Readable; + private buffers: Buffer[] = []; + private maxSize: number; + private lastFlagged = -1; + private ended = false; + + constructor( + readable: Readable, + maxSize: number, + onStart: () => void, + callback: (buffer: Buffer) => void + ) { + this.readable = readable; + 
this.maxSize = maxSize; + this.readable.on("data", (chunk: Buffer) => { + //console.log('AudioMonitor got data'); + if (this.lastFlagged < 0) { + this.lastFlagged = this.buffers.length; + } + this.buffers.push(chunk); + const currentSize = this.buffers.reduce( + (acc, cur) => acc + cur.length, + 0 + ); + while (currentSize > this.maxSize) { + this.buffers.shift(); + this.lastFlagged--; + } + }); + this.readable.on("end", () => { + logger.log("AudioMonitor ended"); + this.ended = true; + if (this.lastFlagged < 0) return; + callback(this.getBufferFromStart()); + this.lastFlagged = -1; + }); + this.readable.on("speakingStopped", () => { + if (this.ended) return; + logger.log("Speaking stopped"); + if (this.lastFlagged < 0) return; + callback(this.getBufferFromStart()); + }); + this.readable.on("speakingStarted", () => { + if (this.ended) return; + onStart(); + logger.log("Speaking started"); + this.reset(); + }); + } + + stop() { + this.readable.removeAllListeners("data"); + this.readable.removeAllListeners("end"); + this.readable.removeAllListeners("speakingStopped"); + this.readable.removeAllListeners("speakingStarted"); + } + + isFlagged() { + return this.lastFlagged >= 0; + } + + getBufferFromFlag() { + if (this.lastFlagged < 0) { + return null; + } + const buffer = Buffer.concat(this.buffers.slice(this.lastFlagged)); + return buffer; + } + + getBufferFromStart() { + const buffer = Buffer.concat(this.buffers); + return buffer; + } + + reset() { + this.buffers = []; + this.lastFlagged = -1; + } + + isEnded() { + return this.ended; + } +} + +export class VoiceManager extends EventEmitter { + private processingVoice = false; + private transcriptionTimeout: NodeJS.Timeout | null = null; + private userStates: Map< + string, + { + buffers: Buffer[]; + totalLength: number; + lastActive: number; + transcriptionText: string; + } + > = new Map(); + private activeAudioPlayer: AudioPlayer | null = null; + private client: Client; + private runtime: IAgentRuntime; + private streams: Map = new Map(); + private connections: Map = new Map(); + private activeMonitors: Map< + string, + { channel: BaseGuildVoiceChannel; monitor: AudioMonitor } + > = new Map(); + + constructor(client: DiscordClient) { + super(); + this.client = client.client; + this.runtime = client.runtime; + } + + async handleVoiceStateUpdate(oldState: VoiceState, newState: VoiceState) { + const oldChannelId = oldState.channelId; + const newChannelId = newState.channelId; + const member = newState.member; + if (!member) return; + if (member.id === this.client.user?.id) { + return; + } + + // Ignore mute/unmute events + if (oldChannelId === newChannelId) { + return; + } + + // User leaving a channel where the bot is present + if (oldChannelId && this.connections.has(oldChannelId)) { + this.stopMonitoringMember(member.id); + } + + // User joining a channel where the bot is present + if (newChannelId && this.connections.has(newChannelId)) { + await this.monitorMember( + member, + newState.channel as BaseGuildVoiceChannel + ); + } + } + + async joinChannel(channel: BaseGuildVoiceChannel) { + const oldConnection = this.getVoiceConnection( + channel.guildId as string + ); + if (oldConnection) { + try { + oldConnection.destroy(); + // Remove all associated streams and monitors + this.streams.clear(); + this.activeMonitors.clear(); + } catch (error) { + console.error("Error leaving voice channel:", error); + } + } + + const connection = joinVoiceChannel({ + channelId: channel.id, + guildId: channel.guild.id, + adapterCreator: 
channel.guild.voiceAdapterCreator as any, + selfDeaf: false, + selfMute: false, + group: this.client.user.id, + }); + + try { + // Wait for either Ready or Signalling state + await Promise.race([ + entersState(connection, VoiceConnectionStatus.Ready, 20_000), + entersState( + connection, + VoiceConnectionStatus.Signalling, + 20_000 + ), + ]); + + // Log connection success + logger.log( + `Voice connection established in state: ${connection.state.status}` + ); + + // Set up ongoing state change monitoring + connection.on("stateChange", async (oldState, newState) => { + logger.log( + `Voice connection state changed from ${oldState.status} to ${newState.status}` + ); + + if (newState.status === VoiceConnectionStatus.Disconnected) { + logger.log("Handling disconnection..."); + + try { + // Try to reconnect if disconnected + await Promise.race([ + entersState( + connection, + VoiceConnectionStatus.Signalling, + 5_000 + ), + entersState( + connection, + VoiceConnectionStatus.Connecting, + 5_000 + ), + ]); + // Seems to be reconnecting to a new channel + logger.log("Reconnecting to channel..."); + } catch (e) { + // Seems to be a real disconnect, destroy and cleanup + logger.log( + "Disconnection confirmed - cleaning up..." + e + ); + connection.destroy(); + this.connections.delete(channel.id); + } + } else if ( + newState.status === VoiceConnectionStatus.Destroyed + ) { + this.connections.delete(channel.id); + } else if ( + !this.connections.has(channel.id) && + (newState.status === VoiceConnectionStatus.Ready || + newState.status === VoiceConnectionStatus.Signalling) + ) { + this.connections.set(channel.id, connection); + } + }); + + connection.on("error", (error) => { + logger.log("Voice connection error:", error); + // Don't immediately destroy - let the state change handler deal with it + logger.log( + "Connection error - will attempt to recover..." 
+ ); + }); + + // Store the connection + this.connections.set(channel.id, connection); + + // Continue with voice state modifications + const me = channel.guild.members.me; + if (me?.voice && me.permissions.has("DeafenMembers")) { + try { + await me.voice.setDeaf(false); + await me.voice.setMute(false); + } catch (error) { + logger.log("Failed to modify voice state:", error); + // Continue even if this fails + } + } + + connection.receiver.speaking.on("start", async (userId: string) => { + let user = channel.members.get(userId); + if (!user) { + try { + user = await channel.guild.members.fetch(userId); + } catch (error) { + console.error("Failed to fetch user:", error); + } + } + if (user && !user?.user.bot) { + this.monitorMember(user as GuildMember, channel); + this.streams.get(userId)?.emit("speakingStarted"); + } + }); + + connection.receiver.speaking.on("end", async (userId: string) => { + const user = channel.members.get(userId); + if (!user?.user.bot) { + this.streams.get(userId)?.emit("speakingStopped"); + } + }); + } catch (error) { + logger.log("Failed to establish voice connection:", error); + connection.destroy(); + this.connections.delete(channel.id); + throw error; + } + } + + private getVoiceConnection(guildId: string) { + const connections = getVoiceConnections(this.client.user.id); + if (!connections) { + return; + } + const connection = [...connections.values()].find( + (connection) => connection.joinConfig.guildId === guildId + ); + return connection; + } + + private async monitorMember( + member: GuildMember, + channel: BaseGuildVoiceChannel + ) { + const userId = member?.id; + const userName = member?.user?.username; + const name = member?.user?.displayName; + const connection = this.getVoiceConnection(member?.guild?.id); + const receiveStream = connection?.receiver.subscribe(userId, { + autoDestroy: true, + emitClose: true, + }); + if (!receiveStream || receiveStream.readableLength === 0) { + return; + } + const opusDecoder = new prism.opus.Decoder({ + channels: 1, + rate: DECODE_SAMPLE_RATE, + frameSize: DECODE_FRAME_SIZE, + }); + const volumeBuffer: number[] = []; + const VOLUME_WINDOW_SIZE = 30; + const SPEAKING_THRESHOLD = 0.05; + opusDecoder.on("data", (pcmData: Buffer) => { + // Monitor the audio volume while the agent is speaking. + // If the average volume of the user's audio exceeds the defined threshold, it indicates active speaking. + // When active speaking is detected, stop the agent's current audio playback to avoid overlap. 
+ + if (this.activeAudioPlayer) { + const samples = new Int16Array( + pcmData.buffer, + pcmData.byteOffset, + pcmData.length / 2 + ); + const maxAmplitude = Math.max(...samples.map(Math.abs)) / 32768; + volumeBuffer.push(maxAmplitude); + + if (volumeBuffer.length > VOLUME_WINDOW_SIZE) { + volumeBuffer.shift(); + } + const avgVolume = + volumeBuffer.reduce((sum, v) => sum + v, 0) / + VOLUME_WINDOW_SIZE; + + if (avgVolume > SPEAKING_THRESHOLD) { + volumeBuffer.length = 0; + this.cleanupAudioPlayer(this.activeAudioPlayer); + this.processingVoice = false; + } + } + }); + pipeline( + receiveStream as AudioReceiveStream, + opusDecoder as any, + (err: Error | null) => { + if (err) { + console.log(`Opus decoding pipeline error: ${err}`); + } + } + ); + this.streams.set(userId, opusDecoder); + this.connections.set(userId, connection as VoiceConnection); + opusDecoder.on("error", (err: any) => { + console.log(`Opus decoding error: ${err}`); + }); + const errorHandler = (err: any) => { + console.log(`Opus decoding error: ${err}`); + }; + const streamCloseHandler = () => { + console.log(`voice stream from ${member?.displayName} closed`); + this.streams.delete(userId); + this.connections.delete(userId); + }; + const closeHandler = () => { + console.log(`Opus decoder for ${member?.displayName} closed`); + opusDecoder.removeListener("error", errorHandler); + opusDecoder.removeListener("close", closeHandler); + receiveStream?.removeListener("close", streamCloseHandler); + }; + opusDecoder.on("error", errorHandler); + opusDecoder.on("close", closeHandler); + receiveStream?.on("close", streamCloseHandler); + + this.client.emit( + "userStream", + userId, + name, + userName, + channel, + opusDecoder + ); + } + + leaveChannel(channel: BaseGuildVoiceChannel) { + const connection = this.connections.get(channel.id); + if (connection) { + connection.destroy(); + this.connections.delete(channel.id); + } + + // Stop monitoring all members in this channel + for (const [memberId, monitorInfo] of this.activeMonitors) { + if ( + monitorInfo.channel.id === channel.id && + memberId !== this.client.user?.id + ) { + this.stopMonitoringMember(memberId); + } + } + + console.log(`Left voice channel: ${channel.name} (${channel.id})`); + } + + stopMonitoringMember(memberId: string) { + const monitorInfo = this.activeMonitors.get(memberId); + if (monitorInfo) { + monitorInfo.monitor.stop(); + this.activeMonitors.delete(memberId); + this.streams.delete(memberId); + console.log(`Stopped monitoring user ${memberId}`); + } + } + + async handleGuildCreate(guild: Guild) { + console.log(`Joined guild ${guild.name}`); + // this.scanGuild(guild); + } + + async debouncedProcessTranscription( + userId: UUID, + name: string, + userName: string, + channel: BaseGuildVoiceChannel + ) { + const DEBOUNCE_TRANSCRIPTION_THRESHOLD = 1500; // wait for 1.5 seconds of silence + + if (this.activeAudioPlayer?.state?.status === "idle") { + logger.log("Cleaning up idle audio player."); + this.cleanupAudioPlayer(this.activeAudioPlayer); + } + + if (this.activeAudioPlayer || this.processingVoice) { + const state = this.userStates.get(userId); + state.buffers.length = 0; + state.totalLength = 0; + return; + } + + if (this.transcriptionTimeout) { + clearTimeout(this.transcriptionTimeout); + } + + this.transcriptionTimeout = setTimeout(async () => { + this.processingVoice = true; + try { + await this.processTranscription( + userId, + channel.id, + channel, + name, + userName + ); + + // Clean all users' previous buffers + this.userStates.forEach((state, _) => { 
+ state.buffers.length = 0; + state.totalLength = 0; + }); + } finally { + this.processingVoice = false; + } + }, DEBOUNCE_TRANSCRIPTION_THRESHOLD); + } + + async handleUserStream( + userId: UUID, + name: string, + userName: string, + channel: BaseGuildVoiceChannel, + audioStream: Readable + ) { + console.log(`Starting audio monitor for user: ${userId}`); + if (!this.userStates.has(userId)) { + this.userStates.set(userId, { + buffers: [], + totalLength: 0, + lastActive: Date.now(), + transcriptionText: "", + }); + } + + const state = this.userStates.get(userId); + + const processBuffer = async (buffer: Buffer) => { + try { + state!.buffers.push(buffer); + state!.totalLength += buffer.length; + state!.lastActive = Date.now(); + this.debouncedProcessTranscription( + userId, + name, + userName, + channel + ); + } catch (error) { + console.error( + `Error processing buffer for user ${userId}:`, + error + ); + } + }; + + new AudioMonitor( + audioStream, + 10000000, + () => { + if (this.transcriptionTimeout) { + clearTimeout(this.transcriptionTimeout); + } + }, + async (buffer) => { + if (!buffer) { + console.error("Received empty buffer"); + return; + } + await processBuffer(buffer); + } + ); + } + + private async processTranscription( + userId: UUID, + channelId: string, + channel: BaseGuildVoiceChannel, + name: string, + userName: string + ) { + const state = this.userStates.get(userId); + if (!state || state.buffers.length === 0) return; + try { + const inputBuffer = Buffer.concat(state.buffers, state.totalLength); + + state.buffers.length = 0; // Clear the buffers + state.totalLength = 0; + // Convert Opus to WAV + const wavBuffer = await this.convertOpusToWav(inputBuffer); + console.log("Starting transcription..."); + + const transcriptionText = await this.runtime + .getService(ServiceType.TRANSCRIPTION) + .transcribe(wavBuffer); + + function isValidTranscription(text: string): boolean { + if (!text || text.includes("[BLANK_AUDIO]")) return false; + return true; + } + + if (transcriptionText && isValidTranscription(transcriptionText)) { + state.transcriptionText += transcriptionText; + } + + if (state.transcriptionText.length) { + this.cleanupAudioPlayer(this.activeAudioPlayer); + const finalText = state.transcriptionText; + state.transcriptionText = ""; + await this.handleUserMessage( + finalText, + userId, + channelId, + channel, + name, + userName + ); + } + } catch (error) { + console.error( + `Error transcribing audio for user ${userId}:`, + error + ); + } + } + + private async handleUserMessage( + message: string, + userId: UUID, + channelId: string, + channel: BaseGuildVoiceChannel, + name: string, + userName: string + ) { + try { + const roomId = stringToUuid(channelId + "-" + this.runtime.agentId); + const userIdUUID = stringToUuid(userId); + + await this.runtime.ensureConnection( + userIdUUID, + roomId, + userName, + name, + "discord" + ); + + let state = await this.runtime.composeState( + { + agentId: this.runtime.agentId, + content: { text: message, source: "Discord" }, + userId: userIdUUID, + roomId, + }, + { + discordChannel: channel, + discordClient: this.client, + agentName: this.runtime.character.name, + } + ); + + if (message && message.startsWith("/")) { + return null; + } + + const zeroVector = await this.runtime.call(ModelClass.TEXT_EMBEDDING, null); + + const memory = { + id: stringToUuid(channelId + "-voice-message-" + Date.now()), + agentId: this.runtime.agentId, + content: { + text: message, + source: "discord", + url: channel.url, + }, + userId: userIdUUID, + 
roomId, + embedding: zeroVector, + createdAt: Date.now(), + }; + + if (!memory.content.text) { + return { text: "", action: "IGNORE" }; + } + + await this.runtime.messageManager.createMemory(memory); + + state = await this.runtime.updateRecentMessageState(state); + + const shouldIgnore = await this._shouldIgnore(memory); + + if (shouldIgnore) { + return { text: "", action: "IGNORE" }; + } + + const shouldRespond = await this._shouldRespond( + message, + userId, + channel, + state + ); + + if (!shouldRespond) { + return; + } + + const context = composeContext({ + state, + template: + this.runtime.character.templates + ?.discordVoiceHandlerTemplate || + this.runtime.character.templates?.messageHandlerTemplate || + discordVoiceHandlerTemplate, + }); + + const responseContent = await this._generateResponse( + memory, + state, + context + ); + + const callback: HandlerCallback = async (content: Content) => { + console.log("callback content: ", content); + const { roomId } = memory; + + const zeroVector = await this.runtime.call(ModelClass.TEXT_EMBEDDING, content.text); + + const responseMemory: Memory = { + id: stringToUuid( + memory.id + "-voice-response-" + Date.now() + ), + agentId: this.runtime.agentId, + userId: this.runtime.agentId, + content: { + ...content, + user: this.runtime.character.name, + inReplyTo: memory.id, + }, + roomId, + embedding: zeroVector, + }; + + if (responseMemory.content.text?.trim()) { + await this.runtime.messageManager.createMemory( + responseMemory + ); + state = await this.runtime.updateRecentMessageState(state); + + const responseStream = null; + + // TODO: fix this + // const responseStream = await this.runtime + // .getService( + // ServiceType.SPEECH_GENERATION + // ) + // .generate(this.runtime, content.text); + + if (responseStream) { + await this.playAudioStream( + userId, + responseStream as Readable + ); + } + + await this.runtime.evaluate(memory, state); + } else { + console.warn("Empty response, skipping"); + } + return [responseMemory]; + }; + + const responseMemories = await callback(responseContent); + + const response = responseContent; + + const content = (response.responseMessage || + response.content || + response.message) as string; + + if (!content) { + return null; + } + + console.log("responseMemories: ", responseMemories); + + await this.runtime.processActions( + memory, + responseMemories, + state, + callback + ); + } catch (error) { + console.error("Error processing transcribed text:", error); + } + } + + private async convertOpusToWav(pcmBuffer: Buffer): Promise { + try { + // Generate the WAV header + const wavHeader = getWavHeader( + pcmBuffer.length, + DECODE_SAMPLE_RATE + ); + + // Concatenate the WAV header and PCM data + const wavBuffer = Buffer.concat([wavHeader, pcmBuffer]); + + return wavBuffer; + } catch (error) { + console.error("Error converting PCM to WAV:", error); + throw error; + } + } + + private async _shouldRespond( + message: string, + userId: UUID, + channel: BaseGuildVoiceChannel, + state: State + ): Promise { + if (userId === this.client.user?.id) return false; + const lowerMessage = message.toLowerCase(); + const botName = this.client.user.username.toLowerCase(); + const characterName = this.runtime.character.name.toLowerCase(); + const guild = channel.guild; + const member = guild?.members.cache.get(this.client.user?.id as string); + const nickname = member?.nickname; + + if ( + lowerMessage.includes(botName as string) || + lowerMessage.includes(characterName) || + lowerMessage.includes( + 
this.client.user?.tag.toLowerCase() as string + ) || + (nickname && lowerMessage.includes(nickname.toLowerCase())) + ) { + return true; + } + + if (!channel.guild) { + return true; + } + + // If none of the above conditions are met, use the generateText to decide + const shouldRespondContext = composeContext({ + state, + template: + this.runtime.character.templates + ?.discordShouldRespondTemplate || + this.runtime.character.templates?.shouldRespondTemplate || + composeRandomUser(discordShouldRespondTemplate, 2), + }); + + const response = await generateShouldRespond({ + runtime: this.runtime, + context: shouldRespondContext, + modelClass: ModelClass.TEXT_SMALL, + }); + + if (response === "RESPOND") { + return true; + } else if (response === "IGNORE") { + return false; + } else if (response === "STOP") { + return false; + } else { + console.error( + "Invalid response from response generateText:", + response + ); + return false; + } + } + + private async _generateResponse( + message: Memory, + state: State, + context: string + ): Promise { + const { userId, roomId } = message; + + const response = await generateMessageResponse({ + runtime: this.runtime, + context, + modelClass: ModelClass.TEXT_SMALL, + }); + + response.source = "discord"; + + if (!response) { + console.error("No response from generateMessageResponse"); + return; + } + + await this.runtime.databaseAdapter.log({ + body: { message, context, response }, + userId: userId, + roomId, + type: "response", + }); + + return response; + } + + private async _shouldIgnore(message: Memory): Promise { + // console.log("message: ", message); + logger.debug("message.content: ", message.content); + // if the message is 3 characters or less, ignore it + if ((message.content as Content).text.length < 3) { + return true; + } + + const loseInterestWords = [ + // telling the bot to stop talking + "shut up", + "stop", + "dont talk", + "silence", + "stop talking", + "be quiet", + "hush", + "stfu", + "stupid bot", + "dumb bot", + + // offensive words + "fuck", + "shit", + "damn", + "suck", + "dick", + "cock", + "sex", + "sexy", + ]; + if ( + (message.content as Content).text.length < 50 && + loseInterestWords.some((word) => + (message.content as Content).text?.toLowerCase().includes(word) + ) + ) { + return true; + } + + const ignoreWords = ["k", "ok", "bye", "lol", "nm", "uh"]; + if ( + (message.content as Content).text?.length < 8 && + ignoreWords.some((word) => + (message.content as Content).text?.toLowerCase().includes(word) + ) + ) { + return true; + } + + return false; + } + + async scanGuild(guild: Guild) { + let chosenChannel: BaseGuildVoiceChannel | null = null; + + try { + const channelId = this.runtime.getSetting( + "DISCORD_VOICE_CHANNEL_ID" + ) as string; + if (channelId) { + const channel = await guild.channels.fetch(channelId); + if (channel?.isVoiceBased()) { + chosenChannel = channel as BaseGuildVoiceChannel; + } + } + + if (!chosenChannel) { + const channels = (await guild.channels.fetch()).filter( + (channel) => channel?.type == ChannelType.GuildVoice + ); + for (const [, channel] of channels) { + const voiceChannel = channel as BaseGuildVoiceChannel; + if ( + voiceChannel.members.size > 0 && + (chosenChannel === null || + voiceChannel.members.size > + chosenChannel.members.size) + ) { + chosenChannel = voiceChannel; + } + } + } + + if (chosenChannel) { + console.log(`Joining channel: ${chosenChannel.name}`); + await this.joinChannel(chosenChannel); + } else { + console.warn("No suitable voice channel found to join."); + } + } 
catch (error) { + console.error("Error selecting or joining a voice channel:", error); + } + } + + async playAudioStream(userId: UUID, audioStream: Readable) { + const connection = this.connections.get(userId); + if (connection == null) { + console.log(`No connection for user ${userId}`); + return; + } + this.cleanupAudioPlayer(this.activeAudioPlayer); + const audioPlayer = createAudioPlayer({ + behaviors: { + noSubscriber: NoSubscriberBehavior.Pause, + }, + }); + this.activeAudioPlayer = audioPlayer; + connection.subscribe(audioPlayer); + + const audioStartTime = Date.now(); + + const resource = createAudioResource(audioStream, { + inputType: StreamType.Arbitrary, + }); + audioPlayer.play(resource); + + audioPlayer.on("error", (err: any) => { + console.log(`Audio player error: ${err}`); + }); + + audioPlayer.on( + "stateChange", + (_oldState: any, newState: { status: string }) => { + if (newState.status == "idle") { + const idleTime = Date.now(); + console.log( + `Audio playback took: ${idleTime - audioStartTime}ms` + ); + } + } + ); + } + + cleanupAudioPlayer(audioPlayer: AudioPlayer) { + if (!audioPlayer) return; + + audioPlayer.stop(); + audioPlayer.removeAllListeners(); + if (audioPlayer === this.activeAudioPlayer) { + this.activeAudioPlayer = null; + } + } + + async handleJoinChannelCommand(interaction: any) { + try { + // Defer the reply immediately to prevent interaction timeout + await interaction.deferReply(); + + const channelId = interaction.options.get("channel") + ?.value as string; + if (!channelId) { + await interaction.editReply( + "Please provide a voice channel to join." + ); + return; + } + + const guild = interaction.guild; + if (!guild) { + await interaction.editReply("Could not find guild."); + return; + } + + const voiceChannel = interaction.guild.channels.cache.find( + (channel: VoiceChannel) => + channel.id === channelId && + channel.type === ChannelType.GuildVoice + ); + + if (!voiceChannel) { + await interaction.editReply("Voice channel not found!"); + return; + } + + await this.joinChannel(voiceChannel as BaseGuildVoiceChannel); + await interaction.editReply( + `Joined voice channel: ${voiceChannel.name}` + ); + } catch (error) { + console.error("Error joining voice channel:", error); + // Use editReply instead of reply for the error case + await interaction + .editReply("Failed to join the voice channel.") + .catch(console.error); + } + } + + async handleLeaveChannelCommand(interaction: any) { + const connection = this.getVoiceConnection(interaction.guildId as any); + + if (!connection) { + await interaction.reply("Not currently in a voice channel."); + return; + } + + try { + connection.destroy(); + await interaction.reply("Left the voice channel."); + } catch (error) { + console.error("Error leaving voice channel:", error); + await interaction.reply("Failed to leave the voice channel."); + } + } +} diff --git a/packages/plugin-discord/tsconfig.json b/packages/plugin-discord/tsconfig.json new file mode 100644 index 00000000000..2153cf41345 --- /dev/null +++ b/packages/plugin-discord/tsconfig.json @@ -0,0 +1,25 @@ +{ + "compilerOptions": { + "outDir": "dist", + "rootDir": "src", + "lib": ["ESNext"], + "target": "ESNext", + "module": "Preserve", + "moduleResolution": "Bundler", + "strict": false, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": false, + "allowImportingTsExtensions": true, + "declaration": true, + "emitDeclarationOnly": true, + "resolveJsonModule": true, + "noImplicitAny": false, + "allowJs": true, + 
"checkJs": false, + "noEmitOnError": false, + "moduleDetection": "force", + "allowArbitraryExtensions": true + }, + "include": ["src/**/*.ts"] +} \ No newline at end of file diff --git a/packages/plugin-discord/tsup.config.ts b/packages/plugin-discord/tsup.config.ts new file mode 100644 index 00000000000..d8e5a4e921b --- /dev/null +++ b/packages/plugin-discord/tsup.config.ts @@ -0,0 +1,21 @@ +import { defineConfig } from "tsup"; + +export default defineConfig({ + entry: ["src/index.ts"], + outDir: "dist", + sourcemap: true, + clean: true, + format: ["esm"], // Ensure you're targeting CommonJS + external: [ + "dotenv", // Externalize dotenv to prevent bundling + "fs", // Externalize fs to use Node.js built-in module + "path", // Externalize other built-ins if necessary + "@reflink/reflink", + "@node-llama-cpp", + "https", + "http", + "agentkeepalive", + "@elizaos/core" + // Add other modules you want to externalize + ], +}); diff --git a/packages/plugin-discord/vitest.config.ts b/packages/plugin-discord/vitest.config.ts new file mode 100644 index 00000000000..a11fbbd0d9e --- /dev/null +++ b/packages/plugin-discord/vitest.config.ts @@ -0,0 +1,14 @@ +import { defineConfig } from 'vitest/config'; +import { resolve } from 'path'; + +export default defineConfig({ + test: { + globals: true, + environment: 'node', + }, + resolve: { + alias: { + '@elizaos/core': resolve(__dirname, '../core/src'), + }, + }, +}); diff --git a/packages/plugin-node/.npmignore b/packages/plugin-node/.npmignore new file mode 100644 index 00000000000..078562eceab --- /dev/null +++ b/packages/plugin-node/.npmignore @@ -0,0 +1,6 @@ +* + +!dist/** +!package.json +!readme.md +!tsup.config.ts \ No newline at end of file diff --git a/packages/plugin-node/README.md b/packages/plugin-node/README.md new file mode 100644 index 00000000000..f9066fcb08a --- /dev/null +++ b/packages/plugin-node/README.md @@ -0,0 +1,384 @@ +# @elizaos/plugin-node + +Core Node.js plugin for Eliza OS that provides essential services and actions for file operations, media processing, and cloud integrations. + +## Overview + +The Node plugin serves as a foundational component of Eliza OS, bridging core Node.js capabilities with the Eliza ecosystem. It provides crucial services for file operations, media processing, speech synthesis, and cloud integrations, enabling both local and cloud-based functionality for Eliza agents. 
+ +## Features + +- **AWS S3 Integration**: File upload and management with AWS S3 +- **Browser Automation**: Web scraping and content extraction with Playwright +- **Image Processing**: Image description and analysis capabilities +- **PDF Processing**: PDF text extraction and parsing +- **Speech Synthesis**: Text-to-speech using ElevenLabs and VITS +- **Transcription**: Speech-to-text using various providers (OpenAI, Deepgram, Local) +- **Video Processing**: YouTube video download and transcription +- **LLaMA Integration**: Local LLM support with LLaMA models + +## Installation + +```bash +npm install @elizaos/plugin-node +``` + +## Configuration + +The plugin requires various environment variables depending on which services you plan to use: + +### Core Settings + +```env +OPENAI_API_KEY=your_openai_api_key +``` + +### Voice Settings (Optional) + +```env +ELEVENLABS_XI_API_KEY=your_elevenlabs_api_key +ELEVENLABS_MODEL_ID=eleven_monolingual_v1 +ELEVENLABS_VOICE_ID=your_voice_id +ELEVENLABS_VOICE_STABILITY=0.5 +ELEVENLABS_VOICE_SIMILARITY_BOOST=0.75 +ELEVENLABS_OPTIMIZE_STREAMING_LATENCY=0 +ELEVENLABS_OUTPUT_FORMAT=pcm_16000 +VITS_VOICE=en_US-hfc_female-medium +``` + +### AWS Settings (Optional) + +```env +AWS_ACCESS_KEY_ID=your_aws_access_key +AWS_SECRET_ACCESS_KEY=your_aws_secret_key +AWS_REGION=your_aws_region +AWS_S3_BUCKET=your_s3_bucket +AWS_S3_UPLOAD_PATH=your_upload_path +AWS_S3_ENDPOINT=an_alternative_endpoint +AWS_S3_SSL_ENABLED=boolean(true|false) +AWS_S3_FORCE_PATH_STYLE=boolean(true|false) +``` + +## Usage + +```typescript +import { createNodePlugin } from "@elizaos/plugin-node"; + +// Initialize the plugin +const nodePlugin = createNodePlugin(); + +// Register with Eliza OS +elizaos.registerPlugin(nodePlugin); +``` + +## Services + +### AwsS3Service + +Handles file uploads and management with AWS S3. + +### BrowserService + +Provides web scraping and content extraction capabilities using Playwright. + +### ImageDescriptionService + +Processes and analyzes images to generate descriptions. Supports multiple providers: + +- Local processing using Florence model +- OpenAI Vision API +- Google Gemini + +Configuration: + +```env +# For OpenAI Vision +OPENAI_API_KEY=your_openai_api_key + +# For Google Gemini +GOOGLE_GENERATIVE_AI_API_KEY=your_google_api_key +``` + +Provider selection: + +- If `imageVisionModelProvider` is set to `google/openai`, it will use this one. +- Else if `model` is set to `google/openai`, it will use this one. +- Default if nothing is set is OpenAI. + +The service automatically handles different image formats, including GIFs (first frame extraction). + +Features by provider: + +**Local (Florence):** + +- Basic image captioning +- Local processing without API calls + +**OpenAI Vision:** + +- Detailed image descriptions +- Text detection +- Object recognition + +**Google Gemini 1.5:** + +- High-quality image understanding +- Detailed descriptions with natural language +- Multi-modal context understanding +- Support for complex scenes and content + +The provider can be configured through the runtime settings, allowing easy switching between providers based on your needs. + +### LlamaService + +Provides local LLM capabilities using LLaMA models. + +### PdfService + +Extracts and processes text content from PDF files. + +### SpeechService + +Handles text-to-speech conversion using ElevenLabs and VITS. + +### TranscriptionService + +Converts speech to text using various providers. 
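+
+A minimal sketch of how a consumer might call it, mirroring the pattern used by the Discord plugin's voice handler (the helper name and the `getService` generic are illustrative; check the core typings for the exact service interface):
+
+```typescript
+import {
+    ServiceType,
+    type IAgentRuntime,
+    type ITranscriptionService,
+} from "@elizaos/core";
+
+// Transcribe a WAV audio buffer using whichever transcription provider
+// the runtime has configured (OpenAI, Deepgram, or local).
+async function transcribeAudio(
+    runtime: IAgentRuntime,
+    wavBuffer: Buffer
+): Promise<string | null> {
+    const transcriptionService = runtime.getService<ITranscriptionService>(
+        ServiceType.TRANSCRIPTION
+    );
+    // May return null or an empty string if nothing could be transcribed.
+    return await transcriptionService.transcribe(wavBuffer);
+}
+```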
+ +### VideoService + +Processes video content, including YouTube video downloads and transcription. + +## Actions + +### describeImage + +Analyzes and generates descriptions for images. + +```typescript +// Example usage +const result = await runtime.executeAction("DESCRIBE_IMAGE", { + imageUrl: "path/to/image.jpg", +}); +``` + +## Dependencies + +The plugin requires several peer dependencies: + +- `onnxruntime-node`: 1.20.1 +- `whatwg-url`: 7.1.0 + +And trusted dependencies: + +- `onnxruntime-node`: 1.20.1 +- `sharp`: 0.33.5 + +## Safety & Security + +### File Operations + +- **Path Sanitization**: All file paths are sanitized to prevent directory traversal attacks +- **File Size Limits**: Enforced limits on upload sizes +- **Type Checking**: Strict file type validation +- **Temporary File Cleanup**: Automatic cleanup of temporary files + +### API Keys & Credentials + +- **Environment Isolation**: Sensitive credentials are isolated in environment variables +- **Access Scoping**: Services are initialized with minimum required permissions +- **Key Rotation**: Support for credential rotation without service interruption + +### Media Processing + +- **Resource Limits**: Memory and CPU usage limits for media processing +- **Timeout Controls**: Automatic termination of long-running processes +- **Format Validation**: Strict media format validation before processing + +## Troubleshooting + +### Common Issues + +1. **Service Initialization Failures** + +```bash +Error: Service initialization failed +``` + +- Verify environment variables are properly set +- Check service dependencies are installed +- Ensure sufficient system permissions + +2. **Media Processing Errors** + +```bash +Error: Failed to process media file +``` + +- Verify file format is supported +- Check available system memory +- Ensure ffmpeg is properly installed + +3. **AWS S3 Connection Issues** + +```bash +Error: AWS credentials not configured +``` + +- Verify AWS credentials are set +- Check S3 bucket permissions +- Ensure correct region configuration + +### Debug Mode + +Enable debug logging for detailed troubleshooting: + +```typescript +process.env.DEBUG = "eliza:plugin-node:*"; +``` + +### System Requirements + +- Node.js 16.x or higher +- FFmpeg for media processing +- Minimum 4GB RAM recommended +- CUDA-compatible GPU (optional, for ML features) + +### Performance Optimization + +1. **Cache Management** + + - Regular cleanup of `content_cache` directory + - Implement cache size limits + - Monitor disk usage + +2. **Memory Usage** + + - Configure max buffer sizes + - Implement streaming for large files + - Monitor memory consumption + +3. **Concurrent Operations** + - Adjust queue size limits + - Configure worker threads + - Monitor process pool + +## Support + +For issues and feature requests, please: + +1. Check the troubleshooting guide above +2. Review existing GitHub issues +3. Submit a new issue with: + - System information + - Error logs + - Steps to reproduce + +## Future Enhancements + +1. **File Operations** + + - Enhanced streaming capabilities + - Advanced compression options + - Batch file processing + - File type detection + - Metadata management + - Version control integration + +2. **Media Processing** + + - Additional video formats + - Advanced image processing + - Audio enhancement tools + - Real-time processing + - Quality optimization + - Format conversion + +3. 
**Cloud Integration** + + - Multi-cloud support + - Advanced caching + - CDN optimization + - Auto-scaling features + - Cost optimization + - Backup automation + +4. **Speech Services** + + - Additional voice models + - Language expansion + - Emotion detection + - Voice cloning + - Real-time synthesis + - Custom voice training + +5. **Browser Automation** + + - Headless optimization + - Parallel processing + - Session management + - Cookie handling + - Proxy support + - Resource optimization + +6. **Security Features** + + - Enhanced encryption + - Access control + - Audit logging + - Threat detection + - Rate limiting + - Compliance tools + +7. **Performance Optimization** + + - Memory management + - CPU utilization + - Concurrent operations + - Resource pooling + - Cache strategies + - Load balancing + +8. **Developer Tools** + - Enhanced debugging + - Testing framework + - Documentation generator + - CLI improvements + - Monitoring tools + - Integration templates + +We welcome community feedback and contributions to help prioritize these enhancements. + +## Contributing + +Contributions are welcome! Please see the [CONTRIBUTING.md](CONTRIBUTING.md) file for more information. + +## Credits + +This plugin integrates with and builds upon several key technologies: + +- [Node.js](https://nodejs.org/) - The core runtime environment +- [FFmpeg](https://ffmpeg.org/) - Media processing capabilities +- [ElevenLabs](https://elevenlabs.io/) - Voice synthesis +- [OpenAI](https://openai.com/) - Transcription and AI services +- [AWS S3](https://aws.amazon.com/s3/) - Cloud storage +- [Playwright](https://playwright.dev/) - Browser automation +- [LLaMA](https://github.com/facebookresearch/llama) - Local language models +- [VITS](https://github.com/jaywalnut310/vits) - Voice synthesis +- [Deepgram](https://deepgram.com/) - Speech recognition +- [Sharp](https://sharp.pixelplumbing.com/) - Image processing + +Special thanks to: + +- The Node.js community and all the open-source contributors who make these integrations possible. +- The Eliza community for their contributions and feedback. + +For more information about Node.js capabilities: + +- [Node.js Documentation](https://nodejs.org/en/docs/) +- [Node.js Developer Portal](https://nodejs.org/en/about/) +- [Node.js GitHub Repository](https://github.com/nodejs/node) + +## License + +This plugin is part of the Eliza project. See the main project repository for license information. 
diff --git a/packages/plugin-node/package.json b/packages/plugin-node/package.json new file mode 100644 index 00000000000..e8e0cf25c2d --- /dev/null +++ b/packages/plugin-node/package.json @@ -0,0 +1,97 @@ +{ + "name": "@elizaos/plugin-node", + "version": "0.25.6-alpha.1", + "type": "module", + "main": "dist/index.js", + "module": "dist/index.js", + "types": "dist/index.d.ts", + "exports": { + "./package.json": "./package.json", + ".": { + "import": { + "@elizaos/source": "./src/index.ts", + "types": "./dist/index.d.ts", + "default": "./dist/index.js" + } + } + }, + "files": [ + "dist", + "scripts", + "package.json", + "LICENSE", + "tsup.config.ts" + ], + "dependencies": { + "@aws-sdk/client-s3": "^3.705.0", + "@aws-sdk/s3-request-presigner": "^3.705.0", + "@cliqz/adblocker-playwright": "1.34.0", + "@echogarden/espeak-ng-emscripten": "0.3.3", + "@echogarden/kissfft-wasm": "0.2.0", + "@echogarden/speex-resampler-wasm": "0.2.1", + "@elizaos/core": "workspace:*", + "@huggingface/transformers": "3.0.2", + "@opendocsg/pdf2md": "0.1.32", + "@types/uuid": "10.0.0", + "alawmulaw": "6.0.0", + "bignumber.js": "9.1.2", + "capsolver-npm": "2.0.2", + "cldr-segmentation": "2.2.1", + "command-exists": "1.2.9", + "csv-writer": "1.6.0", + "echogarden": "2.0.7", + "espeak-ng": "1.0.2", + "ffmpeg-static": "5.2.0", + "fluent-ffmpeg": "2.1.3", + "formdata-node": "6.0.3", + "fs-extra": "11.2.0", + "gaxios": "6.7.1", + "glob": "11.0.0", + "graceful-fs": "4.2.11", + "html-escaper": "3.0.3", + "html-to-text": "9.0.5", + "import-meta-resolve": "4.1.0", + "jieba-wasm": "2.2.0", + "json5": "2.2.3", + "kuromoji": "0.1.2", + "libsodium-wrappers": "0.7.15", + "multer": "1.4.5-lts.1", + "node-cache": "5.1.2", + "node-llama-cpp": "3.1.1", + "nodejs-whisper": "0.1.18", + "onnxruntime-node": "1.20.1", + "pdfjs-dist": "4.7.76", + "playwright": "1.48.2", + "pm2": "5.4.3", + "puppeteer-extra": "3.3.6", + "puppeteer-extra-plugin-capsolver": "2.0.1", + "sharp": "0.33.5", + "srt": "0.0.3", + "systeminformation": "5.23.8", + "tar": "7.4.3", + "tinyld": "1.3.4", + "uuid": "11.0.3", + "wav": "1.0.2", + "wav-encoder": "1.3.0", + "wavefile": "11.0.0", + "yargs": "17.7.2", + "youtube-dl-exec": "3.0.10", + "cookie": "0.7.0" + }, + "devDependencies": { + "@types/node": "22.8.4", + "tsup": "8.3.5" + }, + "scripts": { + "build": "tsup --format esm --dts", + "dev": "tsup --format esm --dts --watch", + "postinstall": "node scripts/postinstall.js" + }, + "peerDependencies": { + "onnxruntime-node": "1.20.1", + "whatwg-url": "7.1.0" + }, + "publishConfig": { + "access": "public" + } +} diff --git a/packages/plugin-node/scripts/postinstall.js b/packages/plugin-node/scripts/postinstall.js new file mode 100644 index 00000000000..826949088f8 --- /dev/null +++ b/packages/plugin-node/scripts/postinstall.js @@ -0,0 +1,64 @@ +import os from "os"; +import fs from "fs"; +import { execSync } from "child_process"; + +const platform = os.platform(); +const rel = os.release(); + +if (platform !== "linux") { + console.log("Skipping playwright installation: non-Linux platform detected:", platform); + process.exit(0); +} + +function getDistroName() { + try { + const osReleaseContent = fs.readFileSync("/etc/os-release", "utf8"); + const lines = osReleaseContent.split("\n"); + const info = {}; + for (const line of lines) { + const [key, value] = line.split("="); + if (key && value) { + info[key.toLowerCase()] = value.replace(/"/g, "").toLowerCase().trim(); + } + } + return info["id"] || info["id_like"] || null; + } catch (err) { + console.error("Error reading 
/etc/os-release:", err.message); + } + return null; +} + +const distro = getDistroName(); +console.log("Detected Linux distribution:", distro || "unknown"); + +const supportedDistros = [ + "ubuntu", + "debian", + "pve", + "raspbian", + "pop", + "zorin", + "linuxmint", + "elementary", + "pureos", + "kali" +]; + +if (!distro || !supportedDistros.some((name) => distro.includes(name))) { + console.log( + "Skipping playwright installation on unsupported platform:", + platform, + rel, + distro || "unknown distro" + ); + process.exit(0); +} + +try { + execSync("npx playwright install", { + stdio: "inherit" + }); +} catch (err) { + console.error("Failed to install Playwright you may need to install playwright deps with 'sudo npx playwright install-deps'. Error: ", err.message); + process.exit(1); +} \ No newline at end of file diff --git a/packages/plugin-node/src/environment.ts b/packages/plugin-node/src/environment.ts new file mode 100644 index 00000000000..cb11bccbd42 --- /dev/null +++ b/packages/plugin-node/src/environment.ts @@ -0,0 +1,104 @@ +import type { IAgentRuntime } from "@elizaos/core"; +import { z } from "zod"; + +export const nodeEnvSchema = z.object({ + OPENAI_API_KEY: z.string().min(1, "OpenAI API key is required"), + + // Core settings + ELEVENLABS_XI_API_KEY: z.string().optional(), + + // All other settings optional with defaults + ELEVENLABS_MODEL_ID: z.string().optional(), + ELEVENLABS_VOICE_ID: z.string().optional(), + ELEVENLABS_VOICE_STABILITY: z.string().optional(), + ELEVENLABS_VOICE_SIMILARITY_BOOST: z.string().optional(), + ELEVENLABS_VOICE_STYLE: z.string().optional(), + ELEVENLABS_VOICE_USE_SPEAKER_BOOST: z.string().optional(), + ELEVENLABS_OPTIMIZE_STREAMING_LATENCY: z.string().optional(), + ELEVENLABS_OUTPUT_FORMAT: z.string().optional(), + VITS_VOICE: z.string().optional(), + VITS_MODEL: z.string().optional(), +}); + +export type NodeConfig = z.infer; + +export async function validateNodeConfig( + runtime: IAgentRuntime +): Promise { + try { + const voiceSettings = runtime.character.settings?.voice; + const elevenlabs = voiceSettings?.elevenlabs; + + // Only include what's absolutely required + const config = { + OPENAI_API_KEY: + runtime.getSetting("OPENAI_API_KEY") || + process.env.OPENAI_API_KEY, + ELEVENLABS_XI_API_KEY: + runtime.getSetting("ELEVENLABS_XI_API_KEY") || + process.env.ELEVENLABS_XI_API_KEY, + + // Use character card settings first, fall back to env vars, then defaults + ...(runtime.getSetting("ELEVENLABS_XI_API_KEY") && { + ELEVENLABS_MODEL_ID: + elevenlabs?.model || + process.env.ELEVENLABS_MODEL_ID || + "eleven_monolingual_v1", + ELEVENLABS_VOICE_ID: + elevenlabs?.voiceId || process.env.ELEVENLABS_VOICE_ID, + ELEVENLABS_VOICE_STABILITY: + elevenlabs?.stability || + process.env.ELEVENLABS_VOICE_STABILITY || + "0.5", + ELEVENLABS_VOICE_SIMILARITY_BOOST: + elevenlabs?.similarityBoost || + process.env.ELEVENLABS_VOICE_SIMILARITY_BOOST || + "0.75", + ELEVENLABS_VOICE_STYLE: + elevenlabs?.style || + process.env.ELEVENLABS_VOICE_STYLE || + "0", + ELEVENLABS_VOICE_USE_SPEAKER_BOOST: + elevenlabs?.useSpeakerBoost || + process.env.ELEVENLABS_VOICE_USE_SPEAKER_BOOST || + "true", + ELEVENLABS_OPTIMIZE_STREAMING_LATENCY: + process.env.ELEVENLABS_OPTIMIZE_STREAMING_LATENCY || "0", + ELEVENLABS_OUTPUT_FORMAT: + process.env.ELEVENLABS_OUTPUT_FORMAT || "pcm_16000", + }), + + // VITS settings + VITS_VOICE: voiceSettings?.model || process.env.VITS_VOICE, + VITS_MODEL: process.env.VITS_MODEL, + + // AWS settings (only include if present) + 
...(runtime.getSetting("AWS_ACCESS_KEY_ID") && { + AWS_ACCESS_KEY_ID: runtime.getSetting("AWS_ACCESS_KEY_ID"), + AWS_SECRET_ACCESS_KEY: runtime.getSetting( + "AWS_SECRET_ACCESS_KEY" + ), + AWS_REGION: runtime.getSetting("AWS_REGION"), + AWS_S3_BUCKET: runtime.getSetting("AWS_S3_BUCKET"), + AWS_S3_UPLOAD_PATH: runtime.getSetting("AWS_S3_UPLOAD_PATH"), + AWS_S3_ENDPOINT: runtime.getSetting("AWS_S3_ENDPOINT"), + AWS_S3_SSL_ENABLED: runtime.getSetting("AWS_S3_SSL_ENABLED"), + AWS_S3_FORCE_PATH_STYLE: runtime.getSetting( + "AWS_S3_FORCE_PATH_STYLE" + ), + }), + }; + + return nodeEnvSchema.parse(config); + } catch (error) { + if (error instanceof z.ZodError) { + const errorMessages = error.errors + .map((err) => `${err.path.join(".")}: ${err.message}`) + .join("\n"); + throw new Error( + `Node configuration validation failed:\n${errorMessages}` + ); + } + throw error; + } +} diff --git a/packages/plugin-node/src/index.ts b/packages/plugin-node/src/index.ts new file mode 100644 index 00000000000..a181ea73568 --- /dev/null +++ b/packages/plugin-node/src/index.ts @@ -0,0 +1,26 @@ +export * from "./services/index.ts"; + +import type { Plugin } from "@elizaos/core"; + +import { + AwsS3Service, + BrowserService, + PdfService, + VideoService, +} from "./services/index.ts"; + +export type NodePlugin = ReturnType; + +export function createNodePlugin() { + return { + name: "default", + description: "Default plugin, with basic actions and evaluators", + services: [ + new BrowserService(), + new PdfService(), + new VideoService(), + new AwsS3Service(), + ], + actions: [], + } as const satisfies Plugin; +} diff --git a/packages/plugin-node/src/services/audioUtils.ts b/packages/plugin-node/src/services/audioUtils.ts new file mode 100644 index 00000000000..303e196d743 --- /dev/null +++ b/packages/plugin-node/src/services/audioUtils.ts @@ -0,0 +1,25 @@ +export function getWavHeader( + audioLength: number, + sampleRate: number, + channelCount = 1, + bitsPerSample = 16 +): Buffer { + const wavHeader = Buffer.alloc(44); + wavHeader.write("RIFF", 0); + wavHeader.writeUInt32LE(36 + audioLength, 4); // Length of entire file in bytes minus 8 + wavHeader.write("WAVE", 8); + wavHeader.write("fmt ", 12); + wavHeader.writeUInt32LE(16, 16); // Length of format data + wavHeader.writeUInt16LE(1, 20); // Type of format (1 is PCM) + wavHeader.writeUInt16LE(channelCount, 22); // Number of channels + wavHeader.writeUInt32LE(sampleRate, 24); // Sample rate + wavHeader.writeUInt32LE( + (sampleRate * bitsPerSample * channelCount) / 8, + 28 + ); // Byte rate + wavHeader.writeUInt16LE((bitsPerSample * channelCount) / 8, 32); // Block align ((BitsPerSample * Channels) / 8) + wavHeader.writeUInt16LE(bitsPerSample, 34); // Bits per sample + wavHeader.write("data", 36); // Data chunk header + wavHeader.writeUInt32LE(audioLength, 40); // Data chunk size + return wavHeader; +} diff --git a/packages/plugin-node/src/services/awsS3.ts b/packages/plugin-node/src/services/awsS3.ts new file mode 100644 index 00000000000..532c92c79ea --- /dev/null +++ b/packages/plugin-node/src/services/awsS3.ts @@ -0,0 +1,292 @@ +import { + type IAgentRuntime, + type IAwsS3Service, + Service, + ServiceType, + logger, +} from "@elizaos/core"; +import { + GetObjectCommand, + PutObjectCommand, + S3Client, +} from "@aws-sdk/client-s3"; +import { getSignedUrl } from "@aws-sdk/s3-request-presigner"; +import * as fs from "node:fs"; +import * as path from "node:path"; + +interface UploadResult { + success: boolean; + url?: string; + error?: string; +} + +interface 
JsonUploadResult extends UploadResult { + key?: string; // Add storage key +} + +export class AwsS3Service extends Service implements IAwsS3Service { + static serviceType: ServiceType = ServiceType.REMOTE_FILES; + + private s3Client: S3Client | null = null; + private bucket = ""; + private fileUploadPath = ""; + private runtime: IAgentRuntime | null = null; + + async initialize(runtime: IAgentRuntime): Promise<void> { + logger.log("Initializing AwsS3Service"); + this.runtime = runtime; + this.fileUploadPath = runtime.getSetting("AWS_S3_UPLOAD_PATH") ?? ""; + } + + private async initializeS3Client(): Promise<boolean> { + if (this.s3Client) return true; + if (!this.runtime) return false; + + const AWS_ACCESS_KEY_ID = this.runtime.getSetting("AWS_ACCESS_KEY_ID"); + const AWS_SECRET_ACCESS_KEY = this.runtime.getSetting( + "AWS_SECRET_ACCESS_KEY", + ); + const AWS_REGION = this.runtime.getSetting("AWS_REGION"); + const AWS_S3_BUCKET = this.runtime.getSetting("AWS_S3_BUCKET"); + + if ( + !AWS_ACCESS_KEY_ID || + !AWS_SECRET_ACCESS_KEY || + !AWS_REGION || + !AWS_S3_BUCKET + ) { + return false; + } + + // Optional fields to allow for other providers + const endpoint = this.runtime.getSetting("AWS_S3_ENDPOINT"); + const sslEnabled = this.runtime.getSetting("AWS_S3_SSL_ENABLED"); + const forcePathStyle = this.runtime.getSetting( + "AWS_S3_FORCE_PATH_STYLE", + ); + + this.s3Client = new S3Client({ + ...(endpoint ? { endpoint } : {}), + ...(sslEnabled ? { sslEnabled } : {}), + ...(forcePathStyle + ? { forcePathStyle: Boolean(forcePathStyle) } + : {}), + region: AWS_REGION, + credentials: { + accessKeyId: AWS_ACCESS_KEY_ID, + secretAccessKey: AWS_SECRET_ACCESS_KEY, + }, + }); + this.bucket = AWS_S3_BUCKET; + return true; + } + + async uploadFile( + filePath: string, + subDirectory = "", + useSignedUrl = false, + expiresIn = 900, + ): Promise<UploadResult> { + try { + if (!(await this.initializeS3Client())) { + return { + success: false, + error: "AWS S3 credentials not configured", + }; + } + + if (!fs.existsSync(filePath)) { + return { + success: false, + error: "File does not exist", + }; + } + + const fileContent = fs.readFileSync(filePath); + + const baseFileName = `${Date.now()}-${path.basename(filePath)}`; + // Determine storage path based on public access + const fileName = + `${this.fileUploadPath}${subDirectory}/${baseFileName}`.replaceAll( + "//", + "/", + ); + // Set upload parameters + const uploadParams = { + Bucket: this.bucket, + Key: fileName, + Body: fileContent, + ContentType: this.getContentType(filePath), + }; + + // Upload file + await this.s3Client.send(new PutObjectCommand(uploadParams)); + + // Build result object + const result: UploadResult = { + success: true, + }; + + // If not using signed URL, return either custom endpoint or public access URL + if (!useSignedUrl) { + if (this.s3Client.config.endpoint) { + const endpoint = await this.s3Client.config.endpoint(); + const port = endpoint.port ? `:${endpoint.port}` : ""; + result.url = `${endpoint.protocol}//${endpoint.hostname}${port}${endpoint.path}${this.bucket}/${fileName}`; + } else { + result.url = `https://${this.bucket}.s3.${process.env.AWS_REGION}.amazonaws.com/${fileName}`; + } + } else { + const getObjectCommand = new GetObjectCommand({ + Bucket: this.bucket, + Key: fileName, + }); + result.url = await getSignedUrl( + this.s3Client, + getObjectCommand, + { + expiresIn, // 15 minutes in seconds + }, + ); + } + + return result; + } catch (error) { + return { + success: false, + error: + error instanceof Error + ?
error.message + : "Unknown error occurred", + }; + } + } + + /** + * Generate signed URL for existing file + */ + async generateSignedUrl( + fileName: string, + expiresIn = 900, + ): Promise<string> { + if (!(await this.initializeS3Client())) { + throw new Error("AWS S3 credentials not configured"); + } + + const command = new GetObjectCommand({ + Bucket: this.bucket, + Key: fileName, + }); + + return await getSignedUrl(this.s3Client, command, { expiresIn }); + } + + private getContentType(filePath: string): string { + const ext = path.extname(filePath).toLowerCase(); + const contentTypes: { [key: string]: string } = { + ".png": "image/png", + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".gif": "image/gif", + ".webp": "image/webp", + }; + return contentTypes[ext] || "application/octet-stream"; + } + + /** + * Upload JSON object to S3 + * @param jsonData JSON data to upload + * @param fileName File name (optional, without path) + * @param subDirectory Subdirectory (optional) + * @param useSignedUrl Whether to use signed URL + * @param expiresIn Signed URL expiration time (seconds) + */ + async uploadJson( + jsonData: any, + fileName?: string, + subDirectory?: string, + useSignedUrl = false, + expiresIn = 900, + ): Promise<JsonUploadResult> { + try { + if (!(await this.initializeS3Client())) { + return { + success: false, + error: "AWS S3 credentials not configured", + }; + } + + // Validate input + if (!jsonData) { + return { + success: false, + error: "JSON data is required", + }; + } + + // Generate filename (if not provided) + const timestamp = Date.now(); + const actualFileName = fileName || `${timestamp}.json`; + + // Build complete file path + let fullPath = this.fileUploadPath || ""; + if (subDirectory) { + fullPath = `${fullPath}/${subDirectory}`.replace(/\/+/g, "/"); + } + const key = `${fullPath}/${actualFileName}`.replace(/\/+/g, "/"); + + // Convert JSON to string + const jsonString = JSON.stringify(jsonData, null, 2); + + // Set upload parameters + const uploadParams = { + Bucket: this.bucket, + Key: key, + Body: jsonString, + ContentType: "application/json", + }; + + // Upload file + await this.s3Client.send(new PutObjectCommand(uploadParams)); + + // Build result + const result: JsonUploadResult = { + success: true, + key: key, + }; + + // If not using signed URL, return either custom endpoint or public access URL + if (!useSignedUrl) { + if (this.s3Client.config.endpoint) { + const endpoint = await this.s3Client.config.endpoint(); + const port = endpoint.port ? `:${endpoint.port}` : ""; + result.url = `${endpoint.protocol}//${endpoint.hostname}${port}${endpoint.path}${this.bucket}/${key}`; + } else { + result.url = `https://${this.bucket}.s3.${process.env.AWS_REGION}.amazonaws.com/${key}`; + } + } else { + const getObjectCommand = new GetObjectCommand({ + Bucket: this.bucket, + Key: key, + }); + result.url = await getSignedUrl( + this.s3Client, + getObjectCommand, + { expiresIn }, + ); + } + + return result; + } catch (error) { + return { + success: false, + error: + error instanceof Error + ?
error.message + : "Unknown error occurred", + }; + } + } +} + +export default AwsS3Service; diff --git a/packages/plugin-node/src/services/browser.ts b/packages/plugin-node/src/services/browser.ts new file mode 100644 index 00000000000..407585fe98a --- /dev/null +++ b/packages/plugin-node/src/services/browser.ts @@ -0,0 +1,344 @@ +import { generateText, type IBrowserService, trimTokens } from "@elizaos/core"; +import { parseJSONObjectFromText } from "@elizaos/core"; +import { Service } from "@elizaos/core"; +import { settings } from "@elizaos/core"; +import { type IAgentRuntime, ModelClass, ServiceType } from "@elizaos/core"; +import { stringToUuid } from "@elizaos/core"; +import { PlaywrightBlocker } from "@cliqz/adblocker-playwright"; +import CaptchaSolver from "capsolver-npm"; +import { + type Browser, + type BrowserContext, + chromium, + type Page, +} from "playwright"; +import { logger } from "@elizaos/core"; + +async function generateSummary( + runtime: IAgentRuntime, + text: string +): Promise<{ title: string; description: string }> { + // make sure text is under 128k characters + text = await trimTokens(text, 100000, runtime); + + const prompt = `Please generate a concise summary for the following text: + + Text: """ + ${text} + """ + + Respond with a JSON object in the following format: + \`\`\`json + { + "title": "Generated Title", + "summary": "Generated summary and/or description of the text" + } + \`\`\``; + + const response = await generateText({ + runtime, + context: prompt, + modelClass: ModelClass.SMALL, + }); + + const parsedResponse = parseJSONObjectFromText(response); + + if (parsedResponse?.title && parsedResponse?.summary) { + return { + title: parsedResponse.title, + description: parsedResponse.summary, + }; + } + + return { + title: "", + description: "", + }; +} + +type PageContent = { + title: string; + description: string; + bodyContent: string; +}; + +export class BrowserService extends Service implements IBrowserService { + private browser: Browser | undefined; + private context: BrowserContext | undefined; + private blocker: PlaywrightBlocker | undefined; + private captchaSolver: CaptchaSolver; + private cacheKey = "content/browser"; + + static serviceType: ServiceType = ServiceType.BROWSER; + + static register(runtime: IAgentRuntime): IAgentRuntime { + // since we are lazy loading, do nothing + return runtime; + } + + getInstance(): IBrowserService { + return BrowserService.getInstance(); + } + + constructor() { + super(); + this.browser = undefined; + this.context = undefined; + this.blocker = undefined; + this.captchaSolver = new CaptchaSolver( + settings.CAPSOLVER_API_KEY || "" + ); + } + + async initialize() {} + + async initializeBrowser() { + if (!this.browser) { + this.browser = await chromium.launch({ + headless: true, + args: [ + "--disable-dev-shm-usage", // Uses /tmp instead of /dev/shm. 
Prevents memory issues on low-memory systems + "--block-new-web-contents", // Prevents creation of new windows/tabs + ], + }); + + const platform = process.platform; + let userAgent = ""; + + // Change the user agent to match the platform to reduce bot detection + switch (platform) { + case "darwin": + userAgent = + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"; + break; + case "win32": + userAgent = + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"; + break; + case "linux": + userAgent = + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"; + break; + default: + userAgent = + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"; + } + + this.context = await this.browser.newContext({ + userAgent, + acceptDownloads: false, + }); + + this.blocker = await PlaywrightBlocker.fromPrebuiltAdsAndTracking( + fetch + ); + } + } + + async closeBrowser() { + if (this.context) { + await this.context.close(); + this.context = undefined; + } + if (this.browser) { + await this.browser.close(); + this.browser = undefined; + } + } + + async getPageContent( + url: string, + runtime: IAgentRuntime + ): Promise<PageContent> { + await this.initializeBrowser(); + return await this.fetchPageContent(url, runtime); + } + + private getCacheKey(url: string): string { + return stringToUuid(url); + } + + private async fetchPageContent( + url: string, + runtime: IAgentRuntime + ): Promise<PageContent> { + const cacheKey = this.getCacheKey(url); + const cached = await runtime.cacheManager.get<{ + url: string; + content: PageContent; + }>(`${this.cacheKey}/${cacheKey}`); + + if (cached) { + return cached.content; + } + + let page: Page | undefined; + + try { + if (!this.context) { + throw new Error( + "Browser context not initialized. Call initializeBrowser() first."
+ ); + } + + page = await this.context.newPage(); + + // Enable stealth mode + await page.setExtraHTTPHeaders({ + "Accept-Language": "en-US,en;q=0.9", + }); + + // Apply ad blocker + if (this.blocker) { + await this.blocker.enableBlockingInPage(page); + } + + const response = await page.goto(url, { waitUntil: "networkidle" }); + + if (!response) { + throw new Error("Failed to load the page"); + } + + if (response.status() === 403 || response.status() === 404) { + return await this.tryAlternativeSources(url, runtime); + } + + // Check for CAPTCHA + const captchaDetected = await this.detectCaptcha(page); + if (captchaDetected) { + await this.solveCaptcha(page, url); + } + const documentTitle = await page.evaluate(() => document.title); + const bodyContent = await page.evaluate( + () => document.body.innerText + ); + const { title: parsedTitle, description } = await generateSummary( + runtime, + documentTitle + "\n" + bodyContent + ); + const content = { title: parsedTitle, description, bodyContent }; + await runtime.cacheManager.set(`${this.cacheKey}/${cacheKey}`, { + url, + content, + }); + return content; + } catch (error) { + logger.error("Error:", error); + return { + title: url, + description: "Error, could not fetch content", + bodyContent: "", + }; + } finally { + if (page) { + await page.close(); + } + } + } + + private async detectCaptcha(page: Page): Promise<boolean> { + const captchaSelectors = [ + 'iframe[src*="captcha"]', + 'div[class*="captcha"]', + "#captcha", + ".g-recaptcha", + ".h-captcha", + ]; + + for (const selector of captchaSelectors) { + const element = await page.$(selector); + if (element) return true; + } + + return false; + } + + private async solveCaptcha(page: Page, url: string): Promise<void> { + try { + const hcaptchaKey = await this.getHCaptchaWebsiteKey(page); + if (hcaptchaKey) { + const solution = await this.captchaSolver.hcaptchaProxyless({ + websiteURL: url, + websiteKey: hcaptchaKey, + }); + await page.evaluate((token) => { + // eslint-disable-next-line + // @ts-ignore + window.hcaptcha.setResponse(token); + }, solution.gRecaptchaResponse); + return; + } + + const recaptchaKey = await this.getReCaptchaWebsiteKey(page); + if (recaptchaKey) { + const solution = await this.captchaSolver.recaptchaV2Proxyless({ + websiteURL: url, + websiteKey: recaptchaKey, + }); + await page.evaluate((token) => { + // eslint-disable-next-line + // @ts-ignore + document.getElementById("g-recaptcha-response").innerHTML = + token; + }, solution.gRecaptchaResponse); + } + } catch (error) { + logger.error("Error solving CAPTCHA:", error); + } + } + + private async getHCaptchaWebsiteKey(page: Page): Promise<string> { + return page.evaluate(() => { + const hcaptchaIframe = document.querySelector( + 'iframe[src*="hcaptcha.com"]' + ); + if (hcaptchaIframe) { + const src = hcaptchaIframe.getAttribute("src"); + const match = src?.match(/sitekey=([^&]*)/); + return match ? match[1] : ""; + } + return ""; + }); + } + + private async getReCaptchaWebsiteKey(page: Page): Promise<string> { + return page.evaluate(() => { + const recaptchaElement = document.querySelector(".g-recaptcha"); + return recaptchaElement + ?
recaptchaElement.getAttribute("data-sitekey") || "" + : ""; + }); + } + + private async tryAlternativeSources( + url: string, + runtime: IAgentRuntime + ): Promise<{ title: string; description: string; bodyContent: string }> { + // Try Internet Archive + const archiveUrl = `https://web.archive.org/web/${url}`; + try { + return await this.fetchPageContent(archiveUrl, runtime); + } catch (error) { + logger.error("Error fetching from Internet Archive:", error); + } + + // Try Google Search as a last resort + const googleSearchUrl = `https://www.google.com/search?q=${encodeURIComponent( + url + )}`; + try { + return await this.fetchPageContent(googleSearchUrl, runtime); + } catch (error) { + logger.error("Error fetching from Google Search:", error); + logger.error( + "Failed to fetch content from alternative sources" + ); + return { + title: url, + description: + "Error, could not fetch content from alternative sources", + bodyContent: "", + }; + } + } +} diff --git a/packages/plugin-node/src/services/index.ts b/packages/plugin-node/src/services/index.ts new file mode 100644 index 00000000000..22b18262271 --- /dev/null +++ b/packages/plugin-node/src/services/index.ts @@ -0,0 +1,11 @@ +import { AwsS3Service } from "./awsS3.ts"; +import { BrowserService } from "./browser.ts"; +import { PdfService } from "./pdf.ts"; +import { VideoService } from "./video.ts"; + +export { + AwsS3Service, + BrowserService, + PdfService, + VideoService, +}; \ No newline at end of file diff --git a/packages/plugin-node/src/services/pdf.ts b/packages/plugin-node/src/services/pdf.ts new file mode 100644 index 00000000000..49924842722 --- /dev/null +++ b/packages/plugin-node/src/services/pdf.ts @@ -0,0 +1,49 @@ +import { + type IAgentRuntime, + type IPdfService, + Service, + ServiceType, +} from "@elizaos/core"; +import { getDocument, type PDFDocumentProxy } from "pdfjs-dist"; +import type { TextItem, TextMarkedContent } from "pdfjs-dist/types/src/display/api"; + +export class PdfService extends Service implements IPdfService { + static serviceType: ServiceType = ServiceType.PDF; + + constructor() { + super(); + } + + getInstance(): IPdfService { + return PdfService.getInstance(); + } + + async initialize(_runtime: IAgentRuntime): Promise<void> {} + + async convertPdfToText(pdfBuffer: Buffer): Promise<string> { + // Convert Buffer to Uint8Array + const uint8Array = new Uint8Array(pdfBuffer); + + const pdf: PDFDocumentProxy = await getDocument({ data: uint8Array }) + .promise; + const numPages = pdf.numPages; + const textPages: string[] = []; + + for (let pageNum = 1; pageNum <= numPages; pageNum++) { + const page = await pdf.getPage(pageNum); + const textContent = await page.getTextContent(); + const pageText = textContent.items + .filter(isTextItem) + .map((item) => item.str) + .join(" "); + textPages.push(pageText); + } + + return textPages.join("\n"); + } +} + +// Type guard function + +function isTextItem(item: TextItem | TextMarkedContent): item is TextItem { + return "str" in item; +} diff --git a/packages/plugin-node/src/services/video.ts b/packages/plugin-node/src/services/video.ts new file mode 100644 index 00000000000..c4879b9ed1c --- /dev/null +++ b/packages/plugin-node/src/services/video.ts @@ -0,0 +1,429 @@ +import { + type IAgentRuntime, + type IVideoService, + type Media, + Service, + ServiceType, + stringToUuid, + logger, + ModelClass, +} from "@elizaos/core"; +import ffmpeg from "fluent-ffmpeg"; +import fs from "fs"; +import { tmpdir } from "os"; +import path from "path"; +import youtubeDl from "youtube-dl-exec"; +
+export class VideoService extends Service implements IVideoService { + static serviceType: ServiceType = ServiceType.VIDEO; + private cacheKey = "content/video"; + private dataDir = "./content_cache"; + + private queue: string[] = []; + private processing = false; + + constructor() { + super(); + this.ensureDataDirectoryExists(); + } + + getInstance(): IVideoService { + return VideoService.getInstance(); + } + + async initialize(_runtime: IAgentRuntime): Promise<void> {} + + private ensureDataDirectoryExists() { + if (!fs.existsSync(this.dataDir)) { + fs.mkdirSync(this.dataDir); + } + } + + public isVideoUrl(url: string): boolean { + return ( + url.includes("youtube.com") || + url.includes("youtu.be") || + url.includes("vimeo.com") + ); + } + + public async downloadMedia(url: string): Promise<string> { + const videoId = this.getVideoId(url); + const outputFile = path.join(this.dataDir, `${videoId}.mp4`); + + // if it already exists, return it + if (fs.existsSync(outputFile)) { + return outputFile; + } + + try { + await youtubeDl(url, { + verbose: true, + output: outputFile, + writeInfoJson: true, + }); + return outputFile; + } catch (error) { + logger.log("Error downloading media:", error); + throw new Error("Failed to download media"); + } + } + + public async downloadVideo(videoInfo: any): Promise<string> { + const videoId = this.getVideoId(videoInfo.webpage_url); + const outputFile = path.join(this.dataDir, `${videoId}.mp4`); + + // if it already exists, return it + if (fs.existsSync(outputFile)) { + return outputFile; + } + + try { + await youtubeDl(videoInfo.webpage_url, { + verbose: true, + output: outputFile, + format: "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best", + writeInfoJson: true, + }); + return outputFile; + } catch (error) { + logger.log("Error downloading video:", error); + throw new Error("Failed to download video"); + } + } + + public async processVideo( + url: string, + runtime: IAgentRuntime + ): Promise<Media> { + this.queue.push(url); + this.processQueue(runtime); + + return new Promise((resolve, reject) => { + const checkQueue = async () => { + const index = this.queue.indexOf(url); + if (index !== -1) { + setTimeout(checkQueue, 100); + } else { + try { + const result = await this.processVideoFromUrl( + url, + runtime + ); + resolve(result); + } catch (error) { + reject(error); + } + } + }; + checkQueue(); + }); + } + + private async processQueue(runtime): Promise<void> { + if (this.processing || this.queue.length === 0) { + return; + } + + this.processing = true; + + while (this.queue.length > 0) { + const url = this.queue.shift()!; + await this.processVideoFromUrl(url, runtime); + } + + this.processing = false; + } + + private async processVideoFromUrl( + url: string, + runtime: IAgentRuntime + ): Promise<Media> { + const videoId = + url.match( + /(?:youtu\.be\/|youtube\.com(?:\/embed\/|\/v\/|\/watch\?v=|\/watch\?.+&v=))([^\/&?]+)/ // eslint-disable-line + )?.[1] || ""; + const videoUuid = this.getVideoId(videoId); + const cacheKey = `${this.cacheKey}/${videoUuid}`; + + const cached = await runtime.cacheManager.get<Media>(cacheKey); + + if (cached) { + logger.log("Returning cached video file"); + return cached; + } + + logger.log("Cache miss, processing video"); + logger.log("Fetching video info"); + const videoInfo = await this.fetchVideoInfo(url); + logger.log("Getting transcript"); + const transcript = await this.getTranscript(url, videoInfo, runtime); + + const result: Media = { + id: videoUuid, + url: url, + title: videoInfo.title, + source: videoInfo.channel, + description:
videoInfo.description, + text: transcript, + }; + + await runtime.cacheManager.set(cacheKey, result); + + return result; + } + + private getVideoId(url: string): string { + return stringToUuid(url); + } + + async fetchVideoInfo(url: string): Promise<any> { + if (url.endsWith(".mp4") || url.includes(".mp4?")) { + try { + const response = await fetch(url); + if (response.ok) { + // If the URL is a direct link to an MP4 file, return a simplified video info object + return { + title: path.basename(url), + description: "", + channel: "", + }; + } + } catch (error) { + logger.log("Error downloading MP4 file:", error); + // Fall back to using youtube-dl if direct download fails + } + } + + try { + const result = await youtubeDl(url, { + dumpJson: true, + verbose: true, + callHome: false, + noCheckCertificates: true, + preferFreeFormats: true, + youtubeSkipDashManifest: true, + writeSub: true, + writeAutoSub: true, + subLang: "en", + skipDownload: true, + }); + return result; + } catch (error) { + logger.log("Error fetching video info:", error); + throw new Error("Failed to fetch video information"); + } + } + + private async getTranscript( + url: string, + videoInfo: any, + runtime: IAgentRuntime + ): Promise<string> { + logger.log("Getting transcript"); + try { + // Check for manual subtitles + if (videoInfo.subtitles && videoInfo.subtitles.en) { + logger.log("Manual subtitles found"); + const srtContent = await this.downloadSRT( + videoInfo.subtitles.en[0].url + ); + return this.parseSRT(srtContent); + } + + // Check for automatic captions + if ( + videoInfo.automatic_captions && + videoInfo.automatic_captions.en + ) { + logger.log("Automatic captions found"); + const captionUrl = videoInfo.automatic_captions.en[0].url; + const captionContent = await this.downloadCaption(captionUrl); + return this.parseCaption(captionContent); + } + + // Check if it's a music video + if ( + videoInfo.categories && + videoInfo.categories.includes("Music") + ) { + logger.log("Music video detected, no lyrics available"); + return "No lyrics available."; + } + + // Fall back to audio transcription + logger.log( + "No subtitles or captions found, falling back to audio transcription" + ); + return this.transcribeAudio(url, runtime); + } catch (error) { + logger.log("Error in getTranscript:", error); + throw error; + } + } + + private async downloadCaption(url: string): Promise<string> { + logger.log("Downloading caption from:", url); + const response = await fetch(url); + if (!response.ok) { + throw new Error( + `Failed to download caption: ${response.statusText}` + ); + } + return await response.text(); + } + + private parseCaption(captionContent: string): string { + logger.log("Parsing caption"); + try { + const jsonContent = JSON.parse(captionContent); + if (jsonContent.events) { + return jsonContent.events + .filter((event) => event.segs) + .map((event) => event.segs.map((seg) => seg.utf8).join("")) + .join("") + .replace("\n", " "); + } else { + logger.log("Unexpected caption format:", jsonContent); + return "Error: Unable to parse captions"; + } + } catch (error) { + logger.log("Error parsing caption:", error); + return "Error: Unable to parse captions"; + } + } + + private parseSRT(srtContent: string): string { + // Simple SRT parser (replace with a more robust solution if needed) + return srtContent + .split("\n\n") + .map((block) => block.split("\n").slice(2).join(" ")) + .join(" "); + } + + private async downloadSRT(url: string): Promise<string> { + logger.log("downloadSRT"); + const response = await fetch(url); + return await
response.text(); + } + + async transcribeAudio( + url: string, + runtime: IAgentRuntime + ): Promise<string> { + logger.log("Preparing audio for transcription..."); + const mp4FilePath = path.join( + this.dataDir, + `${this.getVideoId(url)}.mp4` + ); + + const mp3FilePath = path.join( + this.dataDir, + `${this.getVideoId(url)}.mp3` + ); + + if (!fs.existsSync(mp3FilePath)) { + if (fs.existsSync(mp4FilePath)) { + logger.log("MP4 file found. Converting to MP3..."); + await this.convertMp4ToMp3(mp4FilePath, mp3FilePath); + } else { + logger.log("Downloading audio..."); + await this.downloadAudio(url, mp3FilePath); + } + } + + logger.log(`Audio prepared at ${mp3FilePath}`); + + const audioBuffer = fs.readFileSync(mp3FilePath); + logger.log(`Audio file size: ${audioBuffer.length} bytes`); + + logger.log("Starting transcription..."); + const startTime = Date.now(); + const transcript = await runtime.call(ModelClass.TRANSCRIPTION, audioBuffer); + + const endTime = Date.now(); + logger.log( + `Transcription completed in ${(endTime - startTime) / 1000} seconds` + ); + + // Don't delete the MP3 file as it might be needed for future use + return transcript || "Transcription failed"; + } + + private async convertMp4ToMp3( + inputPath: string, + outputPath: string + ): Promise<void> { + return new Promise((resolve, reject) => { + ffmpeg(inputPath) + .output(outputPath) + .noVideo() + .audioCodec("libmp3lame") + .on("end", () => { + logger.log("Conversion to MP3 complete"); + resolve(); + }) + .on("error", (err) => { + logger.log("Error converting to MP3:", err); + reject(err); + }) + .run(); + }); + } + + private async downloadAudio( + url: string, + outputFile: string + ): Promise<string> { + logger.log("Downloading audio"); + outputFile = + outputFile ?? + path.join(this.dataDir, `${this.getVideoId(url)}.mp3`); + + try { + if (url.endsWith(".mp4") || url.includes(".mp4?")) { + logger.log( + "Direct MP4 file detected, downloading and converting to MP3" + ); + const tempMp4File = path.join( + tmpdir(), + `${this.getVideoId(url)}.mp4` + ); + const response = await fetch(url); + const arrayBuffer = await response.arrayBuffer(); + const buffer = Buffer.from(arrayBuffer); + fs.writeFileSync(tempMp4File, buffer); + + await new Promise<void>((resolve, reject) => { + ffmpeg(tempMp4File) + .output(outputFile) + .noVideo() + .audioCodec("libmp3lame") + .on("end", () => { + fs.unlinkSync(tempMp4File); + resolve(); + }) + .on("error", (err) => { + reject(err); + }) + .run(); + }); + } else { + logger.log( + "YouTube video detected, downloading audio with youtube-dl" + ); + await youtubeDl(url, { + verbose: true, + extractAudio: true, + audioFormat: "mp3", + output: outputFile, + writeInfoJson: true, + }); + } + return outputFile; + } catch (error) { + logger.log("Error downloading audio:", error); + throw new Error("Failed to download audio"); + } + } +} diff --git a/packages/plugin-node/src/templates.ts b/packages/plugin-node/src/templates.ts new file mode 100644 index 00000000000..de1261d296b --- /dev/null +++ b/packages/plugin-node/src/templates.ts @@ -0,0 +1,15 @@ +export const getFileLocationTemplate = ` +{{recentMessages}} + +extract the file location from the user's message or the attachment in the message history that they are referring to. +your job is to infer the correct attachment based on the recent messages, the user's most recent message, and the attachments in the message +image attachments are the result of the user's uploads, or images you have created. +only respond with the file location, no other text.
+typically the file location is in the form of a URL or a file path. + +\`\`\`json +{ + "fileLocation": "file location text goes here" +} +\`\`\` +`; diff --git a/packages/plugin-node/src/types.ts b/packages/plugin-node/src/types.ts new file mode 100644 index 00000000000..3a659ae14ae --- /dev/null +++ b/packages/plugin-node/src/types.ts @@ -0,0 +1,11 @@ +import { z } from "zod"; + +export const FileLocationResultSchema = z.object({ + fileLocation: z.string().min(1), +}); + +export type FileLocationResult = z.infer<typeof FileLocationResultSchema>; + +export function isFileLocationResult(obj: unknown): obj is FileLocationResult { + return FileLocationResultSchema.safeParse(obj).success; +} diff --git a/packages/plugin-node/tsconfig.json b/packages/plugin-node/tsconfig.json new file mode 100644 index 00000000000..d5059a358bb --- /dev/null +++ b/packages/plugin-node/tsconfig.json @@ -0,0 +1,9 @@ +{ + "extends": "../core/tsconfig.json", + "compilerOptions": { + "outDir": "dist", + "rootDir": "src", + "types": ["node"] + }, + "include": ["src/**/*.ts", "src/**/*.d.ts"] +} diff --git a/packages/plugin-node/tsup.config.ts b/packages/plugin-node/tsup.config.ts new file mode 100644 index 00000000000..b5e4388b214 --- /dev/null +++ b/packages/plugin-node/tsup.config.ts @@ -0,0 +1,21 @@ +import { defineConfig } from "tsup"; + +export default defineConfig({ + entry: ["src/index.ts"], + outDir: "dist", + sourcemap: true, + clean: true, + format: ["esm"], // Ensure you're targeting ESM + external: [ + "dotenv", // Externalize dotenv to prevent bundling + "fs", // Externalize fs to use Node.js built-in module + "path", // Externalize other built-ins if necessary + "@reflink/reflink", + "@node-llama-cpp", + "https", + "http", + "agentkeepalive", + "zod", + // Add other modules you want to externalize + ], +});
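To show how the pieces added in `templates.ts` and `types.ts` are likely meant to fit together, here is a hedged sketch of resolving a file location before an action such as `DESCRIBE_IMAGE` runs. The `composeContext`/`generateObject` pairing, the `schema` option, and the `ModelClass.SMALL` choice are assumptions about the core API rather than code contained in this patch.

```typescript
import {
    composeContext,
    generateObject,
    ModelClass,
    type IAgentRuntime,
    type State,
} from "@elizaos/core";
import { getFileLocationTemplate } from "./templates.ts";
import { FileLocationResultSchema, isFileLocationResult } from "./types.ts";

// Illustrative only: ask the model to extract the file location referenced in
// recent messages, then validate the returned JSON with the zod schema.
async function resolveFileLocation(
    runtime: IAgentRuntime,
    state: State
): Promise<string> {
    const context = composeContext({
        state,
        template: getFileLocationTemplate,
    });

    const result = await generateObject({
        runtime,
        context,
        modelClass: ModelClass.SMALL, // assumed small text model, as used in browser.ts
        schema: FileLocationResultSchema, // assumption: forwarded to the model call
    });

    if (!isFileLocationResult(result)) {
        throw new Error("Could not extract a file location from the conversation");
    }
    return result.fileLocation;
}
```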