Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

make doc size easily configurable #70

Merged
merged 3 commits into from
Mar 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions packages/core/config.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/**
* Core configuration options
*/

/**
 * Documentation settings.
 *
 * Upper bound on the length (string `.length`) of fetched documentation text.
 * Documentation longer than this is condensed by `postProcessLargeDoc`.
 */
export const DOCUMENTATION_MAX_LENGTH = 90_000;
8 changes: 3 additions & 5 deletions packages/core/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ import { expressMiddleware } from '@apollo/server/express4';
import { ApolloServerPluginLandingPageLocalDefault } from '@apollo/server/plugin/landingPage/default';
import cors from 'cors';
import express from 'express';
import { graphqlUploadExpress } from 'graphql-upload-minimal';
import http from 'http';
import { LocalKeyManager } from './auth/localKeyManager.js';
import { SupabaseKeyManager } from './auth/supabaseKeyManager.js';
import { createDataStore } from './datastore/datastore.js';
import { resolvers, typeDefs } from './graphql/graphql.js';
import { handleQueryError, telemetryClient, telemetryMiddleware } from './utils/telemetry.js';
import { SupabaseKeyManager } from './auth/supabaseKeyManager.js';
import { LocalKeyManager } from './auth/localKeyManager.js';
import { graphqlUploadExpress } from 'graphql-upload-minimal';

// Constants
const PORT = process.env.GRAPHQL_PORT || 3000;
Expand Down Expand Up @@ -78,8 +78,6 @@ const contextConfig = {
}
};

// Authentication Helper Function to cache API keys

// Authentication Middleware
const authMiddleware = async (req, res, next) => {
if(req.path === '/health') {
Expand Down
1 change: 1 addition & 0 deletions packages/core/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"start": "node dist/index.js",
"dev": "npm run build && node -r dotenv/config ./dist/index.js dotenv_config_path=../../.env",
"test": "vitest",
"test-no-watch": "vitest run",
"test:coverage": "vitest run --coverage"
},
"type": "module",
Expand Down
7 changes: 7 additions & 0 deletions packages/core/utils/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { AxiosRequestConfig } from "axios";
import OpenAI from "openai";
import { z } from "zod";
import { zodToJsonSchema } from "zod-to-json-schema";
import { DOCUMENTATION_MAX_LENGTH } from "../config.js";
import { getDocumentation } from "./documentation.js";
import { API_ERROR_HANDLING_USER_PROMPT, API_PROMPT } from "./prompts.js";
import { callAxios, composeUrl, replaceVariables } from "./tools.js";
Expand All @@ -28,6 +29,12 @@ export async function prepareEndpoint(

// If a documentation URL is provided, fetch and parse additional details
const documentation = await getDocumentation(apiCallConfig.documentationUrl || composeUrl(apiCallConfig.urlHost, apiCallConfig.urlPath), apiCallConfig.headers, apiCallConfig.queryParams, apiCallConfig?.urlPath);
if(documentation.length >= DOCUMENTATION_MAX_LENGTH) {
console.warn("Documentation length at limit: " + documentation.length);
}
if(documentation.length <= 10000) {
console.warn("Documentation length is short: " + documentation.length);
}

const availableVars = [...Object.keys(payload || {}), ...Object.keys(credentials || {})];
const computedApiCallConfig = await generateApiConfig(apiCallConfig, documentation, availableVars, lastError, previousMessages);
Expand Down
98 changes: 87 additions & 11 deletions packages/core/utils/documentation.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import axios from 'axios';
import { afterEach, beforeEach, describe, expect, it, vi, type Mocked } from 'vitest';
import { DOCUMENTATION_MAX_LENGTH } from '../config.js';
import { getDocumentation, postProcessLargeDoc } from './documentation.js';

// Mock axios
Expand Down Expand Up @@ -314,51 +315,126 @@ describe('Documentation Utilities', () => {
describe('postProcessLargeDoc', () => {
it('should handle undefined endpoint without infinite loops', () => {
// Create a documentation string longer than DOCUMENTATION_MAX_LENGTH
const longDocumentation = 'A'.repeat(100000);
const repeatLenght = DOCUMENTATION_MAX_LENGTH * 2;
const longDocumentation = 'A'.repeat(repeatLenght);

// Call with undefined endpoint
const result = postProcessLargeDoc(longDocumentation, undefined);

// Should return a truncated version of the documentation
expect(result.length).toBeLessThanOrEqual(80000);
expect(result).toBe(longDocumentation.slice(0, 80000));
expect(result.length).toBeLessThanOrEqual(DOCUMENTATION_MAX_LENGTH);
expect(result).toBe(longDocumentation.slice(0, DOCUMENTATION_MAX_LENGTH));
});

it('should handle null endpoint without infinite loops', () => {
// Create a documentation string longer than DOCUMENTATION_MAX_LENGTH
const longDocumentation = 'A'.repeat(100000);
const repeatLenght = DOCUMENTATION_MAX_LENGTH * 2;
const longDocumentation = 'A'.repeat(repeatLenght);

// Call with null endpoint
const result = postProcessLargeDoc(longDocumentation, null);

// Should return a truncated version of the documentation
expect(result.length).toBeLessThanOrEqual(80000);
expect(result).toBe(longDocumentation.slice(0, 80000));
expect(result.length).toBeLessThanOrEqual(DOCUMENTATION_MAX_LENGTH);
expect(result).toBe(longDocumentation.slice(0, DOCUMENTATION_MAX_LENGTH));
});

it('should handle empty string endpoint without infinite loops', () => {
// Create a documentation string longer than DOCUMENTATION_MAX_LENGTH
const longDocumentation = 'A'.repeat(100000);
const repeatLenght = DOCUMENTATION_MAX_LENGTH * 2;
const longDocumentation = 'A'.repeat(repeatLenght);

// Call with empty string endpoint
const result = postProcessLargeDoc(longDocumentation, '');

// Should return a truncated version of the documentation
expect(result.length).toBeLessThanOrEqual(80000);
expect(result).toBe(longDocumentation.slice(0, 80000));
expect(result.length).toBeLessThanOrEqual(DOCUMENTATION_MAX_LENGTH);
expect(result).toBe(longDocumentation.slice(0, DOCUMENTATION_MAX_LENGTH));
});

it('should handle very short endpoint without infinite loops', () => {
// Create a documentation string longer than DOCUMENTATION_MAX_LENGTH
const longDocumentation = 'A'.repeat(100000) + 'api' + 'A'.repeat(10000);
const repeatLenght = DOCUMENTATION_MAX_LENGTH * 2;
const longDocumentation = 'A'.repeat(repeatLenght) + 'api' + 'A'.repeat(repeatLenght);

// Call with a 3-character endpoint (the minimum search term length is 3 chars, so it is still searched)
const result = postProcessLargeDoc(longDocumentation, 'api');

// Should return a truncated version of the documentation (first chunk)
expect(result.length).toBeLessThanOrEqual(80000);
expect(result.length).toBeLessThanOrEqual(DOCUMENTATION_MAX_LENGTH);
// In this case, it should still find the search term
expect(result).toContain('api');
});
it('should include regions around multiple occurrences of search term', () => {
  // Each long filler is a 3-character pattern repeated (1.8 * max / 3) times,
  // i.e. roughly 1.8x DOCUMENTATION_MAX_LENGTH characters on its own.
  const fillerRepeats = DOCUMENTATION_MAX_LENGTH * 1.8 / 3;
  const leadingFiller = 'ABC'.repeat(fillerRepeats);
  const middleFiller = 'BKJ'.repeat(fillerRepeats);
  const trailingFiller = 'CDE'.repeat(fillerRepeats);
  // Only 300 characters, so the second and third snippets sit close together
  const shortFiller = 'FGH'.repeat(100);

  // Three snippets that each mention the search term
  const searchTerm = 'userProfile';
  const docTerms = [
    `Here is info about ${searchTerm}`,
    `More details about ${searchTerm} endpoint`,
    `Overlapping details about ${searchTerm} endpoint `
  ];

  // Layout: filler, snippet 1, filler, snippet 2, short filler, snippet 3, filler
  const longDocumentation = [
    leadingFiller,
    docTerms[0],
    middleFiller,
    docTerms[1],
    shortFiller,
    docTerms[2],
    trailingFiller
  ].join('');

  // Use the search term (with a leading slash) as the endpoint path
  const result = postProcessLargeDoc(longDocumentation, '/userProfile');

  // The condensed document must respect the configured maximum length
  expect(result.length).toBeLessThanOrEqual(DOCUMENTATION_MAX_LENGTH);

  // Every one of the three snippets must survive the condensing step
  for (const term of docTerms) {
    expect(result).toContain(term);
  }
});

it('it should include the authorization, even if its the last thing found', () => {
  // Each long filler is a 3-character pattern repeated (1.8 * max / 3) times,
  // i.e. roughly 1.8x DOCUMENTATION_MAX_LENGTH characters on its own.
  const fillerRepeats = DOCUMENTATION_MAX_LENGTH * 1.8 / 3;
  const leadingFiller = 'ABC'.repeat(fillerRepeats);
  const middleFiller = 'BKJ'.repeat(fillerRepeats);
  const trailingFiller = 'CDE'.repeat(fillerRepeats);
  const shortFiller = 'FGH'.repeat(100);

  // Two snippets mention the search term; the third only mentions "authorization"
  const searchTerm = 'userProfile';
  const docTerms = [
    `Here is info about ${searchTerm}`,
    `More details about ${searchTerm} endpoint`,
    `details about authorization`
  ];

  // Layout: filler, snippet 1, filler, snippet 2, filler, authorization snippet, short filler
  // so the authorization text appears after both search-term snippets.
  const longDocumentation = [
    leadingFiller,
    docTerms[0],
    middleFiller,
    docTerms[1],
    trailingFiller,
    docTerms[2],
    shortFiller
  ].join('');

  // Use the search term (with a leading slash) as the endpoint path
  const result = postProcessLargeDoc(longDocumentation, '/userProfile');

  // The condensed document must respect the configured maximum length
  expect(result.length).toBeLessThanOrEqual(DOCUMENTATION_MAX_LENGTH);

  // Both search-term snippets and the authorization snippet must be present
  for (const term of docTerms) {
    expect(result).toContain(term);
  }
});
});
});
28 changes: 17 additions & 11 deletions packages/core/utils/documentation.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import axios from "axios";
import { getIntrospectionQuery } from "graphql";
import { NodeHtmlMarkdown } from "node-html-markdown";
import { DOCUMENTATION_MAX_LENGTH } from "../config.js";

export function extractOpenApiUrl(html: string): string | null {
try {
Expand Down Expand Up @@ -68,14 +69,13 @@ async function getOpenApiJsonFromUrl(openApiUrl: string, documentationUrl: strin
}

export function postProcessLargeDoc(documentation: string, endpointPath: string): string {
const MAX_DOC_LENGTH = 80000;
const MIN_INITIAL_CHUNK = 20000;
const MAX_INITIAL_CHUNK = 40000;
const CONTEXT_SIZE = 10000;
const CONTEXT_SEPARATOR = "\n\n";
const MIN_SEARCH_TERM_LENGTH = 3;

if (documentation.length <= MAX_DOC_LENGTH) {
if (documentation.length <= DOCUMENTATION_MAX_LENGTH) {
return documentation;
}

Expand All @@ -85,15 +85,20 @@ export function postProcessLargeDoc(documentation: string, endpointPath: string)
const docLower = documentation.toLowerCase();

if (!endpointPath || searchTerm.length < MIN_SEARCH_TERM_LENGTH) {
return documentation.slice(0, MAX_DOC_LENGTH);
return documentation.slice(0, DOCUMENTATION_MAX_LENGTH);
}

// Find all occurrences of the search term
const positions: number[] = [];

let authPos = docLower.indexOf("securityschemes") || docLower.indexOf("authorization");
if(authPos !== -1) {
positions.push(authPos);
// Record the first occurrence of each authorization-related keyword so its surrounding context is kept
let authPosSecuritySchemes = docLower.indexOf("securityschemes");
if (authPosSecuritySchemes !== -1) {
positions.push(authPosSecuritySchemes);
}
let authPosAuthorization = docLower.indexOf("authorization");
if (authPosAuthorization !== -1) {
positions.push(authPosAuthorization);
}

let pos = docLower.indexOf(searchTerm);
Expand All @@ -104,12 +109,14 @@ export function postProcessLargeDoc(documentation: string, endpointPath: string)

// If no occurrences found return max doc length
if (positions.length === 0) {
return documentation.slice(0, MAX_DOC_LENGTH);
return documentation.slice(0, DOCUMENTATION_MAX_LENGTH);
}

// Calculate non-overlapping context regions
type Region = { start: number; end: number };
const regions: Region[] = [];
// Sort positions to ensure we process them in order from start to end of document
positions.sort((a, b) => a - b);

for (const pos of positions) {
const start = Math.max(0, pos - CONTEXT_SIZE);
Expand All @@ -130,7 +137,7 @@ export function postProcessLargeDoc(documentation: string, endpointPath: string)
const separatorSpace = regions.length * CONTEXT_SEPARATOR.length;

// If contexts overlap significantly, we might have more space for initial chunk
const availableForInitial = MAX_DOC_LENGTH - (totalContextSpace + separatorSpace);
const availableForInitial = DOCUMENTATION_MAX_LENGTH - (totalContextSpace + separatorSpace);

// Use up to MAX_INITIAL_CHUNK if we have space due to overlapping contexts
const initialChunkSize = Math.max(
Expand All @@ -139,7 +146,7 @@ export function postProcessLargeDoc(documentation: string, endpointPath: string)
);

let finalDoc = documentation.slice(0, initialChunkSize);
let remainingLength = MAX_DOC_LENGTH - finalDoc.length;
let remainingLength = DOCUMENTATION_MAX_LENGTH - finalDoc.length;

// Add context for each non-overlapping region
for (const region of regions) {
Expand All @@ -158,7 +165,6 @@ export function postProcessLargeDoc(documentation: string, endpointPath: string)
}

export async function getDocumentation(documentationUrl: string, headers: Record<string, string>, queryParams: Record<string, string>, apiEndpoint?: string): Promise<string> {
const docMaxLength = 80000;
if(!documentationUrl) {
return "";
}
Expand Down Expand Up @@ -200,7 +206,7 @@ export async function getDocumentation(documentationUrl: string, headers: Record
console.warn(`Failed to fetch documentation from ${documentationUrl}:`, error?.message);
}

if(documentation.length > docMaxLength) {
if(documentation.length > DOCUMENTATION_MAX_LENGTH) {
documentation = postProcessLargeDoc(documentation, apiEndpoint || '');
}

Expand Down
2 changes: 0 additions & 2 deletions packages/core/utils/telemetry.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import { PostHog } from 'posthog-node';
import { config } from '../default.js';

// PostHog Telemetry

// we use a privacy-preserving session id to track queries
export const sessionId = crypto.randomUUID();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@ export function ConfigCreateStepper({ open, onOpenChange, configId: initialConfi
try {
const urlObj = new URL(url.startsWith('http') ? url : `https://${url}`)
const cleanedHost = cleanApiDomain(`${urlObj.protocol}//${urlObj.host}`)
const path = urlObj.pathname === '/' ? '' : urlObj.pathname
// Include query params in the path, good context for LLM
const path = urlObj.pathname === '/' ? '' : `${urlObj.pathname}${urlObj.search}`
return {
urlHost: cleanedHost,
urlPath: path
Expand Down Expand Up @@ -146,7 +147,6 @@ export function ConfigCreateStepper({ open, onOpenChange, configId: initialConfi
})

// Call autofill endpoint

const response = await superglueClient.call({
endpoint: {
urlHost: url.urlHost,
Expand Down