Skip to content

Commit

Permalink
Merge pull request #70 from superglue-ai/glu-213-show-that-klaviyo-works
Browse files Browse the repository at this point in the history
make doc size easily configurable
  • Loading branch information
nimarb authored Mar 4, 2025
2 parents 8a385f9 + e9e3d0c commit ab50be1
Show file tree
Hide file tree
Showing 8 changed files with 123 additions and 31 deletions.
6 changes: 6 additions & 0 deletions packages/core/config.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/**
* Core configuration options
*/

// Documentation settings

/**
 * Maximum length, in characters, of API documentation kept after processing.
 *
 * Documentation longer than this is handed to `postProcessLargeDoc`, which
 * trims it down to at most this many characters; callers also log a warning
 * when the processed documentation reaches this size.
 */
export const DOCUMENTATION_MAX_LENGTH = 90_000;
8 changes: 3 additions & 5 deletions packages/core/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ import { expressMiddleware } from '@apollo/server/express4';
import { ApolloServerPluginLandingPageLocalDefault } from '@apollo/server/plugin/landingPage/default';
import cors from 'cors';
import express from 'express';
import { graphqlUploadExpress } from 'graphql-upload-minimal';
import http from 'http';
import { LocalKeyManager } from './auth/localKeyManager.js';
import { SupabaseKeyManager } from './auth/supabaseKeyManager.js';
import { createDataStore } from './datastore/datastore.js';
import { resolvers, typeDefs } from './graphql/graphql.js';
import { handleQueryError, telemetryClient, telemetryMiddleware } from './utils/telemetry.js';
import { SupabaseKeyManager } from './auth/supabaseKeyManager.js';
import { LocalKeyManager } from './auth/localKeyManager.js';
import { graphqlUploadExpress } from 'graphql-upload-minimal';

// Constants
const PORT = process.env.GRAPHQL_PORT || 3000;
Expand Down Expand Up @@ -78,8 +78,6 @@ const contextConfig = {
}
};

// Authentication Helper Function to cache API keys

// Authentication Middleware
const authMiddleware = async (req, res, next) => {
if(req.path === '/health') {
Expand Down
1 change: 1 addition & 0 deletions packages/core/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"start": "node dist/index.js",
"dev": "npm run build && node -r dotenv/config ./dist/index.js dotenv_config_path=../../.env",
"test": "vitest",
"test-no-watch": "vitest run",
"test:coverage": "vitest run --coverage"
},
"type": "module",
Expand Down
7 changes: 7 additions & 0 deletions packages/core/utils/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { AxiosRequestConfig } from "axios";
import OpenAI from "openai";
import { z } from "zod";
import { zodToJsonSchema } from "zod-to-json-schema";
import { DOCUMENTATION_MAX_LENGTH } from "../config.js";
import { getDocumentation } from "./documentation.js";
import { API_ERROR_HANDLING_USER_PROMPT, API_PROMPT } from "./prompts.js";
import { callAxios, composeUrl, replaceVariables } from "./tools.js";
Expand All @@ -28,6 +29,12 @@ export async function prepareEndpoint(

// If a documentation URL is provided, fetch and parse additional details
const documentation = await getDocumentation(apiCallConfig.documentationUrl || composeUrl(apiCallConfig.urlHost, apiCallConfig.urlPath), apiCallConfig.headers, apiCallConfig.queryParams, apiCallConfig?.urlPath);
if(documentation.length >= DOCUMENTATION_MAX_LENGTH) {
console.warn("Documentation length at limit: " + documentation.length);
}
if(documentation.length <= 10000) {
console.warn("Documentation length is short: " + documentation.length);
}

const availableVars = [...Object.keys(payload || {}), ...Object.keys(credentials || {})];
const computedApiCallConfig = await generateApiConfig(apiCallConfig, documentation, availableVars, lastError, previousMessages);
Expand Down
98 changes: 87 additions & 11 deletions packages/core/utils/documentation.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import axios from 'axios';
import { afterEach, beforeEach, describe, expect, it, vi, type Mocked } from 'vitest';
import { DOCUMENTATION_MAX_LENGTH } from '../config.js';
import { getDocumentation, postProcessLargeDoc } from './documentation.js';

// Mock axios
Expand Down Expand Up @@ -314,51 +315,126 @@ describe('Documentation Utilities', () => {
describe('postProcessLargeDoc', () => {
it('should handle undefined endpoint without infinite loops', () => {
// Create a documentation string longer than DOCUMENTATION_MAX_LENGTH so that truncation is triggered
const longDocumentation = 'A'.repeat(100000);
const repeatLenght = DOCUMENTATION_MAX_LENGTH * 2;
const longDocumentation = 'A'.repeat(repeatLenght);

// Call with undefined endpoint (no search term can be derived from it)
const result = postProcessLargeDoc(longDocumentation, undefined);

// Without a usable endpoint the result should simply be the leading slice of the documentation
expect(result.length).toBeLessThanOrEqual(80000);
expect(result).toBe(longDocumentation.slice(0, 80000));
expect(result.length).toBeLessThanOrEqual(DOCUMENTATION_MAX_LENGTH);
expect(result).toBe(longDocumentation.slice(0, DOCUMENTATION_MAX_LENGTH));
});

it('should handle null endpoint without infinite loops', () => {
// Create a documentation string longer than DOCUMENTATION_MAX_LENGTH so that truncation is triggered
const longDocumentation = 'A'.repeat(100000);
const repeatLenght = DOCUMENTATION_MAX_LENGTH * 2;
const longDocumentation = 'A'.repeat(repeatLenght);

// Call with null endpoint (no search term can be derived from it)
const result = postProcessLargeDoc(longDocumentation, null);

// Without a usable endpoint the result should simply be the leading slice of the documentation
expect(result.length).toBeLessThanOrEqual(80000);
expect(result).toBe(longDocumentation.slice(0, 80000));
expect(result.length).toBeLessThanOrEqual(DOCUMENTATION_MAX_LENGTH);
expect(result).toBe(longDocumentation.slice(0, DOCUMENTATION_MAX_LENGTH));
});

it('should handle empty string endpoint without infinite loops', () => {
// Create a documentation string longer than DOCUMENTATION_MAX_LENGTH so that truncation is triggered
const longDocumentation = 'A'.repeat(100000);
const repeatLenght = DOCUMENTATION_MAX_LENGTH * 2;
const longDocumentation = 'A'.repeat(repeatLenght);

// Call with empty string endpoint (falsy, so it is treated like a missing endpoint)
const result = postProcessLargeDoc(longDocumentation, '');

// Without a usable endpoint the result should simply be the leading slice of the documentation
expect(result.length).toBeLessThanOrEqual(80000);
expect(result).toBe(longDocumentation.slice(0, 80000));
expect(result.length).toBeLessThanOrEqual(DOCUMENTATION_MAX_LENGTH);
expect(result).toBe(longDocumentation.slice(0, DOCUMENTATION_MAX_LENGTH));
});

it('should handle very short endpoint without infinite loops', () => {
// Create a documentation string longer than DOCUMENTATION_MAX_LENGTH with the term 'api' embedded in it
const longDocumentation = 'A'.repeat(100000) + 'api' + 'A'.repeat(10000);
const repeatLenght = DOCUMENTATION_MAX_LENGTH * 2;
const longDocumentation = 'A'.repeat(repeatLenght) + 'api' + 'A'.repeat(repeatLenght);

// Call with a very short, 3-character endpoint.
// NOTE(review): MIN_SEARCH_TERM_LENGTH in postProcessLargeDoc is 3, so a 3-character
// term is presumably still searched rather than rejected as too short - confirm.
const result = postProcessLargeDoc(longDocumentation, 'api');

// The result must stay within the configured maximum length
expect(result.length).toBeLessThanOrEqual(80000);
expect(result.length).toBeLessThanOrEqual(DOCUMENTATION_MAX_LENGTH);
// In this case, it should still find the search term
expect(result).toContain('api');
});
it('should include regions around multiple occurrences of search term', () => {
  // Each 3-character filler unit is repeated so that one filler block alone is
  // about 1.8x DOCUMENTATION_MAX_LENGTH, guaranteeing the document needs trimming.
  const fillerRepeats = DOCUMENTATION_MAX_LENGTH * 1.8 / 3;
  const makeFiller = (unit: string) => unit.repeat(fillerRepeats);
  const leadingFiller = makeFiller('ABC');
  const middleFiller = makeFiller('BKJ');
  const trailingFiller = makeFiller('CDE');
  const shortFiller = 'FGH'.repeat(100);

  // Three sentences mentioning the endpoint name; the last two are only
  // separated by the short filler, so they sit close together in the document.
  const endpointName = 'userProfile';
  const mentions = [
    `Here is info about ${endpointName}`,
    `More details about ${endpointName} endpoint`,
    `Overlapping details about ${endpointName} endpoint `
  ];

  const oversizedDoc = [
    leadingFiller,
    mentions[0],
    middleFiller,
    mentions[1],
    shortFiller,
    mentions[2],
    trailingFiller
  ].join('');

  // Trim the document using the endpoint path as the search hint
  const trimmed = postProcessLargeDoc(oversizedDoc, '/userProfile');

  // The trimmed document must respect the configured maximum length
  expect(trimmed.length).toBeLessThanOrEqual(DOCUMENTATION_MAX_LENGTH);

  // Every one of the three mentions must survive the trimming
  for (const mention of mentions) {
    expect(trimmed).toContain(mention);
  }
});

it('it should include the authorization, even if its the last thing found', () => {
  // Each 3-character filler unit is repeated so that one filler block alone is
  // about 1.8x DOCUMENTATION_MAX_LENGTH, guaranteeing the document needs trimming.
  const fillerRepeats = DOCUMENTATION_MAX_LENGTH * 1.8 / 3;
  const makeFiller = (unit: string) => unit.repeat(fillerRepeats);
  const leadingFiller = makeFiller('ABC');
  const middleFiller = makeFiller('BKJ');
  const trailingFiller = makeFiller('CDE');
  const shortFiller = 'FGH'.repeat(100);

  // Two sentences mention the endpoint name; the third only mentions
  // "authorization" and is placed after all large filler blocks, near the end.
  const endpointName = 'userProfile';
  const snippets = [
    `Here is info about ${endpointName}`,
    `More details about ${endpointName} endpoint`,
    `details about authorization`
  ];

  const oversizedDoc = [
    leadingFiller,
    snippets[0],
    middleFiller,
    snippets[1],
    trailingFiller,
    snippets[2],
    shortFiller
  ].join('');

  // Trim the document using the endpoint path as the search hint
  const trimmed = postProcessLargeDoc(oversizedDoc, '/userProfile');

  // The trimmed document must respect the configured maximum length
  expect(trimmed.length).toBeLessThanOrEqual(DOCUMENTATION_MAX_LENGTH);

  // Both endpoint mentions and the trailing authorization note must be kept
  for (const snippet of snippets) {
    expect(trimmed).toContain(snippet);
  }
});
});
});
28 changes: 17 additions & 11 deletions packages/core/utils/documentation.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import axios from "axios";
import { getIntrospectionQuery } from "graphql";
import { NodeHtmlMarkdown } from "node-html-markdown";
import { DOCUMENTATION_MAX_LENGTH } from "../config.js";

export function extractOpenApiUrl(html: string): string | null {
try {
Expand Down Expand Up @@ -68,14 +69,13 @@ async function getOpenApiJsonFromUrl(openApiUrl: string, documentationUrl: strin
}

export function postProcessLargeDoc(documentation: string, endpointPath: string): string {
const MAX_DOC_LENGTH = 80000;
const MIN_INITIAL_CHUNK = 20000;
const MAX_INITIAL_CHUNK = 40000;
const CONTEXT_SIZE = 10000;
const CONTEXT_SEPARATOR = "\n\n";
const MIN_SEARCH_TERM_LENGTH = 3;

if (documentation.length <= MAX_DOC_LENGTH) {
if (documentation.length <= DOCUMENTATION_MAX_LENGTH) {
return documentation;
}

Expand All @@ -85,15 +85,20 @@ export function postProcessLargeDoc(documentation: string, endpointPath: string)
const docLower = documentation.toLowerCase();

if (!endpointPath || searchTerm.length < MIN_SEARCH_TERM_LENGTH) {
return documentation.slice(0, MAX_DOC_LENGTH);
return documentation.slice(0, DOCUMENTATION_MAX_LENGTH);
}

// Find all occurrences of the search term
const positions: number[] = [];

let authPos = docLower.indexOf("securityschemes") || docLower.indexOf("authorization");
if(authPos !== -1) {
positions.push(authPos);
// Fix the authorization search to properly find all relevant authorization terms
let authPosSecuritySchemes = docLower.indexOf("securityschemes");
if (authPosSecuritySchemes !== -1) {
positions.push(authPosSecuritySchemes);
}
let authPosAuthorization = docLower.indexOf("authorization");
if (authPosAuthorization !== -1) {
positions.push(authPosAuthorization);
}

let pos = docLower.indexOf(searchTerm);
Expand All @@ -104,12 +109,14 @@ export function postProcessLargeDoc(documentation: string, endpointPath: string)

// If no occurrences found return max doc length
if (positions.length === 0) {
return documentation.slice(0, MAX_DOC_LENGTH);
return documentation.slice(0, DOCUMENTATION_MAX_LENGTH);
}

// Calculate non-overlapping context regions
type Region = { start: number; end: number };
const regions: Region[] = [];
// Sort positions to ensure we process them in order from start to end of document
positions.sort((a, b) => a - b);

for (const pos of positions) {
const start = Math.max(0, pos - CONTEXT_SIZE);
Expand All @@ -130,7 +137,7 @@ export function postProcessLargeDoc(documentation: string, endpointPath: string)
const separatorSpace = regions.length * CONTEXT_SEPARATOR.length;

// If contexts overlap significantly, we might have more space for initial chunk
const availableForInitial = MAX_DOC_LENGTH - (totalContextSpace + separatorSpace);
const availableForInitial = DOCUMENTATION_MAX_LENGTH - (totalContextSpace + separatorSpace);

// Use up to MAX_INITIAL_CHUNK if we have space due to overlapping contexts
const initialChunkSize = Math.max(
Expand All @@ -139,7 +146,7 @@ export function postProcessLargeDoc(documentation: string, endpointPath: string)
);

let finalDoc = documentation.slice(0, initialChunkSize);
let remainingLength = MAX_DOC_LENGTH - finalDoc.length;
let remainingLength = DOCUMENTATION_MAX_LENGTH - finalDoc.length;

// Add context for each non-overlapping region
for (const region of regions) {
Expand All @@ -158,7 +165,6 @@ export function postProcessLargeDoc(documentation: string, endpointPath: string)
}

export async function getDocumentation(documentationUrl: string, headers: Record<string, string>, queryParams: Record<string, string>, apiEndpoint?: string): Promise<string> {
const docMaxLength = 80000;
if(!documentationUrl) {
return "";
}
Expand Down Expand Up @@ -200,7 +206,7 @@ export async function getDocumentation(documentationUrl: string, headers: Record
console.warn(`Failed to fetch documentation from ${documentationUrl}:`, error?.message);
}

if(documentation.length > docMaxLength) {
if(documentation.length > DOCUMENTATION_MAX_LENGTH) {
documentation = postProcessLargeDoc(documentation, apiEndpoint || '');
}

Expand Down
2 changes: 0 additions & 2 deletions packages/core/utils/telemetry.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import { PostHog } from 'posthog-node';
import { config } from '../default.js';

// PostHog Telemetry

// we use a privacy-preserving session id to track queries
export const sessionId = crypto.randomUUID();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@ export function ConfigCreateStepper({ open, onOpenChange, configId: initialConfi
try {
const urlObj = new URL(url.startsWith('http') ? url : `https://${url}`)
const cleanedHost = cleanApiDomain(`${urlObj.protocol}//${urlObj.host}`)
const path = urlObj.pathname === '/' ? '' : urlObj.pathname
// Include query params in the path, good context for LLM
const path = urlObj.pathname === '/' ? '' : `${urlObj.pathname}${urlObj.search}`
return {
urlHost: cleanedHost,
urlPath: path
Expand Down Expand Up @@ -146,7 +147,6 @@ export function ConfigCreateStepper({ open, onOpenChange, configId: initialConfi
})

// Call autofill endpoint

const response = await superglueClient.call({
endpoint: {
urlHost: url.urlHost,
Expand Down

0 comments on commit ab50be1

Please sign in to comment.