Skip to content

Commit

Permalink
[js/googleai]: feat: Adds support for Gemini Files API URLs. Fixes #737
Browse files Browse the repository at this point in the history
… (#742)
  • Loading branch information
mbleigh authored Aug 7, 2024
1 parent 3953235 commit 19a6223
Show file tree
Hide file tree
Showing 7 changed files with 117 additions and 8 deletions.
25 changes: 25 additions & 0 deletions docs/plugins/google-genai.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,28 @@ const embedding = await embed({
content: input,
});
```

## Gemini Files API

You can use files uploaded to the Gemini Files API with Genkit:

```js
import { GoogleAIFileManager } from '@google/generative-ai/server';

const fileManager = new GoogleAIFileManager(process.env.GOOGLE_GENAI_API_KEY);
const uploadResult = await fileManager.uploadFile(
'path/to/file.jpg',
{
mimeType: 'image/jpeg',
displayName: 'Your Image',
}
);

const response = await generate({
model: gemini15Flash,
prompt: [
{text: "Describe this image:"},
{media: {contentType: uploadResult.file.mimeType, url: uploadResult.file.uri}}
]
});
```
18 changes: 15 additions & 3 deletions js/ai/src/model/middleware.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,21 @@
*/

import { Document } from '../document.js';
import { MessageData, ModelInfo, ModelMiddleware, Part } from '../model.js';
import {
MediaPart,
MessageData,
ModelInfo,
ModelMiddleware,
Part,
} from '../model.js';

/**
* Preprocess a GenerateRequest to download referenced http(s) media URLs and
* inline them as data URIs.
*/
export function downloadRequestMedia(options?: {
maxBytes?: number;
filter?: (part: MediaPart) => boolean;
}): ModelMiddleware {
return async (req, next) => {
const { default: fetch } = await import('node-fetch');
Expand All @@ -33,8 +40,13 @@ export function downloadRequestMedia(options?: {
req.messages.map(async (message) => {
const content: Part[] = await Promise.all(
message.content.map(async (part) => {
// skip non-media parts and non-http urls
if (!part.media || !part.media.url.startsWith('http')) {
// skip non-media parts and non-http urls, or parts that have been
// filtered out by user config
if (
!part.media ||
!part.media.url.startsWith('http') ||
(options?.filter && !options?.filter(part))
) {
return part;
}

Expand Down
2 changes: 1 addition & 1 deletion js/plugins/googleai/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
"author": "genkit",
"license": "Apache-2.0",
"dependencies": {
"@google/generative-ai": "^0.15.0",
"@google/generative-ai": "^0.16.0",
"google-auth-library": "^9.6.3",
"node-fetch": "^3.3.2",
"zod": "^3.22.4"
Expand Down
27 changes: 25 additions & 2 deletions js/plugins/googleai/src/gemini.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import {
} from '@genkit-ai/ai/model/middleware';
import { GENKIT_CLIENT_HEADER } from '@genkit-ai/core';
import {
FileDataPart,
FunctionCallPart,
FunctionDeclaration,
FunctionDeclarationSchemaType,
Expand Down Expand Up @@ -247,6 +248,16 @@ function toInlineData(part: MediaPart): InlineDataPart {
return { inlineData: { mimeType: contentType, data: b64Data } };
}

/**
 * Converts a media part into a Gemini `fileData` part that refers to the
 * media by URI instead of embedding its bytes inline.
 *
 * @param part - Media part; `media.url` is passed through as the file URI and
 *   `media.contentType` as the MIME type.
 * @returns A part shaped as `{ fileData: { mimeType, fileUri } }`.
 * @throws Error when `media.contentType` is missing or empty, since no MIME
 *   type is derived from the URL here.
 */
function toFileData(part: MediaPart): FileDataPart {
  const { contentType, url } = part.media;
  if (contentType) {
    return { fileData: { mimeType: contentType, fileUri: url } };
  }
  throw new Error(
    'Must supply a `contentType` when sending File URIs to Gemini.'
  );
}

function fromInlineData(inlinePart: InlineDataPart): MediaPart {
// Check if the required properties exist
if (
Expand Down Expand Up @@ -361,7 +372,10 @@ function toCustomPart(part: Part): GeminiPart {

function toGeminiPart(part: Part): GeminiPart {
if (part.text !== undefined) return { text: part.text };
if (part.media) return toInlineData(part);
if (part.media) {
if (part.media.url.startsWith('data:')) return toInlineData(part);
return toFileData(part);
}
if (part.toolRequest) return toFunctionCall(part);
if (part.toolResponse) return toFunctionResponse(part);
if (part.custom) return toCustomPart(part);
Expand Down Expand Up @@ -466,7 +480,16 @@ export function googleAIModel(
}
if (model?.info?.supports?.media) {
// the gemini api doesn't support downloading media from http(s)
middleware.push(downloadRequestMedia({ maxBytes: 1024 * 1024 * 10 }));
middleware.push(
downloadRequestMedia({
maxBytes: 1024 * 1024 * 10,
// don't download files that have been uploaded using the Files API
filter: (part) =>
!part.media.url.startsWith(
'https://generativelanguage.googleapis.com/'
),
})
);
}

return defineModel(
Expand Down
15 changes: 13 additions & 2 deletions js/pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions js/testapps/flow-simple-ai/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
"@genkit-ai/google-cloud": "workspace:*",
"@genkit-ai/googleai": "workspace:*",
"@genkit-ai/vertexai": "workspace:*",
"@google/generative-ai": "^0.15.0",
"@opentelemetry/sdk-trace-base": "^1.22.0",
"firebase-admin": "^12.1.0",
"partial-json": "^0.1.7",
Expand Down
37 changes: 37 additions & 0 deletions js/testapps/flow-simple-ai/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -428,3 +428,40 @@ export const invalidOutput = defineFlow(
return result.output() as any;
}
);

import { GoogleAIFileManager } from '@google/generative-ai/server';
// Shared GoogleAIFileManager client used by the `fileApi` flow below to upload
// files. The API key is read from GOOGLE_GENAI_API_KEY, falling back to
// GOOGLE_API_KEY when the former is unset or empty.
// NOTE(review): the trailing `!` only silences the type checker; if neither
// variable is set, `undefined` is passed to the constructor at runtime.
const fileManager = new GoogleAIFileManager(
process.env.GOOGLE_GENAI_API_KEY || process.env.GOOGLE_API_KEY!
);
/**
 * Flow exercising uploaded-file URLs: uploads a local JPEG through
 * `fileManager`, then asks `gemini15Flash` to describe it by passing the
 * uploaded file's URI and MIME type as a media part.
 *
 * The declared string input is not read by the handler. Resolves to the text
 * of the model's response.
 */
export const fileApi = defineFlow(
  {
    name: 'fileApi',
    inputSchema: z.string(),
    outputSchema: z.string(),
  },
  async () => {
    const { file } = await fileManager.uploadFile('../menu/data/menu.jpeg', {
      mimeType: 'image/jpeg',
      displayName: 'Restaurant Menu',
    });
    // Log the upload metadata returned by the file manager.
    console.log(file);

    // Reference the uploaded file by URI; the content type comes from the
    // upload result.
    const uploadedImage = {
      media: { contentType: file.mimeType, url: file.uri },
    };
    const result = await generate({
      model: gemini15Flash,
      prompt: [{ text: 'Describe this image:' }, uploadedImage],
    });

    return result.text();
  }
);

0 comments on commit 19a6223

Please sign in to comment.