diff --git a/src/models/image/vision.ts b/src/models/image/vision.ts index e7514f2..9570e65 100644 --- a/src/models/image/vision.ts +++ b/src/models/image/vision.ts @@ -12,8 +12,8 @@ export default { model: { type: "array", required: true, - options: ["blip2", "ocr"], - default: ["blip2"], + options: ["blip2", "ocr", "gemini"], + default: ["gemini"], description: "Model to use for the analysis", }, image: { @@ -21,6 +21,13 @@ export default { required: true, description: "Image URL for the model to process", }, + typeImage: { + type: "string", + required: false, + options: ["anything", "person"], + default: "anything", + description: "Type of image to process", + } }, response: { cost: { @@ -50,7 +57,7 @@ export default { lines: null, record: null, }; - let { model, image } = data; + let { model, image, typeImage } = data; let cost = 0; let event = new EventEmitter(); event.emit("data", result); @@ -129,6 +136,13 @@ export default { event.emit("data", result); }); } + if (model.includes("gemini")) { + let prompt = `Don't forget these rules:\n\n1. **Be Direct and Concise**: Provide straightforward descriptions without adding interpretative or speculative elements.\n2. **Use Segmented Details**: Break down details about different elements of an image into distinct sentences, focusing on one aspect at a time.\n3. **Maintain a Descriptive Focus**: Prioritize purely visible elements of the image, avoiding conclusions or inferences.\n4. **Follow a Logical Structure**: Begin with the central figure or subject and expand outward, detailing its appearance before addressing the surrounding setting.\n5. **Avoid Juxtaposition**: Do not use comparison or contrast language; keep the description purely factual.\n6. **Incorporate Specificity**: Mention age, gender, race, and specific brands or notable features when present, and clearly identify the medium if it's discernible.\n\nWhen writing descriptions, prioritize clarity and direct observation over embellishment or interpretation.\n\n Write a detailed description of this image, do not forget about the texts on it if they exist. Also, do not forget to mention the type / style of the image. No bullet points.` + if (typeImage == "person") { + prompt = "Describe with high details which emotions the person in the picture seems to be experiencing from what the micro expressions in the face and the body language seem to indicate. Write only details that can be clearly observed and draw conclusions from this that have visible evidence in the picture. Also consider if the person could be trying to convey other emotions to his/her social environment than he/she is actually feeling by looking for close how genuine the displayed emotions seems to be. Provide a detailed reply that reflects high emotional intelligence, empathy and accuracy. " + } + + } result.record = { description: result.description, text: result.text, diff --git a/src/models/text/google.ts b/src/models/text/google.ts index be44236..1c30768 100644 --- a/src/models/text/google.ts +++ b/src/models/text/google.ts @@ -155,7 +155,6 @@ export default { }; let promptLength = 0; await delay(500); - console.log(request); generativeModel .generateContentStream(request) .then(async (streamingResp) => { diff --git a/src/utils/datasets.ts b/src/utils/datasets.ts index ab49b41..98924d0 100644 --- a/src/utils/datasets.ts +++ b/src/utils/datasets.ts @@ -7,7 +7,7 @@ export async function dataset( record: any, id?: string ) { - if (type == "image" && ai != "vision") { + if (type == "image" || ai != "vision") { // means record is an array of images record.forEach(async (r: any) => { await datasetSave(type, ai, r, r.id);