-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathapp.py
29 lines (24 loc) · 1.14 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from transformers import AutoProcessor, PaliGemmaForConditionalGeneration
from PIL import Image
import requests
import torch
class InferlessPythonModel:
    """PaliGemma image+text generation behind initialize/infer/finalize hooks.

    ``initialize`` loads the model and its processor, ``infer`` answers one
    text prompt about one image fetched from a URL, and ``finalize`` drops
    the loaded objects again.
    """

    MODEL_ID = "google/paligemma-3b-mix-224"
    DEVICE = "cuda:0"
    # (connect, read) timeouts in seconds for the image download. Without a
    # timeout, requests waits forever on a stalled host.
    IMAGE_FETCH_TIMEOUT = (5, 30)
    MAX_NEW_TOKENS = 100

    def initialize(self):
        """Load the PaliGemma weights (bfloat16) onto ``DEVICE`` and the processor."""
        dtype = torch.bfloat16
        self.model = PaliGemmaForConditionalGeneration.from_pretrained(
            self.MODEL_ID,
            torch_dtype=dtype,
            device_map=self.DEVICE,
            revision="bfloat16",
        ).eval()
        self.processor = AutoProcessor.from_pretrained(self.MODEL_ID)

    def infer(self, inputs):
        """Generate a text response for a prompt about a remote image.

        Args:
            inputs: Mapping with the keys ``"prompt"`` (text passed to the
                processor) and ``"image_url"`` (URL the image is fetched from).

        Returns:
            ``{"response": <decoded text>}`` containing only the newly
            generated tokens (the prompt tokens are sliced off).

        Raises:
            KeyError: If ``"prompt"`` or ``"image_url"`` is missing.
            requests.RequestException: If the download fails, times out, or
                the server answers with an error status.
        """
        prompt = inputs["prompt"]
        image_url = inputs["image_url"]

        # The context manager releases the connection even when decoding fails.
        with requests.get(
            image_url, stream=True, timeout=self.IMAGE_FETCH_TIMEOUT
        ) as response:
            # Fail on 4xx/5xx here instead of handing an error page to PIL.
            response.raise_for_status()
            # Let urllib3 undo gzip/deflate transfer encoding on the raw stream.
            response.raw.decode_content = True
            # convert() forces the pixel data to be read while the stream is
            # still open, and normalises RGBA/palette/grayscale input to RGB.
            image = Image.open(response.raw).convert("RGB")

        # Follow the device the model was actually loaded on rather than
        # repeating a hard-coded device string.
        model_inputs = self.processor(
            text=prompt, images=image, return_tensors="pt"
        ).to(self.model.device)
        input_len = model_inputs["input_ids"].shape[-1]

        with torch.inference_mode():
            generation = self.model.generate(
                **model_inputs,
                max_new_tokens=self.MAX_NEW_TOKENS,
                do_sample=False,  # greedy decoding
            )

        # generate() returns prompt + continuation; keep only the continuation.
        new_tokens = generation[0][input_len:]
        decoded = self.processor.decode(new_tokens, skip_special_tokens=True)
        return {'response': decoded}

    def finalize(self):
        """Drop the model and processor references so they can be reclaimed."""
        self.model = None
        self.processor = None