# gpt_exp.py
import base64
import requests

from config import OPENAI_API_KEY
from utils import convert_pil_to_base64


# Encode an image file on disk as a base64 string.
# (Unused by the functions below, which take PIL images and use
# convert_pil_to_base64 instead.)
def encode_image(image_path):
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')
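
# Example usage (illustrative; the path and variable names are hypothetical):
#     b64 = encode_image("photo.jpg")
#     data_url = f"data:image/jpeg;base64,{b64}"
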
def gpt_explain(task, original_image, explanation_image, category, ground_truth=None):
    # Encode both PIL images as base64 strings
    base64_image = convert_pil_to_base64(original_image)
    base64_image_xai = convert_pil_to_base64(explanation_image)
    prompt = (
        f"The first image is the original input image. The second image is the heatmap of category {category} in the original image. " +
        f"First, look at the original image to identify which parts belong to category {category} and which parts do not. " +
        f"Then, look at the second image to see the overall heatmap of category {category}. " +
        f"Can you describe the concentrated regions in the image for the task {task} of category {category}? " +
        f"Your final answer should be correct, intuitive, and compact, so that end-users can easily understand the meaning of the heatmap. " +
        f"Your final answer should describe the regions the model focuses on to predict {category}. " +
        f"Your final answer should be separated into bullet points. " +
        f"First, briefly describe the heatmap in one sentence. " +
        f"Secondly, describe the most concentrated region of {category} in the heatmap. " +
        f"Thirdly, describe the least concentrated region of {category} in the heatmap. " +
        f"Fourthly, assess the localization quality: does the concentrated region align with the {category} in the original image?"
    )
    if ground_truth is not None:
        prompt += (
            f" The fourth bullet point should also state whether the model's predicted category {category} matches the ground truth {ground_truth} or not. This must be accurate: for example, the category 'purse' is different from the ground truth 'shopping bag'. " +
            f"Also state whether the most concentrated region of the predicted {category} in the heatmap supports the model in correctly predicting the ground truth {ground_truth}."
        )
    print(prompt)
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_API_KEY}"
    }
    payload = {
        "model": "gpt-4o-mini",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}"
                        }
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image_xai}"
                        }
                    },
                ]
            }
        ],
        "max_tokens": 10000
    }
    response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
    response.raise_for_status()  # surface HTTP errors instead of a KeyError below
    print(response.json())
    final_response = response.json()['choices'][0]['message']['content']
    return final_response
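
# Illustrative usage sketch (not from the original file). gpt_explain expects
# PIL images, since both are passed through utils.convert_pil_to_base64; the
# file names and labels below are hypothetical.
#
#     from PIL import Image
#     original = Image.open("input.jpg")
#     heatmap = Image.open("gradcam_overlay.jpg")
#     answer = gpt_explain("image classification", original, heatmap,
#                          category="dog", ground_truth="dog")
#     print(answer)
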
def seg_gpt_explain(original_image, explanation_image, segmentation_image, ground_truth, category):
    # Encode all four PIL images as base64 strings
    base64_image = convert_pil_to_base64(original_image)
    base64_image_xai = convert_pil_to_base64(explanation_image)
    base64_image_seg = convert_pil_to_base64(segmentation_image)
    base64_image_gt = convert_pil_to_base64(ground_truth)
    prompt = (
        f"You are an Explainable AI expert for semantic segmentation models. " +
        f"The first image is the original image. " +
        f"The second image is the explanation map of category {category} in the original image. " +
        f"The third image is an AI model's prediction for category {category} in the original image. " +
        f"The fourth image is the ground truth of category {category} in the original image. " +
        f"First, capture the image context in the original image. " +
        f"Secondly, identify which parts belong to category {category} in the ground truth. " +
        f"Then, look at the explanation map to see the saliency map for the segmentation mask of category {category}. " +
        f"Your task is to check whether the focused region in the explanation map supports the prediction for category {category}. " +
        f"Think step by step to understand the explanation map and whether the prediction aligns with the ground truth. " +
        f"Your final answer must be concise, simple, and separated into bullet points. " +
        f"First, briefly describe the explanation map. " +
        f"Secondly, describe the most focused region of {category} in the explanation. " +
        f"Thirdly, describe the least focused region of {category} in the explanation. " +
        f"Fourthly, assess the localization quality: does the focused region align with the prediction and the ground truth for {category}?"
    )
    print(prompt)
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_API_KEY}"
    }
    payload = {
        "model": "gpt-4o-mini",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}"
                        }
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image_xai}"
                        }
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image_seg}"
                        }
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image_gt}"
                        }
                    },
                ]
            }
        ],
        "max_tokens": 10000
    }
    response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
    response.raise_for_status()  # surface HTTP errors instead of a KeyError below
    print(response.json())
    final_response = response.json()['choices'][0]['message']['content']
    return final_response
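
# Minimal smoke-test sketch, assuming four PIL images on disk. All file names
# and the category label are hypothetical, not from the original file.
if __name__ == "__main__":
    from PIL import Image

    original = Image.open("example_input.jpg")          # hypothetical path
    explanation = Image.open("example_heatmap.jpg")     # hypothetical path
    prediction = Image.open("example_pred_mask.jpg")    # hypothetical path
    gt_mask = Image.open("example_gt_mask.jpg")         # hypothetical path

    print(seg_gpt_explain(original, explanation, prediction, gt_mask,
                          category="person"))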