-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
137 lines (109 loc) · 5.12 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import cvzone
import cv2
import numpy as np
from cvzone.HandTrackingModule import HandDetector
import google.generativeai as genai
from PIL import Image
import streamlit as st
def getHandInfo(img):
    """Look for a hand in ``img`` and report its finger states and landmarks.

    Uses the module-level ``detector`` object, which must exist before this
    function is called.

    Args:
        img: Frame handed to ``detector.findHands``.

    Returns:
        ``(fingers, lmList)`` for the first detected hand, where ``fingers``
        is the result of ``detector.fingersUp`` and ``lmList`` is that hand's
        ``"lmList"`` entry; ``None`` when no hand is detected.
    """
    # draw=True asks the detector to render its hand/finger visualisation.
    detected, img = detector.findHands(img, draw=True, flipType=True)

    # Guard clause: nothing to report without at least one hand.
    if not detected:
        return None

    first_hand = detected[0]
    landmarks = first_hand["lmList"]  # landmark list of the first hand
    finger_states = detector.fingersUp(first_hand)  # which fingers are raised
    print(finger_states)  # debug trace of the raised-finger pattern
    return finger_states, landmarks
def draw(info, prev_pos, draw_canvas):
    """Update the drawing canvas according to the current finger pattern.

    Gestures handled:
      * only the index finger up (``[0, 1, 0, 0, 0]``): draw a line segment
        from the index fingertip (landmark 8) to ``prev_pos``;
      * all fingers up (``[1, 1, 1, 1, 1]``): replace the canvas with a blank
        one of the same shape and dtype;
      * anything else: leave the canvas unchanged.

    Args:
        info: ``(fingers, lmList)`` pair as returned by ``getHandInfo``.
        prev_pos: Fingertip position from the previous call, or ``None`` when
            there is no stroke in progress.
        draw_canvas: Image array that strokes are drawn onto (modified in
            place by the line drawing).

    Returns:
        ``(current_pos, draw_canvas)``. ``current_pos`` is the ``[x, y]`` of
        the index fingertip while drawing and ``None`` otherwise, so a caller
        that feeds it back as ``prev_pos`` starts a fresh stroke after any
        non-drawing frame.
    """
    fingers, lmList = info
    current_pos = None

    if fingers == [0, 1, 0, 0, 0]:
        # Landmark 8 is the index fingertip; keep only its x, y components.
        current_pos = lmList[8][0:2]
        if prev_pos is None:
            # First point of a stroke: draw a zero-length segment (a dot).
            prev_pos = current_pos
        cv2.line(draw_canvas, current_pos, prev_pos, (255, 0, 255), 10)
    elif fingers == [1, 1, 1, 1, 1]:
        # Size the blank canvas from the canvas itself rather than from a
        # global frame variable, so the function works on its own arguments.
        draw_canvas = np.zeros_like(draw_canvas)

    return current_pos, draw_canvas
def sendToAI(model, canvas, fingers):
    """Send the canvas to the AI model when the trigger gesture is shown.

    The request is only made when ``fingers`` equals ``[1, 1, 0, 0, 1]``.
    For any other pattern the function returns immediately without touching
    ``canvas`` or ``model``.

    Args:
        model: Object exposing ``generate_content(list)`` whose result has a
            ``text`` attribute.
        canvas: Image array passed to ``Image.fromarray``.
            NOTE(review): the caller in this file passes an OpenCV frame,
            which is presumably BGR while PIL assumes RGB - confirm whether a
            channel swap is wanted.
        fingers: Raised-finger pattern for the current frame.

    Returns:
        The model's response text, or ``None`` when the gesture does not
        match.
    """
    # Check the gesture first so non-matching frames skip the image conversion.
    if fingers != [1, 1, 0, 0, 1]:
        return None

    questionText = "Describe what is drawn in pink on the screen. Solve it if it's a problem."
    pil_image = Image.fromarray(canvas)
    response = model.generate_content([questionText, pil_image])
    return response.text
def streamlitConfig():
    """Build the Streamlit page and return its two updatable elements.

    The page uses the wide layout and contains, in order, a "Camera" title
    with an image placeholder and an "Answer" title with a subheader element.

    Returns:
        ``(frame_slot, answer_slot)``: the element created by ``st.image``
        and the element created by ``st.subheader``.
    """
    st.set_page_config(layout="wide")

    # Camera section: starts with an empty image placeholder.
    st.title("Camera")
    frame_slot = st.image([])

    # Answer section: starts with an empty subheader for the AI response.
    st.title("Answer")
    answer_slot = st.subheader("")

    return frame_slot, answer_slot
def geminiConfig():
    """Configure the Gemini client and return the generative model.

    The API key is taken from the ``GEMINI_API_KEY`` environment variable,
    then ``GOOGLE_API_KEY``; if neither is set, the placeholder literal is
    used, so the key no longer has to be edited into the source file.

    Returns:
        A ``genai.GenerativeModel`` for ``'gemini-1.5-flash'``.
    """
    import os  # local import: only this function needs the environment

    api_key = (
        os.environ.get("GEMINI_API_KEY")
        or os.environ.get("GOOGLE_API_KEY")
        or "YOUR_API_KEY_HERE"
    )
    genai.configure(api_key=api_key)
    return genai.GenerativeModel('gemini-1.5-flash')
# Main program entry point
if __name__ == '__main__':
    # State carried from one frame to the next.
    prev_pos = None        # last drawing position, None when no stroke is active
    draw_canvas = None     # stroke layer, created lazily from the first frame
    canvas_combine = None  # last frame blended with the stroke layer
    output_text = None     # most recent AI response
    last_fingers = None    # finger pattern seen on the previous frame

    model = geminiConfig()  # Configure the AI model
    FRAME_WINDOW, output_text_area = streamlitConfig()  # Configure the Streamlit interface

    # Start the webcam; the try/finally guarantees the device is released
    # even if hand tracking or the AI request raises.
    cap = cv2.VideoCapture(0)
    try:
        if not cap.isOpened():
            st.error("Could not open webcam. Please check if it is connected properly.")
            st.stop()  # Stops the Streamlit script execution
        else:
            cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
            cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

        # NOTE: `detector` must stay a module-level name because getHandInfo()
        # looks it up as a global.
        detector = HandDetector(staticMode=False, maxHands=1, modelComplexity=1, detectionCon=0.5, minTrackCon=0.5)

        # Continuously capture frames from the webcam
        while True:
            # NOTE: the frame is deliberately kept in the module-level name
            # `img`; other functions in this file may read it as a global.
            success, img = cap.read()
            if not success or img is None:
                st.error("Failed to capture image from webcam.")
                break  # Exit the loop if there's an issue with the webcam

            # Flip the image horizontally (mirror image)
            img = cv2.flip(img, 1)

            # Create the black stroke layer once the frame size is known
            if draw_canvas is None:
                draw_canvas = np.zeros_like(img)
                canvas_combine = img.copy()

            info = getHandInfo(img)
            if info is None:
                # Hand lost: the next detected gesture counts as a new one.
                last_fingers = None
            else:
                fingers, _ = info
                prev_pos, draw_canvas = draw(info, prev_pos, draw_canvas)
                # Query the model only when the finger pattern changes, so
                # holding a gesture does not fire a blocking request on
                # every frame.
                if fingers != last_fingers:
                    output_text = sendToAI(model, canvas_combine, fingers)
                last_fingers = fingers

            # Combine the original image with the drawing canvas and display it
            canvas_combine = cv2.addWeighted(img, 0.75, draw_canvas, 0.40, 1)
            FRAME_WINDOW.image(image=canvas_combine, channels="BGR")

            # If there is an output from the AI model, display it in the UI
            if output_text:
                output_text_area.text(output_text)

            # Close when 'q' is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release resources and close windows
        cap.release()
        cv2.destroyAllWindows()