diff --git a/lkcomputervision/__init__.py b/lkcomputervision/__init__.py index 99611c2..dc292bb 100644 --- a/lkcomputervision/__init__.py +++ b/lkcomputervision/__init__.py @@ -1 +1 @@ -from .mediapipe_handler import MediaPipeHandler \ No newline at end of file +from .mediapipe_handler import faceMesh, detectFace, detectPose, trackHands, detectAll \ No newline at end of file diff --git a/lkcomputervision/__pycache__/__init__.cpython-310.pyc b/lkcomputervision/__pycache__/__init__.cpython-310.pyc index ed21fcb..4491bb4 100644 Binary files a/lkcomputervision/__pycache__/__init__.cpython-310.pyc and b/lkcomputervision/__pycache__/__init__.cpython-310.pyc differ diff --git a/lkcomputervision/__pycache__/mediapipe_handler.cpython-310.pyc b/lkcomputervision/__pycache__/mediapipe_handler.cpython-310.pyc index 34e9f1c..83dc58f 100644 Binary files a/lkcomputervision/__pycache__/mediapipe_handler.cpython-310.pyc and b/lkcomputervision/__pycache__/mediapipe_handler.cpython-310.pyc differ diff --git a/lkcomputervision/mediapipe_handler.py b/lkcomputervision/mediapipe_handler.py index 9d979c0..643c370 100644 --- a/lkcomputervision/mediapipe_handler.py +++ b/lkcomputervision/mediapipe_handler.py @@ -1,87 +1,78 @@ import cv2 import mediapipe as mp -class MediaPipeHandler: - def __init__(self): - self.mp_drawing = mp.solutions.drawing_utils - self.mp_hands = mp.solutions.hands - self.mp_pose = mp.solutions.pose - self.mp_face_detection = mp.solutions.face_detection - self.mp_face_mesh = mp.solutions.face_mesh - - self.hands = self.mp_hands.Hands() - self.pose = self.mp_pose.Pose() - self.face_detection = self.mp_face_detection.FaceDetection() - self.face_mesh = self.mp_face_mesh.FaceMesh() - - def trackHands(self, frame, draw=True): - results = self.hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) - hand_land_mark = {} - - if results.multi_hand_landmarks: - for idx, landmarks in enumerate(results.multi_hand_landmarks): - landMarks = {} - for point, landmark in enumerate(landmarks.landmark): +mp_drawing = mp.solutions.drawing_utils +def faceMesh(frame, draw=True): + mp_face_mesh = mp.solutions.face_mesh + with mp_face_mesh.FaceMesh() as face_mesh: + results = face_mesh.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) + face_landmarks_dict = {} + if results.multi_face_landmarks: + for i, face_landmarks in enumerate(results.multi_face_landmarks): + face_landmarks_list = [] + for landmark_id, landmark in enumerate(face_landmarks.landmark): x, y, z = landmark.x, landmark.y, landmark.z - landMarks[point] = {"x": x, "y": y, "z": z} - hand_land_mark = landMarks + face_landmarks_list.append({"x": x, "y": y, "z": z}) + face_landmarks_dict[i] = face_landmarks_list if draw: - self.mp_drawing.draw_landmarks(frame, landmarks, self.mp_hands.HAND_CONNECTIONS) - return {"frame": frame, "landmarks": hand_land_mark} - - def detectFace(self, frame, draw=True): - with self.mp_face_detection.FaceDetection(min_detection_confidence=0.5) as face_detection: - results = face_detection.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) - faceLms = {} + mp_drawing.draw_landmarks(frame, face_landmarks, mp_face_mesh.FACEMESH_TESSELATION) + return {"frame": frame, "landmarks": face_landmarks_dict} + - if results.detections: - for i, detection in enumerate(results.detections): - bboxC = detection.location_data.relative_bounding_box - ih, iw, _ = frame.shape - x, y, w, h = bboxC.xmin, bboxC.ymin, bboxC.width, bboxC.height - faceLms[i] = {"x": x, "y": y, "width": w, "height": h} - if draw: - self.mp_drawing.draw_detection(frame, detection) - return {"frame": frame, "landmarks": faceLms} +def trackHands(frame, draw=True): + mp_hands = mp.solutions.hands + hands = mp_hands.Hands() + results = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) + hand_land_mark = {} + if results.multi_hand_landmarks: + for idx, landmarks in enumerate(results.multi_hand_landmarks): + landMarks = {} + for point, landmark in enumerate(landmarks.landmark): + x, y, z = landmark.x, landmark.y, landmark.z + landMarks[point] = {"x": x, "y": y, "z": z} + hand_land_mark = landMarks + if draw: + mp_drawing.draw_landmarks(frame, landmarks,mp_hands.HAND_CONNECTIONS) + return {"frame": frame, "landmarks": hand_land_mark} - def faceMesh(self, frame, draw=True): - with self.mp_face_mesh.FaceMesh() as face_mesh: - results = face_mesh.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) - face_landmarks_dict = {} - - if results.multi_face_landmarks: - for i, face_landmarks in enumerate(results.multi_face_landmarks): - face_landmarks_list = [] - for landmark_id, landmark in enumerate(face_landmarks.landmark): - x, y, z = landmark.x, landmark.y, landmark.z - face_landmarks_list.append({"x": x, "y": y, "z": z}) - face_landmarks_dict[i] = face_landmarks_list - if draw: - self.mp_drawing.draw_landmarks(frame, face_landmarks, self.mp_face_mesh.FACEMESH_TESSELATION) - return {"frame": frame, "landmarks": face_landmarks_dict} - - def detectPose(self, frame, draw=True): - with self.mp_pose.Pose() as pose: - results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) - pose_landmarks_dict = {} - - if results.pose_landmarks: - for landmark_id, landmark in enumerate(results.pose_landmarks.landmark): - x, y, z = landmark.x, landmark.y, landmark.z - pose_landmarks_dict[landmark_id] = {"x": x, "y": y, "z": z} +def detectFace(frame, draw=True): + mp_face_detection = mp.solutions.face_detection + with mp_face_detection.FaceDetection(min_detection_confidence=0.5) as face_detection: + results = face_detection.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) + faceLms = {} + if results.detections: + for i, detection in enumerate(results.detections): + bboxC = detection.location_data.relative_bounding_box + ih, iw, _ = frame.shape + x, y, w, h = bboxC.xmin, bboxC.ymin, bboxC.width, bboxC.height + faceLms[i] = {"x": x, "y": y, "width": w, "height": h} if draw: - self.mp_drawing.draw_landmarks(frame, results.pose_landmarks, self.mp_pose.POSE_CONNECTIONS) - return {"frame": frame, "landmarks": pose_landmarks_dict} + mp_drawing.draw_detection(frame, detection) + return {"frame": frame, "landmarks": faceLms} + +def detectPose(frame, draw=True): + mp_pose = mp.solutions.pose + with mp_pose.Pose() as pose: + results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) + pose_landmarks_dict = {} + if results.pose_landmarks: + for landmark_id, landmark in enumerate(results.pose_landmarks.landmark): + x, y, z = landmark.x, landmark.y, landmark.z + pose_landmarks_dict[landmark_id] = {"x": x, "y": y, "z": z} + if draw: + mp_drawing.draw_landmarks(frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS) + return {"frame": frame, "landmarks": pose_landmarks_dict} + - def detectAll(self, frame): - handTracker = self.trackHands(frame) - faceDetector = self.detectFace(frame) - facemesh = self.faceMesh(frame) - bodypose = self.detectPose(frame) - landMarks = { - "handTracking": handTracker["landmarks"], - "detectFace": faceDetector["landmarks"], - "faceMesh": facemesh["landmarks"], - "detectPose": bodypose["landmarks"] - } - return landMarks +def detectAll(frame): + handTracker = trackHands(frame) + faceDetector = detectFace(frame) + facemesh = faceMesh(frame) + bodypose = detectPose(frame) + landMarks = { + "handTracking": handTracker, + "detectFace": faceDetector, + "faceMesh": facemesh, + "detectPose": bodypose + } + return landMarks \ No newline at end of file diff --git a/main.py b/main.py index c720eb4..3196ea1 100644 --- a/main.py +++ b/main.py @@ -1,8 +1,5 @@ import cv2 -from lkcomputervision import MediaPipeHandler - -# Initialize the MediaPipeHandler -mp = MediaPipeHandler() +from lkcomputervision.mediapipe_handler import faceMesh, trackHands, detectFace, detectPose, detectAll # Capture video from the webcam (you can also specify a video file path) cap = cv2.VideoCapture(0) # 0 represents the default webcam @@ -15,17 +12,24 @@ break # Process the frame to track hands - #result = mp.trackHands(frame) - #result = mp.detectFace(frame) - #result = mp.detectPose(frame) - result = mp.faceMesh(frame) - - # Retrieve the frame with hand landmarks drawn on it - frame_with_landmarks = result["frame"] - print(result["landmarks"]) - + # result = trackHands(frame) + # result = detectFace(frame) + # result = detectPose(frame) + # result = faceMesh(frame) + result = detectAll(frame) + + # Retrieve the frame with hand landmarks drawn on it for individual functions + # frame_with_landmarks = result["frame"] + # print(frame_with_landmarks) + + # for detectAll + for i in result.keys(): + print(f'{i}', end="") + print(result[i]["landmarks"]) + cv2.imshow("Hand Tracking", result[i]["frame"]) + # Display the frame with landmarks - cv2.imshow("Hand Tracking", frame_with_landmarks) + # cv2.imshow("Hand Tracking", frame_with_landmarks) # Exit the loop when the user presses the 'q' key if cv2.waitKey(1) & 0xFF == ord('q'):