# pose_models.py

import os, sys, time
cwdir = os.path.dirname(os.path.realpath(__file__))
from PoseNet.models.mobilenet_v1 import MobileNetV1, MOBILENET_V1_CHECKPOINTS
import PoseNet
import numpy as np
import torch, cv2
from pyfiglet import Figlet
from utils import create_neck,create_hand
import json
pyf = Figlet(font = 'slant')

# import your PoseNet Code here
# Make the PoseNet folder that sits next to this file importable.
# The second component must be relative: os.path.join() throws away every
# earlier component when it meets an absolute one, so "/PoseNet" would have
# resolved to the filesystem root instead of a folder under cwdir.
# NOTE(review): the PoseNet imports at the top of this file run before this
# path tweak, so it only affects imports added below.
posenet_dir = os.path.join(cwdir, "PoseNet")
sys.path.insert(1, posenet_dir)
# import ...
print(f'Successfully imported PoseNet modules from {posenet_dir}')

def LoadModel(weight_dir = f"{cwdir}/model_", model_id = 101, output_stride = 16, pose_model_name = "PoseNet", useGPU = False, verbose = False):
    '''
    Build a pose model and load its pretrained weights.

    returns a PyTorch model object, or None when pose_model_name is not
    supported (a FATAL banner is printed in that case)
    Args:
        weight_dir: directory holding the ".pth" checkpoints,
                    default to the "model_" folder next to this file
        model_id: key into MOBILENET_V1_CHECKPOINTS, default to 101
        output_stride: default to 16, must be 8, 16, 32
        pose_model_name: default to "PoseNet" (the only supported value)
        useGPU: if true the model is moved to CUDA and will use GPU when predicting
        verbose: if true print a banner and the time spent loading
    Raises:
        AssertionError: the checkpoint file does not exist
        KeyError: model_id is not a key of MOBILENET_V1_CHECKPOINTS
    '''
    s_time = time.time()
    model = None

    if pose_model_name == "PoseNet":
        weight_path = os.path.join(weight_dir, MOBILENET_V1_CHECKPOINTS[model_id] + ".pth")
        assert os.path.exists(weight_path), f"The model is not found in {weight_path} "
        print(f'Weight: {weight_path}\n')
        model = MobileNetV1(model_id, output_stride)
        # Always deserialize onto the CPU: a checkpoint saved from a CUDA
        # device would otherwise fail to load on a machine without a GPU.
        # The model is moved to the GPU below when requested.
        state_dict = torch.load(weight_path, map_location='cpu')
        model.load_state_dict(state_dict)

        if useGPU:
            model = model.cuda()
        # PredictPose() dispatches on this attribute.
        model.name = 'PoseNet'

    else:
        print(f'{pyf.renderText("FATAL")}\n{pose_model_name} not supported.')

    if verbose:
        r_time = "{:.2f}".format(time.time() - s_time)
        print(f'{pyf.renderText(pose_model_name)}\nLoaded in {r_time} second(s).')

    return model

def PredictPose(model, capture = None, img_path = None, output_stride = 16,scale_factor=1.0, useGPU = False, debug_mode = False):
    '''
    Pose Model Predictor wrapper will return pose data
    Args:
        model: model object generated by LoadModel()
        capture: frame source handed to PoseNet.read_cap(); when both
                 capture and img_path are given the capture frame is used
        img_path: path of the image
        output_stride: forwarded to the PoseNet readers and decoder,
                       default to 16
        scale_factor: forwarded to the PoseNet readers, default to 1.0
        useGPU: use GPU when predicting (also selects how the hand points
                are derived, see the NOTE in the body)
        debug_mode: print extra information when predicting (currently unused)
    Returns:
        (pose_data, image_name, cv2_img) where pose_data is a dict with the
        keys 'poses', 'compute_time' and 'metadata', image_name is the
        zero-padded image file stem (None when no img_path was given) and
        cv2_img is the image returned by the PoseNet reader
    Raises:
        ValueError: neither img_path nor capture was given, or model.name
                    is not supported
    '''
    # Fail early with a clear message instead of an unbound-local error later.
    if img_path is None and capture is None:
        raise ValueError('PredictPose needs either img_path or capture.')

    if model.name != "PoseNet":
        print(f'{pyf.renderText("FATAL")}\n{model.name} is not supported in the PredictPose wrapper.')
        raise ValueError(f'{model.name} is not supported in the PredictPose wrapper.')

    image_name = None
    if img_path is not None:
        stem = os.path.basename(img_path).split(".")[0] #image name for saving
        image_name = stem.rjust(6, '0') #format e.g. 000001

    start = time.time()
    if img_path is not None:
        input_image, cv2_img, output_scale = PoseNet.read_imgfile(
            img_path, scale_factor=scale_factor, output_stride=output_stride)

    if capture is not None:
        # Use the caller's output_stride (it used to be hard-coded to 16) so
        # the reader agrees with the decoder below.
        input_image, cv2_img, output_scale = PoseNet.read_cap(
            cap=capture, scale_factor=scale_factor, output_stride=output_stride)

    with torch.no_grad():
        input_image = torch.Tensor(input_image)
        if useGPU:
            input_image = input_image.cuda()
        heatmap, offset, displacement_fwd, displacement_bwd = model(input_image)
        _, keypoint_scores, keypoint_coords = PoseNet.decode_multiple_poses(
            heatmap.squeeze(0),
            offset.squeeze(0),
            displacement_fwd.squeeze(0),
            displacement_bwd.squeeze(0),
            output_stride=output_stride,
            max_pose_detections=10,
            min_pose_score=0.25)

    keypoint_coords *= output_scale

    pose_data = {'poses':[],'compute_time':str(time.time()-start)[:5], 'metadata':{}}

    pose_key_list = ["nose","l_eye","r_eye","l_ear","r_ear","l_shoulder"
                ,"r_shoulder","l_elbow","r_elbow","l_wrist","r_wrist"
                ,"l_hip","r_hip","l_knee","r_knee","l_ankle","r_ankle"]

    for coords, scores in zip(keypoint_coords, keypoint_scores):
        # The first pose whose scores sum to zero ends the list of poses.
        if sum(scores)==0:
            break
        # Each coordinate is read as (y, x): index 1 -> 'x', index 0 -> 'y'.
        pose_data['poses'].append({
            key: {'x':coord[1],'y':coord[0],'conf':conf}
            for key, coord, conf in zip(pose_key_list, coords, scores)
        })

    img_h, img_w, _ = cv2_img.shape

    # Ratio arguments forwarded unchanged to create_hand().
    e_2_w = 1
    w_2_h = 1.5

    for pose in pose_data['poses']:
        # neck
        l_shoulder, r_shoulder = pose['l_shoulder'], pose['r_shoulder']
        neck = create_neck(l_shoulder['x'],r_shoulder['x'],l_shoulder['y'],r_shoulder['y'])
        pose['neck']={'x':neck[0],'y':neck[1],'conf':(l_shoulder['conf']+r_shoulder['conf'])/2}

        # hand
        # NOTE(review): the hand estimate is chosen by useGPU, which looks
        # unrelated to the device; behaviour kept as-is -- confirm intent.
        for side in ('l', 'r'):
            wrist, elbow = pose[f'{side}_wrist'], pose[f'{side}_elbow']
            if useGPU:
                hand = create_hand(wrist['x'],wrist['y'],elbow['x'],elbow['y'],e_2_w,w_2_h)
                hand_conf = (elbow['conf']+wrist['conf'])/2
            else:
                # Shift the wrist by one sixth of the image height.
                hand = (wrist['x'],wrist['y']-img_h//6)
                hand_conf = wrist['conf']
            pose[f'{side}_hand']={'x':hand[0],'y':hand[1],'conf':hand_conf}

    pose_data['metadata'] = {
        'width': img_w,
        'height': img_h,
        'pose_model_name': model.name,
        'compute_time': pose_data['compute_time']
    }

    return pose_data, image_name, cv2_img
    
def save_to_json(pose_data, image_name, output_json_path = "./_testset/json_"):
    '''
    Write pose data to "<output_json_path>/<image_name>.json".

    return None
    Args:
        pose_data: generated by PredictPose()
        image_name: name of the json file, without the ".json" extension
        output_json_path: directory to save data in json format,
                          created (with parents) when it does not exist
    '''
    # exist_ok=True avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(output_json_path, exist_ok=True)
    with open(os.path.join(output_json_path, image_name + '.json'), 'w') as j:
        json.dump(pose_data, j)
    
if __name__ == '__main__':
    # Smoke test: load the default model, predict one image, dump the json.
    model = LoadModel(verbose= True)
    # ls_img = sorted([img.path for img in os.scandir('./_testset')if img.path.endswith('.jpg')]) #all images
    # for img_path in ls_img:
    # The image can be given as the first command-line argument; the
    # original hard-coded test image stays as the fallback.
    default_img_path = '/Users/15077693d/Desktop/miro/miro_intern_week1/_testset/test.jpg'
    img_path = sys.argv[1] if len(sys.argv) > 1 else default_img_path
    print('\n', img_path,'\n')
    pose_data, image_name, cv2_img = PredictPose(model, img_path=img_path,scale_factor=1)
    save_to_json(pose_data, image_name)
################################################################################
#     ____                  _   __     __
#    / __ \____  ________  / | / /__  / /_
#   / /_/ / __ \/ ___/ _ \/  |/ / _ \/ __/
#  / ____/ /_/ (__  )  __/ /|  /  __/ /_
# /_/    \____/____/\___/_/ |_/\___/\__/
#
################################################################################

# You might or might not need to add a wrapper to the PoseNet model here