Merge pull request #269 from mintar/fix-metrics-and-inference
Improve standalone inference script and new metrics scripts
TontonTremblay authored Nov 4, 2022
2 parents 534c437 + d22c16d commit 6c80d16
Showing 6 changed files with 89 additions and 60 deletions.
2 changes: 2 additions & 0 deletions scripts/metrics/.gitignore
@@ -1 +1,3 @@
content/
+data/
+results/
2 changes: 1 addition & 1 deletion scripts/metrics/add_compute.py
@@ -491,7 +491,7 @@ def get_models(path,suffix=""):
best_dist = dist
best_index = i_gt

-if best_index is not -1:
+if best_index != -1:
if not name_guess in adds_objects.keys():
adds_objects[name_guess] = []
adds_all.append(best_dist)
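Note: the `add_compute.py` fix above swaps an identity test for an equality test. In Python, `is` compares object identity, not value; `best_index is not -1` only happened to work because CPython caches small integers, and Python 3.8+ emits a `SyntaxWarning` for `is` against a literal. A minimal sketch of the difference:

```python
a = 1000
b = int("1000")   # same value, constructed at runtime
print(a == b)     # True: value equality, which is what the code intends
print(a is b)     # False in CPython: distinct objects; `is` checks identity, not value
```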
6 changes: 3 additions & 3 deletions scripts/metrics/readme.md
@@ -40,9 +40,9 @@ If the script takes too long to run, please run with `--cuboid` instead of using
We added a script that renders the 3d model on top of your predictions.
```
# for scenes with DOPE inference
-python render_json.py --path_json data/table_dope_results/scene1/00300.json --opencv --contour --gray
+python render_json.py --path_json data/table_dope_results/scene1/00300.json --scale 0.01 --opencv --contour --gray
# for scenes generated by nvisii
-python render_json.py --path_json data/table_ground_truth/scene1/00100.json --contour --gray
+python render_json.py --path_json data/table_ground_truth/scene1/00100.json --scale 0.01 --contour --gray
```

`--gray` renders the 3d model as a gray image, and `--contour` adds the 3d model contour in green.
@@ -64,4 +64,4 @@ We assume that you have the intrinsics stored in the camera data. If you do not
# TODO
- Make a `requirement.txt` file.
- Possibly subsample vertices so computation is faster
-<!-- - make a script to visualize the json files from DOPE -->
\ No newline at end of file
+<!-- - make a script to visualize the json files from DOPE -->
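For context on the new `--scale 0.01` arguments: `render_json.py` places the 3d model using poses expressed in meters, so a mesh authored in centimeters has to be shrunk by a factor of 100. A minimal sketch of the unit conversion, using a hypothetical `scale_mesh` helper (not a function from this repo):

```python
def scale_mesh(vertices, scale):
    """Convert raw mesh vertices into meters.

    vertices: iterable of (x, y, z) tuples in the mesh's native unit.
    scale: unit multiplier -- 1 if the mesh is already in meters,
           0.01 if it is authored in centimeters.
    """
    return [(x * scale, y * scale, z * scale) for x, y, z in vertices]

# A vertex 5 cm from the origin ends up 0.05 m away:
print(scale_mesh([(5.0, 0.0, 0.0)], 0.01))  # [(0.05, 0.0, 0.0)]
```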
28 changes: 21 additions & 7 deletions scripts/metrics/render_json.py
@@ -75,6 +75,14 @@
help = "object to load folder, should follow YCB structure"
)

+parser.add_argument(
+    '--scale',
+    default=1,
+    type=float,
+    help='Specify the scale of the target object(s). If the obj mesh is in '
+         'meters -> scale=1; if it is in cm -> scale=0.01.'
+)

parser.add_argument(
'--out',
default='overlay.png',
@@ -162,7 +170,15 @@ def create_obj(
visii.set_camera_entity(camera)

visii.set_dome_light_intensity(1)
-visii.set_dome_light_color(visii.vec3(1,1,1),0)

+try:
+    visii.set_dome_light_color(visii.vec3(1, 1, 1), 0)
+except TypeError:
+    # Support for alpha transparent backgrounds was added in nvisii ef1880aa,
+    # but as of 2022-11-03, the latest released version (1.1) does not include
+    # that change yet.
+    print("WARNING! Your version of NVISII does not support alpha transparent backgrounds yet; --contour will not work properly.")
+    visii.set_dome_light_color(visii.vec3(1, 1, 1))

# # # # # # # # # # # # # # # # # # # # # # # # #

@@ -230,16 +246,14 @@ def create_obj(

camera.get_camera().set_projection(proj_matrix)
else:
-# im_height = 512
-# im_width = 512
-im_height = 512
-im_width = 512
intrinsics = { "cx": 964.957,
"cy": 522.586,
"fx": 1390.53,
"fy": 1386.99,
}

+im_height = data_json['camera_data']['height']
+im_width = data_json['camera_data']['width']
cam = pyrender.IntrinsicsCamera(intrinsics['fx'],intrinsics['fy'],intrinsics['cx'],intrinsics['cy'])

proj_matrix = cam.get_projection_matrix(im_width, im_height)
@@ -314,7 +328,7 @@ def create_obj(
name = obj['class'] + "_" + str(i_obj),
path_obj = opt.objs_folder + "/"+name + "/google_16k/textured.obj",
path_tex = opt.objs_folder + "/"+name + "/google_16k/texture_map_flat.png",
-scale = 0.01,
+scale = opt.scale,
rot_base = None
)

@@ -440,4 +454,4 @@ def create_obj(
cv2.imwrite(opt.out,outrgb)

# let's clean up the GPU
-visii.deinitialize()
\ No newline at end of file
+visii.deinitialize()
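The `try/except TypeError` added above is a feature-detection pattern: call the newer signature first and fall back when the installed library predates it. The same idea in isolation, with a hypothetical `api` object standing in for nvisii (not this library's actual interface):

```python
def set_background(api, r, g, b, alpha=None):
    """Try the newer signature that accepts an alpha channel; fall back to
    the older three-argument form if this version rejects the extra argument."""
    if alpha is not None:
        try:
            api.set_background_color(r, g, b, alpha)  # newer releases only
            return
        except TypeError:
            print("WARNING: this version has no alpha support; ignoring alpha.")
    api.set_background_color(r, g, b)
```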
45 changes: 30 additions & 15 deletions scripts/train2/inference.py
File mode changed: 100644 → 100755 (now executable)
@@ -1,12 +1,12 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3

# Copyright (c) 2018 NVIDIA Corporation. All rights reserved.
# This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.
# https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode

"""
-This file starts a ROS node to run DOPE,
-listening to an image topic and publishing poses.
+This file runs DOPE without ROS, either on an image folder or from a Realsense
+camera image stream.
"""

from __future__ import print_function
@@ -23,6 +23,7 @@
from detector import ModelData, ObjectDetector

import simplejson as json
+import copy

class Draw(object):
"""Drawing helper class to visualize the neural network output"""
@@ -171,6 +172,8 @@ def image_callback(self,
dist_coeffs = np.matrix(camera_info.D, dtype='float64')
dist_coeffs.resize((len(camera_info.D), 1))

+camera_matrix_for_json = copy.deepcopy(camera_matrix)

# Downscale image if necessary
height, width, _ = img.shape
scaling_factor = float(self.downscale_height) / height
@@ -187,17 +190,28 @@
im = Image.fromarray(img_copy)
draw = Draw(im)


-# dictionary for the final output
-dict_out = {"camera_data":{},"objects":[]}
+dict_out = {
+    "camera_data": {
+        "intrinsics": {
+            "cx": camera_matrix_for_json[0, 2],
+            "cy": camera_matrix_for_json[1, 2],
+            "fx": camera_matrix_for_json[0, 0],
+            "fy": camera_matrix_for_json[1, 1],
+        },
+        "width": width,
+        "height": height,
+    },
+    "objects": [],
+}
for m in self.models:
# Detect object
results, beliefs = ObjectDetector.detect_object_in_image(
self.models[m].net,
self.pnp_solvers[m],
img,
-self.config_detect
+self.config_detect,
+make_belief_debug_img=True
)
# print(results)
# print('---')
@@ -210,6 +224,9 @@
loc = result["location"]
ori = result["quaternion"]

+CONVERT_SCALE_CM_TO_METERS = 100
+loc = [l / CONVERT_SCALE_CM_TO_METERS for l in loc]

print(loc)

dict_out['objects'].append({
@@ -237,11 +254,13 @@
points2d.append(tuple(pair))
draw.draw_cube(points2d, self.draw_colors[m])
# save the output of the image.
-im.save(f"{output_folder}/{img_name}.png")
+im.save(f"{output_folder}/{img_name}")
+if beliefs is not None:
+    beliefs.save(f"{output_folder}/{img_name[:img_name.rfind('.')]}_belief.png")

# save the json files
with open(f"{output_folder}/{img_name.replace('png','json')}", 'w') as fp:
-json.dump(dict_out, fp)
+json.dump(dict_out, fp, indent=4)



@@ -310,10 +329,6 @@ def rotate_vector(vector, quaternion):

# create the output folder
print (f"output is located in {opt.outf}")
-try:
-    shutil.rmtree(f"{opt.outf}")
-except:
-    pass

try:
os.makedirs(f"{opt.outf}")
@@ -357,8 +372,8 @@ def rotate_vector(vector, quaternion):
img_name = i_image
else:
if i_image >= len(imgs):
-i_image =0
+break

frame = cv2.imread(imgs[i_image])
print(f"frame {imgsname[i_image]}")
img_name = imgsname[i_image]
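With these changes, `inference.py` records the original (pre-downscaling) intrinsics plus the image size in each JSON file, which is what `render_json.py` expects to find under `camera_data`. Roughly, the per-image output now looks like the sketch below; the numbers and the `objects` entry are illustrative only, and the exact object keys are whatever `inference.py` appends:

```python
# Illustrative shape of the JSON written per image (all values made up):
example = {
    "camera_data": {
        "intrinsics": {
            "cx": 964.957, "cy": 522.586,
            "fx": 1390.53, "fy": 1386.99,
        },
        "width": 1920,
        "height": 1080,
    },
    "objects": [
        {
            # exact keys come from the dict that inference.py appends;
            # location is in meters after the cm -> m division above
            "class": "cracker",
            "location": [0.12, -0.05, 0.83],
        }
    ],
}
```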
66 changes: 32 additions & 34 deletions scripts/train2/inference/detector.py
@@ -401,59 +401,57 @@ def get_image_grid(tensor, filename, nrow=3, padding=2,mean=None, std=None):
return im

@staticmethod
-def detect_object_in_image(net_model, pnp_solver, in_img, config,
-                           grid_belief_debug = False, norm_belief=True):
-    ''' Detect objects in a image using a specific trained network model
-        Returns the poses of the objects and the belief maps
-    '''
+def detect_object_in_image(net_model, pnp_solver, in_img, config,
+                           make_belief_debug_img=False, norm_belief=True, overlay_image=True):
+    """
+    Detect objects in a image using a specific trained network model
+    Returns the poses of the objects and the belief maps
+    """
if in_img is None:
return []

-# print("detect_object_in_image - image shape: {}".format(in_img.shape))

# Run network inference
image_tensor = transform(in_img)
image_torch = Variable(image_tensor).cuda().unsqueeze(0)
-out, seg = net_model(image_torch)  # run inference using the network (calls 'forward' method)
+out, seg = net_model(image_torch)
vertex2 = out[-1][0]
aff = seg[-1][0]

# Find objects from network output
detected_objects = ObjectDetector.find_object_poses(vertex2, aff, pnp_solver, config)

-if not grid_belief_debug:
+if not make_belief_debug_img:
return detected_objects, None
else:
-# Run the belief maps debug display on the beliefmaps
-upsampling = nn.UpsamplingNearest2d(scale_factor=8)
+# Run the belief maps debug display on the belief maps
tensor = vertex2
belief_imgs = []
-in_img = (torch.tensor(in_img).float()/255.0)
-in_img *= 0.7
+if overlay_image:
+    upsampling = nn.UpsamplingNearest2d(size=in_img.shape[:2])
+    in_img = (torch.tensor(in_img).float() / 255.0)
+    in_img *= 0.5

for j in range(tensor.size()[0]):
belief = tensor[j].clone()
if norm_belief:
-    belief -= float(torch.min(belief)[0].data.cpu().numpy())
-    belief /= float(torch.max(belief)[0].data.cpu().numpy())
-# print (image_torch.size())
-# raise()
-# belief *= 0.5
-# print(in_img.size())
-belief = upsampling(belief.unsqueeze(0).unsqueeze(0)).squeeze().squeeze().data
-belief = torch.clamp(belief,0,1).cpu()
-belief = torch.cat([
-    belief.unsqueeze(0) + in_img[:,:,0],
-    belief.unsqueeze(0) + in_img[:,:,1],
-    belief.unsqueeze(0) + in_img[:,:,2]
-]).unsqueeze(0)
-belief = torch.clamp(belief,0,1)
-# belief_imgs.append(belief.data.squeeze().cpu().numpy().transpose(1,2,0))
+    belief -= float(torch.min(belief).item())
+    belief /= float(torch.max(belief).item())
+belief = torch.clamp(belief, 0, 1).cpu()
+if overlay_image:
+    belief = upsampling(belief.unsqueeze(0).unsqueeze(0)).squeeze().squeeze().data
+    belief = torch.cat([
+        belief.unsqueeze(0) + in_img[:, :, 0],
+        belief.unsqueeze(0) + in_img[:, :, 1],
+        belief.unsqueeze(0) + in_img[:, :, 2]
+    ]).unsqueeze(0)
+    belief = torch.clamp(belief, 0, 1)
+else:
+    belief = torch.cat([
+        belief.unsqueeze(0),
+        belief.unsqueeze(0),
+        belief.unsqueeze(0)
+    ]).unsqueeze(0)
belief_imgs.append(belief.data.squeeze().numpy())

# Create the image grid
@@ -748,7 +746,7 @@ def find_objects(vertex2, aff, config, numvertex=8, run_sampling=False, num_samp
best_angle = dist_angle
best_dist = dist_point

-if i_best is -1:
+if i_best == -1:
continue

if objects[i_best][1][i_lists] is None \
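One detail worth calling out in `detector.py`: `torch.min(t)` on a tensor returns a 0-dim tensor, and indexing that with `[0]` fails on modern PyTorch, hence the switch to `.item()`. A minimal sketch of the normalization step, assuming a single 2-D belief map:

```python
import torch

belief = torch.rand(60, 80)               # stand-in for one belief map
# Read 0-dim results with .item(), not [0]:
belief = belief - belief.min().item()
belief = belief / belief.max().item()
belief = torch.clamp(belief, 0, 1)        # guard against numerical drift
print(belief.min().item(), belief.max().item())  # ~0.0 and 1.0
```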
