Merge pull request #269 from mintar/fix-metrics-and-inference
Improve standalone inference script and new metrics scripts
TontonTremblay authored Nov 4, 2022
2 parents 534c437 + d22c16d commit 6c80d16
Showing 6 changed files with 89 additions and 60 deletions.
2 changes: 2 additions & 0 deletions scripts/metrics/.gitignore
@@ -1 +1,3 @@
content/
+data/
+results/
2 changes: 1 addition & 1 deletion scripts/metrics/add_compute.py
@@ -491,7 +491,7 @@ def get_models(path,suffix=""):
best_dist = dist
best_index = i_gt

-if best_index is not -1:
+if best_index != -1:
if not name_guess in adds_objects.keys():
adds_objects[name_guess] = []
adds_all.append(best_dist)
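Note: the `add_compute.py` fix above swaps an identity test for an equality test. In Python, `is` compares object identity, not value; `best_index is not -1` only happened to work because CPython caches small integers, and Python 3.8+ emits a `SyntaxWarning` for `is` against a literal. A minimal sketch of the difference:

```python
a = 1000
b = int("1000")   # same value, constructed at runtime
print(a == b)     # True: value equality, which is what the code intends
print(a is b)     # False in CPython: distinct objects; `is` checks identity, not value
```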
6 changes: 3 additions & 3 deletions scripts/metrics/readme.md
@@ -40,9 +40,9 @@ If the script takes too long to run, please run with `--cuboid` instead of using
We added a script that renders the 3d model on top of your predictions.
```
# for scenes with DOPE inference
-python render_json.py --path_json data/table_dope_results/scene1/00300.json --opencv --contour --gray
+python render_json.py --path_json data/table_dope_results/scene1/00300.json --scale 0.01 --opencv --contour --gray
# for scenes generated by nvisii
-python render_json.py --path_json data/table_ground_truth/scene1/00100.json --contour --gray
+python render_json.py --path_json data/table_ground_truth/scene1/00100.json --scale 0.01 --contour --gray
```

`--gray` renders the 3d model as a gray image, and `--contour` adds the 3d model contour in green.
@@ -64,4 +64,4 @@ We assume that you have the intrinsics stored in the camera data. If you do not
# TODO
- Make a `requirement.txt` file.
- Possibly subsample vertices so computation is faster
-<!-- - make a script to visualize the json files from DOPE -->
\ No newline at end of file
+<!-- - make a script to visualize the json files from DOPE -->
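For context on the new `--scale 0.01` arguments: `render_json.py` places the 3d model using poses expressed in meters, so a mesh authored in centimeters has to be shrunk by a factor of 100. A minimal sketch of the unit conversion, using a hypothetical `scale_mesh` helper (not a function from this repo):

```python
def scale_mesh(vertices, scale):
    """Convert raw mesh vertices into meters.

    vertices: iterable of (x, y, z) tuples in the mesh's native unit.
    scale: unit multiplier -- 1 if the mesh is already in meters,
           0.01 if it is authored in centimeters.
    """
    return [(x * scale, y * scale, z * scale) for x, y, z in vertices]

# A vertex 5 cm from the origin ends up 0.05 m away:
print(scale_mesh([(5.0, 0.0, 0.0)], 0.01))  # [(0.05, 0.0, 0.0)]
```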
28 changes: 21 additions & 7 deletions scripts/metrics/render_json.py
@@ -75,6 +75,14 @@
help = "object to load folder, should follow YCB structure"
)

+parser.add_argument(
+    '--scale',
+    default=1,
+    type=float,
+    help='Specify the scale of the target object(s). If the obj mesh is in '
+         'meters -> scale=1; if it is in cm -> scale=0.01.'
+)

parser.add_argument(
'--out',
default='overlay.png',
@@ -162,7 +170,15 @@ def create_obj(
visii.set_camera_entity(camera)

visii.set_dome_light_intensity(1)
-visii.set_dome_light_color(visii.vec3(1,1,1),0)

+try:
+    visii.set_dome_light_color(visii.vec3(1, 1, 1), 0)
+except TypeError:
+    # Support for alpha transparent backgrounds was added in nvisii ef1880aa,
+    # but as of 2022-11-03, the latest released version (1.1) does not include
+    # that change yet.
+    print("WARNING! Your version of NVISII does not support alpha transparent backgrounds yet; --contour will not work properly.")
+    visii.set_dome_light_color(visii.vec3(1, 1, 1))

# # # # # # # # # # # # # # # # # # # # # # # # #

@@ -230,16 +246,14 @@ def create_obj(

camera.get_camera().set_projection(proj_matrix)
else:
-# im_height = 512
-# im_width = 512
-im_height = 512
-im_width = 512
intrinsics = { "cx": 964.957,
"cy": 522.586,
"fx": 1390.53,
"fy": 1386.99,
}

+im_height = data_json['camera_data']['height']
+im_width = data_json['camera_data']['width']
cam = pyrender.IntrinsicsCamera(intrinsics['fx'],intrinsics['fy'],intrinsics['cx'],intrinsics['cy'])

proj_matrix = cam.get_projection_matrix(im_width, im_height)
@@ -314,7 +328,7 @@ def create_obj(
name = obj['class'] + "_" + str(i_obj),
path_obj = opt.objs_folder + "/"+name + "/google_16k/textured.obj",
path_tex = opt.objs_folder + "/"+name + "/google_16k/texture_map_flat.png",
-scale = 0.01,
+scale = opt.scale,
rot_base = None
)

@@ -440,4 +454,4 @@ def create_obj(
cv2.imwrite(opt.out,outrgb)

# let's clean up the GPU
-visii.deinitialize()
\ No newline at end of file
+visii.deinitialize()
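The `try/except TypeError` added above is a feature-detection pattern: call the newer signature first and fall back when the installed library predates it. The same idea in isolation, with a hypothetical `api` object standing in for nvisii (not this library's actual interface):

```python
def set_background(api, r, g, b, alpha=None):
    """Try the newer signature that accepts an alpha channel; fall back to
    the older three-argument form if this version rejects the extra argument."""
    if alpha is not None:
        try:
            api.set_background_color(r, g, b, alpha)  # newer releases only
            return
        except TypeError:
            print("WARNING: this version has no alpha support; ignoring alpha.")
    api.set_background_color(r, g, b)
```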
45 changes: 30 additions & 15 deletions scripts/train2/inference.py
File mode changed: 100644 → 100755 (now executable)
@@ -1,12 +1,12 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3

# Copyright (c) 2018 NVIDIA Corporation. All rights reserved.
# This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.
# https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode

"""
-This file starts a ROS node to run DOPE,
-listening to an image topic and publishing poses.
+This file runs DOPE without ROS, either on an image folder or from a Realsense
+camera image stream.
"""

from __future__ import print_function
@@ -23,6 +23,7 @@
from detector import ModelData, ObjectDetector

import simplejson as json
+import copy

class Draw(object):
"""Drawing helper class to visualize the neural network output"""
@@ -171,6 +172,8 @@ def image_callback(self,
dist_coeffs = np.matrix(camera_info.D, dtype='float64')
dist_coeffs.resize((len(camera_info.D), 1))

+camera_matrix_for_json = copy.deepcopy(camera_matrix)

# Downscale image if necessary
height, width, _ = img.shape
scaling_factor = float(self.downscale_height) / height
@@ -187,17 +190,28 @@
im = Image.fromarray(img_copy)
draw = Draw(im)


-# dictionary for the final output
-dict_out = {"camera_data":{},"objects":[]}
+dict_out = {
+    "camera_data": {
+        "intrinsics": {
+            "cx": camera_matrix_for_json[0, 2],
+            "cy": camera_matrix_for_json[1, 2],
+            "fx": camera_matrix_for_json[0, 0],
+            "fy": camera_matrix_for_json[1, 1],
+        },
+        "width": width,
+        "height": height,
+    },
+    "objects": [],
+}
for m in self.models:
# Detect object
results, beliefs = ObjectDetector.detect_object_in_image(
self.models[m].net,
self.pnp_solvers[m],
img,
-self.config_detect
+self.config_detect,
+make_belief_debug_img=True
)
# print(results)
# print('---')
@@ -210,6 +224,9 @@
loc = result["location"]
ori = result["quaternion"]

+CONVERT_SCALE_CM_TO_METERS = 100
+loc = [l / CONVERT_SCALE_CM_TO_METERS for l in loc]

print(loc)

dict_out['objects'].append({
@@ -237,11 +254,13 @@
points2d.append(tuple(pair))
draw.draw_cube(points2d, self.draw_colors[m])
# save the output of the image.
-im.save(f"{output_folder}/{img_name}.png")
+im.save(f"{output_folder}/{img_name}")
+if beliefs is not None:
+    beliefs.save(f"{output_folder}/{img_name[:img_name.rfind('.')]}_belief.png")

# save the json files
with open(f"{output_folder}/{img_name.replace('png','json')}", 'w') as fp:
-json.dump(dict_out, fp)
+json.dump(dict_out, fp, indent=4)



@@ -310,10 +329,6 @@ def rotate_vector(vector, quaternion):

# create the output folder
print (f"output is located in {opt.outf}")
-try:
-    shutil.rmtree(f"{opt.outf}")
-except:
-    pass

try:
os.makedirs(f"{opt.outf}")
@@ -357,8 +372,8 @@ def rotate_vector(vector, quaternion):
img_name = i_image
else:
if i_image >= len(imgs):
-i_image =0
+break

frame = cv2.imread(imgs[i_image])
print(f"frame {imgsname[i_image]}")
img_name = imgsname[i_image]
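With these changes, `inference.py` records the original (pre-downscaling) intrinsics plus the image size in each JSON file, which is what `render_json.py` expects to find under `camera_data`. Roughly, the per-image output now looks like the sketch below; the numbers and the `objects` entry are illustrative only, and the exact object keys are whatever `inference.py` appends:

```python
# Illustrative shape of the JSON written per image (all values made up):
example = {
    "camera_data": {
        "intrinsics": {
            "cx": 964.957, "cy": 522.586,
            "fx": 1390.53, "fy": 1386.99,
        },
        "width": 1920,
        "height": 1080,
    },
    "objects": [
        {
            # exact keys come from the dict that inference.py appends;
            # location is in meters after the cm -> m division above
            "class": "cracker",
            "location": [0.12, -0.05, 0.83],
        }
    ],
}
```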
66 changes: 32 additions & 34 deletions scripts/train2/inference/detector.py
@@ -401,59 +401,57 @@ def get_image_grid(tensor, filename, nrow=3, padding=2,mean=None, std=None):
return im

@staticmethod
-def detect_object_in_image(net_model, pnp_solver, in_img, config,
-                           grid_belief_debug = False, norm_belief=True):
-    ''' Detect objects in a image using a specific trained network model
-        Returns the poses of the objects and the belief maps
-    '''
+def detect_object_in_image(net_model, pnp_solver, in_img, config,
+                           make_belief_debug_img=False, norm_belief=True, overlay_image=True):
+    """
+    Detect objects in a image using a specific trained network model
+    Returns the poses of the objects and the belief maps
+    """
if in_img is None:
return []

-# print("detect_object_in_image - image shape: {}".format(in_img.shape))

# Run network inference
image_tensor = transform(in_img)
image_torch = Variable(image_tensor).cuda().unsqueeze(0)
-out, seg = net_model(image_torch)  # run inference using the network (calls 'forward' method)
+out, seg = net_model(image_torch)
vertex2 = out[-1][0]
aff = seg[-1][0]

# Find objects from network output
detected_objects = ObjectDetector.find_object_poses(vertex2, aff, pnp_solver, config)

-if not grid_belief_debug:
+if not make_belief_debug_img:
return detected_objects, None
else:
-# Run the belief maps debug display on the beliefmaps
-upsampling = nn.UpsamplingNearest2d(scale_factor=8)
+# Run the belief maps debug display on the belief maps
tensor = vertex2
belief_imgs = []
-in_img = (torch.tensor(in_img).float()/255.0)
-in_img *= 0.7
+if overlay_image:
+    upsampling = nn.UpsamplingNearest2d(size=in_img.shape[:2])
+    in_img = (torch.tensor(in_img).float() / 255.0)
+    in_img *= 0.5

for j in range(tensor.size()[0]):
belief = tensor[j].clone()
if norm_belief:
-    belief -= float(torch.min(belief)[0].data.cpu().numpy())
-    belief /= float(torch.max(belief)[0].data.cpu().numpy())
-# print (image_torch.size())
-# raise()
-# belief *= 0.5
-# print(in_img.size())
-belief = upsampling(belief.unsqueeze(0).unsqueeze(0)).squeeze().squeeze().data
-belief = torch.clamp(belief,0,1).cpu()
-belief = torch.cat([
-    belief.unsqueeze(0) + in_img[:,:,0],
-    belief.unsqueeze(0) + in_img[:,:,1],
-    belief.unsqueeze(0) + in_img[:,:,2]
-]).unsqueeze(0)
-belief = torch.clamp(belief,0,1)
-# belief_imgs.append(belief.data.squeeze().cpu().numpy().transpose(1,2,0))
+    belief -= float(torch.min(belief).item())
+    belief /= float(torch.max(belief).item())
+belief = torch.clamp(belief, 0, 1).cpu()
+if overlay_image:
+    belief = upsampling(belief.unsqueeze(0).unsqueeze(0)).squeeze().squeeze().data
+    belief = torch.cat([
+        belief.unsqueeze(0) + in_img[:, :, 0],
+        belief.unsqueeze(0) + in_img[:, :, 1],
+        belief.unsqueeze(0) + in_img[:, :, 2]
+    ]).unsqueeze(0)
+    belief = torch.clamp(belief, 0, 1)
+else:
+    belief = torch.cat([
+        belief.unsqueeze(0),
+        belief.unsqueeze(0),
+        belief.unsqueeze(0)
+    ]).unsqueeze(0)
belief_imgs.append(belief.data.squeeze().numpy())

# Create the image grid
@@ -748,7 +746,7 @@ def find_objects(vertex2, aff, config, numvertex=8, run_sampling=False, num_samp
best_angle = dist_angle
best_dist = dist_point

-if i_best is -1:
+if i_best == -1:
continue

if objects[i_best][1][i_lists] is None \
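One detail worth calling out in `detector.py`: `torch.min(t)` on a tensor returns a 0-dim tensor, and indexing that with `[0]` fails on modern PyTorch, hence the switch to `.item()`. A minimal sketch of the normalization step, assuming a single 2-D belief map:

```python
import torch

belief = torch.rand(60, 80)               # stand-in for one belief map
# Read 0-dim results with .item(), not [0]:
belief = belief - belief.min().item()
belief = belief / belief.max().item()
belief = torch.clamp(belief, 0, 1)        # guard against numerical drift
print(belief.min().item(), belief.max().item())  # ~0.0 and 1.0
```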
