Data.py
import os
import tensorflow as tf
from math import pi
from src.utils.TMatrix import TMatrix
import numpy as np

gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
            # tf.config.set_logical_device_configuration(gpu, [tf.config.LogicalDeviceConfiguration(memory_limit=8000)])
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)


class Data:
    def __init__(self, config=None):
        self.config = config
        self.image_size = self.config.get_int("image_size")
        self.image_channel_num = 3
        # Pose vector: 3 translation + 4 quaternion components
        self.cam_pose_dim = 7
        self.inverse_pose_representation = self.config.get_bool("inverse_pose_repr")

    def _prepare_cam_pose(self, reference_cam_poses, query_cam_poses):
        # Just convert the rotational part to quaternions
        reference_cam_poses = TMatrix.to_quaternion(reference_cam_poses, 1)
        query_cam_poses = TMatrix.to_quaternion(query_cam_poses, 1)
        return reference_cam_poses, query_cam_poses

    def revert_cam_pose_normalization(self, cam_pose, pose_transform=None, inverse_output=True, return_euler=True):
        if pose_transform is not None:
            # Make sure the given quaternion has unit length
            cam_pose = tf.concat((cam_pose[:, :3], cam_pose[:, 3:7] / tf.linalg.norm(cam_pose[:, 3:7], axis=-1, keepdims=True), cam_pose[:, 7:]), -1)
            # Convert translation + quaternion back to a transformation matrix
            cam_pose = TMatrix.from_quaternion(cam_pose[:, :self.cam_pose_dim], 1)
            # Invert the pose if necessary
            if not self.inverse_pose_representation and inverse_output:
                cam_pose = TMatrix.inverse(cam_pose, num_batch_dim=1)
            # Undo pose_transform
            pose_transform = TMatrix.inverse(pose_transform)
            cam_pose = TMatrix.apply(pose_transform, cam_pose)
            if return_euler:
                return TMatrix.to_euler(cam_pose, 1)
            else:
                return cam_pose
        else:
            return cam_pose

    def augment_image(self, image):
        # Check if we should do any augmentation
        do_brightness_augm = self.config.get_float("augmentation/brightness") > 0
        do_contrast_augm = self.config.get_list("augmentation/contrast")[0] != 1 or self.config.get_list("augmentation/contrast")[1] != 1
        do_saturation_augm = self.config.get_list("augmentation/saturation")[0] != 1 or self.config.get_list("augmentation/saturation")[1] != 1
        # Apply random brightness
        if do_brightness_augm:
            image = tf.image.random_brightness(image, self.config.get_float("augmentation/brightness"))
        # Apply random contrast
        if do_contrast_augm:
            image = tf.image.random_contrast(image, self.config.get_list("augmentation/contrast")[0], self.config.get_list("augmentation/contrast")[1])
        # Apply random saturation
        if do_saturation_augm:
            image = tf.image.random_saturation(image, self.config.get_list("augmentation/saturation")[0], self.config.get_list("augmentation/saturation")[1])
        # Make sure we get a valid image in the end
        image = tf.clip_by_value(image, 0.0, 1.0)
        return image

    def _augment_data(self, reference_images, reference_cam_poses, query_images, query_cam_poses, iou, room_ids, pose_transform, matching_labels):
        # Check if we should do any augmentation
        do_brightness_augm = self.config.get_float("augmentation/brightness") > 0
        do_contrast_augm = self.config.get_list("augmentation/contrast")[0] != 1 or self.config.get_list("augmentation/contrast")[1] != 1
        do_saturation_augm = self.config.get_list("augmentation/saturation")[0] != 1 or self.config.get_list("augmentation/saturation")[1] != 1
        # Augment reference and query images
        if do_brightness_augm or do_contrast_augm or do_saturation_augm:
            reference_images = self.augment_image(reference_images)
            query_images = self.augment_image(query_images)
        return reference_images, reference_cam_poses, query_images, query_cam_poses, iou, room_ids, pose_transform, matching_labels

    def preprocess_model_input(self, image):
        # Map image values to [0, 1]
        image = tf.cast(image, tf.float32) / 255.0
        # Resize to desired size (usually not necessary)
        image = tf.image.resize(image, [self.image_size, self.image_size])
        return image

    def postprocess_model_output(self, cam_pose, legacy_pose_transform=False):
        # Revert normalization of the predicted cam pose
        if self.inverse_pose_representation and legacy_pose_transform:
            pose_transform = TMatrix.from_euler(tf.convert_to_tensor([[0, 0, 0, pi / 2, 0, 0]]))
        else:
            pose_transform = TMatrix.from_euler(tf.convert_to_tensor([[0, 0, 0, 0.0, 0, 0]]))
        cam_pose = self.revert_cam_pose_normalization(cam_pose, pose_transform, inverse_output=False, return_euler=False)
        return cam_pose

    def decode_img(self, img):
        # Decode image
        img = tf.image.decode_png(img, channels=3)
        # Map to [0, 1] floats
        img = tf.image.convert_image_dtype(img, tf.float32)
        # Resize
        img = tf.image.resize(img, [self.image_size, self.image_size])
        return img
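
    # Expected pair-file line layout, inferred from the indexing in load_pair
    # below (not a documented format):
    #   pair[0], pair[1]  -> reference / query image ids (without extension)
    #   pair[2:18]        -> reference camera pose, 4x4 row-major
    #   pair[18:34]       -> query camera pose, 4x4 row-major
    #   pair[34:38]       -> intrinsics fx, fy, cx, cy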
    def load_pair(self, pair, base_path):
        # Read in both images
        img1 = self.decode_img(tf.io.read_file(base_path + pair[0] + ".jpg"))
        img2 = self.decode_img(tf.io.read_file(base_path + pair[1] + ".jpg"))
        # Build the K matrix from fx, fy, cx, cy
        intrinsics = tf.convert_to_tensor([
            [tf.strings.to_number(pair[34]), 0, tf.strings.to_number(pair[36]), 0],
            [0, tf.strings.to_number(pair[35]), tf.strings.to_number(pair[37]), 0],
            [0, 0, 1, 0],
        ])
        # Read in poses
        pose1 = tf.strings.to_number(pair[2:18])
        pose2 = tf.strings.to_number(pair[18:34])
        pose1 = tf.reshape(pose1, [4, 4])
        pose2 = tf.reshape(pose2, [4, 4])
        # Read in the depth maps of both images (the raw files are assumed to be 128x128)
        # and compute the scene coordinates of the reference image
        in_file = tf.io.read_file(base_path + pair[0] + ".raw")
        depth1 = tf.reshape(tf.io.decode_raw(in_file, tf.float32), (128, 128))
        in_file = tf.io.read_file(base_path + pair[1] + ".raw")
        depth2 = tf.reshape(tf.io.decode_raw(in_file, tf.float32), (128, 128))
        scene_coords = self.calc_scene_coords(depth1, intrinsics, pose1)
        # Poses are already stored in inverse form, so if that is not desired, invert them again
        if not self.inverse_pose_representation:
            pose1 = TMatrix.inverse(pose1, num_batch_dim=0)
            pose2 = TMatrix.inverse(pose2, num_batch_dim=0)
        # Apply the inverse of the reference pose to get relative poses
        pose_transform = TMatrix.inverse(pose1, num_batch_dim=0)
        pose2 = TMatrix.apply(pose_transform, pose2)
        pose1 = TMatrix.apply(pose_transform, pose1)
        return img1, pose1, img2, pose2, [[1.0]], pair[0] + " - " + pair[1], intrinsics, pose_transform, scene_coords, depth2

    def calc_scene_coords(self, depth, intrinsics, pose):
        # Create pixel mesh grid (pixel centers)
        points = tf.cast(tf.stack(tf.meshgrid(tf.range(self.image_size), tf.range(self.image_size)), -1), tf.float32) + 0.5
        # Back-project pixels to local camera coordinates:
        #   x = d * (u - cx) / fx,  y = d * ((image_size - v) - cy) / fy,  z = -d
        # (the flipped v axis and negated z follow the Blender camera convention)
        scene_coords = tf.stack([
            depth * (points[..., 0] - intrinsics[0][2]) / intrinsics[0][0],
            depth * ((self.image_size - points[..., 1]) - intrinsics[1][2]) / intrinsics[1][1],
            -depth,
            tf.ones_like(depth)
        ], -1)
        # Transform to world coordinates
        scene_coords = tf.reshape(scene_coords, [-1, 4])
        scene_coords = tf.transpose(tf.matmul(pose, scene_coords, transpose_b=True))
        scene_coords = tf.reshape(scene_coords, [128, 128, 4])[..., :3]
        return scene_coords
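
    # calc_matching_labels reprojects the scene coordinates of the reference
    # image into the query view. A correspondence is kept only where the
    # reprojected pixel lands inside the image and its ray distance agrees with
    # the query depth map within 5 cm, i.e. the scene point is actually visible
    # (not occluded) from the query view; all other pixels are marked with -1.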
    def calc_matching_labels(self, reference_images, reference_cam_poses, query_images, query_cam_poses, iou, room_ids, intrinsics, pose_transform, scene_coord, query_depth):
        # Recover the original (absolute) query pose
        if not self.inverse_pose_representation:
            query_cam_poses_orig = TMatrix.inverse(query_cam_poses, num_batch_dim=0)
        else:
            query_cam_poses_orig = query_cam_poses
        query_cam_poses_orig = TMatrix.apply(TMatrix.inverse(pose_transform, num_batch_dim=0), query_cam_poses_orig)
        # Convert depth along the optical axis to per-pixel ray distance
        points = tf.cast(tf.stack(tf.meshgrid(tf.range(self.image_size), tf.range(self.image_size)), -1), tf.float32) + 0.5
        points = tf.abs(points - intrinsics[:2, 2])
        query_dist = tf.sqrt(tf.square(query_depth) + tf.square(query_depth * points[..., 0] / intrinsics[0, 0]) + tf.square(query_depth * points[..., 1] / intrinsics[1, 1]))
        # Transform scene coordinates into the camera coordinate system of the query image
        h_scene_coord = tf.concat((scene_coord, tf.ones_like(scene_coord[..., :1])), -1)
        cam_mat = TMatrix.inverse(query_cam_poses_orig, num_batch_dim=0)
        pos_screen_space = tf.matmul(tf.cast(tf.reshape(h_scene_coord, [-1, 4]), tf.float32), cam_mat, transpose_b=True)
        # Account for the different coordinate systems of OpenCV and Blender
        pos_screen_space *= [1, 1, -1, 1]
        # Project to screen coordinates
        repr_points = tf.matmul(pos_screen_space, tf.cast(intrinsics, tf.float32), transpose_b=True)
        repr_points /= repr_points[..., -1:]
        repr_points = tf.unstack(repr_points, axis=-1)
        repr_points[1] = self.image_size - repr_points[1]
        repr_points = tf.stack(repr_points, axis=-1)
        # Round the reprojected points to discrete pixels
        coord = tf.round(repr_points[..., :2] - 0.5)
        # Compute the distance from each scene coord to the query camera
        actual_depth = tf.linalg.norm(scene_coord - query_cam_poses_orig[:3, 3][None, None], axis=-1)
        # Clip coordinates so they can be used as gather indices
        indices = tf.reverse(tf.clip_by_value(tf.cast(coord, tf.int64), 0, self.image_size - 1), [-1])
        # Gather distance data at the reprojected coordinates
        depth = tf.gather_nd(query_dist, indices)
        depth = tf.reshape(depth, [128, 128])
        # Compare the distance from the depth image with the distance to the scene coord
        diff = actual_depth - depth
        # A match is only valid if the distances are similar (otherwise the scene coord is probably not visible from the query view)
        valid = tf.abs(diff) < 0.05
        coord = tf.reshape(coord, [128, 128, 2])
        # Keep only matches that land inside the image and pass the visibility check
        valid_dest_mask = tf.reduce_all(tf.logical_and(tf.logical_and(coord >= 0, coord < 128), valid[..., None]), -1)
        # Mark pixels without a valid match (including degenerate scene coords at the origin) with -1
        coord = tf.where(tf.repeat(tf.logical_and(tf.reduce_all(scene_coord[..., :2] != 0, -1), valid_dest_mask)[..., None], 2, -1), coord, tf.ones_like(coord) * -1)
        coord = tf.cast(coord, tf.int64)
        return reference_images, reference_cam_poses, query_images, query_cam_poses, iou, room_ids, pose_transform, coord

    def dataset_from_text_file(self, name):
        # Read all lines from the given text file (each line represents a training pair)
        pairs = []
        with open(name, "r") as f:
            pairs.extend([l.split() for l in f.readlines()])
        for pair in pairs:
            pair[0] = pair[0].replace(".jpg", "")
            pair[1] = pair[1].replace(".jpg", "")
        # Create dataset
        dataset = tf.data.Dataset.from_tensor_slices(pairs)
        return dataset

    def _normalize_cam_poses_with_mapping(self, reference_images, reference_cam_poses, query_images, query_cam_poses, iou, room_ids, pose_transform, matching_labels):
        reference_cam_poses, query_cam_poses = self._prepare_cam_pose(reference_cam_poses, query_cam_poses)
        return reference_images, reference_cam_poses, query_images, query_cam_poses, iou, room_ids, pose_transform, matching_labels

    def build_train_dataset(self):
        num_threads = 8
        batch_size = self.config.get_int("batch_size")
        # Read in paths and poses of image pairs
        dataset = self.dataset_from_text_file(self.config.get_string("train_pair_file"))
        # Read in each pair
        dataset = dataset.map(lambda x: self.load_pair(x, self.config.get_string("train_data_path")), num_parallel_calls=num_threads)
        # Calculate feature match labels
        dataset = dataset.map(self.calc_matching_labels, num_parallel_calls=num_threads)
        # Augment the images
        dataset = dataset.map(self._augment_data, num_parallel_calls=num_threads)
        dataset = dataset.batch(batch_size, drop_remainder=True)
        # Prepare relative pose representation
        dataset = dataset.map(self._normalize_cam_poses_with_mapping, num_parallel_calls=num_threads)
        dataset = dataset.prefetch(1)
        return dataset

    def build_val_dataset(self, augment=False):
        num_threads = 8
        batch_size = self.config.get_int("val_batch_size")
        # Read in paths and poses of image pairs
        dataset = self.dataset_from_text_file(self.config.get_string("val_pair_file"))
        # Read in each pair
        dataset = dataset.map(lambda x: self.load_pair(x, self.config.get_string("val_data_path")), num_parallel_calls=num_threads)
        # Calculate feature match labels
        dataset = dataset.map(self.calc_matching_labels, num_parallel_calls=num_threads)
        # Augment the images
        if augment:
            dataset = dataset.map(self._augment_data, num_parallel_calls=num_threads)
        dataset = dataset.batch(batch_size, drop_remainder=True)
        # Prepare relative pose representation
        dataset = dataset.map(self._normalize_cam_poses_with_mapping, num_parallel_calls=num_threads)
        dataset = dataset.prefetch(1)
        return dataset
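

# Minimal usage sketch (not part of the original file): build the training
# dataset and fetch a single batch. It assumes a config object exposing the
# get_int / get_bool / get_float / get_list / get_string accessors used above;
# the Config import and its load() entry point are hypothetical placeholders.
if __name__ == "__main__":
    from src.utils.Config import Config  # hypothetical config loader

    config = Config.load("config.yaml")  # hypothetical entry point
    data = Data(config)
    train_dataset = data.build_train_dataset()
    for batch in train_dataset.take(1):
        (ref_images, ref_poses, query_images, query_poses,
         iou, room_ids, pose_transform, matching_labels) = batch
        print(ref_images.shape, query_images.shape, matching_labels.shape)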