Invalidate hub-wide caches on deletions and overwrites #7525

Merged (8 commits, Sep 27, 2024)
lint
teh-cmc committed Sep 27, 2024
commit 7501d082cefe54c7595ee044591babe1d206f4da
61 changes: 45 additions & 16 deletions examples/python/face_tracking/face_tracking.py
@@ -27,7 +27,7 @@
# By instead logging data as static, no data will be accumulated over time since previous
# data is overwritten.
# Naturally, the drawback of this is that there's no history of previous data sent to the viewer,
-# as well as no timestamps, making the Viewer's timeline effectively inactive.
+# as well as no timestamps, making the Viewer's timeline effectively inactive.
global ALL_STATIC
ALL_STATIC: bool = False

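For readers skimming the diff: the comment above describes rerun's static-vs-temporal logging trade-off. A minimal, self-contained sketch of the two modes (not part of this diff; the entity paths are made up for illustration):

```python
import rerun as rr

rr.init("rerun_example_static_vs_temporal", spawn=True)

# Temporal logging: each call is stamped on a timeline, so the Viewer
# keeps a scrubbable history of every value ever logged.
for frame_nr in range(3):
    rr.set_time_sequence("frame_nr", frame_nr)
    rr.log("points/temporal", rr.Points2D([[float(frame_nr), 0.0]]))

# Static logging: no timestamp is attached and each new call overwrites
# the previous one, so only the latest state is ever visible.
rr.log("points/static", rr.Points2D([[42.0, 0.0]]), static=True)
```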
@@ -121,7 +121,9 @@ def __init__(self, video_mode: bool = False):
)
self._options = vision.FaceDetectorOptions(
base_options=self._base_options,
-running_mode=mp.tasks.vision.RunningMode.VIDEO if self._video_mode else mp.tasks.vision.RunningMode.IMAGE,
+running_mode=mp.tasks.vision.RunningMode.VIDEO
+if self._video_mode
+else mp.tasks.vision.RunningMode.IMAGE,
)
self._detector = vision.FaceDetector.create_from_options(self._options)

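The running_mode switch above matters at call time: MediaPipe Tasks' VIDEO mode is stateful and expects a monotonically increasing timestamp in milliseconds (detect_for_video), while IMAGE mode is stateless (detect). Since this example carries frame times in nanoseconds (see frame_time_nano below), a conversion along these lines is needed; the variable values here are illustrative:

```python
# MediaPipe's detect_for_video() takes an integer millisecond timestamp,
# while the capture loop in this example tracks nanoseconds.
frame_time_nano = 1_500_000_000               # 1.5 s into the stream
timestamp_ms = frame_time_nano // 1_000_000   # -> 1500 ms
```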
@@ -163,7 +165,8 @@ def detect_and_log(self, image: cv2.typing.MatLike, frame_time_nano: int) -> None:

# MediaPipe's keypoints are normalized to [0, 1], so we need to scale them to get pixel coordinates.
pts = [
-(math.floor(keypoint.x * width), math.floor(keypoint.y * height)) for keypoint in detection.keypoints
+(math.floor(keypoint.x * width), math.floor(keypoint.y * height))
+for keypoint in detection.keypoints
]
rr.log(
f"video/detector/faces/{i}/keypoints",
@@ -199,7 +202,9 @@ def __init__(self, video_mode: bool = False, num_faces: int = 1):
base_options=self._base_options,
output_face_blendshapes=True,
num_faces=num_faces,
-running_mode=mp.tasks.vision.RunningMode.VIDEO if self._video_mode else mp.tasks.vision.RunningMode.IMAGE,
+running_mode=mp.tasks.vision.RunningMode.VIDEO
+if self._video_mode
+else mp.tasks.vision.RunningMode.IMAGE,
)
self._detector = vision.FaceLandmarker.create_from_options(self._options)

@@ -221,7 +226,9 @@ def __init__(self, video_mode: bool = False, num_faces: int = 1):
mp.solutions.face_mesh.FACEMESH_NOSE,
]

-self._class_ids = [0] * mp.solutions.face_mesh.FACEMESH_NUM_LANDMARKS_WITH_IRISES
+self._class_ids = [
+0
+] * mp.solutions.face_mesh.FACEMESH_NUM_LANDMARKS_WITH_IRISES
class_descriptions = []
for i, klass in enumerate(classes):
# MediaPipe only provides connections for class, not actual class per keypoint. So we have to extract the
@@ -241,7 +248,9 @@ def __init__(self, video_mode: bool = False, num_faces: int = 1):
)
)

-rr.log("video/landmarker", rr.AnnotationContext(class_descriptions), static=True)
+rr.log(
+"video/landmarker", rr.AnnotationContext(class_descriptions), static=True
+)
rr.log("reconstruction", rr.AnnotationContext(class_descriptions), static=True)

# properly align the 3D face in the viewer
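For context on the rr.AnnotationContext calls above: a class description bundles per-keypoint labels and skeleton connections so that points logged later with matching class_ids and keypoint_ids get labeled and wired up in the Viewer. A stripped-down sketch assuming the same rerun API (the class and keypoint names here are invented):

```python
import rerun as rr

rr.init("rerun_example_annotation_context", spawn=True)

rr.log(
    "video/landmarker",
    rr.AnnotationContext([
        rr.ClassDescription(
            info=rr.AnnotationInfo(id=0, label="Lips"),
            keypoint_annotations=[
                rr.AnnotationInfo(id=0, label="upper"),
                rr.AnnotationInfo(id=1, label="lower"),
            ],
            # Draw a skeleton edge between the two keypoints.
            keypoint_connections=[(0, 1)],
        )
    ]),
    static=True,  # the context rarely changes, so log it once
)

# Points logged under this path pick up the labels and the edge above.
rr.log(
    "video/landmarker/points",
    rr.Points2D([[0.0, 0.0], [0.0, 1.0]], class_ids=0, keypoint_ids=[0, 1]),
)
```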
@@ -264,8 +273,12 @@ def is_empty(i):  # type: ignore[no-untyped-def]
except StopIteration:
return True

-if is_empty(zip(detection_result.face_landmarks, detection_result.face_blendshapes)):
-rr.log("video/landmarker/faces", rr.Clear(recursive=True), static=ALL_STATIC)
+if is_empty(
+zip(detection_result.face_landmarks, detection_result.face_blendshapes)
+):
+rr.log(
+"video/landmarker/faces", rr.Clear(recursive=True), static=ALL_STATIC
+)
rr.log("reconstruction/faces", rr.Clear(recursive=True), static=ALL_STATIC)
rr.log("blendshapes", rr.Clear(recursive=True), static=ALL_STATIC)

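The is_empty helper, shown only partially in this hunk, checks whether an iterable yields anything by attempting a single next() call; zip() of the two result lists is empty exactly when no faces were detected. Reconstructed in full (the body is inferred from the except branch visible above, so treat it as a sketch):

```python
def is_empty(i):  # type: ignore[no-untyped-def]
    try:
        next(iter(i))
        return False
    except StopIteration:
        # next() raises StopIteration on an exhausted iterator.
        return True

assert is_empty(zip([], []))
assert not is_empty(zip([1], ["a"]))
```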
@@ -287,11 +300,15 @@ def is_empty(i):  # type: ignore[no-untyped-def]
continue

# MediaPipe's keypoints are normalized to [0, 1], so we need to scale them to get pixel coordinates.
-pts = [(math.floor(lm.x * width), math.floor(lm.y * height)) for lm in landmark]
+pts = [
+(math.floor(lm.x * width), math.floor(lm.y * height)) for lm in landmark
+]
keypoint_ids = list(range(len(landmark)))
rr.log(
f"video/landmarker/faces/{i}/landmarks",
-rr.Points2D(pts, radii=3, keypoint_ids=keypoint_ids, class_ids=self._class_ids),
+rr.Points2D(
+pts, radii=3, keypoint_ids=keypoint_ids, class_ids=self._class_ids
+),
static=ALL_STATIC,
)

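As the comment in this hunk notes, MediaPipe landmarks are normalized to [0, 1], so pixel coordinates come from multiplying by the frame size and flooring. A tiny worked example of that conversion:

```python
import math

width, height = 640, 480   # frame size in pixels
lm_x, lm_y = 0.25, 0.5     # a normalized MediaPipe landmark

pt = (math.floor(lm_x * width), math.floor(lm_y * height))
assert pt == (160, 240)
```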
@@ -344,7 +361,9 @@ def resize_image(image: cv2.typing.MatLike, max_dim: int | None) -> cv2.typing.MatLike:
return image


-def run_from_video_capture(vid: int | str, max_dim: int | None, max_frame_count: int | None, num_faces: int) -> None:
+def run_from_video_capture(
+vid: int | str, max_dim: int | None, max_frame_count: int | None, num_faces: int
+) -> None:
"""
Run the face detector on a video stream.

@@ -369,7 +388,9 @@ def run_from_video_capture(vid: int | str, max_dim: int | None, max_frame_count: int | None, num_faces: int) -> None:

print("Capturing video stream. Press ctrl-c to stop.")
try:
-it: Iterable[int] = itertools.count() if max_frame_count is None else range(max_frame_count)
+it: Iterable[int] = (
+itertools.count() if max_frame_count is None else range(max_frame_count)
+)

for frame_idx in tqdm.tqdm(it, desc="Processing frames"):
# Capture frame-by-frame
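The loop body is cut off by the diff here. It follows the standard OpenCV pattern, reading frames until the stream ends; a generic sketch of that pattern (not the example's exact code), where vid can be a device index or a file path/URL as the signature above allows:

```python
import cv2

cap = cv2.VideoCapture(0)  # device index, or a file path / URL string
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break  # end of stream or read failure
        # ... hand `frame` to the detector here ...
finally:
    cap.release()
```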
@@ -427,7 +448,9 @@ def main() -> None:
logging.getLogger().addHandler(logging.StreamHandler())
logging.getLogger().setLevel("INFO")

-parser = argparse.ArgumentParser(description="Uses the MediaPipe Face Detection to track a human pose in video.")
+parser = argparse.ArgumentParser(
+description="Uses the MediaPipe Face Detection to track a human pose in video."
+)
parser.add_argument(
"--demo-image",
action="store_true",
@@ -464,7 +487,9 @@ def main() -> None:
"(temporal smoothing is applied only for a value of 1)."
),
)
-parser.add_argument("--static", action="store_true", help="If set, logs everything as static")
+parser.add_argument(
+"--static", action="store_true", help="If set, logs everything as static"
+)

rr.script_add_args(parser)

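rr.script_add_args above is one of rerun's example-script helpers; it registers the SDK's standard flags (--connect, --save, and similar) on an existing parser. The usual trio, shown with a hypothetical application id:

```python
import argparse
import rerun as rr

parser = argparse.ArgumentParser()
rr.script_add_args(parser)  # adds rerun's standard CLI flags
args = parser.parse_args()

rr.script_setup(args, "rerun_example_my_app")  # init + spawn/connect/save
# ... log data here ...
rr.script_teardown(args)
```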
@@ -505,9 +530,13 @@ def main() -> None:
elif args.image is not None:
run_from_sample_image(args.image, args.max_dim, args.num_faces)
elif args.video is not None:
-run_from_video_capture(str(args.video), args.max_dim, args.max_frame, args.num_faces)
+run_from_video_capture(
+str(args.video), args.max_dim, args.max_frame, args.num_faces
+)
else:
-run_from_video_capture(args.camera, args.max_dim, args.max_frame, args.num_faces)
+run_from_video_capture(
+args.camera, args.max_dim, args.max_frame, args.num_faces
+)

rr.script_teardown(args)
