Feature/semseg polygon dep #592

Merged · 18 commits · May 12, 2023
3 changes: 2 additions & 1 deletion .gitignore
@@ -179,4 +179,5 @@ coco.ipynb

# SemSeg
CV_datasets/
coco_hf_dataset.py
coco_hf_dataset.py
coco_deeplab_hooks.ipynb
@@ -322,6 +322,7 @@ def _on_step_end(self) -> None:
# do not log if we are not in the final inference loop
if not self.called_finish:
return
print("logging")
logger = SemanticSegmentationModelLogger(
bucket_name=self.bucket_name,
image_paths=image_paths,
126 changes: 79 additions & 47 deletions dataquality/loggers/model_logger/semantic_segmentation.py
@@ -1,4 +1,4 @@
from typing import Dict, List, Optional, Union
from typing import Any, Dict, List, Optional, Union

import numpy as np
import torch
@@ -9,8 +9,10 @@
semantic_segmentation_logger_config,
)
from dataquality.loggers.model_logger.base_model_logger import BaseGalileoModelLogger
from dataquality.schemas.semantic_segmentation import Polygon
from dataquality.schemas.split import Split
from dataquality.utils.semantic_segmentation.errors import (
calculate_dep_polygons_batch,
calculate_misclassified_polygons_batch,
calculate_undetected_polygons_batch,
)
@@ -104,6 +106,66 @@ def dep_path(self) -> str:
def contours_path(self) -> str:
return f"{self.proj_run}/{self.split_name_path}/contours"

def get_polygon_data(
self,
pred_polygons_batch: List[List[Polygon]],
gold_polygons_batch: List[List[Polygon]],
) -> Dict[str, Any]:
"""Returns polygon data for a batch of images in a dictionary
that can then be used for our polygon df

Args:
pred_polygons_batch (List[List[Polygon]]): polygon data for predictions
in a minibatch of images
gold_polygons_batch (List[List[Polygon]]): polygon data for ground truth
in a minibatch of images

Returns:
Dict[str, Any]: a dict that can be used to create a polygon df
"""
image_ids = []
polygon_ids = []
preds = []
golds = []
data_error_potentials = []
errors = []
for i, image_id in enumerate(self.image_ids):
pred_polygons = pred_polygons_batch[i]
for polygon in pred_polygons:
image_ids.append(image_id)
preds.append(polygon.label_idx)
golds.append(-1)
data_error_potentials.append(polygon.data_error_potential)
errors.append(polygon.error_type.value)
upload_polygon_contours(
polygon, self.logger_config.polygon_idx, self.contours_path
)
polygon_ids.append(self.logger_config.polygon_idx)
self.logger_config.polygon_idx += 1
gold_polygons = gold_polygons_batch[i]
for polygon in gold_polygons:
image_ids.append(image_id)
preds.append(-1)
golds.append(polygon.label_idx)
data_error_potentials.append(polygon.data_error_potential)
errors.append(polygon.error_type.value)
upload_polygon_contours(
polygon, self.logger_config.polygon_idx, self.contours_path
)
polygon_ids.append(self.logger_config.polygon_idx)
self.logger_config.polygon_idx += 1

polygon_data = {
"id": polygon_ids,
"image_id": image_ids,
"pred": preds,
"gold": golds,
"data_error_potential": data_error_potentials,
"galileo_error_type": errors,
"split": [self.split] * len(image_ids),
}
return polygon_data
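
For orientation, a minimal sketch of what this helper returns, assuming a hypothetical batch of one image (id 17) that yields one predicted and one gold polygon; every value below is illustrative, not from the repo:

    polygon_data = {
        "id": [0, 1],                           # globally increasing polygon_idx
        "image_id": [17, 17],                   # each polygon row points back to its image
        "pred": [3, -1],                        # pred polygons carry a label_idx; gold rows get -1
        "gold": [-1, 3],                        # and vice versa for gold polygons
        "data_error_potential": [0.42, 0.37],
        "galileo_error_type": ["none", "none"],
        "split": ["training", "training"],
    }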

def _get_data_dict(self) -> Dict:
"""Returns a dictionary of data to be logged as a DataFrame"""
# DEP & likely mislabeled
@@ -112,7 +174,7 @@ def _get_data_dict(self) -> Dict:
self.mislabled_pixels, self.image_ids, prefix=self.lm_path
)

image_dep = calculate_and_upload_dep(
image_dep, dep_heatmaps = calculate_and_upload_dep(
self.output_probs,
self.gold_masks,
self.image_ids,
@@ -132,14 +194,23 @@
# Errors
calculate_misclassified_polygons_batch(self.pred_masks, gold_polygons_batch)
calculate_undetected_polygons_batch(self.pred_masks, gold_polygons_batch)
heights = [img.shape[-1] for img in self.gold_masks]
widths = [img.shape[-2] for img in self.gold_masks]

calculate_dep_polygons_batch(
gold_polygons_batch,
dep_heatmaps.numpy(),
height=heights,
width=widths,
)

image_data = {
"image": [
f"{self.bucket_name}/{pth}" for pth in self.image_paths
], # E.g. https://storage.googleapis.com/bucket_name/.../image_id.png
"id": self.image_ids,
"height": [img.shape[-1] for img in self.gold_masks],
"width": [img.shape[-2] for img in self.gold_masks],
"height": heights,
"width": widths,
"image_data_error_potential": image_dep,
"mean_lm_score": [i for i in mean_mislabeled],
"mean_iou": iou,
@@ -157,50 +228,11 @@
meta=meta_keys,
)

image_ids = []
polygon_ids = []
preds = []
golds = []
data_error_potentials = []
errors = []
for i, image_id in enumerate(self.image_ids):
pred_polygons = pred_polygons_batch[i]
for polygon in pred_polygons:
image_ids.append(image_id)
preds.append(polygon.label_idx)
golds.append(-1)
data_error_potentials.append(0.0)
errors.append(polygon.error_type.value)
upload_polygon_contours(
polygon, self.logger_config.polygon_idx, self.contours_path
)
polygon_ids.append(self.logger_config.polygon_idx)
self.logger_config.polygon_idx += 1
gold_polygons = gold_polygons_batch[i]
for polygon in gold_polygons:
image_ids.append(image_id)
preds.append(-1)
golds.append(polygon.label_idx)
data_error_potentials.append(0.0)
errors.append(polygon.error_type.value)
upload_polygon_contours(
polygon, self.logger_config.polygon_idx, self.contours_path
)
polygon_ids.append(self.logger_config.polygon_idx)
self.logger_config.polygon_idx += 1

polygon_data = {
"id": polygon_ids,
"image_id": image_ids,
"pred": preds,
"gold": golds,
"data_error_potential": data_error_potentials,
"galileo_error_type": errors,
"split": [self.split] * len(image_ids),
}
polygon_data = self.get_polygon_data(pred_polygons_batch, gold_polygons_batch)
n_polygons = len(polygon_data["image_id"])  # one row per logged polygon
if self.split == Split.inference:
polygon_data["inference_name"] = [self.inference_name] * n_polygons
else:
polygon_data["epoch"] = [self.epoch] * n_polygons

return polygon_data
1 change: 1 addition & 0 deletions dataquality/schemas/semantic_segmentation.py
@@ -52,6 +52,7 @@ class Polygon(BaseModel):
misclassified_class_label: Optional[int] = None
error_type: ErrorType = ErrorType.none
contours: List[Contour]
data_error_potential: Optional[float] = None

@property
def contours_opencv(self) -> List[np.ndarray]:
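
The new data_error_potential field defaults to None and is only filled in once calculate_dep_polygons_batch (added to errors.py below) runs over the gold polygons. A hedged illustration of that lifecycle, reusing the variable names from the hunks in this PR:

    polygon = gold_polygons_batch[0][0]
    assert polygon.data_error_potential is None   # schema default: not yet scored
    calculate_dep_polygons_batch(
        gold_polygons_batch, dep_heatmaps.numpy(), height=heights, width=widths
    )
    print(polygon.data_error_potential)           # now the mean dep over the polygon's pixels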
52 changes: 52 additions & 0 deletions dataquality/utils/semantic_segmentation/errors.py
@@ -2,6 +2,7 @@

import numpy as np
import torch
from PIL import Image

from dataquality.schemas.semantic_segmentation import ErrorType, Polygon
from dataquality.utils.semantic_segmentation.polygons import draw_polygon
@@ -155,3 +156,54 @@ def calculate_undetected_polygons_batch(
pred_mask = pred_masks[idx].numpy()
gold_polygons = gold_polygons_batch[idx]
calculate_undetected_polygons(pred_mask, gold_polygons)


def calculate_dep_polygon(
dep_map: np.ndarray,
polygon_img: np.ndarray,
) -> float:
"""Calculate the mean dep score for one polygon drawn onto an image of all
zero's. We can then take the polygon's dep score by only selecting those pixels
with a value greater than 0 and averageing them.

Args:
dep_map (np.ndarray): heatmap of dep scores for an image
polygon_img (np.ndarray): image of all zeros with a polygon drawn on it

Returns:
dep_score (float): mean dep score for the polygon
"""
relevant_region = polygon_img != 0
dep_score = dep_map[relevant_region].mean()
return dep_score
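
A quick self-contained check of that masked mean, using hypothetical 3x3 arrays rather than real heatmaps:

    import numpy as np

    dep_map = np.array([[0.1, 0.2, 0.3],
                        [0.4, 0.5, 0.6],
                        [0.7, 0.8, 0.9]])
    polygon_img = np.array([[0, 1, 1],   # nonzero pixels mark the polygon's interior
                            [0, 1, 1],
                            [0, 0, 0]])
    print(dep_map[polygon_img != 0].mean())   # (0.2 + 0.3 + 0.5 + 0.6) / 4 = 0.4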


def calculate_dep_polygons_batch(
gold_polygons_batch: List[List[Polygon]],
dep_heatmaps: np.ndarray,
height: List[int],
width: List[int],
) -> None:
"""Takes the mean dep score within a polygon and sets the polygon's
dep score to the mean dep score

Args:
gold_polygons_batch (List[List[[Polygon]]): list of the gold polygons
for an image
dep_heatmaps (np.ndarray): heatmaps of DEP scores for an image
height (int): height of original image to resize the dep map to the correct
dims
width (int): width of original image to resize the dep map to the correct
dims
"""
resized_dep_maps = []
for i, dep_map in enumerate(dep_heatmaps):
resized_image = Image.fromarray(dep_map).resize((width[i], height[i]))
resized_dep_maps.append(np.array(resized_image))

for idx in range(len(resized_dep_maps)):
dep_map = resized_dep_maps[idx]
gold_polygons = gold_polygons_batch[idx]
for polygon in gold_polygons:
polygon_img = draw_polygon(polygon, dep_map.shape)
polygon.data_error_potential = calculate_dep_polygon(dep_map, polygon_img)
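
One detail worth noting in the resize step above: PIL's Image.resize takes a (width, height) tuple, while the numpy array that comes back is indexed (height, width). A minimal sketch under assumed dimensions:

    import numpy as np
    from PIL import Image

    dep_map = np.random.rand(64, 64).astype(np.float32)           # hypothetical low-res dep heatmap
    h, w = 480, 640                                               # assumed original image dims
    resized = np.array(Image.fromarray(dep_map).resize((w, h)))   # PIL expects (width, height)
    assert resized.shape == (h, w)                                # numpy shape is (height, width)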
6 changes: 3 additions & 3 deletions dataquality/utils/semantic_segmentation/metrics.py
@@ -19,16 +19,16 @@ def calculate_and_upload_dep(
gold_masks: torch.Tensor,
image_ids: List[int],
obj_prefix: str,
) -> List[float]:
) -> Tuple[List[float], torch.Tensor]:
"""Calculates the Data Error Potential (DEP) for each image in the batch

Uploads the heatmap to Minio as a png.
Returns the image DEP for each image in the batch.
Returns the image DEP for each image in the batch, as well as the dep heatmaps.
Image dep is calculated by the average pixel dep.
"""
dep_heatmaps = calculate_dep_heatmaps(probs, gold_masks)
upload_dep_heatmaps(dep_heatmaps, image_ids, obj_prefix)
return calculate_image_dep(dep_heatmaps)
return calculate_image_dep(dep_heatmaps), dep_heatmaps


def calculate_dep_heatmaps(
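
Since image dep is defined here as the average pixel dep, callers of the changed function now unpack two values, as _get_data_dict above does with image_dep, dep_heatmaps = calculate_and_upload_dep(...). A hedged sketch of what that second return value feeds into (calculate_image_dep's real body lives further down in metrics.py and may differ):

    from typing import List

    import torch

    def calculate_image_dep(dep_heatmaps: torch.Tensor) -> List[float]:
        # assumed shape (batch, H, W): one mean-pixel-dep score per image
        return dep_heatmaps.mean(dim=(1, 2)).tolist()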