From 7e80afb604c4a55a227c12ca79b96b142de0e8c8 Mon Sep 17 00:00:00 2001 From: Derek Date: Thu, 11 May 2023 07:59:21 -0700 Subject: [PATCH 01/16] Initial refactor and dep polygon changes --- .../model_logger/semantic_segmentation.py | 81 +++++- dataquality/schemas/semantic_segmentation.py | 1 + .../utils/semantic_segmentation/errors.py | 59 +++++ .../utils/semantic_segmentation/metrics.py | 6 +- docs/cv/coco_deeplab_hooks.ipynb | 232 +++++++++++------- 5 files changed, 280 insertions(+), 99 deletions(-) diff --git a/dataquality/loggers/model_logger/semantic_segmentation.py b/dataquality/loggers/model_logger/semantic_segmentation.py index b824d72c9..1efa3b4a8 100644 --- a/dataquality/loggers/model_logger/semantic_segmentation.py +++ b/dataquality/loggers/model_logger/semantic_segmentation.py @@ -1,4 +1,4 @@ -from typing import Dict, List, Optional, Union +from typing import Dict, List, Optional, Union, Tuple, Any import numpy as np import torch @@ -13,6 +13,7 @@ from dataquality.utils.semantic_segmentation.errors import ( calculate_misclassified_polygons_batch, calculate_undetected_polygons_batch, + calculate_dep_polygons_batch ) from dataquality.utils.semantic_segmentation.lm import upload_mislabeled_pixels from dataquality.utils.semantic_segmentation.metrics import ( @@ -103,6 +104,67 @@ def dep_path(self) -> str: @property def contours_path(self) -> str: return f"{self.proj_run}/{self.split_name_path}/contours" + + + def get_polygon_data( + self, + pred_polygons_batch: Tuple[List, List], + gold_polygons_batch: Tuple[List, List], + ) -> Dict[str, Any]: + """Returns polygon data for a batch of images in a dictionary + that can then be used for our polygon df + + Args: + pred_polygons_batch (Tuple[List, List]): polygon data for predictions + in a minibatch of images + gold_polygons_batch (Tuple[List, List]): polygon data for ground truth + in a minibatch of images + + Returns: + Dict[str, Any]: a dict that can be used to create a polygon df + """ + image_ids = [] + polygon_ids = [] + preds = [] + golds = [] + data_error_potentials = [] + errors = [] + for i, image_id in enumerate(self.image_ids): + pred_polygons = pred_polygons_batch[i] + for polygon in pred_polygons: + image_ids.append(image_id) + preds.append(polygon.label_idx) + golds.append(-1) + data_error_potentials.append(0.0) + errors.append(polygon.error_type.value) + upload_polygon_contours( + polygon, self.logger_config.polygon_idx, self.contours_path + ) + polygon_ids.append(self.logger_config.polygon_idx) + self.logger_config.polygon_idx += 1 + gold_polygons = gold_polygons_batch[i] + for polygon in gold_polygons: + image_ids.append(image_id) + preds.append(-1) + golds.append(polygon.label_idx) + data_error_potentials.append(polygon.data_error_potential) + errors.append(polygon.error_type.value) + upload_polygon_contours( + polygon, self.logger_config.polygon_idx, self.contours_path + ) + polygon_ids.append(self.logger_config.polygon_idx) + self.logger_config.polygon_idx += 1 + + polygon_data = { + "id": polygon_ids, + "image_id": image_ids, + "pred": preds, + "gold": golds, + "data_error_potential": data_error_potentials, + "galileo_error_type": errors, + "split": [self.split] * len(image_ids), + } + return polygon_data def _get_data_dict(self) -> Dict: """Returns a dictionary of data to be logged as a DataFrame""" @@ -112,7 +174,7 @@ def _get_data_dict(self) -> Dict: self.mislabled_pixels, self.image_ids, prefix=self.lm_path ) - image_dep = calculate_and_upload_dep( + image_dep, dep_heatmaps = calculate_and_upload_dep( self.output_probs, self.gold_masks, self.image_ids, @@ -132,6 +194,13 @@ def _get_data_dict(self) -> Dict: # Errors calculate_misclassified_polygons_batch(self.pred_masks, gold_polygons_batch) calculate_undetected_polygons_batch(self.pred_masks, gold_polygons_batch) + + '''gold_polygons_batch = calculate_dep_polygons_batch( + gold_polygons_batch, + dep_heatmaps, + height = [img.shape[-1] for img in self.gold_masks], + width = [img.shape[-2] for img in self.gold_masks], + )''' image_data = { "image": [ @@ -157,6 +226,7 @@ def _get_data_dict(self) -> Dict: meta=meta_keys, ) + # polygon_data = self.get_polygon_data(pred_polygons_batch, gold_polygons_batch) image_ids = [] polygon_ids = [] preds = [] @@ -181,7 +251,7 @@ def _get_data_dict(self) -> Dict: image_ids.append(image_id) preds.append(-1) golds.append(polygon.label_idx) - data_error_potentials.append(0.0) + data_error_potentials.append(0) errors.append(polygon.error_type.value) upload_polygon_contours( polygon, self.logger_config.polygon_idx, self.contours_path @@ -198,9 +268,10 @@ def _get_data_dict(self) -> Dict: "galileo_error_type": errors, "split": [self.split] * len(image_ids), } + if self.split == Split.inference: - polygon_data["inference_name"] = [self.inference_name] * len(image_ids) + polygon_data["inference_name"] = [self.inference_name] * len(self.image_ids) else: - polygon_data["epoch"] = [self.epoch] * len(image_ids) + polygon_data["epoch"] = [self.epoch] * len(self.image_ids) return polygon_data diff --git a/dataquality/schemas/semantic_segmentation.py b/dataquality/schemas/semantic_segmentation.py index faec845bc..76104f796 100644 --- a/dataquality/schemas/semantic_segmentation.py +++ b/dataquality/schemas/semantic_segmentation.py @@ -52,6 +52,7 @@ class Polygon(BaseModel): misclassified_class_label: Optional[int] = None error_type: ErrorType = ErrorType.none contours: List[Contour] + # data_error_potential: float = 0.0 @property def contours_opencv(self) -> List[np.ndarray]: diff --git a/dataquality/utils/semantic_segmentation/errors.py b/dataquality/utils/semantic_segmentation/errors.py index 516c4f823..0930b76a0 100644 --- a/dataquality/utils/semantic_segmentation/errors.py +++ b/dataquality/utils/semantic_segmentation/errors.py @@ -2,6 +2,8 @@ import numpy as np import torch +import cv2 +from PIL import Image from dataquality.schemas.semantic_segmentation import ErrorType, Polygon from dataquality.utils.semantic_segmentation.polygons import draw_polygon @@ -155,3 +157,60 @@ def calculate_undetected_polygons_batch( pred_mask = pred_masks[idx].numpy() gold_polygons = gold_polygons_batch[idx] calculate_undetected_polygons(pred_mask, gold_polygons) + + +def calculate_dep_polygon( + dep_map: np.ndarray, + polygon_img: np.ndarray, +) -> None: + """Calculate the mean dep score for one polygon drawn onto an image of all + zero's. We can then take the polygon's dep score by only selecting those pixels + with a value greater than 0 and averageing them. + + Args: + dep_map (np.ndarray): heatmap of dep scores for an image + polygon_img (np.ndarray): image of all zeros with a polygon drawn on it + + Returns: + dep_score (float): mean dep score for the polygon + """ + relevant_region = polygon_img != 0 + dep_score = dep_map[relevant_region].mean() + return dep_score + + + +def calculate_dep_polygons_batch( + gold_polygons_batch: List[List[Polygon]], + dep_heatmaps: np.ndarray, + height: List[int], + width: List[int], +) -> List[List[Polygon]]: + """Takes the mean dep score within a polygon and sets the polygon's + dep score to the mean dep score + + Args: + gold_polygons_batch (List[List[[Polygon]]): list of the gold polygons + for an image + dep_heatmaps (np.ndarray): heatmaps of DEP scores for an image + height (int): height of original image to resize the dep map to the correct + dims + width (int): width of original image to resize the dep map to the correct + dims + """ + resized_dep_maps = [] + for i, dep_map in enumerate(dep_heatmaps): + resized_dep_maps.append(Image.fromarray(dep_map).resize((width[i], height[i]))) + + for idx in range(len(resized_dep_maps)): + dep_map = resized_dep_maps[idx] + gold_polygons = gold_polygons_batch[idx] + for polygon in gold_polygons: + polygon_img = draw_polygon(polygon, dep_map.size) + polygon.data_error_potential = calculate_dep_polygon(dep_map, polygon_img) + + return gold_polygons_batch + + + + diff --git a/dataquality/utils/semantic_segmentation/metrics.py b/dataquality/utils/semantic_segmentation/metrics.py index 29ec933e6..3cc495c09 100644 --- a/dataquality/utils/semantic_segmentation/metrics.py +++ b/dataquality/utils/semantic_segmentation/metrics.py @@ -19,16 +19,16 @@ def calculate_and_upload_dep( gold_masks: torch.Tensor, image_ids: List[int], obj_prefix: str, -) -> List[float]: +) -> Tuple[List[float], np.ndarray]: """Calculates the Data Error Potential (DEP) for each image in the batch Uploads the heatmap to Minio as a png. - Returns the image DEP for each image in the batch. + Returns the image DEP for each image in the batch. As well as the dep_heatmaps. Image dep is calculated by the average pixel dep. """ dep_heatmaps = calculate_dep_heatmaps(probs, gold_masks) upload_dep_heatmaps(dep_heatmaps, image_ids, obj_prefix) - return calculate_image_dep(dep_heatmaps) + return calculate_image_dep(dep_heatmaps), dep_heatmaps def calculate_dep_heatmaps( diff --git a/docs/cv/coco_deeplab_hooks.ipynb b/docs/cv/coco_deeplab_hooks.ipynb index 62d9826f3..8a9838cf8 100644 --- a/docs/cv/coco_deeplab_hooks.ipynb +++ b/docs/cv/coco_deeplab_hooks.ipynb @@ -40,23 +40,94 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['cat.jpeg',\n", + " 'ui',\n", + " 'Screenshot 2023-05-10 at 4.22.31 PM.png',\n", + " 'Screenshot 2023-05-10 at 4.22.52 PM.png',\n", + " 'hackathon.py',\n", + " 'screenshot',\n", + " '.DS_Store',\n", + " 'Screenshot 2023-05-10 at 4.21.57 PM.png',\n", + " 'Hyatt _ Reservation Confirmation.pdf',\n", + " '.localized',\n", + " '.bashrc',\n", + " 'val2017',\n", + " 'lease.pdf',\n", + " 'Screenshot 2023-05-10 at 4.23.40 PM.png',\n", + " 'OD',\n", + " 'CV_datasets',\n", + " '14D46FBC-BBC7-446D-B6CF-C3924EA12735.jpeg',\n", + " 'datasets',\n", + " 'tests',\n", + " 'recreate',\n", + " 'dojo',\n", + " 'Screenshot 2023-05-10 at 4.21.43 PM.png',\n", + " 'all_images',\n", + " 'Screenshot 2023-04-14 at 10.48.39 AM.png',\n", + " 'rungalileo',\n", + " 'test.py',\n", + " 'Screen Shot 2023-03-24 at 2.14.57 PM.png',\n", + " 'Screenshot 2023-03-29 at 2.13.50 PM.png',\n", + " 'annotations',\n", + " 'Screenshot 2023-05-10 at 4.23.28 PM.png',\n", + " 'dataquality',\n", + " 'Screenshot 2023-05-10 at 4.22.41 PM.png',\n", + " 'keypair',\n", + " 'Screenshot 2023-05-10 at 4.22.18 PM.png',\n", + " 'Hackathon.ipynb',\n", + " 'dep',\n", + " 'docker_images',\n", + " 'beard_pic.jpeg',\n", + " 'segmentation_datasets',\n", + " 'api',\n", + " 'ml papers',\n", + " 'runners',\n", + " 'ultralytics',\n", + " 'random_scripts',\n", + " 'cat_pic.jpeg',\n", + " 'survey.csv',\n", + " 'test.ipynb',\n", + " 'Screenshot 2023-05-10 at 4.23.04 PM.png',\n", + " 'Screen Shot 2023-02-22 at 1.52.33 PM.png',\n", + " 'semantic_segmentation',\n", + " 'llama',\n", + " 'gpt4all']" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "os.listdir('../../..')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Found dataset, there are 4031 images and 4031 masks\n" + "Found dataset, there are 4030 images and 4030 masks\n" ] } ], "source": [ "# download the data from our public gcs bucket and save it to disk\n", "# dataset_path, img_path, mask_path = download_gcs_data()\n", - "dataset_path = \"./CV_datasets/\"\n", - "img_path = \"all_images\"\n", - "mask_path = \"all_masks\"\n", + "dataset_path = \"/Users/derek/Desktop/CV_datasets\"\n", + "img_path = \"COCO_seg_val_5000/all_images\"\n", + "mask_path = \"COCO_seg_val_5000/all_masks\"\n", "\n", "IMG_SIZE = 128\n", "NC = 21 # Number of classes\n", @@ -84,7 +155,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -98,10 +169,10 @@ "name": "stderr", "output_type": "stream", "text": [ - "Using cache found in /Users/elliottchartock/.cache/torch/hub/pytorch_vision_v0.10.0\n", - "/Users/elliottchartock/Code/dataquality/.venv/lib/python3.9/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and will be removed in 0.15, please use 'weights' instead.\n", + "Using cache found in /Users/derek/.cache/torch/hub/pytorch_vision_v0.10.0\n", + "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n", " warnings.warn(\n", - "/Users/elliottchartock/Code/dataquality/.venv/lib/python3.9/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and will be removed in 0.15. The current behavior is equivalent to passing `weights=DeepLabV3_ResNet50_Weights.COCO_WITH_VOC_LABELS_V1`. You can also use `weights=DeepLabV3_ResNet50_Weights.DEFAULT` to get the most up-to-date weights.\n", + "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=DeepLabV3_ResNet50_Weights.COCO_WITH_VOC_LABELS_V1`. You can also use `weights=DeepLabV3_ResNet50_Weights.DEFAULT` to get the most up-to-date weights.\n", " warnings.warn(msg)\n" ] } @@ -119,14 +190,14 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/Users/elliottchartock/Code/dataquality/.venv/lib/python3.9/site-packages/dataquality/core/__init__.py:27: GalileoWarning: configure is deprecated, use dq.set_console_url and dq.login\n", + "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/core/__init__.py:27: GalileoWarning: configure is deprecated, use dq.set_console_url and dq.login\n", " warnings.warn(\n" ] }, @@ -139,8 +210,22 @@ "\n", "๐Ÿš€ You're logged in to Galileo as galileo@rungalileo.io!\n", "โœจ Initializing existing public project 'Derek-Elliott-Proj'\n", - "๐Ÿƒโ€โ™‚๏ธ Creating new run 'test-polygon-df'\n", - "๐Ÿ›ฐ Connected to existing project 'Derek-Elliott-Proj', and new run 'test-polygon-df'.\n" + "๐Ÿƒโ€โ™‚๏ธ Fetching existing run 'test800'\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/core/init.py:148: GalileoWarning: Run: Derek-Elliott-Proj/test800 already exists! The existing run will get overwritten on call to finish()!\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿ›ฐ Connected to existing project 'Derek-Elliott-Proj', and existing run 'test800'.\n" ] } ], @@ -162,7 +247,7 @@ "import dataquality as dq\n", "dq.configure()\n", "\n", - "dq.init(\"semantic_segmentation\", \"Derek-Elliott-Proj\", 'test-polygon-df')\n", + "dq.init(\"semantic_segmentation\", \"Derek-Elliott-Proj\", 'test800')\n", "class_dict = { 'background': 0,\n", " 'airplane': 1,\n", " 'bicycle': 2,\n", @@ -190,9 +275,17 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-05-11 07:57:24.556151: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" + ] + }, { "name": "stdout", "output_type": "stream", @@ -209,11 +302,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/torch/cuda/amp/grad_scaler.py:118: UserWarning: torch.cuda.amp.GradScaler is enabled, but CUDA is not available. Disabling.\n", + "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True).\n", + " warnings.warn(\n", + "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/torch/cuda/amp/grad_scaler.py:120: UserWarning: torch.cuda.amp.GradScaler is enabled, but CUDA is not available. Disabling.\n", " warnings.warn(\"torch.cuda.amp.GradScaler is enabled, but CUDA is not available. Disabling.\")\n", - "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/torch/amp/autocast_mode.py:202: UserWarning: User provided device_type of 'cuda', but CUDA is not available. Disabling\n", + "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/torch/amp/autocast_mode.py:204: UserWarning: User provided device_type of 'cuda', but CUDA is not available. Disabling\n", " warnings.warn('User provided device_type of \\'cuda\\', but CUDA is not available. Disabling')\n", - " 50%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ | 1/2 [00:02<00:02, 2.77s/it]\n" + " 50%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ | 1/2 [00:02<00:02, 2.91s/it]\n" ] } ], @@ -253,7 +348,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -270,91 +365,46 @@ "name": "stdout", "output_type": "stream", "text": [ - "โ˜๏ธ Uploading Data\n", - "CuML libraries not found, running standard process. For faster Galileo processing, consider installing\n", - "`pip install 'dataquality[cuda]' --extra-index-url=https://pypi.nvidia.com/`\n" + "Logging 2 samples [########################################] 100.00% elapsed time : 0.27s = 0.0m = 0.0h\n", + " " ] }, { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "f1d760def4cc4279b8fb58824ab5801b", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Processing data for upload: 0%| | 0/2 [00:00 2\u001b[0m dq\u001b[39m.\u001b[39;49mfinish()\n", + "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/utils/helpers.py:25\u001b[0m, in \u001b[0;36mcheck_noop..decorator\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[39mif\u001b[39;00m galileo_disabled():\n\u001b[1;32m 24\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[0;32m---> 25\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", + "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/core/finish.py:61\u001b[0m, in \u001b[0;36mfinish\u001b[0;34m(last_epoch, wait, create_data_embs)\u001b[0m\n\u001b[1;32m 58\u001b[0m \u001b[39m# Certain tasks require extra finish logic\u001b[39;00m\n\u001b[1;32m 59\u001b[0m data_logger\u001b[39m.\u001b[39mlogger_config\u001b[39m.\u001b[39mfinish()\n\u001b[0;32m---> 61\u001b[0m data_logger\u001b[39m.\u001b[39;49mupload(last_epoch, create_data_embs\u001b[39m=\u001b[39;49mcreate_data_embs)\n\u001b[1;32m 62\u001b[0m upload_dq_log_file()\n\u001b[1;32m 63\u001b[0m body \u001b[39m=\u001b[39m \u001b[39mdict\u001b[39m(\n\u001b[1;32m 64\u001b[0m project_id\u001b[39m=\u001b[39m\u001b[39mstr\u001b[39m(config\u001b[39m.\u001b[39mcurrent_project_id),\n\u001b[1;32m 65\u001b[0m run_id\u001b[39m=\u001b[39m\u001b[39mstr\u001b[39m(config\u001b[39m.\u001b[39mcurrent_run_id),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 70\u001b[0m feature_names\u001b[39m=\u001b[39mdata_logger\u001b[39m.\u001b[39mlogger_config\u001b[39m.\u001b[39mfeature_names,\n\u001b[1;32m 71\u001b[0m )\n", + "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/loggers/data_logger/base_data_logger.py:229\u001b[0m, in \u001b[0;36mBaseGalileoDataLogger.upload\u001b[0;34m(self, last_epoch, create_data_embs)\u001b[0m\n\u001b[1;32m 225\u001b[0m \u001b[39massert\u001b[39;00m (\n\u001b[1;32m 226\u001b[0m config\u001b[39m.\u001b[39mcurrent_project_id \u001b[39mand\u001b[39;00m config\u001b[39m.\u001b[39mcurrent_run_id\n\u001b[1;32m 227\u001b[0m ), \u001b[39m\"\u001b[39m\u001b[39mYou must call dq.init and train a model before calling finish\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 228\u001b[0m ThreadPoolManager\u001b[39m.\u001b[39mwait_for_threads()\n\u001b[0;32m--> 229\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mcheck_for_logging_failures()\n\u001b[1;32m 230\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mโ˜๏ธ Uploading Data\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 231\u001b[0m object_store \u001b[39m=\u001b[39m ObjectStore()\n", + "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/loggers/base_logger.py:354\u001b[0m, in \u001b[0;36mBaseGalileoLogger.check_for_logging_failures\u001b[0;34m(cls)\u001b[0m\n\u001b[1;32m 352\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mcls\u001b[39m\u001b[39m.\u001b[39mlogger_config\u001b[39m.\u001b[39mexception:\n\u001b[1;32m 353\u001b[0m upload_dq_log_file()\n\u001b[0;32m--> 354\u001b[0m \u001b[39mraise\u001b[39;00m GalileoException(\u001b[39mcls\u001b[39m\u001b[39m.\u001b[39mlogger_config\u001b[39m.\u001b[39mexception)\n", + "\u001b[0;31mGalileoException\u001b[0m: An issue occurred while logging model outputs. Address any issues in your logging and make sure to call dq.init before restarting:\nOSError('Unable to create file (unable to truncate a file which is already open)')" + ] } ], "source": [ + "dq.enable_galileo_verbose()\n", "dq.finish()" ] }, @@ -888,7 +938,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.9.10" }, "toc-autonumbering": false, "toc-showmarkdowntxt": false, From e201fffb9f02108126523a2c18b004cee3c3da55 Mon Sep 17 00:00:00 2001 From: Derek Date: Thu, 11 May 2023 11:42:26 -0700 Subject: [PATCH 02/16] Revert notebook --- docs/cv/coco_deeplab_hooks.ipynb | 254 +------------------------------ 1 file changed, 2 insertions(+), 252 deletions(-) diff --git a/docs/cv/coco_deeplab_hooks.ipynb b/docs/cv/coco_deeplab_hooks.ipynb index af996a4ae..c51512882 100644 --- a/docs/cv/coco_deeplab_hooks.ipynb +++ b/docs/cv/coco_deeplab_hooks.ipynb @@ -29,94 +29,6 @@ }, { "cell_type": "code", -<<<<<<< HEAD - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['cat.jpeg',\n", - " 'ui',\n", - " 'Screenshot 2023-05-10 at 4.22.31 PM.png',\n", - " 'Screenshot 2023-05-10 at 4.22.52 PM.png',\n", - " 'hackathon.py',\n", - " 'screenshot',\n", - " '.DS_Store',\n", - " 'Screenshot 2023-05-10 at 4.21.57 PM.png',\n", - " 'Hyatt _ Reservation Confirmation.pdf',\n", - " '.localized',\n", - " '.bashrc',\n", - " 'val2017',\n", - " 'lease.pdf',\n", - " 'Screenshot 2023-05-10 at 4.23.40 PM.png',\n", - " 'OD',\n", - " 'CV_datasets',\n", - " '14D46FBC-BBC7-446D-B6CF-C3924EA12735.jpeg',\n", - " 'datasets',\n", - " 'tests',\n", - " 'recreate',\n", - " 'dojo',\n", - " 'Screenshot 2023-05-10 at 4.21.43 PM.png',\n", - " 'all_images',\n", - " 'Screenshot 2023-04-14 at 10.48.39 AM.png',\n", - " 'rungalileo',\n", - " 'test.py',\n", - " 'Screen Shot 2023-03-24 at 2.14.57 PM.png',\n", - " 'Screenshot 2023-03-29 at 2.13.50 PM.png',\n", - " 'annotations',\n", - " 'Screenshot 2023-05-10 at 4.23.28 PM.png',\n", - " 'dataquality',\n", - " 'Screenshot 2023-05-10 at 4.22.41 PM.png',\n", - " 'keypair',\n", - " 'Screenshot 2023-05-10 at 4.22.18 PM.png',\n", - " 'Hackathon.ipynb',\n", - " 'dep',\n", - " 'docker_images',\n", - " 'beard_pic.jpeg',\n", - " 'segmentation_datasets',\n", - " 'api',\n", - " 'ml papers',\n", - " 'runners',\n", - " 'ultralytics',\n", - " 'random_scripts',\n", - " 'cat_pic.jpeg',\n", - " 'survey.csv',\n", - " 'test.ipynb',\n", - " 'Screenshot 2023-05-10 at 4.23.04 PM.png',\n", - " 'Screen Shot 2023-02-22 at 1.52.33 PM.png',\n", - " 'semantic_segmentation',\n", - " 'llama',\n", - " 'gpt4all']" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "os.listdir('../../..')" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found dataset, there are 4030 images and 4030 masks\n" - ] - } - ], - "source": [ - "# download the data from our public gcs bucket and save it to disk\n", - "# dataset_path, img_path, mask_path = download_gcs_data()\n", - "dataset_path = \"/Users/derek/Desktop/CV_datasets\"\n", -======= "execution_count": null, "metadata": {}, "outputs": [], @@ -124,7 +36,6 @@ "# download the data from our public gcs bucket and save it to disk\n", "# dataset_path, img_path, mask_path = download_gcs_data()\n", "dataset_path = \"../../../CV_datasets/\"\n", ->>>>>>> main "img_path = \"COCO_seg_val_5000/all_images\"\n", "mask_path = \"COCO_seg_val_5000/all_masks\"\n", "\n", @@ -154,11 +65,7 @@ }, { "cell_type": "code", -<<<<<<< HEAD - "execution_count": 4, -======= "execution_count": null, ->>>>>>> main "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -167,23 +74,7 @@ "outputId": "85d91dc9-405e-4f02-a6bf-6a88f9502412", "tags": [] }, -<<<<<<< HEAD - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using cache found in /Users/derek/.cache/torch/hub/pytorch_vision_v0.10.0\n", - "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n", - " warnings.warn(\n", - "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=DeepLabV3_ResNet50_Weights.COCO_WITH_VOC_LABELS_V1`. You can also use `weights=DeepLabV3_ResNet50_Weights.DEFAULT` to get the most up-to-date weights.\n", - " warnings.warn(msg)\n" - ] - } - ], -======= "outputs": [], ->>>>>>> main "source": [ "\n", "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n", @@ -197,51 +88,9 @@ }, { "cell_type": "code", -<<<<<<< HEAD - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/core/__init__.py:27: GalileoWarning: configure is deprecated, use dq.set_console_url and dq.login\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "๐Ÿ“ก https://console.dev.rungalileo.io\n", - "๐Ÿ”ญ Logging you into Galileo\n", - "\n", - "๐Ÿš€ You're logged in to Galileo as galileo@rungalileo.io!\n", - "โœจ Initializing existing public project 'Derek-Elliott-Proj'\n", - "๐Ÿƒโ€โ™‚๏ธ Fetching existing run 'test800'\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/core/init.py:148: GalileoWarning: Run: Derek-Elliott-Proj/test800 already exists! The existing run will get overwritten on call to finish()!\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "๐Ÿ›ฐ Connected to existing project 'Derek-Elliott-Proj', and existing run 'test800'.\n" - ] - } - ], -======= "execution_count": null, "metadata": {}, "outputs": [], ->>>>>>> main "source": [ "try:\n", " import dataquality as dq\n", @@ -260,7 +109,7 @@ "import dataquality as dq\n", "dq.configure()\n", "\n", - "dq.init(\"semantic_segmentation\", \"Derek-Elliott-Proj\", 'test800')\n", + "dq.init(\"semantic_segmentation\", \"Derek-Elliott-Proj\", 'test-polygon-df')\n", "class_dict = { 'background': 0,\n", " 'airplane': 1,\n", " 'bicycle': 2,\n", @@ -288,49 +137,9 @@ }, { "cell_type": "code", -<<<<<<< HEAD - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-05-11 07:57:24.556151: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", - "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "We assume the dataloaders passed only have transforms that Tensor, Resize, and Normalize the image and mask\n", - "โ€ผ Any cropping or shearing transforms passed will lead to unexpected results\n", - "See docs at https://dq.readthedocs.io/en/latest/ (placeholder) for more info \n", - " \n", - "\n", - "Found layer classifier in model layers: backbone, classifier\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True).\n", - " warnings.warn(\n", - "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/torch/cuda/amp/grad_scaler.py:120: UserWarning: torch.cuda.amp.GradScaler is enabled, but CUDA is not available. Disabling.\n", - " warnings.warn(\"torch.cuda.amp.GradScaler is enabled, but CUDA is not available. Disabling.\")\n", - "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/torch/amp/autocast_mode.py:204: UserWarning: User provided device_type of 'cuda', but CUDA is not available. Disabling\n", - " warnings.warn('User provided device_type of \\'cuda\\', but CUDA is not available. Disabling')\n", - " 50%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ | 1/2 [00:02<00:02, 2.91s/it]\n" - ] - } - ], -======= "execution_count": null, "metadata": {}, "outputs": [], ->>>>>>> main "source": [ "from dataquality.integrations.cv.torch.semantic_segmentation import watch\n", "watch(\n", @@ -367,69 +176,10 @@ }, { "cell_type": "code", -<<<<<<< HEAD - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/derek/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--mean_iou/08bc20f4f895f3caf75fb9e3fada1404bded3c3265243d05327cbb3b9326ffe9/mean_iou.py:259: RuntimeWarning: invalid value encountered in divide\n", - " iou = total_area_intersect / total_area_union\n", - "/Users/derek/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--mean_iou/08bc20f4f895f3caf75fb9e3fada1404bded3c3265243d05327cbb3b9326ffe9/mean_iou.py:260: RuntimeWarning: invalid value encountered in divide\n", - " acc = total_area_intersect / total_area_label\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Logging 2 samples [########################################] 100.00% elapsed time : 0.27s = 0.0m = 0.0h\n", - " " - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/loggers/model_logger/base_model_logger.py:87: UserWarning: An issue occurred while logging model outputs. Address any issues in your logging and make sure to call dq.init before restarting:\n", - "OSError('Unable to create file (unable to truncate a file which is already open)')\n", - " warnings.warn(err_msg)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Logging 2 samples [########################################] 100.00% elapsed time : 0.17s = 0.0m = 0.0h\n", - "Logging 2 samples [########################################] 100.00% elapsed time : 0.15s = 0.0m = 0.0h\n", - " " - ] - }, - { - "ename": "GalileoException", - "evalue": "An issue occurred while logging model outputs. Address any issues in your logging and make sure to call dq.init before restarting:\nOSError('Unable to create file (unable to truncate a file which is already open)')", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mGalileoException\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[7], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m dq\u001b[39m.\u001b[39menable_galileo_verbose()\n\u001b[0;32m----> 2\u001b[0m dq\u001b[39m.\u001b[39;49mfinish()\n", - "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/utils/helpers.py:25\u001b[0m, in \u001b[0;36mcheck_noop..decorator\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[39mif\u001b[39;00m galileo_disabled():\n\u001b[1;32m 24\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[0;32m---> 25\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", - "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/core/finish.py:61\u001b[0m, in \u001b[0;36mfinish\u001b[0;34m(last_epoch, wait, create_data_embs)\u001b[0m\n\u001b[1;32m 58\u001b[0m \u001b[39m# Certain tasks require extra finish logic\u001b[39;00m\n\u001b[1;32m 59\u001b[0m data_logger\u001b[39m.\u001b[39mlogger_config\u001b[39m.\u001b[39mfinish()\n\u001b[0;32m---> 61\u001b[0m data_logger\u001b[39m.\u001b[39;49mupload(last_epoch, create_data_embs\u001b[39m=\u001b[39;49mcreate_data_embs)\n\u001b[1;32m 62\u001b[0m upload_dq_log_file()\n\u001b[1;32m 63\u001b[0m body \u001b[39m=\u001b[39m \u001b[39mdict\u001b[39m(\n\u001b[1;32m 64\u001b[0m project_id\u001b[39m=\u001b[39m\u001b[39mstr\u001b[39m(config\u001b[39m.\u001b[39mcurrent_project_id),\n\u001b[1;32m 65\u001b[0m run_id\u001b[39m=\u001b[39m\u001b[39mstr\u001b[39m(config\u001b[39m.\u001b[39mcurrent_run_id),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 70\u001b[0m feature_names\u001b[39m=\u001b[39mdata_logger\u001b[39m.\u001b[39mlogger_config\u001b[39m.\u001b[39mfeature_names,\n\u001b[1;32m 71\u001b[0m )\n", - "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/loggers/data_logger/base_data_logger.py:229\u001b[0m, in \u001b[0;36mBaseGalileoDataLogger.upload\u001b[0;34m(self, last_epoch, create_data_embs)\u001b[0m\n\u001b[1;32m 225\u001b[0m \u001b[39massert\u001b[39;00m (\n\u001b[1;32m 226\u001b[0m config\u001b[39m.\u001b[39mcurrent_project_id \u001b[39mand\u001b[39;00m config\u001b[39m.\u001b[39mcurrent_run_id\n\u001b[1;32m 227\u001b[0m ), \u001b[39m\"\u001b[39m\u001b[39mYou must call dq.init and train a model before calling finish\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 228\u001b[0m ThreadPoolManager\u001b[39m.\u001b[39mwait_for_threads()\n\u001b[0;32m--> 229\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mcheck_for_logging_failures()\n\u001b[1;32m 230\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mโ˜๏ธ Uploading Data\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 231\u001b[0m object_store \u001b[39m=\u001b[39m ObjectStore()\n", - "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/loggers/base_logger.py:354\u001b[0m, in \u001b[0;36mBaseGalileoLogger.check_for_logging_failures\u001b[0;34m(cls)\u001b[0m\n\u001b[1;32m 352\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mcls\u001b[39m\u001b[39m.\u001b[39mlogger_config\u001b[39m.\u001b[39mexception:\n\u001b[1;32m 353\u001b[0m upload_dq_log_file()\n\u001b[0;32m--> 354\u001b[0m \u001b[39mraise\u001b[39;00m GalileoException(\u001b[39mcls\u001b[39m\u001b[39m.\u001b[39mlogger_config\u001b[39m.\u001b[39mexception)\n", - "\u001b[0;31mGalileoException\u001b[0m: An issue occurred while logging model outputs. Address any issues in your logging and make sure to call dq.init before restarting:\nOSError('Unable to create file (unable to truncate a file which is already open)')" - ] - } - ], -======= "execution_count": null, "metadata": {}, "outputs": [], ->>>>>>> main "source": [ - "dq.enable_galileo_verbose()\n", "dq.finish()" ] }, @@ -954,7 +704,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.10" + "version": "3.9.6" }, "toc-autonumbering": false, "toc-showmarkdowntxt": false, From a0006b0fb7f364f77944ff1340ae725761ad9936 Mon Sep 17 00:00:00 2001 From: Derek Date: Thu, 11 May 2023 11:50:45 -0700 Subject: [PATCH 03/16] intermediate commits --- .../model_logger/semantic_segmentation.py | 43 +- dataquality/schemas/semantic_segmentation.py | 2 +- docs/cv/coco_deeplab_hooks.ipynb | 892 +++++++++++++++++- 3 files changed, 882 insertions(+), 55 deletions(-) diff --git a/dataquality/loggers/model_logger/semantic_segmentation.py b/dataquality/loggers/model_logger/semantic_segmentation.py index 1efa3b4a8..96dbe03ef 100644 --- a/dataquality/loggers/model_logger/semantic_segmentation.py +++ b/dataquality/loggers/model_logger/semantic_segmentation.py @@ -226,48 +226,7 @@ def _get_data_dict(self) -> Dict: meta=meta_keys, ) - # polygon_data = self.get_polygon_data(pred_polygons_batch, gold_polygons_batch) - image_ids = [] - polygon_ids = [] - preds = [] - golds = [] - data_error_potentials = [] - errors = [] - for i, image_id in enumerate(self.image_ids): - pred_polygons = pred_polygons_batch[i] - for polygon in pred_polygons: - image_ids.append(image_id) - preds.append(polygon.label_idx) - golds.append(-1) - data_error_potentials.append(0.0) - errors.append(polygon.error_type.value) - upload_polygon_contours( - polygon, self.logger_config.polygon_idx, self.contours_path - ) - polygon_ids.append(self.logger_config.polygon_idx) - self.logger_config.polygon_idx += 1 - gold_polygons = gold_polygons_batch[i] - for polygon in gold_polygons: - image_ids.append(image_id) - preds.append(-1) - golds.append(polygon.label_idx) - data_error_potentials.append(0) - errors.append(polygon.error_type.value) - upload_polygon_contours( - polygon, self.logger_config.polygon_idx, self.contours_path - ) - polygon_ids.append(self.logger_config.polygon_idx) - self.logger_config.polygon_idx += 1 - - polygon_data = { - "id": polygon_ids, - "image_id": image_ids, - "pred": preds, - "gold": golds, - "data_error_potential": data_error_potentials, - "galileo_error_type": errors, - "split": [self.split] * len(image_ids), - } + polygon_data = self.get_polygon_data(pred_polygons_batch, gold_polygons_batch) if self.split == Split.inference: polygon_data["inference_name"] = [self.inference_name] * len(self.image_ids) diff --git a/dataquality/schemas/semantic_segmentation.py b/dataquality/schemas/semantic_segmentation.py index 76104f796..064d7de7c 100644 --- a/dataquality/schemas/semantic_segmentation.py +++ b/dataquality/schemas/semantic_segmentation.py @@ -52,7 +52,7 @@ class Polygon(BaseModel): misclassified_class_label: Optional[int] = None error_type: ErrorType = ErrorType.none contours: List[Contour] - # data_error_potential: float = 0.0 + data_error_potential: float = 0.0 @property def contours_opencv(self) -> List[np.ndarray]: diff --git a/docs/cv/coco_deeplab_hooks.ipynb b/docs/cv/coco_deeplab_hooks.ipynb index c51512882..0a740579d 100644 --- a/docs/cv/coco_deeplab_hooks.ipynb +++ b/docs/cv/coco_deeplab_hooks.ipynb @@ -2,9 +2,20 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'ds = load_dataset(\\n \"CVdatasets/CocoSegmentationOnlyVal5000\",\\n use_auth_token=\"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\"\\n)'" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# !pip install datasets evaluate torch torchvision \n", "import os\n", @@ -29,9 +40,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found dataset, there are 4030 images and 4030 masks\n" + ] + } + ], "source": [ "# download the data from our public gcs bucket and save it to disk\n", "# dataset_path, img_path, mask_path = download_gcs_data()\n", @@ -65,7 +84,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -74,7 +93,19 @@ "outputId": "85d91dc9-405e-4f02-a6bf-6a88f9502412", "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using cache found in /Users/derek/.cache/torch/hub/pytorch_vision_v0.10.0\n", + "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n", + " warnings.warn(\n", + "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=DeepLabV3_ResNet50_Weights.COCO_WITH_VOC_LABELS_V1`. You can also use `weights=DeepLabV3_ResNet50_Weights.DEFAULT` to get the most up-to-date weights.\n", + " warnings.warn(msg)\n" + ] + } + ], "source": [ "\n", "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n", @@ -88,9 +119,45 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/core/__init__.py:27: GalileoWarning: configure is deprecated, use dq.set_console_url and dq.login\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿ“ก https://console.dev.rungalileo.io\n", + "๐Ÿ”ญ Logging you into Galileo\n", + "\n", + "๐Ÿš€ You're logged in to Galileo as galileo@rungalileo.io!\n", + "โœจ Initializing existing public project 'Derek-Elliott-Proj'\n", + "๐Ÿƒโ€โ™‚๏ธ Fetching existing run 'test-polygon-df'\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/core/init.py:171: GalileoWarning: Run: Derek-Elliott-Proj/test-polygon-df already exists! The existing run will get overwritten on call to finish()!\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿ›ฐ Connected to existing project 'Derek-Elliott-Proj', and existing run 'test-polygon-df'.\n" + ] + } + ], "source": [ "try:\n", " import dataquality as dq\n", @@ -137,9 +204,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-05-11 11:49:01.215830: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "We assume the dataloaders passed only have transforms that Tensor, Resize, and Normalize the image and mask\n", + "โ€ผ Any cropping or shearing transforms passed will lead to unexpected results\n", + "See docs at https://dq.readthedocs.io/en/latest/ (placeholder) for more info \n", + " \n", + "\n", + "Found layer classifier in model layers: backbone, classifier\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True).\n", + " warnings.warn(\n", + "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/torch/cuda/amp/grad_scaler.py:120: UserWarning: torch.cuda.amp.GradScaler is enabled, but CUDA is not available. Disabling.\n", + " warnings.warn(\"torch.cuda.amp.GradScaler is enabled, but CUDA is not available. Disabling.\")\n", + "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/torch/amp/autocast_mode.py:204: UserWarning: User provided device_type of 'cuda', but CUDA is not available. Disabling\n", + " warnings.warn('User provided device_type of \\'cuda\\', but CUDA is not available. Disabling')\n", + " 50%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ | 1/2 [00:02<00:02, 2.83s/it]\n" + ] + } + ], "source": [ "from dataquality.integrations.cv.torch.semantic_segmentation import watch\n", "watch(\n", @@ -176,9 +277,776 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/derek/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--mean_iou/08bc20f4f895f3caf75fb9e3fada1404bded3c3265243d05327cbb3b9326ffe9/mean_iou.py:259: RuntimeWarning: invalid value encountered in divide\n", + " iou = total_area_intersect / total_area_union\n", + "/Users/derek/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--mean_iou/08bc20f4f895f3caf75fb9e3fada1404bded3c3265243d05327cbb3b9326ffe9/mean_iou.py:260: RuntimeWarning: invalid value encountered in divide\n", + " acc = total_area_intersect / total_area_label\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Logging 2 samples [########################################] 100.00% elapsed time : 0.29s = 0.0m = 0.0h\n", + "Logging 2 samples [########################################] 100.00% elapsed time : 0.16s = 0.0m = 0.0h\n", + "Logging 2 samples [########################################] 100.00% elapsed time : 0.22s = 0.0m = 0.0h \n", + "Logging 2 samples [########################################] 100.00% elapsed time : 0.27s = 0.0m = 0.0h\n", + " โ˜๏ธ Uploading Data\n", + "CuML libraries not found, running standard process. For faster Galileo processing, consider installing\n", + "`pip install 'dataquality[cuda]' --extra-index-url=https://pypi.nvidia.com/`\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "aba6e837e4d14c1693c3b963f64de2a6", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Uploading data to Galileo: 0%| | 0.00/25.8k [00:00[05/11/23 11:49:23] ERROR error opening __init__.py:271\n", + " '/Users/derek/.galileo/logs/7e78e642-cc40-4f5c-8f45-b2cadd8d674a/f9855 \n", + " eea-4121-4a24-bd68-132d3346a89a/training/0/80e887398eb1.hdf5' \n", + " Traceback (most recent call last): \n", + " File \n", + " \"/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/va \n", + " ex/__init__.py\", line 244, in open \n", + " ds = vaex.dataset.open(path, fs_options=fs_options, fs=fs, \n", + " **kwargs) \n", + " File \n", + " \"/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/va \n", + " ex/dataset.py\", line 81, in open \n", + " return opener.open(path, fs_options=fs_options, fs=fs, *args, \n", + " **kwargs) \n", + " File \n", + " \"/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/va \n", + " ex/dataset.py\", line 1457, in open \n", + " return cls(path, *args, **kwargs) \n", + " File \n", + " \"/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/va \n", + " ex/hdf5/dataset.py\", line 73, in __init__ \n", + " self._freeze() \n", + " File \n", + " \"/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/va \n", + " ex/dataset.py\", line 1501, in _freeze \n", + " self._set_row_count() \n", + " File \n", + " \"/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/va \n", + " ex/dataset.py\", line 423, in _set_row_count \n", + " raise ValueError(f'First columns has length {self._row_count}, \n", + " while column {name} has length {len(value)}') \n", + " ValueError: First columns has length 3, while column epoch has length \n", + " 2 \n", + "\n" + ], + "text/plain": [ + "\u001b[2;36m[05/11/23 11:49:23]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;31mERROR \u001b[0m error opening \u001b]8;id=155436;file:///Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/__init__.py\u001b\\\u001b[2m__init__.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=520318;file:///Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/__init__.py#271\u001b\\\u001b[2m271\u001b[0m\u001b]8;;\u001b\\\n", + "\u001b[2;36m \u001b[0m \u001b[32m'/Users/derek/.galileo/logs/7e78e642-cc40-4f5c-8f45-b2cadd8d674a/f9855\u001b[0m \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[32meea-4121-4a24-bd68-132d3346a89a/training/0/80e887398eb1.hdf5'\u001b[0m \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m Traceback \u001b[1m(\u001b[0mmost recent call last\u001b[1m)\u001b[0m: \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m File \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[32m\"/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/va\u001b[0m \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[32mex/__init__.py\"\u001b[0m, line \u001b[1;36m244\u001b[0m, in open \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m ds = \u001b[1;35mvaex.dataset.open\u001b[0m\u001b[1m(\u001b[0mpath, \u001b[33mfs_options\u001b[0m=\u001b[35mfs_options\u001b[0m, \u001b[33mfs\u001b[0m=\u001b[35mfs\u001b[0m, \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m **kwargs\u001b[1m)\u001b[0m \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m File \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[32m\"/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/va\u001b[0m \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[32mex/dataset.py\"\u001b[0m, line \u001b[1;36m81\u001b[0m, in open \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m return \u001b[1;35mopener.open\u001b[0m\u001b[1m(\u001b[0mpath, \u001b[33mfs_options\u001b[0m=\u001b[35mfs_options\u001b[0m, \u001b[33mfs\u001b[0m=\u001b[35mfs\u001b[0m, *args, \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m **kwargs\u001b[1m)\u001b[0m \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m File \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[32m\"/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/va\u001b[0m \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[32mex/dataset.py\"\u001b[0m, line \u001b[1;36m1457\u001b[0m, in open \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m return \u001b[1;35mcls\u001b[0m\u001b[1m(\u001b[0mpath, *args, **kwargs\u001b[1m)\u001b[0m \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m File \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[32m\"/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/va\u001b[0m \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[32mex/hdf5/dataset.py\"\u001b[0m, line \u001b[1;36m73\u001b[0m, in __init__ \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[1;35mself._freeze\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m File \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[32m\"/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/va\u001b[0m \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[32mex/dataset.py\"\u001b[0m, line \u001b[1;36m1501\u001b[0m, in _freeze \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[1;35mself._set_row_count\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m File \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[32m\"/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/va\u001b[0m \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[32mex/dataset.py\"\u001b[0m, line \u001b[1;36m423\u001b[0m, in _set_row_count \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m raise \u001b[1;35mValueError\u001b[0m\u001b[1m(\u001b[0mf'First columns has length \u001b[1m{\u001b[0mself._row_count\u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m while column \u001b[1m{\u001b[0mname\u001b[1m}\u001b[0m has length \u001b[1m{\u001b[0m\u001b[1;35mlen\u001b[0m\u001b[1m(\u001b[0mvalue\u001b[1m)\u001b[0m\u001b[1m}\u001b[0m'\u001b[1m)\u001b[0m \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m ValueError: First columns has length \u001b[1;36m3\u001b[0m, while column epoch has length \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[1;36m2\u001b[0m \u001b[2m \u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Custom TB Handler failed, unregistering\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)\n", + " \u001b[0;31m[... skipping hidden 1 frame]\u001b[0m\n", + "\n", + "Cell \u001b[0;32mIn[6], line 1\u001b[0m\n", + "\u001b[0;32m----> 1\u001b[0m \u001b[43mdq\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfinish\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "\n", + "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/utils/helpers.py:25\u001b[0m, in \u001b[0;36mcheck_noop..decorator\u001b[0;34m(*args, **kwargs)\u001b[0m\n", + "\u001b[1;32m 24\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n", + "\u001b[0;32m---> 25\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "\n", + "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/core/finish.py:61\u001b[0m, in \u001b[0;36mfinish\u001b[0;34m(last_epoch, wait, create_data_embs)\u001b[0m\n", + "\u001b[1;32m 59\u001b[0m data_logger\u001b[38;5;241m.\u001b[39mlogger_config\u001b[38;5;241m.\u001b[39mfinish()\n", + "\u001b[0;32m---> 61\u001b[0m \u001b[43mdata_logger\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mupload\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlast_epoch\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcreate_data_embs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcreate_data_embs\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[1;32m 62\u001b[0m upload_dq_log_file()\n", + "\n", + "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/loggers/data_logger/base_data_logger.py:267\u001b[0m, in \u001b[0;36mBaseGalileoDataLogger.upload\u001b[0;34m(self, last_epoch, create_data_embs)\u001b[0m\n", + "\u001b[1;32m 266\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m split \u001b[38;5;129;01min\u001b[39;00m Split\u001b[38;5;241m.\u001b[39mget_valid_attributes():\n", + "\u001b[0;32m--> 267\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mupload_split\u001b[49m\u001b[43m(\u001b[49m\n", + "\u001b[1;32m 268\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocation\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msplit\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mobject_store\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlast_epoch\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcreate_data_embs\u001b[49m\n", + "\u001b[1;32m 269\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "\n", + "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/loggers/data_logger/base_data_logger.py:295\u001b[0m, in \u001b[0;36mBaseGalileoDataLogger.upload_split\u001b[0;34m(self, location, split, object_store, last_epoch, create_data_embs)\u001b[0m\n", + "\u001b[1;32m 294\u001b[0m in_frame_split \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconvert_large_string(in_frame_split)\n", + "\u001b[0;32m--> 295\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mupload_split_from_in_frame\u001b[49m\u001b[43m(\u001b[49m\n", + "\u001b[1;32m 296\u001b[0m \u001b[43m \u001b[49m\u001b[43mobject_store\u001b[49m\u001b[43m,\u001b[49m\n", + "\u001b[1;32m 297\u001b[0m \u001b[43m \u001b[49m\u001b[43min_frame_split\u001b[49m\u001b[43m,\u001b[49m\n", + "\u001b[1;32m 298\u001b[0m \u001b[43m \u001b[49m\u001b[43msplit\u001b[49m\u001b[43m,\u001b[49m\n", + "\u001b[1;32m 299\u001b[0m \u001b[43m \u001b[49m\u001b[43msplit_loc\u001b[49m\u001b[43m,\u001b[49m\n", + "\u001b[1;32m 300\u001b[0m \u001b[43m \u001b[49m\u001b[43mlast_epoch\u001b[49m\u001b[43m,\u001b[49m\n", + "\u001b[1;32m 301\u001b[0m \u001b[43m \u001b[49m\u001b[43mcreate_data_embs\u001b[49m\u001b[43m,\u001b[49m\n", + "\u001b[1;32m 302\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[1;32m 303\u001b[0m in_frame_split\u001b[38;5;241m.\u001b[39mclose()\n", + "\n", + "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/loggers/data_logger/semantic_segmentation.py:166\u001b[0m, in \u001b[0;36mSemanticSegmentationDataLogger.upload_split_from_in_frame\u001b[0;34m(cls, object_store, in_frame, split, split_loc, last_epoch, create_data_embs)\u001b[0m\n", + "\u001b[1;32m 165\u001b[0m dir_name \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00msplit_loc\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/0\u001b[39m\u001b[38;5;124m\"\u001b[39m\n", + "\u001b[0;32m--> 166\u001b[0m out_frame \u001b[38;5;241m=\u001b[39m \u001b[43mget_output_df\u001b[49m\u001b[43m(\u001b[49m\n", + "\u001b[1;32m 167\u001b[0m \u001b[43m \u001b[49m\u001b[43mdir_name\u001b[49m\u001b[43m,\u001b[49m\n", + "\u001b[1;32m 168\u001b[0m \u001b[43m \u001b[49m\u001b[43mprob_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n", + "\u001b[1;32m 169\u001b[0m \u001b[43m \u001b[49m\u001b[43msplit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msplit\u001b[49m\u001b[43m,\u001b[49m\n", + "\u001b[1;32m 170\u001b[0m \u001b[43m \u001b[49m\u001b[43mepoch_or_inf\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m,\u001b[49m\n", + "\u001b[1;32m 171\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[1;32m 173\u001b[0m polygon_minio_file \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mproj_run\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00msplit\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/0/prob/prob.hdf5\u001b[39m\u001b[38;5;124m\"\u001b[39m\n", + "\n", + "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/utils/vaex.py:192\u001b[0m, in \u001b[0;36mget_output_df\u001b[0;34m(dir_name, prob_only, split, epoch_or_inf)\u001b[0m\n", + "\u001b[1;32m 191\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m vaex\u001b[38;5;241m.\u001b[39mopen(out_frame_path)\n", + "\u001b[0;32m--> 192\u001b[0m str_cols \u001b[38;5;241m=\u001b[39m \u001b[43mconcat_hdf5_files\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdir_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mprob_only\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[1;32m 193\u001b[0m out_frame \u001b[38;5;241m=\u001b[39m vaex\u001b[38;5;241m.\u001b[39mopen(out_frame_path)\n", + "\n", + "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/utils/hdf5_store.py:129\u001b[0m, in \u001b[0;36mconcat_hdf5_files\u001b[0;34m(location, prob_only)\u001b[0m\n", + "\u001b[1;32m 128\u001b[0m files \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mlistdir(location)\n", + "\u001b[0;32m--> 129\u001b[0m df \u001b[38;5;241m=\u001b[39m \u001b[43mvaex\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mlocation\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m/\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mfiles\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "\u001b[1;32m 131\u001b[0m \u001b[38;5;66;03m# Construct a store per column\u001b[39;00m\n", + "\n", + "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/__init__.py:244\u001b[0m, in \u001b[0;36mopen\u001b[0;34m(path, convert, progress, shuffle, fs_options, fs, *args, **kwargs)\u001b[0m\n", + "\u001b[1;32m 243\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + "\u001b[0;32m--> 244\u001b[0m ds \u001b[38;5;241m=\u001b[39m \u001b[43mvaex\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdataset\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfs_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfs_options\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[1;32m 245\u001b[0m df \u001b[38;5;241m=\u001b[39m vaex\u001b[38;5;241m.\u001b[39mfrom_dataset(ds)\n", + "\n", + "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/dataset.py:81\u001b[0m, in \u001b[0;36mopen\u001b[0;34m(path, fs_options, fs, *args, **kwargs)\u001b[0m\n", + "\u001b[1;32m 80\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m opener\u001b[38;5;241m.\u001b[39mcan_open(path, fs_options\u001b[38;5;241m=\u001b[39mfs_options, fs\u001b[38;5;241m=\u001b[39mfs, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n", + "\u001b[0;32m---> 81\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mopener\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfs_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfs_options\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[1;32m 83\u001b[0m \u001b[38;5;66;03m# otherwise try all openers\u001b[39;00m\n", + "\n", + "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/dataset.py:1457\u001b[0m, in \u001b[0;36mDatasetFile.open\u001b[0;34m(cls, path, *args, **kwargs)\u001b[0m\n", + "\u001b[1;32m 1455\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n", + "\u001b[1;32m 1456\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mopen\u001b[39m(\u001b[38;5;28mcls\u001b[39m, path, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n", + "\u001b[0;32m-> 1457\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "\n", + "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/hdf5/dataset.py:73\u001b[0m, in \u001b[0;36mHdf5MemoryMapped.__init__\u001b[0;34m(self, path, write, fs_options, fs, nommap, group, _fingerprint)\u001b[0m\n", + "\u001b[1;32m 72\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m write: \u001b[38;5;66;03m# in write mode, call freeze yourself, so the hashes are computed\u001b[39;00m\n", + "\u001b[0;32m---> 73\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_freeze\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[1;32m 74\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + "\u001b[1;32m 75\u001b[0m \u001b[38;5;66;03m# make sure we set the row count, which otherwise freeze would do\u001b[39;00m\n", + "\n", + "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/dataset.py:1501\u001b[0m, in \u001b[0;36mDatasetFile._freeze\u001b[0;34m(self)\u001b[0m\n", + "\u001b[1;32m 1500\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_columns \u001b[38;5;241m=\u001b[39m frozendict(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_columns)\n", + "\u001b[0;32m-> 1501\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_set_row_count\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[1;32m 1502\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_frozen \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n", + "\n", + "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/dataset.py:423\u001b[0m, in \u001b[0;36mDataset._set_row_count\u001b[0;34m(self)\u001b[0m\n", + "\u001b[1;32m 422\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(value) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_row_count:\n", + "\u001b[0;32m--> 423\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mFirst columns has length \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_row_count\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m, while column \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m has length \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(value)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n", + "\n", + "\u001b[0;31mValueError\u001b[0m: First columns has length 3, while column epoch has length 2\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)\n", + "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/analytics.py:149\u001b[0m, in \u001b[0;36mAnalytics.ipython_exception_handler\u001b[0;34m(self, shell, etype, evalue, tb, tb_offset)\u001b[0m\n", + "\u001b[1;32m 147\u001b[0m \u001b[38;5;28;01mpass\u001b[39;00m\n", + "\u001b[1;32m 148\u001b[0m \u001b[38;5;66;03m# We need to call the default ipython exception handler to raise the error\u001b[39;00m\n", + "\u001b[0;32m--> 149\u001b[0m \u001b[43mshell\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshowtraceback\u001b[49m\u001b[43m(\u001b[49m\u001b[43m(\u001b[49m\u001b[43metype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mevalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtb\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtb_offset\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtb_offset\u001b[49m\u001b[43m)\u001b[49m\n", + "\n", + "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/rich/traceback.py:130\u001b[0m, in \u001b[0;36minstall..ipy_excepthook_closure..ipy_show_traceback\u001b[0;34m(*args, **kwargs)\u001b[0m\n", + "\u001b[1;32m 128\u001b[0m \u001b[38;5;28;01mnonlocal\u001b[39;00m tb_data\n", + "\u001b[1;32m 129\u001b[0m tb_data \u001b[38;5;241m=\u001b[39m kwargs\n", + "\u001b[0;32m--> 130\u001b[0m \u001b[43mdefault_showtraceback\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "\n", + "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/IPython/core/interactiveshell.py:2116\u001b[0m, in \u001b[0;36mInteractiveShell.showtraceback\u001b[0;34m(self, exc_tuple, filename, tb_offset, exception_only, running_compiled_code)\u001b[0m\n", + "\u001b[1;32m 2113\u001b[0m traceback\u001b[38;5;241m.\u001b[39mprint_exc()\n", + "\u001b[1;32m 2114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n", + "\u001b[0;32m-> 2116\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_showtraceback\u001b[49m\u001b[43m(\u001b[49m\u001b[43metype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstb\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[1;32m 2117\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcall_pdb:\n", + "\u001b[1;32m 2118\u001b[0m \u001b[38;5;66;03m# drop into debugger\u001b[39;00m\n", + "\u001b[1;32m 2119\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdebugger(force\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n", + "\n", + "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/rich/traceback.py:146\u001b[0m, in \u001b[0;36minstall..ipy_excepthook_closure..ipy_display_traceback\u001b[0;34m(is_syntax, *args, **kwargs)\u001b[0m\n", + "\u001b[1;32m 144\u001b[0m tb_offset \u001b[38;5;241m=\u001b[39m tb_data\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtb_offset\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m1\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m compiled \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;241m0\u001b[39m)\n", + "\u001b[1;32m 145\u001b[0m \u001b[38;5;66;03m# remove ipython internal frames from trace with tb_offset\u001b[39;00m\n", + "\u001b[0;32m--> 146\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m _ \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28;43mrange\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mtb_offset\u001b[49m\u001b[43m)\u001b[49m:\n", + "\u001b[1;32m 147\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tb \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "\u001b[1;32m 148\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n", + "\n", + "\u001b[0;31mTypeError\u001b[0m: 'NoneType' object cannot be interpreted as an integer\n", + "The original exception:\n" + ] + }, + { + "data": { + "text/html": [ + "
โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ Traceback (most recent call last) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ\n",
+       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/IPython/core/interactiveshell โ”‚\n",
+       "โ”‚ .py:3508 in run_code                                                                             โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚   3505 โ”‚   โ”‚   โ”‚   โ”‚   if async_:                                                                โ”‚\n",
+       "โ”‚   3506 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   await eval(code_obj, self.user_global_ns, self.user_ns)               โ”‚\n",
+       "โ”‚   3507 โ”‚   โ”‚   โ”‚   โ”‚   else:                                                                     โ”‚\n",
+       "โ”‚ โฑ 3508 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   exec(code_obj, self.user_global_ns, self.user_ns)                     โ”‚\n",
+       "โ”‚   3509 โ”‚   โ”‚   โ”‚   finally:                                                                      โ”‚\n",
+       "โ”‚   3510 โ”‚   โ”‚   โ”‚   โ”‚   # Reset our crash handler in place                                        โ”‚\n",
+       "โ”‚   3511 โ”‚   โ”‚   โ”‚   โ”‚   sys.excepthook = old_excepthook                                           โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚ in <module>:1                                                                                    โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚ โฑ 1 dq.finish()                                                                                  โ”‚\n",
+       "โ”‚   2                                                                                              โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/utils/helpers.py: โ”‚\n",
+       "โ”‚ 25 in decorator                                                                                  โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚    22 โ”‚   def decorator(*args: P.args, **kwargs: P.kwargs) -> Optional[T]:                       โ”‚\n",
+       "โ”‚    23 โ”‚   โ”‚   if galileo_disabled():                                                             โ”‚\n",
+       "โ”‚    24 โ”‚   โ”‚   โ”‚   return None                                                                    โ”‚\n",
+       "โ”‚ โฑ  25 โ”‚   โ”‚   return func(*args, **kwargs)                                                       โ”‚\n",
+       "โ”‚    26 โ”‚                                                                                          โ”‚\n",
+       "โ”‚    27 โ”‚   return decorator                                                                       โ”‚\n",
+       "โ”‚    28                                                                                            โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/core/finish.py:61 โ”‚\n",
+       "โ”‚ in finish                                                                                        โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚    58 โ”‚   # Certain tasks require extra finish logic                                             โ”‚\n",
+       "โ”‚    59 โ”‚   data_logger.logger_config.finish()                                                     โ”‚\n",
+       "โ”‚    60 โ”‚                                                                                          โ”‚\n",
+       "โ”‚ โฑ  61 โ”‚   data_logger.upload(last_epoch, create_data_embs=create_data_embs)                      โ”‚\n",
+       "โ”‚    62 โ”‚   upload_dq_log_file()                                                                   โ”‚\n",
+       "โ”‚    63 โ”‚   body = dict(                                                                           โ”‚\n",
+       "โ”‚    64 โ”‚   โ”‚   project_id=str(config.current_project_id),                                         โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/loggers/data_logg โ”‚\n",
+       "โ”‚ er/base_data_logger.py:267 in upload                                                             โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚   264 โ”‚   โ”‚   โ”‚   create_data_embs = False                                                       โ”‚\n",
+       "โ”‚   265 โ”‚   โ”‚                                                                                      โ”‚\n",
+       "โ”‚   266 โ”‚   โ”‚   for split in Split.get_valid_attributes():                                         โ”‚\n",
+       "โ”‚ โฑ 267 โ”‚   โ”‚   โ”‚   self.upload_split(                                                             โ”‚\n",
+       "โ”‚   268 โ”‚   โ”‚   โ”‚   โ”‚   location, split, object_store, last_epoch, create_data_embs                โ”‚\n",
+       "โ”‚   269 โ”‚   โ”‚   โ”‚   )                                                                              โ”‚\n",
+       "โ”‚   270                                                                                            โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/loggers/data_logg โ”‚\n",
+       "โ”‚ er/base_data_logger.py:295 in upload_split                                                       โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚   292 โ”‚   โ”‚   โ”‚   return                                                                         โ”‚\n",
+       "โ”‚   293 โ”‚   โ”‚   in_frame_split = vaex.open(f\"{in_frame_path}/*.{self.INPUT_DATA_FILE_EXT}\")        โ”‚\n",
+       "โ”‚   294 โ”‚   โ”‚   in_frame_split = self.convert_large_string(in_frame_split)                         โ”‚\n",
+       "โ”‚ โฑ 295 โ”‚   โ”‚   self.upload_split_from_in_frame(                                                   โ”‚\n",
+       "โ”‚   296 โ”‚   โ”‚   โ”‚   object_store,                                                                  โ”‚\n",
+       "โ”‚   297 โ”‚   โ”‚   โ”‚   in_frame_split,                                                                โ”‚\n",
+       "โ”‚   298 โ”‚   โ”‚   โ”‚   split,                                                                         โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/loggers/data_logg โ”‚\n",
+       "โ”‚ er/semantic_segmentation.py:166 in upload_split_from_in_frame                                    โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚   163 โ”‚   โ”‚   )                                                                                  โ”‚\n",
+       "โ”‚   164 โ”‚   โ”‚                                                                                      โ”‚\n",
+       "โ”‚   165 โ”‚   โ”‚   dir_name = f\"{split_loc}/0\"                                                        โ”‚\n",
+       "โ”‚ โฑ 166 โ”‚   โ”‚   out_frame = get_output_df(                                                         โ”‚\n",
+       "โ”‚   167 โ”‚   โ”‚   โ”‚   dir_name,                                                                      โ”‚\n",
+       "โ”‚   168 โ”‚   โ”‚   โ”‚   prob_only=False,                                                               โ”‚\n",
+       "โ”‚   169 โ”‚   โ”‚   โ”‚   split=split,                                                                   โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/utils/vaex.py:192 โ”‚\n",
+       "โ”‚ in get_output_df                                                                                 โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚   189 โ”‚   # just open the processed file                                                         โ”‚\n",
+       "โ”‚   190 โ”‚   if os.path.isfile(out_frame_path):                                                     โ”‚\n",
+       "โ”‚   191 โ”‚   โ”‚   return vaex.open(out_frame_path)                                                   โ”‚\n",
+       "โ”‚ โฑ 192 โ”‚   str_cols = concat_hdf5_files(dir_name, prob_only)                                      โ”‚\n",
+       "โ”‚   193 โ”‚   out_frame = vaex.open(out_frame_path)                                                  โ”‚\n",
+       "โ”‚   194 โ”‚                                                                                          โ”‚\n",
+       "โ”‚   195 โ”‚   if split == Split.inference:                                                           โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/utils/hdf5_store. โ”‚\n",
+       "โ”‚ py:129 in concat_hdf5_files                                                                      โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚   126 โ”‚   str_cols = []                                                                          โ”‚\n",
+       "โ”‚   127 โ”‚   stores = {}                                                                            โ”‚\n",
+       "โ”‚   128 โ”‚   files = os.listdir(location)                                                           โ”‚\n",
+       "โ”‚ โฑ 129 โ”‚   df = vaex.open(f\"{location}/{files[0]}\")                                               โ”‚\n",
+       "โ”‚   130 โ”‚                                                                                          โ”‚\n",
+       "โ”‚   131 โ”‚   # Construct a store per column                                                         โ”‚\n",
+       "โ”‚   132 โ”‚   if prob_only:                                                                          โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/__init__.py:244 in open  โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚   241 โ”‚   โ”‚   โ”‚   โ”‚   )                                                                          โ”‚\n",
+       "โ”‚   242 โ”‚   โ”‚   โ”‚   โ”‚   ds = vaex.dataset.open(path_output, fs_options=fs_options, fs=fs)          โ”‚\n",
+       "โ”‚   243 โ”‚   โ”‚   โ”‚   else:                                                                          โ”‚\n",
+       "โ”‚ โฑ 244 โ”‚   โ”‚   โ”‚   โ”‚   ds = vaex.dataset.open(path, fs_options=fs_options, fs=fs, **kwargs)       โ”‚\n",
+       "โ”‚   245 โ”‚   โ”‚   โ”‚   df = vaex.from_dataset(ds)                                                     โ”‚\n",
+       "โ”‚   246 โ”‚   โ”‚   โ”‚   if df is None:                                                                 โ”‚\n",
+       "โ”‚   247 โ”‚   โ”‚   โ”‚   โ”‚   if os.path.exists(path):                                                   โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/dataset.py:81 in open    โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚     78 โ”‚   for opener in opener_classes:                                                         โ”‚\n",
+       "โ”‚     79 โ”‚   โ”‚   if opener.quick_test(path, fs_options=fs_options, fs=fs):                         โ”‚\n",
+       "โ”‚     80 โ”‚   โ”‚   โ”‚   if opener.can_open(path, fs_options=fs_options, fs=fs, *args, **kwargs):      โ”‚\n",
+       "โ”‚ โฑ   81 โ”‚   โ”‚   โ”‚   โ”‚   return opener.open(path, fs_options=fs_options, fs=fs, *args, **kwargs)   โ”‚\n",
+       "โ”‚     82 โ”‚                                                                                         โ”‚\n",
+       "โ”‚     83 โ”‚   # otherwise try all openers                                                           โ”‚\n",
+       "โ”‚     84 โ”‚   for opener in opener_classes:                                                         โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/dataset.py:1457 in open  โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚   1454 โ”‚                                                                                         โ”‚\n",
+       "โ”‚   1455 โ”‚   @classmethod                                                                          โ”‚\n",
+       "โ”‚   1456 โ”‚   def open(cls, path, *args, **kwargs):                                                 โ”‚\n",
+       "โ”‚ โฑ 1457 โ”‚   โ”‚   return cls(path, *args, **kwargs)                                                 โ”‚\n",
+       "โ”‚   1458 โ”‚                                                                                         โ”‚\n",
+       "โ”‚   1459 โ”‚   def chunk_iterator(self, columns, chunk_size=None, reverse=False):                    โ”‚\n",
+       "โ”‚   1460 โ”‚   โ”‚   yield from self._default_chunk_iterator(self._columns, columns, chunk_size, reve  โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/hdf5/dataset.py:73 in    โ”‚\n",
+       "โ”‚ __init__                                                                                         โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚    70 โ”‚   โ”‚   self._version = 1                                                                  โ”‚\n",
+       "โ”‚    71 โ”‚   โ”‚   self._load()                                                                       โ”‚\n",
+       "โ”‚    72 โ”‚   โ”‚   if not write:  # in write mode, call freeze yourself, so the hashes are computed   โ”‚\n",
+       "โ”‚ โฑ  73 โ”‚   โ”‚   โ”‚   self._freeze()                                                                 โ”‚\n",
+       "โ”‚    74 โ”‚   โ”‚   else:                                                                              โ”‚\n",
+       "โ”‚    75 โ”‚   โ”‚   โ”‚   # make sure we set the row count, which otherwise freeze would do              โ”‚\n",
+       "โ”‚    76 โ”‚   โ”‚   โ”‚   self._set_row_count()                                                          โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/dataset.py:1501 in       โ”‚\n",
+       "โ”‚ _freeze                                                                                          โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚   1498 โ”‚   def _freeze(self):                                                                    โ”‚\n",
+       "โ”‚   1499 โ”‚   โ”‚   self._ids = frozendict(self._ids)                                                 โ”‚\n",
+       "โ”‚   1500 โ”‚   โ”‚   self._columns = frozendict(self._columns)                                         โ”‚\n",
+       "โ”‚ โฑ 1501 โ”‚   โ”‚   self._set_row_count()                                                             โ”‚\n",
+       "โ”‚   1502 โ”‚   โ”‚   self._frozen = True                                                               โ”‚\n",
+       "โ”‚   1503 โ”‚   โ”‚   if self._hash_cache_needs_write:                                                  โ”‚\n",
+       "โ”‚   1504 โ”‚   โ”‚   โ”‚   self._write_hash_info()                                                       โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/dataset.py:423 in        โ”‚\n",
+       "โ”‚ _set_row_count                                                                                   โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚    420 โ”‚   โ”‚   self._row_count = len(values[0])                                                  โ”‚\n",
+       "โ”‚    421 โ”‚   โ”‚   for name, value in list(self._columns.items())[1:]:                               โ”‚\n",
+       "โ”‚    422 โ”‚   โ”‚   โ”‚   if len(value) != self._row_count:                                             โ”‚\n",
+       "โ”‚ โฑ  423 โ”‚   โ”‚   โ”‚   โ”‚   raise ValueError(f'First columns has length {self._row_count}, while col  โ”‚\n",
+       "โ”‚    424 โ”‚                                                                                         โ”‚\n",
+       "โ”‚    425 โ”‚   @property                                                                             โ”‚\n",
+       "โ”‚    426 โ”‚   def row_count(self):                                                                  โ”‚\n",
+       "โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ\n",
+       "ValueError: First columns has length 3, while column epoch has length 2\n",
+       "\n",
+       "During handling of the above exception, another exception occurred:\n",
+       "\n",
+       "โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ Traceback (most recent call last) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ\n",
+       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/IPython/core/interactiveshell โ”‚\n",
+       "โ”‚ .py:1975 in wrapped                                                                              โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚   1972 โ”‚   โ”‚   โ”‚   โ”‚   handlers to crash IPython.                                                โ”‚\n",
+       "โ”‚   1973 โ”‚   โ”‚   โ”‚   โ”‚   \"\"\"                                                                       โ”‚\n",
+       "โ”‚   1974 โ”‚   โ”‚   โ”‚   โ”‚   try:                                                                      โ”‚\n",
+       "โ”‚ โฑ 1975 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   stb = handler(self,etype,value,tb,tb_offset=tb_offset)                โ”‚\n",
+       "โ”‚   1976 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   return validate_stb(stb)                                              โ”‚\n",
+       "โ”‚   1977 โ”‚   โ”‚   โ”‚   โ”‚   except:                                                                   โ”‚\n",
+       "โ”‚   1978 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   # clear custom handler immediately                                    โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/analytics.py:149  โ”‚\n",
+       "โ”‚ in ipython_exception_handler                                                                     โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚   146 โ”‚   โ”‚   โ”‚   # TODO: create internal logging endpoint                                       โ”‚\n",
+       "โ”‚   147 โ”‚   โ”‚   โ”‚   pass                                                                           โ”‚\n",
+       "โ”‚   148 โ”‚   โ”‚   # We need to call the default ipython exception handler to raise the error         โ”‚\n",
+       "โ”‚ โฑ 149 โ”‚   โ”‚   shell.showtraceback((etype, evalue, tb), tb_offset=tb_offset)                      โ”‚\n",
+       "โ”‚   150 โ”‚                                                                                          โ”‚\n",
+       "โ”‚   151 โ”‚   def track_exception_ipython(                                                           โ”‚\n",
+       "โ”‚   152 โ”‚   โ”‚   self,                                                                              โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/rich/traceback.py:130 in      โ”‚\n",
+       "โ”‚ ipy_show_traceback                                                                               โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚   127 โ”‚   โ”‚   โ”‚   \"\"\"wrap the default ip.showtraceback to store info for ip._showtraceback\"\"\"    โ”‚\n",
+       "โ”‚   128 โ”‚   โ”‚   โ”‚   nonlocal tb_data                                                               โ”‚\n",
+       "โ”‚   129 โ”‚   โ”‚   โ”‚   tb_data = kwargs                                                               โ”‚\n",
+       "โ”‚ โฑ 130 โ”‚   โ”‚   โ”‚   default_showtraceback(*args, **kwargs)                                         โ”‚\n",
+       "โ”‚   131 โ”‚   โ”‚                                                                                      โ”‚\n",
+       "โ”‚   132 โ”‚   โ”‚   def ipy_display_traceback(                                                         โ”‚\n",
+       "โ”‚   133 โ”‚   โ”‚   โ”‚   *args: Any, is_syntax: bool = False, **kwargs: Any                             โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/IPython/core/interactiveshell โ”‚\n",
+       "โ”‚ .py:2116 in showtraceback                                                                        โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚   2113 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   traceback.print_exc()                                             โ”‚\n",
+       "โ”‚   2114 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   return None                                                       โ”‚\n",
+       "โ”‚   2115 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚                                                                         โ”‚\n",
+       "โ”‚ โฑ 2116 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   self._showtraceback(etype, value, stb)                                โ”‚\n",
+       "โ”‚   2117 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   if self.call_pdb:                                                     โ”‚\n",
+       "โ”‚   2118 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   # drop into debugger                                              โ”‚\n",
+       "โ”‚   2119 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   self.debugger(force=True)                                         โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/rich/traceback.py:146 in      โ”‚\n",
+       "โ”‚ ipy_display_traceback                                                                            โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚   143 โ”‚   โ”‚   โ”‚   compiled = tb_data.get(\"running_compiled_code\", False)                         โ”‚\n",
+       "โ”‚   144 โ”‚   โ”‚   โ”‚   tb_offset = tb_data.get(\"tb_offset\", 1 if compiled else 0)                     โ”‚\n",
+       "โ”‚   145 โ”‚   โ”‚   โ”‚   # remove ipython internal frames from trace with tb_offset                     โ”‚\n",
+       "โ”‚ โฑ 146 โ”‚   โ”‚   โ”‚   for _ in range(tb_offset):                                                     โ”‚\n",
+       "โ”‚   147 โ”‚   โ”‚   โ”‚   โ”‚   if tb is None:                                                             โ”‚\n",
+       "โ”‚   148 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   break                                                                  โ”‚\n",
+       "โ”‚   149 โ”‚   โ”‚   โ”‚   โ”‚   tb = tb.tb_next                                                            โ”‚\n",
+       "โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ\n",
+       "TypeError: 'NoneType' object cannot be interpreted as an integer\n",
+       "\n",
+       "During handling of the above exception, another exception occurred:\n",
+       "\n",
+       "โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ Traceback (most recent call last) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ\n",
+       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/IPython/core/interactiveshell โ”‚\n",
+       "โ”‚ .py:3448 in run_ast_nodes                                                                        โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚   3445 โ”‚   โ”‚   โ”‚   โ”‚   ):                                                                        โ”‚\n",
+       "โ”‚   3446 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   code = compiler(mod, cell_name, mode)                                 โ”‚\n",
+       "โ”‚   3447 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   asy = compare(code)                                                   โ”‚\n",
+       "โ”‚ โฑ 3448 โ”‚   โ”‚   โ”‚   โ”‚   if await self.run_code(code, result, async_=asy):                         โ”‚\n",
+       "โ”‚   3449 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   return True                                                           โ”‚\n",
+       "โ”‚   3450 โ”‚   โ”‚   โ”‚                                                                                 โ”‚\n",
+       "โ”‚   3451 โ”‚   โ”‚   โ”‚   # Flush softspace                                                             โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/IPython/core/interactiveshell โ”‚\n",
+       "โ”‚ .py:3526 in run_code                                                                             โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚   3523 โ”‚   โ”‚   โ”‚   etype, value, tb = sys.exc_info()                                             โ”‚\n",
+       "โ”‚   3524 โ”‚   โ”‚   โ”‚   if result is not None:                                                        โ”‚\n",
+       "โ”‚   3525 โ”‚   โ”‚   โ”‚   โ”‚   result.error_in_exec = value                                              โ”‚\n",
+       "โ”‚ โฑ 3526 โ”‚   โ”‚   โ”‚   self.CustomTB(etype, value, tb)                                               โ”‚\n",
+       "โ”‚   3527 โ”‚   โ”‚   except:                                                                           โ”‚\n",
+       "โ”‚   3528 โ”‚   โ”‚   โ”‚   if result is not None:                                                        โ”‚\n",
+       "โ”‚   3529 โ”‚   โ”‚   โ”‚   โ”‚   result.error_in_exec = sys.exc_info()[1]                                  โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/IPython/core/interactiveshell โ”‚\n",
+       "โ”‚ .py:1985 in wrapped                                                                              โ”‚\n",
+       "โ”‚                                                                                                  โ”‚\n",
+       "โ”‚   1982 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   stb = self.InteractiveTB.structured_traceback(*sys.exc_info())        โ”‚\n",
+       "โ”‚   1983 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   print(self.InteractiveTB.stb2text(stb))                               โ”‚\n",
+       "โ”‚   1984 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   print(\"The original exception:\")                                      โ”‚\n",
+       "โ”‚ โฑ 1985 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   stb = self.InteractiveTB.structured_traceback(                        โ”‚\n",
+       "โ”‚   1986 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   (etype,value,tb), tb_offset=tb_offset         โ”‚\n",
+       "โ”‚   1987 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   )                                                                     โ”‚\n",
+       "โ”‚   1988 โ”‚   โ”‚   โ”‚   โ”‚   return stb                                                                โ”‚\n",
+       "โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ\n",
+       "TypeError: structured_traceback() missing 1 required positional argument: 'evalue'\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[31mโ•ญโ”€\u001b[0m\u001b[31mโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31mโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\u001b[0m\u001b[31mโ”€โ•ฎ\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/IPython/core/\u001b[0m\u001b[1;33minteractiveshell\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[1;33m.py\u001b[0m:\u001b[94m3508\u001b[0m in \u001b[92mrun_code\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m3505 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mif\u001b[0m async_: \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m3506 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mawait\u001b[0m \u001b[96meval\u001b[0m(code_obj, \u001b[96mself\u001b[0m.user_global_ns, \u001b[96mself\u001b[0m.user_ns) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m3507 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m3508 \u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0mexec(code_obj, \u001b[96mself\u001b[0m.user_global_ns, \u001b[96mself\u001b[0m.user_ns) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m3509 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mfinally\u001b[0m: \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m3510 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[2m# Reset our crash handler in place\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m3511 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0msys.excepthook = old_excepthook \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m in \u001b[92m\u001b[0m:\u001b[94m1\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m1 dq.finish() \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m2 \u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/utils/\u001b[0m\u001b[1;33mhelpers.py\u001b[0m: \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[94m25\u001b[0m in \u001b[92mdecorator\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m 22 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mdecorator\u001b[0m(*args: P.args, **kwargs: P.kwargs) -> Optional[T]: \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m 23 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[94mif\u001b[0m galileo_disabled(): \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m 24 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mreturn\u001b[0m \u001b[94mNone\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m 25 \u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[94mreturn\u001b[0m func(*args, **kwargs) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m 26 \u001b[0m\u001b[2mโ”‚ \u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m 27 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[94mreturn\u001b[0m decorator \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m 28 \u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/core/\u001b[0m\u001b[1;33mfinish.py\u001b[0m:\u001b[94m61\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m in \u001b[92mfinish\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m 58 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[2m# Certain tasks require extra finish logic\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m 59 \u001b[0m\u001b[2mโ”‚ \u001b[0mdata_logger.logger_config.finish() \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m 60 \u001b[0m\u001b[2mโ”‚ \u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m 61 \u001b[2mโ”‚ \u001b[0mdata_logger.upload(last_epoch, create_data_embs=create_data_embs) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m 62 \u001b[0m\u001b[2mโ”‚ \u001b[0mupload_dq_log_file() \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m 63 \u001b[0m\u001b[2mโ”‚ \u001b[0mbody = \u001b[96mdict\u001b[0m( \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m 64 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0mproject_id=\u001b[96mstr\u001b[0m(config.current_project_id), \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/loggers/data_logg\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2;33mer/\u001b[0m\u001b[1;33mbase_data_logger.py\u001b[0m:\u001b[94m267\u001b[0m in \u001b[92mupload\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m264 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0mcreate_data_embs = \u001b[94mFalse\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m265 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m266 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[94mfor\u001b[0m split \u001b[95min\u001b[0m Split.get_valid_attributes(): \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m267 \u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[96mself\u001b[0m.upload_split( \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m268 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0mlocation, split, object_store, last_epoch, create_data_embs \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m269 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m270 \u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/loggers/data_logg\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2;33mer/\u001b[0m\u001b[1;33mbase_data_logger.py\u001b[0m:\u001b[94m295\u001b[0m in \u001b[92mupload_split\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m292 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mreturn\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m293 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0min_frame_split = vaex.open(\u001b[33mf\u001b[0m\u001b[33m\"\u001b[0m\u001b[33m{\u001b[0min_frame_path\u001b[33m}\u001b[0m\u001b[33m/*.\u001b[0m\u001b[33m{\u001b[0m\u001b[96mself\u001b[0m.INPUT_DATA_FILE_EXT\u001b[33m}\u001b[0m\u001b[33m\"\u001b[0m) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m294 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0min_frame_split = \u001b[96mself\u001b[0m.convert_large_string(in_frame_split) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m295 \u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[96mself\u001b[0m.upload_split_from_in_frame( \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m296 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0mobject_store, \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m297 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0min_frame_split, \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m298 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0msplit, \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/loggers/data_logg\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2;33mer/\u001b[0m\u001b[1;33msemantic_segmentation.py\u001b[0m:\u001b[94m166\u001b[0m in \u001b[92mupload_split_from_in_frame\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m163 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m164 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m165 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0mdir_name = \u001b[33mf\u001b[0m\u001b[33m\"\u001b[0m\u001b[33m{\u001b[0msplit_loc\u001b[33m}\u001b[0m\u001b[33m/0\u001b[0m\u001b[33m\"\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m166 \u001b[2mโ”‚ โ”‚ \u001b[0mout_frame = get_output_df( \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m167 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0mdir_name, \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m168 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0mprob_only=\u001b[94mFalse\u001b[0m, \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m169 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0msplit=split, \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/utils/\u001b[0m\u001b[1;33mvaex.py\u001b[0m:\u001b[94m192\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m in \u001b[92mget_output_df\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m189 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[2m# just open the processed file\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m190 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[94mif\u001b[0m os.path.isfile(out_frame_path): \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m191 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[94mreturn\u001b[0m vaex.open(out_frame_path) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m192 \u001b[2mโ”‚ \u001b[0mstr_cols = concat_hdf5_files(dir_name, prob_only) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m193 \u001b[0m\u001b[2mโ”‚ \u001b[0mout_frame = vaex.open(out_frame_path) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m194 \u001b[0m\u001b[2mโ”‚ \u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m195 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[94mif\u001b[0m split == Split.inference: \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/utils/\u001b[0m\u001b[1;33mhdf5_store.\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[1;33mpy\u001b[0m:\u001b[94m129\u001b[0m in \u001b[92mconcat_hdf5_files\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m126 \u001b[0m\u001b[2mโ”‚ \u001b[0mstr_cols = [] \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m127 \u001b[0m\u001b[2mโ”‚ \u001b[0mstores = {} \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m128 \u001b[0m\u001b[2mโ”‚ \u001b[0mfiles = os.listdir(location) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m129 \u001b[2mโ”‚ \u001b[0mdf = vaex.open(\u001b[33mf\u001b[0m\u001b[33m\"\u001b[0m\u001b[33m{\u001b[0mlocation\u001b[33m}\u001b[0m\u001b[33m/\u001b[0m\u001b[33m{\u001b[0mfiles[\u001b[94m0\u001b[0m]\u001b[33m}\u001b[0m\u001b[33m\"\u001b[0m) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m130 \u001b[0m\u001b[2mโ”‚ \u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m131 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[2m# Construct a store per column\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m132 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[94mif\u001b[0m prob_only: \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/\u001b[0m\u001b[1;33m__init__.py\u001b[0m:\u001b[94m244\u001b[0m in \u001b[92mopen\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m241 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m242 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0mds = vaex.dataset.open(path_output, fs_options=fs_options, fs=fs) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m243 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m244 \u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0mds = vaex.dataset.open(path, fs_options=fs_options, fs=fs, **kwargs) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m245 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0mdf = vaex.from_dataset(ds) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m246 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mif\u001b[0m df \u001b[95mis\u001b[0m \u001b[94mNone\u001b[0m: \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m247 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mif\u001b[0m os.path.exists(path): \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/\u001b[0m\u001b[1;33mdataset.py\u001b[0m:\u001b[94m81\u001b[0m in \u001b[92mopen\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m 78 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[94mfor\u001b[0m opener \u001b[95min\u001b[0m opener_classes: \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m 79 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[94mif\u001b[0m opener.quick_test(path, fs_options=fs_options, fs=fs): \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m 80 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mif\u001b[0m opener.can_open(path, fs_options=fs_options, fs=fs, *args, **kwargs): \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m 81 \u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mreturn\u001b[0m opener.open(path, fs_options=fs_options, fs=fs, *args, **kwargs) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m 82 \u001b[0m\u001b[2mโ”‚ \u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m 83 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[2m# otherwise try all openers\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m 84 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[94mfor\u001b[0m opener \u001b[95min\u001b[0m opener_classes: \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/\u001b[0m\u001b[1;33mdataset.py\u001b[0m:\u001b[94m1457\u001b[0m in \u001b[92mopen\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m1454 \u001b[0m\u001b[2mโ”‚ \u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m1455 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[1;95m@classmethod\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m1456 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mopen\u001b[0m(\u001b[96mcls\u001b[0m, path, *args, **kwargs): \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m1457 \u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[94mreturn\u001b[0m \u001b[96mcls\u001b[0m(path, *args, **kwargs) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m1458 \u001b[0m\u001b[2mโ”‚ \u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m1459 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mchunk_iterator\u001b[0m(\u001b[96mself\u001b[0m, columns, chunk_size=\u001b[94mNone\u001b[0m, reverse=\u001b[94mFalse\u001b[0m): \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m1460 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[94myield from\u001b[0m \u001b[96mself\u001b[0m._default_chunk_iterator(\u001b[96mself\u001b[0m._columns, columns, chunk_size, reve \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/hdf5/\u001b[0m\u001b[1;33mdataset.py\u001b[0m:\u001b[94m73\u001b[0m in \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[92m__init__\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m 70 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[96mself\u001b[0m._version = \u001b[94m1\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m 71 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[96mself\u001b[0m._load() \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m 72 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m write: \u001b[2m# in write mode, call freeze yourself, so the hashes are computed\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m 73 \u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[96mself\u001b[0m._freeze() \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m 74 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m 75 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[2m# make sure we set the row count, which otherwise freeze would do\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m 76 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[96mself\u001b[0m._set_row_count() \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/\u001b[0m\u001b[1;33mdataset.py\u001b[0m:\u001b[94m1501\u001b[0m in \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[92m_freeze\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m1498 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[94mdef\u001b[0m \u001b[92m_freeze\u001b[0m(\u001b[96mself\u001b[0m): \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m1499 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[96mself\u001b[0m._ids = frozendict(\u001b[96mself\u001b[0m._ids) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m1500 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[96mself\u001b[0m._columns = frozendict(\u001b[96mself\u001b[0m._columns) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m1501 \u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[96mself\u001b[0m._set_row_count() \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m1502 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[96mself\u001b[0m._frozen = \u001b[94mTrue\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m1503 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m._hash_cache_needs_write: \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m1504 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[96mself\u001b[0m._write_hash_info() \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/\u001b[0m\u001b[1;33mdataset.py\u001b[0m:\u001b[94m423\u001b[0m in \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[92m_set_row_count\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m 420 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[96mself\u001b[0m._row_count = \u001b[96mlen\u001b[0m(values[\u001b[94m0\u001b[0m]) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m 421 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[94mfor\u001b[0m name, value \u001b[95min\u001b[0m \u001b[96mlist\u001b[0m(\u001b[96mself\u001b[0m._columns.items())[\u001b[94m1\u001b[0m:]: \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m 422 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mlen\u001b[0m(value) != \u001b[96mself\u001b[0m._row_count: \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m 423 \u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mraise\u001b[0m \u001b[96mValueError\u001b[0m(\u001b[33mf\u001b[0m\u001b[33m'\u001b[0m\u001b[33mFirst columns has length \u001b[0m\u001b[33m{\u001b[0m\u001b[96mself\u001b[0m._row_count\u001b[33m}\u001b[0m\u001b[33m, while col\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m 424 \u001b[0m\u001b[2mโ”‚ \u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m 425 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[1;95m@property\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m 426 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mrow_count\u001b[0m(\u001b[96mself\u001b[0m): \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ\u001b[0m\n", + "\u001b[1;91mValueError: \u001b[0mFirst columns has length \u001b[1;36m3\u001b[0m, while column epoch has length \u001b[1;36m2\u001b[0m\n", + "\n", + "\u001b[3mDuring handling of the above exception, another exception occurred:\u001b[0m\n", + "\n", + "\u001b[31mโ•ญโ”€\u001b[0m\u001b[31mโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31mโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\u001b[0m\u001b[31mโ”€โ•ฎ\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/IPython/core/\u001b[0m\u001b[1;33minteractiveshell\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[1;33m.py\u001b[0m:\u001b[94m1975\u001b[0m in \u001b[92mwrapped\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m1972 \u001b[0m\u001b[2;33mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[33mhandlers to crash IPython.\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m1973 \u001b[0m\u001b[2;33mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[33m\"\"\"\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m1974 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mtry\u001b[0m: \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m1975 \u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0mstb = handler(\u001b[96mself\u001b[0m,etype,value,tb,tb_offset=tb_offset) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m1976 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mreturn\u001b[0m validate_stb(stb) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m1977 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mexcept\u001b[0m: \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m1978 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[2m# clear custom handler immediately\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/\u001b[0m\u001b[1;33manalytics.py\u001b[0m:\u001b[94m149\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m in \u001b[92mipython_exception_handler\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m146 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[2m# TODO: create internal logging endpoint\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m147 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mpass\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m148 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[2m# We need to call the default ipython exception handler to raise the error\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m149 \u001b[2mโ”‚ โ”‚ \u001b[0mshell.showtraceback((etype, evalue, tb), tb_offset=tb_offset) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m150 \u001b[0m\u001b[2mโ”‚ \u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m151 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mtrack_exception_ipython\u001b[0m( \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m152 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[96mself\u001b[0m, \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/rich/\u001b[0m\u001b[1;33mtraceback.py\u001b[0m:\u001b[94m130\u001b[0m in \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[92mipy_show_traceback\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m127 \u001b[0m\u001b[2;90mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[33m\"\"\"wrap the default ip.showtraceback to store info for ip._showtraceback\"\"\"\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m128 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mnonlocal\u001b[0m tb_data \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m129 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0mtb_data = kwargs \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m130 \u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0mdefault_showtraceback(*args, **kwargs) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m131 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m132 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mipy_display_traceback\u001b[0m( \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m133 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m*args: Any, is_syntax: \u001b[96mbool\u001b[0m = \u001b[94mFalse\u001b[0m, **kwargs: Any \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/IPython/core/\u001b[0m\u001b[1;33minteractiveshell\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[1;33m.py\u001b[0m:\u001b[94m2116\u001b[0m in \u001b[92mshowtraceback\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m2113 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0mtraceback.print_exc() \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m2114 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mreturn\u001b[0m \u001b[94mNone\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m2115 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m2116 \u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[96mself\u001b[0m._showtraceback(etype, value, stb) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m2117 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m.call_pdb: \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m2118 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[2m# drop into debugger\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m2119 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[96mself\u001b[0m.debugger(force=\u001b[94mTrue\u001b[0m) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/rich/\u001b[0m\u001b[1;33mtraceback.py\u001b[0m:\u001b[94m146\u001b[0m in \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[92mipy_display_traceback\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m143 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0mcompiled = tb_data.get(\u001b[33m\"\u001b[0m\u001b[33mrunning_compiled_code\u001b[0m\u001b[33m\"\u001b[0m, \u001b[94mFalse\u001b[0m) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m144 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0mtb_offset = tb_data.get(\u001b[33m\"\u001b[0m\u001b[33mtb_offset\u001b[0m\u001b[33m\"\u001b[0m, \u001b[94m1\u001b[0m \u001b[94mif\u001b[0m compiled \u001b[94melse\u001b[0m \u001b[94m0\u001b[0m) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m145 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[2m# remove ipython internal frames from trace with tb_offset\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m146 \u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mfor\u001b[0m _ \u001b[95min\u001b[0m \u001b[96mrange\u001b[0m(tb_offset): \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m147 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mif\u001b[0m tb \u001b[95mis\u001b[0m \u001b[94mNone\u001b[0m: \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m148 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mbreak\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m149 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0mtb = tb.tb_next \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ\u001b[0m\n", + "\u001b[1;91mTypeError: \u001b[0m\u001b[32m'NoneType'\u001b[0m object cannot be interpreted as an integer\n", + "\n", + "\u001b[3mDuring handling of the above exception, another exception occurred:\u001b[0m\n", + "\n", + "\u001b[31mโ•ญโ”€\u001b[0m\u001b[31mโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31mโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\u001b[0m\u001b[31mโ”€โ•ฎ\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/IPython/core/\u001b[0m\u001b[1;33minteractiveshell\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[1;33m.py\u001b[0m:\u001b[94m3448\u001b[0m in \u001b[92mrun_ast_nodes\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m3445 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m): \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m3446 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0mcode = compiler(mod, cell_name, mode) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m3447 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0masy = compare(code) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m3448 \u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mif\u001b[0m \u001b[94mawait\u001b[0m \u001b[96mself\u001b[0m.run_code(code, result, async_=asy): \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m3449 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mreturn\u001b[0m \u001b[94mTrue\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m3450 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m3451 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[2m# Flush softspace\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/IPython/core/\u001b[0m\u001b[1;33minteractiveshell\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[1;33m.py\u001b[0m:\u001b[94m3526\u001b[0m in \u001b[92mrun_code\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m3523 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0metype, value, tb = sys.exc_info() \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m3524 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mif\u001b[0m result \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[94mNone\u001b[0m: \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m3525 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0mresult.error_in_exec = value \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m3526 \u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[96mself\u001b[0m.CustomTB(etype, value, tb) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m3527 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[94mexcept\u001b[0m: \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m3528 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mif\u001b[0m result \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[94mNone\u001b[0m: \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m3529 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0mresult.error_in_exec = sys.exc_info()[\u001b[94m1\u001b[0m] \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/IPython/core/\u001b[0m\u001b[1;33minteractiveshell\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[1;33m.py\u001b[0m:\u001b[94m1985\u001b[0m in \u001b[92mwrapped\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m1982 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0mstb = \u001b[96mself\u001b[0m.InteractiveTB.structured_traceback(*sys.exc_info()) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m1983 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[96mprint\u001b[0m(\u001b[96mself\u001b[0m.InteractiveTB.stb2text(stb)) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m1984 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[96mprint\u001b[0m(\u001b[33m\"\u001b[0m\u001b[33mThe original exception:\u001b[0m\u001b[33m\"\u001b[0m) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m1985 \u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0mstb = \u001b[96mself\u001b[0m.InteractiveTB.structured_traceback( \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m1986 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m(etype,value,tb), tb_offset=tb_offset \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m1987 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m) \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ”‚\u001b[0m \u001b[2m1988 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mreturn\u001b[0m stb \u001b[31mโ”‚\u001b[0m\n", + "\u001b[31mโ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ\u001b[0m\n", + "\u001b[1;91mTypeError: \u001b[0m\u001b[1;35mstructured_traceback\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m missing \u001b[1;36m1\u001b[0m required positional argument: \u001b[32m'evalue'\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "dq.finish()" ] From ac8c901672a25814b06340e5509f955b5ebac8bd Mon Sep 17 00:00:00 2001 From: Derek Date: Thu, 11 May 2023 13:52:06 -0700 Subject: [PATCH 04/16] Linting and typing --- .../loggers/model_logger/base_model_logger.py | 4 +- .../model_logger/semantic_segmentation.py | 33 +- .../utils/semantic_segmentation/errors.py | 31 +- .../utils/semantic_segmentation/metrics.py | 2 +- docs/cv/coco_deeplab_hooks.ipynb | 789 ++---------------- 5 files changed, 109 insertions(+), 750 deletions(-) diff --git a/dataquality/loggers/model_logger/base_model_logger.py b/dataquality/loggers/model_logger/base_model_logger.py index b82001f03..7dbd6ba6b 100644 --- a/dataquality/loggers/model_logger/base_model_logger.py +++ b/dataquality/loggers/model_logger/base_model_logger.py @@ -17,7 +17,6 @@ from dataquality.utils.ampli import AmpliMetric from dataquality.utils.dq_logger import get_dq_logger from dataquality.utils.hdf5_store import _save_hdf5_file -from dataquality.utils.thread_pool import ThreadPoolManager analytics = Analytics(ApiClient, config) # type: ignore @@ -99,7 +98,8 @@ def log(self) -> None: # global variables (cur_split and cur_epoch) that are subject to change # between subsequent threads self.set_split_epoch() - ThreadPoolManager.add_thread(target=self._add_threaded_log) + # ThreadPoolManager.add_thread(target=self._add_threaded_log) + self._add_threaded_log() def write_model_output(self, data: Dict) -> None: """Creates an hdf5 file from the data dict""" diff --git a/dataquality/loggers/model_logger/semantic_segmentation.py b/dataquality/loggers/model_logger/semantic_segmentation.py index 96dbe03ef..f8b411ab6 100644 --- a/dataquality/loggers/model_logger/semantic_segmentation.py +++ b/dataquality/loggers/model_logger/semantic_segmentation.py @@ -1,4 +1,4 @@ -from typing import Dict, List, Optional, Union, Tuple, Any +from typing import Any, Dict, List, Optional, Union import numpy as np import torch @@ -9,11 +9,12 @@ semantic_segmentation_logger_config, ) from dataquality.loggers.model_logger.base_model_logger import BaseGalileoModelLogger +from dataquality.schemas.semantic_segmentation import Polygon from dataquality.schemas.split import Split from dataquality.utils.semantic_segmentation.errors import ( + calculate_dep_polygons_batch, calculate_misclassified_polygons_batch, calculate_undetected_polygons_batch, - calculate_dep_polygons_batch ) from dataquality.utils.semantic_segmentation.lm import upload_mislabeled_pixels from dataquality.utils.semantic_segmentation.metrics import ( @@ -104,12 +105,11 @@ def dep_path(self) -> str: @property def contours_path(self) -> str: return f"{self.proj_run}/{self.split_name_path}/contours" - - + def get_polygon_data( self, - pred_polygons_batch: Tuple[List, List], - gold_polygons_batch: Tuple[List, List], + pred_polygons_batch: List[List[Polygon]], + gold_polygons_batch: List[List[Polygon]], ) -> Dict[str, Any]: """Returns polygon data for a batch of images in a dictionary that can then be used for our polygon df @@ -147,7 +147,7 @@ def get_polygon_data( image_ids.append(image_id) preds.append(-1) golds.append(polygon.label_idx) - data_error_potentials.append(polygon.data_error_potential) + data_error_potentials.append(0.0) errors.append(polygon.error_type.value) upload_polygon_contours( polygon, self.logger_config.polygon_idx, self.contours_path @@ -194,13 +194,13 @@ def _get_data_dict(self) -> Dict: # Errors calculate_misclassified_polygons_batch(self.pred_masks, gold_polygons_batch) calculate_undetected_polygons_batch(self.pred_masks, gold_polygons_batch) - - '''gold_polygons_batch = calculate_dep_polygons_batch( + + calculate_dep_polygons_batch( gold_polygons_batch, - dep_heatmaps, - height = [img.shape[-1] for img in self.gold_masks], - width = [img.shape[-2] for img in self.gold_masks], - )''' + dep_heatmaps.numpy(), + height=[img.shape[-1] for img in self.gold_masks], + width=[img.shape[-2] for img in self.gold_masks], + ) image_data = { "image": [ @@ -227,10 +227,11 @@ def _get_data_dict(self) -> Dict: ) polygon_data = self.get_polygon_data(pred_polygons_batch, gold_polygons_batch) - if self.split == Split.inference: - polygon_data["inference_name"] = [self.inference_name] * len(self.image_ids) + polygon_data["inference_name"] = [self.inference_name] * len( + polygon_data["image_id"] + ) else: - polygon_data["epoch"] = [self.epoch] * len(self.image_ids) + polygon_data["epoch"] = [self.epoch] * len(polygon_data["image_id"]) return polygon_data diff --git a/dataquality/utils/semantic_segmentation/errors.py b/dataquality/utils/semantic_segmentation/errors.py index 0930b76a0..cade28744 100644 --- a/dataquality/utils/semantic_segmentation/errors.py +++ b/dataquality/utils/semantic_segmentation/errors.py @@ -2,7 +2,6 @@ import numpy as np import torch -import cv2 from PIL import Image from dataquality.schemas.semantic_segmentation import ErrorType, Polygon @@ -157,12 +156,12 @@ def calculate_undetected_polygons_batch( pred_mask = pred_masks[idx].numpy() gold_polygons = gold_polygons_batch[idx] calculate_undetected_polygons(pred_mask, gold_polygons) - + def calculate_dep_polygon( dep_map: np.ndarray, polygon_img: np.ndarray, -) -> None: +) -> float: """Calculate the mean dep score for one polygon drawn onto an image of all zero's. We can then take the polygon's dep score by only selecting those pixels with a value greater than 0 and averageing them. @@ -170,47 +169,41 @@ def calculate_dep_polygon( Args: dep_map (np.ndarray): heatmap of dep scores for an image polygon_img (np.ndarray): image of all zeros with a polygon drawn on it - + Returns: dep_score (float): mean dep score for the polygon """ relevant_region = polygon_img != 0 dep_score = dep_map[relevant_region].mean() return dep_score - - + def calculate_dep_polygons_batch( gold_polygons_batch: List[List[Polygon]], dep_heatmaps: np.ndarray, height: List[int], width: List[int], -) -> List[List[Polygon]]: +) -> None: """Takes the mean dep score within a polygon and sets the polygon's dep score to the mean dep score Args: - gold_polygons_batch (List[List[[Polygon]]): list of the gold polygons + gold_polygons_batch (List[List[[Polygon]]): list of the gold polygons for an image dep_heatmaps (np.ndarray): heatmaps of DEP scores for an image - height (int): height of original image to resize the dep map to the correct + height (int): height of original image to resize the dep map to the correct dims - width (int): width of original image to resize the dep map to the correct + width (int): width of original image to resize the dep map to the correct dims """ resized_dep_maps = [] for i, dep_map in enumerate(dep_heatmaps): - resized_dep_maps.append(Image.fromarray(dep_map).resize((width[i], height[i]))) - + resized_image = Image.fromarray(dep_map).resize((width[i], height[i])) + resized_dep_maps.append(np.array(resized_image)) + for idx in range(len(resized_dep_maps)): dep_map = resized_dep_maps[idx] gold_polygons = gold_polygons_batch[idx] for polygon in gold_polygons: - polygon_img = draw_polygon(polygon, dep_map.size) + polygon_img = draw_polygon(polygon, dep_map.shape) polygon.data_error_potential = calculate_dep_polygon(dep_map, polygon_img) - - return gold_polygons_batch - - - - diff --git a/dataquality/utils/semantic_segmentation/metrics.py b/dataquality/utils/semantic_segmentation/metrics.py index 3cc495c09..5eceb4690 100644 --- a/dataquality/utils/semantic_segmentation/metrics.py +++ b/dataquality/utils/semantic_segmentation/metrics.py @@ -19,7 +19,7 @@ def calculate_and_upload_dep( gold_masks: torch.Tensor, image_ids: List[int], obj_prefix: str, -) -> Tuple[List[float], np.ndarray]: +) -> Tuple[List[float], torch.Tensor]: """Calculates the Data Error Potential (DEP) for each image in the batch Uploads the heatmap to Minio as a png. diff --git a/docs/cv/coco_deeplab_hooks.ipynb b/docs/cv/coco_deeplab_hooks.ipynb index 0a740579d..74188ee25 100644 --- a/docs/cv/coco_deeplab_hooks.ipynb +++ b/docs/cv/coco_deeplab_hooks.ipynb @@ -154,7 +154,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "๐Ÿ›ฐ Connected to existing project 'Derek-Elliott-Proj', and existing run 'test-polygon-df'.\n" + "๐Ÿ›ฐ Connected to existing project 'Derek-Elliott-Proj', and existing run 'test-polygon-df'.\n", + "๐Ÿš€ Found existing run labels. Setting labels for run to ['background', 'airplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'dining table', 'dog', 'horse', 'motorcycle', 'person', 'potted plant', 'sheep', 'couch', 'train', 'tv']. You do not need to set labels for this run.\n" ] } ], @@ -211,7 +212,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-05-11 11:49:01.215830: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "2023-05-11 13:49:23.693790: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" ] }, @@ -237,7 +238,7 @@ " warnings.warn(\"torch.cuda.amp.GradScaler is enabled, but CUDA is not available. Disabling.\")\n", "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/torch/amp/autocast_mode.py:204: UserWarning: User provided device_type of 'cuda', but CUDA is not available. Disabling\n", " warnings.warn('User provided device_type of \\'cuda\\', but CUDA is not available. Disabling')\n", - " 50%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ | 1/2 [00:02<00:02, 2.83s/it]\n" + " 50%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ | 1/2 [00:03<00:03, 3.26s/it]\n" ] } ], @@ -296,8 +297,8 @@ "text": [ "Logging 2 samples [########################################] 100.00% elapsed time : 0.29s = 0.0m = 0.0h\n", "Logging 2 samples [########################################] 100.00% elapsed time : 0.16s = 0.0m = 0.0h\n", - "Logging 2 samples [########################################] 100.00% elapsed time : 0.22s = 0.0m = 0.0h \n", - "Logging 2 samples [########################################] 100.00% elapsed time : 0.27s = 0.0m = 0.0h\n", + "Logging 2 samples [########################################] 100.00% elapsed time : 0.34s = 0.0m = 0.0h \n", + "Logging 2 samples [########################################] 100.00% elapsed time : 0.24s = 0.0m = 0.0h\n", " โ˜๏ธ Uploading Data\n", "CuML libraries not found, running standard process. For faster Galileo processing, consider installing\n", "`pip install 'dataquality[cuda]' --extra-index-url=https://pypi.nvidia.com/`\n" @@ -306,7 +307,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "aba6e837e4d14c1693c3b963f64de2a6", + "model_id": "20400cf290fb49ac999011f681c1d08a", "version_major": 2, "version_minor": 0 }, @@ -319,732 +320,96 @@ }, { "data": { - "text/html": [ - "
[05/11/23 11:49:23] ERROR    error opening                                                          __init__.py:271\n",
-       "                             '/Users/derek/.galileo/logs/7e78e642-cc40-4f5c-8f45-b2cadd8d674a/f9855                \n",
-       "                             eea-4121-4a24-bd68-132d3346a89a/training/0/80e887398eb1.hdf5'                         \n",
-       "                             Traceback (most recent call last):                                                    \n",
-       "                               File                                                                                \n",
-       "                             \"/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/va                \n",
-       "                             ex/__init__.py\", line 244, in open                                                    \n",
-       "                                 ds = vaex.dataset.open(path, fs_options=fs_options, fs=fs,                        \n",
-       "                             **kwargs)                                                                             \n",
-       "                               File                                                                                \n",
-       "                             \"/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/va                \n",
-       "                             ex/dataset.py\", line 81, in open                                                      \n",
-       "                                 return opener.open(path, fs_options=fs_options, fs=fs, *args,                     \n",
-       "                             **kwargs)                                                                             \n",
-       "                               File                                                                                \n",
-       "                             \"/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/va                \n",
-       "                             ex/dataset.py\", line 1457, in open                                                    \n",
-       "                                 return cls(path, *args, **kwargs)                                                 \n",
-       "                               File                                                                                \n",
-       "                             \"/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/va                \n",
-       "                             ex/hdf5/dataset.py\", line 73, in __init__                                             \n",
-       "                                 self._freeze()                                                                    \n",
-       "                               File                                                                                \n",
-       "                             \"/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/va                \n",
-       "                             ex/dataset.py\", line 1501, in _freeze                                                 \n",
-       "                                 self._set_row_count()                                                             \n",
-       "                               File                                                                                \n",
-       "                             \"/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/va                \n",
-       "                             ex/dataset.py\", line 423, in _set_row_count                                           \n",
-       "                                 raise ValueError(f'First columns has length {self._row_count},                    \n",
-       "                             while column {name} has length {len(value)}')                                         \n",
-       "                             ValueError: First columns has length 3, while column epoch has length                 \n",
-       "                             2                                                                                     \n",
-       "
\n" - ], + "application/vnd.jupyter.widget-view+json": { + "model_id": "296d767ead8b41e99c30c6a9485aec73", + "version_major": 2, + "version_minor": 0 + }, "text/plain": [ - "\u001b[2;36m[05/11/23 11:49:23]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;31mERROR \u001b[0m error opening \u001b]8;id=155436;file:///Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/__init__.py\u001b\\\u001b[2m__init__.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=520318;file:///Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/__init__.py#271\u001b\\\u001b[2m271\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[32m'/Users/derek/.galileo/logs/7e78e642-cc40-4f5c-8f45-b2cadd8d674a/f9855\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[32meea-4121-4a24-bd68-132d3346a89a/training/0/80e887398eb1.hdf5'\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m Traceback \u001b[1m(\u001b[0mmost recent call last\u001b[1m)\u001b[0m: \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m File \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[32m\"/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/va\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[32mex/__init__.py\"\u001b[0m, line \u001b[1;36m244\u001b[0m, in open \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m ds = \u001b[1;35mvaex.dataset.open\u001b[0m\u001b[1m(\u001b[0mpath, \u001b[33mfs_options\u001b[0m=\u001b[35mfs_options\u001b[0m, \u001b[33mfs\u001b[0m=\u001b[35mfs\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m **kwargs\u001b[1m)\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m File \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[32m\"/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/va\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[32mex/dataset.py\"\u001b[0m, line \u001b[1;36m81\u001b[0m, in open \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m return \u001b[1;35mopener.open\u001b[0m\u001b[1m(\u001b[0mpath, \u001b[33mfs_options\u001b[0m=\u001b[35mfs_options\u001b[0m, \u001b[33mfs\u001b[0m=\u001b[35mfs\u001b[0m, *args, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m **kwargs\u001b[1m)\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m File \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[32m\"/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/va\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[32mex/dataset.py\"\u001b[0m, line \u001b[1;36m1457\u001b[0m, in open \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m return \u001b[1;35mcls\u001b[0m\u001b[1m(\u001b[0mpath, *args, **kwargs\u001b[1m)\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m File \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[32m\"/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/va\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[32mex/hdf5/dataset.py\"\u001b[0m, line \u001b[1;36m73\u001b[0m, in __init__ \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1;35mself._freeze\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m File \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[32m\"/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/va\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[32mex/dataset.py\"\u001b[0m, line \u001b[1;36m1501\u001b[0m, in _freeze \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1;35mself._set_row_count\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m File \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[32m\"/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/va\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[32mex/dataset.py\"\u001b[0m, line \u001b[1;36m423\u001b[0m, in _set_row_count \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m raise \u001b[1;35mValueError\u001b[0m\u001b[1m(\u001b[0mf'First columns has length \u001b[1m{\u001b[0mself._row_count\u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m while column \u001b[1m{\u001b[0mname\u001b[1m}\u001b[0m has length \u001b[1m{\u001b[0m\u001b[1;35mlen\u001b[0m\u001b[1m(\u001b[0mvalue\u001b[1m)\u001b[0m\u001b[1m}\u001b[0m'\u001b[1m)\u001b[0m \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m ValueError: First columns has length \u001b[1;36m3\u001b[0m, while column epoch has length \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[1;36m2\u001b[0m \u001b[2m \u001b[0m\n" + "Processing data for upload: 0%| | 0/2 [00:00 1\u001b[0m \u001b[43mdq\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfinish\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "\n", - "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/utils/helpers.py:25\u001b[0m, in \u001b[0;36mcheck_noop..decorator\u001b[0;34m(*args, **kwargs)\u001b[0m\n", - "\u001b[1;32m 24\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n", - "\u001b[0;32m---> 25\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "\n", - "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/core/finish.py:61\u001b[0m, in \u001b[0;36mfinish\u001b[0;34m(last_epoch, wait, create_data_embs)\u001b[0m\n", - "\u001b[1;32m 59\u001b[0m data_logger\u001b[38;5;241m.\u001b[39mlogger_config\u001b[38;5;241m.\u001b[39mfinish()\n", - "\u001b[0;32m---> 61\u001b[0m \u001b[43mdata_logger\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mupload\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlast_epoch\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcreate_data_embs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcreate_data_embs\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[1;32m 62\u001b[0m upload_dq_log_file()\n", - "\n", - "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/loggers/data_logger/base_data_logger.py:267\u001b[0m, in \u001b[0;36mBaseGalileoDataLogger.upload\u001b[0;34m(self, last_epoch, create_data_embs)\u001b[0m\n", - "\u001b[1;32m 266\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m split \u001b[38;5;129;01min\u001b[39;00m Split\u001b[38;5;241m.\u001b[39mget_valid_attributes():\n", - "\u001b[0;32m--> 267\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mupload_split\u001b[49m\u001b[43m(\u001b[49m\n", - "\u001b[1;32m 268\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocation\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msplit\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mobject_store\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlast_epoch\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcreate_data_embs\u001b[49m\n", - "\u001b[1;32m 269\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", - "\n", - "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/loggers/data_logger/base_data_logger.py:295\u001b[0m, in \u001b[0;36mBaseGalileoDataLogger.upload_split\u001b[0;34m(self, location, split, object_store, last_epoch, create_data_embs)\u001b[0m\n", - "\u001b[1;32m 294\u001b[0m in_frame_split \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconvert_large_string(in_frame_split)\n", - "\u001b[0;32m--> 295\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mupload_split_from_in_frame\u001b[49m\u001b[43m(\u001b[49m\n", - "\u001b[1;32m 296\u001b[0m \u001b[43m \u001b[49m\u001b[43mobject_store\u001b[49m\u001b[43m,\u001b[49m\n", - "\u001b[1;32m 297\u001b[0m \u001b[43m \u001b[49m\u001b[43min_frame_split\u001b[49m\u001b[43m,\u001b[49m\n", - "\u001b[1;32m 298\u001b[0m \u001b[43m \u001b[49m\u001b[43msplit\u001b[49m\u001b[43m,\u001b[49m\n", - "\u001b[1;32m 299\u001b[0m \u001b[43m \u001b[49m\u001b[43msplit_loc\u001b[49m\u001b[43m,\u001b[49m\n", - "\u001b[1;32m 300\u001b[0m \u001b[43m \u001b[49m\u001b[43mlast_epoch\u001b[49m\u001b[43m,\u001b[49m\n", - "\u001b[1;32m 301\u001b[0m \u001b[43m \u001b[49m\u001b[43mcreate_data_embs\u001b[49m\u001b[43m,\u001b[49m\n", - "\u001b[1;32m 302\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[1;32m 303\u001b[0m in_frame_split\u001b[38;5;241m.\u001b[39mclose()\n", - "\n", - "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/loggers/data_logger/semantic_segmentation.py:166\u001b[0m, in \u001b[0;36mSemanticSegmentationDataLogger.upload_split_from_in_frame\u001b[0;34m(cls, object_store, in_frame, split, split_loc, last_epoch, create_data_embs)\u001b[0m\n", - "\u001b[1;32m 165\u001b[0m dir_name \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00msplit_loc\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/0\u001b[39m\u001b[38;5;124m\"\u001b[39m\n", - "\u001b[0;32m--> 166\u001b[0m out_frame \u001b[38;5;241m=\u001b[39m \u001b[43mget_output_df\u001b[49m\u001b[43m(\u001b[49m\n", - "\u001b[1;32m 167\u001b[0m \u001b[43m \u001b[49m\u001b[43mdir_name\u001b[49m\u001b[43m,\u001b[49m\n", - "\u001b[1;32m 168\u001b[0m \u001b[43m \u001b[49m\u001b[43mprob_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n", - "\u001b[1;32m 169\u001b[0m \u001b[43m \u001b[49m\u001b[43msplit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msplit\u001b[49m\u001b[43m,\u001b[49m\n", - "\u001b[1;32m 170\u001b[0m \u001b[43m \u001b[49m\u001b[43mepoch_or_inf\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m,\u001b[49m\n", - "\u001b[1;32m 171\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[1;32m 173\u001b[0m polygon_minio_file \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mproj_run\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00msplit\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/0/prob/prob.hdf5\u001b[39m\u001b[38;5;124m\"\u001b[39m\n", - "\n", - "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/utils/vaex.py:192\u001b[0m, in \u001b[0;36mget_output_df\u001b[0;34m(dir_name, prob_only, split, epoch_or_inf)\u001b[0m\n", - "\u001b[1;32m 191\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m vaex\u001b[38;5;241m.\u001b[39mopen(out_frame_path)\n", - "\u001b[0;32m--> 192\u001b[0m str_cols \u001b[38;5;241m=\u001b[39m \u001b[43mconcat_hdf5_files\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdir_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mprob_only\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[1;32m 193\u001b[0m out_frame \u001b[38;5;241m=\u001b[39m vaex\u001b[38;5;241m.\u001b[39mopen(out_frame_path)\n", - "\n", - "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/utils/hdf5_store.py:129\u001b[0m, in \u001b[0;36mconcat_hdf5_files\u001b[0;34m(location, prob_only)\u001b[0m\n", - "\u001b[1;32m 128\u001b[0m files \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mlistdir(location)\n", - "\u001b[0;32m--> 129\u001b[0m df \u001b[38;5;241m=\u001b[39m \u001b[43mvaex\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mlocation\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m/\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mfiles\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", - "\u001b[1;32m 131\u001b[0m \u001b[38;5;66;03m# Construct a store per column\u001b[39;00m\n", - "\n", - "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/__init__.py:244\u001b[0m, in \u001b[0;36mopen\u001b[0;34m(path, convert, progress, shuffle, fs_options, fs, *args, **kwargs)\u001b[0m\n", - "\u001b[1;32m 243\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", - "\u001b[0;32m--> 244\u001b[0m ds \u001b[38;5;241m=\u001b[39m \u001b[43mvaex\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdataset\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfs_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfs_options\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[1;32m 245\u001b[0m df \u001b[38;5;241m=\u001b[39m vaex\u001b[38;5;241m.\u001b[39mfrom_dataset(ds)\n", - "\n", - "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/dataset.py:81\u001b[0m, in \u001b[0;36mopen\u001b[0;34m(path, fs_options, fs, *args, **kwargs)\u001b[0m\n", - "\u001b[1;32m 80\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m opener\u001b[38;5;241m.\u001b[39mcan_open(path, fs_options\u001b[38;5;241m=\u001b[39mfs_options, fs\u001b[38;5;241m=\u001b[39mfs, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n", - "\u001b[0;32m---> 81\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mopener\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfs_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfs_options\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[1;32m 83\u001b[0m \u001b[38;5;66;03m# otherwise try all openers\u001b[39;00m\n", - "\n", - "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/dataset.py:1457\u001b[0m, in \u001b[0;36mDatasetFile.open\u001b[0;34m(cls, path, *args, **kwargs)\u001b[0m\n", - "\u001b[1;32m 1455\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n", - "\u001b[1;32m 1456\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mopen\u001b[39m(\u001b[38;5;28mcls\u001b[39m, path, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n", - "\u001b[0;32m-> 1457\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "\n", - "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/hdf5/dataset.py:73\u001b[0m, in \u001b[0;36mHdf5MemoryMapped.__init__\u001b[0;34m(self, path, write, fs_options, fs, nommap, group, _fingerprint)\u001b[0m\n", - "\u001b[1;32m 72\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m write: \u001b[38;5;66;03m# in write mode, call freeze yourself, so the hashes are computed\u001b[39;00m\n", - "\u001b[0;32m---> 73\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_freeze\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[1;32m 74\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", - "\u001b[1;32m 75\u001b[0m \u001b[38;5;66;03m# make sure we set the row count, which otherwise freeze would do\u001b[39;00m\n", - "\n", - "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/dataset.py:1501\u001b[0m, in \u001b[0;36mDatasetFile._freeze\u001b[0;34m(self)\u001b[0m\n", - "\u001b[1;32m 1500\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_columns \u001b[38;5;241m=\u001b[39m frozendict(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_columns)\n", - "\u001b[0;32m-> 1501\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_set_row_count\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[1;32m 1502\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_frozen \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n", - "\n", - "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/dataset.py:423\u001b[0m, in \u001b[0;36mDataset._set_row_count\u001b[0;34m(self)\u001b[0m\n", - "\u001b[1;32m 422\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(value) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_row_count:\n", - "\u001b[0;32m--> 423\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mFirst columns has length \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_row_count\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m, while column \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m has length \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(value)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n", - "\n", - "\u001b[0;31mValueError\u001b[0m: First columns has length 3, while column epoch has length 2\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)\n", - "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/analytics.py:149\u001b[0m, in \u001b[0;36mAnalytics.ipython_exception_handler\u001b[0;34m(self, shell, etype, evalue, tb, tb_offset)\u001b[0m\n", - "\u001b[1;32m 147\u001b[0m \u001b[38;5;28;01mpass\u001b[39;00m\n", - "\u001b[1;32m 148\u001b[0m \u001b[38;5;66;03m# We need to call the default ipython exception handler to raise the error\u001b[39;00m\n", - "\u001b[0;32m--> 149\u001b[0m \u001b[43mshell\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshowtraceback\u001b[49m\u001b[43m(\u001b[49m\u001b[43m(\u001b[49m\u001b[43metype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mevalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtb\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtb_offset\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtb_offset\u001b[49m\u001b[43m)\u001b[49m\n", - "\n", - "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/rich/traceback.py:130\u001b[0m, in \u001b[0;36minstall..ipy_excepthook_closure..ipy_show_traceback\u001b[0;34m(*args, **kwargs)\u001b[0m\n", - "\u001b[1;32m 128\u001b[0m \u001b[38;5;28;01mnonlocal\u001b[39;00m tb_data\n", - "\u001b[1;32m 129\u001b[0m tb_data \u001b[38;5;241m=\u001b[39m kwargs\n", - "\u001b[0;32m--> 130\u001b[0m \u001b[43mdefault_showtraceback\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "\n", - "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/IPython/core/interactiveshell.py:2116\u001b[0m, in \u001b[0;36mInteractiveShell.showtraceback\u001b[0;34m(self, exc_tuple, filename, tb_offset, exception_only, running_compiled_code)\u001b[0m\n", - "\u001b[1;32m 2113\u001b[0m traceback\u001b[38;5;241m.\u001b[39mprint_exc()\n", - "\u001b[1;32m 2114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n", - "\u001b[0;32m-> 2116\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_showtraceback\u001b[49m\u001b[43m(\u001b[49m\u001b[43metype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstb\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[1;32m 2117\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcall_pdb:\n", - "\u001b[1;32m 2118\u001b[0m \u001b[38;5;66;03m# drop into debugger\u001b[39;00m\n", - "\u001b[1;32m 2119\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdebugger(force\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n", - "\n", - "File \u001b[0;32m~/Desktop/dataquality/.venv/lib/python3.9/site-packages/rich/traceback.py:146\u001b[0m, in \u001b[0;36minstall..ipy_excepthook_closure..ipy_display_traceback\u001b[0;34m(is_syntax, *args, **kwargs)\u001b[0m\n", - "\u001b[1;32m 144\u001b[0m tb_offset \u001b[38;5;241m=\u001b[39m tb_data\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtb_offset\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m1\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m compiled \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;241m0\u001b[39m)\n", - "\u001b[1;32m 145\u001b[0m \u001b[38;5;66;03m# remove ipython internal frames from trace with tb_offset\u001b[39;00m\n", - "\u001b[0;32m--> 146\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m _ \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28;43mrange\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mtb_offset\u001b[49m\u001b[43m)\u001b[49m:\n", - "\u001b[1;32m 147\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tb \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", - "\u001b[1;32m 148\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n", - "\n", - "\u001b[0;31mTypeError\u001b[0m: 'NoneType' object cannot be interpreted as an integer\n", - "The original exception:\n" + "Job default successfully submitted. Results will be available soon at https://console.dev.rungalileo.io/insights?projectId=7e78e642-cc40-4f5c-8f45-b2cadd8d674a&runId=617746f8-14a0-4e45-b27a-903492fe7343&split=training&metric=f1&depHigh=1&depLow=0&taskType=6\n", + "Waiting for job (you can safely close this window)...\n", + "\tUploading processed validation data\n", + "Done! Job finished with status completed\n", + "Click here to see your run! https://console.dev.rungalileo.io/insights?projectId=7e78e642-cc40-4f5c-8f45-b2cadd8d674a&runId=617746f8-14a0-4e45-b27a-903492fe7343&split=training&metric=f1&depHigh=1&depLow=0&taskType=6\n", + "๐Ÿงน Cleaning up\n", + "๐Ÿงน Cleaning up\n" ] }, { "data": { - "text/html": [ - "
โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ Traceback (most recent call last) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ\n",
-       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/IPython/core/interactiveshell โ”‚\n",
-       "โ”‚ .py:3508 in run_code                                                                             โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚   3505 โ”‚   โ”‚   โ”‚   โ”‚   if async_:                                                                โ”‚\n",
-       "โ”‚   3506 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   await eval(code_obj, self.user_global_ns, self.user_ns)               โ”‚\n",
-       "โ”‚   3507 โ”‚   โ”‚   โ”‚   โ”‚   else:                                                                     โ”‚\n",
-       "โ”‚ โฑ 3508 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   exec(code_obj, self.user_global_ns, self.user_ns)                     โ”‚\n",
-       "โ”‚   3509 โ”‚   โ”‚   โ”‚   finally:                                                                      โ”‚\n",
-       "โ”‚   3510 โ”‚   โ”‚   โ”‚   โ”‚   # Reset our crash handler in place                                        โ”‚\n",
-       "โ”‚   3511 โ”‚   โ”‚   โ”‚   โ”‚   sys.excepthook = old_excepthook                                           โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚ in <module>:1                                                                                    โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚ โฑ 1 dq.finish()                                                                                  โ”‚\n",
-       "โ”‚   2                                                                                              โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/utils/helpers.py: โ”‚\n",
-       "โ”‚ 25 in decorator                                                                                  โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚    22 โ”‚   def decorator(*args: P.args, **kwargs: P.kwargs) -> Optional[T]:                       โ”‚\n",
-       "โ”‚    23 โ”‚   โ”‚   if galileo_disabled():                                                             โ”‚\n",
-       "โ”‚    24 โ”‚   โ”‚   โ”‚   return None                                                                    โ”‚\n",
-       "โ”‚ โฑ  25 โ”‚   โ”‚   return func(*args, **kwargs)                                                       โ”‚\n",
-       "โ”‚    26 โ”‚                                                                                          โ”‚\n",
-       "โ”‚    27 โ”‚   return decorator                                                                       โ”‚\n",
-       "โ”‚    28                                                                                            โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/core/finish.py:61 โ”‚\n",
-       "โ”‚ in finish                                                                                        โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚    58 โ”‚   # Certain tasks require extra finish logic                                             โ”‚\n",
-       "โ”‚    59 โ”‚   data_logger.logger_config.finish()                                                     โ”‚\n",
-       "โ”‚    60 โ”‚                                                                                          โ”‚\n",
-       "โ”‚ โฑ  61 โ”‚   data_logger.upload(last_epoch, create_data_embs=create_data_embs)                      โ”‚\n",
-       "โ”‚    62 โ”‚   upload_dq_log_file()                                                                   โ”‚\n",
-       "โ”‚    63 โ”‚   body = dict(                                                                           โ”‚\n",
-       "โ”‚    64 โ”‚   โ”‚   project_id=str(config.current_project_id),                                         โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/loggers/data_logg โ”‚\n",
-       "โ”‚ er/base_data_logger.py:267 in upload                                                             โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚   264 โ”‚   โ”‚   โ”‚   create_data_embs = False                                                       โ”‚\n",
-       "โ”‚   265 โ”‚   โ”‚                                                                                      โ”‚\n",
-       "โ”‚   266 โ”‚   โ”‚   for split in Split.get_valid_attributes():                                         โ”‚\n",
-       "โ”‚ โฑ 267 โ”‚   โ”‚   โ”‚   self.upload_split(                                                             โ”‚\n",
-       "โ”‚   268 โ”‚   โ”‚   โ”‚   โ”‚   location, split, object_store, last_epoch, create_data_embs                โ”‚\n",
-       "โ”‚   269 โ”‚   โ”‚   โ”‚   )                                                                              โ”‚\n",
-       "โ”‚   270                                                                                            โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/loggers/data_logg โ”‚\n",
-       "โ”‚ er/base_data_logger.py:295 in upload_split                                                       โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚   292 โ”‚   โ”‚   โ”‚   return                                                                         โ”‚\n",
-       "โ”‚   293 โ”‚   โ”‚   in_frame_split = vaex.open(f\"{in_frame_path}/*.{self.INPUT_DATA_FILE_EXT}\")        โ”‚\n",
-       "โ”‚   294 โ”‚   โ”‚   in_frame_split = self.convert_large_string(in_frame_split)                         โ”‚\n",
-       "โ”‚ โฑ 295 โ”‚   โ”‚   self.upload_split_from_in_frame(                                                   โ”‚\n",
-       "โ”‚   296 โ”‚   โ”‚   โ”‚   object_store,                                                                  โ”‚\n",
-       "โ”‚   297 โ”‚   โ”‚   โ”‚   in_frame_split,                                                                โ”‚\n",
-       "โ”‚   298 โ”‚   โ”‚   โ”‚   split,                                                                         โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/loggers/data_logg โ”‚\n",
-       "โ”‚ er/semantic_segmentation.py:166 in upload_split_from_in_frame                                    โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚   163 โ”‚   โ”‚   )                                                                                  โ”‚\n",
-       "โ”‚   164 โ”‚   โ”‚                                                                                      โ”‚\n",
-       "โ”‚   165 โ”‚   โ”‚   dir_name = f\"{split_loc}/0\"                                                        โ”‚\n",
-       "โ”‚ โฑ 166 โ”‚   โ”‚   out_frame = get_output_df(                                                         โ”‚\n",
-       "โ”‚   167 โ”‚   โ”‚   โ”‚   dir_name,                                                                      โ”‚\n",
-       "โ”‚   168 โ”‚   โ”‚   โ”‚   prob_only=False,                                                               โ”‚\n",
-       "โ”‚   169 โ”‚   โ”‚   โ”‚   split=split,                                                                   โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/utils/vaex.py:192 โ”‚\n",
-       "โ”‚ in get_output_df                                                                                 โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚   189 โ”‚   # just open the processed file                                                         โ”‚\n",
-       "โ”‚   190 โ”‚   if os.path.isfile(out_frame_path):                                                     โ”‚\n",
-       "โ”‚   191 โ”‚   โ”‚   return vaex.open(out_frame_path)                                                   โ”‚\n",
-       "โ”‚ โฑ 192 โ”‚   str_cols = concat_hdf5_files(dir_name, prob_only)                                      โ”‚\n",
-       "โ”‚   193 โ”‚   out_frame = vaex.open(out_frame_path)                                                  โ”‚\n",
-       "โ”‚   194 โ”‚                                                                                          โ”‚\n",
-       "โ”‚   195 โ”‚   if split == Split.inference:                                                           โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/utils/hdf5_store. โ”‚\n",
-       "โ”‚ py:129 in concat_hdf5_files                                                                      โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚   126 โ”‚   str_cols = []                                                                          โ”‚\n",
-       "โ”‚   127 โ”‚   stores = {}                                                                            โ”‚\n",
-       "โ”‚   128 โ”‚   files = os.listdir(location)                                                           โ”‚\n",
-       "โ”‚ โฑ 129 โ”‚   df = vaex.open(f\"{location}/{files[0]}\")                                               โ”‚\n",
-       "โ”‚   130 โ”‚                                                                                          โ”‚\n",
-       "โ”‚   131 โ”‚   # Construct a store per column                                                         โ”‚\n",
-       "โ”‚   132 โ”‚   if prob_only:                                                                          โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/__init__.py:244 in open  โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚   241 โ”‚   โ”‚   โ”‚   โ”‚   )                                                                          โ”‚\n",
-       "โ”‚   242 โ”‚   โ”‚   โ”‚   โ”‚   ds = vaex.dataset.open(path_output, fs_options=fs_options, fs=fs)          โ”‚\n",
-       "โ”‚   243 โ”‚   โ”‚   โ”‚   else:                                                                          โ”‚\n",
-       "โ”‚ โฑ 244 โ”‚   โ”‚   โ”‚   โ”‚   ds = vaex.dataset.open(path, fs_options=fs_options, fs=fs, **kwargs)       โ”‚\n",
-       "โ”‚   245 โ”‚   โ”‚   โ”‚   df = vaex.from_dataset(ds)                                                     โ”‚\n",
-       "โ”‚   246 โ”‚   โ”‚   โ”‚   if df is None:                                                                 โ”‚\n",
-       "โ”‚   247 โ”‚   โ”‚   โ”‚   โ”‚   if os.path.exists(path):                                                   โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/dataset.py:81 in open    โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚     78 โ”‚   for opener in opener_classes:                                                         โ”‚\n",
-       "โ”‚     79 โ”‚   โ”‚   if opener.quick_test(path, fs_options=fs_options, fs=fs):                         โ”‚\n",
-       "โ”‚     80 โ”‚   โ”‚   โ”‚   if opener.can_open(path, fs_options=fs_options, fs=fs, *args, **kwargs):      โ”‚\n",
-       "โ”‚ โฑ   81 โ”‚   โ”‚   โ”‚   โ”‚   return opener.open(path, fs_options=fs_options, fs=fs, *args, **kwargs)   โ”‚\n",
-       "โ”‚     82 โ”‚                                                                                         โ”‚\n",
-       "โ”‚     83 โ”‚   # otherwise try all openers                                                           โ”‚\n",
-       "โ”‚     84 โ”‚   for opener in opener_classes:                                                         โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/dataset.py:1457 in open  โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚   1454 โ”‚                                                                                         โ”‚\n",
-       "โ”‚   1455 โ”‚   @classmethod                                                                          โ”‚\n",
-       "โ”‚   1456 โ”‚   def open(cls, path, *args, **kwargs):                                                 โ”‚\n",
-       "โ”‚ โฑ 1457 โ”‚   โ”‚   return cls(path, *args, **kwargs)                                                 โ”‚\n",
-       "โ”‚   1458 โ”‚                                                                                         โ”‚\n",
-       "โ”‚   1459 โ”‚   def chunk_iterator(self, columns, chunk_size=None, reverse=False):                    โ”‚\n",
-       "โ”‚   1460 โ”‚   โ”‚   yield from self._default_chunk_iterator(self._columns, columns, chunk_size, reve  โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/hdf5/dataset.py:73 in    โ”‚\n",
-       "โ”‚ __init__                                                                                         โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚    70 โ”‚   โ”‚   self._version = 1                                                                  โ”‚\n",
-       "โ”‚    71 โ”‚   โ”‚   self._load()                                                                       โ”‚\n",
-       "โ”‚    72 โ”‚   โ”‚   if not write:  # in write mode, call freeze yourself, so the hashes are computed   โ”‚\n",
-       "โ”‚ โฑ  73 โ”‚   โ”‚   โ”‚   self._freeze()                                                                 โ”‚\n",
-       "โ”‚    74 โ”‚   โ”‚   else:                                                                              โ”‚\n",
-       "โ”‚    75 โ”‚   โ”‚   โ”‚   # make sure we set the row count, which otherwise freeze would do              โ”‚\n",
-       "โ”‚    76 โ”‚   โ”‚   โ”‚   self._set_row_count()                                                          โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/dataset.py:1501 in       โ”‚\n",
-       "โ”‚ _freeze                                                                                          โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚   1498 โ”‚   def _freeze(self):                                                                    โ”‚\n",
-       "โ”‚   1499 โ”‚   โ”‚   self._ids = frozendict(self._ids)                                                 โ”‚\n",
-       "โ”‚   1500 โ”‚   โ”‚   self._columns = frozendict(self._columns)                                         โ”‚\n",
-       "โ”‚ โฑ 1501 โ”‚   โ”‚   self._set_row_count()                                                             โ”‚\n",
-       "โ”‚   1502 โ”‚   โ”‚   self._frozen = True                                                               โ”‚\n",
-       "โ”‚   1503 โ”‚   โ”‚   if self._hash_cache_needs_write:                                                  โ”‚\n",
-       "โ”‚   1504 โ”‚   โ”‚   โ”‚   self._write_hash_info()                                                       โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/dataset.py:423 in        โ”‚\n",
-       "โ”‚ _set_row_count                                                                                   โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚    420 โ”‚   โ”‚   self._row_count = len(values[0])                                                  โ”‚\n",
-       "โ”‚    421 โ”‚   โ”‚   for name, value in list(self._columns.items())[1:]:                               โ”‚\n",
-       "โ”‚    422 โ”‚   โ”‚   โ”‚   if len(value) != self._row_count:                                             โ”‚\n",
-       "โ”‚ โฑ  423 โ”‚   โ”‚   โ”‚   โ”‚   raise ValueError(f'First columns has length {self._row_count}, while col  โ”‚\n",
-       "โ”‚    424 โ”‚                                                                                         โ”‚\n",
-       "โ”‚    425 โ”‚   @property                                                                             โ”‚\n",
-       "โ”‚    426 โ”‚   def row_count(self):                                                                  โ”‚\n",
-       "โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ\n",
-       "ValueError: First columns has length 3, while column epoch has length 2\n",
-       "\n",
-       "During handling of the above exception, another exception occurred:\n",
-       "\n",
-       "โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ Traceback (most recent call last) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ\n",
-       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/IPython/core/interactiveshell โ”‚\n",
-       "โ”‚ .py:1975 in wrapped                                                                              โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚   1972 โ”‚   โ”‚   โ”‚   โ”‚   handlers to crash IPython.                                                โ”‚\n",
-       "โ”‚   1973 โ”‚   โ”‚   โ”‚   โ”‚   \"\"\"                                                                       โ”‚\n",
-       "โ”‚   1974 โ”‚   โ”‚   โ”‚   โ”‚   try:                                                                      โ”‚\n",
-       "โ”‚ โฑ 1975 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   stb = handler(self,etype,value,tb,tb_offset=tb_offset)                โ”‚\n",
-       "โ”‚   1976 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   return validate_stb(stb)                                              โ”‚\n",
-       "โ”‚   1977 โ”‚   โ”‚   โ”‚   โ”‚   except:                                                                   โ”‚\n",
-       "โ”‚   1978 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   # clear custom handler immediately                                    โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/analytics.py:149  โ”‚\n",
-       "โ”‚ in ipython_exception_handler                                                                     โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚   146 โ”‚   โ”‚   โ”‚   # TODO: create internal logging endpoint                                       โ”‚\n",
-       "โ”‚   147 โ”‚   โ”‚   โ”‚   pass                                                                           โ”‚\n",
-       "โ”‚   148 โ”‚   โ”‚   # We need to call the default ipython exception handler to raise the error         โ”‚\n",
-       "โ”‚ โฑ 149 โ”‚   โ”‚   shell.showtraceback((etype, evalue, tb), tb_offset=tb_offset)                      โ”‚\n",
-       "โ”‚   150 โ”‚                                                                                          โ”‚\n",
-       "โ”‚   151 โ”‚   def track_exception_ipython(                                                           โ”‚\n",
-       "โ”‚   152 โ”‚   โ”‚   self,                                                                              โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/rich/traceback.py:130 in      โ”‚\n",
-       "โ”‚ ipy_show_traceback                                                                               โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚   127 โ”‚   โ”‚   โ”‚   \"\"\"wrap the default ip.showtraceback to store info for ip._showtraceback\"\"\"    โ”‚\n",
-       "โ”‚   128 โ”‚   โ”‚   โ”‚   nonlocal tb_data                                                               โ”‚\n",
-       "โ”‚   129 โ”‚   โ”‚   โ”‚   tb_data = kwargs                                                               โ”‚\n",
-       "โ”‚ โฑ 130 โ”‚   โ”‚   โ”‚   default_showtraceback(*args, **kwargs)                                         โ”‚\n",
-       "โ”‚   131 โ”‚   โ”‚                                                                                      โ”‚\n",
-       "โ”‚   132 โ”‚   โ”‚   def ipy_display_traceback(                                                         โ”‚\n",
-       "โ”‚   133 โ”‚   โ”‚   โ”‚   *args: Any, is_syntax: bool = False, **kwargs: Any                             โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/IPython/core/interactiveshell โ”‚\n",
-       "โ”‚ .py:2116 in showtraceback                                                                        โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚   2113 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   traceback.print_exc()                                             โ”‚\n",
-       "โ”‚   2114 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   return None                                                       โ”‚\n",
-       "โ”‚   2115 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚                                                                         โ”‚\n",
-       "โ”‚ โฑ 2116 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   self._showtraceback(etype, value, stb)                                โ”‚\n",
-       "โ”‚   2117 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   if self.call_pdb:                                                     โ”‚\n",
-       "โ”‚   2118 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   # drop into debugger                                              โ”‚\n",
-       "โ”‚   2119 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   self.debugger(force=True)                                         โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/rich/traceback.py:146 in      โ”‚\n",
-       "โ”‚ ipy_display_traceback                                                                            โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚   143 โ”‚   โ”‚   โ”‚   compiled = tb_data.get(\"running_compiled_code\", False)                         โ”‚\n",
-       "โ”‚   144 โ”‚   โ”‚   โ”‚   tb_offset = tb_data.get(\"tb_offset\", 1 if compiled else 0)                     โ”‚\n",
-       "โ”‚   145 โ”‚   โ”‚   โ”‚   # remove ipython internal frames from trace with tb_offset                     โ”‚\n",
-       "โ”‚ โฑ 146 โ”‚   โ”‚   โ”‚   for _ in range(tb_offset):                                                     โ”‚\n",
-       "โ”‚   147 โ”‚   โ”‚   โ”‚   โ”‚   if tb is None:                                                             โ”‚\n",
-       "โ”‚   148 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   break                                                                  โ”‚\n",
-       "โ”‚   149 โ”‚   โ”‚   โ”‚   โ”‚   tb = tb.tb_next                                                            โ”‚\n",
-       "โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ\n",
-       "TypeError: 'NoneType' object cannot be interpreted as an integer\n",
-       "\n",
-       "During handling of the above exception, another exception occurred:\n",
-       "\n",
-       "โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ Traceback (most recent call last) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ\n",
-       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/IPython/core/interactiveshell โ”‚\n",
-       "โ”‚ .py:3448 in run_ast_nodes                                                                        โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚   3445 โ”‚   โ”‚   โ”‚   โ”‚   ):                                                                        โ”‚\n",
-       "โ”‚   3446 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   code = compiler(mod, cell_name, mode)                                 โ”‚\n",
-       "โ”‚   3447 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   asy = compare(code)                                                   โ”‚\n",
-       "โ”‚ โฑ 3448 โ”‚   โ”‚   โ”‚   โ”‚   if await self.run_code(code, result, async_=asy):                         โ”‚\n",
-       "โ”‚   3449 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   return True                                                           โ”‚\n",
-       "โ”‚   3450 โ”‚   โ”‚   โ”‚                                                                                 โ”‚\n",
-       "โ”‚   3451 โ”‚   โ”‚   โ”‚   # Flush softspace                                                             โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/IPython/core/interactiveshell โ”‚\n",
-       "โ”‚ .py:3526 in run_code                                                                             โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚   3523 โ”‚   โ”‚   โ”‚   etype, value, tb = sys.exc_info()                                             โ”‚\n",
-       "โ”‚   3524 โ”‚   โ”‚   โ”‚   if result is not None:                                                        โ”‚\n",
-       "โ”‚   3525 โ”‚   โ”‚   โ”‚   โ”‚   result.error_in_exec = value                                              โ”‚\n",
-       "โ”‚ โฑ 3526 โ”‚   โ”‚   โ”‚   self.CustomTB(etype, value, tb)                                               โ”‚\n",
-       "โ”‚   3527 โ”‚   โ”‚   except:                                                                           โ”‚\n",
-       "โ”‚   3528 โ”‚   โ”‚   โ”‚   if result is not None:                                                        โ”‚\n",
-       "โ”‚   3529 โ”‚   โ”‚   โ”‚   โ”‚   result.error_in_exec = sys.exc_info()[1]                                  โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚ /Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/IPython/core/interactiveshell โ”‚\n",
-       "โ”‚ .py:1985 in wrapped                                                                              โ”‚\n",
-       "โ”‚                                                                                                  โ”‚\n",
-       "โ”‚   1982 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   stb = self.InteractiveTB.structured_traceback(*sys.exc_info())        โ”‚\n",
-       "โ”‚   1983 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   print(self.InteractiveTB.stb2text(stb))                               โ”‚\n",
-       "โ”‚   1984 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   print(\"The original exception:\")                                      โ”‚\n",
-       "โ”‚ โฑ 1985 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   stb = self.InteractiveTB.structured_traceback(                        โ”‚\n",
-       "โ”‚   1986 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   (etype,value,tb), tb_offset=tb_offset         โ”‚\n",
-       "โ”‚   1987 โ”‚   โ”‚   โ”‚   โ”‚   โ”‚   )                                                                     โ”‚\n",
-       "โ”‚   1988 โ”‚   โ”‚   โ”‚   โ”‚   return stb                                                                โ”‚\n",
-       "โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ\n",
-       "TypeError: structured_traceback() missing 1 required positional argument: 'evalue'\n",
-       "
\n" - ], "text/plain": [ - "\u001b[31mโ•ญโ”€\u001b[0m\u001b[31mโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31mโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\u001b[0m\u001b[31mโ”€โ•ฎ\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/IPython/core/\u001b[0m\u001b[1;33minteractiveshell\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[1;33m.py\u001b[0m:\u001b[94m3508\u001b[0m in \u001b[92mrun_code\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m3505 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mif\u001b[0m async_: \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m3506 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mawait\u001b[0m \u001b[96meval\u001b[0m(code_obj, \u001b[96mself\u001b[0m.user_global_ns, \u001b[96mself\u001b[0m.user_ns) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m3507 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m3508 \u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0mexec(code_obj, \u001b[96mself\u001b[0m.user_global_ns, \u001b[96mself\u001b[0m.user_ns) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m3509 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mfinally\u001b[0m: \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m3510 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[2m# Reset our crash handler in place\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m3511 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0msys.excepthook = old_excepthook \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m in \u001b[92m\u001b[0m:\u001b[94m1\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m1 dq.finish() \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m2 \u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/utils/\u001b[0m\u001b[1;33mhelpers.py\u001b[0m: \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[94m25\u001b[0m in \u001b[92mdecorator\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m 22 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mdecorator\u001b[0m(*args: P.args, **kwargs: P.kwargs) -> Optional[T]: \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m 23 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[94mif\u001b[0m galileo_disabled(): \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m 24 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mreturn\u001b[0m \u001b[94mNone\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m 25 \u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[94mreturn\u001b[0m func(*args, **kwargs) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m 26 \u001b[0m\u001b[2mโ”‚ \u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m 27 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[94mreturn\u001b[0m decorator \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m 28 \u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/core/\u001b[0m\u001b[1;33mfinish.py\u001b[0m:\u001b[94m61\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m in \u001b[92mfinish\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m 58 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[2m# Certain tasks require extra finish logic\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m 59 \u001b[0m\u001b[2mโ”‚ \u001b[0mdata_logger.logger_config.finish() \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m 60 \u001b[0m\u001b[2mโ”‚ \u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m 61 \u001b[2mโ”‚ \u001b[0mdata_logger.upload(last_epoch, create_data_embs=create_data_embs) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m 62 \u001b[0m\u001b[2mโ”‚ \u001b[0mupload_dq_log_file() \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m 63 \u001b[0m\u001b[2mโ”‚ \u001b[0mbody = \u001b[96mdict\u001b[0m( \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m 64 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0mproject_id=\u001b[96mstr\u001b[0m(config.current_project_id), \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/loggers/data_logg\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2;33mer/\u001b[0m\u001b[1;33mbase_data_logger.py\u001b[0m:\u001b[94m267\u001b[0m in \u001b[92mupload\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m264 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0mcreate_data_embs = \u001b[94mFalse\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m265 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m266 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[94mfor\u001b[0m split \u001b[95min\u001b[0m Split.get_valid_attributes(): \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m267 \u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[96mself\u001b[0m.upload_split( \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m268 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0mlocation, split, object_store, last_epoch, create_data_embs \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m269 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m270 \u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/loggers/data_logg\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2;33mer/\u001b[0m\u001b[1;33mbase_data_logger.py\u001b[0m:\u001b[94m295\u001b[0m in \u001b[92mupload_split\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m292 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mreturn\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m293 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0min_frame_split = vaex.open(\u001b[33mf\u001b[0m\u001b[33m\"\u001b[0m\u001b[33m{\u001b[0min_frame_path\u001b[33m}\u001b[0m\u001b[33m/*.\u001b[0m\u001b[33m{\u001b[0m\u001b[96mself\u001b[0m.INPUT_DATA_FILE_EXT\u001b[33m}\u001b[0m\u001b[33m\"\u001b[0m) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m294 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0min_frame_split = \u001b[96mself\u001b[0m.convert_large_string(in_frame_split) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m295 \u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[96mself\u001b[0m.upload_split_from_in_frame( \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m296 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0mobject_store, \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m297 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0min_frame_split, \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m298 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0msplit, \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/loggers/data_logg\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2;33mer/\u001b[0m\u001b[1;33msemantic_segmentation.py\u001b[0m:\u001b[94m166\u001b[0m in \u001b[92mupload_split_from_in_frame\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m163 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m164 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m165 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0mdir_name = \u001b[33mf\u001b[0m\u001b[33m\"\u001b[0m\u001b[33m{\u001b[0msplit_loc\u001b[33m}\u001b[0m\u001b[33m/0\u001b[0m\u001b[33m\"\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m166 \u001b[2mโ”‚ โ”‚ \u001b[0mout_frame = get_output_df( \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m167 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0mdir_name, \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m168 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0mprob_only=\u001b[94mFalse\u001b[0m, \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m169 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0msplit=split, \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/utils/\u001b[0m\u001b[1;33mvaex.py\u001b[0m:\u001b[94m192\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m in \u001b[92mget_output_df\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m189 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[2m# just open the processed file\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m190 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[94mif\u001b[0m os.path.isfile(out_frame_path): \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m191 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[94mreturn\u001b[0m vaex.open(out_frame_path) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m192 \u001b[2mโ”‚ \u001b[0mstr_cols = concat_hdf5_files(dir_name, prob_only) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m193 \u001b[0m\u001b[2mโ”‚ \u001b[0mout_frame = vaex.open(out_frame_path) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m194 \u001b[0m\u001b[2mโ”‚ \u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m195 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[94mif\u001b[0m split == Split.inference: \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/utils/\u001b[0m\u001b[1;33mhdf5_store.\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[1;33mpy\u001b[0m:\u001b[94m129\u001b[0m in \u001b[92mconcat_hdf5_files\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m126 \u001b[0m\u001b[2mโ”‚ \u001b[0mstr_cols = [] \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m127 \u001b[0m\u001b[2mโ”‚ \u001b[0mstores = {} \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m128 \u001b[0m\u001b[2mโ”‚ \u001b[0mfiles = os.listdir(location) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m129 \u001b[2mโ”‚ \u001b[0mdf = vaex.open(\u001b[33mf\u001b[0m\u001b[33m\"\u001b[0m\u001b[33m{\u001b[0mlocation\u001b[33m}\u001b[0m\u001b[33m/\u001b[0m\u001b[33m{\u001b[0mfiles[\u001b[94m0\u001b[0m]\u001b[33m}\u001b[0m\u001b[33m\"\u001b[0m) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m130 \u001b[0m\u001b[2mโ”‚ \u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m131 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[2m# Construct a store per column\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m132 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[94mif\u001b[0m prob_only: \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/\u001b[0m\u001b[1;33m__init__.py\u001b[0m:\u001b[94m244\u001b[0m in \u001b[92mopen\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m241 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m242 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0mds = vaex.dataset.open(path_output, fs_options=fs_options, fs=fs) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m243 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m244 \u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0mds = vaex.dataset.open(path, fs_options=fs_options, fs=fs, **kwargs) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m245 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0mdf = vaex.from_dataset(ds) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m246 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mif\u001b[0m df \u001b[95mis\u001b[0m \u001b[94mNone\u001b[0m: \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m247 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mif\u001b[0m os.path.exists(path): \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/\u001b[0m\u001b[1;33mdataset.py\u001b[0m:\u001b[94m81\u001b[0m in \u001b[92mopen\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m 78 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[94mfor\u001b[0m opener \u001b[95min\u001b[0m opener_classes: \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m 79 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[94mif\u001b[0m opener.quick_test(path, fs_options=fs_options, fs=fs): \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m 80 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mif\u001b[0m opener.can_open(path, fs_options=fs_options, fs=fs, *args, **kwargs): \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m 81 \u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mreturn\u001b[0m opener.open(path, fs_options=fs_options, fs=fs, *args, **kwargs) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m 82 \u001b[0m\u001b[2mโ”‚ \u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m 83 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[2m# otherwise try all openers\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m 84 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[94mfor\u001b[0m opener \u001b[95min\u001b[0m opener_classes: \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/\u001b[0m\u001b[1;33mdataset.py\u001b[0m:\u001b[94m1457\u001b[0m in \u001b[92mopen\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m1454 \u001b[0m\u001b[2mโ”‚ \u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m1455 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[1;95m@classmethod\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m1456 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mopen\u001b[0m(\u001b[96mcls\u001b[0m, path, *args, **kwargs): \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m1457 \u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[94mreturn\u001b[0m \u001b[96mcls\u001b[0m(path, *args, **kwargs) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m1458 \u001b[0m\u001b[2mโ”‚ \u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m1459 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mchunk_iterator\u001b[0m(\u001b[96mself\u001b[0m, columns, chunk_size=\u001b[94mNone\u001b[0m, reverse=\u001b[94mFalse\u001b[0m): \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m1460 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[94myield from\u001b[0m \u001b[96mself\u001b[0m._default_chunk_iterator(\u001b[96mself\u001b[0m._columns, columns, chunk_size, reve \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/hdf5/\u001b[0m\u001b[1;33mdataset.py\u001b[0m:\u001b[94m73\u001b[0m in \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[92m__init__\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m 70 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[96mself\u001b[0m._version = \u001b[94m1\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m 71 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[96mself\u001b[0m._load() \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m 72 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[94mif\u001b[0m \u001b[95mnot\u001b[0m write: \u001b[2m# in write mode, call freeze yourself, so the hashes are computed\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m 73 \u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[96mself\u001b[0m._freeze() \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m 74 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m 75 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[2m# make sure we set the row count, which otherwise freeze would do\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m 76 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[96mself\u001b[0m._set_row_count() \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/\u001b[0m\u001b[1;33mdataset.py\u001b[0m:\u001b[94m1501\u001b[0m in \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[92m_freeze\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m1498 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[94mdef\u001b[0m \u001b[92m_freeze\u001b[0m(\u001b[96mself\u001b[0m): \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m1499 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[96mself\u001b[0m._ids = frozendict(\u001b[96mself\u001b[0m._ids) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m1500 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[96mself\u001b[0m._columns = frozendict(\u001b[96mself\u001b[0m._columns) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m1501 \u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[96mself\u001b[0m._set_row_count() \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m1502 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[96mself\u001b[0m._frozen = \u001b[94mTrue\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m1503 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m._hash_cache_needs_write: \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m1504 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[96mself\u001b[0m._write_hash_info() \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/vaex/\u001b[0m\u001b[1;33mdataset.py\u001b[0m:\u001b[94m423\u001b[0m in \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[92m_set_row_count\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m 420 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[96mself\u001b[0m._row_count = \u001b[96mlen\u001b[0m(values[\u001b[94m0\u001b[0m]) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m 421 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[94mfor\u001b[0m name, value \u001b[95min\u001b[0m \u001b[96mlist\u001b[0m(\u001b[96mself\u001b[0m._columns.items())[\u001b[94m1\u001b[0m:]: \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m 422 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mlen\u001b[0m(value) != \u001b[96mself\u001b[0m._row_count: \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m 423 \u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mraise\u001b[0m \u001b[96mValueError\u001b[0m(\u001b[33mf\u001b[0m\u001b[33m'\u001b[0m\u001b[33mFirst columns has length \u001b[0m\u001b[33m{\u001b[0m\u001b[96mself\u001b[0m._row_count\u001b[33m}\u001b[0m\u001b[33m, while col\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m 424 \u001b[0m\u001b[2mโ”‚ \u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m 425 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[1;95m@property\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m 426 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mrow_count\u001b[0m(\u001b[96mself\u001b[0m): \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ\u001b[0m\n", - "\u001b[1;91mValueError: \u001b[0mFirst columns has length \u001b[1;36m3\u001b[0m, while column epoch has length \u001b[1;36m2\u001b[0m\n", - "\n", - "\u001b[3mDuring handling of the above exception, another exception occurred:\u001b[0m\n", - "\n", - "\u001b[31mโ•ญโ”€\u001b[0m\u001b[31mโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31mโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\u001b[0m\u001b[31mโ”€โ•ฎ\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/IPython/core/\u001b[0m\u001b[1;33minteractiveshell\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[1;33m.py\u001b[0m:\u001b[94m1975\u001b[0m in \u001b[92mwrapped\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m1972 \u001b[0m\u001b[2;33mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[33mhandlers to crash IPython.\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m1973 \u001b[0m\u001b[2;33mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[33m\"\"\"\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m1974 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mtry\u001b[0m: \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m1975 \u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0mstb = handler(\u001b[96mself\u001b[0m,etype,value,tb,tb_offset=tb_offset) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m1976 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mreturn\u001b[0m validate_stb(stb) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m1977 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mexcept\u001b[0m: \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m1978 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[2m# clear custom handler immediately\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/\u001b[0m\u001b[1;33manalytics.py\u001b[0m:\u001b[94m149\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m in \u001b[92mipython_exception_handler\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m146 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[2m# TODO: create internal logging endpoint\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m147 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mpass\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m148 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[2m# We need to call the default ipython exception handler to raise the error\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m149 \u001b[2mโ”‚ โ”‚ \u001b[0mshell.showtraceback((etype, evalue, tb), tb_offset=tb_offset) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m150 \u001b[0m\u001b[2mโ”‚ \u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m151 \u001b[0m\u001b[2mโ”‚ \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mtrack_exception_ipython\u001b[0m( \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m152 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[96mself\u001b[0m, \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/rich/\u001b[0m\u001b[1;33mtraceback.py\u001b[0m:\u001b[94m130\u001b[0m in \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[92mipy_show_traceback\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m127 \u001b[0m\u001b[2;90mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[33m\"\"\"wrap the default ip.showtraceback to store info for ip._showtraceback\"\"\"\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m128 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mnonlocal\u001b[0m tb_data \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m129 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0mtb_data = kwargs \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m130 \u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0mdefault_showtraceback(*args, **kwargs) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m131 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m132 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[94mdef\u001b[0m \u001b[92mipy_display_traceback\u001b[0m( \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m133 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m*args: Any, is_syntax: \u001b[96mbool\u001b[0m = \u001b[94mFalse\u001b[0m, **kwargs: Any \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/IPython/core/\u001b[0m\u001b[1;33minteractiveshell\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[1;33m.py\u001b[0m:\u001b[94m2116\u001b[0m in \u001b[92mshowtraceback\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m2113 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0mtraceback.print_exc() \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m2114 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mreturn\u001b[0m \u001b[94mNone\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m2115 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m2116 \u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[96mself\u001b[0m._showtraceback(etype, value, stb) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m2117 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mif\u001b[0m \u001b[96mself\u001b[0m.call_pdb: \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m2118 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[2m# drop into debugger\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m2119 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[96mself\u001b[0m.debugger(force=\u001b[94mTrue\u001b[0m) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/rich/\u001b[0m\u001b[1;33mtraceback.py\u001b[0m:\u001b[94m146\u001b[0m in \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[92mipy_display_traceback\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m143 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0mcompiled = tb_data.get(\u001b[33m\"\u001b[0m\u001b[33mrunning_compiled_code\u001b[0m\u001b[33m\"\u001b[0m, \u001b[94mFalse\u001b[0m) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m144 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0mtb_offset = tb_data.get(\u001b[33m\"\u001b[0m\u001b[33mtb_offset\u001b[0m\u001b[33m\"\u001b[0m, \u001b[94m1\u001b[0m \u001b[94mif\u001b[0m compiled \u001b[94melse\u001b[0m \u001b[94m0\u001b[0m) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m145 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[2m# remove ipython internal frames from trace with tb_offset\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m146 \u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mfor\u001b[0m _ \u001b[95min\u001b[0m \u001b[96mrange\u001b[0m(tb_offset): \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m147 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mif\u001b[0m tb \u001b[95mis\u001b[0m \u001b[94mNone\u001b[0m: \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m148 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mbreak\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m149 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0mtb = tb.tb_next \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ\u001b[0m\n", - "\u001b[1;91mTypeError: \u001b[0m\u001b[32m'NoneType'\u001b[0m object cannot be interpreted as an integer\n", - "\n", - "\u001b[3mDuring handling of the above exception, another exception occurred:\u001b[0m\n", - "\n", - "\u001b[31mโ•ญโ”€\u001b[0m\u001b[31mโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31mโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€\u001b[0m\u001b[31mโ”€โ•ฎ\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/IPython/core/\u001b[0m\u001b[1;33minteractiveshell\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[1;33m.py\u001b[0m:\u001b[94m3448\u001b[0m in \u001b[92mrun_ast_nodes\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m3445 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m): \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m3446 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0mcode = compiler(mod, cell_name, mode) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m3447 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0masy = compare(code) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m3448 \u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mif\u001b[0m \u001b[94mawait\u001b[0m \u001b[96mself\u001b[0m.run_code(code, result, async_=asy): \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m3449 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mreturn\u001b[0m \u001b[94mTrue\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m3450 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m3451 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[2m# Flush softspace\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/IPython/core/\u001b[0m\u001b[1;33minteractiveshell\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[1;33m.py\u001b[0m:\u001b[94m3526\u001b[0m in \u001b[92mrun_code\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m3523 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0metype, value, tb = sys.exc_info() \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m3524 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mif\u001b[0m result \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[94mNone\u001b[0m: \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m3525 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0mresult.error_in_exec = value \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m3526 \u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[96mself\u001b[0m.CustomTB(etype, value, tb) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m3527 \u001b[0m\u001b[2mโ”‚ โ”‚ \u001b[0m\u001b[94mexcept\u001b[0m: \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m3528 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mif\u001b[0m result \u001b[95mis\u001b[0m \u001b[95mnot\u001b[0m \u001b[94mNone\u001b[0m: \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m3529 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0mresult.error_in_exec = sys.exc_info()[\u001b[94m1\u001b[0m] \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2;33m/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/IPython/core/\u001b[0m\u001b[1;33minteractiveshell\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[1;33m.py\u001b[0m:\u001b[94m1985\u001b[0m in \u001b[92mwrapped\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m1982 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0mstb = \u001b[96mself\u001b[0m.InteractiveTB.structured_traceback(*sys.exc_info()) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m1983 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[96mprint\u001b[0m(\u001b[96mself\u001b[0m.InteractiveTB.stb2text(stb)) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m1984 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[96mprint\u001b[0m(\u001b[33m\"\u001b[0m\u001b[33mThe original exception:\u001b[0m\u001b[33m\"\u001b[0m) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[31mโฑ \u001b[0m1985 \u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0mstb = \u001b[96mself\u001b[0m.InteractiveTB.structured_traceback( \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m1986 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m(etype,value,tb), tb_offset=tb_offset \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m1987 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m) \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ”‚\u001b[0m \u001b[2m1988 \u001b[0m\u001b[2mโ”‚ โ”‚ โ”‚ โ”‚ \u001b[0m\u001b[94mreturn\u001b[0m stb \u001b[31mโ”‚\u001b[0m\n", - "\u001b[31mโ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ\u001b[0m\n", - "\u001b[1;91mTypeError: \u001b[0m\u001b[1;35mstructured_traceback\u001b[0m\u001b[1m(\u001b[0m\u001b[1m)\u001b[0m missing \u001b[1;36m1\u001b[0m required positional argument: \u001b[32m'evalue'\u001b[0m\n" + "'https://console.dev.rungalileo.io/insights?projectId=7e78e642-cc40-4f5c-8f45-b2cadd8d674a&runId=617746f8-14a0-4e45-b27a-903492fe7343&split=training&metric=f1&depHigh=1&depLow=0&taskType=6'" ] }, + "execution_count": 6, "metadata": {}, - "output_type": "display_data" + "output_type": "execute_result" } ], "source": [ From 27c9e533af493ac63156a60abce2fc6d4fdbd1bc Mon Sep 17 00:00:00 2001 From: Derek Date: Thu, 11 May 2023 13:56:45 -0700 Subject: [PATCH 05/16] Correction --- dataquality/loggers/model_logger/base_model_logger.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dataquality/loggers/model_logger/base_model_logger.py b/dataquality/loggers/model_logger/base_model_logger.py index 7dbd6ba6b..b82001f03 100644 --- a/dataquality/loggers/model_logger/base_model_logger.py +++ b/dataquality/loggers/model_logger/base_model_logger.py @@ -17,6 +17,7 @@ from dataquality.utils.ampli import AmpliMetric from dataquality.utils.dq_logger import get_dq_logger from dataquality.utils.hdf5_store import _save_hdf5_file +from dataquality.utils.thread_pool import ThreadPoolManager analytics = Analytics(ApiClient, config) # type: ignore @@ -98,8 +99,7 @@ def log(self) -> None: # global variables (cur_split and cur_epoch) that are subject to change # between subsequent threads self.set_split_epoch() - # ThreadPoolManager.add_thread(target=self._add_threaded_log) - self._add_threaded_log() + ThreadPoolManager.add_thread(target=self._add_threaded_log) def write_model_output(self, data: Dict) -> None: """Creates an hdf5 file from the data dict""" From efe4d3995e4438a72ec8b4dda047a7caec080e12 Mon Sep 17 00:00:00 2001 From: Derek Date: Thu, 11 May 2023 14:10:22 -0700 Subject: [PATCH 06/16] dep defaulted to none --- .gitignore | 3 +- .../model_logger/semantic_segmentation.py | 4 +- dataquality/schemas/semantic_segmentation.py | 2 +- docs/cv/coco_deeplab_hooks.ipynb | 257 +----------------- 4 files changed, 17 insertions(+), 249 deletions(-) diff --git a/.gitignore b/.gitignore index 8bd0bf295..7030924a4 100644 --- a/.gitignore +++ b/.gitignore @@ -179,4 +179,5 @@ coco.ipynb # SemSeg CV_datasets/ -coco_hf_dataset.py \ No newline at end of file +coco_hf_dataset.py +coco_deeplab_hooks.ipynb \ No newline at end of file diff --git a/dataquality/loggers/model_logger/semantic_segmentation.py b/dataquality/loggers/model_logger/semantic_segmentation.py index f8b411ab6..c665bf8a1 100644 --- a/dataquality/loggers/model_logger/semantic_segmentation.py +++ b/dataquality/loggers/model_logger/semantic_segmentation.py @@ -135,7 +135,7 @@ def get_polygon_data( image_ids.append(image_id) preds.append(polygon.label_idx) golds.append(-1) - data_error_potentials.append(0.0) + data_error_potentials.append(polygon.data_error_potential) errors.append(polygon.error_type.value) upload_polygon_contours( polygon, self.logger_config.polygon_idx, self.contours_path @@ -147,7 +147,7 @@ def get_polygon_data( image_ids.append(image_id) preds.append(-1) golds.append(polygon.label_idx) - data_error_potentials.append(0.0) + data_error_potentials.append(polygon.data_error_potential) errors.append(polygon.error_type.value) upload_polygon_contours( polygon, self.logger_config.polygon_idx, self.contours_path diff --git a/dataquality/schemas/semantic_segmentation.py b/dataquality/schemas/semantic_segmentation.py index 064d7de7c..56b5936cd 100644 --- a/dataquality/schemas/semantic_segmentation.py +++ b/dataquality/schemas/semantic_segmentation.py @@ -52,7 +52,7 @@ class Polygon(BaseModel): misclassified_class_label: Optional[int] = None error_type: ErrorType = ErrorType.none contours: List[Contour] - data_error_potential: float = 0.0 + data_error_potential: Optional[float] = None @property def contours_opencv(self) -> List[np.ndarray]: diff --git a/docs/cv/coco_deeplab_hooks.ipynb b/docs/cv/coco_deeplab_hooks.ipynb index 74188ee25..c51512882 100644 --- a/docs/cv/coco_deeplab_hooks.ipynb +++ b/docs/cv/coco_deeplab_hooks.ipynb @@ -2,20 +2,9 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'ds = load_dataset(\\n \"CVdatasets/CocoSegmentationOnlyVal5000\",\\n use_auth_token=\"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\"\\n)'" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# !pip install datasets evaluate torch torchvision \n", "import os\n", @@ -40,17 +29,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found dataset, there are 4030 images and 4030 masks\n" - ] - } - ], + "outputs": [], "source": [ "# download the data from our public gcs bucket and save it to disk\n", "# dataset_path, img_path, mask_path = download_gcs_data()\n", @@ -84,7 +65,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -93,19 +74,7 @@ "outputId": "85d91dc9-405e-4f02-a6bf-6a88f9502412", "tags": [] }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using cache found in /Users/derek/.cache/torch/hub/pytorch_vision_v0.10.0\n", - "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n", - " warnings.warn(\n", - "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=DeepLabV3_ResNet50_Weights.COCO_WITH_VOC_LABELS_V1`. You can also use `weights=DeepLabV3_ResNet50_Weights.DEFAULT` to get the most up-to-date weights.\n", - " warnings.warn(msg)\n" - ] - } - ], + "outputs": [], "source": [ "\n", "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n", @@ -119,46 +88,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/core/__init__.py:27: GalileoWarning: configure is deprecated, use dq.set_console_url and dq.login\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "๐Ÿ“ก https://console.dev.rungalileo.io\n", - "๐Ÿ”ญ Logging you into Galileo\n", - "\n", - "๐Ÿš€ You're logged in to Galileo as galileo@rungalileo.io!\n", - "โœจ Initializing existing public project 'Derek-Elliott-Proj'\n", - "๐Ÿƒโ€โ™‚๏ธ Fetching existing run 'test-polygon-df'\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/core/init.py:171: GalileoWarning: Run: Derek-Elliott-Proj/test-polygon-df already exists! The existing run will get overwritten on call to finish()!\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "๐Ÿ›ฐ Connected to existing project 'Derek-Elliott-Proj', and existing run 'test-polygon-df'.\n", - "๐Ÿš€ Found existing run labels. Setting labels for run to ['background', 'airplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'dining table', 'dog', 'horse', 'motorcycle', 'person', 'potted plant', 'sheep', 'couch', 'train', 'tv']. You do not need to set labels for this run.\n" - ] - } - ], + "outputs": [], "source": [ "try:\n", " import dataquality as dq\n", @@ -205,43 +137,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-05-11 13:49:23.693790: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", - "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "We assume the dataloaders passed only have transforms that Tensor, Resize, and Normalize the image and mask\n", - "โ€ผ Any cropping or shearing transforms passed will lead to unexpected results\n", - "See docs at https://dq.readthedocs.io/en/latest/ (placeholder) for more info \n", - " \n", - "\n", - "Found layer classifier in model layers: backbone, classifier\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True).\n", - " warnings.warn(\n", - "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/torch/cuda/amp/grad_scaler.py:120: UserWarning: torch.cuda.amp.GradScaler is enabled, but CUDA is not available. Disabling.\n", - " warnings.warn(\"torch.cuda.amp.GradScaler is enabled, but CUDA is not available. Disabling.\")\n", - "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/torch/amp/autocast_mode.py:204: UserWarning: User provided device_type of 'cuda', but CUDA is not available. Disabling\n", - " warnings.warn('User provided device_type of \\'cuda\\', but CUDA is not available. Disabling')\n", - " 50%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ | 1/2 [00:03<00:03, 3.26s/it]\n" - ] - } - ], + "outputs": [], "source": [ "from dataquality.integrations.cv.torch.semantic_segmentation import watch\n", "watch(\n", @@ -278,140 +176,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/derek/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--mean_iou/08bc20f4f895f3caf75fb9e3fada1404bded3c3265243d05327cbb3b9326ffe9/mean_iou.py:259: RuntimeWarning: invalid value encountered in divide\n", - " iou = total_area_intersect / total_area_union\n", - "/Users/derek/.cache/huggingface/modules/evaluate_modules/metrics/evaluate-metric--mean_iou/08bc20f4f895f3caf75fb9e3fada1404bded3c3265243d05327cbb3b9326ffe9/mean_iou.py:260: RuntimeWarning: invalid value encountered in divide\n", - " acc = total_area_intersect / total_area_label\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Logging 2 samples [########################################] 100.00% elapsed time : 0.29s = 0.0m = 0.0h\n", - "Logging 2 samples [########################################] 100.00% elapsed time : 0.16s = 0.0m = 0.0h\n", - "Logging 2 samples [########################################] 100.00% elapsed time : 0.34s = 0.0m = 0.0h \n", - "Logging 2 samples [########################################] 100.00% elapsed time : 0.24s = 0.0m = 0.0h\n", - " โ˜๏ธ Uploading Data\n", - "CuML libraries not found, running standard process. For faster Galileo processing, consider installing\n", - "`pip install 'dataquality[cuda]' --extra-index-url=https://pypi.nvidia.com/`\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "20400cf290fb49ac999011f681c1d08a", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Uploading data to Galileo: 0%| | 0.00/25.8k [00:00 Date: Thu, 11 May 2023 14:34:53 -0700 Subject: [PATCH 07/16] Update dataquality/loggers/model_logger/semantic_segmentation.py Co-authored-by: Elliott --- dataquality/loggers/model_logger/semantic_segmentation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dataquality/loggers/model_logger/semantic_segmentation.py b/dataquality/loggers/model_logger/semantic_segmentation.py index c665bf8a1..0498d6dd1 100644 --- a/dataquality/loggers/model_logger/semantic_segmentation.py +++ b/dataquality/loggers/model_logger/semantic_segmentation.py @@ -227,6 +227,7 @@ def _get_data_dict(self) -> Dict: ) polygon_data = self.get_polygon_data(pred_polygons_batch, gold_polygons_batch) + n_polygons = polygon_data["image_id"] if self.split == Split.inference: polygon_data["inference_name"] = [self.inference_name] * len( polygon_data["image_id"] From 464d719809dfcbdec822f44f4a4917b225eba5ac Mon Sep 17 00:00:00 2001 From: Derek Date: Thu, 11 May 2023 14:36:13 -0700 Subject: [PATCH 08/16] Revised on comments --- .../model_logger/semantic_segmentation.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/dataquality/loggers/model_logger/semantic_segmentation.py b/dataquality/loggers/model_logger/semantic_segmentation.py index 0498d6dd1..c9cbff099 100644 --- a/dataquality/loggers/model_logger/semantic_segmentation.py +++ b/dataquality/loggers/model_logger/semantic_segmentation.py @@ -194,21 +194,25 @@ def _get_data_dict(self) -> Dict: # Errors calculate_misclassified_polygons_batch(self.pred_masks, gold_polygons_batch) calculate_undetected_polygons_batch(self.pred_masks, gold_polygons_batch) + heights = [img.shape[-1] for img in self.gold_masks] + widths = [img.shape[-2] for img in self.gold_masks] calculate_dep_polygons_batch( gold_polygons_batch, dep_heatmaps.numpy(), - height=[img.shape[-1] for img in self.gold_masks], - width=[img.shape[-2] for img in self.gold_masks], + height=heights, + width=widths, ) + + image_data = { "image": [ f"{self.bucket_name}/{pth}" for pth in self.image_paths ], # E.g. https://storage.googleapis.com/bucket_name/.../image_id.png "id": self.image_ids, - "height": [img.shape[-1] for img in self.gold_masks], - "width": [img.shape[-2] for img in self.gold_masks], + "height": heights, + "width": widths, "image_data_error_potential": image_dep, "mean_lm_score": [i for i in mean_mislabeled], "mean_iou": iou, @@ -229,10 +233,8 @@ def _get_data_dict(self) -> Dict: polygon_data = self.get_polygon_data(pred_polygons_batch, gold_polygons_batch) n_polygons = polygon_data["image_id"] if self.split == Split.inference: - polygon_data["inference_name"] = [self.inference_name] * len( - polygon_data["image_id"] - ) + polygon_data["inference_name"] = [self.inference_name] * n_polygons else: - polygon_data["epoch"] = [self.epoch] * len(polygon_data["image_id"]) + polygon_data["epoch"] = [self.epoch] * n_polygons return polygon_data From ee92eb2995f1152604478fb2f536786af373f474 Mon Sep 17 00:00:00 2001 From: Derek Date: Fri, 12 May 2023 07:18:16 -0700 Subject: [PATCH 09/16] Linting --- dataquality/loggers/model_logger/semantic_segmentation.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/dataquality/loggers/model_logger/semantic_segmentation.py b/dataquality/loggers/model_logger/semantic_segmentation.py index c9cbff099..f98adf517 100644 --- a/dataquality/loggers/model_logger/semantic_segmentation.py +++ b/dataquality/loggers/model_logger/semantic_segmentation.py @@ -203,8 +203,6 @@ def _get_data_dict(self) -> Dict: height=heights, width=widths, ) - - image_data = { "image": [ From 936d1c791d6532fa02f37b9fc33d2de4f4ac0676 Mon Sep 17 00:00:00 2001 From: Derek Date: Fri, 12 May 2023 08:52:12 -0700 Subject: [PATCH 10/16] Pyproject change --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index e834ca2d9..620e974c8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -104,6 +104,7 @@ py37 = [ "cachetools>=5.2.0", "types-cachetools>=5.3.0.0", "importlib-metadata<5.0.0", + "typing-extension>=4.5.0" ] evaluate = [ From 24234948257ca11b971eba5145dcb74041e70123 Mon Sep 17 00:00:00 2001 From: Derek Date: Fri, 12 May 2023 08:59:09 -0700 Subject: [PATCH 11/16] Typo --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 620e974c8..4916b64bb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -104,7 +104,7 @@ py37 = [ "cachetools>=5.2.0", "types-cachetools>=5.3.0.0", "importlib-metadata<5.0.0", - "typing-extension>=4.5.0" + "typing-extensions>=4.5.0" ] evaluate = [ From 631be75514bc5037df24c709d29216d2ddf773c2 Mon Sep 17 00:00:00 2001 From: Derek Date: Fri, 12 May 2023 09:07:25 -0700 Subject: [PATCH 12/16] Typing version change --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 4916b64bb..62e047958 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -104,7 +104,7 @@ py37 = [ "cachetools>=5.2.0", "types-cachetools>=5.3.0.0", "importlib-metadata<5.0.0", - "typing-extensions>=4.5.0" + "typing-extensions>=4.0.0.0" ] evaluate = [ From 5ca3336d01bb9778ec05f6f5963ae68bb38af61f Mon Sep 17 00:00:00 2001 From: Derek Date: Fri, 12 May 2023 09:07:57 -0700 Subject: [PATCH 13/16] Typing version change --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 62e047958..b9976b9d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -104,7 +104,7 @@ py37 = [ "cachetools>=5.2.0", "types-cachetools>=5.3.0.0", "importlib-metadata<5.0.0", - "typing-extensions>=4.0.0.0" + "typing-extensions<=4.0.0.0" ] evaluate = [ From a12eb748b7bc05f81cca9d4b4b40b9ec7d265bdb Mon Sep 17 00:00:00 2001 From: Derek Date: Fri, 12 May 2023 10:14:53 -0700 Subject: [PATCH 14/16] intermediate --- .../cv/torch/semantic_segmentation.py | 1 + .../model_logger/semantic_segmentation.py | 4 +- docs/cv/coco_deeplab_hooks.ipynb | 135 ++++++++++++++++-- 3 files changed, 125 insertions(+), 15 deletions(-) diff --git a/dataquality/integrations/cv/torch/semantic_segmentation.py b/dataquality/integrations/cv/torch/semantic_segmentation.py index 925b5c671..4cd497c0e 100644 --- a/dataquality/integrations/cv/torch/semantic_segmentation.py +++ b/dataquality/integrations/cv/torch/semantic_segmentation.py @@ -322,6 +322,7 @@ def _on_step_end(self) -> None: # do not log if we are not in the final inference loop if not self.called_finish: return + print('logging') logger = SemanticSegmentationModelLogger( bucket_name=self.bucket_name, image_paths=image_paths, diff --git a/dataquality/loggers/model_logger/semantic_segmentation.py b/dataquality/loggers/model_logger/semantic_segmentation.py index f98adf517..a4df54ae8 100644 --- a/dataquality/loggers/model_logger/semantic_segmentation.py +++ b/dataquality/loggers/model_logger/semantic_segmentation.py @@ -231,8 +231,8 @@ def _get_data_dict(self) -> Dict: polygon_data = self.get_polygon_data(pred_polygons_batch, gold_polygons_batch) n_polygons = polygon_data["image_id"] if self.split == Split.inference: - polygon_data["inference_name"] = [self.inference_name] * n_polygons + polygon_data["inference_name"] = [self.inference_name] * len(n_polygons) else: - polygon_data["epoch"] = [self.epoch] * n_polygons + polygon_data["epoch"] = [self.epoch] * len(n_polygons) return polygon_data diff --git a/docs/cv/coco_deeplab_hooks.ipynb b/docs/cv/coco_deeplab_hooks.ipynb index c51512882..1e7300697 100644 --- a/docs/cv/coco_deeplab_hooks.ipynb +++ b/docs/cv/coco_deeplab_hooks.ipynb @@ -2,9 +2,20 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'ds = load_dataset(\\n \"CVdatasets/CocoSegmentationOnlyVal5000\",\\n use_auth_token=\"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\"\\n)'" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# !pip install datasets evaluate torch torchvision \n", "import os\n", @@ -29,9 +40,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found dataset, there are 4030 images and 4030 masks\n" + ] + } + ], "source": [ "# download the data from our public gcs bucket and save it to disk\n", "# dataset_path, img_path, mask_path = download_gcs_data()\n", @@ -65,7 +84,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -74,7 +93,19 @@ "outputId": "85d91dc9-405e-4f02-a6bf-6a88f9502412", "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using cache found in /Users/derek/.cache/torch/hub/pytorch_vision_v0.10.0\n", + "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n", + " warnings.warn(\n", + "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=DeepLabV3_ResNet50_Weights.COCO_WITH_VOC_LABELS_V1`. You can also use `weights=DeepLabV3_ResNet50_Weights.DEFAULT` to get the most up-to-date weights.\n", + " warnings.warn(msg)\n" + ] + } + ], "source": [ "\n", "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n", @@ -88,9 +119,45 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/core/__init__.py:27: GalileoWarning: configure is deprecated, use dq.set_console_url and dq.login\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿ“ก https://console.dev.rungalileo.io\n", + "๐Ÿ”ญ Logging you into Galileo\n", + "\n", + "๐Ÿš€ You're logged in to Galileo as galileo@rungalileo.io!\n", + "โœจ Initializing existing public project 'Derek-Elliott-Proj'\n", + "๐Ÿƒโ€โ™‚๏ธ Fetching existing run 'polygon_dep'\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/dataquality/core/init.py:171: GalileoWarning: Run: Derek-Elliott-Proj/polygon_dep already exists! The existing run will get overwritten on call to finish()!\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿ›ฐ Connected to existing project 'Derek-Elliott-Proj', and existing run 'polygon_dep'.\n" + ] + } + ], "source": [ "try:\n", " import dataquality as dq\n", @@ -109,7 +176,7 @@ "import dataquality as dq\n", "dq.configure()\n", "\n", - "dq.init(\"semantic_segmentation\", \"Derek-Elliott-Proj\", 'test-polygon-df')\n", + "dq.init(\"semantic_segmentation\", \"Derek-Elliott-Proj\", 'polygon_dep')\n", "class_dict = { 'background': 0,\n", " 'airplane': 1,\n", " 'bicycle': 2,\n", @@ -137,9 +204,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-05-12 10:06:21.051891: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "We assume the dataloaders passed only have transforms that Tensor, Resize, and Normalize the image and mask\n", + "โ€ผ Any cropping or shearing transforms passed will lead to unexpected results\n", + "See docs at https://dq.readthedocs.io/en/latest/ (placeholder) for more info \n", + " \n", + "\n", + "Found layer classifier in model layers: backbone, classifier\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True).\n", + " warnings.warn(\n", + "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/torch/cuda/amp/grad_scaler.py:120: UserWarning: torch.cuda.amp.GradScaler is enabled, but CUDA is not available. Disabling.\n", + " warnings.warn(\"torch.cuda.amp.GradScaler is enabled, but CUDA is not available. Disabling.\")\n", + "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/torch/amp/autocast_mode.py:204: UserWarning: User provided device_type of 'cuda', but CUDA is not available. Disabling\n", + " warnings.warn('User provided device_type of \\'cuda\\', but CUDA is not available. Disabling')\n", + " 50%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ | 1/2 [00:03<00:03, 3.28s/it]\n" + ] + } + ], "source": [ "from dataquality.integrations.cv.torch.semantic_segmentation import watch\n", "watch(\n", @@ -176,9 +277,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "logging\n" + ] + } + ], "source": [ "dq.finish()" ] From f6ddd41e8579f43e205cd1a5518cdd62c74ff959 Mon Sep 17 00:00:00 2001 From: Derek Date: Fri, 12 May 2023 11:20:14 -0700 Subject: [PATCH 15/16] Final commit --- docs/cv/coco_deeplab_hooks.ipynb | 145 ++++++++++++++++++++++++++----- 1 file changed, 122 insertions(+), 23 deletions(-) diff --git a/docs/cv/coco_deeplab_hooks.ipynb b/docs/cv/coco_deeplab_hooks.ipynb index 1e7300697..3a2a278ce 100644 --- a/docs/cv/coco_deeplab_hooks.ipynb +++ b/docs/cv/coco_deeplab_hooks.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -11,7 +11,7 @@ "'ds = load_dataset(\\n \"CVdatasets/CocoSegmentationOnlyVal5000\",\\n use_auth_token=\"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\"\\n)'" ] }, - "execution_count": 1, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -40,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -84,7 +84,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -119,7 +119,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -204,17 +204,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 11, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-05-12 10:06:21.051891: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", - "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" - ] - }, { "name": "stdout", "output_type": "stream", @@ -231,13 +223,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/torchvision/transforms/functional.py:1603: UserWarning: The default value of the antialias parameter of all the resizing transforms (Resize(), RandomResizedCrop(), etc.) will change from None to True in v0.17, in order to be consistent across the PIL and Tensor backends. To suppress this warning, directly pass antialias=True (recommended, future default), antialias=None (current default, which means False for Tensors and True for PIL), or antialias=False (only works on Tensors - PIL will still use antialiasing). This also applies if you are using the inference transforms from the models weights: update the call to weights.transforms(antialias=True).\n", - " warnings.warn(\n", - "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/torch/cuda/amp/grad_scaler.py:120: UserWarning: torch.cuda.amp.GradScaler is enabled, but CUDA is not available. Disabling.\n", - " warnings.warn(\"torch.cuda.amp.GradScaler is enabled, but CUDA is not available. Disabling.\")\n", - "/Users/derek/Desktop/dataquality/.venv/lib/python3.9/site-packages/torch/amp/autocast_mode.py:204: UserWarning: User provided device_type of 'cuda', but CUDA is not available. Disabling\n", - " warnings.warn('User provided device_type of \\'cuda\\', but CUDA is not available. Disabling')\n", - " 50%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ | 1/2 [00:03<00:03, 3.28s/it]\n" + " 50%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ | 1/2 [00:02<00:02, 2.93s/it]\n" ] } ], @@ -277,15 +263,128 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "logging\n" + "Logging 2 samples [########################################] 100.00% elapsed time : 0.22s = 0.0m = 0.0h\n", + "Logging 2 samples [########################################] 100.00% elapsed time : 0.14s = 0.0m = 0.0h\n", + "Logging 2 samples [########################################] 100.00% elapsed time : 0.19s = 0.0m = 0.0h\n", + "Logging 2 samples [########################################] 100.00% elapsed time : 0.14s = 0.0m = 0.0h \n", + " โ˜๏ธ Uploading Data\n", + "CuML libraries not found, running standard process. For faster Galileo processing, consider installing\n", + "`pip install 'dataquality[cuda]' --extra-index-url=https://pypi.nvidia.com/`\n" ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "5a4f12b6d15045ccb5aeed25e33d9c88", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Uploading data to Galileo: 0%| | 0.00/25.8k [00:00 Date: Fri, 12 May 2023 11:57:06 -0700 Subject: [PATCH 16/16] Linting --- dataquality/integrations/cv/torch/semantic_segmentation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataquality/integrations/cv/torch/semantic_segmentation.py b/dataquality/integrations/cv/torch/semantic_segmentation.py index 4cd497c0e..70b705097 100644 --- a/dataquality/integrations/cv/torch/semantic_segmentation.py +++ b/dataquality/integrations/cv/torch/semantic_segmentation.py @@ -322,7 +322,7 @@ def _on_step_end(self) -> None: # do not log if we are not in the final inference loop if not self.called_finish: return - print('logging') + print("logging") logger = SemanticSegmentationModelLogger( bucket_name=self.bucket_name, image_paths=image_paths,