generated from dbt-labs/dbt-oss-template
-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add generic snowplow tracker with file logger for testing
- Loading branch information
1 parent
e671471
commit b3494cc
Showing
4 changed files
with
164 additions
and
66 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,71 +1,121 @@ | ||
from dataclasses import dataclass | ||
import logging | ||
from logging.handlers import RotatingFileHandler | ||
from typing import Optional | ||
from typing import Any, Dict, Optional, Protocol, Self | ||
|
||
from snowplow_tracker import Emitter, Tracker | ||
import snowplow_tracker | ||
from snowplow_tracker.typing import FailureCallback | ||
|
||
from dbt_common.events.base_types import EventMsg | ||
from dbt_common.events.base_types import EventMsg, msg_to_dict | ||
from dbt_common.events.format import timestamp_to_datetime_string | ||
|
||
|
||
@dataclass
class TrackerConfig:
    """Settings consumed by the ``from_config`` constructors of the trackers.

    ``FileTracker.from_config`` reads ``name``, ``output_file_name``,
    ``output_file_max_bytes`` and ``invocation_id``.
    ``SnowplowTracker.from_config`` reads ``endpoint``, ``protocol``,
    ``on_failure``, ``msg_schemas`` and ``invocation_id``.
    """

    invocation_id: Optional[str] = None
    # Looked up by event name (``msg.info.name``) when an event is tracked.
    msg_schemas: Optional[Dict[str, str]] = None
    endpoint: Optional[str] = None
    # Declared exactly once. "https" is the value that was already winning
    # when the field appeared twice (the later assignment overrides).
    protocol: Optional[str] = "https"
    on_failure: Optional[FailureCallback] = None
    name: Optional[str] = None
    output_file_name: Optional[str] = None
    output_file_max_bytes: Optional[int] = 10 * 1024 * 1024  # 10 MiB
|
||
|
||
class _Tracker: | ||
def __init__(self, config: TrackerConfig) -> None: | ||
self.invocation_id: Optional[str] = config.invocation_id | ||
class Tracker(Protocol): | ||
@classmethod | ||
def from_config(cls, config: TrackerConfig) -> Self: | ||
... | ||
|
||
if all([config.name, config.output_file_name]): | ||
file_handler = RotatingFileHandler( | ||
filename=str(config.output_file_name), | ||
encoding="utf8", | ||
maxBytes=config.output_file_max_bytes, # type: ignore | ||
backupCount=5, | ||
) | ||
self._tracker = self._python_file_logger(config.name, file_handler) | ||
def track(self, msg: EventMsg) -> None: | ||
... | ||
|
||
elif all([config.endpoint, config.protocol]): | ||
self._tracker = self._snowplow_tracker(config.endpoint, config.protocol) | ||
def enable_tracking(self, cookie: Dict[str, Any]) -> None: | ||
... | ||
|
||
def track(self, msg: EventMsg) -> str: | ||
raise NotImplementedError() | ||
def disable_tracking(self) -> None: | ||
... | ||
|
||
def _python_file_logger(self, name: str, handler: logging.Handler) -> logging.Logger: | ||
log = logging.getLogger(name) | ||
log.setLevel(logging.DEBUG) | ||
handler.setFormatter(logging.Formatter(fmt="%(message)s")) | ||
log.handlers.clear() | ||
log.propagate = False | ||
log.addHandler(handler) | ||
return log | ||
|
||
def _snowplow_tracker( | ||
class FileTracker(Tracker):
    """Tracker that writes one line per event to a size-rotated log file."""

    def __init__(self, logger: logging.Logger, invocation_id: Optional[str]) -> None:
        """Store the logger that receives the event lines and the invocation id."""
        self.logger = logger
        self.invocation_id = invocation_id

    @classmethod
    def from_config(cls, config: TrackerConfig) -> Self:
        """Build a tracker backed by a dedicated, non-propagating logger.

        Args:
            config: Must provide ``name`` (the logger name) and
                ``output_file_name`` (the log file path).

        Raises:
            ValueError: If ``name`` or ``output_file_name`` is missing.
        """
        # logging.getLogger(None) returns the ROOT logger; without this guard a
        # missing name would wipe the application's root handlers below.
        if not config.name or not config.output_file_name:
            raise ValueError(
                "FileTracker requires both `name` and `output_file_name` to be set"
            )

        file_handler = RotatingFileHandler(
            filename=config.output_file_name,
            # None would break the handler's rollover comparison; 0 disables
            # size-based rollover.
            maxBytes=config.output_file_max_bytes or 0,
            backupCount=5,
            encoding="utf8",
        )
        file_handler.setFormatter(logging.Formatter(fmt="%(message)s"))

        logger = logging.getLogger(config.name)
        logger.setLevel(logging.DEBUG)
        # Detach AND close previous handlers: clearing the list alone would
        # leave their file descriptors open when the same name is reused.
        for stale_handler in list(logger.handlers):
            logger.removeHandler(stale_handler)
            stale_handler.close()
        logger.propagate = False
        logger.addHandler(file_handler)
        return cls(logger, config.invocation_id)

    def track(self, msg: EventMsg) -> None:
        """Write ``"<timestamp> | <message>"`` for ``msg`` at DEBUG level."""
        ts: str = timestamp_to_datetime_string(msg.info.ts)
        log_line = f"{ts} | {msg.info.msg}"
        self.logger.debug(log_line)

    def enable_tracking(self, cookie: Dict[str, Any]) -> None:
        """No-op: the file tracker keeps no per-user state."""
        pass

    def disable_tracking(self) -> None:
        """No-op: the file tracker keeps no per-user state."""
        pass
|
||
|
||
class SnowplowTracker(Tracker):
    """Tracker that sends events through a ``snowplow_tracker.Tracker``."""

    def __init__(
        self,
        tracker: snowplow_tracker.Tracker,
        msg_schemas: Dict[str, str],
        invocation_id: Optional[str],
    ) -> None:
        """Store the Snowplow tracker, the event-name -> schema map and the invocation id."""
        self.tracker = tracker
        # TrackerConfig.msg_schemas is Optional; normalise None so track() can
        # always call .get() on it. A caller-supplied dict is kept as-is.
        self.msg_schemas = msg_schemas if msg_schemas is not None else {}
        self.invocation_id = invocation_id

    @classmethod
    def from_config(cls, config: TrackerConfig) -> Self:
        """Build the emitter and tracker described by ``config``.

        Raises:
            ValueError: If ``config.endpoint`` is missing.
        """
        if not config.endpoint:
            raise ValueError("SnowplowTracker requires `endpoint` to be set")

        emitter = snowplow_tracker.Emitter(
            config.endpoint,
            config.protocol,
            method="post",
            batch_size=30,
            on_failure=config.on_failure,
            byte_limit=None,
            request_timeout=5.0,
        )
        tracker = snowplow_tracker.Tracker(
            emitters=emitter,
            namespace="cf",
            app_id="dbt",
        )
        return cls(tracker, config.msg_schemas, config.invocation_id)

    def track(self, msg: EventMsg) -> None:
        """Send ``msg`` as a structured event labelled with the invocation id.

        The message payload is attached as a self-describing context only when
        a schema is registered for ``msg.info.name``.
        """
        schema = self.msg_schemas.get(msg.info.name)
        # NOTE(review): with no registered schema the event is sent without a
        # context rather than with a None schema URI, which presumably fails
        # collector-side validation. Confirm that dropping the payload is
        # preferred over dropping the event.
        context = (
            [snowplow_tracker.SelfDescribingJson(schema, msg_to_dict(msg))]
            if schema
            else None
        )
        event = snowplow_tracker.StructuredEvent(
            category="dbt",
            action=msg.info.name,
            label=self.invocation_id,
            context=context,
        )
        self.tracker.track(event)

    def enable_tracking(self, cookie: Dict[str, Any]) -> None:
        """Attach a subject whose user id is ``cookie["id"]`` to the tracker."""
        subject = snowplow_tracker.Subject()
        subject.set_user_id(cookie.get("id"))
        self.tracker.set_subject(subject)

    def disable_tracking(self) -> None:
        """Remove the subject from the tracker."""
        self.tracker.set_subject(None)
Empty file.