-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add example python udaf * Fixing the streaming join example (#54) * Fixing the streaming join example * format * add drop_columns * update python internal package name --------- Co-authored-by: Matt Green <emgeee@users.noreply.github.com> * Implement stream join in python (#51) * Implement stream join in python * update * fmt * clippy fmt * example works * fmt * Adding config option for checkpointing (#50) * Adding config option for checkpointing * Add maturin build step for ci (#52) * fix: correct python module name * Fixing the streaming join example (#54) * Fixing the streaming join example * format * add drop_columns * update python internal package name --------- Co-authored-by: Matt Green <emgeee@users.noreply.github.com> * merge with main * Adding config option for checkpointing * merge with main * Cargo fmt --------- Co-authored-by: Matt Green <emgeee@users.noreply.github.com> * Rm hardcoded checkpoint (#55) * Add dockerfile to run emit_measurements and kafka (#56) * Add dockerfile to run emit_measurements and kafka * Add ability to specify timestamp_column on kafka stream (#57) * Add example python udaf * Fixing the UDAF example * merge main --------- Co-authored-by: Amey Chaugule <ameyc@users.noreply.github.com> Co-authored-by: Amey Chaugule <amey@probablynothinglabs.xyz>
- Loading branch information
1 parent
031616c
commit c9f4760
Showing
6 changed files
with
305 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
/target | ||
.vscode | ||
.DS_Store | ||
.ipynb_checkpoints/ | ||
Untitled.ipynb |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
"""stream_aggregate example.""" | ||
|
||
import json | ||
import signal | ||
import sys | ||
from collections import Counter | ||
from typing import List | ||
import pyarrow as pa | ||
|
||
from denormalized import Context | ||
from denormalized.datafusion import Accumulator, col | ||
from denormalized.datafusion import functions as f | ||
from denormalized.datafusion import udaf | ||
|
||
|
||
def signal_handler(sig, frame):
    """Terminate the process with exit status 0 on the registered signal."""
    raise SystemExit(0)
|
||
|
||
# Exit cleanly on Ctrl-C instead of surfacing a KeyboardInterrupt traceback.
signal.signal(signal.SIGINT, signal_handler)

# Kafka broker the example connects to.
bootstrap_server = "localhost:9092"

# Sample payload used to describe the JSON shape of the "temperature" topic
# (passed to ctx.from_topic below as a serialized example event).
sample_event = {
    "occurred_at_ms": 100,
    "sensor_name": "foo",
    "reading": 0.0,
}
|
||
class TotalValuesRead(Accumulator):
    """UDAF accumulator that counts how many times each input value is seen.

    The serialized accumulator state is a single struct scalar wrapping a
    string -> int64 map of per-value counts.
    """

    # Arrow type of the serialized state: a struct containing a map of
    # value -> observation count. Declared once here and reused by state()
    # and by the UDAF registration below.
    acc_state_type = pa.struct([("counts", pa.map_(pa.string(), pa.int64()))])

    def __init__(self):
        # Running value -> count tally for this accumulator instance.
        self.counts = Counter()

    def update(self, values: pa.Array) -> None:
        """Fold a batch of input values into the running counts."""
        if values is not None:
            self.counts.update(values.to_pylist())

    def merge(self, states: pa.Array) -> None:
        """Merge serialized partial states from other accumulators."""
        if states is None or len(states) == 0:
            return
        for state in states:
            if state is not None:
                # Each state is a one-element struct (see state() below).
                counts_map = state.to_pylist()[0]
                # NOTE(review): assumes the "counts" map converts to an
                # iterable of (key, value) pairs — confirm against pyarrow's
                # map-to-Python representation.
                for k, v in counts_map["counts"]:
                    self.counts[k] += v

    def state(self) -> List[pa.Scalar]:
        """Serialize the current counts into the Arrow state struct."""
        result = {"counts": dict(self.counts.items())}
        # Reuse acc_state_type instead of re-declaring the identical struct
        # inline, so the state schema is defined in exactly one place.
        return [pa.scalar(result, type=self.acc_state_type)]

    def evaluate(self) -> pa.Scalar:
        """Return the final aggregate value (same struct as the state)."""
        return self.state()[0]
|
||
|
||
# UDAF registration: one string input column; both the intermediate state
# and the final result use the accumulator's struct type. "stable" is the
# volatility passed through to udaf().
input_type = [pa.string()]
return_type = TotalValuesRead.acc_state_type
state_type = [TotalValuesRead.acc_state_type]
sample_udaf = udaf(TotalValuesRead, input_type, return_type, state_type, "stable")
|
||
|
||
def print_batch(rb: pa.RecordBatch):
    """Print a record batch to stdout; empty batches are silently skipped."""
    if len(rb):
        print(rb)
|
||
ctx = Context()
# Stream the "temperature" Kafka topic; sample_event (serialized to JSON)
# describes the message shape and "occurred_at_ms" is the timestamp column.
ds = ctx.from_topic("temperature", json.dumps(sample_event), bootstrap_server, "occurred_at_ms")

# Windowed aggregation with no grouping keys: apply the counting UDAF to
# sensor_name and print each emitted batch.
# NOTE(review): the trailing `2000, None` arguments are presumably the
# window length (ms) and slide — confirm against the window() signature.
ds = ds.window(
    [],
    [
        sample_udaf(col("sensor_name")).alias("count"),
    ],
    2000,
    None,
).sink(print_batch)
Oops, something went wrong.