-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprecompute_volume_features.py
56 lines (42 loc) · 1.55 KB
/
precompute_volume_features.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from utils import load_counters, calculate_volume_features
import argparse
import numpy as np
from conf import data_dir
# Feature engineering on top of counter series
def gr(vals, *, eps=0.01):
    """Return the ratio of the last value to the (offset) first value.

    Presumably a "growth rate" over the series -- confirm with the caller.

    Args:
        vals: Indexable sequence of numbers supporting ``vals[0]`` and
            ``vals[-1]``.
        eps: Constant added to the first value before dividing, so the
            denominator is non-zero when the series starts at 0. Defaults to
            0.01, the value that was previously hard-coded.

    Returns:
        ``vals[-1] / (vals[0] + eps)``.

    Raises:
        IndexError: If ``vals`` is an empty list/tuple.
    """
    first, final = vals[0], vals[-1]
    return final / (first + eps)
def argmax(vals):
    """Return the position of the largest element of ``vals``.

    When several elements tie for the maximum, the position of the first one
    is returned. An empty input makes ``max`` raise ``ValueError``.
    """

    def _element(pair):
        # ``pair`` is an (index, value) tuple produced by ``enumerate``.
        return pair[1]

    position, _ = max(enumerate(vals), key=_element)
    return position
def amplitude(vals):
    """Return the ratio between the largest and smallest value of ``vals``.

    Args:
        vals: Re-iterable collection of numbers (it is scanned twice, once
            for the minimum and once for the maximum).

    Returns:
        ``max(vals) / min(vals)``, or the sentinel ``-1`` when the minimum is
        exactly 0 and the ratio would be undefined.

    Raises:
        ValueError: If ``vals`` is empty (raised by ``min``).
    """
    smallest = min(vals)
    if smallest == 0:
        # Sentinel kept as-is: downstream features may rely on -1 here.
        return -1
    # Reuse the minimum computed above instead of scanning ``vals`` again.
    return max(vals) / smallest
def last(vals):
    """Return the final element of ``vals`` (i.e. ``vals[-1]``)."""
    final = vals[-1]
    return final
# Aggregators handed to ``calculate_volume_features``: output feature name ->
# callable applied to a series of volumes.
#
# Currently disabled candidates (re-add an entry to enable one):
#   "volumes_amplitude": amplitude, "volumes_argmax": argmax,
#   "volumes_max": np.max, "volumes_min": np.min,
#   "volumes_mdn": np.median, "volumes_mean": np.mean
engineered_volume_features = {
    "volumes_gr": gr,
    "volumes_sum": np.sum,
    "volumes_last": last,
}
if __name__ == "__main__":
    # Command-line entry point: for one city, load the train and test
    # counters, compute the engineered volume features and store each result
    # as a parquet file under <data_dir>/traffic/<city_name>/.
    cli = argparse.ArgumentParser()
    cli.add_argument("--city_name", "-c", required=True)
    city_name = cli.parse_args().city_name

    # Create the output directory (and any missing parents) up front.
    out_dir = data_dir / "traffic" / city_name
    out_dir.mkdir(parents=True, exist_ok=True)

    # (split passed to load_counters, output file name), processed in order.
    for split, file_name in (
        ("train", "all_counters_train.parquet"),
        ("test", "all_counters_test.parquet"),
    ):
        counters = load_counters(city_name, split)
        counters = calculate_volume_features(
            counters,
            aggregators=engineered_volume_features,
            nan_to_zero=True,
        )
        # Drop the "volumes_1h" entry before writing -- presumably the raw
        # series the features were computed from; confirm against utils.
        del counters["volumes_1h"]
        counters.to_parquet(out_dir / file_name)