-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathtemplate_pipeline.json
executable file
·119 lines (117 loc) · 3.22 KB
/
template_pipeline.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
{
"pipeline-def": {
"name": "event-consolidation",
"description": "This is the process for etl'ing event data",
"version": "1.0.0",
"settings": {
"singleSparkSession": true,
"globalViewAsLocal": true
},
"aliases": [
{
"include": "./src/test/resources/pipelines/miscellaneous/alias.json"
},
{
"name": "setting",
"type": "com.qwshen.etl.setting.SparkConfSetter"
}
],
"udf-registration": {
"include": "./src/test/resources/pipelines/miscellaneous/udf-registration.json"
},
"variables": [
{
"name": "iam_password",
"value": "${events.db.password}",
"decryptionKeyString": "${application.security.decryption.key}"
},
{
"name": "process_date",
"value": "${application.process_date}"
},
{
"name": "staging_uri",
"value": "file:///c:/temp/staging"
},
{
"name": "metrics_uri",
"value": "file:///c:/temp/metrics"
}
],
"jobs": [
{
"name": "transform-user-events",
"actions": [
{
"name": "load users",
"actor": {
"type": "file",
"properties": {
"format": "csv",
"options": {
"header": false,
"delimiter": ",",
"quote": "\"",
"timestampFormat": "yyyy/MM/dd HH:mm:ss"
},
"ddlSchemaString": "user_id long, birth_year int, gender string, location string",
"fileUri": "${events.users_input}"
}
},
"output-view": {
"name": "users",
"global": true
}
},
{
"name": "load-events",
"actor": {
"type": "flat",
"properties": {
"fileUri": "${events.events_input}"
}
},
"output-view": {
"name": "events_raw",
"global": false
}
},
{
"name": "transform-events",
"actor": {
"type": "sql",
"properties": {
"sqlString": "select substr(row_value, 1, 12) as event_id, substr(row_value, 13, 16) as event_time, substr(row_value, 29, 12) as event_host, substr(row_value, 41, 64) as event_location from events_raw where row_no not in (1, 2) and substr(row_value, 6, 5) != 'TFYKR'"
}
},
"input-views": [
"events_raw"
],
"output-view": {
"name": "events",
"global": true
}
}
]
},
{
"include": "./src/test/resources/pipelines/jobs/job.json"
}
],
"metrics-logging": {
"enabled": true,
"uri": "${metrics_uri}",
"actions": [
"load-events"
]
},
"debug-staging": {
"enabled": true,
"uri": "${staging_uri}",
"actions": [
"transform-events",
"load-events"
]
}
}
}