# template_pipeline.yaml
# Pipeline definition "event-consolidation": loads user and event files,
# reshapes the raw event rows with SQL, and publishes the resulting views.
#
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation had been lost; it follows the order of the keys. Confirm
# it against the pipeline schema of the consuming framework.
pipeline-def:
  name: event-consolidation
  description: This is the process for etl'ing event data
  # Quoted so the version can never be re-typed by a parser or formatter.
  version: "1.0.0"

  settings:
    singleSparkSession: true
    globalViewAsLocal: true

  # Each list below mixes entries pulled in from another file (include) with
  # entries declared inline.
  aliases:
    - include: ./src/test/resources/pipelines/miscellaneous/alias.yaml
    - name: setting
      type: com.qwshen.etl.common.SparkConfActor

  udf-registration:
    - include: ./src/test/resources/pipelines/miscellaneous/udf-registration.yaml
    - prefix: user_
      type: com.qwshen.etl.test.udf.UserUdf

  # ${...} placeholders are left for the consumer to resolve; they are quoted
  # here only to match the other placeholder values in this file.
  variables:
    - name: iam_password
      value: "${events.db.password}"
      decryptionKeyString: "${application.security.decryption.key}"
    - name: process_date
      value: "${application.process_date}"
    - name: staging_uri
      value: "file:///c:/temp/staging"
    - name: metrics_uri
      value: "file:///c:/temp/metrics"

  jobs:
    - name: transform-user-events
      actions:
        # Reads the users CSV (no header row) with an explicit DDL schema.
        - name: load users
          actor:
            type: file
            properties:
              format: csv
              options:
                header: false
                delimiter: ","
                # Single-quoted so the value stays exactly the two characters
                # backslash + double-quote that the unquoted form produced.
                # NOTE(review): if a lone double-quote character is intended,
                # this should be '"' instead - confirm with the reader.
                quote: '\"'
                timestampFormat: "yyyy/MM/dd HH:mm:ss"
              ddlSchemaString: "user_id long, birth_year int, gender string, location string"
              fileUri: "${events.users_input}"
          output-view:
            name: users
            global: true

        # Reads the raw events file; the SQL below consumes it through the
        # row_no and row_value columns.
        - name: load events
          actor:
            type: flat
            properties:
              fileUri: "${events.events_input}"
          output-view:
            name: events_raw
            global: false

        # Cuts fixed-width fields out of each raw row, skipping row numbers
        # 1 and 2 and any row whose characters 6-10 are 'TFYKR'.
        - name: transform-events
          actor:
            type: sql
            properties:
              # Folded scalar: the lines below are joined with single spaces
              # into one SQL statement.
              sqlString: >
                select
                substr(row_value, 1, 12) as event_id,
                substr(row_value, 13, 16) as event_time,
                substr(row_value, 29, 12) as event_host,
                substr(row_value, 41, 64) as event_location
                from events_raw
                where row_no not in (1, 2) and substr(row_value, 6, 5) != 'TFYKR'
          input-views:
            - events_raw
          output-view:
            name: events
            global: true

    - include: ./src/test/resources/pipelines/jobs/job.yaml

  # NOTE(review): "load-events" is listed here and under debug-staging, but
  # the action above is named "load events" (with a space). Confirm whether
  # names are matched exactly; if so, one side needs to be renamed.
  metrics-logging:
    enabled: true
    uri: "${metrics_uri}"
    actions:
      - load-events

  debug-staging:
    enabled: true
    uri: "${staging_uri}"
    actions:
      - transform-events
      - load-events