# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Authors:
# - Daniel Drizhuk, d.drizhuk@gmail.com, 2017
# - Paul Nilsson, paul.nilsson@cern.ch, 2017-2019
################################
# Experiment specific parameters
[Experiment]
name: ATLAS
################################
# Pilot parameters
[Pilot]
# The default file name for the pilot log
pilotlog: pilotlog.txt
stageinlog: stageinlog.txt
stageoutlog: stageoutlog.txt
# The file name for the job definition
pandajobdata: pandaJobData.out
# Run with a fake test job and no server updates (values: 'fake', 'real'). The test job type can be 'production' or
# 'user'. The test transfer type can be 'direct' or 'NULL'. The test job command can be 'normal' or 'sleep' ('normal'
# means a standard reconstruction job, while 'sleep' means the payload command is 'sleep 1' with no input or output
# transfers)
pandajob: real
testjobtype: production
testjobcommand: normal
testtransfertype: NULL
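# Example (commented out): the settings below would run a fake user test job, with no server updates, whose
# payload is 'sleep 1' and which performs no input or output transfers
# pandajob: fake
# testjobtype: user
# testjobcommand: sleep
# testtransfertype: NULL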
# The URL for the PanDA server
pandaserver: http://127.0.0.1:8080
# pandaserver: https://aipanda007.cern.ch:25443
# The heartbeat period in seconds (30 * 60 = 1800 s in normal mode, 5 * 60 = 300 s in debug mode)
heartbeat: 1800
debug_heartbeat: 300
# Heartbeat message file (only used when Pilot is not sending heartbeats to server)
heartbeat_message: heartbeat.json
# Job IDs can be stored to a file that is picked up by the wrapper
jobid_file: pandaIDs.out
# The minimum required disk space for the pilot to run a job
free_space_limit: 2 GB
# The maximum output file size
maximum_output_file_size: 500 GB
# The maximum allowed sum of all input file sizes (files accessed via direct access are not counted by the pilot)
# (fall-back value; the schedconfig value is used primarily)
maximum_input_file_sizes: 14336 MB
# Size limit of the payload stdout during running; the unit is kB (value = 2 * 1024 ** 2)
local_size_limit_stdout: 2097152
# The maximum number of getJob requests
maximum_getjob_requests: 1
# Looping job time limits; if a job has not written anything within the relevant limit below, it is considered
# a looping job. The verification time is how often the check is performed, in seconds
looping_verification_time: 600
# Limit for production jobs (12 * 3600 s)
looping_limit_default_prod: 43200
# Limit for user jobs (3 * 3600 s)
looping_limit_default_user: 10800
# The minimum allowed looping limit (2 * 3600 s)
looping_limit_min_default: 7200
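# Example: with the defaults above, a production job that has not written anything for 12 hours (43200 s) is
# flagged as looping; the check itself runs every 600 s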
# Proxy verification time (used by monitoring) in seconds
proxy_verification_time: 600
# Disk space verification time (how often the available disk space is checked)
disk_space_verification_time: 300
# Memory usage verification time (how often the memory monitor output will be checked)
memory_usage_verification_time: 60
# Process verification time
process_verification_time: 300
# Output file size verification time
output_verification_time: 300
# The default thread check time in seconds, used by thread monitoring
thread_check: 10
# The default CPU check time in seconds, used by CPU monitoring
cpu_check: 60
# The timing file used to store various timing measurements
timing_file: pilot_timing.json
# Optional error log (leave filename empty if not wanted)
error_log: piloterrorlog.txt
# List of redundant files and directories to be removed prior to log file creation
# For ATLAS, any initial /cvmfs path prefix will automatically be corrected if ATLAS_LOCAL_ROOT_BASE is set
redundant: /cvmfs/atlas.cern.ch/repo/sw/PandaPilot/config/redundant.txt
# Utility commands that may be launched by the pilot before the payload, with the payload, after the payload has
# started, or with stage-in. E.g. 'MemoryMonitor' is used as an internal name; the actual command is 'prmon'
utility_before_payload:
utility_with_payload:
utility_after_payload_started:
utility_with_stagein:
################################
# Information service parameters
[Information]
# Path to local cache
#cache_dir: /lustre/atlas/proj-shared/csc108/debug/atlas/HPC_pilot_test/queue_cache #for Titan
cache_dir:
# URL for the PanDA queues json
queues: http://atlas-agis-api.cern.ch/request/pandaqueue/query/list/?json
# URL for the sites json
sites: http://atlas-agis-api.cern.ch/request/site/query/list/?json
# URL for the DDM endpoints json
storages: http://atlas-agis-api.cern.ch/request/ddmendpoint/query/list/?json
# URL for the SchedConfig json
schedconfig: http://pandaserver.cern.ch:25085/cache/schedconfig
# File name for the queuedata json
queuedata: queuedata.json
# Overwrite acopytools for queuedata (keys are transfer activity labels, values are ordered lists of copy tools)
#acopytools: {'pr':['rucio']}
#acopytools: {'pr':['rucio'], 'pw':['gfalcopy'], 'pl':['gfalcopy']}
#acopytools: {'pr': ['lsm'], 'pw': ['lsm']}
################################
# Payload parameters
[Payload]
# File name for the job report produced by the payload
jobreport: jobReport.json
# File name for production job metadata
metadata: metadata.xml
# File names for stdout/stderr
payloadstdout: payload.stdout
payloadstderr: payload.stderr
# Event service executor type
# default: generic (alternatives: base, raythena)
executor_type: raythena
################################
# Container parameters
[Container]
# Master parameter (unused)
# Is the pilot allowed to use containers? If False, then any database settings are ignored
# allow_container: False
# The setup type can be either ALRB or (explicit) singularity
setup_type: ALRB
# Name of script file that will contain the payload command to be executed in the container
container_script: container_script.sh
# Name of script file that will contain the setup command for the payload to be executed in the container
release_setup: my_release_setup.sh
# Name of the file that will contain the payload pid
pid_file: pid.txt
# Execute middleware in container
use_middleware_container: False
# If a middleware container script is listed (e.g. stagein.py), the pilot will perform all stage-in and/or stage-out
# steps in a standard container (to be revised).
# Note: if no middleware container image is specified below, the middleware will still be executed by the specified script
# (without using a container).
middleware_container_stagein_script: stagein.py
#middleware_container_stagein_script:
middleware_container_stageout_script:
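# Hypothetical example (assuming a stage-out counterpart to stagein.py is available):
# middleware_container_stageout_script: stageout.py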
# Error information and stage-in file status are saved in a JSON file by the stage-in script and later read by the pilot
stagein_dictionary: stagein_dictionary.json
middleware_stagein_stdout: stagein_stdout.txt
middleware_stagein_stderr: stagein_stderr.txt
# Name of middleware image (to be revised)
# This image is used if middleware is not found locally on the worker node. Middleware is expected to be present
# in the container image
middleware_container:
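# Hypothetical example (an unpacked image distributed via CVMFS; the actual image path depends on the site):
# middleware_container: /cvmfs/unpacked.cern.ch/registry.hub.docker.com/somerepo/middleware:latest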
################################
# Harvester parameters
[Harvester]
# Name of the job request file. The pilot places this file in the pilot launch directory when it wants Harvester to
# send another job (Harvester places the new job definition in the same directory)
job_request_file: worker_requestjob.json
# Name of the kill worker file. The pilot places this file in the pilot launch directory when it has finished all jobs
# and wants Harvester to kill the worker (virtual machine)
kill_worker_file: kill_worker
# Name of the file with the list of PanDA job IDs to be processed by the HPC pilot
jobs_list_file: worker_pandaids.json
# Name of the file with the PanDA job(s) to be processed by the HPC pilot
pandajob_file: HPCJobs.json
# Name of file with worker report
workerAttributesFile: worker_attributes.json
# Name of file for declaration of stageout
StageOutnFile: event_status.dump.json
################################
# HPC parameters
[HPC]
# Path to a scratch disk (RAM disk, SSD, etc.) for placing the job working directory
scratch: /tmp/scratch/
################################
# Rucio parameters
[Rucio]
# Rucio server URL for traces
url: https://rucio-lb-prod.cern.ch/traces/
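################################
# A minimal sketch (assuming Python's standard configparser; the pilot itself may use its own config wrapper)
# of how a consumer could read values from this file:
#
#   import configparser
#   cfg = configparser.ConfigParser()
#   cfg.read('default.cfg')
#   heartbeat = cfg.getint('Pilot', 'heartbeat')  # -> 1800
#   url = cfg.get('Rucio', 'url')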