forked from Cross-PLN-Technical-Working-Group/adpn-cli
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmyLockssScripts.py
executable file
·444 lines (355 loc) · 13.7 KB
/
myLockssScripts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
#!/usr/bin/python3
#
# myLockssScripts: utility classes for use across various ADPNet/LOCKSS automation scripts
# Regularizing the use of key-value pair switches, JSON input, etc.
#
# @version 2019.0624
import sys
import os.path
import re, json, numbers
import fileinput
import subprocess
from subprocess import PIPE
class myPyPipeline:
    """Given a sequence of shell processes, pipe output from one to input for the next, using POSIX pipes.

    @param iterable pipeline an iterable sequence of stages. Each stage is either a list
        specifying a shell command with its command-line parameters (handed to
        subprocess.Popen), or a callable that accepts the keyword arguments stdin, stdout,
        stderr and encoding and returns a Popen-like object.
    """

    def __init__(self, pipeline):
        self.pipeline = pipeline
        # Process objects started by the most recent siphon() call, in stage order.
        self.processes = []

    def process(self, cmd, stdin=sys.stdin, stdout=sys.stdout, stderr=sys.stderr, encoding="utf-8"):
        """Start a single pipeline stage and return its process object.

        @param cmd an argument list for subprocess.Popen, or a callable invoked as
            cmd(stdin=..., stdout=..., stderr=..., encoding=...)
        @return the Popen object, or whatever the callable returned
        """
        if callable(cmd):
            return cmd(stdin=stdin, stdout=stdout, stderr=stderr, encoding=encoding)
        return subprocess.Popen(cmd, stdin=stdin, stdout=stdout, stderr=stderr, encoding=encoding)

    def siphon(self, encoding="utf-8", stdin=sys.stdin):
        """Run every stage of the pipeline and collect the output of the last one.

        @param str encoding text encoding handed to every stage
        @param stdin either a file object used as the first stage's standard input, or a
            str/bytes value that is written (followed by a newline, via print) to the first
            stage's standard input. The text is written before any output is read.
        @return tuple (buf, errbuf, returncodes): the two values returned by the last
            stage's communicate(), and a list with the return code of every stage.
            siphon() does not ask process() for a stderr pipe.
        @raise IndexError if the pipeline has no stages
        """
        self.processes = []

        piped_in = stdin if isinstance(stdin, (str, bytes)) else None
        next_stdin = stdin if piped_in is None else PIPE
        for cmd in self.pipeline:
            proc = self.process(cmd=cmd, stdin=next_stdin, stdout=PIPE, encoding=encoding)
            next_stdin = proc.stdout
            self.processes.append(proc)

        if piped_in is not None:
            if isinstance(piped_in, bytes) and encoding is not None:
                # The pipe is opened in text mode; printing bytes would send their repr.
                piped_in = piped_in.decode(encoding)
            first = self.processes[0]
            print(piped_in, file=first.stdin)
            if len(self.processes) > 1:
                # With a single stage, communicate() below flushes and closes stdin itself.
                first.stdin.close()

        # Drain the final stage BEFORE waiting on the earlier ones: waiting first
        # deadlocks as soon as the output no longer fits in the OS pipe buffers.
        (buf, errbuf) = self.processes[-1].communicate()

        upstream = self.processes[:-1]
        for proc in upstream:
            # Drop our copies of the intermediate pipes so that a stage still writing to
            # a reader that has already exited is not left blocked forever.
            if proc.stdout is not None:
                proc.stdout.close()
        for proc in upstream:
            proc.wait()

        return (buf, errbuf, [proc.returncode for proc in self.processes])
def align_switches(left, right, switches, override=True):
    """Make two entries of a switches dictionary agree with one another, in place.

    If the left entry is None it takes the value of the right entry. Otherwise the right
    entry takes the value of the left entry when the right entry is None, or when the two
    differ and override is set.

    @param left key of the entry that wins when both are set and override is True
    @param right key of the other entry
    @param dict switches the dictionary to modify; both keys must already be present
    @param bool override whether a differing right entry is overwritten by the left one
    """
    if switches[left] is None:
        switches[left] = switches[right]
    elif switches[right] is None or (override and switches[right] != switches[left]):
        switches[right] = switches[left]
def shift_args(args: list) -> tuple:
    """Split an argument list into its first item and a list of the items after it.

    @param list args the arguments to split
    @return tuple (top, remainder); top is None when args is empty, and remainder is an
        empty list when there is nothing after the first item
    """
    if len(args) == 0:
        return (None, [])
    head = args[0]
    if len(args) == 1:
        return (head, [])
    return (head, args[1:])
class myPyCommandLine:
    """Parse a Unix-style shell command-line, separating out configuration parameters and files/objects.

    Arguments matching the switch pattern (--name or --name=value) are collected into a
    dictionary of switches; every other argument is kept, in order, as a file/object. A bare
    "--" ends switch processing: arguments after it are kept as files/objects even if they
    look like switches.
    """

    def __init__(self, argv: list = None, defaults: dict = None, configfile: str = "", alias: dict = None, settingsgroup = ""):
        """Initialize with a list of command-line arguments, and optionally a dictionary of default values for expected configuration switches.

        @param list argv command-line arguments to parse
        @param dict defaults default values for switches
        @param str configfile path of a JSON file holding further defaults, which take
            precedence over the defaults parameter. A missing file, unparseable JSON, or JSON
            that is not an object is ignored.
        @param dict alias maps a primary switch name to a secondary name; parse() keeps the
            two in step
        @param settingsgroup a group name or list of group names. Defaults whose key looks
            like "group/name" are also made available under the plain key "name" when the
            group is listed here.
        """
        self._argv = [] if argv is None else argv
        defaults = {} if defaults is None else defaults
        self._switches = {}

        jsonText = "{}"
        if len(configfile) > 0:
            try:
                # The context manager closes the file even if reading it fails.
                with open(configfile, "r") as default_map:
                    jsonText = default_map.read()
            except FileNotFoundError:
                jsonText = "{}"

        try:
            from_file = json.loads(jsonText)
        except json.decoder.JSONDecodeError:
            from_file = {}
        if not isinstance(from_file, dict):
            # Valid JSON that is not an object cannot supply key-value defaults.
            from_file = {}
        self._defaults = {**defaults, **from_file}

        if len(settingsgroup) > 0:
            groups = settingsgroup if isinstance(settingsgroup, list) else [settingsgroup]
            overlay = {}
            for key, value in self._defaults.items():
                subkey = key.split("/", maxsplit=2)
                if len(subkey) > 1 and subkey[0] in groups:
                    overlay[subkey[1]] = value
            self._defaults = {**self._defaults, **overlay}

        self._alias = {} if alias is None else alias
        # Group 1 is the switch name, group 3 the value following "=" (if any).
        # Raw string: "\s" in a plain literal is an invalid escape sequence. The name must
        # start with a digit, underscore or ASCII letter.
        self._switchPattern = r'--([0-9_A-Za-z][^=]*)?(\s*=(.*)\s*)?$'
        self._compiled = re.compile(self._switchPattern)

    @property
    def pattern(self) -> str:
        """Provides a regex that matches a command-line switch and parses out switch names and values."""
        return self._switchPattern

    @pattern.setter
    def pattern(self, rhs) -> None:
        self._switchPattern = rhs
        self._compiled = re.compile(self._switchPattern)

    @property
    def argv(self) -> list:
        """List of files and objects from the command line, without --switches.

        Before parse() has been called this is the argument list given to the constructor.
        """
        return self._argv

    @property
    def switches(self) -> dict:
        """Dictionary of configuration switches provided on command-line or in defaults.

        Empty until parse() has been called.
        """
        return self._switches

    def accept_switch(self, switch, switches: dict = None, defaults: dict = None):
        """Fold one matched switch into a copy of the switches collected so far.

        How the value is stored depends on what is currently held under the switch name:
        a callable is invoked as f(value, switch, None); otherwise a callable default is
        invoked as f(value, switch, current); a list gets the value appended; a bool becomes
        True unless the value is empty; a number is replaced by the value converted to int,
        else float, else 0; anything else is replaced by the value as a string.

        @param switch regex match object produced by the switch pattern
        @param dict switches the switches collected so far (not modified)
        @param dict defaults default values, consulted for callable handlers
        @return dict a new dictionary including the accepted switch
        """
        result = {} if switches is None else {**switches}
        defaults = {} if defaults is None else defaults

        key = switch.group(1)
        # A switch given without "=value" uses its own name as a (non-empty) value.
        value = switch.group(3) if switch.group(3) is not None else switch.group(1)
        current = result.get(key)

        if callable(current):
            result[key] = current(value, switch, None)
        elif callable(defaults.get(key)):
            result[key] = defaults.get(key)(value, switch, current)
        elif isinstance(current, list):
            # Build a new list: result is only a shallow copy, so appending in place
            # would also modify the list held in the caller's defaults.
            result[key] = current + [value]
        elif isinstance(current, bool):
            result[key] = (len(value) > 0)
        elif isinstance(current, numbers.Number):
            try:
                result[key] = int(value)
            except ValueError:
                try:
                    result[key] = float(value)
                except ValueError:
                    result[key] = 0
        else:
            result[key] = value
        return result

    def parse(self, argv: list = None, defaults: dict = None) -> tuple:
        """Separate out a list of command-line arguments into switches and files/objects.

        @param list argv further arguments, parsed after those given to the constructor
        @param dict defaults further defaults, taking precedence over those of the constructor
        @return tuple (argv, switches), which are also stored on the object
        """
        in_argv = list(self.argv)
        if argv is not None:
            in_argv.extend(argv)

        the_defaults = {**self._defaults, **({} if defaults is None else defaults)}
        out_argv = []
        out_switches = {**the_defaults}

        allowing_switches = True
        for arg in in_argv:
            ref_switch = re.match(self.pattern, arg) if allowing_switches else None
            if ref_switch and ref_switch.group(0) == '--':
                allowing_switches = False
            elif ref_switch:
                out_switches = self.accept_switch(ref_switch, out_switches, the_defaults)
            else:
                out_argv.append(arg)

        for (primary, secondary) in self._alias.items():
            # An unset primary borrows the secondary's value; after that a set primary
            # always overwrites the secondary.
            if out_switches.get(primary) is None:
                if out_switches.get(secondary) is not None:
                    out_switches[primary] = out_switches.get(secondary)
            if out_switches.get(primary) is not None:
                out_switches[secondary] = out_switches.get(primary)

        self._argv = out_argv
        self._switches = out_switches
        return (out_argv, out_switches)

    def compose(self, keyvalues: list) -> list:
        """Build an argument list from the current files/objects plus --key=value switches.

        @param list keyvalues (key, value) pairs; pairs whose value is None are left out
        @return list the current argv followed by one "--key=value" item per remaining pair
        """
        return self.argv + ["--%(k)s=%(v)s" % {"k": key, "v": value} for key, value in keyvalues if value is not None]
class myPyJSON:
    """Extract JSON hash tables from plain-text input, for example copy-pasted or piped into stdin.
    """

    def __init__(self, splat=True, cascade=False, where=None):
        """Initialize the JSON extractor pattern.

        @param bool splat whether allData reduces single-item lists to their only item
        @param bool cascade whether allData merges the parsed items together instead of
            listing them separately
        @param where optional callable taking one parsed item and returning whether allData
            should include it; by default every item is included
        """
        self._jsonPrologRE = r'^JSON(?:\s+(?:PACKET|DATA))?:\s*'
        self._jsonPrologText = 'JSON: '
        self._jsonBraces = r'^\s*([{].*[}]|\[.*\])\s*'
        self._jsonRaw = ''
        self._jsonText = []
        self._splat = splat
        self._cascade = cascade
        self.select_where(where)

    @property
    def prolog(self):
        """Regex that matches the JSON prolog at the start of a line of text."""
        return self._jsonPrologRE

    @property
    def prologText(self):
        """Plain text that will match against the myPyJSON.prolog regex."""
        return self._jsonPrologText

    @property
    def braces(self):
        """Regex that matches and parses out a likely JSON hashtable or array from a line of text."""
        return self._jsonBraces

    @property
    def splat(self):
        """Switch for splatting (or not) single item lists into their first unit."""
        return self._splat

    @property
    def cascade(self):
        """Switch for joining (or not) multiple data items in a first-to-last cascade or list them separately."""
        return self._cascade

    @property
    def selected(self):
        """Lambda that filters JSON data objects according to programmatic criteria."""
        return self._where

    @property
    def raw(self) -> str:
        """The most recently accepted input, as one string (lines joined with newlines)."""
        return self._jsonRaw

    @property
    def json(self) -> list:
        """List of all the JSON representations taken from the accepted input."""
        return self._jsonText

    @property
    def data(self) -> "list of dict":
        """A list of all the hash tables parsed from the JSON representations.

        Raises json.decoder.JSONDecodeError if any representation is not valid JSON.
        """
        return [json.loads(marble) for marble in self.json]

    @property
    def text(self):
        """Same list of JSON representations as myPyJSON.json."""
        return self._jsonText

    @property
    def allData(self):
        """A unified hash table that either lists or merges together all the tables parsed from the JSON representations.

        Only items passing the myPyJSON.selected filter are used. When cascading, hash
        tables are merged first-to-last (later keys win) and lists are concatenated; items
        that are neither are ignored. When not cascading, the items are listed as they are.
        """
        data = {
            "splat": {"used": False, "data": []},
            "hashes": {"used": False, "data": {}},
            "lists": {"used": False, "data": []},
        }

        for datum in self.data:
            if not self.selected(datum):
                continue
            if self.cascade:
                if isinstance(datum, dict):
                    data["hashes"]["data"] = {**data["hashes"]["data"], **datum}
                    data["hashes"]["used"] = True
                elif isinstance(datum, list):
                    data["lists"]["data"].extend(datum)
                    data["lists"]["used"] = True
            else:
                data["splat"]["used"] = True
                data["splat"]["data"].append(datum)

        # Keep only the containers that received something, then reduce the result.
        splat = [self.splatted(glob["data"]) for glob in data.values() if glob["used"]]
        return self.splatted(splat)

    def select_where(self, condition=None):
        """Set the filter used by allData; None selects every item."""
        self._where = condition if condition is not None else (lambda x: True)

    def splatted(self, data, force=False):
        """Reduce a list to None if it is empty, or to its only item if it has just one.

        Anything else is returned unchanged. Nothing is reduced unless myPyJSON.splat or
        force is set.
        """
        if not (force or self.splat):
            return data
        if isinstance(data, list):
            if len(data) == 0:
                return None
            if len(data) == 1:
                return data[0]
        return data

    def add_prolog(self, line):
        """Return the line prefixed with myPyJSON.prologText, unless it already starts with a prolog."""
        if re.match(self.prolog, line, flags=re.I):
            return line
        return "%(prolog)s%(line)s" % {"prolog": self.prologText, "line": line}

    def is_acceptable(self, line):
        """Tell whether a line starts with a prolog, or looks like and parses as JSON.

        The result is only meant to be used for its truth value.
        """
        is_prologged = re.match(self.prolog, line, flags=re.I)

        is_braces = False
        if re.match(self.braces, line, flags=re.I):
            try:
                json.loads(line)
                is_braces = True
            except json.decoder.JSONDecodeError:
                is_braces = False

        return (is_prologged or is_braces)

    def accept(self, jsonSource, screen=False):
        """Accept the plain-text input containing one or more JSON hash tables within the text.

        jsonSource can be a string, or an iterable object that spits out lines of text
        (for example, fileinput.input()). One-shot iterables are read exactly once.

        The text is split at every prolog and each non-blank piece becomes one JSON
        representation; if there is none, the whole text is kept as the only one.

        If screen is set, items that are not acceptable (see is_acceptable) are dropped
        first and the rest are given a prolog if they lack one. A string is screened as a
        single item, not line by line.
        """
        if isinstance(jsonSource, str):
            lines = None
            self._jsonRaw = jsonSource
        else:
            # Materialize first: joining an iterator would exhaust it and leave nothing
            # to screen or split afterwards.
            lines = list(jsonSource)
            self._jsonRaw = "\n".join(lines)

        if screen:
            candidates = [jsonSource] if lines is None else lines
            kept = [self.add_prolog(bit) for bit in candidates if self.is_acceptable(bit)]
            src = "\n".join(kept)
        else:
            src = self._jsonRaw

        # Case-insensitive like the screening above, so that every prolog the screening
        # accepted is also recognized (and removed) here.
        split_src = re.split(self.prolog, src, flags=re.M | re.I)
        self._jsonText = [bit for bit in split_src if len(bit.strip()) > 0]
        if len(self._jsonText) == 0:
            self._jsonText = [src]
class myADPNScriptSuite:
    """Describe where a script file lives: its resolved path, file name and directory.

    @param str script path of the script to describe; defaults to this module's own file
    """

    def __init__(self, script=None):
        if script is None:
            script = __file__
        self._modpath = os.path.realpath(script)
        (self._moddir, self._modname) = os.path.split(self._modpath)

    @property
    def directory(self):
        """Directory part of the script's resolved path."""
        return self._moddir

    @property
    def name(self):
        """File name part of the script's resolved path."""
        return self._modname

    def python(self):
        """Path of the Python interpreter running this code (sys.executable)."""
        return sys.executable

    def path(self, filename):
        """Join a file name onto the script's directory."""
        return os.path.join(self.directory, filename)
if __name__ == '__main__':
    # Self-demonstration, run only when this file is executed as a script: parses the
    # script's own command line two ways, then extracts JSON from good and bad sample text.

    defaults = {"foo": "bar"}
    old_argv = sys.argv

    print("DEFAULTS: ", "\t", defaults)
    print("")

    print(">>>", "(sys.argv, sw) = myPyCommandLine(sys.argv).parse(defaults=defaults)")
    (sys.argv, sw) = myPyCommandLine(sys.argv).parse(defaults=defaults)

    print("ARGV:    ", "\t", sys.argv)
    print("SWITCHES:", "\t", sw)

    # The demonstration above replaced sys.argv; put the real command line back.
    sys.argv = old_argv

    print("")
    print(">>>", "cmd = myPyCommandLine(sys.argv) ; cmd.parse(defaults=defaults) ; args = cmd.argv ; sw = cmd.switches")
    cmd = myPyCommandLine(sys.argv)
    cmd.parse(defaults=defaults)
    args = cmd.argv
    sw = cmd.switches

    print("ARGS:    ", "\t", args)
    print("SWITCHES:", "\t", sw)

    sys.argv = old_argv

    print("")
    print("Good JSON...")
    table1 = {"Ingest Title": "Alabama Department of Archives and History WPA Folder 01", "File Size ": "2.1G (2,243,154,758 bytes, 689 files)", "Plugin JAR": "http://configuration.adpn.org/overhead/takeover/plugins/AlabamaDepartmentOfArchivesAndHistoryDirectoryPlugin.jar", "Plugin ID": "gov.alabama.archives.adpn.directory.AlabamaDepartmentOfArchivesAndHistoryDirectoryPlugin", "Plugin Name": "Alabama Department of Archives and History Directory Plugin", "Plugin Version": "1", "Start URL": "http://archives.alabama.gov/Lockss/WPA-Folder-01/", "Manifest URL": "http://archives.alabama.gov/Lockss/WPA-Folder-01/manifestpage.html", "Base URL": "base_url=\"http://archives.alabama.gov/Lockss/\"", "Subdirectory": "subdirectory=\"WPA-Folder-01\"", "au_name": "Alabama Department of Archives and History Directory Plugin, Base URL http://archives.alabama.gov/Lockss/, Subdirectory WPA-Folder-01"}
    table2 = {"au_start_url": "http://archives.alabama.gov/Lockss/WPA-Folder-01/", "au_manifest": "http://archives.alabama.gov/Lockss/WPA-Folder-01/manifestpage.html", "parameters": [["base_url", "http://archives.alabama.gov/Lockss/"], ["subdirectory", "WPA-Folder-01"]]}
    inp = "USELESS LINE: FooBar" + "\n" + "JSON PACKET: " + json.dumps(table1) + "\n" + json.dumps(table2) + "\n\n"

    jsonInput = myPyJSON()
    # Screen line by line: without screening, the non-JSON line and the two packets end
    # up in chunks that json.loads() rejects, and the "good" demonstration would abort.
    jsonInput.accept(inp.splitlines(), screen=True)

    print("")
    print("")
    print("JSON TEXT >>>")
    print(jsonInput.json)

    print("")
    print("JSON DATA >>>")
    print(jsonInput.data)

    print("")
    print("AGGREGATED JSON DATA >>>")
    print(jsonInput.allData)

    print("")
    print("Bad JSON...")
    inp = "JSON PACKET: {oooOOooo what's this?}" + "\n" + "NON-JSON LINE: Hmmm"

    jsonInput = myPyJSON()
    jsonInput.accept(inp)

    print("")
    print("")
    print("JSON TEXT >>>")
    print(jsonInput.json)

    print("")
    print("JSON DATA >>>")
    try:
        print(jsonInput.data)
    except json.decoder.JSONDecodeError:
        print("myPyJSON.data -- excepted expected, OK !")

    print("")
    print("AGGREGATED JSON DATA >>>")
    try:
        print(jsonInput.allData)
    except json.decoder.JSONDecodeError:
        print("myPyJSON.allData -- excepted expected, OK !")

    print("")