-
Notifications
You must be signed in to change notification settings - Fork 85
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Feature: command line parse xml #197
Changes from 10 commits
150756e
a54b77f
2c12f09
54b378c
de9a382
eb94579
b9b294c
aeffd4f
616f5c6
6a28836
f591c19
a904111
2dc2a02
29bca28
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
import logging | ||
logging.basicConfig() | ||
log = logging.getLogger(__name__) | ||
|
||
import click | ||
import json | ||
from . import readxml | ||
|
||
@click.group(context_settings={'help_option_names': ['-h', '--help']})
def pyhf():
    # Root command group; subcommands attach themselves via ``@pyhf.command()``.
    # Both ``-h`` and ``--help`` are accepted as help flags.
    pass
|
||
@pyhf.command() | ||
@click.option('--entrypoint-xml', required=True, prompt='Top-level XML', help='The top-level XML file for the PDF definition.', type=click.Path(exists=True)) | ||
@click.option('--basedir', required=True, prompt='Base directory', help='The base directory for the XML files to point relative to.', type=click.Path(exists=True)) | ||
@click.option('--output-file', required=True, prompt='Output file', help='The location of the output json file. If not specified, prints to screen.') | ||
@click.option('--tqdm/--no-tqdm', default=True) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. suggestion: |
||
def xml2json(entrypoint_xml, basedir, output_file, tqdm):
    """Parse the top-level XML file and write the resulting spec as JSON."""
    # NOTE: click uses the docstring above as this command's help text,
    # so it is deliberately kept to a single summary line.
    #
    # entrypoint_xml: path of the top-level XML file handed to readxml.parse.
    # basedir:        directory the XML files are resolved relative to.
    # output_file:    path of the JSON file to (over)write.
    # tqdm:           forwarded as ``enable_tqdm`` to toggle progress bars.
    spec = readxml.parse(entrypoint_xml, basedir, enable_tqdm=tqdm)

    # The context manager guarantees the file is flushed and closed even if
    # serialisation fails; the original left the handle open until garbage
    # collection.
    with open(output_file, 'w+') as out_file:
        json.dump(spec, out_file, indent=4, sort_keys=True)

    log.info("Written to %s", output_file)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,17 @@ | ||
import logging | ||
log = logging.getLogger(__name__) | ||
|
||
import os | ||
import xml.etree.ElementTree as ET | ||
import numpy as np | ||
import logging | ||
|
||
log = logging.getLogger(__name__) | ||
import tqdm | ||
|
||
def import_root_histogram(rootdir, filename, path, name): | ||
import uproot | ||
#import pdb; pdb.set_trace() | ||
#assert path == '' | ||
# strip leading slashes as uproot doesn't use "/" for top-level | ||
if path is None: path = '' | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was needed to handle situations where There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe
is more pythonic? |
||
path = path.strip('/') | ||
f = uproot.open(os.path.join(rootdir, filename)) | ||
try: | ||
|
@@ -26,7 +28,7 @@ def import_root_histogram(rootdir, filename, path, name): | |
|
||
raise KeyError('Both {0:s} and {1:s} were tried and not found in {2:s}'.format(name, os.path.join(path, name), os.path.join(rootdir, filename))) | ||
|
||
def process_sample(sample,rootdir,inputfile, histopath, channelname): | ||
def process_sample(sample,rootdir,inputfile, histopath, channelname, enable_tqdm=False): | ||
if 'InputFile' in sample.attrib: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. here and in the other cases, I'd also suggest renaming to |
||
inputfile = sample.attrib.get('InputFile') | ||
if 'HistoPath' in sample.attrib: | ||
|
@@ -36,7 +38,11 @@ def process_sample(sample,rootdir,inputfile, histopath, channelname): | |
data,err = import_root_histogram(rootdir, inputfile, histopath, histoname) | ||
|
||
modifiers = [] | ||
for modtag in sample.iter(): | ||
|
||
modtags = tqdm.tqdm(sample.iter(), unit='modifier', disable=not(enable_tqdm), total=len(sample)) | ||
|
||
for modtag in modtags: | ||
modtags.set_description(' - modifier {0:s}({1:s})'.format(modtag.attrib.get('Name', 'n/a'), modtag.tag)) | ||
if modtag == sample: | ||
continue | ||
if modtag.tag == 'OverallSys': | ||
|
@@ -51,7 +57,6 @@ def process_sample(sample,rootdir,inputfile, histopath, channelname): | |
'type': 'normfactor', | ||
'data': None | ||
}) | ||
|
||
elif modtag.tag == 'HistoSys': | ||
lo,_ = import_root_histogram(rootdir, | ||
modtag.attrib.get('HistoFileLow',inputfile), | ||
|
@@ -97,26 +102,35 @@ def process_data(sample,rootdir,inputfile, histopath): | |
data,_ = import_root_histogram(rootdir, inputfile, histopath, histoname) | ||
return data | ||
|
||
def process_channel(channelxml,rootdir): | ||
def process_channel(channelxml, rootdir, enable_tqdm=False): | ||
channel = channelxml.getroot() | ||
|
||
inputfile = channel.attrib.get('InputFile') | ||
histopath = channel.attrib.get('HistoPath') | ||
|
||
samples = channel.findall('Sample') | ||
|
||
samples = tqdm.tqdm(channel.findall('Sample'), unit='sample', disable=not(enable_tqdm)) | ||
|
||
data = channel.findall('Data')[0] | ||
|
||
channelname = channel.attrib['Name'] | ||
return channelname, process_data(data, rootdir, inputfile, histopath), [process_sample(x, rootdir, inputfile, histopath, channelname) for x in samples] | ||
|
||
def parse(configfile,rootdir): | ||
results = [] | ||
for sample in samples: | ||
samples.set_description(' - sample {}'.format(sample.attrib.get('Name'))) | ||
result = process_sample(sample, rootdir, inputfile, histopath, channelname, enable_tqdm) | ||
results.append(result) | ||
|
||
return channelname, process_data(data, rootdir, inputfile, histopath), results | ||
|
||
def parse(configfile, rootdir, enable_tqdm=False): | ||
toplvl = ET.parse(configfile) | ||
inputs = [ET.parse(os.path.join(rootdir,x.text)) for x in toplvl.findall('Input')] | ||
channels = { | ||
k:{'data': d, 'samples': v} for k,d,v in [process_channel(inp,rootdir) for inp in inputs] | ||
} | ||
inputs = tqdm.tqdm([x.text for x in toplvl.findall('Input')], unit='channel', disable=not(enable_tqdm)) | ||
|
||
channels = {} | ||
for inp in inputs: | ||
inputs.set_description('Processing {}'.format(inp)) | ||
channel, data, samples = process_channel(ET.parse(os.path.join(rootdir,inp)), rootdir, enable_tqdm) | ||
channels[channel] = {'data': data, 'samples': samples} | ||
|
||
return { | ||
'toplvl':{ | ||
'resultprefix':toplvl.getroot().attrib['OutputFilePrefix'], | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
import pytest | ||
import json | ||
import shlex | ||
|
||
import pyhf | ||
|
||
# see test_import.py for the same (detailed) test | ||
def test_import_prepHistFactory(tmpdir, script_runner): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. where is this fixture defined? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Comes from There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ah ok click has some built in testing capabilities from example usage There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. CliRunner doesn't isolate stdout/stderr. It's probably only specific to running click-enabled commands. The pytest-console-scripts is much more generic (runs any script). I would use CliRunner if I spent more time figuring out stderr extraction. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok yes, testing stdout/stderr separately is important, especially if we want to do e.g. |
||
temp = tmpdir.join("parsed_output.json") | ||
command = 'pyhf xml2json --entrypoint-xml validation/xmlimport_input/config/example.xml --basedir validation/xmlimport_input/ --output-file {0:s} --no-tqdm'.format(temp.strpath) | ||
ret = script_runner.run(*shlex.split(command)) | ||
assert ret.success | ||
assert ret.stdout == '' | ||
assert ret.stderr == '' | ||
|
||
parsed_xml = json.loads(temp.read()) | ||
spec = {'channels': parsed_xml['channels']} | ||
pyhf.utils.validate(spec, pyhf.utils.get_default_schema()) | ||
|
||
def test_import_prepHistFactory_TQDM(tmpdir, script_runner):
    """Run ``pyhf xml2json`` without ``--no-tqdm`` and check its output streams.

    The command must succeed, leave stdout empty and write something to
    stderr (presumably the progress bar, which is on by default).
    """
    output_path = tmpdir.join("parsed_output.json")
    command_line = 'pyhf xml2json --entrypoint-xml validation/xmlimport_input/config/example.xml --basedir validation/xmlimport_input/ --output-file {0:s}'.format(output_path.strpath)
    argv = shlex.split(command_line)

    result = script_runner.run(*argv)

    assert result.success
    assert result.stdout == ''
    assert result.stderr != ''
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
what do you think about making the
`entrypoint-xml`
be a `click.argument`?
There is really no way to convert without input, so `pyhf xml2json input.xml`
seems to be a good command line. `--basedir`
could default to `os.getcwd()`.
Also, maybe it's somewhat more unixy to print to stdout if the output file is not provided?
`pyhf xml2json input.xml > test.json`
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The good part is
`tqdm`
is part of `stderr`,
so we can definitely do that.