Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: EDM4HEPSchema and Newstyle FCCSchema #1245

Merged
merged 38 commits into from
Mar 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
eeeeef5
EDM4HEPSchema and Newstyle FCCSchema
prayagyadav Jan 15, 2025
6567e16
Merge branch 'master' into edm4hep-schema
lgray Jan 15, 2025
a5ff99d
Fixed Scope Issues
prayagyadav Jan 17, 2025
e3faeec
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 17, 2025
e33eb62
Merge branch 'master' into edm4hep-schema
lgray Jan 21, 2025
8dcb8c7
Merge branch 'master' into edm4hep-schema
lgray Jan 21, 2025
ef39d7e
Merge branch 'master' into edm4hep-schema
lgray Jan 22, 2025
2ce7638
Merge branch 'master' into edm4hep-schema
lgray Jan 24, 2025
f05abe9
Typenames could be added to branch forms in delayed=False mode
prayagyadav Jan 29, 2025
dccb13f
eager and delayed typenames work given a change in uproot5
prayagyadav Jan 30, 2025
67ea14c
All the schemas work with the typename change; fullsim samples are re…
prayagyadav Feb 3, 2025
a947232
Merge branch 'scikit-hep:master' into test-typenames
prayagyadav Feb 3, 2025
6ba2947
w
prayagyadav Feb 4, 2025
6873c6c
Everything works
prayagyadav Feb 4, 2025
42278c3
cleaned
prayagyadav Feb 4, 2025
5789d1f
Merge branch 'scikit-hep:master' into test-typenames
prayagyadav Feb 18, 2025
240e9d1
support for extend ak_add_doc
prayagyadav Feb 18, 2025
a3219fa
Merge branch 'scikit-hep:master' into edm4hep-schema
prayagyadav Feb 19, 2025
c940b28
bring back the uproot dependency
prayagyadav Feb 19, 2025
4084fad
Adding the prospective functionality to recognize any edm4hep style r…
prayagyadav Feb 19, 2025
e138305
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 19, 2025
720e9b7
fixed spelling mistake
prayagyadav Feb 19, 2025
6817b98
Merge branch 'edm4hep-schema' of https://github.com/prayagyadav/coffe…
prayagyadav Feb 19, 2025
b26afd1
Merge branch 'master' into edm4hep-schema
lgray Feb 21, 2025
59effd7
cleaned schemas/edm4hep.py
prayagyadav Feb 25, 2025
0814516
cleaned all the other files
prayagyadav Feb 26, 2025
244bfb2
Many Tests added
prayagyadav Feb 26, 2025
3c85632
Merge branch 'master' into edm4hep-schema
prayagyadav Mar 10, 2025
7cecf37
python ver < 3.10 compatibility
prayagyadav Mar 10, 2025
62c598c
precommit
prayagyadav Mar 10, 2025
ab98ca4
Added typename in dataset_tools, test_dataset_tools.py and test_nanoe…
prayagyadav Mar 10, 2025
ec320fc
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 10, 2025
3b801d0
lazy load yaml
prayagyadav Mar 10, 2025
6c135af
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 10, 2025
0a9ed15
Merge branch 'master' into edm4hep-schema
lgray Mar 11, 2025
8a0ba5d
better imports; fixed hard-coded raise error in EDM4HEPSchema.version
prayagyadav Mar 11, 2025
ca3f956
edm4hep reading requires uproot 5.6.0
lgray Mar 11, 2025
748208e
Merge branch 'master' into edm4hep-schema
prayagyadav Mar 18, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ classifiers = [
]
dependencies = [
"awkward>=2.6.7",
"uproot>=5.3.11",
"uproot>=5.6.0",
"dask[array]>=2024.3.0",
"dask-awkward>=2025.2.0",
"dask-histogram>=2025.2.0",
Expand Down
2 changes: 1 addition & 1 deletion src/coffea/dataset_tools/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def get_steps(
if save_form:
form_str = uproot.dask(
tree,
ak_add_doc=True,
ak_add_doc={"__doc__": "title", "typename": "typename"},
filter_name=no_filter,
filter_typename=no_filter,
filter_branch=partial(_remove_not_interpretable, emit_warning=False),
Expand Down
4 changes: 4 additions & 0 deletions src/coffea/nanoevents/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
FCC,
BaseSchema,
DelphesSchema,
EDM4HEPSchema,
FCCSchema,
FCCSchema_edm4hep1,
NanoAODSchema,
PDUNESchema,
PFNanoAODSchema,
Expand All @@ -26,4 +28,6 @@
"ScoutingNanoAODSchema",
"FCC",
"FCCSchema",
"FCCSchema_edm4hep1",
"EDM4HEPSchema",
]
32 changes: 32 additions & 0 deletions src/coffea/nanoevents/assets/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import importlib
import os
from functools import partial

import yaml

# Location of the packaged "coffea.nanoevents.assets" resources; the
# edm4hep_v<version>.yaml files are looked up relative to it further below.
# NOTE(review): only `import importlib` is visible above -- confirm that the
# `importlib.resources` submodule is guaranteed to be loaded at this point
# (an explicit `import importlib.resources` would remove the doubt).
root_dir = importlib.resources.files("coffea.nanoevents.assets")

# Version tags for which an edm4hep_v<version>.yaml file is expected under
# root_dir; each tag becomes one key of the edm4hep_ver mapping.
versions = [
"00-10-01",
"00-10-02",
"00-10-03",
"00-10-04",
"00-10-05",
"00-99-00",
"00-99-01",
]


def _load_edm4hep_version(yamlfile):
    """Read one edm4hep YAML file and return its parsed content.

    Parameters
    ----------
    yamlfile : str or path-like
        Location of the YAML file to read.

    Returns
    -------
    object
        Whatever ``yaml.safe_load`` produces for the document.
    """
    # Pin the codec: a bare ``open`` falls back to the platform locale
    # encoding, which is not UTF-8 everywhere, so the same packaged file
    # could decode differently (or fail) depending on the host.
    with open(yamlfile, encoding="utf-8") as f:
        return yaml.safe_load(f)


# Maps each version tag to a callable (a functools.partial with the file path
# already bound) that reads and parses the matching edm4hep_v<version>.yaml
# only when it is invoked; no YAML file is opened while this mapping is built.
# NOTE(review): os.path.join needs root_dir to be path-like -- confirm that
# holds for every way the package can be installed.
edm4hep_ver = {
version: partial(
_load_edm4hep_version,
yamlfile=os.path.join(root_dir, f"edm4hep_v{version}.yaml"),
)
for version in versions
}
578 changes: 578 additions & 0 deletions src/coffea/nanoevents/assets/edm4hep_v00-10-01.yaml

Large diffs are not rendered by default.

578 changes: 578 additions & 0 deletions src/coffea/nanoevents/assets/edm4hep_v00-10-02.yaml

Large diffs are not rendered by default.

593 changes: 593 additions & 0 deletions src/coffea/nanoevents/assets/edm4hep_v00-10-03.yaml

Large diffs are not rendered by default.

596 changes: 596 additions & 0 deletions src/coffea/nanoevents/assets/edm4hep_v00-10-04.yaml

Large diffs are not rendered by default.

593 changes: 593 additions & 0 deletions src/coffea/nanoevents/assets/edm4hep_v00-10-05.yaml

Large diffs are not rendered by default.

882 changes: 882 additions & 0 deletions src/coffea/nanoevents/assets/edm4hep_v00-99-00.yaml

Large diffs are not rendered by default.

877 changes: 877 additions & 0 deletions src/coffea/nanoevents/assets/edm4hep_v00-99-01.yaml

Large diffs are not rendered by default.

13 changes: 10 additions & 3 deletions src/coffea/nanoevents/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,10 @@ def __call__(self, form):
for ifield, field in enumerate(form.fields):
iform = form.contents[ifield].to_dict()
branch_forms[field] = _lazify_form(
iform, f"{field},!load", docstr=iform["parameters"]["__doc__"]
iform,
f"{field},!load",
docstr=iform["parameters"]["__doc__"],
typestr=iform["parameters"]["typename"],
)
lform = {
"class": "RecordArray",
Expand All @@ -129,6 +132,7 @@ def __call__(self, form):
},
"form_key": None,
}

return (
awkward.forms.form.from_dict(self.schemaclass(lform, self.version).form),
self,
Expand Down Expand Up @@ -340,13 +344,12 @@ def from_root(
to_open = file
if isinstance(file, uproot.reading.ReadOnlyDirectory):
to_open = file[treepath]

opener = partial(
uproot.dask,
to_open,
full_paths=True,
open_files=False,
ak_add_doc=True,
ak_add_doc={"__doc__": "title", "typename": "typename"},
filter_branch=_remove_not_interpretable,
steps_per_file=steps_per_file,
known_base_form=known_base_form,
Expand All @@ -372,6 +375,9 @@ def from_root(
else:
tree = uproot.open(file, **uproot_options)

# Get the typenames
typenames = tree.typenames()

if entry_start is None or entry_start < 0:
entry_start = 0
if entry_stop is None or entry_stop > tree.num_entries:
Expand All @@ -396,6 +402,7 @@ def from_root(
base_form = mapping._extract_base_form(
tree, iteritems_options=iteritems_options
)
base_form["typenames"] = typenames

return cls._from_mapping(
mapping,
Expand Down
26 changes: 19 additions & 7 deletions src/coffea/nanoevents/mapping/uproot.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,24 +28,26 @@ class CannotBeNanoEvents(Exception):
pass


def _lazify_form(form, prefix, docstr=None):
def _lazify_form(form, prefix, docstr=None, typestr=None):
if not isinstance(form, dict) or "class" not in form:
raise RuntimeError("form should have been normalized by now")

parameters = _lazify_parameters(form.get("parameters", {}), docstr=docstr)
parameters = _lazify_parameters(
form.get("parameters", {}), docstr=docstr, typestr=typestr
)
if form["class"].startswith("ListOffset"):
# awkward will add !offsets
form["form_key"] = quote(prefix)
form["content"] = _lazify_form(
form["content"], prefix + ",!content", docstr=docstr
form["content"], prefix + ",!content", docstr=docstr, typestr=typestr
)
elif form["class"] == "NumpyArray":
form["form_key"] = quote(prefix)
if parameters:
form["parameters"] = parameters
elif form["class"] == "RegularArray":
form["content"] = _lazify_form(
form["content"], prefix + ",!content", docstr=docstr
form["content"], prefix + ",!content", docstr=docstr, typestr=typestr
)
if parameters:
form["parameters"] = parameters
Expand All @@ -61,7 +63,10 @@ def _lazify_form(form, prefix, docstr=None):
assert prefix.endswith("!load")
form["form_key"] = quote(prefix + "allowmissing,!index")
form["content"] = _lazify_form(
form["content"], prefix + "allowmissing,!content", docstr=docstr
form["content"],
prefix + "allowmissing,!content",
docstr=docstr,
typestr=typestr,
)
if parameters:
form["parameters"] = parameters
Expand All @@ -88,12 +93,16 @@ def _lazify_form(form, prefix, docstr=None):
return form


def _lazify_parameters(form_parameters, docstr=None):
def _lazify_parameters(form_parameters, docstr=None, typestr=None):
parameters = {}
if "__array__" in form_parameters:
parameters["__array__"] = form_parameters["__array__"]
if docstr is not None:
parameters["__doc__"] = docstr
if typestr is not None:
parameters["typename"] = typestr
if "typename" in form_parameters: # eager mode
parameters["typename"] = form_parameters["typename"]
return parameters


Expand Down Expand Up @@ -159,13 +168,16 @@ def _extract_base_form(cls, tree, iteritems_options={}):
form.to_json()
) # normalizes form (expand NumpyArray classes)
try:
form = _lazify_form(form, f"{key},!load", docstr=branch.title)
form = _lazify_form(
form, f"{key},!load", docstr=branch.title, typestr=branch.typename
)
except CannotBeNanoEvents as ex:
warnings.warn(
f"Skipping {key} as it is not interpretable by NanoEvents\nDetails: {ex}"
)
continue
branch_forms[key] = form

return {
"class": "RecordArray",
"contents": [item for item in branch_forms.values()],
Expand Down
Loading
Loading