Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLN: Remove last usages of the Any type #489

Merged
merged 1 commit into from
Mar 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 14 additions & 5 deletions src/fmu/dataio/_filedata_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,18 @@
from copy import deepcopy
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Final, Literal, Optional
from typing import TYPE_CHECKING, Final, Optional
from warnings import warn

from ._definitions import FmuContext
from ._logging import null_logger

logger: Final = null_logger(__name__)

if TYPE_CHECKING:
from ._objectdata_provider import ObjectDataProvider
from .dataio import ExportData


@dataclass
class FileDataProvider:
Expand All @@ -31,8 +35,8 @@ class FileDataProvider:
"""

# input
dataio: Any
objdata: Any
dataio: ExportData
objdata: ObjectDataProvider
rootpath: Path = field(default_factory=Path)
itername: str = ""
realname: str = ""
Expand Down Expand Up @@ -161,8 +165,13 @@ def _get_path(self) -> tuple[Path, Path | None]:
"""Construct and get the folder path(s)."""
linkdest = None

assert isinstance(
self.dataio.fmu_context, FmuContext
) # Converted to a FmuContext obj. in post-init.

dest = self._get_path_generic(
mode=self.dataio.fmu_context, allow_forcefolder=True
mode=self.dataio.fmu_context,
allow_forcefolder=True,
)

if self.dataio.fmu_context == FmuContext.CASE_SYMLINK_REALIZATION:
Expand All @@ -176,7 +185,7 @@ def _get_path(self) -> tuple[Path, Path | None]:

def _get_path_generic(
self,
mode: Literal[FmuContext.REALIZATION, FmuContext.PREPROCESSED],
mode: FmuContext,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Discussed with @jcrivenaes

allow_forcefolder: bool = True,
info: str = "",
) -> Path:
Expand Down
19 changes: 12 additions & 7 deletions src/fmu/dataio/_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from datetime import timezone
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import Any, Final
from typing import Final
from warnings import warn

from fmu import dataio
Expand All @@ -30,6 +30,7 @@
)
from fmu.dataio.datastructure.meta import meta

from . import types
from ._definitions import FmuContext
from ._logging import null_logger

Expand Down Expand Up @@ -220,13 +221,13 @@ class MetaData:
"""

# input variables
obj: Any
dataio: Any
obj: types.Inferrable
dataio: dataio.ExportData
compute_md5: bool = True

# storage state variables
objdata: Any = field(default=None, init=False)
fmudata: Any = field(default=None, init=False)
objdata: ObjectDataProvider | None = field(default=None, init=False)
fmudata: FmuProvider | None = field(default=None, init=False)
iter_name: str = field(default="", init=False)
real_name: str = field(default="", init=False)

Expand Down Expand Up @@ -281,8 +282,8 @@ def _populate_meta_fmu(self) -> None:
"""
fmudata = FmuProvider(
model=self.dataio.config.get("model", None),
fmu_context=self.dataio.fmu_context,
casepath_proposed=self.dataio.casepath,
fmu_context=FmuContext.get(self.dataio.fmu_context),
casepath_proposed=self.dataio.casepath or "",
include_ertjobs=self.dataio.include_ertjobs,
forced_realization=self.dataio.realization,
workflow=self.dataio.workflow,
Expand Down Expand Up @@ -328,6 +329,8 @@ def _populate_meta_file(self) -> None:
- absolute_path_symlink, as above but full path
"""

assert self.objdata is not None

fdata = FileDataProvider(
self.dataio,
self.objdata,
Expand Down Expand Up @@ -362,6 +365,7 @@ def _populate_meta_file(self) -> None:

def _populate_meta_class(self) -> None:
"""Get the general class which is a simple string."""
assert self.objdata is not None
self.meta_class = self.objdata.classname

def _populate_meta_tracklog(self) -> None:
Expand Down Expand Up @@ -395,6 +399,7 @@ def _populate_meta_display(self) -> None:
if self.dataio.display_name is not None:
display_name = self.dataio.display_name
else:
assert self.objdata is not None
display_name = self.objdata.name

self.meta_display = {"name": display_name}
Expand Down
46 changes: 27 additions & 19 deletions src/fmu/dataio/_objectdata_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,15 +147,18 @@ class DerivedObjectDescriptor:
"table",
"dictionary",
]
efolder: Literal[
"maps",
"polygons",
"points",
"cubes",
"grids",
"tables",
"dictionaries",
]
efolder: (
Literal[
"maps",
"polygons",
"points",
"cubes",
"grids",
"tables",
"dictionaries",
]
| str
)
fmt: str
extension: str
spec: Dict[str, Any]
Expand Down Expand Up @@ -230,8 +233,8 @@ class ObjectDataProvider:
"""

# input fields
obj: Any
dataio: Any
obj: types.Inferrable
dataio: dataio.ExportData
meta_existing: dict = field(default_factory=dict)

# result properties; the most important is metadata which IS the 'data' part in
Expand Down Expand Up @@ -262,7 +265,7 @@ def _derive_name_stratigraphy(self) -> DerivedNamedStratigraphy:
name = derive_name(self.dataio, self.obj)

# next check if usename has a "truename" and/or aliases from the config
strat = self.dataio.config.get("stratigraphy") # shortform
strat = self.dataio.config.get("stratigraphy", {})
no_start_or_missing_name = strat is None or name not in strat

rv = DerivedNamedStratigraphy(
Expand Down Expand Up @@ -658,11 +661,11 @@ def _derive_spec_bbox_dataframe(
) -> SpecificationAndBoundingBox:
"""Process/collect the data items for DataFrame."""
logger.info("Process data metadata for DataFrame (tables)")
df: pd.DataFrame = self.obj
assert isinstance(self.obj, pd.DataFrame)
return SpecificationAndBoundingBox(
spec=specification.TableSpecification(
columns=list(df.columns),
size=int(df.size),
columns=list(self.obj.columns),
size=int(self.obj.size),
).model_dump(
mode="json",
exclude_none=True,
Expand All @@ -675,11 +678,13 @@ def _derive_spec_bbox_arrowtable(
) -> SpecificationAndBoundingBox:
"""Process/collect the data items for Arrow table."""
logger.info("Process data metadata for arrow (tables)")
table = self.obj
from pyarrow import Table

assert isinstance(self.obj, Table)
return SpecificationAndBoundingBox(
spec=specification.TableSpecification(
columns=list(table.column_names),
size=table.num_columns * table.num_rows,
columns=list(self.obj.column_names),
size=self.obj.num_columns * self.obj.num_rows,
).model_dump(
mode="json",
exclude_none=True,
Expand All @@ -699,6 +704,9 @@ def _get_columns(self) -> list[str]:
columns = list(self.obj.columns)
else:
logger.debug("arrow")
from pyarrow import Table
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if and when these lazy imports start to become numerous enough to bite us later on... the import doesn't seem excessively slow to my mind, about half a second (at least as far as Python is concerned). Probably few enough to be OK now, but will there be more?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've wondered about that as well. But I think for now it's fine. If I go back and look at older versions, it looks like there was a period of time where we could not really rely upon pyarrow being installed.


assert isinstance(self.obj, Table)
columns = self.obj.column_names
logger.debug("Available columns in table %s ", columns)
return columns
Expand Down Expand Up @@ -813,7 +821,7 @@ def _derive_from_existing(self) -> None:
# TODO: Clean up types below.
self.time0, self.time1 = parse_timedata(self.meta_existing["data"]) # type: ignore

def _process_content(self) -> tuple[str, dict | None]:
def _process_content(self) -> tuple[str | dict, dict | None]:
"""Work with the `content` metadata"""

# content == "unset" is not wanted, but in case metadata has been produced while
Expand Down
6 changes: 4 additions & 2 deletions src/fmu/dataio/dataio.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,9 @@ class ExportData:
depth_reference: str = "msl"
description: Union[str, list] = ""
display_name: Optional[str] = None
fmu_context: Union[FmuContext, str] = "realization"
fmu_context: Union[FmuContext, str] = (
FmuContext.REALIZATION
) # post init converts to FmuContext
forcefolder: str = ""
grid_model: Optional[str] = None
is_observation: bool = False
Expand All @@ -447,7 +449,7 @@ class ExportData:
table_index: Optional[list] = None

# some keys that are modified version of input, prepended with _use
_usecontent: dict = field(default_factory=dict, init=False)
_usecontent: dict | str = field(default_factory=dict, init=False)
_usefmtflag: str = field(default="", init=False)

# storing resulting state variables for instance, non-public:
Expand Down
Loading