Merge pull request #5 from dhensle/explicit_chunking

explicit chunking on interaction simulate models
TransLinkForecasting · May 10, 2024 · e6a904c · e6a904c
2 parents 85b43e7 + 0293dd2
commit e6a904c
Show file tree

Hide file tree

Showing 18 changed files with 104 additions and 25 deletions.
diff --git a/activitysim/abm/models/accessibility.py b/activitysim/abm/models/accessibility.py
@@ -32,8 +32,11 @@ class AccessibilitySettings(PydanticReadable):
     SPEC: str = "accessibility.csv"
     """Filename for the accessibility specification (csv) file."""
 
-    explicit_chunk: int = 0
-    """If > 0, use this chunk size instead of adaptive chunking."""
+    explicit_chunk: float = 0
+    """
+    If > 0, use this chunk size instead of adaptive chunking.
+    If less than 1, use this fraction of the total number of rows.
+    """
 
 
 @nb.njit

diff --git a/activitysim/abm/models/disaggregate_accessibility.py b/activitysim/abm/models/disaggregate_accessibility.py
@@ -177,6 +177,12 @@ class DisaggregateAccessibilitySettings(PydanticReadable, extra="forbid"):
     """
     List of preprocessor settings to apply to the proto-population tables after generation.
     """
+    explicit_chunk: float | None = None
+    """
+    If > 0, use this chunk size instead of adaptive chunking.
+    If less than 1, use this fraction of the total number of rows.
+    If not supplied or None, will default to the chunk size in the location choice model settings.
+    """
 
 
 def read_disaggregate_accessibility_yaml(
@@ -758,6 +764,11 @@ def get_disaggregate_logsums(
         model_settings = util.suffix_tables_in_settings(model_settings)
         model_settings.CHOOSER_ID_COLUMN = "proto_person_id"
 
+        # Explicit chunking can be set specifically for disaggregate accessibility;
+        # otherwise explicit_chunk keeps whatever value is in the location model settings.
+        if disagg_model_settings.explicit_chunk is not None:
+            model_settings.explicit_chunk = disagg_model_settings.explicit_chunk
+
         # Include the suffix tags to pass onto downstream logsum models (e.g., tour mode choice)
         if model_settings.LOGSUM_SETTINGS:
             suffixes = util.concat_suffix_dict(disagg_model_settings.suffixes)

diff --git a/activitysim/abm/models/location_choice.py b/activitysim/abm/models/location_choice.py
@@ -19,8 +19,6 @@
 from activitysim.core.interaction_sample_simulate import interaction_sample_simulate
 from activitysim.core.util import reindex
 
-# import multiprocessing
-
 
 """
 The school/workplace location model predicts the zones in which various people will
@@ -192,6 +190,7 @@ def _location_sample(
         chunk_tag=chunk_tag,
         trace_label=trace_label,
         zone_layer=zone_layer,
+        explicit_chunk_size=model_settings.explicit_chunk,
         compute_settings=model_settings.compute_settings.subcomponent_settings(
             "sample"
         ),
@@ -1185,11 +1184,6 @@ def workplace_location(
             state, estimator, model_settings, "workplace_location.yaml"
         )
 
-    # FIXME - debugging code to test multiprocessing failure handling
-    # process_name = multiprocessing.current_process().name
-    # if multiprocessing.current_process().name =='mp_households_0':
-    #     raise RuntimeError(f"fake fail {process_name}")
-
     # disable locutor for benchmarking
     if state.settings.benchmarking:
         locutor = False

diff --git a/activitysim/abm/models/non_mandatory_tour_frequency.py b/activitysim/abm/models/non_mandatory_tour_frequency.py
@@ -181,8 +181,11 @@ class NonMandatoryTourFrequencySettings(LogitComponentSettings, extra="forbid"):
     annotate_tours: PreprocessorSettings | None = None
     """Preprocessor settings to annotate tours"""
 
-    explicit_chunk: int = 0
-    """Number of rows to process in each chunk when explicit chunking is enabled"""
+    explicit_chunk: float = 0
+    """
+    If > 0, use this chunk size instead of adaptive chunking.
+    If less than 1, use this fraction of the total number of rows.
+    """
 
 
 @workflow.step

diff --git a/activitysim/abm/models/parking_location_choice.py b/activitysim/abm/models/parking_location_choice.py
@@ -147,6 +147,7 @@ def parking_destination_simulate(
         chunk_size=chunk_size,
         trace_label=trace_label,
         trace_choice_name="parking_loc",
+        explicit_chunk_size=model_settings.explicit_chunk,
     )
 
     # drop any failed zero_prob destinations
@@ -345,6 +346,12 @@ class ParkingLocationSettings(LogitComponentSettings, extra="forbid"):
     """List of auto modes that use parking. AUTO_MODES are used in write_trip_matrices to make sure
     parking locations are accurately represented in the output trip matrices."""
 
+    explicit_chunk: float = 0
+    """
+    If > 0, use this chunk size instead of adaptive chunking.
+    If less than 1, use this fraction of the total number of rows.
+    """
+
 
 @workflow.step
 def parking_location(

diff --git a/activitysim/abm/models/school_escorting.py b/activitysim/abm/models/school_escorting.py
@@ -357,8 +357,11 @@ class SchoolEscortSettings(BaseLogitComponentSettings, extra="forbid"):
     no_escorting_alterative: int = 1
     """The alternative number for no escorting. Used to set the choice for households with no escortees."""
 
-    explicit_chunk: int = 0
-    """If > 0, use this chunk size instead of adaptive chunking."""
+    explicit_chunk: float = 0
+    """
+    If > 0, use this chunk size instead of adaptive chunking.
+    If less than 1, use this fraction of the total number of rows.
+    """
 
     LOGIT_TYPE: Literal["MNL"] = "MNL"
     """Logit model mathematical form.

diff --git a/activitysim/abm/models/trip_destination.py b/activitysim/abm/models/trip_destination.py
@@ -217,6 +217,7 @@ def _destination_sample(
         chunk_tag=chunk_tag,
         trace_label=trace_label,
         zone_layer=zone_layer,
+        explicit_chunk_size=model_settings.explicit_chunk,
         compute_settings=model_settings.compute_settings.subcomponent_settings(
             "sample"
         ),
@@ -690,6 +691,7 @@ def compute_ood_logsums(
     chunk_size,
     trace_label,
     chunk_tag,
+    explicit_chunk_size=0,
 ):
     """
     Compute one (of two) out-of-direction logsums for destination alternatives
@@ -723,6 +725,7 @@ def compute_ood_logsums(
         chunk_size=chunk_size,
         trace_label=trace_label,
         chunk_tag=chunk_tag,
+        explicit_chunk_size=explicit_chunk_size,
     )
 
     assert logsums.index.equals(choosers.index)
@@ -835,6 +838,7 @@ def compute_logsums(
         state.settings.chunk_size,
         trace_label=tracing.extend_trace_label(trace_label, "od"),
         chunk_tag=chunk_tag,
+        explicit_chunk_size=model_settings.explicit_chunk,
     )
 
     # - dp_logsums
@@ -864,6 +868,7 @@ def compute_logsums(
         state.settings.chunk_size,
         trace_label=tracing.extend_trace_label(trace_label, "dp"),
         chunk_tag=chunk_tag,
+        explicit_chunk_size=model_settings.explicit_chunk,
     )
 
     return destination_sample
@@ -954,6 +959,7 @@ def trip_destination_simulate(
         trace_label=trace_label,
         trace_choice_name="trip_dest",
         estimator=estimator,
+        explicit_chunk_size=model_settings.explicit_chunk,
     )
 
     if not want_logsums:

diff --git a/activitysim/abm/models/util/logsums.py b/activitysim/abm/models/util/logsums.py
@@ -261,6 +261,7 @@ def compute_location_choice_logsums(
         chunk_size=chunk_size,
         chunk_tag=chunk_tag,
         trace_label=trace_label,
+        explicit_chunk_size=model_settings.explicit_chunk,
         compute_settings=logsum_settings.compute_settings,
     )
 

diff --git a/activitysim/abm/models/util/tour_destination.py b/activitysim/abm/models/util/tour_destination.py
@@ -123,6 +123,7 @@ def _destination_sample(
         chunk_tag=chunk_tag,
         trace_label=trace_label,
         zone_layer=zone_layer,
+        explicit_chunk_size=model_settings.explicit_chunk,
         compute_settings=model_settings.compute_settings.subcomponent_settings(
             "sample"
         ),

diff --git a/activitysim/abm/models/util/tour_od.py b/activitysim/abm/models/util/tour_od.py
@@ -216,6 +216,7 @@ def _od_sample(
         chunk_tag=chunk_tag,
         trace_label=trace_label,
         zone_layer="taz",
+        explicit_chunk_size=model_settings.explicit_chunk,
         compute_settings=model_settings.compute_settings.subcomponent_settings(
             "sample"
         ),
@@ -1057,6 +1058,7 @@ def run_od_simulate(
         trace_label=trace_label,
         trace_choice_name="origin_destination",
         estimator=estimator,
+        explicit_chunk_size=model_settings.explicit_chunk,
         compute_settings=model_settings.compute_settings,
     )
 

diff --git a/activitysim/abm/models/util/vectorize_tour_scheduling.py b/activitysim/abm/models/util/vectorize_tour_scheduling.py
@@ -59,6 +59,12 @@ class TourSchedulingSettings(LogitComponentSettings, extra="forbid"):
     this unsegmented SPEC should be omitted.
     """
 
+    explicit_chunk: float = 0
+    """
+    If > 0, use this chunk size instead of adaptive chunking.
+    If less than 1, use this fraction of the total number of rows.
+    """
+
 
 def skims_for_logsums(
     state: workflow.State,
@@ -929,7 +935,11 @@ def schedule_tours(
         chunk_trace_label,
         chunk_sizer,
     ) in chunk.adaptive_chunked_choosers(
-        state, tours, tour_trace_label, tour_chunk_tag
+        state,
+        tours,
+        tour_trace_label,
+        tour_chunk_tag,
+        explicit_chunk_size=model_settings.explicit_chunk,
     ):
         choices = _schedule_tours(
             state,

diff --git a/activitysim/abm/models/vehicle_type_choice.py b/activitysim/abm/models/vehicle_type_choice.py
@@ -585,8 +585,11 @@ class VehicleTypeChoiceSettings(LogitComponentSettings, extra="forbid"):
 
     FLEET_YEAR: int
 
-    explicit_chunk: int = 0
-    """If > 0, use this chunk size instead of adaptive chunking."""
+    explicit_chunk: float = 0
+    """
+    If > 0, use this chunk size instead of adaptive chunking.
+    If less than 1, use this fraction of the total number of rows.
+    """
 
 
 @workflow.step

diff --git a/activitysim/core/chunk.py b/activitysim/core/chunk.py
@@ -1213,7 +1213,7 @@ def adaptive_chunked_choosers(
     chunk_tag: str = None,
     *,
     chunk_size: int | None = None,
-    explicit_chunk_size: int = 0,
+    explicit_chunk_size: float = 0,
 ):
     # generator to iterate over choosers
 
@@ -1232,12 +1232,16 @@ def adaptive_chunked_choosers(
 
     chunk_tag = chunk_tag or trace_label
 
+    num_choosers = len(choosers.index)
+
     if state.settings.chunk_training_mode == MODE_EXPLICIT:
-        chunk_size = explicit_chunk_size
+        if explicit_chunk_size < 1:
+            chunk_size = math.ceil(num_choosers * explicit_chunk_size)
+        else:
+            chunk_size = int(explicit_chunk_size)
     elif chunk_size is None:
         chunk_size = state.settings.chunk_size
 
-    num_choosers = len(choosers.index)
     assert num_choosers > 0
     assert chunk_size >= 0
 
@@ -1369,7 +1373,10 @@ def adaptive_chunked_choosers_and_alts(
     )
 
     if state.settings.chunk_training_mode == MODE_EXPLICIT:
-        chunk_size = explicit_chunk_size
+        if explicit_chunk_size < 1:
+            chunk_size = math.ceil(num_choosers * explicit_chunk_size)
+        else:
+            chunk_size = explicit_chunk_size
     elif chunk_size is None:
         chunk_size = state.settings.chunk_size
     chunk_sizer = ChunkSizer(

diff --git a/activitysim/core/configuration/logit.py b/activitysim/core/configuration/logit.py
@@ -190,6 +190,12 @@ class LocationComponentSettings(BaseLogitComponentSettings):
     LOGSUM_SETTINGS: Path
     """Settings for the logsum computation."""
 
+    explicit_chunk: float = 0
+    """
+    If > 0, use this chunk size instead of adaptive chunking.
+    If less than 1, use this fraction of the total number of rows.
+    """
+
 
 class TourLocationComponentSettings(LocationComponentSettings, extra="forbid"):
     # Logsum-related settings

diff --git a/activitysim/core/interaction_sample.py b/activitysim/core/interaction_sample.py
@@ -560,6 +560,7 @@ def interaction_sample(
     chunk_tag: str | None = None,
     trace_label: str | None = None,
     zone_layer: str | None = None,
+    explicit_chunk_size: float = 0,
     compute_settings: ComputeSettings | None = None,
 ):
     """
@@ -606,6 +607,9 @@ def interaction_sample(
         Specify which zone layer of the skims is to be used.  You cannot use the
         'maz' zone layer in a one-zone model, but you can use the 'taz' layer in
         a two- or three-zone model (e.g. for destination pre-sampling).
+    explicit_chunk_size : float, optional
+        Applied when the chunk training mode is explicit. If >= 1, the chunk size
+        to use; if > 0 and < 1, that fraction of the total number of choosers.
 
     Returns
     -------
@@ -641,7 +645,9 @@ def interaction_sample(
         chooser_chunk,
         chunk_trace_label,
         chunk_sizer,
-    ) in chunk.adaptive_chunked_choosers(state, choosers, trace_label, chunk_tag):
+    ) in chunk.adaptive_chunked_choosers(
+        state, choosers, trace_label, chunk_tag, explicit_chunk_size=explicit_chunk_size
+    ):
         choices = _interaction_sample(
             state,
             chooser_chunk,

diff --git a/activitysim/core/interaction_sample_simulate.py b/activitysim/core/interaction_sample_simulate.py
@@ -411,6 +411,7 @@ def interaction_sample_simulate(
     trace_choice_name=None,
     estimator=None,
     skip_choice=False,
+    explicit_chunk_size=0,
     *,
     compute_settings: ComputeSettings | None = None,
 ):
@@ -454,6 +455,9 @@ def interaction_sample_simulate(
     skip_choice: bool
         This skips the logit choice step and simply returns the alternatives table with logsums
         (used in disaggregate accessibility)
+    explicit_chunk_size : float, optional
+        Applied when the chunk training mode is explicit. If >= 1, the chunk size
+        to use; if > 0 and < 1, that fraction of the total number of choosers.
 
     Returns
     -------
@@ -483,7 +487,13 @@ def interaction_sample_simulate(
         chunk_trace_label,
         chunk_sizer,
     ) in chunk.adaptive_chunked_choosers_and_alts(
-        state, choosers, alternatives, trace_label, chunk_tag, chunk_size=chunk_size
+        state,
+        choosers,
+        alternatives,
+        trace_label,
+        chunk_tag,
+        chunk_size=chunk_size,
+        explicit_chunk_size=explicit_chunk_size,
     ):
         choices = _interaction_sample_simulate(
             state,

diff --git a/activitysim/core/interaction_simulate.py b/activitysim/core/interaction_simulate.py
@@ -966,9 +966,9 @@ def interaction_simulate(
         when household tracing enabled. No tracing occurs if label is empty or None.
     trace_choice_name: str
         This is the column label to be used in trace file csv dump of choices
-    explicit_chunk_size : int, optional
+    explicit_chunk_size : float, optional
         If > 0, specifies the chunk size to use when chunking the interaction
-        simulation.
+        simulation. If < 1, specifies the fraction of the total number of choosers.
 
     Returns
     -------

diff --git a/activitysim/core/simulate.py b/activitysim/core/simulate.py
@@ -2027,6 +2027,7 @@ def simple_simulate_logsums(
     chunk_size=0,
     trace_label=None,
     chunk_tag=None,
+    explicit_chunk_size=0,
     compute_settings: ComputeSettings | None = None,
 ):
     """
@@ -2049,7 +2050,12 @@ def simple_simulate_logsums(
         chunk_trace_label,
         chunk_sizer,
     ) in chunk.adaptive_chunked_choosers(
-        state, choosers, trace_label, chunk_tag, chunk_size=chunk_size
+        state,
+        choosers,
+        trace_label,
+        chunk_tag,
+        chunk_size=chunk_size,
+        explicit_chunk_size=explicit_chunk_size,
     ):
         logsums = _simple_simulate_logsums(
             state,