Skip to content

Commit 885f587

Browse files
authored
1 parent 3170541 commit 885f587

File tree

1,112 files changed

+1478939
-88029
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

1,112 files changed

+1478939
-88029
lines changed

.gitignore

+6
Original file line numberDiff line numberDiff line change
@@ -66,3 +66,9 @@ docs/_build/
6666

6767
# macOS
6868
*.DS_Store
69+
_test_est
70+
71+
# activitysim conventions
72+
*_local/
73+
*_local.*
74+

.travis.yml

+15-10
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,17 @@ env:
88
jobs:
99
# Add new TEST_SUITE jobs as needed via Travis build matrix expansion
1010
- TEST_SUITE=activitysim/abm/models
11-
- TEST_SUITE=activitysim/abm/test/test_misc.py
12-
- TEST_SUITE=activitysim/abm/test/test_mp_pipeline.py
13-
- TEST_SUITE=activitysim/abm/test/test_multi_zone.py
14-
- TEST_SUITE=activitysim/abm/test/test_multi_zone_mp.py
15-
- TEST_SUITE=activitysim/abm/test/test_pipeline.py
11+
- TEST_SUITE=activitysim/abm/test
1612
- TEST_SUITE=activitysim/cli
1713
- TEST_SUITE=activitysim/core
14+
- TEST_SUITE=activitysim/estimation/test/test_larch_estimation.py TEST_DEPENDS="larch>=5.5.3 -c conda-forge"
15+
- TEST_SUITE=activitysim/examples/example_mtc/test
16+
- TEST_SUITE=activitysim/examples/example_multiple_zone/test
17+
- TEST_SUITE=activitysim/examples/example_marin/test
18+
- TEST_SUITE=activitysim/examples/example_arc/test
19+
- TEST_SUITE=activitysim/examples/example_semcog/test
20+
- TEST_SUITE=activitysim/examples/example_psrc/test
21+
- TEST_SUITE=activitysim/examples/example_sandag/test
1822

1923
python:
2024
- '3.7'
@@ -30,16 +34,17 @@ install:
3034
- conda info -a
3135
- conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION
3236
- conda activate test-environment
33-
- conda install pytest pytest-cov coveralls pycodestyle
37+
- conda install pytest pytest-cov coveralls pycodestyle $TEST_DEPENDS
38+
- pip install pytest-regressions
3439
- pip install .
3540
- pip freeze
3641

3742
script:
3843
# build 2 and 3 zone test data twice since the Python test code on Linux sees these as different locations
39-
- python activitysim/examples/example_multiple_zone/two_zone_example_data.py
40-
- python activitysim/examples/example_multiple_zone/three_zone_example_data.py
41-
- python /home/travis/miniconda/envs/test-environment/lib/python$TRAVIS_PYTHON_VERSION/site-packages/activitysim/examples/example_multiple_zone/two_zone_example_data.py
42-
- python /home/travis/miniconda/envs/test-environment/lib/python$TRAVIS_PYTHON_VERSION/site-packages/activitysim/examples/example_multiple_zone/three_zone_example_data.py
44+
- python activitysim/examples/example_multiple_zone/scripts/two_zone_example_data.py
45+
- python activitysim/examples/example_multiple_zone/scripts/three_zone_example_data.py
46+
- python /home/travis/miniconda/envs/test-environment/lib/python$TRAVIS_PYTHON_VERSION/site-packages/activitysim/examples/example_multiple_zone/scripts/two_zone_example_data.py
47+
- python /home/travis/miniconda/envs/test-environment/lib/python$TRAVIS_PYTHON_VERSION/site-packages/activitysim/examples/example_multiple_zone/scripts/three_zone_example_data.py
4348
# pycodestyle
4449
- pycodestyle activitysim
4550
# run specific TEST_SUITE job on travis to avoid job max time

MANIFEST.in

+1-12
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,2 @@
1+
graft activitysim
12

2-
include ez_setup.py
3-
include README.rst
4-
graft activitysim/examples
5-
6-
# required for test system
7-
8-
include activitysim\abm\test\data\mtc_asim.h5
9-
include activitysim\abm\test\data\skims.omx
10-
include activitysim\abm\test\data\households.csv
11-
include activitysim\abm\test\data\persons.csv
12-
include activitysim\abm\test\data\land_use.csv
13-
include activitysim\abm\test\data\override_hh_ids.csv

activitysim/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# ActivitySim
22
# See full license in LICENSE.txt.
33

4-
__version__ = '0.9.7'
4+
__version__ = '0.9.9'
55
__doc__ = 'Activity-Based Travel Modeling'

activitysim/abm/models/accessibility.py

+118-128
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from activitysim.core import config
1111
from activitysim.core import inject
1212
from activitysim.core import pipeline
13+
from activitysim.core import chunk
1314
from activitysim.core import mem
1415

1516
from activitysim.core import los
@@ -18,131 +19,23 @@
1819
logger = logging.getLogger(__name__)
1920

2021

21-
# class AccessibilitySkims(object):
22-
# """
23-
# Wrapper for skim arrays to facilitate use of skims by accessibility model
24-
#
25-
# Parameters
26-
# ----------
27-
# skims : 2D array
28-
# omx: open omx file object
29-
# this is only used to load skims on demand that were not preloaded
30-
# length: int
31-
# number of zones in skim to return in skim matrix
32-
# in case the skims contain additional external zones that should be trimmed out so skim
33-
# array is correct shape to match (flattened) O-D tiled columns in the od dataframe
34-
# transpose: bool
35-
# whether to transpose the matrix before flattening. (i.e. act as a D-O instead of O-D skim)
36-
# """
37-
#
38-
# def __init__(self, skim_dict, orig_zones, dest_zones, transpose=False):
39-
#
40-
# logger.info(f"init AccessibilitySkims with {len(dest_zones)} dest zones {len(orig_zones)} orig zones")
41-
#
42-
# assert len(orig_zones) <= len(dest_zones)
43-
# assert np.isin(orig_zones, dest_zones).all()
44-
# assert len(np.unique(orig_zones)) == len(orig_zones)
45-
# assert len(np.unique(dest_zones)) == len(dest_zones)
46-
#
47-
# self.skim_dict = skim_dict
48-
# self.transpose = transpose
49-
#
50-
# num_skim_zones = skim_dict.get_skim_info('omx_shape')[0]
51-
# if num_skim_zones == len(orig_zones) and skim_dict.offset_mapper.offset_series is None:
52-
# # no slicing required because whatever the offset_int, the skim data aligns with zone list
53-
# self.map_data = False
54-
# else:
55-
#
56-
# logger.debug("AccessibilitySkims - applying offset_mapper")
57-
#
58-
# skim_index = list(range(num_skim_zones))
59-
# orig_map = skim_dict.offset_mapper.map(orig_zones)
60-
# dest_map = skim_dict.offset_mapper.map(dest_zones)
61-
#
62-
# # (we might be sliced multiprocessing)
63-
# # assert np.isin(skim_index, orig_map).all()
64-
#
65-
# out_of_bounds = ~np.isin(skim_index, dest_map)
66-
# # if out_of_bounds.any():
67-
# # print(f"{(out_of_bounds).sum()} skim zones not in dest_map")
68-
# # print(f"dest_zones {dest_zones}")
69-
# # print(f"dest_map {dest_map}")
70-
# # print(f"skim_index {skim_index}")
71-
# assert not out_of_bounds.any(), \
72-
# f"AccessibilitySkims {(out_of_bounds).sum()} skim zones not in dest_map: {np.ix_(out_of_bounds)[0]}"
73-
#
74-
# self.map_data = True
75-
# self.orig_map = orig_map
76-
# self.dest_map = dest_map
77-
#
78-
# def __getitem__(self, key):
79-
# """
80-
# accessor to return flattened skim array with specified key
81-
# flattened array will have length length*length and will match tiled OD df used by assign
82-
#
83-
# this allows the skim array to be accessed from expressions as
84-
# skim['DISTANCE'] or skim[('SOVTOLL_TIME', 'MD')]
85-
# """
86-
#
87-
# data = self.skim_dict.get(key).data
88-
#
89-
# if self.transpose:
90-
# data = data.transpose()
91-
#
92-
# if self.map_data:
93-
# # slice skim to include only orig rows and dest columns
94-
# # 2-d boolean slicing in numpy is a bit tricky
95-
# # data = data[orig_map, dest_map] # <- WRONG!
96-
# # data = data[orig_map, :][:, dest_map] # <- RIGHT
97-
# # data = data[np.ix_(orig_map, dest_map)] # <- ALSO RIGHT
98-
#
99-
# data = data[self.orig_map, :][:, self.dest_map]
100-
#
101-
# return data.flatten()
22+
def compute_accessibilities_for_zones(
23+
accessibility_df,
24+
land_use_df,
25+
assignment_spec,
26+
constants,
27+
network_los,
28+
trace_od,
29+
trace_label):
10230

103-
104-
@inject.step()
105-
def compute_accessibility(accessibility, network_los, land_use, trace_od):
106-
107-
"""
108-
Compute accessibility for each zone in land use file using expressions from accessibility_spec
109-
110-
The actual results depend on the expressions in accessibility_spec, but this is initially
111-
intended to permit implementation of the mtc accessibility calculation as implemented by
112-
Accessibility.job
113-
114-
Compute measures of accessibility used by the automobile ownership model.
115-
The accessibility measure first multiplies an employment variable by a mode-specific decay
116-
function. The product reflects the difficulty of accessing the activities the farther
117-
(in terms of round-trip travel time) the jobs are from the location in question. The products
118-
to each destination zone are next summed over each origin zone, and the logarithm of the
119-
product mutes large differences. The decay function on the walk accessibility measure is
120-
steeper than automobile or transit. The minimum accessibility is zero.
121-
"""
122-
123-
trace_label = 'compute_accessibility'
124-
model_settings = config.read_model_settings('accessibility.yaml')
125-
assignment_spec = assign.read_assignment_spec(config.config_file_path('accessibility.csv'))
126-
127-
accessibility_df = accessibility.to_frame()
128-
129-
logger.info("Running %s with %d dest zones" % (trace_label, len(accessibility_df)))
130-
131-
constants = config.get_model_constants(model_settings)
132-
133-
land_use_columns = model_settings.get('land_use_columns', [])
134-
land_use_df = land_use.to_frame()
135-
land_use_df = land_use_df[land_use_columns]
136-
137-
# don't assume they are the same: accessibility may be sliced if we are multiprocessing
13831
orig_zones = accessibility_df.index.values
13932
dest_zones = land_use_df.index.values
14033

14134
orig_zone_count = len(orig_zones)
14235
dest_zone_count = len(dest_zones)
14336

144-
logger.info("Running %s with %d dest zones %d orig zones" %
145-
(trace_label, dest_zone_count, orig_zone_count))
37+
logger.info("Running %s with %d orig zones %d dest zones" %
38+
(trace_label, orig_zone_count, dest_zone_count))
14639

14740
# create OD dataframe
14841
od_df = pd.DataFrame(
@@ -160,36 +53,34 @@ def compute_accessibility(accessibility, network_los, land_use, trace_od):
16053

16154
# merge land_use_columns into od_df
16255
od_df = pd.merge(od_df, land_use_df, left_on='dest', right_index=True).sort_index()
56+
chunk.log_df(trace_label, "od_df", od_df)
16357

16458
locals_d = {
16559
'log': np.log,
16660
'exp': np.exp,
16761
'network_los': network_los,
16862
}
63+
locals_d.update(constants)
16964

17065
skim_dict = network_los.get_default_skim_dict()
17166
locals_d['skim_od'] = skim_dict.wrap('orig', 'dest').set_df(od_df)
17267
locals_d['skim_do'] = skim_dict.wrap('dest', 'orig').set_df(od_df)
17368

17469
if network_los.zone_system == los.THREE_ZONE:
175-
locals_d['tvpb'] = TransitVirtualPathBuilder(network_los)
176-
177-
if constants is not None:
178-
locals_d.update(constants)
70+
locals_d['tvpb'] = network_los.tvpb
17971

18072
results, trace_results, trace_assigned_locals \
181-
= assign.assign_variables(assignment_spec, od_df, locals_d, trace_rows=trace_od_rows)
73+
= assign.assign_variables(assignment_spec, od_df, locals_d,
74+
trace_rows=trace_od_rows, trace_label=trace_label, chunk_log=True)
18275

76+
chunk.log_df(trace_label, "results", results)
77+
78+
# accessibility_df = accessibility_df.copy()
18379
for column in results.columns:
18480
data = np.asanyarray(results[column])
18581
data.shape = (orig_zone_count, dest_zone_count) # (o,d)
18682
accessibility_df[column] = np.log(np.sum(data, axis=1) + 1)
18783

188-
logger.info("{trace_label} added {len(results.columns} columns")
189-
190-
# - write table to pipeline
191-
pipeline.replace_table("accessibility", accessibility_df)
192-
19384
if trace_od:
19485

19586
if not trace_od_rows.any():
@@ -208,3 +99,102 @@ def compute_accessibility(accessibility, network_los, land_use, trace_od):
20899

209100
if trace_assigned_locals:
210101
tracing.write_csv(trace_assigned_locals, file_name="accessibility_locals")
102+
103+
return(accessibility_df)
104+
105+
106+
def accessibility_calc_row_size(accessibility_df, land_use_df, assignment_spec, network_los, trace_label):
    """
    Estimate per-row memory footprint (rows_per_chunk calculator) for the
    accessibility model, for use by the adaptive chunker.

    Returns 0 for THREE_ZONE systems, which disables row-size estimation
    there (tap-skim / tvpb overhead cannot be estimated here).
    """

    estimator = chunk.RowSizeEstimator(trace_label)

    # THREE_ZONE runs incur tvpb (tap skim) overhead that this calculator
    # cannot account for, so bail out early with 0 to disable estimation.
    # (Checked first to facilitate tracing of the row-size estimation below.)
    if network_los.zone_system == los.THREE_ZONE:
        # DISABLE_TVPB_OVERHEAD
        logger.debug("disable calc_row_size for THREE_ZONE with tap skims")
        return 0

    num_land_use_rows = len(land_use_df.index)
    num_land_use_cols = len(land_use_df.columns)
    num_od_cols = 2

    # The assignment spec has one row per value to assign, but the locals
    # dict overwrites recurring targets, so only distinct persistent targets
    # (neither throwaway nor temp-scalar) are resident simultaneously
    # during spec evaluation.
    def _is_persistent(target):
        return not (assign.is_throwaway(target) or assign.is_temp_scalar(target))

    num_spec_values = sum(1 for target in assignment_spec.target.unique() if _is_persistent(target))

    estimator.add_elements(num_land_use_rows * num_od_cols, 'od_df')

    # each od_df row is joined against the land_use columns of its dest zone
    estimator.add_elements(num_land_use_rows * num_land_use_cols, 'land_use_choosers')

    # assign_variables then materializes one value per persistent target per joined row
    estimator.add_elements(num_land_use_rows * num_spec_values, 'spec_values')

    return estimator.get_hwm()
142+
143+
144+
@inject.step()
def compute_accessibility(land_use, accessibility, network_los, chunk_size, trace_od):
    """
    Compute accessibility for each zone in the land use file using the
    expressions in accessibility_spec.

    The actual results depend on the expressions in accessibility_spec; this
    was initially intended to permit implementation of the MTC accessibility
    calculation as implemented by Accessibility.job.

    Computes measures of accessibility used by the automobile ownership
    model: an employment variable is multiplied by a mode-specific decay
    function (reflecting the difficulty of reaching activities the farther
    away, in round-trip travel time, the jobs are), the products are summed
    over destination zones for each origin zone, and the logarithm mutes
    large differences. The walk decay is steeper than auto or transit; the
    minimum accessibility is zero.

    Results are written back to the pipeline's "accessibility" table.
    """

    trace_label = 'compute_accessibility'
    model_settings = config.read_model_settings('accessibility.yaml')
    assignment_spec = assign.read_assignment_spec(config.config_file_path('accessibility.csv'))

    accessibility_df = accessibility.to_frame()

    # the injected accessibility table must not already hold computed columns
    if len(accessibility_df.columns) > 0:
        logger.warning(f"accessibility table is not empty. Columns:{list(accessibility_df.columns)}")
        raise RuntimeError(f"accessibility table is not empty.")

    constants = config.get_model_constants(model_settings)

    # keep only the land_use columns the spec needs, per the land_use_columns model_setting
    wanted_columns = model_settings.get('land_use_columns', [])
    land_use_df = land_use.to_frame()[wanted_columns]

    logger.info(f"Running {trace_label} with {len(accessibility_df.index)} orig zones {len(land_use_df)} dest zones")

    # row_size is 0 (estimation disabled) when chunking is off
    row_size = chunk_size and accessibility_calc_row_size(
        accessibility_df, land_use_df, assignment_spec, network_los, trace_label)

    # process origin zones in adaptively-sized chunks, collecting results
    chunk_results = []
    for _i, zone_chunk, _chunk_trace_label in chunk.adaptive_chunked_choosers(
            accessibility_df, chunk_size, row_size, trace_label):

        chunk_accessibilities = compute_accessibilities_for_zones(
            zone_chunk, land_use_df, assignment_spec,
            constants, network_los, trace_od, trace_label)
        chunk_results.append(chunk_accessibilities)

    accessibility_df = pd.concat(chunk_results)

    logger.info(f"{trace_label} computed accessibilities {accessibility_df.shape}")

    # - write table to pipeline
    pipeline.replace_table("accessibility", accessibility_df)

0 commit comments

Comments
 (0)