Skip to content

Commit

Permalink
test dropping before interaction df
Browse files (browse the repository at this point in the history)
  • Loading branch information
i-am-sijia committed Mar 25, 2024
1 parent 34bb9de commit a013b6e
Show file tree
Hide file tree
Showing 5 changed files with 175 additions and 49 deletions.
6 changes: 3 additions & 3 deletions activitysim/abm/models/trip_destination.py
Original file line number Diff line number Diff line change
Expand Up @@ -995,7 +995,7 @@ def choose_trip_destination(
destination_sample = trip_destination_sample(
state,
primary_purpose=primary_purpose,
trips=trips,
trips=trips.copy(),
alternatives=alternatives,
model_settings=model_settings,
size_term_matrix=size_term_matrix,
Expand Down Expand Up @@ -1023,7 +1023,7 @@ def choose_trip_destination(
destination_sample = compute_logsums(
state,
primary_purpose=primary_purpose,
trips=trips,
trips=trips.copy(),
destination_sample=destination_sample,
tours_merged=tours_merged,
model_settings=model_settings,
Expand All @@ -1036,7 +1036,7 @@ def choose_trip_destination(
destinations = trip_destination_simulate(
state,
primary_purpose=primary_purpose,
trips=trips,
trips=trips.copy(),
destination_sample=destination_sample,
model_settings=model_settings,
want_logsums=want_logsums,
Expand Down
57 changes: 57 additions & 0 deletions activitysim/core/interaction_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,63 @@ def _interaction_sample(

interaction_utilities = None
interaction_utilities_sh = None

# drop variables before the interaction dataframe is created

# drop unused columns only when there are no trace targets
# (note: the condition below does not check for estimation mode)
if (not have_trace_targets):

# keep only variables needed for spec
import re

# define a regular expression to find variables in spec
pattern = r"[a-zA-Z_][a-zA-Z0-9_]*"

unique_variables_in_spec = set(
spec.reset_index()["Expression"]
.apply(lambda x: re.findall(pattern, x))
.sum()
)

# when sharrow mode, need to keep skim variables in the chooser table
# if sharrow_enabled:
if locals_d:
unique_variables_in_spec.add(locals_d.get("orig_col_name", None))
unique_variables_in_spec.add(locals_d.get("dest_col_name", None))
if locals_d.get("timeframe") == "trip":
orig_col_name = locals_d.get("ORIGIN", None)
dest_col_name = locals_d.get("DESTINATION", None)
stop_col_name = None
parking_col_name = None
primary_origin_col_name = None
if orig_col_name is None and "od_skims" in locals_d:
orig_col_name = locals_d["od_skims"].orig_key
if dest_col_name is None and "od_skims" in locals_d:
dest_col_name = locals_d["od_skims"].dest_key
if stop_col_name is None and "dp_skims" in locals_d:
stop_col_name = locals_d["dp_skims"].dest_key
if primary_origin_col_name is None and "dnt_skims" in locals_d:
primary_origin_col_name = locals_d["dnt_skims"].dest_key
unique_variables_in_spec.add(orig_col_name)
unique_variables_in_spec.add(dest_col_name)
unique_variables_in_spec.add(parking_col_name)
unique_variables_in_spec.add(primary_origin_col_name)
unique_variables_in_spec.add(stop_col_name)
unique_variables_in_spec.add("trip_period")
unique_variables_in_spec.add("purpose_index_num")

unique_variables_in_spec.add("proto_person_id")
unique_variables_in_spec.add("person_id")
unique_variables_in_spec.add("tour_id")
unique_variables_in_spec.add("tour_mode")
logger.info("Dropping unused variables in chooser table")

if "school_escorting" not in trace_label:
for c in choosers.columns:
if c not in unique_variables_in_spec:
choosers.drop(c, axis=1, inplace=True)

if sharrow_enabled:
(
interaction_utilities,
Expand Down
59 changes: 59 additions & 0 deletions activitysim/core/interaction_sample_simulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,65 @@ def _interaction_sample_simulate(
logger.info(
f"{trace_label} start merging choosers and alternatives to create interaction_df"
)

# drop variables before the interaction dataframe is created

# drop unused columns only when there are no trace targets
# (note: the condition below does not check for estimation mode)
if (not have_trace_targets):

# keep only variables needed for spec
import re

# define a regular expression to find variables in spec
pattern = r"[a-zA-Z_][a-zA-Z0-9_]*"

unique_variables_in_spec = set(
spec.reset_index()["Expression"]
.apply(lambda x: re.findall(pattern, x))
.sum()
)

# when sharrow mode, need to keep skim variables in the chooser table
# if sharrow_enabled:
if locals_d:
unique_variables_in_spec.add(locals_d.get("orig_col_name", None))
unique_variables_in_spec.add(locals_d.get("dest_col_name", None))
if locals_d.get("timeframe") == "trip":
orig_col_name = locals_d.get("ORIGIN", None)
dest_col_name = locals_d.get("DESTINATION", None)
stop_col_name = None
parking_col_name = None
primary_origin_col_name = None
if orig_col_name is None and "od_skims" in locals_d:
orig_col_name = locals_d["od_skims"].orig_key
if dest_col_name is None and "od_skims" in locals_d:
dest_col_name = locals_d["od_skims"].dest_key
if stop_col_name is None and "dp_skims" in locals_d:
stop_col_name = locals_d["dp_skims"].dest_key
if primary_origin_col_name is None and "dnt_skims" in locals_d:
primary_origin_col_name = locals_d["dnt_skims"].dest_key
unique_variables_in_spec.add(orig_col_name)
unique_variables_in_spec.add(dest_col_name)
unique_variables_in_spec.add(parking_col_name)
unique_variables_in_spec.add(primary_origin_col_name)
unique_variables_in_spec.add(stop_col_name)
unique_variables_in_spec.add("trip_period")
unique_variables_in_spec.add("purpose_index_num")

unique_variables_in_spec.add("proto_person_id")
unique_variables_in_spec.add("person_id")
unique_variables_in_spec.add("tour_id")
unique_variables_in_spec.add("tour_mode")
unique_variables_in_spec.add("household_id")
unique_variables_in_spec.add("parent_tour_id")
logger.info("Dropping unused variables in chooser table")

if "school_escorting" not in trace_label:
for c in choosers.columns:
if c not in unique_variables_in_spec:
choosers.drop(c, axis=1, inplace=True)

interaction_df = alternatives.join(choosers, how="left", rsuffix="_chooser")
logger.info(
f"{trace_label} end merging choosers and alternatives to create interaction_df"
Expand Down
100 changes: 54 additions & 46 deletions activitysim/core/interaction_simulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,52 +91,6 @@ def eval_interaction_utilities(

logger.info(f"{trace_label} sharrow_enabled is {sharrow_enabled}")

# check if tracing is enabled and if we have trace targets
# if not estimation mode, drop unused columns
if ((trace_rows is None) or (not trace_rows.any())) and (estimator is None):

# keep only variables needed for spec
import re

# define a regular expression to find variables in spec
pattern = r"[a-zA-Z_][a-zA-Z0-9_]*"

unique_variables_in_spec = set(
spec.reset_index()["Expression"]
.apply(lambda x: re.findall(pattern, x))
.sum()
)

# when sharrow mode, need to keep skim variables in the chooser table
if sharrow_enabled:
if locals_d:
unique_variables_in_spec.add(locals_d.get("orig_col_name", None))
unique_variables_in_spec.add(locals_d.get("dest_col_name", None))
if locals_d.get("timeframe") == "trip":
orig_col_name = locals_d.get("ORIGIN", None)
dest_col_name = locals_d.get("DESTINATION", None)
stop_col_name = None
parking_col_name = None
primary_origin_col_name = None
if orig_col_name is None and "od_skims" in locals_d:
orig_col_name = locals_d["od_skims"].orig_key
if dest_col_name is None and "od_skims" in locals_d:
dest_col_name = locals_d["od_skims"].dest_key
if stop_col_name is None and "dp_skims" in locals_d:
stop_col_name = locals_d["dp_skims"].dest_key
if primary_origin_col_name is None and "dnt_skims" in locals_d:
primary_origin_col_name = locals_d["dnt_skims"].dest_key
unique_variables_in_spec.add(orig_col_name)
unique_variables_in_spec.add(dest_col_name)
unique_variables_in_spec.add(parking_col_name)
unique_variables_in_spec.add(primary_origin_col_name)
unique_variables_in_spec.add(stop_col_name)
unique_variables_in_spec.add("trip_period")
unique_variables_in_spec.add("purpose_index_num")

# keep only variables needed for spec
df = df[[c for c in df.columns if c in unique_variables_in_spec]]

trace_eval_results = None

with chunk.chunk_log(state, trace_label) as chunk_sizer:
Expand Down Expand Up @@ -742,6 +696,60 @@ def _interaction_simulate(
sharrow_enabled = state.settings.sharrow
interaction_utilities = None

# drop variables before the interaction dataframe is created

# check if tracing is enabled and if we have trace targets
# if not estimation mode, drop unused columns
if (not have_trace_targets) and (estimator is None):

# drop_variable = True

# keep only variables needed for spec
import re

# define a regular expression to find variables in spec
pattern = r"[a-zA-Z_][a-zA-Z0-9_]*"

unique_variables_in_spec = set(
spec.reset_index()["Expression"]
.apply(lambda x: re.findall(pattern, x))
.sum()
)

# when sharrow mode, need to keep skim variables in the chooser table
if sharrow_enabled:
if locals_d:
unique_variables_in_spec.add(locals_d.get("orig_col_name", None))
unique_variables_in_spec.add(locals_d.get("dest_col_name", None))
if locals_d.get("timeframe") == "trip":
orig_col_name = locals_d.get("ORIGIN", None)
dest_col_name = locals_d.get("DESTINATION", None)
stop_col_name = None
parking_col_name = None
primary_origin_col_name = None
if orig_col_name is None and "od_skims" in locals_d:
orig_col_name = locals_d["od_skims"].orig_key
if dest_col_name is None and "od_skims" in locals_d:
dest_col_name = locals_d["od_skims"].dest_key
if stop_col_name is None and "dp_skims" in locals_d:
stop_col_name = locals_d["dp_skims"].dest_key
if primary_origin_col_name is None and "dnt_skims" in locals_d:
primary_origin_col_name = locals_d["dnt_skims"].dest_key
unique_variables_in_spec.add(orig_col_name)
unique_variables_in_spec.add(dest_col_name)
unique_variables_in_spec.add(parking_col_name)
unique_variables_in_spec.add(primary_origin_col_name)
unique_variables_in_spec.add(stop_col_name)
unique_variables_in_spec.add("trip_period")
unique_variables_in_spec.add("purpose_index_num")

logger.info("Dropping unused variables in chooser table")

if "school_escorting" not in trace_label:
for c in choosers.columns:
if c not in unique_variables_in_spec:
choosers.drop(c, axis=1, inplace=True)

if locals_d is not None and locals_d.get("_sharrow_skip", False):
sharrow_enabled = False

Expand Down
2 changes: 2 additions & 0 deletions activitysim/core/simulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -1550,6 +1550,8 @@ def _simple_simulate(

custom_chooser_lines = inspect.getsource(custom_chooser)
unique_variables_in_spec.update(re.findall(pattern, custom_chooser_lines))

logger.info("Dropping unused variables in chooser table")

# keep only variables needed for spec
choosers = choosers[
Expand Down

0 comments on commit a013b6e

Please sign in to comment.