From b6921308088ab91576062881f6e5b8f384f7133a Mon Sep 17 00:00:00 2001 From: Shashank Srikanth Date: Fri, 10 Jan 2025 16:56:47 -0800 Subject: [PATCH] Remove debug statements --- metaflow/client/core.py | 26 +++----------------- metaflow/metaflow_config.py | 1 - metaflow/plugins/metadata_providers/local.py | 6 ----- metaflow/task.py | 3 +-- test/core/contexts.json | 4 +-- test/core/run_tests.py | 9 ------- test/core/tests/client_ancestors.py | 1 - test_runner | 2 +- 8 files changed, 8 insertions(+), 44 deletions(-) diff --git a/metaflow/client/core.py b/metaflow/client/core.py index 53e576a04bd..9fe5a319cbd 100644 --- a/metaflow/client/core.py +++ b/metaflow/client/core.py @@ -1177,14 +1177,10 @@ def _get_metadata_query_vals( # For linear steps, or foreach splits and joins, ancestor and successor tasks will all belong to # the same step. query_task = self._get_task_for_queried_step(flow_id, run_id, steps[0]) - print(f"flow_id: {flow_id}, run_id: {run_id}, query_step: {steps[0]}, query_task: {query_task}") - print(f"query_task.metadata_dict: {query_task.metadata_dict}") query_foreach_stack_len = len( query_task.metadata_dict.get("foreach-stack", []) ) - print(f"cur_foreach_stack_len: {cur_foreach_stack_len}, query_foreach_stack_len: {query_foreach_stack_len}") - if query_foreach_stack_len == cur_foreach_stack_len: # The successor or ancestor tasks belong to the same foreach stack level field_name = "foreach-indices" @@ -1237,8 +1233,6 @@ def _get_related_tasks(self, relation_type: str) -> Dict[str, List[str]]: if not steps: return {} - print("-" * 50) - print(f"Getting related tasks for {relation_type} and path {self.path_components} and query steps {steps}") field_name, field_value = self._get_metadata_query_vals( flow_id, run_id, @@ -1246,30 +1240,18 @@ def _get_related_tasks(self, relation_type: str) -> Dict[str, List[str]]: steps, relation_type, ) - print(f"fetched field_name: {field_name} and field_value: {field_value}") cur_time = time.time() - tp = {} - for step in steps: - tp[step] = [ + return { + step: [ f"{flow_id}/{run_id}/{step}/{task_id}" for task_id in self._metaflow.metadata.filter_tasks_by_metadata( flow_id, run_id, step, field_name, field_value ) ] - cur_time = time.time() - - return tp - # return { - # step: [ - # f"{flow_id}/{run_id}/{step}/{task_id}" - # for task_id in self._metaflow.metadata.filter_tasks_by_metadata( - # flow_id, run_id, step, field_name, field_value - # ) - # ] - # for step in steps - # } + for step in steps + } @property def immediate_ancestors(self) -> Dict[str, List[str]]: diff --git a/metaflow/metaflow_config.py b/metaflow/metaflow_config.py index c78aa41dd15..415a934cbe4 100644 --- a/metaflow/metaflow_config.py +++ b/metaflow/metaflow_config.py @@ -248,7 +248,6 @@ # Default container registry DEFAULT_CONTAINER_REGISTRY = from_conf("DEFAULT_CONTAINER_REGISTRY") # Controls whether to include foreach stack information in metadata. -# TODO(Darin, 05/01/24): Remove this flag once we are confident with this feature. INCLUDE_FOREACH_STACK = from_conf("INCLUDE_FOREACH_STACK", True) # Maximum length of the foreach value string to be stored in each ForeachFrame. MAXIMUM_FOREACH_VALUE_CHARS = from_conf("MAXIMUM_FOREACH_VALUE_CHARS", 30) diff --git a/metaflow/plugins/metadata_providers/local.py b/metaflow/plugins/metadata_providers/local.py index 6553595047a..9cf4d5a5099 100644 --- a/metaflow/plugins/metadata_providers/local.py +++ b/metaflow/plugins/metadata_providers/local.py @@ -291,18 +291,12 @@ def _read_metadata_value(file_path: str) -> dict: meta_path = LocalMetadataProvider._get_metadir( flow_id, run_id, query_step, task_id ) - print(f"Meta path: {meta_path}") - print(f"Field name prefix: {field_name_prefix}, field value: {field_value}") - print(f"Files in meta path: {os.listdir(meta_path)}") - latest_file = _get_latest_metadata_file(meta_path, field_name_prefix) - print(f"Latest file: {latest_file}") if not latest_file: continue # Read metadata and check value metadata = _read_metadata_value(latest_file[0]) - print(f"Metadata: {metadata}") if metadata.get("value") == field_value: filtered_task_ids.append(task_id) diff --git a/metaflow/task.py b/metaflow/task.py index 8e3bba2ecdd..81d96eb7809 100644 --- a/metaflow/task.py +++ b/metaflow/task.py @@ -501,7 +501,7 @@ def run_step( current_foreach_path_length += len(foreach_step) foreach_stack_formatted.append(foreach_step) - print(f"Foreach stack formatted: {foreach_stack_formatted}") + if foreach_stack_formatted: metadata.append( MetaDatum( @@ -516,7 +516,6 @@ def run_step( foreach_indices, foreach_indices_truncated, foreach_step_names = ( self._dynamic_runtime_metadata(foreach_stack) ) - print(f"Task id: {task_id}, foreach_stack: {foreach_stack}, foreach_indices: {foreach_indices}") metadata.extend( [ MetaDatum( diff --git a/test/core/contexts.json b/test/core/contexts.json index 3811f2490bf..9d1e264ca47 100644 --- a/test/core/contexts.json +++ b/test/core/contexts.json @@ -24,14 +24,14 @@ "--tag=\u523a\u8eab means sashimi", "--tag=multiple tags should be ok" ], - "checks": [ "python3-metadata"], + "checks": ["python3-cli", "python3-metadata"], "disabled_tests": [ "LargeArtifactTest", "S3FailureTest", "CardComponentRefreshTest", "CardWithRefreshTest" ], - "executors": ["api"] + "executors": ["cli", "api"] }, { "name": "python3-all-local-cards-realtime", diff --git a/test/core/run_tests.py b/test/core/run_tests.py index 0697ba97e02..35487f17616 100644 --- a/test/core/run_tests.py +++ b/test/core/run_tests.py @@ -236,15 +236,6 @@ def construct_arg_dicts_from_click_api(): ) return called_processes[-1].returncode, path - # Print the test_flow.py file - subprocess.run( - ["cat", "test_flow.py"], - env=env, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - check=False, - ) - # run flow if executor == "cli": called_processes.append( diff --git a/test/core/tests/client_ancestors.py b/test/core/tests/client_ancestors.py index 5548392e774..b46f81952fc 100644 --- a/test/core/tests/client_ancestors.py +++ b/test/core/tests/client_ancestors.py @@ -63,7 +63,6 @@ def check_results(self, flow, checker): # For each task in the step for task in step: ancestors = task.immediate_ancestors - print(f"Task is {task.data.task_pathspec} and ancestors are {ancestors}") ancestor_pathspecs = set(chain.from_iterable(ancestors.values())) # Compare with stored parent_task_pathspecs diff --git a/test_runner b/test_runner index b0a6bed8f38..69ca8a8d921 100755 --- a/test_runner +++ b/test_runner @@ -16,7 +16,7 @@ install_extensions() { } run_tests() { - cd test/core && PYTHONPATH=`pwd`/../../ python3 run_tests.py --num-parallel 8 --tests ImmediateAncestorTest && cd ../../ + cd test/core && PYTHONPATH=`pwd`/../../ python3 run_tests.py --num-parallel 8 && cd ../../ } # We run realtime cards tests separately because there these tests validate the asynchronous updates to the