[ApiServer] WIP fix parse_experiment_flow_json

leondavi · Feb 16, 2024 · 09b53d0 · 09b53d0
1 parent 5ee34a6
commit 09b53d0
Showing 2 changed files with 20 additions and 6 deletions.
diff --git a/src_py/apiServer/experiment_flow.py b/src_py/apiServer/experiment_flow.py
@@ -92,16 +92,19 @@ def parse_experiment_flow_json(self, json_path : str):
             sourcePieces = phase[EXPFLOW_PHASES_PHASE_SOURCE_PIECES_FIELD]
             source_pieces_inst_list = []
             for source_piece in sourcePieces:
+                # build source piece instance
                 source_name = source_piece[EXPFLOW_PHASE_SOURCE_PIECES_SOURCE_NAME_FIELD]
                 strating_sample = source_piece[EXPFLOW_PHASE_SOURCE_PIECES_STRATING_SAMPLE_FIELD]
                 num_of_batches = source_piece[EXPFLOW_PHASE_SOURCE_PIECES_NUM_OF_BATCHES_FIELD]
                 workers = source_piece[EXPFLOW_PHASE_SOURCE_PIECES_WORKERS_FIELD]
-                source_piece_inst = SourcePieceDS(source_name, strating_sample, num_of_batches, workers)
+                source_piece_inst =  self.csv_dataset.generate_source_pieceDS(source_name, self.batch_size, phase_name, strating_sample, num_of_batches)
+                source_piece_inst.build_workers_target(workers)
+                source_piece_csv_file = self.csv_dataset.generate_source_pieceDs_csv_file(self.csv_dataset.get_csv_path(), source_piece_inst)
+                source_piece_inst.set_pointer_to_sourcePiece_CsvDataSet(source_piece_csv_file)
                 source_pieces_inst_list.append(source_piece_inst)
+
             self.add_phase(phase_name, phase_type, source_pieces_inst_list)
 
-
-
     def set_csv_dataset(self, csv_file_path : str,  num_of_features : int, num_of_labels : int, headers_row : bool):
         self.csv_dataset = CSVDataSet(csv_file_path, self.batch_size, num_of_features, num_of_labels, headers_row)  # Todo get num of features and labels from csv file
 

diff --git a/src_py/apiServer/nerl_csv_dataSet_db.py b/src_py/apiServer/nerl_csv_dataSet_db.py
@@ -10,8 +10,8 @@ def __init__(self, source_name : str, batch_size, phase : str, starting_offset =
         self.phase = phase
         self.starting_offset = starting_offset  # given as index of csv rows
         self.num_of_batches = num_of_batches
-        self.workers_target = None
-        self.pointer_to_CsvDataSet = None # which csvDataSet
+        self.workers_target = [] # [worker_name1, worker_name2, ...]
+        self.pointer_to_sourcePiece_CsvDataSet= None # pointer to the csv file that contains the source piece data
 
     def get_source_name(self):
         return self.source_name
@@ -24,9 +24,19 @@ def get_phase(self):
 
     def get_starting_offset(self):
         return self.starting_offset 
-
 
+    def get_num_of_batches(self):
+        return self.num_of_batches
+
+    def get_workers_target(self):
+        return self.workers_target
+
+    def build_workers_target(self, workers_target: list):
+        self.workers_target = workers_target
 
+    def set_pointer_to_sourcePiece_CsvDataSet(self, pointer_to_sourcePiece_CsvDataSet):
+        self.pointer_to_sourcePiece_CsvDataSet = pointer_to_sourcePiece_CsvDataSet
+
 class CsvDataSet():
     def __init__(self, csv_path, batch_size, num_of_features, num_of_labels, headers_row: bool):
         self.csv_path = csv_path
@@ -62,6 +72,7 @@ def generate_source_pieceDS(self, source_name : str, batch_size: int, phase : st
     def generate_source_pieceDs_csv_file(self, csv_file_path : str, source_pieceDS_inst: SourcePieceDS):
         # Todo Ohad&Noa
         # df_train = pd.df.read_csv(self.csv_dataset_path, skiprows=starting_offset_index_train, nrows=number_of_samples_train)
+        # should return the path of the source piece csv file
         pass