diff --git a/src_py/apiServer/experiment_flow.py b/src_py/apiServer/experiment_flow.py index 48eaabb78..c5104db4e 100644 --- a/src_py/apiServer/experiment_flow.py +++ b/src_py/apiServer/experiment_flow.py @@ -92,16 +92,19 @@ def parse_experiment_flow_json(self, json_path : str): sourcePieces = phase[EXPFLOW_PHASES_PHASE_SOURCE_PIECES_FIELD] source_pieces_inst_list = [] for source_piece in sourcePieces: + # build source piece instance source_name = source_piece[EXPFLOW_PHASE_SOURCE_PIECES_SOURCE_NAME_FIELD] strating_sample = source_piece[EXPFLOW_PHASE_SOURCE_PIECES_STRATING_SAMPLE_FIELD] num_of_batches = source_piece[EXPFLOW_PHASE_SOURCE_PIECES_NUM_OF_BATCHES_FIELD] workers = source_piece[EXPFLOW_PHASE_SOURCE_PIECES_WORKERS_FIELD] - source_piece_inst = SourcePieceDS(source_name, strating_sample, num_of_batches, workers) + source_piece_inst = self.csv_dataset.generate_source_pieceDS(source_name, self.batch_size, phase_name, strating_sample, num_of_batches) + source_piece_inst.build_workers_target(workers) + source_piece_csv_file = self.csv_dataset.generate_source_pieceDs_csv_file(self.csv_dataset.get_csv_path(), source_piece_inst) + source_piece_inst.set_pointer_to_sourcePiece_CsvDataSet(source_piece_csv_file) source_pieces_inst_list.append(source_piece_inst) + self.add_phase(phase_name, phase_type, source_pieces_inst_list) - - def set_csv_dataset(self, csv_file_path : str, num_of_features : int, num_of_labels : int, headers_row : bool): self.csv_dataset = CSVDataSet(csv_file_path, self.batch_size, num_of_features, num_of_labels, headers_row) # Todo get num of features and labels from csv file diff --git a/src_py/apiServer/nerl_csv_dataSet_db.py b/src_py/apiServer/nerl_csv_dataSet_db.py index ac35836e3..65cc64e11 100644 --- a/src_py/apiServer/nerl_csv_dataSet_db.py +++ b/src_py/apiServer/nerl_csv_dataSet_db.py @@ -10,8 +10,8 @@ def __init__(self, source_name : str, batch_size, phase : str, starting_offset = self.phase = phase self.starting_offset = starting_offset # 
given as index of csv rows self.num_of_batches = num_of_batches - self.workers_target = None - self.pointer_to_CsvDataSet = None # which csvDataSet + self.workers_target = [] # [worker_name1, worker_name2, ...] + self.pointer_to_sourcePiece_CsvDataSet= None # pointer to the csv file that contains the source piece data def get_source_name(self): return self.source_name @@ -24,9 +24,19 @@ def get_phase(self): def get_starting_offset(self): return self.starting_offset - + def get_num_of_batches(self): + return self.num_of_batches + + def get_workers_target(self): + return self.workers_target + + def build_workers_target(self, workers_target: list): + self.workers_target = workers_target + def set_pointer_to_sourcePiece_CsvDataSet(self, pointer_to_sourcePiece_CsvDataSet): + self.pointer_to_sourcePiece_CsvDataSet = pointer_to_sourcePiece_CsvDataSet + class CsvDataSet(): def __init__(self, csv_path, batch_size, num_of_features, num_of_labels, headers_row: bool): self.csv_path = csv_path @@ -62,6 +72,7 @@ def generate_source_pieceDS(self, source_name : str, batch_size: int, phase : st def generate_source_pieceDs_csv_file(self, csv_file_path : str, source_pieceDS_inst: SourcePieceDS): # Todo Ohad&Noa # df_train = pd.df.read_csv(self.csv_dataset_path, skiprows=starting_offset_index_train, nrows=number_of_samples_train) + #return path for source piece csv file pass