From 60db221117784ec2ca8578721879c33f12f5a642 Mon Sep 17 00:00:00 2001 From: andreygetmanov Date: Fri, 19 Jan 2024 17:39:51 +0300 Subject: [PATCH 1/5] Add unit tests for ALL InputData and Data classes methods #1200 - added tests for from_numpy, from_dataframe, from_csv methods for regression and classification (bi and multi) tasks - simple_classification and simple_regression datasets are moved to separate folders --- docs/source/advanced/cli_call.rst | 2 +- .../cli_classification_call.bat | 2 +- .../multiclass_classification.csv | 0 .../multiclass_classification.npy | Bin 0 -> 1288 bytes .../simple_classification.csv | 0 .../classification/simple_classification.npy | Bin 0 -> 2720 bytes test/data/regression/simple_regression.csv | 62 +++++++++++ test/data/regression/simple_regression.npy | Bin 0 -> 4032 bytes test/integration/api/test_api_cli_params.py | 2 +- test/integration/api/test_api_utils.py | 4 +- test/integration/composer/test_history.py | 2 +- .../pipelines/tuning/test_pipeline_tuning.py | 2 +- .../real_applications/test_real_cases.py | 2 +- test/unit/data/test_data.py | 6 +- test/unit/data/test_multimodal_data.py | 2 +- .../optimizer/gp_operators/test_mutation.py | 2 +- test/unit/pipelines/test_pipeline.py | 2 +- test/unit/tasks/test_classification.py | 101 ++++++++++++++++-- test/unit/tasks/test_regression.py | 65 ++++++++++- test/unit/validation/test_table_cv.py | 2 +- 20 files changed, 236 insertions(+), 22 deletions(-) rename test/data/{ => classification}/multiclass_classification.csv (100%) create mode 100644 test/data/classification/multiclass_classification.npy rename test/data/{ => classification}/simple_classification.csv (100%) create mode 100644 test/data/classification/simple_classification.npy create mode 100644 test/data/regression/simple_regression.csv create mode 100644 test/data/regression/simple_regression.npy diff --git a/docs/source/advanced/cli_call.rst b/docs/source/advanced/cli_call.rst index 281f91e8f8..8293610967 100644 --- a/docs/source/advanced/cli_call.rst +++ b/docs/source/advanced/cli_call.rst @@ -69,4 +69,4 @@ problems decision are presented. 
The string below helps to run classification problem decision from the console: -``python --problem classification --train ../../test/data/simple_classification.csv --test ../../test/data/simple_classification.csv --target Y --timeout 0.1`` +``python --problem classification --train ../../test/data/classification/simple_classification.csv --test ../../test/data/classification/simple_classification.csv --target Y --timeout 0.1`` diff --git a/examples/simple/cli_application/cli_classification_call.bat b/examples/simple/cli_application/cli_classification_call.bat index 1b69b74391..5fe7cd973d 100644 --- a/examples/simple/cli_application/cli_classification_call.bat +++ b/examples/simple/cli_application/cli_classification_call.bat @@ -1,3 +1,3 @@ set python_path = "DEFAULT" cd ../../fedot/api -%python_path% fedot_cli.py --problem classification --train ../../test/data/simple_classification.csv --test ../../test/data/simple_classification.csv --target Y --timeout 0.1 +%python_path% fedot_cli.py --problem classification --train ../../test/data/classification/simple_classification.csv --test ../../test/data/classification/simple_classification.csv --target Y --timeout 0.1 diff --git a/test/data/multiclass_classification.csv b/test/data/classification/multiclass_classification.csv similarity index 100% rename from test/data/multiclass_classification.csv rename to test/data/classification/multiclass_classification.csv diff --git a/test/data/classification/multiclass_classification.npy b/test/data/classification/multiclass_classification.npy new file mode 100644 index 0000000000000000000000000000000000000000..343530cf17d9d6b2dd6f1a90d4431f15b0025caf GIT binary patch literal 1288 zcmbVK%}N4c6rIS-N=r@0971L{1+I#qBAD+5iWaqK6_O*ZGRA5nJw>Z}lO92+RV&}Z zg@{2!>OJQUqqyl?%$G{=y^<=fZdeFKFy<^uqX&ic0*K0Jxhp={c z-3;r&7{3S~gAv@it=$JBcW3XY>K^P?-DmgTXSFdodGq-(J~pgl_9ti4yuFT34t^Pa z8N7nACa0jj?EU3uJ7oZr=Yf=Bz6eg0^Di6?NceavH-xLybQCEing pc?VxQSG*qUMtSkLc%!e0>q`Kj`sin literal 0 HcmV?d00001 diff --git a/test/data/simple_classification.csv b/test/data/classification/simple_classification.csv similarity index 100% rename from test/data/simple_classification.csv rename to test/data/classification/simple_classification.csv diff --git a/test/data/classification/simple_classification.npy b/test/data/classification/simple_classification.npy new file mode 100644 index 0000000000000000000000000000000000000000..c8fb4d301747724f051c81d9e0ceccd1f332f4dd GIT binary patch literal 2720 zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(4=E~51qv5u zBo?Fsxf(|1ItqqHnmP)#3giMV2CU$NJwzBr!xX{z=rl|oorbA{iKFw;#Rj}=rl|oorbA{iR0qK#9=f{J&Z;d$Hj+#?Un?o-?%)Kz0SbNdc6U&FG zgV8YiV0;*jE{@KJse{o7c^4(+Fw^Z%?BibV-S<8CvHia|ZQ14nx9q26&F?$T^TuBJ z_RqSkJxA;hf7OWTKXlhVWuuOaA;%SagD-Ad7rGv^-|_N%-I9*yaB)qbs;?Sn>;=xX zHRL>hZU4dHM^{IPPrf+-?kz6P literal 0 HcmV?d00001 diff --git a/test/data/regression/simple_regression.csv b/test/data/regression/simple_regression.csv new file mode 100644 index 0000000000..224c38a884 --- /dev/null +++ b/test/data/regression/simple_regression.csv @@ -0,0 +1,62 @@ +Unnamed: 0,Asphalt,N-par,Iso-par,AROM UV,P,V,S,target +41,0.21,71.09,18.5,2.13,839.3,13.44,0.09,0.223 +8,1.2,63.47,20.44,7.53,891.8,42.49,0.61,0.168 +43,2.37,61.27,20.41,9.0,906.8,62.9,1.01,0.11 +21,4.16,59.83,19.72,9.79,916.5,81.55,1.55,0.07 +27,1.26,68.27,18.77,4.04,858.6,19.59,0.47,0.99 +51,0.98,68.44,18.9,3.96,857.5,19.19,0.39,1.75 +20,4.24,59.62,19.75,9.93,917.9,85.16,1.58,0.05 +42,2.3,59.63,21.0,10.17,918.2,87.75,1.04,0.03 
+24,3.29,62.36,19.44,8.07,899.1,50.44,1.23,0.34 +53,1.59,65.0,19.66,4.11,881.2,32.51,0.67,1.21 +59,2.93,60.93,20.16,4.03,909.1,66.68,1.18,0.242 +10,1.05,64.52,20.18,6.78,884.5,35.43,0.53,0.196 +45,2.51,64.44,19.24,6.66,884.9,35.24,0.94,1.03 +22,4.09,60.05,19.7,9.64,915.0,78.14,1.53,0.102 +60,3.45,60.61,19.93,2.43,911.2,70.47,1.33,0.252 +14,0.22,70.36,18.74,2.66,844.3,14.71,0.11,0.239 +29,0.31,71.04,18.45,2.16,839.6,13.53,0.12,0.35 +55,3.28,60.71,20.0,2.96,910.5,69.17,1.28,0.223 +32,2.07,61.45,20.55,8.88,905.5,60.93,0.92,0.03 +6,1.31,62.75,20.61,8.03,896.7,48.26,0.66,0.072 +57,1.86,61.6,20.65,7.35,904.7,59.6,0.85,0.087 +37,1.76,64.89,19.59,6.43,881.9,33.0,0.72,0.618 +54,1.63,66.53,19.11,2.03,870.5,25.36,0.63,2.06 +33,2.97,60.06,20.43,9.77,915.0,79.26,1.21,0.123 +38,1.3,66.73,19.26,5.15,869.2,24.7,0.53,0.874 +12,0.69,67.1,19.54,4.97,866.8,23.53,0.35,0.465 +58,2.37,61.27,20.41,5.76,906.8,62.86,1.01,0.178 +28,0.62,70.13,18.55,2.78,845.8,15.21,0.23,0.68 +31,2.21,60.53,20.77,9.53,911.9,72.93,0.99,0.02 +46,2.59,64.39,19.21,6.68,885.2,35.5,0.97,1.726 +25,2.59,64.39,19.21,6.68,885.2,35.5,0.97,1.726 +7,1.28,62.93,20.57,7.91,895.5,46.73,0.65,0.134 +48,0.72,62.53,24.37,5.1,877.2,28.25,0.37,0.28 +9,1.18,63.64,20.39,7.4,890.6,41.19,0.6,0.169 +39,0.85,68.51,18.95,3.92,859.0,19.0,0.35,0.59 +52,1.51,63.44,20.24,7.51,891.9,42.51,0.7,0.341 +49,0.93,65.39,19.96,6.17,878.5,30.7,0.47,0.452 +11,0.93,65.39,19.96,6.17,878.5,30.7,0.47,0.452 +18,4.39,59.19,19.8,10.23,920.9,93.0,1.64,0.03 +35,2.47,62.04,20.09,8.4,901.5,54.18,1.01,0.322 +47,0.85,58.08,28.54,5.7,890.6,37.14,0.43,0.19 +4,1.36,62.39,20.7,8.29,899.2,51.54,0.69,0.019 +15,0.11,71.15,18.54,2.1,838.9,13.36,0.06,0.108 +36,2.23,63.0,19.92,7.73,894.8,45.5,0.91,0.65 +1,1.97,58.1,21.76,11.32,928.7,122.8,0.99,0.02 +56,3.17,59.1,20.63,5.42,921.8,97.04,1.3,0.02 +17,4.76,58.09,19.92,10.97,928.4,117.2,1.78,0.02 +23,4.02,60.26,19.68,9.5,913.5,74.9,1.5,0.119 +0,2.24,56.15,22.24,12.7,942.2,193.1,1.14,0.02 +3,1.43,61.85,20.84,8.67,902.9,57.04,0.73,0.01 +34,2.72,61.06,20.26,9.08,908.2,65.18,1.11,0.123 +50,0.93,66.95,19.43,5.04,867.8,24.0,0.42,0.66 +30,2.35,59.6,21.0,10.19,918.3,88.19,1.05,0.02 +44,2.44,62.88,19.82,7.8,895.7,46.43,0.98,0.62 +13,0.45,68.75,19.14,3.8,855.4,18.47,0.23,0.39 +26,1.92,66.36,18.98,5.34,871.7,25.95,0.72,2.657 +40,0.42,70.24,18.65,2.72,845.1,15.0,0.17,0.51 +19,4.31,59.4,19.77,10.08,919.4,89.0,1.61,0.04 +2,1.7,60.0,21.29,9.98,915.6,81.93,0.86,0.015 +16,5.53,55.86,20.18,12.5,943.7,194.9,2.07,0.02 +5,1.33,62.57,20.66,8.16,897.9,49.87,0.67,0.047 diff --git a/test/data/regression/simple_regression.npy b/test/data/regression/simple_regression.npy new file mode 100644 index 0000000000000000000000000000000000000000..41447758b4b6837d5b0bcfa035ee7bd4ce7a4ddd GIT binary patch literal 4032 zcmbVPYfx2X6up38p(EEseB*_S0*a=bgP?-nK{HXfFGaKlO;jAJ2@%jtvvkT);gaKM zqm?mZnqoDS+N36xS&%x2XqcAo5FJAV1r!vkbI)4mFw@V@pFQ7Wuf6vAUi)me&$KyT z@G>nktqfn7nYSP}e4Hgbeo>d=Pu0j^rz-7%~ZSdvNCcq)n>$) z(NUJT$SBJy%m4oTudfI!opSx8khHH^Jcvl&c&yc1*_D&ve}1P~iS#rhT7+iO8#!)P z7HOZiGW}5S+IpA2vcxkzr(M8yRZdlUz9!*8UlG_vfxlV#@B`1+jkuLUby7Wl6`Fg3 zzv79OzR{$BM~nE$(>(S>fJX=M9rmzZ{wH?u`OAj* zJvlzmg>^V>!@5@66ivsG4#nj86sMxe_PQN@Ax_{9bYfrl*)iX4$GojB=HpT{-Cwk0 z9~`y=Z=QpB*|9I;oY+@8oZ#Qpu4ws`62HP}@Yi%$@4|kaME|}{;HVZn9UON6_lyI1 zSzO!?4$PlTKhM4cw;>O=4eQ#Cx`x=XF1=DPe#efy z*4fc6b25MGokN{Rxu~C$=fVY?12)7pQRlnK;Gaal)zLhUGWxS*<|!ec@@TBn&1m2p zm53KZKM8&%v=K7bUsiBVCGwEae^)~PchTt2Bd$rN9y0dFb{TPZ%gjsSz9CMwjCGwT zLGJ>_SNUt_z#UEgGVX^M$*|fzv`NN3nHz`oULb={b2NF=ua$nP#rcxJr;p6I@yt(e 
zNAsLY+{ZE4_d8^xANLG(NEidW^LjqiFS}D`jqe!=JgpMW#UrlEZaq)xFfGQ=U%LlO z$jjdg`347b{X%iCY_VefBFykB4S>#XpI{@Q5`}6*=a-I*{zO-J% z?@C=luulUld>;mJ-}R&ZChBYDI}pfwuNTi(AI|e5j+OrX4U2baF!2M4GYI*m2M{Ni zy!skedry{zalI|HLDWlco4B6A%zr5Tdn5m*A=KH*_>t5zgnEb4GOvs<>?29%+k-rN zQ!ii65951hC4MB|Rq8md0P(IA!lt<~ZHdY7 zzo!uX+c@tHH{#4M!2EdP=hH8R^L%M1yMeQjIP!n+BFQ_blfPCzWRvgP#MxU2-l{cu zz7DouChr>dU!dPrw(~o!_8$L2o>96!#OX(#vDEVc`+bX$ZxzRViV)vTUQ3FQUo6`h z)F+tv>}UTI>UpF9c_iq1lkXAo+0OZ^h#TPMIT(g>IAJp4G#4+ z@NAli^AR@{IEf1O@#lR14kiM(I1&48{S55uVi)GUI)dkIIPRfoe0PlZJ&EUq@6Kw4 z`*At@ z{&K!|&0{%lBzdL5e`PA-q-R0j#w6ZP2L^ zv(BzF>Q>at*XVy&puTORe`j5ay77CCw-Bd+^h2q?U-kckYXbHEZw7zV z{in#Uk#%*a#XEb0xKT`g86k~h~Jv`ClywT0nc6z^IKDH z{1>3!-)HayuMUC#qP?u^i-~{E(0yKAb?C+RDlx6Q_waQ=Czw~0xIJ|AQ(p}HR;1Q` z`q9spYO!$qRY%d*DnnPD2XrbH+Hm%M@yt6%PwtrfjTo{2Y@H$b6Lzc&mQ~r@e1;)Iy^eZIwLYc!YX x`&;jSSLk`+zo?VH{-22Rb3v3;B*$(@^yT||jkvqGF7Fv}KD&3;Hs4$#{sr~Wx5fYf literal 0 HcmV?d00001 diff --git a/test/integration/api/test_api_cli_params.py b/test/integration/api/test_api_cli_params.py index 631456afcc..42694169f3 100644 --- a/test/integration/api/test_api_cli_params.py +++ b/test/integration/api/test_api_cli_params.py @@ -26,7 +26,7 @@ def test_cli_with_parameters(): f'--cv_folds 2 --target sea_height --train {ts_train_path} ' f'--test {ts_train_path} --for_len 10' ).split() - class_train_path = project_root_path.joinpath('test/data/simple_classification.csv') + class_train_path = project_root_path.joinpath('test/data/classification/simple_classification.csv') class_call = ( f'--problem classification --train {class_train_path} --test {class_train_path} --target Y ' '--preset fast_train --timeout 0.1 --depth 3 --arity 3 ' diff --git a/test/integration/api/test_api_utils.py b/test/integration/api/test_api_utils.py index ac23a25339..a47f260838 100644 --- a/test/integration/api/test_api_utils.py +++ b/test/integration/api/test_api_utils.py @@ -14,7 +14,7 @@ from fedot.preprocessing.preprocessing import DataPreprocessor from test.data.datasets import get_cholesterol_dataset from test.integration.api.test_main_api import get_dataset -from test.unit.tasks.test_classification import get_binary_classification_data +from test.unit.tasks.test_classification import get_binary_classification_data_from_csv def test_compose_fedot_model_without_tuning(): @@ -32,7 +32,7 @@ def test_output_binary_classification_correct(): task_type = 'classification' - data = get_binary_classification_data() + data = get_binary_classification_data_from_csv() train_data, test_data = train_test_data_setup(data, shuffle=True) diff --git a/test/integration/composer/test_history.py b/test/integration/composer/test_history.py index c266ba7539..71a7e61ff0 100644 --- a/test/integration/composer/test_history.py +++ b/test/integration/composer/test_history.py @@ -68,7 +68,7 @@ def _test_individuals_in_history(history: OptHistory): @pytest.mark.parametrize('n_jobs', [1, 2]) def test_newly_generated_history(n_jobs: int): - file_path_train = fedot_project_root().joinpath('test/data/simple_classification.csv') + file_path_train = fedot_project_root().joinpath('test/data/classification/simple_classification.csv') num_of_gens = 2 auto_model = Fedot(problem='classification', seed=42, diff --git a/test/integration/pipelines/tuning/test_pipeline_tuning.py b/test/integration/pipelines/tuning/test_pipeline_tuning.py index 48510f0339..4386ea343f 100644 --- a/test/integration/pipelines/tuning/test_pipeline_tuning.py +++ b/test/integration/pipelines/tuning/test_pipeline_tuning.py @@ -40,7 +40,7 @@ def 
regression_dataset(): @pytest.fixture() def classification_dataset(): test_file_path = str(os.path.dirname(__file__)) - file = os.path.join(str(fedot_project_root()), 'test/data/simple_classification.csv') + file = os.path.join(str(fedot_project_root()), 'test/data/classification/simple_classification.csv') return InputData.from_csv(os.path.join(test_file_path, file), task=Task(TaskTypesEnum.classification)) diff --git a/test/integration/real_applications/test_real_cases.py b/test/integration/real_applications/test_real_cases.py index 57e7e3f343..226f0c9e44 100644 --- a/test/integration/real_applications/test_real_cases.py +++ b/test/integration/real_applications/test_real_cases.py @@ -13,7 +13,7 @@ def test_credit_scoring_problem(): - full_path_train = full_path_test = fedot_project_root().joinpath('test/data/simple_classification.csv') + full_path_train = full_path_test = fedot_project_root().joinpath('test/data/classification/simple_classification.csv') roc_auc_test = run_credit_scoring_problem(full_path_train, full_path_test, timeout=5, target='Y', n_jobs=1) assert roc_auc_test > 0.5 diff --git a/test/unit/data/test_data.py b/test/unit/data/test_data.py index 3f23fb289f..0dd5bd1214 100644 --- a/test/unit/data/test_data.py +++ b/test/unit/data/test_data.py @@ -48,7 +48,7 @@ def test_data_subset_incorrect(data_setup): def test_data_from_csv(): test_file_path = str(os.path.dirname(__file__)) - file = '../../data/simple_classification.csv' + file = '../../data/classification/simple_classification.csv' task = Task(TaskTypesEnum.classification) df = pd.read_csv(os.path.join(test_file_path, file)) data_array = np.array(df).T @@ -71,7 +71,7 @@ def test_data_from_csv(): def test_with_custom_target(): test_file_path = str(os.path.dirname(__file__)) - file = '../../data/simple_classification.csv' + file = '../../data/classification/simple_classification.csv' file_custom = '../../data/simple_classification_with_custom_target.csv' file_data = InputData.from_csv( @@ -140,7 +140,7 @@ def test_target_data_from_csv_correct(): def test_table_data_shuffle(): test_file_path = str(os.path.dirname(__file__)) - file = '../../data/simple_classification.csv' + file = '../../data/classification/simple_classification.csv' data = InputData.from_csv(os.path.join(test_file_path, file)) shuffled_data = deepcopy(data) diff --git a/test/unit/data/test_multimodal_data.py b/test/unit/data/test_multimodal_data.py index efc6abf57b..3c77150430 100644 --- a/test/unit/data/test_multimodal_data.py +++ b/test/unit/data/test_multimodal_data.py @@ -106,7 +106,7 @@ def test_text_data_only(data_type): data_source_name = 'data_source_text/description' elif data_type is DataTypesEnum.table: # Case when there is no text data in csv, but MultiModalData.from_csv() is used - file_path = 'test/data/simple_classification.csv' + file_path = 'test/data/classification/simple_classification.csv' data_source_name = 'data_source_table' path = Path(fedot_project_root(), file_path) diff --git a/test/unit/optimizer/gp_operators/test_mutation.py b/test/unit/optimizer/gp_operators/test_mutation.py index d090c47477..72007adbe2 100644 --- a/test/unit/optimizer/gp_operators/test_mutation.py +++ b/test/unit/optimizer/gp_operators/test_mutation.py @@ -33,7 +33,7 @@ def get_requirements_and_params_for_task(task: TaskTypesEnum): def file_data(): - test_file_path = Path(__file__).parents[3].joinpath('data', 'simple_classification.csv') + test_file_path = Path(__file__).parents[3].joinpath('data', 'classification', 'simple_classification.csv') input_data = 
InputData.from_csv(test_file_path) input_data.idx = to_categorical_codes(categorical_ids=input_data.idx) return input_data diff --git a/test/unit/pipelines/test_pipeline.py b/test/unit/pipelines/test_pipeline.py index 21eede0588..17bad5799f 100644 --- a/test/unit/pipelines/test_pipeline.py +++ b/test/unit/pipelines/test_pipeline.py @@ -47,7 +47,7 @@ def classification_dataset(): @pytest.fixture() def file_data_setup(): test_file_path = str(os.path.dirname(__file__)) - file = '../../data/simple_classification.csv' + file = '../../data/classification/simple_classification.csv' input_data = InputData.from_csv( os.path.join(test_file_path, file)) input_data.idx = to_categorical_codes(categorical_ids=input_data.idx) diff --git a/test/unit/tasks/test_classification.py b/test/unit/tasks/test_classification.py index 7373f758be..2b9144c62c 100644 --- a/test/unit/tasks/test_classification.py +++ b/test/unit/tasks/test_classification.py @@ -1,6 +1,9 @@ import os import numpy as np +import pandas as pd +import pytest +from typing import Callable from sklearn.datasets import load_iris, make_classification from sklearn.metrics import roc_auc_score as roc_auc @@ -60,9 +63,61 @@ def get_iris_data() -> InputData: return input_data -def get_binary_classification_data(): +def get_binary_classification_data_from_numpy(): test_file_path = str(os.path.dirname(__file__)) - file = '../../data/simple_classification.csv' + file = '../../data/classification/simple_classification.npy' + numpy_data = np.load(os.path.join(test_file_path, file)) + features_array = numpy_data[:, :-1] + target_array = numpy_data[:, -1] + input_data = InputData.from_numpy(features_array=features_array, + target_array=target_array) + return input_data + + +def get_binary_classification_data_from_df(): + test_file_path = str(os.path.dirname(__file__)) + file = '../../data/classification/simple_classification.csv' + df_data = pd.read_csv(os.path.join(test_file_path, file)) + features_df = df_data.iloc[:, :-1] + target_df = df_data.iloc[:, -1] + input_data = InputData.from_dataframe(features_df=features_df, + target_df=target_df) + return input_data + + +def get_binary_classification_data_from_csv(): + test_file_path = str(os.path.dirname(__file__)) + file = '../../data/classification/simple_classification.csv' + input_data = InputData.from_csv( + os.path.join(test_file_path, file)) + return input_data + + +def get_multiclassification_data_from_numpy(): + test_file_path = str(os.path.dirname(__file__)) + file = '../../data/classification/multiclass_classification.npy' + numpy_data = np.load(os.path.join(test_file_path, file)) + features_array = numpy_data[:, :-1] + target_array = numpy_data[:, -1] + input_data = InputData.from_numpy(features_array=features_array, + target_array=target_array) + return input_data + + +def get_multiclassification_data_from_df(): + test_file_path = str(os.path.dirname(__file__)) + file = '../../data/classification/multiclass_classification.csv' + df_data = pd.read_csv(os.path.join(test_file_path, file)) + features_df = df_data.iloc[:, :-1] + target_df = df_data.iloc[:, -1] + input_data = InputData.from_dataframe(features_df=features_df, + target_df=target_df) + return input_data + + +def get_multiclassification_data_from_csv(): + test_file_path = str(os.path.dirname(__file__)) + file = '../../data/classification/multiclass_classification.csv' input_data = InputData.from_csv( os.path.join(test_file_path, file)) return input_data @@ -96,8 +151,42 @@ def get_image_classification_data(composite_flag: bool = True): 
return roc_auc_on_valid, dataset_to_train, dataset_to_validate -def test_multiclassification_pipeline_fit_correct(): - data = get_iris_data() +BINARY_CLASSIFICATION_DATA_SOURCES = [get_binary_classification_data_from_numpy, + get_binary_classification_data_from_df, + get_binary_classification_data_from_csv, + # 'from_image', + # 'from_text_meta_file', + # 'from_text_files', + # 'from_json_files', + ] + +MULTICLASSIFICATION_DATA_SOURCES = [get_multiclassification_data_from_numpy, + get_multiclassification_data_from_df, + get_multiclassification_data_from_csv, + # 'from_image', + # 'from_text_meta_file', + # 'from_text_files', + # 'from_json_files', + ] + +@pytest.mark.parametrize('get_classification_data', BINARY_CLASSIFICATION_DATA_SOURCES) +def test_binary_classification_pipeline_fit_correct(get_classification_data: Callable): + data = get_classification_data() + pipeline = pipeline_simple() + train_data, test_data = train_test_data_setup(data, shuffle=True) + + pipeline.fit(input_data=train_data) + results = pipeline.predict(input_data=test_data) + + roc_auc_on_test = roc_auc(y_true=test_data.target, + y_score=results.predict) + + assert roc_auc_on_test > 0.8 + + +@pytest.mark.parametrize('get_classification_data', MULTICLASSIFICATION_DATA_SOURCES) +def test_multiclassification_pipeline_fit_correct(get_classification_data: Callable): + data = get_classification_data() pipeline = pipeline_simple() train_data, test_data = train_test_data_setup(data, shuffle=True) @@ -106,7 +195,7 @@ def test_multiclassification_pipeline_fit_correct(): roc_auc_on_test = roc_auc(y_true=test_data.target, y_score=results.predict, - multi_class='ovo', + multi_class='ovr', # TODO: strange bug when ovo is chosen average='macro') assert roc_auc_on_test > 0.95 @@ -154,7 +243,7 @@ def test_output_mode_labels(): def test_output_mode_full_probs(): - data = get_binary_classification_data() + data = get_binary_classification_data_from_csv() pipeline = pipeline_simple() train_data, test_data = train_test_data_setup(data, shuffle=True) diff --git a/test/unit/tasks/test_regression.py b/test/unit/tasks/test_regression.py index 7489d1c5b6..ec6bcc0141 100644 --- a/test/unit/tasks/test_regression.py +++ b/test/unit/tasks/test_regression.py @@ -1,5 +1,9 @@ +import os + import numpy as np +import pandas as pd import pytest +from typing import Callable from sklearn.datasets import make_regression from sklearn.metrics import mean_squared_error as mse @@ -52,6 +56,39 @@ def get_synthetic_regression_data(n_samples=1000, n_features=10, random_state=No return input_data +def get_regression_data_from_numpy(): + test_file_path = str(os.path.dirname(__file__)) + file = '../../data/regression/simple_regression.npy' + numpy_data = np.load(os.path.join(test_file_path, file)) + features_array = numpy_data[:, :-1] + target_array = numpy_data[:, -1] + input_data = InputData.from_numpy(features_array=features_array, + target_array=target_array, + task='regression') + return input_data + + +def get_regression_data_from_df(): + test_file_path = str(os.path.dirname(__file__)) + file = '../../data/regression/simple_regression.csv' + df_data = pd.read_csv(os.path.join(test_file_path, file)) + features_df = df_data.iloc[:, :-1] + target_df = df_data.iloc[:, -1] + input_data = InputData.from_dataframe(features_df=features_df, + target_df=target_df, + task='regression') + return input_data + + +def get_regression_data_from_csv(): + test_file_path = str(os.path.dirname(__file__)) + file = '../../data/regression/simple_regression.csv' + input_data = 
InputData.from_csv( + os.path.join(test_file_path, file), + task='regression') + return input_data + + def get_rmse_value(pipeline: Pipeline, train_data: InputData, test_data: InputData) -> (float, float): train_pred = pipeline.predict(input_data=train_data) test_pred = pipeline.predict(input_data=test_data) @@ -61,7 +98,33 @@ def get_rmse_value(pipeline: Pipeline, train_data: InputData, test_data: InputDa return rmse_value_train, rmse_value_test -def test_regression_pipeline_fit_predict_correct(): +REGRESSION_DATA_SOURCES = [get_regression_data_from_numpy, + get_regression_data_from_df, + get_regression_data_from_csv, + # 'from_image', + # 'from_text_meta_file', + # 'from_text_files', + # 'from_json_files', + ] + + +@pytest.mark.parametrize('get_regression_data', REGRESSION_DATA_SOURCES) +def test_regression_pipeline_fit_predict_correct(get_regression_data: Callable): + data = get_regression_data() + pipeline = generate_pipeline() + train_data, test_data = train_test_data_setup(data, shuffle=True) + + pipeline.fit(input_data=train_data) + results = pipeline.predict(input_data=test_data) + + rmse_on_test = mse(y_true=test_data.target, + y_pred=results.predict, + squared=False) + + assert rmse_on_test < 0.8 + + +def test_synthetic_regression_pipeline_fit_predict_correct(): data = get_synthetic_regression_data() pipeline = generate_pipeline() diff --git a/test/unit/validation/test_table_cv.py b/test/unit/validation/test_table_cv.py index 8ec6ce1a16..5f469e298e 100644 --- a/test/unit/validation/test_table_cv.py +++ b/test/unit/validation/test_table_cv.py @@ -31,7 +31,7 @@ def sample_pipeline(): def get_classification_data(): - file_path = fedot_project_root().joinpath('test/data/simple_classification.csv') + file_path = fedot_project_root().joinpath('test/data/classification/simple_classification.csv') input_data = InputData.from_csv(file_path, task=Task(TaskTypesEnum.classification)) return input_data From f6624fed5a16c3e247f4c4ec05b5afb741c8fa44 Mon Sep 17 00:00:00 2001 From: andreygetmanov Date: Fri, 19 Jan 2024 17:49:03 +0300 Subject: [PATCH 2/5] - pep8 --- test/integration/real_applications/test_real_cases.py | 3 ++- test/unit/tasks/test_classification.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/test/integration/real_applications/test_real_cases.py b/test/integration/real_applications/test_real_cases.py index 226f0c9e44..4ce5c09c99 100644 --- a/test/integration/real_applications/test_real_cases.py +++ b/test/integration/real_applications/test_real_cases.py @@ -13,7 +13,8 @@ def test_credit_scoring_problem(): - full_path_train = full_path_test = fedot_project_root().joinpath('test/data/classification/simple_classification.csv') + full_path_train = full_path_test = \ + fedot_project_root().joinpath('test/data/classification/simple_classification.csv') roc_auc_test = run_credit_scoring_problem(full_path_train, full_path_test, timeout=5, target='Y', n_jobs=1) assert roc_auc_test > 0.5 diff --git a/test/unit/tasks/test_classification.py b/test/unit/tasks/test_classification.py index 2b9144c62c..afb27c0a1b 100644 --- a/test/unit/tasks/test_classification.py +++ b/test/unit/tasks/test_classification.py @@ -169,6 +169,7 @@ def get_image_classification_data(composite_flag: bool = True): # 'from_json_files', ] + @pytest.mark.parametrize('get_classification_data', BINARY_CLASSIFICATION_DATA_SOURCES) def test_binary_classification_pipeline_fit_correct(get_classification_data: Callable): data = get_classification_data() From 936d8f9f060a3aef644f53aeef34db50a933079a Mon Sep 
17 00:00:00 2001 From: andreygetmanov Date: Wed, 24 Jan 2024 12:58:09 +0300 Subject: [PATCH 3/5] - refactoring of test_regression.py --- test/unit/tasks/test_regression.py | 62 +++++++++++++----------------- 1 file changed, 27 insertions(+), 35 deletions(-) diff --git a/test/unit/tasks/test_regression.py b/test/unit/tasks/test_regression.py index ec6bcc0141..974aced366 100644 --- a/test/unit/tasks/test_regression.py +++ b/test/unit/tasks/test_regression.py @@ -56,37 +56,29 @@ def get_synthetic_regression_data(n_samples=1000, n_features=10, random_state=No return input_data -def get_regression_data_from_numpy(): +def get_regression_data(source: str) -> InputData: test_file_path = str(os.path.dirname(__file__)) - file = '../../data/regression/simple_regression.npy' - numpy_data = np.load(os.path.join(test_file_path, file)) - features_array = numpy_data[:, :-1] - target_array = numpy_data[:, -1] - input_data = InputData.from_numpy(features_array=features_array, - target_array=target_array, - task='regression') - return input_data - - -def get_regression_data_from_df(): - test_file_path = str(os.path.dirname(__file__)) - file = '../../data/regression/simple_regression.csv' - df_data = pd.read_csv(os.path.join(test_file_path, file)) - features_df = df_data.iloc[:, :-1] - target_df = df_data.iloc[:, -1] - input_data = InputData.from_dataframe(features_df=features_df, - target_df=target_df, + if source == 'numpy': + file = '../../data/regression/simple_regression.npy' + numpy_data = np.load(os.path.join(test_file_path, file)) + features_array = numpy_data[:, :-1] + target_array = numpy_data[:, -1] + return InputData.from_numpy(features_array=features_array, + target_array=target_array, task='regression') - return input_data - - -def get_regression_data_from_csv(): - test_file_path = str(os.path.dirname(__file__)) - file = '../../data/regression/simple_regression.csv' - input_data = InputData.from_csv( - os.path.join(test_file_path, file), - task='regression') - return input_data + elif source == 'dataframe': + file = '../../data/regression/simple_regression.csv' + df_data = pd.read_csv(os.path.join(test_file_path, file)) + features_df = df_data.iloc[:, :-1] + target_df = df_data.iloc[:, -1] + return InputData.from_dataframe(features_df=features_df, + target_df=target_df, + task='regression') + elif source == 'csv': + file = '../../data/regression/simple_regression.csv' + return InputData.from_csv( + os.path.join(test_file_path, file), + task='regression') def get_rmse_value(pipeline: Pipeline, train_data: InputData, test_data: InputData) -> (float, float): @@ -98,9 +90,9 @@ def get_rmse_value(pipeline: Pipeline, train_data: InputData, test_data: InputDa return rmse_value_train, rmse_value_test -REGRESSION_DATA_SOURCES = [get_regression_data_from_numpy, - get_regression_data_from_df, - get_regression_data_from_csv, +REGRESSION_DATA_SOURCES = ['numpy', + 'dataframe', + 'csv', # 'from_image', # 'from_text_meta_file', # 'from_text_files', @@ -108,9 +100,9 @@ def get_rmse_value(pipeline: Pipeline, train_data: InputData, test_data: InputDa ] -@pytest.mark.parametrize('get_regression_data', REGRESSION_DATA_SOURCES) -def test_regression_pipeline_fit_predict_correct(get_regression_data: Callable): - data = get_regression_data() +@pytest.mark.parametrize('source', REGRESSION_DATA_SOURCES) +def test_regression_pipeline_fit_predict_correct(source: str): + data = get_regression_data(source) pipeline = generate_pipeline() train_data, test_data = train_test_data_setup(data, shuffle=True) From 
e4b92fe40e918f54f40709104cdd770d0a0098a8 Mon Sep 17 00:00:00 2001 From: andreygetmanov Date: Tue, 20 Feb 2024 15:58:51 +0300 Subject: [PATCH 4/5] - added classification tests for text and image files - refactoring of test_regression.py - refactoring of test_classification.py --- cases/spam_detection.py | 7 +- fedot/core/data/data.py | 19 ---- test/unit/tasks/test_classification.py | 151 +++++++++++-------------- test/unit/tasks/test_regression.py | 1 - 4 files changed, 71 insertions(+), 107 deletions(-) diff --git a/cases/spam_detection.py b/cases/spam_detection.py index 5ec26a2ee2..42f593fda5 100644 --- a/cases/spam_detection.py +++ b/cases/spam_detection.py @@ -7,6 +7,7 @@ from fedot.core.data.data_split import train_test_data_setup from fedot.core.pipelines.node import PipelineNode from fedot.core.pipelines.pipeline import Pipeline +from fedot.core.repository.dataset_types import DataTypesEnum def execute_pipeline_for_text_problem(train_data, test_data): @@ -26,7 +27,8 @@ def execute_pipeline_for_text_problem(train_data, test_data): def run_text_problem_from_meta_file(): data_file_abspath = os.path.abspath(os.path.join('data', 'spam', 'spamham.csv')) - data = InputData.from_text_meta_file(meta_file_path=data_file_abspath) + data = InputData.from_csv(file_path=data_file_abspath, + data_type=DataTypesEnum.text) train_data, test_data = train_test_data_setup(data, split_ratio=0.7) @@ -52,7 +54,8 @@ def run_text_problem_from_files(): def run_text_problem_from_saved_meta_file(path): - data = InputData.from_text_meta_file(meta_file_path=path) + data = InputData.from_csv(file_path=path, + data_type=DataTypesEnum.text) train_data, test_data = train_test_data_setup(data, split_ratio=0.7) diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py index 3719806f58..0691d715fb 100644 --- a/fedot/core/data/data.py +++ b/fedot/core/data/data.py @@ -356,25 +356,6 @@ def from_image(images: Union[str, np.ndarray] = None, return InputData(idx=idx, features=features, target=target, task=task, data_type=DataTypesEnum.image) - @staticmethod - def from_text_meta_file(meta_file_path: str = None, - label: str = 'label', - task: Task = Task(TaskTypesEnum.classification), - data_type: DataTypesEnum = DataTypesEnum.text) -> InputData: - - if os.path.isdir(meta_file_path): - raise ValueError("""CSV file expected but got directory""") - - df_text = pd.read_csv(meta_file_path) - df_text = df_text.sample(frac=1).reset_index(drop=True) - messages = df_text['text'].astype('U').tolist() - - features = np.array(messages) - target = np.array(df_text[label]).reshape(-1, 1) - idx = [index for index in range(len(target))] - - return InputData(idx=idx, features=features, - target=target, task=task, data_type=data_type) @staticmethod def from_text_files(files_path: str, diff --git a/test/unit/tasks/test_classification.py b/test/unit/tasks/test_classification.py index afb27c0a1b..a923767c2f 100644 --- a/test/unit/tasks/test_classification.py +++ b/test/unit/tasks/test_classification.py @@ -3,9 +3,8 @@ import numpy as np import pandas as pd import pytest -from typing import Callable from sklearn.datasets import load_iris, make_classification -from sklearn.metrics import roc_auc_score as roc_auc +from sklearn.metrics import roc_auc_score as roc_auc, f1_score as f1 from examples.simple.classification.image_classification_problem import run_image_classification_problem from fedot.core.data.data import InputData @@ -40,6 +39,13 @@ def pipeline_with_pca() -> Pipeline: return pipeline +def simple_text_pipeline() -> Pipeline: + 
node_tfidf = PipelineNode('tfidf') + model_node = PipelineNode('logit', nodes_from=[node_tfidf]) + pipeline = Pipeline(model_node) + return pipeline + + def get_synthetic_classification_data(n_samples=1000, n_features=10, random_state=None) -> InputData: synthetic_data = make_classification(n_samples=n_samples, n_features=n_features, random_state=random_state) input_data = InputData(idx=np.arange(0, len(synthetic_data[1])), @@ -63,64 +69,26 @@ def get_iris_data() -> InputData: return input_data -def get_binary_classification_data_from_numpy(): - test_file_path = str(os.path.dirname(__file__)) - file = '../../data/classification/simple_classification.npy' - numpy_data = np.load(os.path.join(test_file_path, file)) - features_array = numpy_data[:, :-1] - target_array = numpy_data[:, -1] - input_data = InputData.from_numpy(features_array=features_array, - target_array=target_array) - return input_data - - -def get_binary_classification_data_from_df(): - test_file_path = str(os.path.dirname(__file__)) - file = '../../data/classification/simple_classification.csv' - df_data = pd.read_csv(os.path.join(test_file_path, file)) - features_df = df_data.iloc[:, :-1] - target_df = df_data.iloc[:, -1] - input_data = InputData.from_dataframe(features_df=features_df, - target_df=target_df) - return input_data - - -def get_binary_classification_data_from_csv(): - test_file_path = str(os.path.dirname(__file__)) - file = '../../data/classification/simple_classification.csv' - input_data = InputData.from_csv( - os.path.join(test_file_path, file)) - return input_data - - -def get_multiclassification_data_from_numpy(): - test_file_path = str(os.path.dirname(__file__)) - file = '../../data/classification/multiclass_classification.npy' - numpy_data = np.load(os.path.join(test_file_path, file)) - features_array = numpy_data[:, :-1] - target_array = numpy_data[:, -1] - input_data = InputData.from_numpy(features_array=features_array, - target_array=target_array) - return input_data - - -def get_multiclassification_data_from_df(): +def get_classification_data(source: str, problem: str) -> InputData: test_file_path = str(os.path.dirname(__file__)) - file = '../../data/classification/multiclass_classification.csv' - df_data = pd.read_csv(os.path.join(test_file_path, file)) - features_df = df_data.iloc[:, :-1] - target_df = df_data.iloc[:, -1] - input_data = InputData.from_dataframe(features_df=features_df, - target_df=target_df) - return input_data - - -def get_multiclassification_data_from_csv(): - test_file_path = str(os.path.dirname(__file__)) - file = '../../data/classification/multiclass_classification.csv' - input_data = InputData.from_csv( - os.path.join(test_file_path, file)) - return input_data + if source == 'numpy': + file = f'../../data/classification/{problem}_classification.npy' + numpy_data = np.load(os.path.join(test_file_path, file)) + features_array = numpy_data[:, :-1] + target_array = numpy_data[:, -1] + return InputData.from_numpy(features_array=features_array, + target_array=target_array) + elif source == 'dataframe': + file = f'../../data/classification/{problem}_classification.csv' + df_data = pd.read_csv(os.path.join(test_file_path, file)) + features_df = df_data.iloc[:, :-1] + target_df = df_data.iloc[:, -1] + return InputData.from_dataframe(features_df=features_df, + target_df=target_df) + elif source == 'csv': + file = f'../../data/classification/{problem}_classification.csv' + return InputData.from_csv( + os.path.join(test_file_path, file)) def get_image_classification_data(composite_flag: 
bool = True): @@ -151,28 +119,17 @@ def get_image_classification_data(composite_flag: bool = True): return roc_auc_on_valid, dataset_to_train, dataset_to_validate -BINARY_CLASSIFICATION_DATA_SOURCES = [get_binary_classification_data_from_numpy, - get_binary_classification_data_from_df, - get_binary_classification_data_from_csv, - # 'from_image', - # 'from_text_meta_file', - # 'from_text_files', - # 'from_json_files', - ] - -MULTICLASSIFICATION_DATA_SOURCES = [get_multiclassification_data_from_numpy, - get_multiclassification_data_from_df, - get_multiclassification_data_from_csv, - # 'from_image', - # 'from_text_meta_file', - # 'from_text_files', - # 'from_json_files', - ] - - -@pytest.mark.parametrize('get_classification_data', BINARY_CLASSIFICATION_DATA_SOURCES) -def test_binary_classification_pipeline_fit_correct(get_classification_data: Callable): - data = get_classification_data() +CLASSIFICATION_DATA_SOURCES = ['numpy', + 'dataframe', + 'csv', + # 'from_text_files', + # 'from_json_files', + ] + + +@pytest.mark.parametrize('source', CLASSIFICATION_DATA_SOURCES) +def test_binary_classification_pipeline_fit_correct(source: str): + data = get_classification_data(source, 'simple') pipeline = pipeline_simple() train_data, test_data = train_test_data_setup(data, shuffle=True) @@ -185,9 +142,9 @@ def test_binary_classification_pipeline_fit_correct(get_classification_data: Cal assert roc_auc_on_test > 0.8 -@pytest.mark.parametrize('get_classification_data', MULTICLASSIFICATION_DATA_SOURCES) -def test_multiclassification_pipeline_fit_correct(get_classification_data: Callable): - data = get_classification_data() +@pytest.mark.parametrize('source', CLASSIFICATION_DATA_SOURCES) +def test_multiclassification_pipeline_fit_correct(source: str): + data = get_classification_data(source, 'multiclass') pipeline = pipeline_simple() train_data, test_data = train_test_data_setup(data, shuffle=True) @@ -244,7 +201,7 @@ def test_output_mode_labels(): def test_output_mode_full_probs(): - data = get_binary_classification_data_from_csv() + data = get_classification_data('csv', 'simple') pipeline = pipeline_simple() train_data, test_data = train_test_data_setup(data, shuffle=True) @@ -257,3 +214,27 @@ def test_output_mode_full_probs(): assert np.array_equal(results_probs.predict, results_default.predict) assert results.predict.shape == (len(test_data.target), 2) assert results_probs.predict.shape == (len(test_data.target), 1) + + +def test_image_pipeline_fit_correct(): + roc_auc_on_valid, _, _ = get_image_classification_data() + + assert roc_auc_on_valid >= 0.5 + + +def test_text_classification_pipeline_fit_correct(): + test_file_path = str(os.path.dirname(__file__)) + file = '../../data/simple_multimodal_classification_text.csv' + data = InputData.from_csv(file_path=os.path.join(test_file_path, file), + data_type=DataTypesEnum.text) + pipeline = simple_text_pipeline() + train_data, test_data = train_test_data_setup(data, shuffle=True) + + pipeline.fit(input_data=train_data) + results = pipeline.predict(input_data=test_data, output_mode='labels') + + f1_on_test = f1(y_true=test_data.target, + y_pred=results.predict, + average='micro') + + assert f1_on_test >= 0.5 \ No newline at end of file diff --git a/test/unit/tasks/test_regression.py b/test/unit/tasks/test_regression.py index 974aced366..6c288852a0 100644 --- a/test/unit/tasks/test_regression.py +++ b/test/unit/tasks/test_regression.py @@ -93,7 +93,6 @@ def get_rmse_value(pipeline: Pipeline, train_data: InputData, test_data: InputDa REGRESSION_DATA_SOURCES = 
['numpy', 'dataframe', 'csv', - # 'from_image', # 'from_text_meta_file', # 'from_text_files', # 'from_json_files', From b5b7a1aa528e742290ffb085759db9bf9a41157d Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 20 Feb 2024 13:01:39 +0000 Subject: [PATCH 5/5] Automated autopep8 fixes --- fedot/core/data/data.py | 1 - test/unit/tasks/test_classification.py | 4 ++-- test/unit/tasks/test_regression.py | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/fedot/core/data/data.py b/fedot/core/data/data.py index 0691d715fb..886a75a70d 100644 --- a/fedot/core/data/data.py +++ b/fedot/core/data/data.py @@ -356,7 +356,6 @@ def from_image(images: Union[str, np.ndarray] = None, return InputData(idx=idx, features=features, target=target, task=task, data_type=DataTypesEnum.image) - @staticmethod def from_text_files(files_path: str, label: str = 'label', diff --git a/test/unit/tasks/test_classification.py b/test/unit/tasks/test_classification.py index a923767c2f..a7c23a9542 100644 --- a/test/unit/tasks/test_classification.py +++ b/test/unit/tasks/test_classification.py @@ -77,7 +77,7 @@ def get_classification_data(source: str, problem: str) -> InputData: features_array = numpy_data[:, :-1] target_array = numpy_data[:, -1] return InputData.from_numpy(features_array=features_array, - target_array=target_array) + target_array=target_array) elif source == 'dataframe': file = f'../../data/classification/{problem}_classification.csv' df_data = pd.read_csv(os.path.join(test_file_path, file)) @@ -237,4 +237,4 @@ def test_text_classification_pipeline_fit_correct(): y_pred=results.predict, average='micro') - assert f1_on_test >= 0.5 \ No newline at end of file + assert f1_on_test >= 0.5 diff --git a/test/unit/tasks/test_regression.py b/test/unit/tasks/test_regression.py index 6c288852a0..8b31282982 100644 --- a/test/unit/tasks/test_regression.py +++ b/test/unit/tasks/test_regression.py @@ -64,8 +64,8 @@ def get_regression_data(source: str) -> InputData: features_array = numpy_data[:, :-1] target_array = numpy_data[:, -1] return InputData.from_numpy(features_array=features_array, - target_array=target_array, - task='regression') + target_array=target_array, + task='regression') elif source == 'dataframe': file = '../../data/regression/simple_regression.csv' df_data = pd.read_csv(os.path.join(test_file_path, file))
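
The tests added in this series build the same InputData object through three constructors (from_csv, from_dataframe, from_numpy) and then run an identical pipeline over each of them. The snippet below is a minimal stand-alone sketch of that loading pattern, not part of the patch itself: it assumes it is run from the FEDOT repository root so that the relocated test-data path resolves, and it reuses the constructor signatures exactly as they appear in the test code of this series.

    import pandas as pd

    from fedot.core.data.data import InputData
    from fedot.core.repository.tasks import Task, TaskTypesEnum

    csv_path = 'test/data/classification/simple_classification.csv'

    # 1. Straight from the csv file, with the task passed explicitly
    data_from_csv = InputData.from_csv(csv_path,
                                       task=Task(TaskTypesEnum.classification))

    # 2. From a pandas DataFrame split into feature columns and a target column
    df = pd.read_csv(csv_path)
    data_from_df = InputData.from_dataframe(features_df=df.iloc[:, :-1],
                                            target_df=df.iloc[:, -1])

    # 3. From numpy arrays; like the new .npy fixtures, the target is assumed
    #    to sit in the last column of the combined array
    array = df.to_numpy()
    data_from_numpy = InputData.from_numpy(features_array=array[:, :-1],
                                           target_array=array[:, -1])

    for data in (data_from_csv, data_from_df, data_from_numpy):
        print(data.features.shape, data.target.shape)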
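
The series also drops InputData.from_text_meta_file in favour of the generic csv constructor with an explicit data type. A minimal sketch of the replacement call, assuming the multimodal text fixture referenced by the new test is available under test/data/ and the script is run from the repository root:

    from fedot.core.data.data import InputData
    from fedot.core.repository.dataset_types import DataTypesEnum

    # Text tables are loaded through the generic constructor; the data_type
    # argument marks the feature column as text rather than a numeric table
    text_data = InputData.from_csv(
        file_path='test/data/simple_multimodal_classification_text.csv',
        data_type=DataTypesEnum.text)
    print(text_data.data_type, text_data.features.shape)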