\n",
+ "
staging
\n",
" \n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " \n",
+ " type | \n",
+ " \n",
+ " \n",
+ " \n",
+ " label | \n",
+ " \n",
+ " \n",
+ " \n",
+ " message | \n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " \n",
+ " \n",
+ " INFO | \n",
+ " \n",
+ " \n",
+ " \n",
+ " FOREIGN KEYS NOT FOUND | \n",
+ " \n",
+ " \n",
+ " \n",
+ " When joining SALARIES__STAGING_TABLE_1 and ALLSTARFULL__STAGING_TABLE_2 over 'playerID' and 'playerID', there are no corresponding entries for 64.710317% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys. | \n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " 1 | \n",
+ " \n",
+ " \n",
+ " INFO | \n",
+ " \n",
+ " \n",
+ " \n",
+ " FOREIGN KEYS NOT FOUND | \n",
+ " \n",
+ " \n",
+ " \n",
+ " When joining SALARIES__STAGING_TABLE_1 and AWARDSPLAYERS__STAGING_TABLE_3 over 'playerID' and 'playerID', there are no corresponding entries for 75.376911% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys. | \n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " 2 | \n",
+ " \n",
+ " \n",
+ " INFO | \n",
+ " \n",
+ " \n",
+ " \n",
+ " FOREIGN KEYS NOT FOUND | \n",
+ " \n",
+ " \n",
+ " \n",
+ " When joining SALARIES__STAGING_TABLE_1 and AWARDSSHAREPLAYERS__STAGING_TABLE_4 over 'playerID' and 'playerID', there are no corresponding entries for 62.459617% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys. | \n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " 3 | \n",
+ " \n",
+ " \n",
+ " INFO | \n",
+ " \n",
+ " \n",
+ " \n",
+ " FOREIGN KEYS NOT FOUND | \n",
+ " \n",
+ " \n",
+ " \n",
+ " When joining SALARIES__STAGING_TABLE_1 and BATTING__STAGING_TABLE_5 over 'playerID' and 'playerID', there are no corresponding entries for 8.765884% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys. | \n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " 4 | \n",
+ " \n",
+ " \n",
+ " INFO | \n",
+ " \n",
+ " \n",
+ " \n",
+ " FOREIGN KEYS NOT FOUND | \n",
+ " \n",
+ " \n",
+ " \n",
+ " When joining SALARIES__STAGING_TABLE_1 and BATTINGPOST__STAGING_TABLE_6 over 'playerID' and 'playerID', there are no corresponding entries for 41.018738% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys. | \n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " 5 | \n",
+ " \n",
+ " \n",
+ " INFO | \n",
+ " \n",
+ " \n",
+ " \n",
+ " FOREIGN KEYS NOT FOUND | \n",
+ " \n",
+ " \n",
+ " \n",
+ " When joining SALARIES__STAGING_TABLE_1 and FIELDING__STAGING_TABLE_7 over 'playerID' and 'playerID', there are no corresponding entries for 19.270946% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys. | \n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " 6 | \n",
+ " \n",
+ " \n",
+ " INFO | \n",
+ " \n",
+ " \n",
+ " \n",
+ " FOREIGN KEYS NOT FOUND | \n",
+ " \n",
+ " \n",
+ " \n",
+ " When joining SALARIES__STAGING_TABLE_1 and FIELDINGPOST__STAGING_TABLE_8 over 'playerID' and 'playerID', there are no corresponding entries for 38.369589% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys. | \n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " 7 | \n",
+ " \n",
+ " \n",
+ " INFO | \n",
+ " \n",
+ " \n",
+ " \n",
+ " FOREIGN KEYS NOT FOUND | \n",
+ " \n",
+ " \n",
+ " \n",
+ " When joining SALARIES__STAGING_TABLE_1 and PITCHING__STAGING_TABLE_9 over 'playerID' and 'playerID', there are no corresponding entries for 54.862158% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys. | \n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " 8 | \n",
+ " \n",
+ " \n",
+ " INFO | \n",
+ " \n",
+ " \n",
+ " \n",
+ " FOREIGN KEYS NOT FOUND | \n",
+ " \n",
+ " \n",
+ " \n",
+ " When joining SALARIES__STAGING_TABLE_1 and PITCHINGPOST__STAGING_TABLE_10 over 'playerID' and 'playerID', there are no corresponding entries for 73.589274% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys. | \n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ "
"
+ ],
+ "text/plain": [
+ " type label message \n",
+ "0 INFO FOREIGN KEYS NOT FOUND When joining SALARIES__STAGING_T...\n",
+ "1 INFO FOREIGN KEYS NOT FOUND When joining SALARIES__STAGING_T...\n",
+ "2 INFO FOREIGN KEYS NOT FOUND When joining SALARIES__STAGING_T...\n",
+ "3 INFO FOREIGN KEYS NOT FOUND When joining SALARIES__STAGING_T...\n",
+ "4 INFO FOREIGN KEYS NOT FOUND When joining SALARIES__STAGING_T...\n",
+ "5 INFO FOREIGN KEYS NOT FOUND When joining SALARIES__STAGING_T...\n",
+ "6 INFO FOREIGN KEYS NOT FOUND When joining SALARIES__STAGING_T...\n",
+ "7 INFO FOREIGN KEYS NOT FOUND When joining SALARIES__STAGING_T...\n",
+ "8 INFO FOREIGN KEYS NOT FOUND When joining SALARIES__STAGING_T..."
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
}
],
"source": [
@@ -49221,23 +49368,17 @@
"Staging... 100% |██████████| [elapsed: 00:00, remaining: 00:00] \n",
"Preprocessing... 100% |██████████| [elapsed: 00:00, remaining: 00:00] \n",
"\n",
- "INFO [FOREIGN KEYS NOT FOUND]: When joining SALARIES__STAGING_TABLE_1 and ALLSTARFULL__STAGING_TABLE_2 over 'playerID' and 'playerID', there are no corresponding entries for 64.710317% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys.\n",
- "INFO [FOREIGN KEYS NOT FOUND]: When joining SALARIES__STAGING_TABLE_1 and AWARDSPLAYERS__STAGING_TABLE_3 over 'playerID' and 'playerID', there are no corresponding entries for 75.376911% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys.\n",
- "INFO [FOREIGN KEYS NOT FOUND]: When joining SALARIES__STAGING_TABLE_1 and AWARDSSHAREPLAYERS__STAGING_TABLE_4 over 'playerID' and 'playerID', there are no corresponding entries for 62.459617% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys.\n",
- "INFO [FOREIGN KEYS NOT FOUND]: When joining SALARIES__STAGING_TABLE_1 and BATTING__STAGING_TABLE_5 over 'playerID' and 'playerID', there are no corresponding entries for 8.765884% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys.\n",
- "INFO [FOREIGN KEYS NOT FOUND]: When joining SALARIES__STAGING_TABLE_1 and BATTINGPOST__STAGING_TABLE_6 over 'playerID' and 'playerID', there are no corresponding entries for 41.018738% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys.\n",
- "INFO [FOREIGN KEYS NOT FOUND]: When joining SALARIES__STAGING_TABLE_1 and FIELDING__STAGING_TABLE_7 over 'playerID' and 'playerID', there are no corresponding entries for 19.270946% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys.\n",
- "INFO [FOREIGN KEYS NOT FOUND]: When joining SALARIES__STAGING_TABLE_1 and FIELDINGPOST__STAGING_TABLE_8 over 'playerID' and 'playerID', there are no corresponding entries for 38.369589% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys.\n",
- "INFO [FOREIGN KEYS NOT FOUND]: When joining SALARIES__STAGING_TABLE_1 and PITCHING__STAGING_TABLE_9 over 'playerID' and 'playerID', there are no corresponding entries for 54.862158% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys.\n",
- "INFO [FOREIGN KEYS NOT FOUND]: When joining SALARIES__STAGING_TABLE_1 and PITCHINGPOST__STAGING_TABLE_10 over 'playerID' and 'playerID', there are no corresponding entries for 73.589274% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys.\n",
+ "The pipeline check generated 9 issues labeled INFO and 0 issues labeled WARNING.\n",
+ "To see the issues in full, run .check() on the pipeline.\n",
+ "\n",
"Staging... 100% |██████████| [elapsed: 00:00, remaining: 00:00] \n",
"Preprocessing... 100% |██████████| [elapsed: 00:00, remaining: 00:00] \n",
- "FastProp: Trying 3080 features... 100% |██████████| [elapsed: 00:11, remaining: 00:00] \n",
+ "FastProp: Trying 3080 features... 100% |██████████| [elapsed: 00:12, remaining: 00:00] \n",
"FastProp: Building features... 100% |██████████| [elapsed: 00:02, remaining: 00:00] \n",
- "XGBoost: Training as predictor... 100% |██████████| [elapsed: 00:24, remaining: 00:00] \n",
+ "XGBoost: Training as predictor... 100% |██████████| [elapsed: 00:23, remaining: 00:00] \n",
"\n",
"Trained pipeline.\n",
- "Time taken: 0h:0m:39.821604\n",
+ "Time taken: 0h:0m:41.877725\n",
"\n"
]
},
@@ -49254,7 +49395,7 @@
" predictors=['XGBoostRegressor'],\n",
" preprocessors=['Mapping'],\n",
" share_selected_features=0.5,\n",
- " tags=['fast_prop', 'container-udGggs'])
url: localhost:1709/#/getpipeline/baseball/Hiv492/0/
"
+ " tags=['fast_prop', 'container-SHAMaN'])"
],
"text/plain": [
"Pipeline(data_model='salaries',\n",
@@ -49267,9 +49408,7 @@
" predictors=['XGBoostRegressor'],\n",
" preprocessors=['Mapping'],\n",
" share_selected_features=0.5,\n",
- " tags=['fast_prop', 'container-udGggs'])\n",
- "\n",
- "url: localhost:1709/#/getpipeline/baseball/Hiv492/0/"
+ " tags=['fast_prop', 'container-SHAMaN'])"
]
},
"execution_count": 31,
@@ -49295,16 +49434,234 @@
"Preprocessing... 100% |██████████| [elapsed: 00:00, remaining: 00:00] \n",
"Checking... 100% |██████████| [elapsed: 00:00, remaining: 00:00] \n",
"\n",
- "INFO [FOREIGN KEYS NOT FOUND]: When joining SALARIES__STAGING_TABLE_1 and ALLSTARFULL__STAGING_TABLE_2 over 'playerID' and 'playerID', there are no corresponding entries for 64.710317% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys.\n",
- "INFO [FOREIGN KEYS NOT FOUND]: When joining SALARIES__STAGING_TABLE_1 and AWARDSPLAYERS__STAGING_TABLE_3 over 'playerID' and 'playerID', there are no corresponding entries for 75.376911% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys.\n",
- "INFO [FOREIGN KEYS NOT FOUND]: When joining SALARIES__STAGING_TABLE_1 and AWARDSSHAREPLAYERS__STAGING_TABLE_4 over 'playerID' and 'playerID', there are no corresponding entries for 62.459617% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys.\n",
- "INFO [FOREIGN KEYS NOT FOUND]: When joining SALARIES__STAGING_TABLE_1 and BATTING__STAGING_TABLE_5 over 'playerID' and 'playerID', there are no corresponding entries for 8.765884% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys.\n",
- "INFO [FOREIGN KEYS NOT FOUND]: When joining SALARIES__STAGING_TABLE_1 and BATTINGPOST__STAGING_TABLE_6 over 'playerID' and 'playerID', there are no corresponding entries for 41.018738% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys.\n",
- "INFO [FOREIGN KEYS NOT FOUND]: When joining SALARIES__STAGING_TABLE_1 and FIELDING__STAGING_TABLE_7 over 'playerID' and 'playerID', there are no corresponding entries for 19.270946% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys.\n",
- "INFO [FOREIGN KEYS NOT FOUND]: When joining SALARIES__STAGING_TABLE_1 and FIELDINGPOST__STAGING_TABLE_8 over 'playerID' and 'playerID', there are no corresponding entries for 38.369589% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys.\n",
- "INFO [FOREIGN KEYS NOT FOUND]: When joining SALARIES__STAGING_TABLE_1 and PITCHING__STAGING_TABLE_9 over 'playerID' and 'playerID', there are no corresponding entries for 54.862158% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys.\n",
- "INFO [FOREIGN KEYS NOT FOUND]: When joining SALARIES__STAGING_TABLE_1 and PITCHINGPOST__STAGING_TABLE_10 over 'playerID' and 'playerID', there are no corresponding entries for 73.589274% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys.\n"
+ "The pipeline check generated 9 issues labeled INFO and 0 issues labeled WARNING.\n"
]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " \n",
+ " type | \n",
+ " \n",
+ " \n",
+ " \n",
+ " label | \n",
+ " \n",
+ " \n",
+ " \n",
+ " message | \n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " \n",
+ " \n",
+ " INFO | \n",
+ " \n",
+ " \n",
+ " \n",
+ " FOREIGN KEYS NOT FOUND | \n",
+ " \n",
+ " \n",
+ " \n",
+ " When joining SALARIES__STAGING_TABLE_1 and ALLSTARFULL__STAGING_TABLE_2 over 'playerID' and 'playerID', there are no corresponding entries for 64.710317% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys. | \n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " 1 | \n",
+ " \n",
+ " \n",
+ " INFO | \n",
+ " \n",
+ " \n",
+ " \n",
+ " FOREIGN KEYS NOT FOUND | \n",
+ " \n",
+ " \n",
+ " \n",
+ " When joining SALARIES__STAGING_TABLE_1 and AWARDSPLAYERS__STAGING_TABLE_3 over 'playerID' and 'playerID', there are no corresponding entries for 75.376911% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys. | \n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " 2 | \n",
+ " \n",
+ " \n",
+ " INFO | \n",
+ " \n",
+ " \n",
+ " \n",
+ " FOREIGN KEYS NOT FOUND | \n",
+ " \n",
+ " \n",
+ " \n",
+ " When joining SALARIES__STAGING_TABLE_1 and AWARDSSHAREPLAYERS__STAGING_TABLE_4 over 'playerID' and 'playerID', there are no corresponding entries for 62.459617% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys. | \n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " 3 | \n",
+ " \n",
+ " \n",
+ " INFO | \n",
+ " \n",
+ " \n",
+ " \n",
+ " FOREIGN KEYS NOT FOUND | \n",
+ " \n",
+ " \n",
+ " \n",
+ " When joining SALARIES__STAGING_TABLE_1 and BATTING__STAGING_TABLE_5 over 'playerID' and 'playerID', there are no corresponding entries for 8.765884% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys. | \n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " 4 | \n",
+ " \n",
+ " \n",
+ " INFO | \n",
+ " \n",
+ " \n",
+ " \n",
+ " FOREIGN KEYS NOT FOUND | \n",
+ " \n",
+ " \n",
+ " \n",
+ " When joining SALARIES__STAGING_TABLE_1 and BATTINGPOST__STAGING_TABLE_6 over 'playerID' and 'playerID', there are no corresponding entries for 41.018738% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys. | \n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " 5 | \n",
+ " \n",
+ " \n",
+ " INFO | \n",
+ " \n",
+ " \n",
+ " \n",
+ " FOREIGN KEYS NOT FOUND | \n",
+ " \n",
+ " \n",
+ " \n",
+ " When joining SALARIES__STAGING_TABLE_1 and FIELDING__STAGING_TABLE_7 over 'playerID' and 'playerID', there are no corresponding entries for 19.270946% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys. | \n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " 6 | \n",
+ " \n",
+ " \n",
+ " INFO | \n",
+ " \n",
+ " \n",
+ " \n",
+ " FOREIGN KEYS NOT FOUND | \n",
+ " \n",
+ " \n",
+ " \n",
+ " When joining SALARIES__STAGING_TABLE_1 and FIELDINGPOST__STAGING_TABLE_8 over 'playerID' and 'playerID', there are no corresponding entries for 38.369589% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys. | \n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " 7 | \n",
+ " \n",
+ " \n",
+ " INFO | \n",
+ " \n",
+ " \n",
+ " \n",
+ " FOREIGN KEYS NOT FOUND | \n",
+ " \n",
+ " \n",
+ " \n",
+ " When joining SALARIES__STAGING_TABLE_1 and PITCHING__STAGING_TABLE_9 over 'playerID' and 'playerID', there are no corresponding entries for 54.862158% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys. | \n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " 8 | \n",
+ " \n",
+ " \n",
+ " INFO | \n",
+ " \n",
+ " \n",
+ " \n",
+ " FOREIGN KEYS NOT FOUND | \n",
+ " \n",
+ " \n",
+ " \n",
+ " When joining SALARIES__STAGING_TABLE_1 and PITCHINGPOST__STAGING_TABLE_10 over 'playerID' and 'playerID', there are no corresponding entries for 73.589274% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys. | \n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ "
"
+ ],
+ "text/plain": [
+ " type label message \n",
+ "0 INFO FOREIGN KEYS NOT FOUND When joining SALARIES__STAGING_T...\n",
+ "1 INFO FOREIGN KEYS NOT FOUND When joining SALARIES__STAGING_T...\n",
+ "2 INFO FOREIGN KEYS NOT FOUND When joining SALARIES__STAGING_T...\n",
+ "3 INFO FOREIGN KEYS NOT FOUND When joining SALARIES__STAGING_T...\n",
+ "4 INFO FOREIGN KEYS NOT FOUND When joining SALARIES__STAGING_T...\n",
+ "5 INFO FOREIGN KEYS NOT FOUND When joining SALARIES__STAGING_T...\n",
+ "6 INFO FOREIGN KEYS NOT FOUND When joining SALARIES__STAGING_T...\n",
+ "7 INFO FOREIGN KEYS NOT FOUND When joining SALARIES__STAGING_T...\n",
+ "8 INFO FOREIGN KEYS NOT FOUND When joining SALARIES__STAGING_T..."
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
}
],
"source": [
@@ -49324,23 +49681,17 @@
"Staging... 100% |██████████| [elapsed: 00:00, remaining: 00:00] \n",
"Preprocessing... 100% |██████████| [elapsed: 00:00, remaining: 00:00] \n",
"\n",
- "INFO [FOREIGN KEYS NOT FOUND]: When joining SALARIES__STAGING_TABLE_1 and ALLSTARFULL__STAGING_TABLE_2 over 'playerID' and 'playerID', there are no corresponding entries for 64.710317% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys.\n",
- "INFO [FOREIGN KEYS NOT FOUND]: When joining SALARIES__STAGING_TABLE_1 and AWARDSPLAYERS__STAGING_TABLE_3 over 'playerID' and 'playerID', there are no corresponding entries for 75.376911% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys.\n",
- "INFO [FOREIGN KEYS NOT FOUND]: When joining SALARIES__STAGING_TABLE_1 and AWARDSSHAREPLAYERS__STAGING_TABLE_4 over 'playerID' and 'playerID', there are no corresponding entries for 62.459617% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys.\n",
- "INFO [FOREIGN KEYS NOT FOUND]: When joining SALARIES__STAGING_TABLE_1 and BATTING__STAGING_TABLE_5 over 'playerID' and 'playerID', there are no corresponding entries for 8.765884% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys.\n",
- "INFO [FOREIGN KEYS NOT FOUND]: When joining SALARIES__STAGING_TABLE_1 and BATTINGPOST__STAGING_TABLE_6 over 'playerID' and 'playerID', there are no corresponding entries for 41.018738% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys.\n",
- "INFO [FOREIGN KEYS NOT FOUND]: When joining SALARIES__STAGING_TABLE_1 and FIELDING__STAGING_TABLE_7 over 'playerID' and 'playerID', there are no corresponding entries for 19.270946% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys.\n",
- "INFO [FOREIGN KEYS NOT FOUND]: When joining SALARIES__STAGING_TABLE_1 and FIELDINGPOST__STAGING_TABLE_8 over 'playerID' and 'playerID', there are no corresponding entries for 38.369589% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys.\n",
- "INFO [FOREIGN KEYS NOT FOUND]: When joining SALARIES__STAGING_TABLE_1 and PITCHING__STAGING_TABLE_9 over 'playerID' and 'playerID', there are no corresponding entries for 54.862158% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys.\n",
- "INFO [FOREIGN KEYS NOT FOUND]: When joining SALARIES__STAGING_TABLE_1 and PITCHINGPOST__STAGING_TABLE_10 over 'playerID' and 'playerID', there are no corresponding entries for 73.589274% of entries in 'playerID' in 'SALARIES__STAGING_TABLE_1'. You might want to double-check your join keys.\n",
+ "The pipeline check generated 9 issues labeled INFO and 0 issues labeled WARNING.\n",
+ "To see the issues in full, run .check() on the pipeline.\n",
+ "\n",
"Staging... 100% |██████████| [elapsed: 00:00, remaining: 00:00] \n",
"Preprocessing... 100% |██████████| [elapsed: 00:00, remaining: 00:00] \n",
- "Relboost: Training features... 100% |██████████| [elapsed: 00:39, remaining: 00:00] \n",
+ "Relboost: Training features... 100% |██████████| [elapsed: 00:40, remaining: 00:00] \n",
"Relboost: Building features... 100% |██████████| [elapsed: 00:04, remaining: 00:00] \n",
"XGBoost: Training as predictor... 100% |██████████| [elapsed: 00:04, remaining: 00:00] \n",
"\n",
"Trained pipeline.\n",
- "Time taken: 0h:0m:47.967351\n",
+ "Time taken: 0h:0m:48.206881\n",
"\n"
]
},
@@ -49357,7 +49708,7 @@
" predictors=['XGBoostRegressor'],\n",
" preprocessors=['Mapping'],\n",
" share_selected_features=0.5,\n",
- " tags=['relboost', 'container-udGggs'])
url: localhost:1709/#/getpipeline/baseball/qERlSW/0/
"
+ " tags=['relboost', 'container-SHAMaN'])"
],
"text/plain": [
"Pipeline(data_model='salaries',\n",
@@ -49370,9 +49721,7 @@
" predictors=['XGBoostRegressor'],\n",
" preprocessors=['Mapping'],\n",
" share_selected_features=0.5,\n",
- " tags=['relboost', 'container-udGggs'])\n",
- "\n",
- "url: localhost:1709/#/getpipeline/baseball/qERlSW/0/"
+ " tags=['relboost', 'container-SHAMaN'])"
]
},
"execution_count": 33,
@@ -49482,7 +49831,7 @@
"
0 | \n",
" \n",
" \n",
- "
2022-10-31 07:09:48 | \n",
+ "
2023-07-30 18:59:23 | \n",
" \n",
" \n",
" \n",
@@ -49511,7 +49860,7 @@
"
1 | \n",
" \n",
" \n",
- "
2022-10-31 07:10:40 | \n",
+ "
2023-07-30 19:00:14 | \n",
" \n",
" \n",
" \n",
@@ -49541,8 +49890,8 @@
],
"text/plain": [
" date time set used target mae rmse rsquared\n",
- "0 2022-10-31 07:09:48 train salary 690630.2317 1242307.2495 0.8248\n",
- "1 2022-10-31 07:10:40 test salary 763930.032 1401705.6283 0.7883"
+ "0 2023-07-30 18:59:23 train salary 690630.2317 1242307.2495 0.8248\n",
+ "1 2023-07-30 19:00:14 test salary 763930.032 1401705.6283 0.7883"
]
},
"execution_count": 34,
@@ -49640,7 +49989,7 @@
"
0 | \n",
" \n",
" \n",
- "
2022-10-31 07:10:36 | \n",
+ "
2023-07-30 19:00:11 | \n",
" \n",
" \n",
" \n",
@@ -49669,7 +50018,7 @@
"
1 | \n",
" \n",
" \n",
- "
2022-10-31 07:10:41 | \n",
+ "
2023-07-30 19:00:16 | \n",
" \n",
" \n",
" \n",
@@ -49699,8 +50048,8 @@
],
"text/plain": [
" date time set used target mae rmse rsquared\n",
- "0 2022-10-31 07:10:36 train salary 459470.8604 793963.8048 0.9284\n",
- "1 2022-10-31 07:10:41 test salary 664766.3496 1217213.7658 0.8402"
+ "0 2023-07-30 19:00:11 train salary 459470.8604 793963.8048 0.9284\n",
+ "1 2023-07-30 19:00:16 test salary 664766.3496 1217213.7658 0.8402"
]
},
"execution_count": 35,
@@ -52893,23 +53242,23 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/home/ubuntu/.local/lib/python3.8/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
+ "/home/ubuntu/.local/lib/python3.10/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
" warnings.warn(\n",
- "/home/ubuntu/.local/lib/python3.8/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
+ "/home/ubuntu/.local/lib/python3.10/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
" warnings.warn(\n",
- "/home/ubuntu/.local/lib/python3.8/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
+ "/home/ubuntu/.local/lib/python3.10/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
" warnings.warn(\n",
- "/home/ubuntu/.local/lib/python3.8/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
+ "/home/ubuntu/.local/lib/python3.10/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
" warnings.warn(\n",
- "/home/ubuntu/.local/lib/python3.8/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
+ "/home/ubuntu/.local/lib/python3.10/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
" warnings.warn(\n",
- "/home/ubuntu/.local/lib/python3.8/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
+ "/home/ubuntu/.local/lib/python3.10/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
" warnings.warn(\n",
- "/home/ubuntu/.local/lib/python3.8/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
+ "/home/ubuntu/.local/lib/python3.10/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
" warnings.warn(\n",
- "/home/ubuntu/.local/lib/python3.8/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
+ "/home/ubuntu/.local/lib/python3.10/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
" warnings.warn(\n",
- "/home/ubuntu/.local/lib/python3.8/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
+ "/home/ubuntu/.local/lib/python3.10/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
" warnings.warn(\n"
]
}
@@ -52930,23 +53279,23 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/home/ubuntu/.local/lib/python3.8/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
+ "/home/ubuntu/.local/lib/python3.10/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
" warnings.warn(\n",
- "/home/ubuntu/.local/lib/python3.8/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
+ "/home/ubuntu/.local/lib/python3.10/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
" warnings.warn(\n",
- "/home/ubuntu/.local/lib/python3.8/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
+ "/home/ubuntu/.local/lib/python3.10/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
" warnings.warn(\n",
- "/home/ubuntu/.local/lib/python3.8/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
+ "/home/ubuntu/.local/lib/python3.10/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
" warnings.warn(\n",
- "/home/ubuntu/.local/lib/python3.8/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
+ "/home/ubuntu/.local/lib/python3.10/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
" warnings.warn(\n",
- "/home/ubuntu/.local/lib/python3.8/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
+ "/home/ubuntu/.local/lib/python3.10/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
" warnings.warn(\n",
- "/home/ubuntu/.local/lib/python3.8/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
+ "/home/ubuntu/.local/lib/python3.10/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
" warnings.warn(\n",
- "/home/ubuntu/.local/lib/python3.8/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
+ "/home/ubuntu/.local/lib/python3.10/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
" warnings.warn(\n",
- "/home/ubuntu/.local/lib/python3.8/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
+ "/home/ubuntu/.local/lib/python3.10/site-packages/featuretools/entityset/entityset.py:1906: UserWarning: index index not found in dataframe, creating new integer column\n",
" warnings.warn(\n"
]
}
@@ -52983,44 +53332,6 @@
"execution_count": 58,
"metadata": {},
"outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "91.880250% of all entries of column 'MAX(allstarfull.startingPos)' are NULL values.\n",
- "91.880250% of all entries of column 'MEAN(allstarfull.startingPos)' are NULL values.\n",
- "91.880250% of all entries of column 'MIN(allstarfull.startingPos)' are NULL values.\n",
- "91.923325% of all entries of column 'SKEW(allstarfull.GP)' are NULL values.\n",
- "91.912557% of all entries of column 'SKEW(allstarfull.gameNum)' are NULL values.\n",
- "97.571613% of all entries of column 'SKEW(allstarfull.startingPos)' are NULL values.\n",
- "96.047814% of all entries of column 'STD(allstarfull.startingPos)' are NULL values.\n",
- "91.158734% of all entries of column 'SKEW(pitchingpost.BAOpp)' are NULL values.\n",
- "91.142580% of all entries of column 'SKEW(pitchingpost.BB)' are NULL values.\n",
- "91.142580% of all entries of column 'SKEW(pitchingpost.BFP)' are NULL values.\n",
- "91.142580% of all entries of column 'SKEW(pitchingpost.BK)' are NULL values.\n",
- "91.142580% of all entries of column 'SKEW(pitchingpost.CG)' are NULL values.\n",
- "91.142580% of all entries of column 'SKEW(pitchingpost.ER)' are NULL values.\n",
- "91.756407% of all entries of column 'SKEW(pitchingpost.ERA)' are NULL values.\n",
- "91.142580% of all entries of column 'SKEW(pitchingpost.G)' are NULL values.\n",
- "91.142580% of all entries of column 'SKEW(pitchingpost.GF)' are NULL values.\n",
- "91.142580% of all entries of column 'SKEW(pitchingpost.GIDP)' are NULL values.\n",
- "91.142580% of all entries of column 'SKEW(pitchingpost.GS)' are NULL values.\n",
- "91.142580% of all entries of column 'SKEW(pitchingpost.H)' are NULL values.\n",
- "91.142580% of all entries of column 'SKEW(pitchingpost.HBP)' are NULL values.\n",
- "91.142580% of all entries of column 'SKEW(pitchingpost.HR)' are NULL values.\n",
- "91.142580% of all entries of column 'SKEW(pitchingpost.IBB)' are NULL values.\n",
- "91.142580% of all entries of column 'SKEW(pitchingpost.IPouts)' are NULL values.\n",
- "91.142580% of all entries of column 'SKEW(pitchingpost.L)' are NULL values.\n",
- "91.142580% of all entries of column 'SKEW(pitchingpost.R)' are NULL values.\n",
- "91.180271% of all entries of column 'SKEW(pitchingpost.SF)' are NULL values.\n",
- "91.180271% of all entries of column 'SKEW(pitchingpost.SH)' are NULL values.\n",
- "91.142580% of all entries of column 'SKEW(pitchingpost.SHO)' are NULL values.\n",
- "91.142580% of all entries of column 'SKEW(pitchingpost.SO)' are NULL values.\n",
- "91.142580% of all entries of column 'SKEW(pitchingpost.SV)' are NULL values.\n",
- "91.142580% of all entries of column 'SKEW(pitchingpost.W)' are NULL values.\n",
- "91.142580% of all entries of column 'SKEW(pitchingpost.WP)' are NULL values.\n"
- ]
- },
{
"data": {
"text/html": [
@@ -155636,8 +155947,6 @@
" name: featuretools_train
\n",
" type: getml.DataFrame
\n",
" \n",
- " url:
localhost:1709/#/getdataframe/baseball/featuretools_train/\n",
- " \n",
" \n"
],
"text/plain": [
@@ -155672,9 +155981,7 @@
"\n",
"18572 rows x 722 columns\n",
"memory usage: 102.37 MB\n",
- "name: featuretools_train\n",
- "type: getml.DataFrame\n",
- "url: localhost:1709/#/getdataframe/baseball/featuretools_train/"
+ "type: getml.DataFrame"
]
},
"execution_count": 58,
@@ -155692,47 +155999,9 @@
},
{
"cell_type": "code",
- "execution_count": 59,
+ "execution_count": 60,
"metadata": {},
"outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "91.451862% of all entries of column 'MAX(allstarfull.startingPos)' are NULL values.\n",
- "91.451862% of all entries of column 'MEAN(allstarfull.startingPos)' are NULL values.\n",
- "91.451862% of all entries of column 'MIN(allstarfull.startingPos)' are NULL values.\n",
- "91.914519% of all entries of column 'SKEW(allstarfull.GP)' are NULL values.\n",
- "91.892487% of all entries of column 'SKEW(allstarfull.gameNum)' are NULL values.\n",
- "97.884997% of all entries of column 'SKEW(allstarfull.startingPos)' are NULL values.\n",
- "96.342807% of all entries of column 'STD(allstarfull.startingPos)' are NULL values.\n",
- "90.746861% of all entries of column 'SKEW(pitchingpost.BAOpp)' are NULL values.\n",
- "90.724829% of all entries of column 'SKEW(pitchingpost.BB)' are NULL values.\n",
- "90.724829% of all entries of column 'SKEW(pitchingpost.BFP)' are NULL values.\n",
- "90.724829% of all entries of column 'SKEW(pitchingpost.BK)' are NULL values.\n",
- "90.724829% of all entries of column 'SKEW(pitchingpost.CG)' are NULL values.\n",
- "90.724829% of all entries of column 'SKEW(pitchingpost.ER)' are NULL values.\n",
- "91.341705% of all entries of column 'SKEW(pitchingpost.ERA)' are NULL values.\n",
- "90.724829% of all entries of column 'SKEW(pitchingpost.G)' are NULL values.\n",
- "90.724829% of all entries of column 'SKEW(pitchingpost.GF)' are NULL values.\n",
- "90.724829% of all entries of column 'SKEW(pitchingpost.GIDP)' are NULL values.\n",
- "90.724829% of all entries of column 'SKEW(pitchingpost.GS)' are NULL values.\n",
- "90.724829% of all entries of column 'SKEW(pitchingpost.H)' are NULL values.\n",
- "90.724829% of all entries of column 'SKEW(pitchingpost.HBP)' are NULL values.\n",
- "90.724829% of all entries of column 'SKEW(pitchingpost.HR)' are NULL values.\n",
- "90.724829% of all entries of column 'SKEW(pitchingpost.IBB)' are NULL values.\n",
- "90.724829% of all entries of column 'SKEW(pitchingpost.IPouts)' are NULL values.\n",
- "90.724829% of all entries of column 'SKEW(pitchingpost.L)' are NULL values.\n",
- "90.724829% of all entries of column 'SKEW(pitchingpost.R)' are NULL values.\n",
- "90.768892% of all entries of column 'SKEW(pitchingpost.SF)' are NULL values.\n",
- "90.768892% of all entries of column 'SKEW(pitchingpost.SH)' are NULL values.\n",
- "90.724829% of all entries of column 'SKEW(pitchingpost.SHO)' are NULL values.\n",
- "90.724829% of all entries of column 'SKEW(pitchingpost.SO)' are NULL values.\n",
- "90.724829% of all entries of column 'SKEW(pitchingpost.SV)' are NULL values.\n",
- "90.724829% of all entries of column 'SKEW(pitchingpost.W)' are NULL values.\n",
- "90.724829% of all entries of column 'SKEW(pitchingpost.WP)' are NULL values.\n"
- ]
- },
{
"data": {
"text/html": [
@@ -258348,8 +258617,6 @@
" name: featuretools_test
\n",
" type: getml.DataFrame
\n",
" \n",
- " url:
localhost:1709/#/getdataframe/baseball/featuretools_test/\n",
- " \n",
" \n"
],
"text/plain": [
@@ -258384,12 +258651,10 @@
"\n",
"4539 rows x 722 columns\n",
"memory usage: 25.02 MB\n",
- "name: featuretools_test\n",
- "type: getml.DataFrame\n",
- "url: localhost:1709/#/getdataframe/baseball/featuretools_test/"
+ "type: getml.DataFrame"
]
},
- "execution_count": 59,
+ "execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
@@ -258413,7 +258678,7 @@
},
{
"cell_type": "code",
- "execution_count": 60,
+ "execution_count": 61,
"metadata": {},
"outputs": [
{
@@ -258443,7 +258708,7 @@
" tags=['featuretools'])"
]
},
- "execution_count": 60,
+ "execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
@@ -258465,7 +258730,7 @@
},
{
"cell_type": "code",
- "execution_count": 61,
+ "execution_count": 62,
"metadata": {},
"outputs": [
{
@@ -258477,30 +258742,15 @@
"Preprocessing... 100% |██████████| [elapsed: 00:00, remaining: 00:00] \n",
"Checking... 100% |██████████| [elapsed: 00:00, remaining: 00:00] \n",
"\n",
- "WARNING [COLUMN SHOULD BE UNUSED]: All non-NULL entries in column 'day(year)' in POPULATION__STAGING_TABLE_1 are equal to each other. You should consider setting its role to unused_string or using it for comparison only (you can do the latter by setting a unit that contains 'comparison only').\n",
- "WARNING [COLUMN SHOULD BE UNUSED]: All non-NULL entries in column 'month(year)' in POPULATION__STAGING_TABLE_1 are equal to each other. You should consider setting its role to unused_string or using it for comparison only (you can do the latter by setting a unit that contains 'comparison only').\n",
- "WARNING [COLUMN SHOULD BE UNUSED]: All non-NULL entries in column 'COALESCE( max(allstarfull.gamenum), 0.000000 )' in POPULATION__STAGING_TABLE_1 are equal to each other. You should consider setting its role to unused_float or using it for comparison only (you can do the latter by setting a unit that contains 'comparison only').\n",
- "WARNING [COLUMN SHOULD BE UNUSED]: All non-NULL entries in column 'COALESCE( mean(allstarfull.gamenum), 0.000000 )' in POPULATION__STAGING_TABLE_1 are equal to each other. You should consider setting its role to unused_float or using it for comparison only (you can do the latter by setting a unit that contains 'comparison only').\n",
- "WARNING [COLUMN SHOULD BE UNUSED]: All non-NULL entries in column 'COALESCE( min(allstarfull.gamenum), 0.000000 )' in POPULATION__STAGING_TABLE_1 are equal to each other. You should consider setting its role to unused_float or using it for comparison only (you can do the latter by setting a unit that contains 'comparison only').\n",
- "WARNING [COLUMN SHOULD BE UNUSED]: All non-NULL entries in column 'COALESCE( skew(allstarfull.gamenum), 0.000000 )' in POPULATION__STAGING_TABLE_1 are equal to each other. You should consider setting its role to unused_float or using it for comparison only (you can do the latter by setting a unit that contains 'comparison only').\n",
- "WARNING [COLUMN SHOULD BE UNUSED]: All non-NULL entries in column 'COALESCE( std(allstarfull.gamenum), 0.000000 )' in POPULATION__STAGING_TABLE_1 are equal to each other. You should consider setting its role to unused_float or using it for comparison only (you can do the latter by setting a unit that contains 'comparison only').\n",
- "WARNING [COLUMN SHOULD BE UNUSED]: All non-NULL entries in column 'COALESCE( sum(allstarfull.gamenum), 0.000000 )' in POPULATION__STAGING_TABLE_1 are equal to each other. You should consider setting its role to unused_float or using it for comparison only (you can do the latter by setting a unit that contains 'comparison only').\n",
- "WARNING [COLUMN SHOULD BE UNUSED]: All non-NULL entries in column 'COALESCE( min(batting.stint), 1.000000 )' in POPULATION__STAGING_TABLE_1 are equal to each other. You should consider setting its role to unused_float or using it for comparison only (you can do the latter by setting a unit that contains 'comparison only').\n",
- "WARNING [COLUMN SHOULD BE UNUSED]: All non-NULL entries in column 'COALESCE( min(fielding.stint), 1.000000 )' in POPULATION__STAGING_TABLE_1 are equal to each other. You should consider setting its role to unused_float or using it for comparison only (you can do the latter by setting a unit that contains 'comparison only').\n",
- "WARNING [COLUMN SHOULD BE UNUSED]: All non-NULL entries in column 'COALESCE( max(fieldingpost.tp), 0.000000 )' in POPULATION__STAGING_TABLE_1 are equal to each other. You should consider setting its role to unused_float or using it for comparison only (you can do the latter by setting a unit that contains 'comparison only').\n",
- "WARNING [COLUMN SHOULD BE UNUSED]: All non-NULL entries in column 'COALESCE( mean(fieldingpost.tp), 0.000000 )' in POPULATION__STAGING_TABLE_1 are equal to each other. You should consider setting its role to unused_float or using it for comparison only (you can do the latter by setting a unit that contains 'comparison only').\n",
- "WARNING [COLUMN SHOULD BE UNUSED]: All non-NULL entries in column 'COALESCE( min(fieldingpost.tp), 0.000000 )' in POPULATION__STAGING_TABLE_1 are equal to each other. You should consider setting its role to unused_float or using it for comparison only (you can do the latter by setting a unit that contains 'comparison only').\n",
- "WARNING [COLUMN SHOULD BE UNUSED]: All non-NULL entries in column 'COALESCE( skew(fieldingpost.tp), 0.000000 )' in POPULATION__STAGING_TABLE_1 are equal to each other. You should consider setting its role to unused_float or using it for comparison only (you can do the latter by setting a unit that contains 'comparison only').\n",
- "WARNING [COLUMN SHOULD BE UNUSED]: All non-NULL entries in column 'COALESCE( std(fieldingpost.tp), 0.000000 )' in POPULATION__STAGING_TABLE_1 are equal to each other. You should consider setting its role to unused_float or using it for comparison only (you can do the latter by setting a unit that contains 'comparison only').\n",
- "WARNING [COLUMN SHOULD BE UNUSED]: All non-NULL entries in column 'COALESCE( sum(fieldingpost.tp), 0.000000 )' in POPULATION__STAGING_TABLE_1 are equal to each other. You should consider setting its role to unused_float or using it for comparison only (you can do the latter by setting a unit that contains 'comparison only').\n",
- "WARNING [COLUMN SHOULD BE UNUSED]: All non-NULL entries in column 'COALESCE( min(pitching.baopp), 0.000000 )' in POPULATION__STAGING_TABLE_1 are equal to each other. You should consider setting its role to unused_float or using it for comparison only (you can do the latter by setting a unit that contains 'comparison only').\n",
- "WARNING [COLUMN SHOULD BE UNUSED]: All non-NULL entries in column 'COALESCE( min(pitching.stint), 1.000000 )' in POPULATION__STAGING_TABLE_1 are equal to each other. You should consider setting its role to unused_float or using it for comparison only (you can do the latter by setting a unit that contains 'comparison only').\n",
+ "The pipeline check generated 0 issues labeled INFO and 18 issues labeled WARNING.\n",
+ "To see the issues in full, run .check() on the pipeline.\n",
+ "\n",
"Staging... 100% |██████████| [elapsed: 00:00, remaining: 00:00] \n",
"Preprocessing... 100% |██████████| [elapsed: 00:00, remaining: 00:00] \n",
- "XGBoost: Training as predictor... 100% |██████████| [elapsed: 00:24, remaining: 00:00] \n",
+ "XGBoost: Training as predictor... 100% |██████████| [elapsed: 00:23, remaining: 00:00] \n",
"\n",
"Trained pipeline.\n",
- "Time taken: 0h:0m:27.394815\n",
+ "Time taken: 0h:0m:28.412967\n",
"\n"
]
},
@@ -258516,7 +258766,7 @@
" predictors=['XGBoostRegressor'],\n",
" preprocessors=['Imputation'],\n",
" share_selected_features=0.5,\n",
- " tags=['featuretools'])
url: localhost:1709/#/getpipeline/baseball/LFHvfA/0/
"
+ " tags=['featuretools'])"
],
"text/plain": [
"Pipeline(data_model='population',\n",
@@ -258528,12 +258778,10 @@
" predictors=['XGBoostRegressor'],\n",
" preprocessors=['Imputation'],\n",
" share_selected_features=0.5,\n",
- " tags=['featuretools'])\n",
- "\n",
- "url: localhost:1709/#/getpipeline/baseball/LFHvfA/0/"
+ " tags=['featuretools'])"
]
},
- "execution_count": 61,
+ "execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
@@ -258544,7 +258792,7 @@
},
{
"cell_type": "code",
- "execution_count": 62,
+ "execution_count": 63,
"metadata": {},
"outputs": [
{
@@ -258625,7 +258873,7 @@
"
0 | \n",
" \n",
" \n",
- "
2022-10-31 07:12:56 | \n",
+ "
2023-07-30 19:07:40 | \n",
" \n",
" \n",
" \n",
@@ -258654,7 +258902,7 @@
"
1 | \n",
" \n",
" \n",
- "
2022-10-31 07:12:57 | \n",
+ "
2023-07-30 19:07:44 | \n",
" \n",
" \n",
" \n",
@@ -258684,11 +258932,11 @@
],
"text/plain": [
" date time set used target mae rmse rsquared\n",
- "0 2022-10-31 07:12:56 featuretools_train salary 704893.7458 1288741.874 0.8128\n",
- "1 2022-10-31 07:12:57 featuretools_test salary 776053.9972 1445682.6312 0.775 "
+ "0 2023-07-30 19:07:40 featuretools_train salary 704893.7458 1288741.874 0.8128\n",
+ "1 2023-07-30 19:07:44 featuretools_test salary 776053.9972 1445682.6312 0.775 "
]
},
- "execution_count": 62,
+ "execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
@@ -258708,7 +258956,7 @@
},
{
"cell_type": "code",
- "execution_count": 63,
+ "execution_count": 64,
"metadata": {},
"outputs": [
{
@@ -258718,20 +258966,20 @@
"DROP TABLE IF EXISTS \"FEATURE_1_29\";\n",
"\n",
"CREATE TABLE \"FEATURE_1_29\" AS\n",
- "SELECT LAST( t2.\"gameid__mapping_1_target_1_avg\", t2.\"year, '+1.000000 days'\" ) AS \"feature_1_29\",\n",
+ "SELECT LAST( t2.\"gameid__mapping_1_target_1_avg\", t2.\"year__1_000000_days\" ) AS \"feature_1_29\",\n",
" t1.rowid AS rownum\n",
"FROM \"SALARIES__STAGING_TABLE_1\" t1\n",
"INNER JOIN \"ALLSTARFULL__STAGING_TABLE_2\" t2\n",
"ON t1.\"playerid\" = t2.\"playerid\"\n",
- "WHERE t2.\"year, '+1.000000 days'\" <= t1.\"year\"\n",
+ "WHERE t2.\"year__1_000000_days\" <= t1.\"year\"\n",
"GROUP BY t1.rowid;\n",
"```"
],
"text/plain": [
- "'DROP TABLE IF EXISTS \"FEATURE_1_29\";\\n\\nCREATE TABLE \"FEATURE_1_29\" AS\\nSELECT LAST( t2.\"gameid__mapping_1_target_1_avg\", t2.\"year, \\'+1.000000 days\\'\" ) AS \"feature_1_29\",\\n t1.rowid AS rownum\\nFROM \"SALARIES__STAGING_TABLE_1\" t1\\nINNER JOIN \"ALLSTARFULL__STAGING_TABLE_2\" t2\\nON t1.\"playerid\" = t2.\"playerid\"\\nWHERE t2.\"year, \\'+1.000000 days\\'\" <= t1.\"year\"\\nGROUP BY t1.rowid;'"
+ "'DROP TABLE IF EXISTS \"FEATURE_1_29\";\\n\\nCREATE TABLE \"FEATURE_1_29\" AS\\nSELECT LAST( t2.\"gameid__mapping_1_target_1_avg\", t2.\"year__1_000000_days\" ) AS \"feature_1_29\",\\n t1.rowid AS rownum\\nFROM \"SALARIES__STAGING_TABLE_1\" t1\\nINNER JOIN \"ALLSTARFULL__STAGING_TABLE_2\" t2\\nON t1.\"playerid\" = t2.\"playerid\"\\nWHERE t2.\"year__1_000000_days\" <= t1.\"year\"\\nGROUP BY t1.rowid;'"
]
},
- "execution_count": 63,
+ "execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
@@ -258742,14 +258990,14 @@
},
{
"cell_type": "code",
- "execution_count": 64,
+ "execution_count": 65,
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"```sql\n",
- "-- The size of the SQL code for FEATURE_1_1 is 133317 characters, which is greater than the size_threshold of 50000!\n",
+ "-- The size of the SQL code for FEATURE_1_1 is 133314 characters, which is greater than the size_threshold of 50000!\n",
"-- To display very long features like this anyway, increase the size_threshold or set the size_threshold to None.\n",
"DROP TABLE IF EXISTS \"FEATURE_1_1\";\n",
"\n",
@@ -258757,10 +259005,10 @@
"```"
],
"text/plain": [
- "'-- The size of the SQL code for FEATURE_1_1 is 133317 characters, which is greater than the size_threshold of 50000!\\n-- To display very long features like this anyway, increase the size_threshold or set the size_threshold to None.\\nDROP TABLE IF EXISTS \"FEATURE_1_1\";\\n\\nCREATE TABLE \"FEATURE_1_1\";'"
+ "'-- The size of the SQL code for FEATURE_1_1 is 133314 characters, which is greater than the size_threshold of 50000!\\n-- To display very long features like this anyway, increase the size_threshold or set the size_threshold to None.\\nDROP TABLE IF EXISTS \"FEATURE_1_1\";\\n\\nCREATE TABLE \"FEATURE_1_1\";'"
]
},
- "execution_count": 64,
+ "execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
@@ -258787,7 +259035,7 @@
},
{
"cell_type": "code",
- "execution_count": 65,
+ "execution_count": 66,
"metadata": {},
"outputs": [],
"source": [
@@ -258798,7 +259046,7 @@
},
{
"cell_type": "code",
- "execution_count": 66,
+ "execution_count": 67,
"metadata": {},
"outputs": [],
"source": [
@@ -258929,7 +259177,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.8.10"
+ "version": "3.10.6"
},
"toc": {
"base_numbering": 1,