diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d0f317f4d28..e1799320101 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ fail_fast: false minimum_pre_commit_version: 3.2.0 repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: 'v0.5.7' + rev: 'v0.6.4' hooks: - id: ruff args: [ --fix, --exit-non-zero-on-fix ] diff --git a/docs/guide/3-developer-guide/3-programming/2-accelerator-backend/2-libsmm_acc/2-parameters.md b/docs/guide/3-developer-guide/3-programming/2-accelerator-backend/2-libsmm_acc/2-parameters.md index 44729500ba9..9e54d016be7 100644 --- a/docs/guide/3-developer-guide/3-programming/2-accelerator-backend/2-libsmm_acc/2-parameters.md +++ b/docs/guide/3-developer-guide/3-programming/2-accelerator-backend/2-libsmm_acc/2-parameters.md @@ -14,9 +14,3 @@ The batched matrix-matrix multiplication kernels are templated on: The batched transpose kernels are templated on: * the characteristic dimensions of the transpose: `m, n` - -## Predictive parameters - -The input features for the predictive models can be 'raw' parameters (left-most-column in the figure below), or hand-engineered features 'derived' from the raw features (matrix sizes, launch parameters and resource usage estimations). - -![libsmm_acc_predictive_modeling_features](../../../../../media/images/libsmm_acc_predictive_modeling_features.png) diff --git a/docs/guide/3-developer-guide/3-programming/2-accelerator-backend/2-libsmm_acc/4-predict.md b/docs/guide/3-developer-guide/3-programming/2-accelerator-backend/2-libsmm_acc/4-predict.md deleted file mode 100644 index ce0ff183dd0..00000000000 --- a/docs/guide/3-developer-guide/3-programming/2-accelerator-backend/2-libsmm_acc/4-predict.md +++ /dev/null @@ -1,3 +0,0 @@ -title: Predictive Modeling Framework - -{!./src/acc/libsmm_acc/predict/README.md!} diff --git a/docs/guide/3-developer-guide/3-programming/2-accelerator-backend/2-libsmm_acc/5-notebooks.md b/docs/guide/3-developer-guide/3-programming/2-accelerator-backend/2-libsmm_acc/5-notebooks.md deleted file mode 100644 index f450b61b1ad..00000000000 --- a/docs/guide/3-developer-guide/3-programming/2-accelerator-backend/2-libsmm_acc/5-notebooks.md +++ /dev/null @@ -1,3 +0,0 @@ -title: Notebooks - -{!./src/acc/libsmm_acc/notebooks/README.md!} diff --git a/docs/media/images/README.md b/docs/media/images/README.md deleted file mode 100644 index cf42b988ac8..00000000000 --- a/docs/media/images/README.md +++ /dev/null @@ -1,3 +0,0 @@ -#### libsmm_acc predictive_modelling_features - -The XML file `libsmm_acc_predictive_modelling_features.xml` can be opened in [www.draw.io](www.draw.io) to be edited. diff --git a/docs/media/images/libsmm_acc_predictive_modeling_features.png b/docs/media/images/libsmm_acc_predictive_modeling_features.png deleted file mode 100644 index 69df06ea0eb..00000000000 Binary files a/docs/media/images/libsmm_acc_predictive_modeling_features.png and /dev/null differ diff --git a/docs/media/images/libsmm_acc_predictive_modeling_features.xml b/docs/media/images/libsmm_acc_predictive_modeling_features.xml deleted file mode 100644 index 1a3b13b6908..00000000000 --- a/docs/media/images/libsmm_acc_predictive_modeling_features.xml +++ /dev/null @@ -1 +0,0 @@ -7Z1tc5s6FoB/TWZ270w7IN4/Nmncnbnt3c62u529XzzYEIcNBi+GJumvvxIgDOiA7QRhBx/nQxIZY0DPOdJ50dGVdrN++pS4m/svseeHV0Txnq60j1eEEMew6C/W8ly0qCZxipZVEnhl267hW/DLLxuVsjULPH/bODCN4zANNs3GZRxF/jJttLlJEj82D7uLw+a3btyVLzR8W7qh2Poj8NL7otUm1q79H36wuuffrJrl/S3c5cMqibOo/L4ros3yV/H22uXnKm90e+968WOtSbu90m6SOE6Lv9ZPN37IHi5/bMXnZh3vVted+FF6yAe+3mbZD33z9O1/f/56sG82vyJdf0eIUZznpxtm5RP54qZJ8MSumHbW9oqY9M5ob2nX5X2kz/zZbR+DdehG9L/ruyAMb+IwTvJ32JPQbzTavkpcL6BXWHvvLn+xz8RROnPXQcjYuYmzJPAT+gV/+I/lmyUtRCn/r51EyV+0PXQXfvjV9bwgWtG3WNM2cjff469xkD8YlbWUFzprXmV1Jds0iR986Prjn35yF+b9xm6xegr15152BT0y9Z9qTWU/fPLjtZ8mz/SQ8l1dJcVHSqFxSkQedwASxS7a7uvw8QPdEvpVdepdx9M/yr6HOfhP9ueN+vn3L7b9y7ve+N+jn873d0TVBA7Wv0W/PdDuD+m3Xy9oz5irFKKA3jRrv0/XYfm4E5+i4y7yA1iHbFhX5JdsXF8ZH2mLm6Xxtuhd9gE3DFYR/XtJnyllQLtmzzKgQvqhfGMdeB473fV24y5pT3+PN+W590NUXqkCQHUfJ8Ev2ubyK89puq4kmxMRxVHty/nJBJjIcgnB5JkL0zDZc2Hn9D0OJVcI7GSr0N1uy7+X8TpYlgeJtO2R44MhVLkGLiG0jfeGgKGtiBTqA0BoC7QxGuZLxG2yuDVpUzVTgM2RBJsFw7ZA2C4ENlC1yaLNhGlzkbYLoU0bj7WOOxAR7Ju613Bb3geh99l9jjN2kduUdh//r0DR/8qfn9pq+uI+lQ9dQIGeJ+GwNA/4xr4BhuD2w2x26wAWBCfoqCl9+aEB5u6q3tItFqBbiAF0uKpZ8rpcHOO4vmGSmtvP/BmZ/8/iQv1QK8o0qRVVa1Lu3KXfOKypCeqH7nDijey73hXa6AM9gD7qp/onCrX34CcR8yIUl0fvt7jCDp0Ig1pHStQ/fTiBXBzHWHWug1WKeTBl+yAy7AEYqqjf0cK09IZp5PMbk9pegM4xqmdQaA9fwshik4VmmiI3nuHbng4gQt9zLE+xrNaQc8y4dzA/1sH88HeJ9d7OaTF1QzVVPgKUSku3FUBrVRbh0MMUd4HVeEvvE9/1tnNxVrTxk/kijJdvw/WAcL4ATtVu0sj1UZ1FAim/AVhURdfDT+RqElxVPlM+EbdEv6omScWpjoDVI2I1TaxUaMovDSxxZp8GoT+PcHicKG+q3uSNAGpMlkNBFV0HOW1rZGuabJER2RJDzW64ipGsaZBltciCLExTEloiWQ+I1TSwcppYqcBgqEuiSkyJiM6Mqj2gHJVcc9FUgdpKFldE4Oocp1dnEQqs+Oomso88mWHCV0NnjcocAF0Q5X7W7RnCh0Pl692qhgY6+SHHqsRYtOhubQUm5QccP339N3vkSbxhDOYprVJDj2CIsSOWOCPsR2Swaj+Yl8NjjJwPW5xKwTHHIeLWGpBg6rIsY9GPlfgrerYiQjRfhBc7lTfzl8jGncF+YP20yH/G0E/2wbw9cYze29ruxYEtaTQJpKyg/NMhokCaLtCYU6e4IR0S34NYZlHA9FUOERI5BSLtXiJVootAgvpxCCBFF8ajm2z6iaRPO8pCNwnSZyRyEkRa/UTagIqEfGqDEAnk6K/9NepIJLLm5dV1aNiWxqQYnqLDNp3Qz7bn6EJB3o7nTdV6gdMUEDhFlgsYcJvkVgsbnJG7CXHXr+c040AtNwh0Ynpul6lcGcmFHw9xnAqOpIWj0UqZVMBlB1AC7yDjrpjdVswFEbipALdn3NVNe8xxl5+jARz90GqTIW/T4E3v5c1QId5kpSZpIm7lAgX6ycIPg8mWk8Rwj8NFG2/W13FH4sg7YvwMXjXoOIqie2KcjdGcZlG+ikxuhO249Xqzm1t9JiYRlB9qB974wQczd3TgTa28yPsib1V65qtC/yJCEb2tNx32b3Wab3qK50PqynFM5jUF1ZJnWJrjifrwZarHORgD/q7RTkISoJBVfgUqUsCWYs/fuuN2Clw49nvV3L0M0qDEBIJS0pKG4Mop8d2cldZCTE6MSVN7qIB5Jg0MuH4TBcOLM9bfyMZJ2TBbbJhisaUqkXqceSxg0FMC6KOnk1eiZFtWpZEof0uD6PnvLyw5aN5+nM3g7thVBdlTNaTHMqonidbtGLHaoFgzcBMn9HIiipBbHFbOPZvt/4pTNw3iiFMbPH10t/flf7tyk//M0jCIOg20m5tbI38OR2WAC9IlwvmCMod6A0PLFGc4mm6KGBqc31ctNwIyzpJszgibr92nOb01Nwz9cP4YJ286xayrMuZrrPyZeqsVELWEjLe/1JJ/Sap1pTsO13/cd1RVGBDBg/JgtQH0nwokTlTcBdG8ciwhcRMmzjah+Aw0HxuEOb1b1y2yu7dv1iFwwvS/BZxKQOKgiOAwxAGpOCVxLDQ4r5XxQe6mzB2vh16nDvJbDUEdfBMWYFuUKEaL8GGbs8jCh1OK4SCae9EkqgqpRCiAIxFOMZmHg0ifUVSteeHA5ollLWJZnkXtWMR22tga4EgO+WskYku6derbzvlBAoGVow0ANQXM9YEClBIB7LZoojeeBYQEAtVj9JYW7IBw5MEbWCZYQhgvlxkdk5dveuUVkrh3NNYMMXZCZPkO//vu+ofyPb4zt8/Pm8/Jz89Z9u1dlRhQo/D3OL0vOPyDGdu0hxLaXUjiOCSeOOQjCoKA9+Gy0S0IDTHQFUAMZE1JO8RAnBG0xGAVxgsUAhSC4YSgFcHU9VHd+h1yIE5KmnKAIoAiIE8EwMgWWKhDogiI1RJ2IrChXTD/HYUAhUCaEBjjBts6LhlaDC+mVG3XbhhKzak6LQBNoWhkZnWlb8nKmdqXXSbCeHROlWVZDRLBnCpFFzlUHUcSiJoixt9ybZyThwYq6uSDFPDhy2h4PbGmn0aDNPKogWhNEWN9bUFAExXFYFgxaO0+aYnrQ8aNeGtKh7tyJwUoACgA0gRAVQEJgOoUSJSADk9lIQFon6IMyJYBA5ABWUGrjnsQ1w5C1una94JsLdM8FXt7IPPvuLVEEmw/hyfpNGw/qJeNAdbTwN1si6oujF1vnkVJTHXdnbtM42SuorZDbbdXU7zU8jv1jJfYYkQGEAKCQjCSEIxF3sltLWKLgZBovn5g9G0x333yAEK2DrjBh0QCxShENH+IkMCpEqjuPK1nQ2EV8Whb3IV5gTEInIkOLAyVoW2JUwCoSqFE9DtyIeroY9QBwZc6D3HAcsSjLlnSFHEm0hYElAGUAVkyQLSDS3JLlAHR61qXAQw9oBRIlgILlAJt3NgDUIEHiD2EbrLyP17T27shmCA3kQQ5VXH2Vx3TefHEOo8mn0C8qi6KA1Z/ylGbf3VR9Z7zYsHji9g3w67gDEDauukqoxNEDe29iaFmGSdkjQA5vjvWlsjapFjT9FPqNQKk0VasNUOqOJ5OC7xdwSZeRhHawh7OcRmEPHIoeTi8Tpw8lUALCiSiB9a7gdDD0Xbq6JkjowdWuSnQC+N4M1+76ToLEbtpY0dHv9OXS3R67AxWtX0ZR8ssYU8D67ZfApM6xKQ2bik6p8ceYduP59U6i7LuyOO0eCRNGm1jzIG5g8ae0Rorb08ZxrZTBhyuR00+Ik6P1YIbD0wSQ2K0B2mNbwd2utVmTo/jJpp/vJ4HDBMEcUogtvUhgQrGSss8gAtTKWL0N8+/KUjENOTxWTy7HJyXlaeyjxEFXQWTcMat0aaI0em2KGBaMgqCXEEwQEGQ5dLqKFbYUR5rJwgoAygD8mQAzs2X5bTokIGOyliFDGBaMkqBZCkw4Oz8ca1UAwhudFXsvCL0S5RXlUepknYxORmQ51MkJzvt0hVcC9eYNPjEubEx9yAJo4a4VDDJiqpUFDRMM+jXx9btB/NW6ROzEd0llSo53G/XKhHFN3hobMYtK1XZBIMWFXmYZXAp1PFtvcYIWagmuBajog53gb8o9ih8osaT5QpQgX0/6+ydR3BMBmHHQ9SanZbE7OGpD+TGNLJp+rRl7DQkmiM6pVR4K8+KRMwZuBwVqEEqcNy1uTZgA9doTNdsWEYKJ03hGSwRt/uH5zRCDKeOoQ3mT42MYf/YjNmlF8SjBswKpXlkOu4B0IqAg5pts1b4p8d1VR9VcLvTf9ywDk7r/ZblyhaZPdpVrbdynzVFBdSlDvluqiIyrzJbCFBAvth7u8NhjYuBz1o9Hk3gIbNGTZrvUAPtlB78MJNp0vhxg2EM+OBbMI8l0p2ndFAKsQrzZZGqEmCcHncVpwkuC+lTnojqRaJqQgvqxoYVLKPVA+sSYb1EWIkCzUBHhtU4aFutmhHeZ6wXfw9QBhMzzY7LNEvdhB9h6FejpJ7ZTmvzMDHzTHPAfaOHsKYIOCFgyRfrOX0ay4emikVFes6K9AVbhjTQM8XloiNXdDDExKAIl4hOHjwVHMHHteINcbb5iY6Bmy2iN2n0dP3k6FlA+vchyw9q00ScH05kJYKuNqukG6Y4H9T5lLExH+RbDkrgU9zApRYML5aKLTEt47w1ZaVkXl70Szv1VkLEAmOPTRITJHFyJLZ2jhBV4tgcgq7JJodrylOeHYQzyEnTqOri0pmR04Os3vU0BY7fkcJJUwia0CNnTWqiIRO6WbS8L56BS7/GT0R12LON0wf2M3375NZkP2/BPjE1p8Ed4K4mkPIbxF1titO/aBE+THyAFVDaq9qu8x9RnPz8NbJq045Xba0EM8hDI6tmFp9H1hB7dJPNtlyJNfF1WJeOmgWhpsqyLSwx+BblsJ0WMhlMdWLzitWkhxAnYZXp8ZA59n5HiiyPM7Csb7uee5SeyRe7vHRlpnIToa7LZBmmgI/4MtwfF0+ZAS3XUwfaXpv+m8RxWnvvU+Ju7r/Ens+O+As= \ No newline at end of file diff --git a/src/acc/libsmm_acc/README.md b/src/acc/libsmm_acc/README.md index faa0aab6806..8978689c00b 100644 --- a/src/acc/libsmm_acc/README.md +++ b/src/acc/libsmm_acc/README.md @@ -12,12 +12,10 @@ For a description of the library (some details are outdated, but this neverthele ## Directory Organization -- [`kernels/`](https://github.com/cp2k/dbcsr/blob/develop/src/acc/libsmm_acc/kernels/): GPU kernels (CUDA- and HIP-compatible) for matrix-matrix multiplication and python interface to autotuning and predictive code. -- [`notebooks/`](https://github.com/cp2k/dbcsr/blob/develop/src/acc/libsmm_acc/notebooks/): jupyter notebooks for exploring data generated from autotuning and prediction. +- [`kernels/`](https://github.com/cp2k/dbcsr/blob/develop/src/acc/libsmm_acc/kernels/): GPU kernels (CUDA- and HIP-compatible) for matrix-matrix multiplication and Python interface to autotuning code. - `generate_*.py`: utility scripts for `libsmm_acc` compilation - `libsmm_acc*`: libsmm_acc C++ and CUDA / HIP code -- [`parameters/`](https://github.com/cp2k/dbcsr/blob/develop/src/acc/libsmm_acc/parameters/): contains `parameters_GPU.json` files. These are sets of matrix-matrix multiplication parameters for different (m, n, k)-triplets optimized for a given GPU card. You can explore these parameters interactively using the [provided jupyter notebook](https://github.com/cp2k/dbcsr/blob/develop/src/acc/libsmm_acc/notebooks/inspect_autotuned_parameters.ipynb) -- [`predict/`](https://github.com/cp2k/dbcsr/blob/develop/src/acc/libsmm_acc/predict/): scripts for prediction of optimal parameter sets, see [predictive modeling of kernel parameters](https://github.com/cp2k/dbcsr/blob/develop/src/acc/libsmm_acc/predict/README.md) +- [`parameters/`](https://github.com/cp2k/dbcsr/blob/develop/src/acc/libsmm_acc/parameters/): contains `parameters_GPU.json` files. These are sets of matrix-matrix multiplication parameters for different (m, n, k)-triplets optimized for a given GPU card. - [`tune/`](https://github.com/cp2k/dbcsr/blob/develop/src/acc/libsmm_acc/tune/): scripts for autotuning of optimal parameter sets, see [autotuning of kernel parameters](https://github.com/cp2k/dbcsr/blob/develop/src/acc/libsmm_acc/tune/README.md) ## Matrix-matrix Multiplication Kernels and Parameters @@ -46,7 +44,7 @@ which take between 3 - 7 **parameters** (see figure at the top): - **w**: input slab width (width of slab `P_A` and `P_B`) - **v**: output slab width (width of slab `P_C`) -The performance of the matrix-matrix multiplication kernels is highly dependent on the choice of algorithm and parameters. For this reason, `libsmm_acc` provides lists of optimal parameters for different GPU cards and different (m, n, k)-triplets. These sets of optimal parameters can be found either through *autotuning* or *predictive modeling*. +The performance of the matrix-matrix multiplication kernels is highly dependent on the choice of algorithm and parameters. For this reason, `libsmm_acc` provides lists of optimal parameters for different GPU cards and different (m, n, k)-triplets. ## Contributing to libsmm_acc @@ -56,19 +54,13 @@ We expect users to contribute to the library by providing new optimized kernels Follow the [autotuning procedure](https://github.com/cp2k/dbcsr/blob/develop/src/acc/libsmm_acc/tune/README.md) -#### Predictive modeling of kernel parameters - -Follow the [predictive modeling procedure](https://github.com/cp2k/dbcsr/blob/develop/src/acc/libsmm_acc/predict/README.md) - #### Adding a new kernel 1. Choose a kernel `name` 2. Add the kernel's code (must be able to compile by both `nvcc` and `hip`) in file `kernels/smm_acc_dnt_name.h` -3. Add python kernel class inheriting from base class `kernels/smm_acc_dnt_name.py` - -4. Add the new kernel to the `kernel_algorithm` data structure in [`kernels/smm_acc_predict.py`](https://github.com/cp2k/dbcsr/blob/develop/src/acc/libsmm_acc/kernels/smm_acc_predict.py) +3. Add Python kernel class inheriting from base class `kernels/smm_acc_dnt_name.py` #### Adding support for a new GPU card @@ -85,4 +77,4 @@ Follow the [predictive modeling procedure](https://github.com/cp2k/dbcsr/blob/de } ``` -then add matrix-matrix multiplication parameters for this GPU using *autotuning* and *predictive modeling* +then add matrix-matrix multiplication parameters for this GPU using *autotuning*. diff --git a/src/acc/libsmm_acc/kernels/README.md b/src/acc/libsmm_acc/kernels/README.md index 31e4b81eb61..14a268c3d3d 100644 --- a/src/acc/libsmm_acc/kernels/README.md +++ b/src/acc/libsmm_acc/kernels/README.md @@ -14,8 +14,6 @@ * `smm_acc_dnt_ALGORITHM.h` Batched Multiply Kernel CUDA/HIP code -* [`smm_acc_predict.py`](https://github.com/cp2k/dbcsr/blob/develop/src/acc/libsmm_acc/kernels/smm_acc_predict.py) Class and helper functions for parameter prediction procedure - * [`smm_acc_transpose.h`](https://github.com/cp2k/dbcsr/blob/develop/src/acc/libsmm_acc/kernels/smm_acc_transpose.h) Transpose CUDA/HIP code ## Batched Multiplication Kernels diff --git a/src/acc/libsmm_acc/notebooks/README.md b/src/acc/libsmm_acc/notebooks/README.md deleted file mode 100644 index df0114cf103..00000000000 --- a/src/acc/libsmm_acc/notebooks/README.md +++ /dev/null @@ -1,12 +0,0 @@ -# libsmm_acc Notebooks - -Notebooks for exploring data generated from auto-tuning and prediction. - -**Requirements** -Python version required: python 3.6+ - -Install all python packages required (if you do not want this project's requirements to interfere with your other Python projects, consider doing so in a [virtual environment](https://docs.python.org/3/tutorial/venv.html)), using - -```bash -pip install -r requirements.txt -``` diff --git a/src/acc/libsmm_acc/notebooks/inspect_autotuned_parameters.ipynb b/src/acc/libsmm_acc/notebooks/inspect_autotuned_parameters.ipynb deleted file mode 100644 index e2e971ede58..00000000000 --- a/src/acc/libsmm_acc/notebooks/inspect_autotuned_parameters.ipynb +++ /dev/null @@ -1,279 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "collapsed": true - }, - "source": [ - "# `libcusmm`: explore the space of autotuned parameters" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This notebook can be used to explore the space of autotuned parameters, stored in files named `parameters_GPU.json`." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Library imports" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "import json, os" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Read data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Choose a GPU" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "GPU = 'P100' # Options: K20X, K40, K80, P100, V100, Mi50, Mi100, Mi250" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "params = '../parameters_' + GPU + '.json' \n", - "assert os.path.exists(params)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Read autotuned parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "with open(params) as f:\n", - " all_parameters = pd.DataFrame([params for params in json.load(f)])\n", - "autotuned_parameters = all_parameters[all_parameters['source'] == 'autotuned']\n", - "print(\"Reading autotuned data from\", params)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "ordered_columns = ['m', 'n', 'k', 'perf', 'algorithm', 'threads', 'grouping', 'minblocks', 'tile_m', 'tile_n', 'v', 'w']\n", - "autotuned_parameters = autotuned_parameters[ordered_columns]\n", - "print('Autotuned parameters:')\n", - "display(autotuned_parameters)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Data Description" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Numer of columns:', len(autotuned_parameters.columns), '\\nNumber of rows:', len(autotuned_parameters.index.values))\n", - "print('\\nColumn names:')\n", - "for c in autotuned_parameters.columns.values: \n", - " print(c)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "autotuned_parameters.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "import pandas_profiling \n", - "pandas_profiling.ProfileReport(autotuned_parameters)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Plot performances" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib inline \n", - "import matplotlib.pyplot as plt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "autotuned_parameters['mnk'] = autotuned_parameters['m'] * autotuned_parameters['n'] * autotuned_parameters['k']\n", - "plt.semilogx(autotuned_parameters['mnk'], autotuned_parameters['perf'], '.', markersize=3)\n", - "plt.xlabel('Training (m, n, k) triplets (in order of increasing m*n*k)')\n", - "plt.ylabel('Performance [Gflops]')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Parameter frequencies" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# ignore the 'threads' parameter since it has to be adapted to the size of matrix C\n", - "parameter_set = ['algorithm', 'grouping', 'minblocks', 'tile_m', 'tile_n', 'v', 'w']" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Most frequent parameter sets" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def get_par_set(algorithm, grouping, minblocks, tile_m, tile_n, v, w):\n", - " par_set= algorithm + '_' + str(int(grouping)) + '_' + str(int(minblocks)) \n", - " if not np.isnan(tile_m):\n", - " par_set += '_' + str(int(tile_m)) + '_' + str(int(tile_n))\n", - " if not np.isnan(v):\n", - " par_set += '_' + str(int(v)) + '_' + str(int(w))\n", - " return par_set\n", - " \n", - "vget = np.vectorize(get_par_set)\n", - "autotuned_parameters['param_set'] = vget(*[a for a in autotuned_parameters[parameter_set].values.transpose()])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "param_set_freq = autotuned_parameters['param_set'].value_counts(dropna=True)\n", - "autotuned_parameters['param_set_freq'] = autotuned_parameters['param_set'].apply(lambda item: param_set_freq[item])\n", - "autotuned_parameters.sort_values(by='param_set_freq', ascending=False, inplace=True)\n", - "autotuned_parameters.iloc[:50,:]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Most frequent parameters (independently of each other)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "most_frequent_values = dict()\n", - "for c in autotuned_parameters.columns.values: \n", - " plt.figure\n", - " plt.hist(autotuned_parameters[c].dropna(), bins=50)\n", - " plt.title(c)\n", - " plt.show()\n", - " if c in parameter_set: \n", - " col = autotuned_parameters[c].dropna().values\n", - " values, counts = np.unique(col, return_counts=True)\n", - " ind_most_freq = np.argmax(counts)\n", - " most_freq_val = values[ind_most_freq]\n", - " most_frequent_values[c] = most_freq_val" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.5" - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} diff --git a/src/acc/libsmm_acc/notebooks/inspect_training_data.ipynb b/src/acc/libsmm_acc/notebooks/inspect_training_data.ipynb deleted file mode 100644 index 7778bd35aef..00000000000 --- a/src/acc/libsmm_acc/notebooks/inspect_training_data.ipynb +++ /dev/null @@ -1,607 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "collapsed": true - }, - "source": [ - "# `libcusmm`: Explore the Training Data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This notebook allows you to explore the training data collected from autotuning before proceeding to training." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Import libraries" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import re, sys, os, json, random\n", - "import numpy as np\n", - "import pandas as pd\n", - "import dask.dataframe as dd\n", - "from nb_helper import *" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Read training data from autotuning folders \n", - "\n", - "Read from files of form `tune_*x*x*/raw_training_data_*x*x*_algo.csv`. \n", - "If you want to read from aggregated Parquet files (recommended), skip to lower" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Path to autotuning data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Provide the path to the autotuning data:\n", - "- You can use the bash cell below to navigate your filetree:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%bash\n", - "ls -ad AUTOTUNING_DATA_PATH/tune_*x*x*/" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "- Then, copy what you've replaced `AUTOTUNING_DATA_PATH` with in the Python variable `autotuning_data_path` below:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "autotuning_data_path = '' # may not recognize '~', please provide an absolute path:\n", - "check_autotuning_data_path(autotuning_data_path)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Set options\n", - "\n", - "Set the following options appropriately:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "to_read = 100 # How many / which data folders to read. Options: \n", - " # - 'all': reads from all available data folders. \n", - " # Beware, this might result in memory errors if large amounts of data are made available\n", - " # - a number: reads this number of data folders (e.g. 100)\n", - " # - a regex: reads the data folders with matching regex (e.g. tune_4x*x*)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "algorithm = get_algorithm_to_explore('all') # algorithms to explore. Options: all, tiny, small, medium" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Get the list of folders to read\n", - "folders_to_read = get_folders_to_read(to_read, autotuning_data_path)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Read training data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "raw_files_to_read, derived_files_to_read = get_files_to_read(folders_to_read, algorithm)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "%%time\n", - "num_f = len(files_to_read)\n", - "data_raw = dd.read_csv(raw_files_to_read, dtype={}).set_index(\"Unnamed: 0\")\n", - "data_derived = dd.read_csv(derived_files_to_read, dtype={}).set_index(\"Unnamed: 0\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# merge the two: " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Read training data from Parquet files\n", - "\n", - "Read from files of form `training_data_algorithm.parquet`." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Path to autotuning data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Provide the path to the autotuning data:\n", - "- You can use the bash cell below to navigate your filetree:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%bash\n", - "ls -ad AUTOTUNING_DATA_PATH/*.parquet" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "- Then, copy what you've replaced `AUTOTUNING_DATA_PATH` with in the Python variable `training_data_path` below:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "training_data_path = '../tune_dataset_V100/' # may not recognize '~', please provide an absolute path:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "algorithm = \"small\" # algorithm to explore. Options: tiny, small, medium, largeDB1, largeDB2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "parquet_data_file = os.path.join(training_data_path, \"training_data_\" + algorithm + \".parquet\")\n", - "data = dd.read_parquet(parquet_data_file)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Data inspection\n", - "\n", - "### Data head" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "for i in range(0, len(data.columns.values), page_width):\n", - " display(data.iloc[:,i:i+page_width].head())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Data description" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Data size :', sys.getsizeof(data)/10**6, 'MB')\n", - "print('Number of columns:', len(data.columns.values))\n", - "print('Number of rows : {:,}'.format(len(data.index)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "#for i in range(0, len(data.columns.values), page_width):\n", - "# display(data.iloc[:,i:i+page_width].describe())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Columns" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "#print('Number of columns:', len(data.columns), '\\nNumber of rows:', len(data.index), '\\n')\n", - "for col in data.columns: \n", - " print('{:<40} {}'.format(col, data[col].dtype))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Feature categories\n", - "mnk = ['m', 'n', 'k']\n", - "kernel_pars = ['algorithm', 'threads_per_blk', 'grouping', 'minblocks',\n", - " 'tile_m', 'tile_n', 'w', 'v', 'nbytes_smem', 'nbytes_cmem', 'regs_per_thread']\n", - "kernel_pars = list(set(kernel_pars) & set(data.columns.values))\n", - "perf = ['perf (Gflop/s)', 'perf_scaled']\n", - "common = ['Gflops', 'mxnxk', 'size_a', 'size_b', 'size_c', 'nblks', \n", - " 'warps_per_blk', 'nwarps', 'sm_desired', 'nthreads', 'ru_param_stack_unroll_factor']" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Features\n", - "\n", - "Features in the left-most column correspond to \"raw\" parameters\n", - "* **green** kernel parameters \n", - "* **grey** GPU card properties (taken from Nvidia/AMD documentation) \n", - "* **pink** autotuning parameters (taken from DBCSR codebase) \n", - "\n", - "Other features correspond to derived parameters, computed from the \"raw\" parameters\n", - "* **yellow** matrix sizes\n", - "* **light grey** launch parameters\n", - "* **blue** and **purple** estimations of resource usages" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "![parameters dependency graph](../../../../docs/media/images/libsmm_acc_predictive_modeling_features.png)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "thresh = 300000 # do not perform very long operations on row counts above this threshold" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "data_to_profile = data\n", - "n_rows_data = len(data)\n", - "if n_rows_data > thresh: # if it is a very large dataframe, perform op on subsampled rows\n", - " data_to_profile = data.sample(frac = thresh / n_rows_data)\n", - "\n", - "import pandas_profiling \n", - "pandas_profiling.ProfileReport(data_to_profile.compute())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Data visualization" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib inline\n", - "import matplotlib\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Get Series from Dask to Pandas\n", - "data_mxnxk = data['mxnxk'].compute()\n", - "data_perf = data['perf (Gflop/s)'].compute()\n", - "data_perf_scaled = data['perf_scaled'].compute()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.semilogx(data_mxnxk, data_perf, '.', markersize=1)\n", - "plt.xlabel('Training (m, n, k) triplets (in order of increasing m*n*k)')\n", - "plt.ylabel('Performance [Gflops]')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Data visualization (scaled performance)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.plot(data_mxnxk, data_perf_scaled, '.', markersize=1)\n", - "plt.xlabel('Training (m, n, k) triplets (in order of increasing m*n*k)')\n", - "plt.ylabel('Performance scaled (overall)')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Performance profile" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Choose (m, n, k) triplet\n", - "m_plot, n_plot, k_plot = (4, 4, 4)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "data_mnk = data[data['m'] == m_plot][ \n", - " data['n'] == n_plot][ \n", - " data['k'] == k_plot].compute()\n", - "data_mnk.sort_values(by='perf (Gflop/s)', ascending=True, inplace=True)\n", - "plt.plot(data_mnk['perf (Gflop/s)'].values)\n", - "plt.xlabel('parameter set')\n", - "plt.ylabel('perf (Gflop/s)')\n", - "plt.title('Performance profile for kernel ' + str(m_plot) + 'x'+ str(n_plot) + 'x'+ str(k_plot))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Histograms with Bokeh\n", - "from bokeh.plotting import figure \n", - "from bokeh.models import ColumnDataSource, HoverTool\n", - "from bokeh.io import output_notebook, show\n", - "output_notebook()\n", - "\n", - "# Create histogram\n", - "num_bins = 100 \n", - "hist, edges = np.histogram(data_mnk['perf (Gflop/s)'], bins=num_bins)\n", - "df_hist = pd.DataFrame({'hist': hist, 'left': edges[:-1], 'right': edges[1:]})\n", - "source = ColumnDataSource(df_hist)\n", - "\n", - "# Create tool \n", - "hover = HoverTool(tooltips=[('# occurences', '@hist'), ('low', '@left'), ('high', '@right')])\n", - "\n", - "# Create the figure\n", - "p = figure(plot_width=800, plot_height=800, title=\"Performance histogram\",\n", - " toolbar_location=None, tools=\"\")\n", - "p.xgrid.grid_line_color = None\n", - "p.xaxis.axis_label = \"Performance (GFlop/s)\"\n", - "p.xaxis.major_label_orientation = 1.2\n", - "p.yaxis.axis_label = \"# occurrences\"\n", - "p.quad(source=source, bottom=0, top='hist', left='left', right='right', fill_color='blue')\n", - "p.add_tools(hover)\n", - "show(p)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Histograms with Bokeh\n", - "from bokeh.plotting import figure \n", - "from bokeh.models import ColumnDataSource, HoverTool\n", - "from bokeh.io import output_notebook, show\n", - "output_notebook()\n", - "\n", - "# Create histogram\n", - "num_bins = 100 \n", - "hist, edges = np.histogram(data_mnk['perf_scaled'], bins=num_bins)\n", - "df_hist = pd.DataFrame({'hist': hist, 'left': edges[:-1], 'right': edges[1:]})\n", - "source = ColumnDataSource(df_hist)\n", - "\n", - "# Create tool \n", - "hover = HoverTool(tooltips=[('# occurences', '@hist'), ('low', '@left'), ('high', '@right')])\n", - "\n", - "# Create the figure\n", - "p = figure(plot_width=800, plot_height=800, title=\"Performance histogram\",\n", - " toolbar_location=None, tools=\"\")\n", - "p.xgrid.grid_line_color = None\n", - "p.xaxis.axis_label = \"Performance scaled\"\n", - "p.xaxis.major_label_orientation = 1.2\n", - "p.yaxis.axis_label = \"# occurrences\"\n", - "p.quad(source=source, bottom=0, top='hist', left='left', right='right', fill_color='blue')\n", - "p.add_tools(hover)\n", - "show(p)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Top slices of perf. distribution\n", - "pars_autotuning_top = {\n", - " 5: list(), \n", - " 2: list(), \n", - " 1: list(), \n", - " 0.5: list()\n", - "}\n", - "max_perf = float(data_mnk['perf (Gflop/s)'].max())\n", - "max_perf_idx = data_mnk['perf (Gflop/s)'].idxmax()\n", - "max_perf_row = data_mnk.loc[max_perf_idx]\n", - "max_perf_cond = max_perf_row[mnk + kernel_pars + ['perf (Gflop/s)']]\n", - "\n", - "print('Maximally performing parameter set:')\n", - "display(max_perf_cond)\n", - "for perc in pars_autotuning_top.keys():\n", - " lim = max_perf - max_perf*perc/100\n", - " blob = data_mnk.loc[data_mnk['perf (Gflop/s)'] >= lim]\n", - " print('\\ntop', perc, '%')\n", - " display(blob[kernel_pars + ['perf (Gflop/s)']].describe())\n", - " pars_autotuning_top[perc].append(blob)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Pair plot " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "data_pairplot = data\n", - "n_rows_data = len(data)\n", - "if n_rows_data > thresh: # if it is a very large dataframe, perform op on subsampled rows\n", - " data_pairplot = data.sample(frac = thresh / n_rows_data)\n", - "\n", - "sns.pairplot(data_pairplot[mnk + kernel_pars + perf].compute().dropna())" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.5" - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} diff --git a/src/acc/libsmm_acc/notebooks/libsmm_acc_predictive_modeling_features.png b/src/acc/libsmm_acc/notebooks/libsmm_acc_predictive_modeling_features.png deleted file mode 120000 index b77db205b4d..00000000000 --- a/src/acc/libsmm_acc/notebooks/libsmm_acc_predictive_modeling_features.png +++ /dev/null @@ -1 +0,0 @@ -../../../../docs/media/images/libsmm_acc_predictive_modeling_features.png \ No newline at end of file diff --git a/src/acc/libsmm_acc/notebooks/nb_helper.py b/src/acc/libsmm_acc/notebooks/nb_helper.py deleted file mode 100644 index 1897b8b0320..00000000000 --- a/src/acc/libsmm_acc/notebooks/nb_helper.py +++ /dev/null @@ -1,111 +0,0 @@ -# -*- coding: utf-8 -*- -#################################################################################################### -# Copyright (C) by the DBCSR developers group - All rights reserved # -# This file is part of the DBCSR library. # -# # -# For information on the license, see the LICENSE file. # -# For further information please visit https://dbcsr.cp2k.org # -# SPDX-License-Identifier: GPL-2.0+ # -#################################################################################################### - -import os -import re - - -# =============================================================================== -# I/O -# kernel_folder_pattern = re.compile('tune_(\d+)x(\d+)x(\d+)$') -kernel_folder_pattern = re.compile(r"tune_(\d+x\d+x\d+)$") -page_width = 5 # columns per output line - - -def check_autotuning_data_path(autotuning_data_path): - # sanity checks - assert os.path.exists(autotuning_data_path), ( - "This path does not exist: " + autotuning_data_path - ) - assert len(os.listdir(autotuning_data_path)) > 0, ( - "No folders found in path: " + autotuning_data_path - ) - # print infos - print( - "Number of tuning data folders found: {}".format( - len(os.listdir(autotuning_data_path)) - ) - ) - - -def get_folders_to_read(to_read, autotuning_data_path): - if to_read == "all": - folders_to_read = [ - os.path.join(autotuning_data_path, f) - for f in os.listdir(autotuning_data_path) - if kernel_folder_pattern.match(f) is not None - ] - elif isinstance(to_read, int): - folders_to_read = [ - os.path.join(autotuning_data_path, f) - for f in os.listdir(autotuning_data_path) - if kernel_folder_pattern.match(f) is not None - ] - folders_to_read = folders_to_read[:to_read] - elif isinstance(to_read, str): - to_read = re.compile(to_read) - folders_to_read = [ - os.path.join(autotuning_data_path, f) - for f in os.listdir(autotuning_data_path) - if to_read.match(f) is not None - ] - else: - raise AssertionError("Cannot recognize option: " + to_read) - - num_folders_to_read = len(folders_to_read) - assert num_folders_to_read > 0 - print("Data folders to be read from (total: {:,})\n".format(num_folders_to_read)) - for f in folders_to_read: - print(f) - - return folders_to_read - - -def get_algorithm_to_explore(algo): - algo_to_read = ( - [algo] if algo != "all" else ["tiny", "small", "medium", "largeDB1", "largeDB2"] - ) - print("Algorithm(s) to explore:") - for a in algo_to_read: - print(a) - - return algo_to_read - - -def get_files_to_read(folders_to_read, algo_to_read): - files_to_read = list() - for i, kernel_folder in enumerate(folders_to_read): - print( - "\nfrom {}, read ({}/{:,})".format( - kernel_folder, i + 1, len(folders_to_read) - ) - ) - - for name_algo in algo_to_read: - mnk_string = kernel_folder_pattern.search(kernel_folder).groups()[0] - raw_file_base = "raw_training_data_" + mnk_string + "_" + name_algo + ".csv" - raw_file = os.path.join(kernel_folder, raw_file_base) - derived_file_base = "training_data_" + mnk_string + "_" + name_algo + ".csv" - derived_file = os.path.join(kernel_folder, derived_file_base) - - if os.path.exists(raw_file) and os.path.exists(derived_file): - # Read raw parameters file - files_to_read.append(raw_file) - - # Read derived parameters file - files_to_read.append(derived_file) - - else: - if not os.path.exists(raw_file): - print("\t...{:50} no file".format(raw_file_base)) - if not os.path.exists(derived_file): - print("\t...{:50} no file".format(derived_file_base)) - - return files_to_read diff --git a/src/acc/libsmm_acc/notebooks/requirements.txt b/src/acc/libsmm_acc/notebooks/requirements.txt deleted file mode 100644 index f36ef7a07a0..00000000000 --- a/src/acc/libsmm_acc/notebooks/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -bokeh==1.0.4 -matplotlib==3.0.2 -numpy==1.22.0 -pandas==0.23.4 -pandas-profiling==1.4.1 -seaborn==0.9.0 diff --git a/src/acc/libsmm_acc/predict/README.md b/src/acc/libsmm_acc/predict/README.md deleted file mode 100644 index ddb967a2679..00000000000 --- a/src/acc/libsmm_acc/predict/README.md +++ /dev/null @@ -1,150 +0,0 @@ -# Training Procedure for Predictive Modeling of Optimal Parameters in `libsmm_acc` - -The performance of the matrix-matrix multiplication kernels is highly dependent on the choice of algorithm and parameters, this is why [*autotuning*](https://github.com/cp2k/dbcsr/blob/develop/src/acc/libsmm_acc/README.md) is used to find optimal kernel parameters. - -However, the auto-tuning procedure is expensive, and the space of (m,n,k)-triplets to explore is large. The following predictive modeling procedure is set up to predict optimal parameters for (m,n,k)-triplets that have not been auto-tuned from the data gathered from auto-tuning other (m,n,k)-triplets. - ---- - -### Requirements - -Python version required: `python 3.6+` - -Install all python packages required (if you do not want this project's requirements to interfere with your other Python projects, consider doing so in a [virtual environment](https://docs.python.org/3/tutorial/venv.html)), using - -```bash -pip install -r requirements.txt -``` - ---- - -### Predictive parameters - -The input features for the predictive models can be 'raw' parameters, or hand-engineered features 'derived' from the raw features (matrix sizes, launch parameters and resource usage estimations). - ---- - -### Predictive modeling procedure - -#### 1. Get the data - -Get the data to be used for training, either by downloading data from the [dedicated repository](https://github.com/cp2k/dbcsr-data), or by auto-tuning new kernels yourself and combining them with pre-existing data. - -##### 1.a Download pre-collected data from dedicated repository - -- Download data from the dedicated repository: - - ```bash - wget https://github.com/cp2k/dbcsr-data/blob/master/GPU/raw_training_data_ALGORITHM.csv # for ALGORITHM = tiny, small, medium, largeDB1, largeDB2 - ``` - -- Compute derived parameters from raw parameters and create a record of baseline and maximum performances: run [`prepare_training_data.py`](https://github.com/cp2k/dbcsr/blob/develop/src/acc/libsmm_acc/predict/prepare_training_data.py), providing the CUDA/HIP architecture number and the location of the downloaded data: - - ```bash - ./prepare_training_data.py # –arch 60 --folder /scratch/autotuning_dataset, e.g. - ``` - -##### 1.b (optional) Aquire data from auto-tuning - -- We would appreciate if you would upload the data resulting from your auto-tuning procedure to the [dedicated repository](https://github.com/cp2k/dbcsr-data). For this, please take note, at this stage, of the [information required to upload your data](https://github.com/cp2k/dbcsr-data/blob/master/git-commit.template). - -- If you're auto-tuning data for a new GPU, make sure that the GPU's compute architecture properties are given in the file [`kernels/gpu_properties.json`](https://github.com/cp2k/dbcsr/blob/develop/src/acc/libsmm_acc/kernels/gpu_properties.json). If not, please add them. - -- Follow the [instructions for auto-tuning](tune.md). - -- If all went well, you now have directories named `tune_mxnxk` containing log files in which parameter sets and their corresponding measured performances are recorded. - -- Collect the information in all the `tune_mxnxk` directories into CSV files: run [`predict_collect.py`](https://github.com/cp2k/dbcsr/blob/develop/src/acc/libsmm_acc/predict/predict_collect.py), providing the location of the auto-tuning data: - - ```bash - ./predict_collect.py # --folder /scratch/autotuning_dataset, e.g. - ``` - -You should now have 5 CSV files containing raw data (`raw_training_data_ALGORITHM.csv`, for `ALGORITHM = tiny, small, medium, largeDB1, largeDB2`) - -#### 2. Prepare the data for predictive modeling - -A few steps are needed to make the data ready for training: - -- Record maximum and baseline performances of (m,n,k)-triplets in JSON files -- Compute derived training data and write it to a CSV file -- Compress training data files from CSV to Parquet files - -```bash -./prepare_training_data.py # --folder /scratch/autotuning_dataset -a 60 -j12, e.g. to run with 12 threads -``` - -The data preparation is relatively computationally expensive, especially for large data sets. -A good way of running it, is to - -1. Compute just the maximum and baseline parameters for each algorithm separately (`-l ALGORITHM --skip_derived_data=True`), adjusting the `-j` parameter so it runs fast enough, while not running into "out-of-memory"-errors -2. Run again with `--skip_derived_data=True` to create the files that aggregate maximum and baseline performances for all algorithms. -3. Compute derived data records for each algorithm separately (`-l ALGORITHM`), adjusting the `-j` option. -4. Run the script again without specifying the algorithm nor skipping the derived data to make sure all necessary files have been generated. - -##### At the end, you should end up with the following files: - -- `raw_training_data_ALGORITHM.csv` (containing all *raw* parameters for training a model for algorithm ALGORITHM, obtained in step 1) -- `training_data_ALGORITHM.csv` (containing all *derived* parameters for training a model for algorithm ALGORITHM) -- `training_data_ALGORITHM.parquet` (containing all *raw* and *derived* parameters for training a model for algorithm ALGORITHM in Parquet files, convenient for reading in parallel using Dask) -- `baseline_performances_ALGORITHM.json` and `baseline_performances_by_algo.json` (containing, for each (m, n, k)-triplet in the training data, its baseline performance, i.e. its performance were it to be run with a set of parameters that are an expert's "best guess"). Additionally, the baseline performances are plotted in `baseline_performances.svg`. -- `maximum_performances_ALGORITHM.json`, `max_performances_by_algo.json` and `max_performances.json` (containing, for each (m, n, k)-triplet, its maximum performance). Additionally, the maximum performances are plotted in `maximum_performances.svg`. - -#### 3. (optional) Explore the data - -Explore the data interactively using the [provided Jupyter notebook](https://github.com/cp2k/dbcsr/blob/develop/src/acc/libsmm_acc/notebooks/inspect_training_data.ipynb). - -#### 4. Train - -For each algorithm, build a predictive model using decision trees and feature selection based on the features' permutation importance. - -```bash -./predict_train.py # --algo medium --folder /scratch/autotuning_dataset, e.g. -``` - -Use the command-line parameters `--folder` and `--destination_folder` to choose the folder from which data is read, as well as the folder to which models, logs, etc. are written. -Repeat this step for all algorithms. -This may take several hours. For example, training algorithm 'medium' for the P100 took 11 hours on a single Greina (CSCS) node. -Moreover, depending on the size of the training data, large amounts of memory may be needed. For example, training algorithm 'medium' for the P100 was run on a 192 GB node. - -#### 5. Generate optimal parameters - -Given predictive models (in the form of serialized [scikit-learn](https://scikit-learn.org/) model objects) for all unseen (m,n,k)s, generate or update a file of optimal parameters - -```bash -./predict_genpars.py -c 5000 \ # chunk size - -j 12 \ # 12 threads - --largeDB2 /scratch/largeDB2/feature_tree_refit.p \ # path to models - --largeDB1 /scratch/largeDB1/feature_tree_refit.p \ - --medium /scratch/medium/feature_tree_refit.p \ - --small /scratch/small/feature_tree_refit.p \ - --tiny /scratch/tiny/feature_tree_refit.p -``` - -This may take several hours. For example, generating parameters for the P100 took 8 hours on a single Piz Daint (CSCS) node. For this reason, intermediate results are stored in JSON files in a folder `predict_genpars_ckpt`. Once this script has finished running, and you've successfully obtained a new `parameters_GPU.json` file, you may delete the checkpoint folder `predict_genpars_ckpt`. - -#### 6. Evaluate the predicted parameters - -```bash -./predict_evaluate.py -f libsmm_acc_predicted.out -n libsmm_acc_baseline.out -``` - -#### 7. Contribute your new parameters and data - -##### Contribute training data - -See [instructions](https://github.com/cp2k/dbcsr-data#contributing) in our [dedicated repository](https://github.com/cp2k/dbcsr-data) - -##### Contribute predicted parameters - -Submit a pull request updating the `parameters_GPU.json` file in question. - ---- - -### Contributing to the training procedure - -#### Adding a new predictive feature - -- Choose the new feature's name, "`NAME`" -- Add the feature as a method of `class PredictiveParameters`, named `get_NAME` -- Add the derived feature to the data structure `derived_parameters` in [`kernels/smm_acc_predict.py`](https://github.com/cp2k/dbcsr/blob/develop/src/acc/libsmm_acc/kernels/smm_acc_predict.py) diff --git a/src/acc/libsmm_acc/predict/predict_collect.py b/src/acc/libsmm_acc/predict/predict_collect.py deleted file mode 100755 index ab41ebe1de2..00000000000 --- a/src/acc/libsmm_acc/predict/predict_collect.py +++ /dev/null @@ -1,268 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -#################################################################################################### -# Copyright (C) by the DBCSR developers group - All rights reserved # -# This file is part of the DBCSR library. # -# # -# For information on the license, see the LICENSE file. # -# For further information please visit https://dbcsr.cp2k.org # -# SPDX-License-Identifier: GPL-2.0+ # -#################################################################################################### - -import sys -import os -import re -import glob -import argparse -import pandas as pd - -sys.path.append("../") - -from kernels.smm_acc import to_string, kernel_algorithm, parameter_types # noqa: E402 - - -# =============================================================================== -def main(tunedir): - """ - Once autotuning of new kernels has been run, - - collect the parameter information and performance from log files, - - dump them to CSV files for data analysis and training of a predictive model - """ - # =============================================================================== - # Check for old data files first - for algorithm in kernel_algorithm.keys(): - training_data_file = os.path.join(tunedir, f"raw_training_data_{algorithm}.csv") - if os.path.exists(training_data_file): - print( - f"WARNING: Found old data file {training_data_file}, re(move) it first ... exiting" - ) - sys.exit(1) - - # Find all the 'tune_MxNxK' folders - kernel_folder_pattern = re.compile(r"tune_(\d+)x(\d+)x(\d+)$") - kernel_folders = [ - os.path.join(tunedir, ak) - for ak in os.listdir(tunedir) - if kernel_folder_pattern.match(ak) is not None - ] - n_kernels = len(kernel_folders) - assert n_kernels > 0, ( - "Found no kernel folders of format" - + str(kernel_folder_pattern) - + " in folder " - + tunedir - ) - print(f"Found {n_kernels} kernel folders") - - # Collect information and write to csv - collect_training_data(kernel_folders, kernel_folder_pattern) - - # Print commands to merge CSVs into one big CSV for training data - merge_data_files(tunedir) - - -# =============================================================================== -# Helper variables and functions (formatting & writing) -autotuning_line = re.compile( - r"OK Kernel_dnt_(\w+) m (\d+)\s+n (\d+)\s+k (\d+)\s+" - + r"(?:tile_m (\d+)\s+tile_n (\d+)\s+(?:w (\d+)\s+v (\d+)\s+)?)?" - + r"threads (\d+)\s+grouping (\d+)\s+minblocks (\d+)\s+GFlop/s (\d+(?:\.\d+)?)" -) - - -def read_log_file(log_folder, m, n, k): - """ - Given a folder of kernel autotuning, read and parse the autotuning information in the log file - and return it in the form of a pandas Dataframe. - :param log_folder: folder of kernel autotuning - :return: pandas Dataframe containing autotuning information - """ - # Find log files in the log folder - log_files = [f for f in os.listdir(log_folder) if f[-4:] == ".log"] - assert len(log_files) > 0 - log_files = sorted(log_files) - - # Parse the log files and collect data - data = list() - for log_file in log_files: - print(f"Processing log file {log_file}") - with open(os.path.join(log_folder, log_file), "r") as f: - log_file_content = f.read().splitlines() - - for line in log_file_content: - if "OK" in line: # this line contains autotuning data - # Parse the line - match = autotuning_line.match(line) - assert match is not None, "Found null match: " + line - - # Get algorithm, parameters, and performance - data.append( - { - "m": m, - "n": n, - "k": k, - "algorithm": match.group(1), - "threads": match.group(9), - "grouping": match.group(10), - "minblocks": match.group(11), - "tile_m": ( - match.group(5) if match.group(5) is not None else None - ), - "tile_n": ( - match.group(6) if match.group(6) is not None else None - ), - "w": match.group(7) if match.group(7) is not None else None, - "v": match.group(8) if match.group(8) is not None else None, - "perf (Gflop/s)": match.group(12), - } - ) - - print(f"{len(data)} autotuning lines found") - - # Merge dictionaries into a pandas dataframe - dataframe = pd.DataFrame(data) - for col in dataframe.columns: - dataframe[col] = dataframe[col].astype(parameter_types[col], errors="ignore") - - return dataframe - - -def collect_training_data(kernel_folders, kernel_folder_pattern): - """ - Collect training data from log files resulting of autotuning - """ - - # =============================================================================== - # For each folder: - n_kernels = len(kernel_folders) - for i, kernel_folder in enumerate(kernel_folders): - print(f"\nProcess folder {kernel_folder} ({i+1}/{n_kernels})") - - # Find (m, n, k) - # Each folder contains data for just one (m, n, k) but potentially mutliple algorithms - match = kernel_folder_pattern.search(kernel_folder).groups() - m = int(match[0]) - n = int(match[1]) - k = int(match[2]) - - # =============================================================================== - # Collect info from log files - log_files = [f for f in os.listdir(kernel_folder) if f[-4:] == ".log"] - if len(log_files) > 0: - data = read_log_file(kernel_folder, m, n, k) - else: - print(f"No log files found in folder {kernel_folder} ... skipping") - continue - - # =============================================================================== - # Write parameters to CSV - for name_algo, kernel_algo in kernel_algorithm.items(): - # if applicable to this mnk - if name_algo in data["algorithm"].values: - # Does collected csv file exist already? - raw_parameters_file_name = os.path.join( - kernel_folder, - "raw_training_data_" - + to_string(m, n, k) - + "_" - + name_algo - + ".csv", - ) - - if os.path.exists(raw_parameters_file_name): - print(f"Found csv file {raw_parameters_file_name} ... skipping") - else: - # Get the data corresponding to this algorithm - data_algo = data[data["algorithm"] == name_algo] - # Write raw parameters - pars_to_get = kernel_algo.launch_parameters + ["perf (Gflop/s)"] - data_algo[pars_to_get].to_csv(raw_parameters_file_name, index=False) - print("Wrote", raw_parameters_file_name) - - -# =============================================================================== -def merge_data_files(tunedir): - """ - Merge CSV files - """ - for algorithm in kernel_algorithm.keys(): - training_data_file = os.path.join( - tunedir, "raw_training_data_{algorithm}.csv".format(algorithm=algorithm) - ) - - if os.path.exists(training_data_file): - print(f"\nFound {training_data_file} ... skipping") - os.rename(training_data_file, f"{training_data_file}.bak") - - print(f"\nMerging partial CSV files into {training_data_file} ... ") - - filenames_pattern = os.path.join( - tunedir, - "tune_*/raw_training_data_*_{algorithm}.csv".format(algorithm=algorithm), - ) - print("Merging all files with pattern:", filenames_pattern) - filenames = glob.glob(filenames_pattern) - if len(filenames) == 0: - print("Found no files matching this pattern ... skipping") - - else: - print(f"Found {len(filenames)} files matching this pattern") - - with open(training_data_file, "w") as out: - # Write the first file, including its header - fn_1 = filenames.pop(0) - with open(fn_1) as f: - header_line_ref = next(f) # read header line - out.write(header_line_ref) # write header line - out.write(f.read()) # write the rest of the file - # Write the rest of the files, skipping the header line each time - for i, fn in enumerate(filenames): - print("writing from {} ({}/{})".format(fn, i + 1, len(filenames))) - with open(fn) as f: - header_line = next(f) # skip header line - assert header_line == header_line_ref, ( - 'Cannot merge file "' - + fn - + '", because its header line:\n' - + header_line - + 'is different from the header line of file "' - + fn_1 - + '":\n' - + header_line_ref - ) - out.write(f.read()) - - print("Wrote to {}".format(training_data_file)) - - -# =============================================================================== -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description=""" - Collect matrix-matrix multiplication parameters and performances measured during autotuning. For that, - parse the log files created by the autotuning and record parameter sets and their performances to CSV files. - - This script is part of the workflow for predictive modelling of optimal libsmm_acc parameters. - For more details, see README.md. - """, - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) - parser.add_argument( - "-f", - "--folder", - metavar="FOLDER", - type=str, - default=".", - help="Folder in which the folders tune_*x*x*x/ are to be found", - ) - parser.add_argument( - "-a", - "--arch", - metavar="ARCHITECTURE_NUMBER", - type=int, - default=80, - help="GPU architecture code. Options: sm_35, sm_37, sm_60, sm_70, sm_80, gfx906", - ) - - args = parser.parse_args() - main(args.folder) diff --git a/src/acc/libsmm_acc/predict/predict_evaluate.py b/src/acc/libsmm_acc/predict/predict_evaluate.py deleted file mode 100755 index a5b3de7f4af..00000000000 --- a/src/acc/libsmm_acc/predict/predict_evaluate.py +++ /dev/null @@ -1,174 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -#################################################################################################### -# Copyright (C) by the DBCSR developers group - All rights reserved # -# This file is part of the DBCSR library. # -# # -# For information on the license, see the LICENSE file. # -# For further information please visit https://dbcsr.cp2k.org # -# SPDX-License-Identifier: GPL-2.0+ # -#################################################################################################### - -import re -import numpy as np -import argparse -from predict_helpers import ( - performance_gain, - relative_performance_gain, - plot_absolute_performance_gain, - plot_relative_performance_gain, - plot_performance_gains, -) - - -# =============================================================================== -def main(file, file_baseline): - """ - Given a file containing the results of the LBSMM_ACC performance test, perform evaluation of the predictive model. - """ - # =============================================================================== - # Read optimal-parameter-prediction result file - with open(file) as f: - result_file = f.read().splitlines() - results_predictive_model = read_result_file(result_file) - - # Read baseline result file - with open(file_baseline) as f: - result_file = f.read().splitlines() - results_baseline = read_result_file(result_file) - - # =============================================================================== - # Performance comparison quantities - improved_over_baseline = dict( - zip( - sorted(results_predictive_model.keys()), - [ - results_predictive_model[(m, n, k)] > results_baseline[(m, n, k)] - for m, n, k in sorted(results_predictive_model.keys()) - ], - ) - ) - perf_gain_over_baseline = performance_gain( - results_baseline, results_predictive_model - ) - rel_perf_gain_over_baseline = relative_performance_gain( - results_baseline, results_predictive_model - ) - - # =============================================================================== - # Print results - header = "m, n, k: baseline perf. [Gflops], predictive model perf. [Gflops], performance gain [? ]" - print(header) - line = ( - "{m:>2}, {n:>2}, {k:>2}: {baseline_perf:>7.2f}, {predictive_model_perf:>7.2f}, " - + "{performance_gain:>7.2f}, {better}" - ) - for m, n, k in sorted(results_predictive_model.keys()): - print( - line.format( - m=m, - n=n, - k=k, - baseline_perf=results_baseline[(m, n, k)], - predictive_model_perf=results_predictive_model[(m, n, k)], - performance_gain=perf_gain_over_baseline[(m, n, k)], - better=improved_over_baseline[(m, n, k)], - ) - ) - - print( - "\nKernel performances improved by predictive model:", - list(improved_over_baseline.values()).count(True), - "/", - len(results_predictive_model.keys()), - ) - perf_gain_improved = [pg for pg in perf_gain_over_baseline.values() if pg > 0] - print( - "Mean performance gain amongst improved kernels: {:.2f} Gflops".format( - np.mean(perf_gain_improved) - ) - ) - - print( - "\nKernel performances reduced by predictive model:", - list(improved_over_baseline.values()).count(False), - "/", - len(results_predictive_model.keys()), - ) - perf_gain_deteriorated = [pg for pg in perf_gain_over_baseline.values() if pg < 0] - print( - "Mean performance loss amongst deteriorated kernels: {:.2f} Gflops".format( - np.mean(perf_gain_deteriorated) - ) - ) - - print( - "\nMean performance gain overall: {:.2f} Gflops".format( - np.mean(list(perf_gain_over_baseline.values())) - ) - ) - - # =============================================================================== - # Plot results (testing set: predictive modelling VS naïve) - plot_absolute_performance_gain( - perf_gain_over_baseline, "non-autotuned", "baseline", "predictive model" - ) - plot_relative_performance_gain( - rel_perf_gain_over_baseline, "non-autotuned", "baseline", "predictive model" - ) - plot_performance_gains( - results_predictive_model, - results_baseline, - "non-autotuned", - "baseline", - "predictive model", - ) - - -# =============================================================================== -def read_result_file(file): - results = dict() - result_line = re.compile(r"OK (\d+) x (\d+) x (\d+) GFlop/s (\d+(?:\.\d+)?)") - for line in file: - match = result_line.match(line) - if match is not None: - m = int(match.group(1)) - n = int(match.group(2)) - k = int(match.group(3)) - perf = float(match.group(4)) - results[(m, n, k)] = perf - - return results - - -# =============================================================================== -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description=""" - Given a file containing the results of the LIBSMM_ACC performance test, perform evaluation of the predictive - model. - - This script is part of the workflow for predictive modelling of optimal libsmm_acc parameters. - For more details, see README.md. - """, - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) - parser.add_argument( - "-f", - "--file", - metavar="filename.out", - type=str, - default="", - help="Result file to evaluate. Output of tests/libsmm_acc_timer_multiply.cpp", - ) - parser.add_argument( - "-n", - "--file_baseline", - metavar="filename.out", - type=str, - default="", - help="Baseline performance file to compare against.", - ) - - args = parser.parse_args() - main(args.file, args.file_baseline) diff --git a/src/acc/libsmm_acc/predict/predict_genpars.py b/src/acc/libsmm_acc/predict/predict_genpars.py deleted file mode 100755 index 61f377053ce..00000000000 --- a/src/acc/libsmm_acc/predict/predict_genpars.py +++ /dev/null @@ -1,406 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -#################################################################################################### -# Copyright (C) by the DBCSR developers group - All rights reserved # -# This file is part of the DBCSR library. # -# # -# For information on the license, see the LICENSE file. # -# For further information please visit https://dbcsr.cp2k.org # -# SPDX-License-Identifier: GPL-2.0+ # -#################################################################################################### - -import gc -import os -import sys -import json -import pandas as pd -from itertools import product -import argparse -from joblib import Parallel, delayed -from predict_helpers import safe_pickle_load -from warnings import simplefilter - -simplefilter(action="ignore", category=UserWarning) - -sys.path.append("../") -from kernels.smm_acc import to_tuple, to_string # noqa: E402 -from kernels.smm_acc_predict import ( # noqa: E402 - gpu_architectures, - kernel_algorithm, - params_dict_to_kernel, - PredictiveParameters, -) - -# The joblib backend spawns additional processes, which do not inherit the warning filters applied using warnings.filterwarnings -os.environ["PYTHONWARNINGS"] = "ignore::UserWarning" - - -# =============================================================================== -def main(params, njobs, baseline, paths_to_models, chunk_size): - """ - Update parameter file with new optimal parameter predictions given newly trained decision trees - """ - # =============================================================================== - # Load GPU and autotuning properties - assert ( - os.path.basename(params) in gpu_architectures.keys() - ), "Cannot find compute version for file " + str(params) - arch_code = gpu_architectures[os.path.basename(params)] - with open("../kernels/gpu_properties.json") as f: - gpu_properties = json.load(f)[arch_code] - with open("../kernels/autotuning_properties.json") as f: - autotuning_properties = json.load(f) - - # Load autotuned kernel parameters - with open(params) as f: - all_kernels = [params_dict_to_kernel(**params) for params in json.load(f)] - print("libsmm_acc: Found %d existing parameter sets." % len(all_kernels)) - autotuned_mnks = [(k.m, k.n, k.k) for k in all_kernels if k.autotuned] - autotuned_kernels_ = [k for k in all_kernels if k.autotuned] - autotuned_kernels = dict(zip(autotuned_mnks, autotuned_kernels_)) - - # =============================================================================== - # Construct the list of (m,n,k)-triplets for which parameter sets should be made available to libcusmm - mnks = combinations(list(range(4, 46))) - mnks = set.union(set(mnks), set(autotuned_kernels.keys())) - - # =============================================================================== - # Compute parameter sets - mnks_to_predict = list() - kernels_to_print = dict() - for m, n, k in mnks: - if (m, n, k) in autotuned_kernels.keys(): - kernels_to_print[(m, n, k)] = autotuned_kernels[(m, n, k)] - else: - mnks_to_predict.append((m, n, k)) - - if baseline: - kernels = get_baseline_kernels( - mnks_to_predict, gpu_properties, autotuning_properties - ) - else: - kernels = get_optimal_kernels( - mnks_to_predict, - njobs, - chunk_size, - paths_to_models, - gpu_properties, - autotuning_properties, - 1, - ) - - kernels_to_print.update(kernels) - - # =============================================================================== - # Write to file - with open(params, "w") as f: - s = json.dumps( - [ - kernels_to_print[kernel].as_dict_for_parameters_json - for kernel in sorted(kernels_to_print.keys()) - ] - ) - s = s.replace("}, ", "},\n") - s = s.replace("[", "[\n") - s = s.replace("]", "\n]") - f.write(s) - print("Wrote new predicted parameters to file", params) - - -# =============================================================================== -# Helpers -def combinations(sizes): - return list(product(sizes, sizes, sizes)) - - -def remove_empty_entries(ld): - """ - Given a list of dictionaries "ld", remove its list elements that are empty dicts - """ - return [d for d in ld if d] # empty dictionaries evaluate to False - - -def find_optimal_kernel( - mnk, algo, tree, tree_features, gpu_properties, autotuning_properties -): - """ - Find the optimal kernel parameter set for a given (m, n, k) and a given algorithm - :return: optimal_kernels: dictionary, keys: (m, n, k), values: Kernel object describing best parameters - """ - - # Get parameter space for this (m, n, k) and this algorithm - m, n, k = mnk - parameter_space_ = kernel_algorithm[algo].promising_parameters( - m, n, k, gpu_properties, autotuning_properties - ) - parameter_space = pd.DataFrame(parameter_space_) - del parameter_space_ - parameter_space["algorithm"] = [algo] * len( - parameter_space.index - ) # Add "algorithm" column - if len(parameter_space.index) == 0: - optimal_kernels = dict() - - else: - # Get predictor features from raw parameters - parameter_sets = PredictiveParameters( - parameter_space, gpu_properties, autotuning_properties, None - ) - predictors = parameter_sets.get_features(tree_features) - if algo == "medium": - predictors = predictors.rename( - columns=dict( - zip( - predictors.columns, - [ - "f{}".format(i) - for i in range(0, len(predictors.columns) + 1) - ], - ) - ) - ) - - # Predict performances - performances_scaled = tree.predict(predictors) - del predictors - parameter_performances = parameter_sets.params - del parameter_sets - parameter_performances["perf"] = performances_scaled - del performances_scaled - - # Pick optimal kernel - optimal_kernel = max( - parameter_performances.to_dict("records"), key=lambda x: x["perf"] - ) - del parameter_performances - optimal_kernels = dict() - optimal_kernels[(m, n, k)] = params_dict_to_kernel( - **optimal_kernel, source="predicted" - ) - - return optimal_kernels - - -def get_optimal_kernels( - mnks_to_predict, - njobs, - chunk_size, - paths_to_models, - gpu_properties, - autotuning_properties, - top_k, -): - # optimal_kernels_list is a list of dictionaries - # - keys: (m, n, k), - # - values: Kernel object describing best parameters - # - number of elements in each dictionary = top_k - # each element of the list corresponds to the search of optimal kernels for a given mnk and a given algorithm - - print("Getting optimal kernels") - - # =============================================================================== - # Load predictive trees and feature list - tree = dict() - kernel_to_investigate = dict() - for algo in kernel_algorithm.keys(): - path_to_model = paths_to_models[algo] - if path_to_model is not None: - print( - "Algorithm: {:<8}, loading model from: {}".format(algo, path_to_model) - ) - tree[algo] = dict() - tree[algo]["file"] = path_to_model - features, tree[algo]["tree"] = safe_pickle_load(tree[algo]["file"]) - tree[algo]["features"] = features.tolist() - kernel_to_investigate[algo] = kernel_algorithm[algo] - else: - print("Algorithm: {:<8}, no model found.".format(algo)) - - if len(kernel_to_investigate) == 0: - print("No model found. Specify path to predictive models using ") - sys.exit(1) - - # =============================================================================== - # Get mnks_by_algo to compute: - mnks_by_algo = list(product(mnks_to_predict, kernel_to_investigate.keys())) - num_mnks_by_algo = len(mnks_by_algo) - optimal_kernels_list = list() - ckpt_folder_name = "predict_genpars_ckpt" - - if not os.path.exists(ckpt_folder_name): - os.mkdir(ckpt_folder_name) - print("Caching intermediate results to:", ckpt_folder_name) - - for i in range(0, num_mnks_by_algo, chunk_size): - # Chunk up tasks - start_chunk = i - end_chunk = int(min(start_chunk + chunk_size, num_mnks_by_algo)) - print(f"Completed {i} tasks out of {num_mnks_by_algo}") - - # Create checkpoint file or load checkpointed data from it - checkpoint_file_name = os.path.join( - ckpt_folder_name, f"chunk_{start_chunk}-{end_chunk - 1}.json" - ) - - if os.path.exists(checkpoint_file_name): - with open(checkpoint_file_name, "r") as f: - optimal_kernels_list__ = json.load(f) - optimal_kernels_list_ = list() - for i, optker in enumerate(optimal_kernels_list__): - optimal_kernels_list_.append({}) - for k, v in optker.items(): - algo = v.pop("algorithm") - optimal_kernels_list_[i][to_tuple(k)] = kernel_algorithm[algo]( - **v - ) - print(f"Read chunk {start_chunk}-{end_chunk - 1}\n") - - else: - if njobs == 1: - j = i - optimal_kernels_list_ = list() - # Ignore joblib and run serially: - for mnk, algo in mnks_by_algo[start_chunk:end_chunk]: - j += 1 - gc.collect() - print( - f"{j:6d} of {num_mnks_by_algo}: Find optimal kernels for mnk = {mnk} algo = {algo}" - ) - optker = find_optimal_kernel( - mnk, - algo, - tree[algo]["tree"], - tree[algo]["features"], - gpu_properties, - autotuning_properties, - ) - if optker: - optimal_kernels_list_.append(optker) - - else: - # Run prediction tasks in parallel with joblib - optimal_kernels_list_ = Parallel(n_jobs=njobs, verbose=2)( - delayed(find_optimal_kernel, check_pickle=True)( - mnk, - algo, - tree[algo]["tree"], - tree[algo]["features"], - gpu_properties, - autotuning_properties, - ) - for mnk, algo in mnks_by_algo[start_chunk:end_chunk] - ) - optimal_kernels_list_ = remove_empty_entries(optimal_kernels_list_) - - with open(checkpoint_file_name, "w") as f: - optimal_kernels_list__ = list() - for i, optker in enumerate(optimal_kernels_list_): - optimal_kernels_list__.append({}) - for k, v in optker.items(): - optimal_kernels_list__[i][to_string(k)] = v.as_dict - json.dump(optimal_kernels_list__, f) - print(f"Checkpoint file {checkpoint_file_name} written") - - optimal_kernels_list += optimal_kernels_list_ - - print("Finished gathering candidates for optimal parameter space") - - # Group optimal kernel candidates by (m,n,k) in a dictionary - optimal_kernels_mnk_algo = dict() - for optimal_kernel_mnk in optimal_kernels_list: - for mnk, kernels_mnk in optimal_kernel_mnk.items(): - m, n, k = mnk - if (m, n, k) in optimal_kernels_mnk_algo.keys(): - optimal_kernels_mnk_algo[(m, n, k)].append(kernels_mnk) - else: - optimal_kernels_mnk_algo[(m, n, k)] = [kernels_mnk] - - # Find optimal kernel per mnk among the different algorithm possibilities - optimal_kernels = dict() - for mnk, candidate_kernels in optimal_kernels_mnk_algo.items(): - m, n, k = mnk - optimal_kernel_mnk = sorted( - candidate_kernels, key=lambda x: x.perf, reverse=True - )[:top_k] - optimal_kernels[(m, n, k)] = optimal_kernel_mnk[0] - - return optimal_kernels - - -def get_baseline_kernels(mnks_to_predict, gpu_propertes, autotuning_properties): - print("Getting baseline kernels") - baseline_algorithm = "medium" - baseline_kernels = list() - for m, n, k in mnks_to_predict: - baseline_kernels[(m, n, k)] = kernel_algorithm[baseline_algorithm].baseline( - m, n, k, gpu_propertes, autotuning_properties - ) - - return baseline_kernels - - -# =============================================================================== -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description=""" - Update parameter file with new optimal parameter predictions given newly trained decision trees. - - This script is part of the workflow for predictive modelling of optimal libsmm_acc parameters. - For more details, see README.md. - """, - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) - - parser.add_argument( - "-p", - "--params", - metavar="parameters_GPU.json", - default="../parameters/parameters_A100.json", - help="Parameter file to read and update with predictions", - ) - parser.add_argument( - "-j", "--njobs", type=int, default=-1, help="Number of joblib jobs" - ) - parser.add_argument( - "--baseline", - default=False, - help="Generate a parameter file corresponding to the baseline of a predictive model", - ) - parser.add_argument( - "--tiny", - default=None, - help="Path to model trained for algorithm 'tiny'. If not given, ignore this algorithm.", - ) - parser.add_argument( - "--small", - default=None, - help="Path to model trained for algorithm 'small'. If not given, ignore this algorithm.", - ) - parser.add_argument( - "--medium", - default=None, - help="Path to model trained for algorithm 'medium'. If not given, ignore this algorithm.", - ) - parser.add_argument( - "--largeDB1", - default=None, - help="Path to model trained for algorithm 'largeDB1'. If not given, ignore this algorithm.", - ) - parser.add_argument( - "--largeDB2", - default=None, - help="Path to model trained for algorithm 'largeDB2'. If not given, ignore this algorithm.", - ) - parser.add_argument( - "-c", - "--chunk_size", - type=int, - default=5000, - help="Chunk size for dispatching joblib jobs. If memory errors are experienced, reduce this number", - ) - - args = parser.parse_args() - paths_to_models = dict() - for algo in kernel_algorithm.keys(): - paths_to_models[algo] = args.__dict__[algo] - main(args.params, args.njobs, args.baseline, paths_to_models, args.chunk_size) diff --git a/src/acc/libsmm_acc/predict/predict_helpers.py b/src/acc/libsmm_acc/predict/predict_helpers.py deleted file mode 100644 index 890d793f003..00000000000 --- a/src/acc/libsmm_acc/predict/predict_helpers.py +++ /dev/null @@ -1,301 +0,0 @@ -# -*- coding: utf-8 -*- -#################################################################################################### -# Copyright (C) by the DBCSR developers group - All rights reserved # -# This file is part of the DBCSR library. # -# # -# For information on the license, see the LICENSE file. # -# For further information please visit https://dbcsr.cp2k.org # -# SPDX-License-Identifier: GPL-2.0+ # -#################################################################################################### - -import sys -import os -import pickle -import numpy as np -import pandas as pd -import matplotlib.pyplot as plt - -sys.path.append("../") -from kernels.smm_acc import to_string # noqa: E402 - - -# =============================================================================== -# I/O helpers -def safe_pickle(data, file): - """ - Pickle big files safely by processing them in chunks. - This wrapper is a workaround for a bug on OSX (https://bugs.python.org/issue24658) - - :param data: data to be pickled - :param file: file to pickle it into - """ - max_bytes = 2**31 - 1 # Maximum number of bytes to write in one chunk - pickle_out = pickle.dumps(data) - n_bytes = len(pickle_out) - with open(file, "wb") as f: - count = 0 - for i in range(0, n_bytes, max_bytes): - f.write(pickle_out[i : min(n_bytes, i + max_bytes)]) - count += 1 - - -def safe_pickle_load(file_path): - """ - Load big pickled files safely by processing them in chunks - This wrapper is a workaround a bug on OSX (https://bugs.python.org/issue24658) - - :param data: data to be loaded through pickle - :param file: file to read from - """ - max_bytes = 2**31 - 1 # Maximum number of bytes to read in one chunk - bytes_in = bytearray(0) - input_size = os.path.getsize(file_path) - with open(file_path, "rb") as f: - for _ in range(0, input_size, max_bytes): - bytes_in += f.read(max_bytes) - return pickle.loads(bytes_in) - - -# =============================================================================== -# Model evaluation helpers -def performance_gain(baseline, current): - """ - Compute the absolute perfomance gain, in Gflop/s between a baseline and a 'current' - :param baseline, current: dictionary, keys: (m, n, k), values: performance in Gflop/s - :return: dictionary, keys: (m, n, k), values: performance difference in Gflop/s - """ - return dict( - zip( - sorted(current.keys()), - [ - current[(m, n, k)] - baseline[(m, n, k)] - for m, n, k in sorted(current.keys()) - ], - ) - ) - - -def relative_performance_gain(baseline, current): - """ - Compute the relative perfomance gain (no units), between a baseline and a 'current' - :param baseline, current: dictionary, keys: (m, n, k), values: performance in Gflop/s - :return: dictionary, keys: (m, n, k), values: relative performance difference (no units) - """ - return dict( - zip( - sorted(current.keys()), - [ - (current[(m, n, k)] - baseline[(m, n, k)]) / baseline[(m, n, k)] - for m, n, k in sorted(current.keys()) - ], - ) - ) - - -def plot_absolute_performance_gain( - perf_gain, mnk_names, baseline_name, current_name, pp=None -): - mnk_products = [ - m * n * k - for m, n, k in sorted(perf_gain.keys(), key=lambda x: x[0] * x[1] * x[2]) - ] - - plt.figure() - plt.plot(mnk_products, list(perf_gain.values()), ".", markersize=3) - plt.plot([mnk_products[0], mnk_products[-1]], [0, 0], "-r") - plt.xlabel(mnk_names + " (m, n, k) triplets (in order of increasing m*n*k)") - plt.ylabel("Performance Gain [Gflops]") - plt.title( - "Performance gain of " - + current_name - + " VS " - + baseline_name - + " parameter set" - ) - if pp is not None: - pp.savefig() - else: - plt.show() - plt.close() - - -def plot_relative_performance_gain( - rel_perf_gain, mnk_names, baseline_name, current_name, pp=None -): - mnk_products = [ - m * n * k - for m, n, k in sorted(rel_perf_gain.keys(), key=lambda x: x[0] * x[1] * x[2]) - ] - - plt.figure() - plt.plot( - mnk_products, 100 * np.array(list(rel_perf_gain.values())), ".", markersize=3 - ) - plt.plot([mnk_products[0], mnk_products[-1]], [0, 0], "-r") - plt.xlabel(mnk_names + " (m, n, k) triplets (in order of increasing m*n*k)") - plt.ylabel("Performance Gain [%]") - plt.title( - "Relative performance gain of " - + current_name - + " VS " - + baseline_name - + " parameter set" - ) - if pp is not None: - pp.savefig() - else: - plt.show() - plt.close() - - -def plot_performance_gains( - perf_gain1, perf_gain2, mnk_names, perf_gain1_name, perf_gain2_name, pp=None -): - mnks = [ - (m, n, k) - for m, n, k in sorted(perf_gain2.keys(), key=lambda x: x[0] * x[1] * x[2]) - ] - mnk_products = [ - m * n * k - for m, n, k in sorted(perf_gain2.keys(), key=lambda x: x[0] * x[1] * x[2]) - ] - res1 = [perf_gain1[mnk] for mnk in mnks] - res2 = [perf_gain2[mnk] for mnk in mnks] - - marker_size = 3 - plt.figure() - plt.plot(mnk_products, res1, ".", markersize=marker_size) - plt.plot(mnk_products, res2, ".", color="#d62728", markersize=marker_size) - plt.xlabel(mnk_names + " (m, n, k) triplets (in order of increasing m*n*k)") - plt.ylabel("Performance [Gflops]") - plt.xscale("log") - plt.legend([perf_gain1_name, perf_gain2_name]) - plt.title( - "Performance of " - + perf_gain1_name - + " and " - + perf_gain2_name - + " parameter set" - ) - if pp is not None: - pp.savefig() - else: - plt.show() - plt.close() - - -def plot_scaled_performance_gains( - perf_gain1, perf_gain2, mnk_names, perf_gain1_name, perf_gain2_name, pp=None -): - mnks = [ - (m, n, k) - for m, n, k in sorted(perf_gain2.keys(), key=lambda x: x[0] * x[1] * x[2]) - ] - mnk_products = [ - m * n * k - for m, n, k in sorted(perf_gain2.keys(), key=lambda x: x[0] * x[1] * x[2]) - ] - res1 = np.array([perf_gain1[mnk] for mnk in mnks]) - res2 = np.array([perf_gain2[mnk] for mnk in mnks]) - - marker_size = 3 - plt.figure() - plt.plot(mnk_products, 100 * res1, ".", markersize=marker_size) - plt.plot(mnk_products, 100 * res2, ".", color="#d62728", markersize=marker_size) - plt.xlabel(mnk_names + " (m, n, k) triplets (in order of increasing m*n*k)") - plt.ylabel("Scaled performance [%]") - plt.xscale("log") - plt.legend([perf_gain1_name, perf_gain2_name]) - plt.title( - "Performance of " - + perf_gain1_name - + " and " - + perf_gain2_name - + " parameter set" - ) - if pp is not None: - pp.savefig() - else: - plt.show() - plt.close() - - -def plot_choice_goodness( - m, - n, - k, - baseline_performances, - max_performances, - y_true, - y_pred, - train, - pp, - scaled=True, -): - # Sort in ascending performances - data_mnk = pd.DataFrame() - if scaled: - data_mnk["perf_true"] = (100 * y_true).tolist() - data_mnk["perf_pred"] = (100 * y_pred).tolist() - else: - data_mnk["perf_true"] = y_true.flatten().tolist() - data_mnk["perf_pred"] = y_pred.tolist() - data_mnk.sort_values(by="perf_true", inplace=True) - - # Plot - plt.figure() - marker_size = 1 - par_set_ids = range(len(data_mnk.index.values)) - plt.plot( - par_set_ids, - data_mnk["perf_true"], - "b.", - markersize=marker_size, - label="measured performances", - ) - plt.xlabel("Parameter set id") - plt.ylabel("Percentage of autotuned performance achieved [%]") - type = "train" if train else "test" - plt.title( - "Performance profile of parameter sets for " - + str((m, n, k)) - + "-triplet (" - + type - + ")" - ) - - # Annotate - x = [0, len(y_true)] - y = np.array([1, 1]) - perf_num = "{:2.2f}" - - # chosen - idx_perf_chosen = data_mnk["perf_pred"].idxmax() - perf_chosen = data_mnk["perf_true"][idx_perf_chosen] - plt.plot( - x, - perf_chosen * y, - "r-", - label="perf of chosen param set: " + perf_num.format(perf_chosen) + "%", - ) - - # baseline - if scaled: - # baseline = per algo, scale it to 0-1 - perf_baseline = ( - 100 - * baseline_performances[to_string(m, n, k)] - / max_performances["{}x{}x{}".format(m, n, k)] - ) - else: - perf_baseline = baseline_performances[to_string(m, n, k)] - plt.plot( - x, - perf_baseline * y, - "g-", - label="perf of baseline param set: " + perf_num.format(perf_baseline) + "%", - ) - - plt.legend(loc="lower right") - pp.savefig() - plt.close() diff --git a/src/acc/libsmm_acc/predict/predict_train.py b/src/acc/libsmm_acc/predict/predict_train.py deleted file mode 100755 index cf2b3845202..00000000000 --- a/src/acc/libsmm_acc/predict/predict_train.py +++ /dev/null @@ -1,1685 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -#################################################################################################### -# Copyright (C) by the DBCSR developers group - All rights reserved # -# This file is part of the DBCSR library. # -# # -# For information on the license, see the LICENSE file. # -# For further information please visit https://dbcsr.cp2k.org # -# SPDX-License-Identifier: GPL-2.0+ # -#################################################################################################### - -import os -import sys -import datetime -import json -import random -import numpy as np -import pandas as pd -import xgboost as xgb -import dask.dataframe as dd -import matplotlib.pyplot as plt -import argparse -from predict_helpers import ( - safe_pickle, - safe_pickle_load, - plot_choice_goodness, - plot_performance_gains, - plot_scaled_performance_gains, - plot_absolute_performance_gain, - plot_relative_performance_gain, - performance_gain, -) - -sys.path.append("../") -from kernels.smm_predict import to_tuple, to_string # noqa: E402 - -visual_separator = ( - "\n----------------------------------------------------------------------------" -) - - -# =============================================================================== -def main( - datadir, - destdir, - algo, - model_args, - nrows, - prefitted_model_folder, - run_intermediate_evaluation, -): - """ - Train a Machine Learning model on autotuning data to predict a kernel's performance given - its template parameters - """ - # =============================================================================== - # Create folder to store results of this training and start a log - folder, log_file, log = get_log_folder(prefitted_model_folder, destdir, algo) - - # =============================================================================== - # Override algorithm option if working on a pre-fitted model, and log program options - log += print_and_log(visual_separator) - algo, model_args, nrows, log = dump_or_load_options( - algo, model_args, prefitted_model_folder, nrows, folder, log - ) - - # =============================================================================== - # Get maximum and baseline performances - ( - max_performances, - max_performances_algo, - max_performances_ref, - baseline_performances_algo, - ) = get_reference_performances(datadir, algo) - - # =============================================================================== - # Read data - log += print_and_log(visual_separator) - X, X_mnk, Y, log, data_nrows = read_data(algo, datadir, nrows, folder, log) - - # =============================================================================== - # AT THIS POINT, WE MOVE FROM DASK (out-of-memory dataframes) TO PANDAS - # =============================================================================== - log += print_and_log("[moving to pandas] Compute X ...") - X = X.compute() - log += print_and_log("[moving to pandas] Compute Y ...") - Y = Y.compute() - log += print_and_log("[moving to pandas] Compute X_mnk ...") - X_mnk = X_mnk.compute() - log += print_and_log("[moving to pandas] Done") - - # =============================================================================== - # Get or train partial model (i.e. trained on the "training" part of the data, not the entire dataset) - log += print_and_log(visual_separator) - if len(prefitted_model_folder) == 0: # train a model - log += print_and_log("\nPreparing to fit model...") - ( - X_train, - Y_train, - X_mnk_train, - X_test, - Y_test, - X_mnk_test, - model_partial, - log, - ) = train_model(X, X_mnk, Y, algo, model_args, folder, log) - - else: # load pre-trained model - log += print_and_log( - "\nReading partial pre-fitted partial model from " + prefitted_model_folder - ) - ( - X_train, - Y_train, - X_mnk_train, - X_test, - Y_test, - X_mnk_test, - model_partial, - log, - ) = fetch_pre_trained_model_partial( - X, X_mnk, Y, model_args, prefitted_model_folder, log - ) - - # =============================================================================== - # Evaluate partial model - if model_partial is not None: - log = evaluate_model( - model_partial, - X_train, - X_mnk_train, - Y_train, - X_test, - X_mnk_test, - Y_test, - max_performances_ref, - max_performances_algo, - baseline_performances_algo, - data_nrows, - log, - folder, - ) - - # =============================================================================== - # Refit to the entire dataset - # Get or train model fit on the entire dataset (i.e. not just on the "training" part of the data) - model_file = os.path.join(prefitted_model_folder, "feature_tree_refit.p") - if ( - run_intermediate_evaluation - or len(prefitted_model_folder) == 0 - or not os.path.exists(model_file) - ): - log += print_and_log(visual_separator) - log += print_and_log("\nRefit to the entire dataset:") - X = X_train.append(X_test, ignore_index=True) - X_mnk = X_mnk_train.append(X_mnk_test, ignore_index=True) - Y = Y_train.append(Y_test, ignore_index=True) - model_partial.fit(X, Y) - model = ( - model_partial # This model is fit on the entire dataset, it is not partial - ) - results_file = os.path.join(folder, "feature_tree_refit.p") - safe_pickle([X.columns.values, model], results_file) - else: - log += print_and_log( - "\nReading pre-fitted model from " + prefitted_model_folder - ) - X, model, log = fetch_pre_trained_model(prefitted_model_folder, X, log) - - # =============================================================================== - # Evaluate refit-model - log = evaluate_model( - model, - X, - X_mnk, - Y, - None, - None, - None, - max_performances_ref, - max_performances_algo, - baseline_performances_algo, - data_nrows, - log, - folder, - ) - - # =============================================================================== - # Print log - log += print_and_log(visual_separator) - with open(log_file, "w") as f: - f.write(log) - - -# =============================================================================== -# Model hyperparameters -optimized_hyperparameters = { - # chosen by hyperparameter optimization. The optimal parameter depends on the GPU, the data ... - # the values below are the average of the optimal value for the P100 and the V100 - "tiny": { - "scikit_max_depth": 16, - "scikit_min_samples_leaf": 2, - "scikit_min_samples_split": 15, - "xgboost_max_depth": 12, - "xgboost_learning_rate": 0.1, - "xgboost_n_estimators": 100, - }, - "small": { - "scikit_max_depth": 16, - "scikit_min_samples_leaf": 2, - "scikit_min_samples_split": 15, - "xgboost_max_depth": 14, - "xgboost_learning_rate": 0.1, - "xgboost_n_estimators": 170, - }, - "medium": { - "scikit_max_depth": 18, - "scikit_min_samples_leaf": 2, - "scikit_min_samples_split": 13, - "xgboost_max_depth": 14, - "xgboost_learning_rate": 0.1, - "xgboost_n_estimators": 140, - }, - "largeDB1": { - "scikit_max_depth": 18, - "scikit_min_samples_leaf": 2, - "scikit_min_samples_split": 15, - "xgboost_max_depth": 14, - "xgboost_learning_rate": 0.1, - "xgboost_n_estimators": 170, - }, - "largeDB2": { - "scikit_max_depth": 18, - "scikit_min_samples_leaf": 2, - "scikit_min_samples_split": 15, - "xgboost_max_depth": 14, - "xgboost_learning_rate": 0.1, - "xgboost_n_estimators": 170, - }, -} - - -# =============================================================================== -# Printing and dumping helpers -def get_log_folder(prefitted_model_folder, destination_folder, algo): - """Create a unique log folder for this run in which logs, plots etc. will be stored""" - if len(prefitted_model_folder) == 0: - # Create a new folder for this model - file_signature = datetime.datetime.now().strftime("%Y-%m-%d--%H-%M") - folder_name = os.path.join( - "model_selection", os.path.join(algo, file_signature) - ) - if destination_folder != ".": - folder = os.path.join(destination_folder, folder_name) - else: - folder = folder_name - log_file = os.path.join(folder, "log.txt") - if not os.path.exists(folder): - while True: # loop until we've created a folder - try: - os.makedirs(folder) - break - except FileExistsError: - time_stamp_seconds = datetime.datetime.now().strftime("-%S") - new_folder = folder + time_stamp_seconds - print( - "Folder {} exists already. Trying to create folder {}.".format( - folder, new_folder - ) - ) - folder = new_folder - - else: - # If loading a pre-fitted model, use this pre-fitted model's folder as a log folder, but create a new log file - folder = prefitted_model_folder - log_file_signature = datetime.datetime.now().strftime("%Y-%m-%d--%H-%M") - log_file = os.path.join(folder, "log_" + log_file_signature + ".txt") - - # Log folder and file - log = "" - log += print_and_log("\nLogging to:") - log += print_and_log("\t" + folder) - log += print_and_log("\t" + log_file) - - return folder, log_file, log - - -def dump_or_load_options(algo, model_args, prefitted_model, nrows, folder, log): - options_file_name = os.path.join(folder, "options.json") - pgm_options = {"folder": folder, "algo": algo, "nrows": nrows} - pgm_options.update(model_args) - - if len(prefitted_model) == 0: - # if we're training a model, dump options to folder so they can be reloaded in another run - print("Dump options to", options_file_name) - with open(options_file_name, "w") as f: - json.dump(pgm_options, f) - - else: - # if we're using a pre-fitted model, load options from that model - print("Read options from", options_file_name) - with open(options_file_name, "r") as f: - pgm_options = json.load(f) - - algo = pgm_options["algo"] - model_args_list = ["model", "splits", "ntrees", "njobs"] - model_args = dict() - for m in model_args_list: - model_args[m] = pgm_options[m] - nrows = pgm_options["nrows"] - - # Log options - log += print_and_log("Predict-train running with options:") - for opt, opt_val in pgm_options.items(): - log += print_and_log("{:<15}: {}".format(opt, opt_val)) - - return algo, model_args, nrows, log - - -def print_and_log(msg): - if not isinstance(msg, str): - msg = str(msg) - log = "\n" + msg - print(msg) - return log - - -def dask_to_pandas(*dfs): - """Convert training data dask -> pandas""" - pd_dfs = [df.compute() for df in dfs] - return pd_dfs[0] if len(pd_dfs) == 1 else pd_dfs - - -def pandas_to_dask(*dfs): - """Convert training data pandas -> dask""" - dd_dfs = [dd.from_pandas(df, npartitions=3) for df in dfs] - return dd_dfs[0] if len(dd_dfs) == 1 else dd_dfs - - -# =============================================================================== -# Custom loss functions and scorers -def perf_loss(y_true, y_pred, top_k, X_mnk, scaled=True): - """ - Compute the relative performance losses per mnk if one were to measure the top-k best predicted sets of parameters - and pick the best out of this top-k - - :param y_true: ground truth performances (performance scaled between 0 and 1) - :param y_pred: estimated performances (performance scaled between 0 and 1) - :param top_k: number of top performances to measure - :param X_mnk: corresponding mnks - :return: perf_losses: array of relative performance losses (in %), one array element per mnk - """ - assert len(y_true.index) == y_pred.flatten().size - assert len(y_true.index) == len(X_mnk.index) - - perf_losses = list() - mnks = np.unique(X_mnk["mnk"].values) - for mnk in mnks: - # Get performances per mnk - idx_mnk = np.where(X_mnk == mnk)[0].tolist() - assert len(idx_mnk) > 0, "idx_mnk is empty" - y_true_mnk = y_true.iloc[idx_mnk] - y_pred_mnk = y_pred[idx_mnk] - - # Get top-k best predicted performances - if top_k != 1: - top_k_idx = np.argpartition(-y_pred_mnk, top_k)[:top_k] - else: - top_k_idx = np.argmax(y_pred_mnk) - y_correspmax = y_true_mnk.iloc[top_k_idx] - - # Chosen max perf. among predicted max performances - maxperf_chosen = np.amax(y_correspmax) - - # True Max. performances - if not scaled: - maxperf = float(y_true_mnk.max(axis=0)) - assert maxperf >= 0, "Found non-positive value for maxperf: " + str(maxperf) - perf_loss = (maxperf - maxperf_chosen) / maxperf - else: - perf_loss = 1.0 - maxperf_chosen - - # Relative performance loss incurred by using model-predicted parameters instead of autotuned ones [%] - perf_losses.append(100 * perf_loss) - - return perf_losses - - -def worse_rel_perf_loss_of_k(y_true, y_pred, top_k, X_mnk, scaled=True): - y = np.array(perf_loss(y_true, y_pred, top_k, X_mnk, scaled)) - return float(y.max(axis=0)) - - -def mean_rel_perf_loss_of_k(y_true, y_pred, top_k, X_mnk, scaled=True): - y = np.array(perf_loss(y_true, y_pred, top_k, X_mnk, scaled)) - return float(y.mean(axis=0)) - - -def worse_case_scorer(estimator, X, y, top_k): - """ - :param estimator: the model that should be evaluated - :param X: validation data - :param y: ground truth target for X - :return: score: a floating point number that quantifies the estimator prediction quality on X, with reference to y - """ - mnk = dd.DataFrame() - mnk["mnk"] = X["mnk"].copy() - y_pred = estimator.predict(X.drop(["mnk"].values, axis=1)) - score = worse_rel_perf_loss_of_k(y, y_pred, top_k, mnk) - return ( - -score - ) # by scikit-learn convention, higher numbers are better, so the value should be negated - - -def worse_case_scorer_top1(estimator, X, y): - return worse_case_scorer(estimator, X, y, 1) - - -def mean_scorer(estimator, X, y, top_k): - """ - :param estimator: the model that should be evaluated - :param X: validation data - :param y: ground truth target for X - :return: score: a floating point number that quantifies the estimator prediction quality on X, with reference to y - """ - mnk = dd.DataFrame() - mnk["mnk"] = X["mnk"].copy() - y_pred = estimator.predict(X.drop(["mnk"].values, axis=1)) - score = mean_rel_perf_loss_of_k(y, y_pred, top_k, mnk) - return ( - -score - ) # by scikit-learn convention, higher numbers are better, so the value should be negated - - -def mean_scorer_top1(estimator, X, y): - return mean_scorer(estimator, X, y, 1) - - -# =============================================================================== -# Read and prepare data -def get_reference_performances(folder, algo): - import json - - maxperf_file = os.path.join(folder, "max_performances.json") - with open(maxperf_file) as f: - max_performances = json.load(f) - - maxperf_file = os.path.join(folder, "max_performances_by_algo.json") - with open(maxperf_file) as f: - max_performances_algo = json.load(f)[algo] - - max_performances_ref = max_performances - - baseline_file = os.path.join(folder, "baseline_performances_by_algo.json") - with open(baseline_file) as f: - baseline_performances_algo = json.load(f)[algo] - - return ( - max_performances, - max_performances_algo, - max_performances_ref, - baseline_performances_algo, - ) - - -def read_data(algo, read_from, nrows, folder, log): - parquet_data_file = os.path.join(read_from, "training_data_" + algo + ".parquet") - log += print_and_log("\nRead data from " + parquet_data_file) - - # =============================================================================== - # Get 'X' - cols_to_ignore = [ - "perf_scaled", - "mnk", - "perf (Gflop/s)", - "perf_scaled_by_algo", - "perf_squared", - ] - X = dd.read_parquet(parquet_data_file) - cols_to_drop = set(cols_to_ignore).intersection(set(X.columns.values)) - log += print_and_log("\nDropping following columns from X:\n" + str(cols_to_drop)) - X = X.drop(cols_to_drop, axis=1) - log += print_and_log( - "X : {:>8,} x {:>8,} ({:>2.2} MB)".format( - len(X), len(X.columns), sys.getsizeof(X) / 10**6 - ) - ) - log += print_and_log("Head:") - log += print_and_log(X.head()) - n_features = len(list(X.columns)) - predictor_names = X.columns.values - log += print_and_log("\nPredictor variables: (" + str(n_features) + ")") - for i, p in enumerate(predictor_names): - log += print_and_log("\t{:2}) {}".format(i + 1, p)) - - # =============================================================================== - # Get 'Y' - log += print_and_log("\nRead Y") - Y = dd.read_parquet(parquet_data_file, columns=["perf_scaled"]) - log += print_and_log( - "Y : {:>8,} ({:>2.2} MB)".format(len(Y), sys.getsizeof(Y) / 10**6) - ) - log += print_and_log("Head:") - log += print_and_log(Y.head()) - - # =============================================================================== - # Get 'X_mnk' - log += print_and_log("\nRead X_mnk") - X_mnk = dd.read_parquet(parquet_data_file, columns=["mnk"]) - nrows_data = len(X_mnk.index) - log += print_and_log( - "X_mnk : {:>8,} ({:>2.2} MB)".format(nrows_data, sys.getsizeof(X_mnk) / 10**6) - ) - log += print_and_log("Head:") - log += print_and_log(X_mnk.head()) - log += print_and_log("# unique mnks:") - log += print_and_log(str(X_mnk["mnk"].nunique().compute()) + "\n") - - return X, X_mnk, Y, log, nrows_data - - -# =============================================================================== -# Predictive modelling -def get_hyperparameter_grid(algo, model_name, n_features): - # Hyper-parameters to optimize - param_grid = dict() - if "scikit" in model_name: # it is a scikit-learn model - if algo == "medium": - max_depth = [10, 13, 16, 18, 21, 24] - min_samples_split = [2, 8, 12, 18] - min_samples_leaf = [2, 8, 12, 18] - elif algo == "tiny": - step = 1 - max_depth = range(4, int(2 * n_features) + 1, step) - min_samples_split = range(1, 26, step) - min_samples_leaf = range(1, 26, step) - elif algo == "small": - step = 3 - max_depth = range(4, int(2 * n_features) + 1, step) - min_samples_split = [2, 5, 8, 13, 18] - min_samples_leaf = [2, 5, 8, 13, 18] - else: # largeDB1,2 - step = 3 - max_depth = range(4, int(2 * n_features) + 1, step) - min_samples_split = range(2, 21, step) - min_samples_leaf = range(2, 21, step) - param_grid = { - model_name + "__estimator__" + "max_depth": list(max_depth), - model_name + "__estimator__" + "min_samples_split": list(min_samples_split), - model_name + "__estimator__" + "min_samples_leaf": list(min_samples_leaf), - } - elif "xgb" in model_name: # it is an XGBOOST model - if algo == "medium": - max_depth = [16, 13] - n_estimators = [100, 140] - learning_rate = [0.1] - elif algo == "tiny": - max_depth = range(10, n_features + 2, 1) - n_estimators = range(30, 160, 20) - learning_rate = range(1, 5) - learning_rate = [i / 10 for i in learning_rate] - elif algo == "small": - max_max_depth = 20 - max_depth = range(10, min(max_max_depth, n_features + 2), 4) - n_estimators = range(50, 200, 30) - learning_rate = [0.1, 0.3] - else: # largeDB1,2 - max_max_depth = 20 - max_depth = range(10, min(max_max_depth, n_features + 2), 4) - n_estimators = range(50, 200, 30) - learning_rate = [0.1, 0.3] - param_grid = { - "max_depth": list(max_depth), - "learning_rate": list(learning_rate), - "n_estimators": list(n_estimators), - } - else: - raise AssertionError("Cannot recognize model: " + model_name) - - return param_grid - - -def get_scikit_DecisionTree_model(algo): - from sklearn.tree import DecisionTreeRegressor - - model = DecisionTreeRegressor( - criterion="mse", - splitter="best", - min_samples_split=optimized_hyperparameters[algo]["scikit_min_samples_split"], - min_samples_leaf=optimized_hyperparameters[algo]["scikit_min_samples_leaf"], - max_depth=optimized_hyperparameters[algo]["scikit_max_depth"], - max_features=None, - max_leaf_nodes=None, - ) - # Feature selection through permutation importance - from eli5.sklearn import PermutationImportance - - model_perm = PermutationImportance(model, cv=None) - return model_perm, "scikit-Decision_Tree" - - -def get_scikit_RandomForest_model(algo, njobs, ntrees): - from sklearn.ensemble import RandomForestRegressor - - model = RandomForestRegressor( - criterion="mse", - n_estimators=ntrees, - min_samples_split=optimized_hyperparameters[algo]["scikit_min_samples_split"], - min_samples_leaf=optimized_hyperparameters[algo]["scikit_min_samples_leaf"], - max_depth=optimized_hyperparameters[algo]["scikit_max_depth"], - bootstrap=True, - max_features="sqrt", - n_jobs=njobs, - ) - return model, "scikit-Random_Forest" - - -def get_xgb_DecisionTree_model(algo, njobs, ntrees): - params = { - "max_depth": optimized_hyperparameters[algo]["xgboost_max_depth"], - "learning_rate": optimized_hyperparameters[algo]["xgboost_learning_rate"], - "n_estimators": optimized_hyperparameters[algo]["xgboost_n_estimators"], - "tree_method": "exact", - "verbosity": 2, - "objective": "reg:squarederror", - "booster": "gbtree", - "n_jobs": njobs, - } - model = xgb.XGBRegressor(**params) - return model, "xgb-Decision_Tree" - - -def get_xgb_DecisionTree_dask_model(algo, njobs, ntrees): - params = { - "max_depth": optimized_hyperparameters[algo]["xgboost_max_depth"], - "learning_rate": optimized_hyperparameters[algo]["xgboost_learning_rate"], - "n_estimators": optimized_hyperparameters[algo]["xgboost_n_estimators"], - "tree_method": "exact", - "verbosity": 2, - "objective": "reg:squarederror", - "booster": "gbtree", - "n_jobs": njobs, - } - from dask_ml.xgboost import XGBRegressor_dask - - model = XGBRegressor_dask(**params) - return model, "xgb-Decision_Tree_dask" - - -def get_xgb_DecisionTree_GPU_model(algo, njobs, ntrees): - params = { - "max_depth": optimized_hyperparameters[algo]["xgboost_max_depth"], - "learning_rate": optimized_hyperparameters[algo]["xgboost_learning_rate"], - "n_estimators": optimized_hyperparameters[algo]["xgboost_n_estimators"], - "tree_method": "gpu_hist", - "verbosity": 2, - "objective": "reg:squarederror", - "booster": "gbtree", - "n_jobs": njobs, - } - model = xgb.XGBRegressor(**params) - return model, "xgb-Decision_Tree_GPU" - - -def get_xgb_RandomForest_model(algo, njobs, ntrees): - params = { - "max_depth": optimized_hyperparameters[algo]["xgboost_max_depth"], - "learning_rate": optimized_hyperparameters[algo]["xgboost_learning_rate"], - "n_estimators": optimized_hyperparameters[algo]["xgboost_n_estimators"], - "tree_method": "exact", - "nthread": njobs, - "subsample": 0.5, - "colsample_bynode": 0.8, - "num_parallel_tree": ntrees, - "verbosity": 2, - "objective": "reg:squarederror", - } - model = xgb.XGBRFRegressor(**params) - return model, "xgb-Random_Forest" - - -def get_model(model_to_train, algo, njobs, ntrees): - if model_to_train == "DT": - model, model_name = get_scikit_DecisionTree_model(algo) - elif model_to_train == "RF": - model, model_name = get_scikit_RandomForest_model(algo, njobs, ntrees) - elif model_to_train == "xgb-DT": - model, model_name = get_xgb_DecisionTree_model(algo, njobs, ntrees) - elif model_to_train == "xgb-DT-dask": - model, model_name = get_xgb_DecisionTree_dask_model(algo, njobs, ntrees) - elif model_to_train == "xgb-DT-GPU": - model, model_name = get_xgb_DecisionTree_GPU_model(algo, njobs, ntrees) - elif model_to_train == "xgb-RF": - model, model_name = get_xgb_RandomForest_model(algo, njobs, ntrees) - else: - raise AssertionError( - "Cannot recognize model: " + model_to_train + ". Options: DT, RF" - ) - return model, model_name - - -def get_train_test_partition(to_partition, test, train=None): - """ - Perform train/test partition - :param to_partition: sequence of objects to partition - :param test: ndarray, test-indices - :param train (optional): ndarray - :return: - """ - if train is None: # Retrieve training indices - all_indices = set(range(len(to_partition[0].index))) - train = list(all_indices - set(test)) - - print( - "About to partition into train (len: {:,}) / test (len: {:,})".format( - len(train), len(test) - ) - ) - partitioned = list() - for df in to_partition: - df_train = df.iloc[ - train, : - ] # train: use for hyper-parameter optimization (via CV) and training - partitioned.append(df_train) - df_test = df.iloc[ - test, : - ] # test : use for evaluation of 'selected/final' model - partitioned.append(df_test) - - print("Returning object of length: {}".format(len(partitioned))) - return partitioned - - -def train_model(X, X_mnk, Y, algo, model_options, folder, log): - # =============================================================================== - # Get options - results_file = os.path.join(folder, "feature_tree.p") - - # =============================================================================== - # Testing splitter (train/test-split) - from sklearn.model_selection import GroupShuffleSplit - - cv = GroupShuffleSplit(n_splits=2, test_size=0.2) - train_test_splits = cv.split(X, Y, groups=X_mnk["mnk"]) - train, test = next(train_test_splits) - ( - X_train, - X_test, - Y_train, - Y_test, - X_mnk_train, - X_mnk_test, - ) = get_train_test_partition([X, Y, X_mnk], test, train) - plot_train_test_partition(test, train, X_mnk, folder) - log += print_and_log( - "\nComplete train/test split, total size=" - + str(X.shape) - + ", test size=" - + str(X_test.shape) - + ", train_size=" - + str(X_train.shape) - ) - del X, X_mnk, Y # free memory - predictor_names = X_train.columns.values - - # =============================================================================== - # Predictive model - model_to_train = model_options["model"] - model, model_name = get_model( - model_to_train, algo, model_options["njobs"], model_options["ntrees"] - ) - log += print_and_log( - "\nStart tune/train for model " + model_name + " with parameters:" - ) - log += print_and_log(model) - - # =============================================================================== - # Cross-validation splitter (train/validation-split) - test_size = 0.3 - cv = GroupShuffleSplit(n_splits=model_options["splits"], test_size=test_size) - - # =============================================================================== - # Feature selection: SelectFromModel - from sklearn.feature_selection import SelectFromModel - - feature_importance_threshold = ( - 0.0005 # only remove the features with VERY little importance - ) - model.cv = cv.split(X_train.values, Y_train.values, groups=X_mnk_train.values) - model.fit(X_train.values, Y_train.values) - model_fs = SelectFromModel( - model, threshold=feature_importance_threshold, max_features=None, prefit=True - ) - print(model_fs) - model.cv = None - - # =============================================================================== - # Info on feature selection - all_feature_names = X_train.columns.values.tolist() - feature_support = model_fs.get_support() - features_importances = model.feature_importances_ - feature_name_importance = zip( - all_feature_names, features_importances, feature_support - ) - feature_name_importance = sorted( - feature_name_importance, key=lambda x: x[1], reverse=True - ) - - log += print_and_log(visual_separator) - n_selected_features = np.sum(feature_support) - log += print_and_log("Optimal number of features : {}".format(n_selected_features)) - - # Selected features - log += print_and_log("\nFeatures:") - selected_features = list() - selected_feature_importances = list() - for i, (feat_name, feat_imp, feat_in) in enumerate(feature_name_importance): - in_or_out = "accepted" if feat_in else " x rejected" - log += print_and_log( - "{:>2}) {:<40}, imp: {:>1.3f} {}".format( - i + 1, feat_name, feat_imp, in_or_out - ) - ) - if feat_in: - selected_features.append(feat_name) - selected_feature_importances.append(feat_imp) - plot_feature_importance(features_importances, all_feature_names, folder) - - # Drop non-selected features - features_to_drop = [f for f in predictor_names if f not in selected_features] - X_train = X_train.drop(features_to_drop, axis=1) - X_test = X_test.drop(features_to_drop, axis=1) - n_features = len(X_train.columns) - - # =============================================================================== - # Fit - out_of_memory_computation = "dask" in model_options["model"] - if out_of_memory_computation: - X_train, Y_train = pandas_to_dask(X_train, Y_train) - - if model_options["hyperparameter_optimization"]: - # Hyperparameter Optimization - param_grid = get_hyperparameter_grid(algo, model_name, n_features) - if param_grid is None: - raise AssertionError("param_grid object is None. Please implement!") - - # At this point, we "cheat"/"take a shortcut" in 2 ways: - # - we split into train/test partitions using the simple default splitter, not one that is aware of mnk-groups - # - we use an overall MSE scorer, not one that looks at the performance loss of predicted mnks wrt. autotuned - if out_of_memory_computation: - from dask_ml.model_selection import GridSearchCV - - gds_pars = { - "estimator": model, - "param_grid": param_grid, - "cv": model_options["splits"], - "refit": True, - "n_jobs": 1, - } - else: - from sklearn.model_selection import GridSearchCV - - gds_pars = { - "estimator": model, - "param_grid": param_grid, - "cv": model_options["splits"], - "refit": True, - "n_jobs": 1, - "verbose": 2, - } - gds = GridSearchCV(**gds_pars) - log += print_and_log(visual_separator) - log += print_and_log("\nStart hyperparameter optimization & training ... :\n") - log += print_and_log("Hyper-parameter grid:") - for par, values in param_grid.items(): - log += print_and_log("\t" + par + ": " + str(values)) - log += print_and_log("\n") - gds.fit(X_train.values, Y_train.values) - log += print_and_log("... done") - describe_hpo(gds, log, folder) - model = gds.best_estimator_ - - else: - # Fit - log += print_and_log(visual_separator) - log += print_and_log("\nStart fitting model with predictors:\n") - for i, p in enumerate(X_train.columns.values): - log += print_and_log("\t{:>2}) {}".format(i + 1, p)) - - model.fit(X_train, Y_train) - - safe_pickle([X_train.columns.values, model, test], results_file) - log += print_and_log("\nCompleted fit, wrote results to " + results_file) - log += print_and_log(visual_separator) - return_model = model - - # Return - if "mnk" in X_train.columns.values: - X_train.drop("mnk", axis=1, inplace=True) - if "mnk" in X_test.columns.values: - X_train.drop("mnk", axis=1, inplace=True) - - if out_of_memory_computation: - X_train, Y_train = dask_to_pandas(X_train, Y_train) - - return X_train, Y_train, X_mnk_train, X_test, Y_test, X_mnk_test, return_model, log - - -def fetch_pre_trained_model(model_path_folder, X, log): - model_path = os.path.join(model_path_folder, "feature_tree_refit.p") - print("fetched pre-trained model from: {}".format(model_path)) - features, model = safe_pickle_load(model_path) - print("Pickled variables:\nfeatures:{}\nmodel:{}".format(features, model)) - - log += print_and_log("\nDrop non-selected features") - predictor_names = X.columns.values.tolist() - features_to_drop = [f for f in predictor_names if f not in features] - X.drop(features_to_drop, axis=1, inplace=True) - return X, model, log - - -def fetch_pre_trained_model_partial(X, X_mnk, Y, model_options, model_path_folder, log): - # Load pre-trained model, selected features and indices of test-set - model_path = os.path.join(model_path_folder, "feature_tree.p") - print("fetched partial pre-trained model from: {}".format(model_path)) - features, model, test_indices = safe_pickle_load(model_path) - print( - "Pickled stuff:\nfeatures:{}\nmodel:{}\ntest_indices:{}".format( - features, model, test_indices - ) - ) - if "mnk" in features: - features.remove("mnk") - - log += print_and_log("\nPerform train/test split") - ( - X_train, - X_test, - Y_train, - Y_test, - X_mnk_train, - X_mnk_test, - ) = get_train_test_partition([X, Y, X_mnk], test_indices) - log += print_and_log( - "\nComplete train/test split, total size=" - + str(X.shape) - + ", test size=" - + str(X_test.shape) - + ", train_size=" - + str(X_train.shape) - ) - - log += print_and_log("\nDrop non-selected features") - predictor_names = X_train.columns.values.tolist() - features_to_drop = [f for f in predictor_names if f not in features] - X_train.drop(features_to_drop, axis=1, inplace=True) - X_test.drop(features_to_drop, axis=1, inplace=True) - - out_of_memory_computation = "dask" in model_options["model"] - if out_of_memory_computation: - X_train, Y_train = pandas_to_dask(X_train, Y_train) - - return X_train, Y_train, X_mnk_train, X_test, Y_test, X_mnk_test, model, log - - -# =============================================================================== -# Describe and evaluate model -def describe_hpo(gs, log, folder): - # Scores obtained during hyperparameter optimization - columns_to_print = list() - for par in gs.param_grid.keys(): - columns_to_print.append("param_" + par) - columns_to_print += [ - "mean_test_score", - "std_test_score", - "mean_train_score", - "std_train_score", - ] - log += print_and_log("\nHyperparameter search results (head):") - cv_results = pd.DataFrame(gs.cv_results_)[columns_to_print] - with pd.option_context("display.max_rows", None, "display.max_columns", None): - log += print_and_log(cv_results.head()) - cv_results_path = os.path.join(folder, "hyperparameter_optimization_results.csv") - with open(cv_results_path, "w") as f: - cv_results.to_csv(f, index=False) - log += print_and_log("Wrote hyperparameter results to " + cv_results_path) - - # Best parameter set - log += print_and_log("\nBest parameters set found on development set:") - for bestpar_name, bestpar_value in gs.best_params_.items(): - log += print_and_log("\t{}: {}".format(bestpar_name, bestpar_value)) - - # Best estimator - log += print_and_log("\nBest estimator:") - best_estimator = gs.best_estimator_ - log += print_and_log(best_estimator) - log += print_and_log(visual_separator) - - return log - - -def describe_model(model, X, Y, log): - predictor_names = X.columns.values.tolist() - log += print_and_log("Model:") - log += print_and_log(model) - - log += print_and_log("Predictor variables:") - for p in predictor_names: - log += print_and_log("\t{}".format(p)) - - return log - - -def print_custom_error(y_true, y_pred, X_mnk, log, scaled=True): - result_line = ( - "\tRelative performance loss compared to autotuned max:\n" - + "top-{}: worse: {:>6.3f} [%], mean: {:>6.3f} [%]" - ) - for top_k in [1]: - log += print_and_log( - result_line.format( - top_k, - worse_rel_perf_loss_of_k(y_true, y_pred, top_k, X_mnk, scaled), - mean_rel_perf_loss_of_k(y_true, y_pred, top_k, X_mnk, scaled), - ) - ) - return log - - -def print_error(y_true, y_pred, log): - from sklearn.metrics import mean_absolute_error, mean_squared_error - - result_line = "\tOverall error:\n" + "absolute: {:>6.3f}, mean squared {:>6.3f}" - log += print_and_log( - result_line.format( - mean_absolute_error(y_true, y_pred), mean_squared_error(y_true, y_pred) - ) - ) - return log - - -def scale_back(y_scaled, x_mnk, max_performances, mnk=None): - if mnk is None: - corresponding_maxperf = np.array( - [max_performances[mnk] for mnk in x_mnk["mnk"].values.tolist()] - ) - else: - corresponding_maxperf = max_performances[mnk] - return y_scaled * corresponding_maxperf - - -def plot_train_test_partition(test_idx, train_idx, X_mnk, folder): - import matplotlib.pyplot as plt - - mnks_string_train = X_mnk["mnk"].iloc[train_idx].unique() - mnks_train = to_tuple(*mnks_string_train) - mnks_string_test = X_mnk["mnk"].iloc[test_idx].unique() - mnks_test = to_tuple(*mnks_string_test) - - y_train_product = ( - dict() - ) # keys: m*n*k, values: how many times this mnk-product appears in training-mnks - for m, n, k in mnks_train: - mxnxk = m * n * k - if mxnxk in y_train_product.keys(): - y_train_product[mxnxk] += 1 - else: - y_train_product[mxnxk] = 1 - - train_mnks = list() - train_counts = list() - for mnk, count in y_train_product.items(): - for c in range(count): - train_mnks.append(mnk) - train_counts.append(c + 1) - - y_test_product = dict() - for m, n, k in mnks_test: - mxnxk = m * n * k - if mxnxk in y_test_product.keys(): - y_test_product[mxnxk] += 1 - else: - y_test_product[mxnxk] = 1 - - test_mnks = list() - test_counts = list() - for mnk, count in y_test_product.items(): - for c in range(count): - test_mnks.append(mnk) - if mnk in y_train_product.keys(): - test_counts.append(y_train_product[mnk] + c + 1) - else: - test_counts.append(c + 1) - - plt.figure(figsize=(30, 5)) - markersize = 12 - plt.plot( - train_mnks, - train_counts, - "o", - markersize=markersize, - color="blue", - label="training mnks (" + str(len(train_mnks)) + ")", - ) - plt.plot( - test_mnks, - test_counts, - "o", - markersize=markersize, - color="red", - label="testing mnks (" + str(len(test_mnks)) + ")", - ) - plot_file_path = os.path.join(folder, "train-test_split.svg") - plt.xlabel("m * n * k triplets") - plt.ylabel("number of occurences in data set") - plt.title("Train/test split") - maxcount = max(max(test_counts), max(train_counts)) + 1 - plt.ylim([0, maxcount]) - plt.legend() - plt.savefig(plot_file_path) - - -def plot_feature_importance(importances, names, folder): - plt.rcdefaults() - fig, ax = plt.subplots() - - ax.set_title("Feature importances") - ax.barh(range(len(names)), importances, color="g", align="center") - ax.set_yticks(np.arange(len(importances))) - ax.set_yticklabels(names) - ax.invert_yaxis() - plot_file_path = os.path.join(folder, "feature_importance.svg") - plt.savefig(plot_file_path) - print(plot_file_path) - - -def plot_loss_histogram(y_true, y_pred, X_mnk, folder): - import matplotlib.pyplot as plt - - # Get losses - top_k = 1 - y = np.array(perf_loss(y_true, y_pred, top_k, X_mnk, False)) - - # Losses-histogram - num_bins = 100 - plt.figure() - plt.hist(y, num_bins, facecolor="green", alpha=0.75) - plt.xlabel("relative performance loss [%]") - plt.ylabel("# occurrences") - plt.title( - "Performance losses for top-k=" - + str(top_k) - + " (" - + str(len(y)) - + " test mnks)" - ) - plot_file_path = os.path.join(folder, "result_losses.svg") - plt.savefig(plot_file_path) - print(plot_file_path) - - -def plot_prediction_accuracy(m, n, k, y_true, y_pred, train, pp): - plt.figure() - if train: - plt.plot(100 * y_true, 100 * y_pred, "b.", label="truth") - else: - plt.plot(100 * y_true, 100 * y_pred, "r.", label="truth") - plt.xlabel("true scaled performance [%]") - plt.ylabel("predicted scaled performance [%]") - type = "train" if train else "test" - plt.title("Prediction accuracy for kernel " + str((m, n, k)) + " (" + type + ")") - pp.savefig() - - -def get_predive_model_performances( - y_true, y_pred, x_mnk, max_performances_ref, max_performances_algo -): - predictive_model_perf_scaled = dict() - - for mnk_string in x_mnk["mnk"].unique(): - idx_mnk = np.where(x_mnk == mnk_string)[0].tolist() - assert len(idx_mnk) > 0, "idx_mnk is empty" - m, n, k = to_tuple(mnk_string) - - perf_chosen_idx = [np.argmax(y_pred[idx_mnk])] - perf_effective = y_true.iloc[idx_mnk].iloc[perf_chosen_idx].values.item() - predictive_model_perf_scaled[(m, n, k)] = ( - perf_effective # 'scaled' between 0 and 1 - ) - - predictive_model_perf = dict( - zip( - predictive_model_perf_scaled.keys(), - [ - perf_scaled * max_performances_ref[to_string(mnk)] - for mnk, perf_scaled in predictive_model_perf_scaled.items() - ], - ) - ) - - # Re-scale performances by algorithm for a fair comparison - predictive_model_perf_scaled = dict( - zip( - predictive_model_perf.keys(), - [ - perf / max_performances_algo[mnk] - for mnk, perf in predictive_model_perf.items() - ], - ) - ) - - return predictive_model_perf, predictive_model_perf_scaled - - -# =============================================================================== -def evaluate_model( - model, - X_train, - X_mnk_train, - Y_train, - X_test, - X_mnk_test, - Y_test, - max_performances_ref, - max_performances_algo, - baseline_performances_algo, - data_nrows, - log, - folder, -): - """Main evaluation function""" - if model is None: - return log - - # Start evaluation - log += print_and_log(visual_separator) - log += print_and_log("Start model evaluation") - if all([x is not None for x in [X_test, Y_test]]): - log = describe_model(model, X_test, Y_test, log) - - # Training error - if all([x is not None for x in [X_train, X_mnk_train, Y_train]]): - y_train_pred = model.predict(X_train.values) - log += print_and_log("\nTraining error: (train&val)") - log = print_custom_error(Y_train, y_train_pred, X_mnk_train, log, True) - log = print_error(Y_train, y_train_pred, log) - - # Test error - if all([x is not None for x in [X_test, X_mnk_test, Y_test]]): - y_test_pred = model.predict(X_test) - log += print_and_log("\nTesting error:") - log = print_custom_error(Y_test, y_test_pred, X_mnk_test, log, True) - log = print_error(Y_test, y_test_pred, log) - - # Training error (scaled-back) - if all([x is not None for x in [X_train, X_mnk_train, Y_train]]): - log += print_and_log("\nTraining error (scaled back): (train&val)") - y_train_pred_scaled_back = scale_back( - y_train_pred, X_mnk_train, max_performances_ref - ) - y_train_scaled_back = pd.DataFrame( - scale_back(Y_train.values.flatten(), X_mnk_train, max_performances_ref) - ) - log = print_custom_error( - y_train_scaled_back, y_train_pred_scaled_back, X_mnk_train, log, False - ) - log = print_error(y_train_scaled_back, y_train_pred_scaled_back, log) - - if all([x is not None for x in [X_test, X_mnk_test, Y_test]]): - # Test error (scaled-back) - log += print_and_log("\nTesting error (scaled back): (test&val)") - y_test_pred_scaled_back = scale_back( - y_test_pred, X_mnk_test, max_performances_ref - ) - y_test_scaled_back = pd.DataFrame( - scale_back(Y_test.values.flatten(), X_mnk_test, max_performances_ref) - ) - log = print_custom_error( - y_test_scaled_back, y_test_pred_scaled_back, X_mnk_test, log, False - ) - log = print_error(y_test_scaled_back, y_test_pred_scaled_back, log) - - # =============================================================================== - # Print histogram for "best" estimator - if all([x is not None for x in [X_test, X_mnk_test, Y_test]]): - log += print_and_log("\nPlot result histogram:") - plot_loss_histogram(Y_test, y_test_pred, X_mnk_test, folder) - - # =============================================================================== - # Plot prediction accuracy and goodness of choice for a few mnks (training-set) - if all([x is not None for x in [X_train, X_mnk_train, Y_train]]): - n_samples = 10 if data_nrows < 100000000 else 2 - mnks_to_plot = random.sample(X_mnk_train["mnk"].values.tolist(), n_samples) - - from matplotlib.backends.backend_pdf import PdfPages - - plot_file_path = os.path.join(folder, "evaluation_by_mnk_refit.pdf") - if all([x is not None for x in [X_test, X_mnk_test, Y_test]]): - plot_file_path = os.path.join(folder, "evaluation_by_mnk.pdf") - pp = PdfPages(plot_file_path) - - for mnk_string in mnks_to_plot: - # Get performances per mnk - idx_mnk = np.where(X_mnk_train == mnk_string)[0].tolist() - assert len(idx_mnk) > 0, "idx_mnk is empty" - m_, n_, k_ = to_tuple(mnk_string) - y_train_pred_mnk = y_train_pred[idx_mnk] - Y_train_mnk = Y_train.iloc[idx_mnk] - - log += print_and_log("Prediction accuracy plot: " + str(mnk_string)) - - plot_prediction_accuracy( - m_, n_, k_, Y_train_mnk, y_train_pred_mnk, True, pp - ) - - log += print_and_log("Goodness plot: " + str(mnk_string)) - plot_choice_goodness( - m_, - n_, - k_, - baseline_performances_algo, - max_performances_ref, - Y_train["perf_scaled"].iloc[idx_mnk].values, - y_train_pred_mnk, - True, - pp, - ) - - # =============================================================================== - # Plot prediction accuracy for a few mnks (testing-set) - if all([x is not None for x in [X_test, X_mnk_test, Y_test]]): - mnks_to_plot = random.sample(X_mnk_test["mnk"].values.tolist(), n_samples) - for mnk_string in mnks_to_plot: - # Get performances per mnk - idx_mnk = np.where(X_mnk_test == mnk_string)[0].tolist() - assert len(idx_mnk) > 0, "idx_mnk is empty" - m_, n_, k_ = to_tuple(mnk_string) - - log += print_and_log("Prediction accuracy plot: " + str(mnk_string)) - plot_prediction_accuracy( - m_, n_, k_, Y_test.iloc[idx_mnk], y_test_pred[idx_mnk], False, pp - ) - - log += print_and_log("Goodness plot: " + str(mnk_string)) - plot_choice_goodness( - m_, - n_, - k_, - baseline_performances_algo, - max_performances_ref, - Y_test["perf_scaled"].iloc[idx_mnk].values, - y_test_pred[idx_mnk], - False, - pp, - True, - ) - - if all([x is not None for x in [X_train, X_mnk_train, Y_train]]): - pp.close() - - # =============================================================================== - # Scale baseline and max performances - max_performances_algo = dict( - zip( - [to_tuple(mnk_string) for mnk_string in max_performances_algo.keys()], - max_performances_algo.values(), - ) - ) - max_performances_algo_scaled = dict( - zip(max_performances_algo.keys(), [1.0] * len(max_performances_algo)) - ) - baseline_performances_algo = dict( - zip( - [to_tuple(mnk_string) for mnk_string in baseline_performances_algo.keys()], - baseline_performances_algo.values(), - ) - ) - baseline_performances_algo_scaled = dict( - zip( - [(m, n, k) for m, n, k in baseline_performances_algo.keys()], - [ - perf / max_performances_algo[(m, n, k)] - for (m, n, k), perf in baseline_performances_algo.items() - ], - ) - ) - - # =============================================================================== - # Compare max performances and baseline - from matplotlib.backends.backend_pdf import PdfPages - - plot_file_path = os.path.join(folder, "evaluation_by_overall_refit.pdf") - if all([x is not None for x in [X_test, X_mnk_test, Y_test]]): - plot_file_path = os.path.join(folder, "evaluation_overall.pdf") - pp = PdfPages(plot_file_path) - - if all([x is not None for x in [X_test, X_mnk_test, Y_test]]): - plot_performance_gains( - max_performances_algo, - baseline_performances_algo, - "trained", - "max. performance per algorithm", - "baseline per algorithm", - pp, - ) - plot_scaled_performance_gains( - max_performances_algo_scaled, - baseline_performances_algo_scaled, - "trained", - "max. performance per algorithm", - "baseline per algorithm", - pp, - ) - - # =============================================================================== - # 'Results' = y_true ( y_chosen ) - if all([x is not None for x in [X_train, X_mnk_train, Y_train]]): - ( - predictive_model_perf_train, - predictive_model_perf_train_scaled, - ) = get_predive_model_performances( - Y_train, - y_train_pred, - X_mnk_train, - max_performances_ref, - max_performances_algo, - ) - - if all([x is not None for x in [X_test, X_mnk_test, Y_test]]): - ( - predictive_model_perf_test, - predictive_model_perf_test_scaled, - ) = get_predive_model_performances( - Y_test, - y_test_pred, - X_mnk_test, - max_performances_ref, - max_performances_algo, - ) - - # =============================================================================== - # Plot results (training set: predictive modelling VS naïve) - log += print_and_log("\nPredictive model VS baseline: ") - - if all([x is not None for x in [X_train, X_mnk_train, Y_train]]): - perf_gain_pred_train_over_baseline = performance_gain( - baseline_performances_algo, predictive_model_perf_train - ) - plot_absolute_performance_gain( - perf_gain_pred_train_over_baseline, - "trained", - "baseline per algorithm", - "predictive model", - pp, - ) - - scaled_perf_gain_pred_train_over_baseline = performance_gain( - baseline_performances_algo_scaled, predictive_model_perf_train_scaled - ) - plot_relative_performance_gain( - scaled_perf_gain_pred_train_over_baseline, - "trained", - "baseline per algorithm", - "predictive model", - pp, - ) - - if all([x is not None for x in [X_test, X_mnk_test, Y_test]]): - perf_gain_pred_test_over_baseline = performance_gain( - baseline_performances_algo, predictive_model_perf_test - ) - plot_absolute_performance_gain( - perf_gain_pred_test_over_baseline, - "tested", - "baseline per algorithm", - "predictive model", - pp, - ) - - scaled_perf_gain_pred_test_over_baseline = performance_gain( - baseline_performances_algo_scaled, predictive_model_perf_test_scaled - ) - plot_relative_performance_gain( - scaled_perf_gain_pred_test_over_baseline, - "tested", - "baseline per algorithm", - "predictive model", - pp, - ) - - log += print_and_log("\nPredictive model VS autotuned: ") - perf_gain_pred_train_over_max = performance_gain( - max_performances_algo, predictive_model_perf_train - ) - plot_absolute_performance_gain( - perf_gain_pred_train_over_max, - "trained", - "max. performance per algorithm", - "predictive model", - pp, - ) - scaled_perf_gain_pred_train_over_max = performance_gain( - max_performances_algo_scaled, predictive_model_perf_train_scaled - ) - plot_relative_performance_gain( - scaled_perf_gain_pred_train_over_max, - "trained", - "max. performance per algorithm", - "predictive model", - pp, - ) - - if all([x is not None for x in [X_test, X_mnk_test, Y_test]]): - perf_gain_pred_test_over_max = performance_gain( - max_performances_algo, predictive_model_perf_test - ) - plot_absolute_performance_gain( - perf_gain_pred_test_over_max, - "tested", - "max. performance per algorithm", - "predictive model", - pp, - ) - scaled_perf_gain_pred_test_over_max = performance_gain( - max_performances_algo_scaled, predictive_model_perf_test_scaled - ) - plot_relative_performance_gain( - scaled_perf_gain_pred_test_over_max, - "tested", - "max. performance per algorithm", - "predictive model", - pp, - ) - - if all([x is not None for x in [X_test, X_mnk_test, Y_test]]): - log += print_and_log("\nCompare performances: ") - plot_performance_gains( - baseline_performances_algo, - predictive_model_perf_train, - "trained", - "baseline per algorithm", - "predictive model", - pp, - ) - plot_performance_gains( - max_performances_algo, - predictive_model_perf_train, - "trained", - "max. performance per algorithm", - "predictive model", - pp, - ) - - if all([x is not None for x in [X_test, X_mnk_test, Y_test]]): - plot_performance_gains( - baseline_performances_algo, - predictive_model_perf_test, - "tested", - "baseline per algorithm", - "predictive model", - pp, - ) - plot_performance_gains( - max_performances_algo, - predictive_model_perf_test, - "tested", - "max. performance per algorithm", - "predictive model", - pp, - ) - - pp.close() - - return log - - -# =============================================================================== -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description=""" - Train a Machine Learning model on autotuning data to predict a kernel's performance given - its template parameters - - - This script is part of the workflow for predictive modelling of optimal libsmm_acc parameters. - For more details, see README.md. - """, - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) - parser.add_argument( - "-d", - "--destination_folder", - metavar="FOLDER", - type=str, - default=".", - help="Folder in which to write plots, models, etc.", - ) - parser.add_argument( - "-f", - "--folder", - metavar="FOLDER", - type=str, - default=".", - help="Folder from which to read data", - ) - parser.add_argument( - "-a", "--algo", metavar="algoname", default="", help="Algorithm to train on" - ) - parser.add_argument( - "-m", - "--model", - default="DT", - help="Model to train. Options: DT (Decision Trees), RF (Random Forests), xgb-DT, xgb-DT-dask (out-of-memory" - + "xgboost), xgb-DT-GPU (with GPU support), xgb-RF", - ) - parser.add_argument( - "-o", - "--hyperparameter_optimization", - default=False, - help="Whether to do hyperparameter optimization. If False, the model will be trained with 'best guess' parameters", - ) - parser.add_argument( - "-s", - "--splits", - default=3, - metavar="NUMBER", - type=int, - help="Number of cross-validation splits used in RFECV and GridSearchCV", - ) - parser.add_argument( - "-e", - "--ntrees", - default=3, - metavar="NUMBER", - type=int, - help="Number of estimators in RF", - ) - parser.add_argument( - "-j", - "--njobs", - default=-1, - metavar="NUMBER", - type=int, - help="Number of parallel jobs that Joblib will launch (used by GridSearchCV and XGBoost)", - ) - parser.add_argument( - "-r", - "--nrows", - default=None, - metavar="NUMBER", - type=int, - help="Number of rows of data to load. Default: None (load all)", - ) - parser.add_argument( - "-g", - "--prefitted_model", - metavar="filename", - default="", - help="Path to pickled model object to load instead of re-training model", - ) - parser.add_argument( - "-i", - "--intermediate_evaluation", - default=False, - help="Whether to perform evaluation of the model trained on part of the model", - ) - parser.set_defaults(intermediate_evaluation=False) - - args = parser.parse_args() - model_args = { - "model": args.model, - "splits": args.splits, - "ntrees": args.ntrees, - "njobs": args.njobs, - "hyperparameter_optimization": args.hyperparameter_optimization, - } - main( - args.folder, - args.destination_folder, - args.algo, - model_args, - args.nrows, - args.prefitted_model, - args.intermediate_evaluation, - ) diff --git a/src/acc/libsmm_acc/predict/prepare_training_data.py b/src/acc/libsmm_acc/predict/prepare_training_data.py deleted file mode 100755 index d8240d9e2d4..00000000000 --- a/src/acc/libsmm_acc/predict/prepare_training_data.py +++ /dev/null @@ -1,832 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -#################################################################################################### -# Copyright (C) by the DBCSR developers group - All rights reserved # -# This file is part of the DBCSR library. # -# # -# For information on the license, see the LICENSE file. # -# For further information please visit https://dbcsr.cp2k.org # -# SPDX-License-Identifier: GPL-2.0+ # -#################################################################################################### - - -import sys -import os -import json -import argparse -import numpy as np -import pandas as pd -import dask.dataframe as dd -from joblib import Parallel, delayed -from tqdm import tqdm - -sys.path.append("../") - -from kernels.smm_acc import kernel_algorithm, mnk_pattern # noqa: E402 -from kernels.smm_acc_predict import ( # noqa: E402 - PredictiveParameters, - derived_parameters, -) - - -# =============================================================================== -# HELPER FUNCTIONS -# =============================================================================== -def update_maximums(dictionnary_to_update, dictionnary_partial): - for mnk, new_perf in dictionnary_partial.items(): - if mnk in dictionnary_to_update.keys(): - if new_perf > dictionnary_to_update[mnk]: - dictionnary_to_update[mnk] = new_perf - else: - dictionnary_to_update[mnk] = new_perf - return dictionnary_to_update - - -# =============================================================================== -def get_idx_baseline(data_mnk, algorithm, baseline_pars): - if algorithm in ["tiny"]: - idx_baseline = data_mnk[ - (data_mnk.m == baseline_pars["m"]) - & (data_mnk.n == baseline_pars["n"]) - & (data_mnk.k == baseline_pars["k"]) - & (data_mnk.threads == baseline_pars["threads"]) - & (data_mnk.grouping == baseline_pars["grouping"]) - & (data_mnk.minblocks == baseline_pars["minblocks"]) - ].index.tolist() - elif algorithm in ["small", "medium"]: - idx_baseline = data_mnk[ - (data_mnk.m == baseline_pars["m"]) - & (data_mnk.n == baseline_pars["n"]) - & (data_mnk.k == baseline_pars["k"]) - & (data_mnk.threads == baseline_pars["threads"]) - & (data_mnk.grouping == baseline_pars["grouping"]) - & (data_mnk.minblocks == baseline_pars["minblocks"]) - & (data_mnk.tile_m == baseline_pars["tile_m"]) - & (data_mnk.tile_n == baseline_pars["tile_n"]) - ].index.tolist() - else: # algorithm is largeDB1 or largeDB2 - idx_baseline = data_mnk[ - (data_mnk.m == baseline_pars["m"]) - & (data_mnk.n == baseline_pars["n"]) - & (data_mnk.k == baseline_pars["k"]) - & (data_mnk.threads == baseline_pars["threads"]) - & (data_mnk.minblocks == baseline_pars["minblocks"]) - & (data_mnk.tile_m == baseline_pars["tile_m"]) - & (data_mnk.tile_n == baseline_pars["tile_n"]) - & (data_mnk.w == baseline_pars["w"]) - & (data_mnk.v == baseline_pars["v"]) - ].index.tolist() - return idx_baseline - - -def get_performance_closest_to_baseline( - data, algorithm, mnk, gpu_properties, autotuning_properties -): - """ - Sometimes, the so-called "baseline" parameter set does not appear in the training data. - This function finds the performance of the parameter set from the training data whose parameters are closest to those of the - baseline parameter sets. - """ - m, n, k = mnk_pattern.match(mnk).groups() - m, n, k = int(m), int(n), int(k) - - data_mnk = data[(data["m"] == m) & (data["n"] == n) & (data["k"] == k)].compute() - baseline_pars = kernel_algorithm[algorithm].baseline( - m, n, k, gpu_properties, autotuning_properties - ) - - # Get performance of baseline parameters for this algorithm & this mnk: - idx_baseline = get_idx_baseline(data_mnk, algorithm, baseline_pars) - - # Get performance of baseline parameters for this algorithm & this mnk: - if len(idx_baseline) == 0: - # Generate space of possibilities - pars_sets = kernel_algorithm[algorithm].promising_parameters( - m, n, k, gpu_properties, autotuning_properties - ) - # Sort space by distance to baseline set - pars_sets.sort( - key=lambda x: kernel_algorithm[algorithm].parameter_set_distance( - x, baseline_pars - ) - ) - - for pars_set in pars_sets: - idx_baseline = get_idx_baseline(data_mnk, algorithm, pars_set) - if len(idx_baseline) > 0: - break - else: - raise AssertionError( - f'Could not find closest baseline for mnk=({m}x{n}x{k}) and for algorithm "{algorithm}.\n' - f"Last baseline parameters searched:\n{baseline_pars}\n" - f"Parameter sets searched:\n" - ) - - idx_baseline = idx_baseline[0] - baseline_perf = data_mnk["perf (Gflop/s)"][idx_baseline] - return round(baseline_perf, 3) - - -def process_chunk(data_chunk, algorithm, gpu_properties, autotuning_properties): - """ - Given a chunk of data, compute the baseline and maximum performance of the (m, n, k)-triplets featured in the chunk of data. - """ - # Add "mnk" column - data_chunk["mnk"] = ( - data_chunk["m"].astype(str) - + "x" - + data_chunk["n"].astype(str) - + "x" - + data_chunk["k"].astype(str) - ) - # Get mnks - mnks = data_chunk["mnk"].unique() - - # For each (mnk), ... - baseline_performances = dict() - max_performances = dict() - for mnk in mnks: - data_mnk = data_chunk[data_chunk["mnk"] == mnk] - m, n, k = mnk_pattern.match(mnk).groups() - m, n, k = int(m), int(n), int(k) - - # Get baseline configuration for this algorithm & this mnk: - baseline_pars = kernel_algorithm[algorithm].baseline( - m, n, k, gpu_properties, autotuning_properties - ) - - # Get performance of baseline parameters for this algorithm & this mnk: - idx_baseline = get_idx_baseline(data_mnk, algorithm, baseline_pars) - if len(idx_baseline) < 1: - baseline_perf = 0 - else: - idx_baseline = idx_baseline[0] - baseline_perf = data_mnk["perf (Gflop/s)"][idx_baseline] - - baseline_performances[mnk] = round(baseline_perf, 3) - - # Get max performance for this algorithm & this mnk - max_perf = data_mnk["perf (Gflop/s)"].max() - max_performances[mnk] = round(max_perf, 3) - - return baseline_performances, max_performances - - -# =============================================================================== -def write_to_parquet(data_path, algorithm): - """ - Compress CSV files to parquet - """ - # Check whether the files corresponding to this algorithm have been compressed to parquet already - parquet_file = os.path.join(data_path, "training_data_" + algorithm + ".parquet") - parquet_file_done = os.path.join( - data_path, "training_data_" + algorithm + ".parquet.done" - ) - print( - "\n\n------------------------------------------------------------------------" - ) - if os.path.exists(parquet_file_done): - print("Found {:40}, skipping".format(parquet_file_done)) - - else: - print("Didn't find {:40}, generating".format(parquet_file_done)) - - # [RAW] Read CSV files into Pandas dataframes - data_file_raw = os.path.join( - data_path, "raw_training_data_" + algorithm + ".csv" - ) - print("\nRead raw data from: {}".format(data_file_raw)) - data_raw = dd.read_csv(data_file_raw) - raw_data_nrows = len(data_raw) - # n_partitions should be > 1 ! - n_partitions = max(1, int(raw_data_nrows // 1e5)) - data_raw = data_raw.repartition(npartitions=n_partitions) - data_raw = data_raw.reset_index().set_index("index") - data_raw["idx"] = 1 - data_raw["idx"] = data_raw.idx.cumsum() - data_raw = data_raw.set_index("idx", sorted=True) - print("Raw data head:\n", data_raw.head()) - - # [DERIVED] Read CSV files into Pandas dataframes - data_file_derived = os.path.join( - data_path, "training_data_" + algorithm + ".csv" - ) - print("\nRead derived data from: {}".format(data_file_derived)) - data_derived = dd.read_csv(data_file_derived) - derived_data_nrows = len(data_derived) - data_derived = data_derived.repartition(npartitions=n_partitions) - data_derived = data_derived.reset_index().set_index("index") - data_derived["idx"] = 1 - data_derived["idx"] = data_derived.idx.cumsum() - data_derived = data_derived.set_index("idx", sorted=True) - print("Derived data head:\n", data_derived.head()) - - # Merge raw/derived data together - print("Merging raw and derived ...") - data = dd.merge(data_raw, data_derived, left_index=True, right_index=True) - - len_data, len_data_raw, len_data_derived = ( - len(data), - raw_data_nrows, - derived_data_nrows, - ) - nrows_message_temp = """ - Data 1 : {:15,}, - Data 2 : {:15,}, - Merged data: {:15,}""" - nrows_message = nrows_message_temp.format( - len_data_raw, len_data_derived, len_data - ) - assert len_data == len_data_raw, "Mismatch in number of rows\n" + nrows_message - assert len_data == len_data_derived, ( - "Mismatch in number of rows\n" + nrows_message - ) - - # Add "mnk" column - data["mnk"] = ( - data["m"].astype(str) - + "x" - + data["n"].astype(str) - + "x" - + data["k"].astype(str) - ) - - # Print info on merged dataset - print("\nMerged data head:", data.head()) - data_nrows = len(data) - nrows_message = """ -Data : {:15,}, -Raw data : {:15,}, -Derived data: {:15,}""".format( - data_nrows, raw_data_nrows, derived_data_nrows - ) - assert data_nrows == raw_data_nrows, ( - "Mismatch in number of rows\n" + nrows_message - ) - assert data_nrows == derived_data_nrows, ( - "Mismatch in number of rows\n" + nrows_message - ) - print(nrows_message) - - # Compress files to Parquet - print("Compress and write to {}".format(parquet_file)) - data.to_parquet(parquet_file, engine="fastparquet", compression="snappy") - open( - parquet_file_done, "w" - ).close() # touch a file to mark that parquet is done - - -# =============================================================================== -def get_non_null(nlist): - """ - Given a list of numbers, return its first positive element, if it exists, zero otherwise. - """ - for e in nlist: - if e > 0: - return e - return 0 - - -def get_max(nlist): - """ - Return the largest element of a list of numbers - """ - return np.array(nlist).max() - - -def list_of_dics_to_dic_of_lists(list_of_dics): - """ - Given a list "list_of_dics" of dictionaries "d", with keys "k" and values "v", - construct a dictionary with keys "k" and values which are lists "[v1, v2, ...]" - of the values corresponding to "k" in the various dictionaries "d" - """ - dic_of_lists = dict() - for dic in list_of_dics: - for k, v in dic.items(): - if k not in dic_of_lists.keys(): - dic_of_lists[k] = list() - dic_of_lists[k].append(v) - return dic_of_lists - - -def dic_of_dics_to_dic_of_lists(dic_of_dics): - dic_of_lists = dict() - for _, dic in dic_of_dics.items(): - for k, v in dic.items(): - if k not in dic_of_lists.keys(): - dic_of_lists[k] = list() - dic_of_lists[k].append(v) - return dic_of_lists - - -def write_baseline_and_max_records_per_algorithm( - data_path, algorithm, arch, n_jobs, chunk_size -): - """ - Write records of baseline performances and maximum performances for the training mnks. - This function reads from the raw data file (`raw_training_data_ALGORITHM.csv`) - Writes to JSON files. - """ - # Read GPU properties and autotuning properties - with open("../kernels/gpu_properties.json") as f: - gpu_properties = json.load(f)[arch] - with open("../kernels/autotuning_properties.json") as f: - autotuning_properties = json.load(f) - - # Check whether record of baseline exists - baseline_performances_per_algo_file = os.path.join( - data_path, "baseline_performances_" + algorithm + ".json" - ) - max_performances_per_algo_file = os.path.join( - data_path, "max_performances_" + algorithm + ".json" - ) - print( - "\n\n------------------------------------------------------------------------" - ) - if os.path.exists(baseline_performances_per_algo_file) and os.path.exists( - max_performances_per_algo_file - ): - print("Found {:40}, skipping".format(baseline_performances_per_algo_file)) - print("Found {:40}, skipping".format(max_performances_per_algo_file)) - - else: - print("Processing data of algorithm {}".format(algorithm)) - raw_pars_cols = kernel_algorithm[algorithm].launch_parameters - if algorithm in ["largeDB1", "largeDB2"]: - raw_pars_cols.remove("grouping") - - data_file_raw = os.path.join( - data_path, "raw_training_data_" + algorithm + ".csv" - ) - baseline_and_maximums_performance_dictionaries = Parallel( - n_jobs=n_jobs, verbose=1 - )( - delayed(process_chunk, check_pickle=True)( - data_chunk, algorithm, gpu_properties, autotuning_properties - ) - for data_chunk in tqdm( - pd.read_csv(data_file_raw, chunksize=chunk_size), disable=True - ) - ) - - baseline_performance_dictionaries, maximums_performance_dictionaries = zip( - *baseline_and_maximums_performance_dictionaries - ) - baseline_performance_dictionary = list_of_dics_to_dic_of_lists( - baseline_performance_dictionaries - ) - assert ( - 0 not in baseline_performance_dictionary.values() - ), "Found a max. performance of 0" - maximums_performance_dictionary = list_of_dics_to_dic_of_lists( - maximums_performance_dictionaries - ) - assert ( - 0 not in maximums_performance_dictionary.values() - ), "Found a baseline performance of 0" - - # Write max performances to files - max_performances = dict() - print("\nComputing maximum performances ...") - for mnk, max_list in maximums_performance_dictionary.items(): - perf = get_max(max_list) - max_performances[mnk] = perf - with open(max_performances_per_algo_file, "w") as f: - json.dump(max_performances, f, indent="\t", sort_keys=True) - print("Wrote maximum performances to:\n", max_performances_per_algo_file) - - # Write baseline performances to files - baseline_performances = dict() - - def get_baseline_performance(mnk, base_list, raw_pars_cols): - perf = get_non_null(base_list) - if perf == 0: - data_file = os.path.join( - data_path, "raw_training_data_" + algorithm + ".csv" - ) - data = dd.read_csv(data_file) - perf = get_performance_closest_to_baseline( - data, algorithm, mnk, gpu_properties, autotuning_properties - ) - return perf - - print("\nComputing baseline performances ...") - baseline_performances_ = Parallel(n_jobs=n_jobs, verbose=1)( - delayed(get_baseline_performance, check_pickle=True)( - mnk, base_list, raw_pars_cols - ) - for mnk, base_list in tqdm( - baseline_performance_dictionary.items(), disable=True - ) - ) - - baseline_performances = dict( - zip(baseline_performance_dictionary.keys(), baseline_performances_) - ) - with open(baseline_performances_per_algo_file, "w") as f: - json.dump(baseline_performances, f, indent="\t", sort_keys=True) - print("Wrote baseline performances to:\n", baseline_performances_per_algo_file) - - -# =============================================================================== -def plot_baseline(baseline_perfs_by_algo, data_path, algorithms): - import re - import matplotlib.pyplot as plt - - print("\nPlotting baseline performances ...") - - # Get all mnks - mnk_sequences = list() - for _algo, baseline_dic in baseline_perfs_by_algo.items(): - mnk_sequences += list(baseline_dic.keys()) - all_mnks = list(set.union(set(mnk_sequences))) - - # Reduce baseline_perfs_by_algo to baseline_perfs - baseline_perfs = dict() - for mnk in all_mnks: - for algo in [ - "medium", - "small", - "largeDB1", - "largeDB2", - "tiny", - ]: # algorithms in order of baseline-ness - if mnk in baseline_perfs_by_algo[algo].keys(): - baseline_perfs[mnk] = baseline_perfs_by_algo[algo][mnk] - break - else: - raise AssertionError( - "NOOOO this is actually impossible by def of all_mnks, isn't it?" - ) - - # Sort - mnks = list() - mnk_str = re.compile(r"(\d+)x(\d+)x(\d+)") - for mnk_s in baseline_perfs.keys(): - match = mnk_str.match(mnk_s) - mnks.append((int(match.group(1)), int(match.group(2)), int(match.group(3)))) - - baseline_performances = zip(mnks, baseline_perfs.values()) - - baseline_performances_sorted = [ - (mnk[0] * mnk[1] * mnk[2], p) - for mnk, p in sorted( - baseline_performances, key=lambda x: x[0][0] * x[0][1] * x[0][2] - ) - ] - mnk_sorted, baseline_perf_sorted = list(zip(*baseline_performances_sorted)) - - # Plot - plt.plot(mnk_sorted, baseline_perf_sorted, ".", markersize=1) - plt.xlabel("(m, n, k) triplets of training data (in order of increasing m*n*k)") - plt.ylabel("Baseline performances (Gflop/s)") - plt.title("Baseline performances on training data") - algorithm_extension = "_" + algorithms[0] if len(algorithms) == 0 else "" - file_name = os.path.join( - data_path, "baseline_performances" + algorithm_extension + ".svg" - ) - plt.savefig(file_name) - print("... wrote to", file_name) - plt.close() - - -def write_baseline_record(data_path, algorithms): - baseline_performances_by_algo_file = os.path.join( - data_path, "baseline_performances_by_algo.json" - ) - if os.path.exists(baseline_performances_by_algo_file): - print("Found {:40}, skipping".format(baseline_performances_by_algo_file)) - with open(baseline_performances_by_algo_file) as f: - baseline_performances_by_algo = json.load(f) - - else: - print( - "File {:40} not found, generating".format( - baseline_performances_by_algo_file - ) - ) - # Get baseline performances by algorithm - baseline_performances_by_algo = dict() - for algorithm in algorithms: - # Read baseline parameters - baseline_performances_per_algo_file = os.path.join( - data_path, "baseline_performances_" + algorithm + ".json" - ) - with open(baseline_performances_per_algo_file, "r") as f: - baseline_algorithm = json.load(f) - # Add to dictionary - baseline_performances_by_algo[algorithm] = baseline_algorithm - - # Write to file - with open(baseline_performances_by_algo_file, "w") as f: - json.dump(baseline_performances_by_algo, f, indent="\t", sort_keys=True) - print("\nWrote baseline performances to:\n", baseline_performances_by_algo_file) - - plot_baseline(baseline_performances_by_algo, data_path, algorithms) - - -def write_max_by_algo_record(data_path, algorithms): - max_performances_by_algo_file = os.path.join( - data_path, "max_performances_by_algo.json" - ) - if os.path.exists(max_performances_by_algo_file): - print("Found {:40}, skipping".format(max_performances_by_algo_file)) - - else: - # Get max performances by algorithm - max_performances_by_algo = dict() - for algorithm in algorithms: - # Read max parameters - max_performances_per_algo_file = os.path.join( - data_path, "max_performances_" + algorithm + ".json" - ) - with open(max_performances_per_algo_file, "r") as f: - max_algorithm = json.load(f) - # Add to dictionary - max_performances_by_algo[algorithm] = max_algorithm - - # Write to file - with open(max_performances_by_algo_file, "w") as f: - json.dump(max_performances_by_algo, f, indent="\t", sort_keys=True) - print( - "\nWrote max performances by algorithm to:\n", max_performances_by_algo_file - ) - - -def plot_max_performances(max_perfs, data_path, algorithms): - import re - import matplotlib.pyplot as plt - - print("\nPlotting max. performances ...") - - mnks = list() - mnk_str = re.compile(r"(\d+)x(\d+)x(\d+)") - for mnk_s in max_perfs.keys(): - match = mnk_str.match(mnk_s) - mnks.append((int(match.group(1)), int(match.group(2)), int(match.group(3)))) - - max_performances = zip(mnks, max_perfs.values()) - max_performances_sorted = [ - (mnk[0] * mnk[1] * mnk[2], p) - for mnk, p in sorted( - max_performances, key=lambda x: x[0][0] * x[0][1] * x[0][2] - ) - ] - mnk_sorted, max_perf_sorted = list(zip(*max_performances_sorted)) - - # Plot - plt.plot(mnk_sorted, max_performances_sorted, ".", markersize=1) - plt.xlabel("(m, n, k) triplets of training data (in order of increasing m*n*k)") - plt.ylabel("Max. performances (Gflop/s)") - plt.title("Maximum performances on training data") - algorithm_extension = "_" + algorithms[0] if len(algorithms) == 0 else "" - file_name = os.path.join( - data_path, "max_performances" + algorithm_extension + ".svg" - ) - plt.savefig(file_name) - print("... wrote to", file_name) - plt.close() - - -def write_max_record(data_path, algorithms): - max_performances_file = os.path.join(data_path, "max_performances.json") - if os.path.exists(max_performances_file): - print("Found {:40}, skipping".format(max_performances_file)) - with open(max_performances_file) as f: - max_performances = json.load(f) - - else: - # Get max performances - max_performances_by_algo = dict() - for algorithm in algorithms: - # Read max parameters - max_performances_per_algo_file = os.path.join( - data_path, "max_performances_" + algorithm + ".json" - ) - with open(max_performances_per_algo_file, "r") as f: - max_algorithm = json.load(f) - # Add to dictionary - max_performances_by_algo[algorithm] = max_algorithm - - # Reduce along max - max_performances_list = dic_of_dics_to_dic_of_lists(max_performances_by_algo) - max_performances = dict() - for mnk, max_list in max_performances_list.items(): - max_performances[mnk] = get_max(max_list) - - # Write to file - with open(max_performances_file, "w") as f: - json.dump(max_performances, f, indent="\t", sort_keys=True) - print("\nWrote max performances to:\n", max_performances_file) - - plot_max_performances(max_performances, data_path, algorithms) - - -def get_derived_pars( - data_path, - i, - data_chunk, - algorithm, - gpu_properties, - autotuning_properties, - max_performances, -): - # Compute derived parameters - data_chunk["algorithm"] = [algorithm] * len( - data_chunk.index - ) # add 'algorithm' column manually - parameter_sets = PredictiveParameters( - data_chunk, gpu_properties, autotuning_properties, max_performances - ) - pars_to_get = derived_parameters["common"] + derived_parameters[algorithm] - new_data = parameter_sets.get_features(pars_to_get) - - # Write to CSV - filename = os.path.join(data_path, "training_data_{}-{}.csv".format(algorithm, i)) - new_data.to_csv(filename, index=False) - - return filename - - -def write_derived_data(data_path, algorithm, arch, n_jobs, chunk_size): - """ - The predictive modelling procedure uses not only the raw parameters as features, but also some - "derived" features computed using algorithm characteristics and hardware knowledge. - This function reads raw parameters from `data_path`, computes derived parameters and writes them - to the same folder. - """ - derived_training_data_filename = os.path.join( - data_path, "training_data_{}.csv".format(algorithm) - ) - print( - "\n\n------------------------------------------------------------------------" - ) - if os.path.exists(derived_training_data_filename): - print("Found {:40}, skipping".format(derived_training_data_filename)) - - else: - print("Didn't find {:40}, generating".format(derived_training_data_filename)) - - # Read max performances, GPU properties and autotuning properties - maxperf_file = os.path.join(data_path, "max_performances.json") - with open(maxperf_file) as f: - max_performances = json.load(f) - with open("../kernels/gpu_properties.json") as f: - gpu_properties = json.load(f)[arch] - with open("../kernels/autotuning_properties.json") as f: - autotuning_properties = json.load(f) - - # Compute derived data from raw data - raw_training_data_filename = os.path.join( - data_path, "raw_training_data_{}.csv".format(algorithm) - ) - print( - "reading raw data from {} and computing derived parameters".format( - raw_training_data_filename - ) - ) - - derived_training_data_filenames = Parallel(n_jobs=n_jobs, verbose=1)( - delayed(get_derived_pars, check_pickle=True)( - data_path, - i, - data_chunk, - algorithm, - gpu_properties, - autotuning_properties, - max_performances, - ) - for i, data_chunk in enumerate( - pd.read_csv(raw_training_data_filename, chunksize=chunk_size) - ) - ) - - # Merge the CSV files (one for each iteration of the above Joblib loop) into one file - assert len(derived_training_data_filenames) > 0, "No training data files" - if len(derived_training_data_filenames) == 1: - # No merging is necessary. Simply rename the file - os.rename( - derived_training_data_filenames[0], derived_training_data_filename - ) - - else: - with open(derived_training_data_filename, "w") as out: - # Write the first file, including its header - fn_1 = derived_training_data_filenames.pop(0) - with open(fn_1) as f: - out.write(f.read()) - os.remove(fn_1) - # Write the rest of the files, skipping the header line each time - for i, fn in enumerate(derived_training_data_filenames): - print( - "writing from {} ({}/{})".format( - fn, i + 1, len(derived_training_data_filenames) - ) - ) - with open(fn) as f: - next(f) # skip header line - out.write(f.read()) - # Delete the file we just merged - os.remove(fn) - - print("\tWrote", derived_training_data_filename) - - -# =============================================================================== -def main(data_path, algorithms_to_prep, arch, n_jobs, chunk_size, skip_derived_data): - # =============================================================================== - # Write baseline and maximum performance records - for algorithm in algorithms_to_prep: - write_baseline_and_max_records_per_algorithm( - data_path, algorithm, arch, n_jobs, chunk_size - ) - - if set(algorithms_to_prep) == set(kernel_algorithm.keys()): - write_baseline_record(data_path, algorithms_to_prep) - write_max_by_algo_record(data_path, algorithms_to_prep) - write_max_record(data_path, algorithms_to_prep) - - # =============================================================================== - if not skip_derived_data: - for algorithm in algorithms_to_prep: - write_derived_data(data_path, algorithm, arch, n_jobs, chunk_size) - write_to_parquet(data_path, algorithm) - - -# =============================================================================== -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description=""" - Prepare the data collected with autotuning for training, - After downloading raw data from the dedicated repository, use this script to - - Record maximum and baseline performances of (m,n,k)-triplets in JSON files - - Compute derived training data and write it to a CSV file - - Compress training data csv files to parquet file format - - - This script is part of the workflow for predictive modelling of optimal libcusmm parameters. - For more details, see predict.md - """, - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - ) - parser.add_argument( - "-f", - "--folder", - metavar="FOLDER", - type=str, - default=".", - help="Path to the data to be converted to parquet.", - ) - parser.add_argument( - "-l", - "--algorithm", - metavar="ALGORITHM", - default="", - help="Algorithms to prepare", - ) - parser.add_argument( - "-a", - "--arch", - metavar="ARCHITECTURE", - type=str, - default="sm_80", - help="CUDA architecture number. Options: sm_35, sm_37, sm_60, sm_70, sm_80, gfx906", - ) - parser.add_argument( - "-j", - "--njobs", - default=-1, - metavar="NUMBER", - type=int, - help="Number of parallel jobs that Joblib will launch. If you run into out-of-memory errors, reduce this.", - ) - parser.add_argument( - "-c", - "--chunk_size", - type=int, - default=20000, - help="Chunk size for dispatching joblib jobs. If memory errors are experienced, reduce this number", - ) - parser.add_argument( - "-s", - "--skip_derived_data", - type=bool, - default=False, - help=( - "Skip the computation of derived data. Set to true if computing baseline & max records for " - "each algorithm separately" - ), - ) - - args = parser.parse_args() - algorithms_to_prep = ( - kernel_algorithm.keys() if args.algorithm == "" else [args.algorithm] - ) - main( - args.folder, - algorithms_to_prep, - args.arch, - args.njobs, - args.chunk_size, - args.skip_derived_data, - ) diff --git a/src/acc/libsmm_acc/predict/requirements.txt b/src/acc/libsmm_acc/predict/requirements.txt deleted file mode 100644 index a9187ccbc03..00000000000 --- a/src/acc/libsmm_acc/predict/requirements.txt +++ /dev/null @@ -1,11 +0,0 @@ -eli5>=0.8.1 -joblib>=0.13.1, <=0.17.0 -tqdm>=4.32.2 -matplotlib>=3.0.2 -numpy>=1.16.0 -pandas>=0.23.4 -scikit-learn>=0.20.2 -dask[dataframe]>=2021.10.0 -xgboost>=0.90 -fastparquet>=0.3.1 -python-snappy>=0.5.4 diff --git a/src/acc/libsmm_acc/tune/README.md b/src/acc/libsmm_acc/tune/README.md index 01b00710f83..96c8571e12e 100644 --- a/src/acc/libsmm_acc/tune/README.md +++ b/src/acc/libsmm_acc/tune/README.md @@ -65,7 +65,6 @@ The `tune_setup.py` script generates job files. You have to adapt the script to output += "date\n" ... - ... ``` @@ -235,11 +234,7 @@ Wrote parameters.new.json The file `parameters.new.json` can now be used as a parameter file. Rename it to `parameters_GPU.json`, with the appropriate `GPU`. -#### 8. (optional) Explore the data - -Explore the data interactively using the [provided Jupyter Notebook](https://github.com/cp2k/dbcsr/blob/develop/src/acc/libsmm_acc/notebooks/inspect_training_data.ipynb). - -#### 9. Contribute parameters to the community +#### 8. Contribute parameters to the community **Contribute new optimal parameters**