diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml new file mode 100644 index 0000000..5231ca1 --- /dev/null +++ b/.github/workflows/docker.yml @@ -0,0 +1,33 @@ +name: Build and publish Docker image for the project + +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + Docker: + if: github.repository == 'antmicro/dl-in-iot-course' + runs-on: ubuntu-latest + steps: + - name: Cancel previous run + uses: styfle/cancel-workflow-action@0.11.0 + with: + access_token: ${{ github.token }} + - name: Checkout sources + uses: actions/checkout@v4 + - name: Build Docker image + run: docker build . -f environments/Dockerfile --tag ghcr.io/${{ github.repository }} + - name: Login to registry + if: github.ref == 'refs/heads/main' && github.event_name != 'pull_request' + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ github.token }} + - name: Push image to registry + if: github.ref == 'refs/heads/main' && github.event_name != 'pull_request' + run: docker push ghcr.io/${{ github.repository }} diff --git a/.github/workflows/flake8.yml b/.github/workflows/flake8.yml deleted file mode 100644 index 000242c..0000000 --- a/.github/workflows/flake8.yml +++ /dev/null @@ -1,22 +0,0 @@ -name: Flake8 - -on: - push: - branches: [master] - pull_request: - branches: [master] - -jobs: - flake8: - runs-on: ubuntu-latest - steps: - - name: Checkout sources - uses: actions/checkout@v3 - - name: Setup Python - uses: actions/setup-python@v4 - with: - python-version: 3.9 - - name: Install flake8 - run: pip install -U flake8 - - name: Run flake8 - run: flake8 diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 0000000..46a7425 --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,20 @@ +name: pre-commit checks + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - name: Checkout sources + uses: actions/checkout@v4 + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: 3.11 + - name: Run pre-commit + uses: pre-commit/action@v3.0.0 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..075cbea --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,12 @@ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: trailing-whitespace +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.1.2 + hooks: + - id: ruff + args: [--fix, --exit-non-zero-on-fix] + - id: ruff-format +exclude: .+\.patch diff --git a/README.md b/README.md index e03f3b4..dabba8f 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # Optimization of Deep Learning applications for IoT devices - Course tasks -Copyright (c) 2021-2022 [Antmicro](https://www.antmicro.com) +Copyright (c) 2021-2024 [Antmicro](https://www.antmicro.com) -This repository contains tasks for laboratories for the "Optimization of Deep Learning applications for IoT devices" course. +This repository contains tasks for laboratories for the "Optimization of Neural Network applications for IoT devices" course. 
## Course classes @@ -14,6 +14,7 @@ Please follow the links to go to the list of tasks: * [Lab 04 - Introduction to Apache TVM](dl_in_iot_course/l04_tvm) * [Lab 05 - Implementing a TensorFlow Lite delegate](dl_in_iot_course/l05_tflite_delegate) * [Lab 06 - Fine-tuning of model and operations in Apache TVM](dl_in_iot_course/l06_tvm_fine_tuning) +* [Lab 07 - Accelerating ML models on FPGAs with TFLite Micro and CFU Playground](cfu-playground) ## Cloning the repository diff --git a/dl_in_iot_course/__init__.py b/dl_in_iot_course/__init__.py index 1be6ee5..c96cca6 100644 --- a/dl_in_iot_course/__init__.py +++ b/dl_in_iot_course/__init__.py @@ -7,4 +7,4 @@ import os import sys -sys.path.insert(0, os.path.abspath(__file__ + '../')) +sys.path.insert(0, os.path.abspath(__file__ + "../")) diff --git a/dl_in_iot_course/l02_quantization/README.md b/dl_in_iot_course/l02_quantization/README.md index 197ab97..51f3faa 100644 --- a/dl_in_iot_course/l02_quantization/README.md +++ b/dl_in_iot_course/l02_quantization/README.md @@ -50,7 +50,7 @@ It requires implementing methods for: * `[2pt]` Finish the `ImbalancedINT8Model` class: * Implement the `optimize_model` method, where the `calibration_dataset_generator` will take all examples for objects with class 5 and use them for calibration: - + * Use `self.dataset.dataX` and `self.dataset.dataY` to extract all inputs for a particular class. * Remember to use the self.dataset.prepare_input_sample method. @@ -64,7 +64,7 @@ It requires implementing methods for: ``` In the `build/results` directory, the script will create: - + * `-metrics.md` file - contains basic metrics, such as accuracy, precision, sensitivity or G-Mean, along with inference time * `-confusion-matrix.png` file - contains a visualization of the confusion matrix for the model evaluation.
Those files will be created for: diff --git a/dl_in_iot_course/l02_quantization/model_training.py b/dl_in_iot_course/l02_quantization/model_training.py index df39de6..1aa4e38 100644 --- a/dl_in_iot_course/l02_quantization/model_training.py +++ b/dl_in_iot_course/l02_quantization/model_training.py @@ -12,13 +12,13 @@ def __init__(self, modelpath: Path, dataset: PetDataset, from_file=True): self.from_file = from_file self.numclasses = dataset.numclasses self.mean, self.std = dataset.get_input_mean_std() - self.inputspec = tf.TensorSpec((1, 224, 224, 3), name='input_1') + self.inputspec = tf.TensorSpec((1, 224, 224, 3), name="input_1") self.dataset = dataset self.prepare() def load_model(self): tf.keras.backend.clear_session() - if hasattr(self, 'model') and self.model is not None: + if hasattr(self, "model") and self.model is not None: del self.model self.model = tf.keras.models.load_model(str(self.modelpath)) @@ -30,43 +30,23 @@ def prepare(self): self.load_model() else: self.base = tf.keras.applications.MobileNetV2( - input_shape=(224, 224, 3), - include_top=False, - weights='imagenet' + input_shape=(224, 224, 3), include_top=False, weights="imagenet" ) self.base.trainable = False - avgpool = tf.keras.layers.GlobalAveragePooling2D()( - self.base.output - ) - layer1 = tf.keras.layers.Dense( - 1024, - activation='relu')(avgpool) + avgpool = tf.keras.layers.GlobalAveragePooling2D()(self.base.output) + layer1 = tf.keras.layers.Dense(1024, activation="relu")(avgpool) d1 = tf.keras.layers.Dropout(0.3)(layer1) - layer2 = tf.keras.layers.Dense( - 512, - activation='relu')(d1) + layer2 = tf.keras.layers.Dense(512, activation="relu")(d1) d2 = tf.keras.layers.Dropout(0.3)(layer2) - layer3 = tf.keras.layers.Dense( - 128, - activation='relu')(d2) + layer3 = tf.keras.layers.Dense(128, activation="relu")(d2) d3 = tf.keras.layers.Dropout(0.3)(layer3) - output = tf.keras.layers.Dense( - self.numclasses, - name='out_layer' - )(d3) - self.model = tf.keras.models.Model( - inputs=self.base.input, - outputs=output - ) + output = tf.keras.layers.Dense(self.numclasses, name="out_layer")(d3) + self.model = tf.keras.models.Model(inputs=self.base.input, outputs=output) print(self.model.summary()) def train_model( - self, - batch_size: int, - learning_rate: int, - epochs: int, - logdir: Path): - + self, batch_size: int, learning_rate: int, epochs: int, logdir: Path + ): def preprocess_input(path, onehot): data = tf.io.read_file(path) img = tf.io.decode_jpeg(data, channels=3) @@ -79,93 +59,69 @@ def preprocess_input(path, onehot): img = (img - self.mean) / self.std return img, tf.convert_to_tensor(onehot) - Xt, Xv, Yt, Yv = self.dataset.split_dataset( - 0.25 - ) + Xt, Xv, Yt, Yv = self.dataset.split_dataset(0.25) Yt = list(self.dataset.onehotvectors[Yt]) Yv = list(self.dataset.onehotvectors[Yv]) traindataset = tf.data.Dataset.from_tensor_slices((Xt, Yt)) traindataset = traindataset.map( - preprocess_input, - num_parallel_calls=tf.data.experimental.AUTOTUNE + preprocess_input, num_parallel_calls=tf.data.experimental.AUTOTUNE ).batch(batch_size) validdataset = tf.data.Dataset.from_tensor_slices((Xv, Yv)) validdataset = validdataset.map( - preprocess_input, - num_parallel_calls=tf.data.experimental.AUTOTUNE + preprocess_input, num_parallel_calls=tf.data.experimental.AUTOTUNE ).batch(batch_size) tensorboard_callback = tf.keras.callbacks.TensorBoard( - str(logdir), - histogram_freq=1 + str(logdir), histogram_freq=1 ) model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint( - filepath=str(logdir / 
'weights.{epoch:02d}-{val_loss:.2f}.h5'), - monitor='val_categorical_accuracy', - mode='max', - save_best_only=True + filepath=str(logdir / "weights.{epoch:02d}-{val_loss:.2f}.h5"), + monitor="val_categorical_accuracy", + mode="max", + save_best_only=True, ) self.model.compile( optimizer=tf.keras.optimizers.Adam(lr=learning_rate), loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True), - metrics=[ - tf.keras.metrics.CategoricalAccuracy() - ] + metrics=[tf.keras.metrics.CategoricalAccuracy()], ) self.model.fit( traindataset, epochs=epochs, - callbacks=[ - tensorboard_callback, - model_checkpoint_callback - ], - validation_data=validdataset + callbacks=[tensorboard_callback, model_checkpoint_callback], + validation_data=validdataset, ) -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser() + parser.add_argument("--modelpath", help="Path to the model file", type=Path) + parser.add_argument("--dataset-root", help="Path to the dataset file", type=Path) parser.add_argument( - '--modelpath', - help='Path to the model file', - type=Path - ) - parser.add_argument( - '--dataset-root', - help='Path to the dataset file', - type=Path - ) - parser.add_argument( - '--download-dataset', - help='Download the dataset before training', - action='store_true' + "--download-dataset", + help="Download the dataset before training", + action="store_true", ) parser.add_argument( - '--batch-size', - help='Batch size for the training process', + "--batch-size", + help="Batch size for the training process", type=int, - default=128 + default=128, ) parser.add_argument( - '--learning-rate', - help='Starting learning rate for Adam optimizer', + "--learning-rate", + help="Starting learning rate for Adam optimizer", type=float, - default=0.0001 + default=0.0001, ) parser.add_argument( - '--num-epochs', - help='Number of training epochs', - type=int, - default=50 + "--num-epochs", help="Number of training epochs", type=int, default=50 ) parser.add_argument( - '--logdir', - help='The path to the logging directory', - type=Path, - default='logs' + "--logdir", help="The path to the logging directory", type=Path, default="logs" ) args = parser.parse_args() @@ -175,10 +131,5 @@ def preprocess_input(path, onehot): args.logdir.mkdir(parents=True, exist_ok=True) - model.train_model( - args.batch_size, - args.learning_rate, - args.num_epochs, - args.logdir - ) + model.train_model(args.batch_size, args.learning_rate, args.num_epochs, args.logdir) model.save_model() diff --git a/dl_in_iot_course/l02_quantization/quantization_experiments.py b/dl_in_iot_course/l02_quantization/quantization_experiments.py index db68051..3ce9d38 100644 --- a/dl_in_iot_course/l02_quantization/quantization_experiments.py +++ b/dl_in_iot_course/l02_quantization/quantization_experiments.py @@ -14,6 +14,7 @@ class NativeModel(ModelTester): This tester verifies the work of the native TensorFlow model without any optimizations. 
""" + def prepare_model(self): self.model = tf.keras.models.load_model(str(self.modelpath)) # TODO print model summary @@ -36,7 +37,7 @@ class FP32Model(ModelTester): def preprocess_input(self, X): # since we only want to measure inference time, not tensor allocation, # we mode setting tensor to preprocess_input - self.model.set_tensor(self.model.get_input_details()[0]['index'], X) + self.model.set_tensor(self.model.get_input_details()[0]["index"], X) # TODO def run_inference(self): @@ -47,12 +48,14 @@ class INT8Model(ModelTester): """ This tester tests the performance of FP32 TensorFlow Lite model. """ + def __init__( - self, - dataset: PetDataset, - modelpath: Path, - originalmodel: Optional[Path] = None, - calibrationdatasetpercent: float = 0.5): + self, + dataset: PetDataset, + modelpath: Path, + originalmodel: Optional[Path] = None, + calibrationdatasetpercent: float = 0.5, + ): """ Initializer for INT8Model. @@ -76,9 +79,9 @@ def __init__( def optimize_model(self, originalmodel: Path): def calibration_dataset_generator(): return self.dataset.calibration_dataset_generator( - self.calibrationdatasetpercent, - 1234 + self.calibrationdatasetpercent, 1234 ) + # TODO finish implementation # TODO def prepare_model(self): @@ -96,33 +99,21 @@ def optimize_model(self, originalmodel: Path): pass -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser() + parser.add_argument("--model-path", help="Path to the model file", type=Path) + parser.add_argument("--dataset-root", help="Path to the dataset file", type=Path) parser.add_argument( - '--model-path', - help='Path to the model file', - type=Path - ) - parser.add_argument( - '--dataset-root', - help='Path to the dataset file', - type=Path + "--download-dataset", + help="Download the dataset before training", + action="store_true", ) + parser.add_argument("--results-path", help="Path to the results", type=Path) parser.add_argument( - '--download-dataset', - help='Download the dataset before training', - action='store_true' - ) - parser.add_argument( - '--results-path', - help='Path to the results', - type=Path - ) - parser.add_argument( - '--test-dataset-fraction', - help='What fraction of the test dataset should be used for evaluation', + "--test-dataset-fraction", + help="What fraction of the test dataset should be used for evaluation", type=float, - default=1.0 + default=1.0, ) args = parser.parse_args() @@ -134,11 +125,7 @@ def optimize_model(self, originalmodel: Path): # test of the model executed natively tester = NativeModel(dataset, args.model_path) tester.prepare_model() - tester.test_inference( - args.results_path, - 'native', - args.test_dataset_fraction - ) + tester.test_inference(args.results_path, "native", args.test_dataset_fraction) # TODO uncomment tests for each implemented class to test its work diff --git a/dl_in_iot_course/l03_pruning_clustering/pruning_clustering_experiments.py b/dl_in_iot_course/l03_pruning_clustering/pruning_clustering_experiments.py index f5a6abd..42c88a4 100644 --- a/dl_in_iot_course/l03_pruning_clustering/pruning_clustering_experiments.py +++ b/dl_in_iot_course/l03_pruning_clustering/pruning_clustering_experiments.py @@ -9,7 +9,6 @@ class TFMOTOptimizedModel(ModelTester): - def compress_and_fine_tune(self, originalmodel: Path): """ Runs selected compression algorithm and fine-tunes the model. 
@@ -44,31 +43,29 @@ def preprocess_input(path, onehot): self.batch_size = 8 self.learning_rate = 0.00001 self.epochs = 1 - Xt, Xv, Yt, Yv = self.dataset.split_dataset( - 0.4 - ) + Xt, Xv, Yt, Yv = self.dataset.split_dataset(0.4) Yt = list(self.dataset.onehotvectors[Yt]) Yv = list(self.dataset.onehotvectors[Yv]) # TensorFlow Dataset object for training - self.traindataset = tf.data.Dataset.from_tensor_slices((Xt, Yt)).map( - preprocess_input, - num_parallel_calls=tf.data.experimental.AUTOTUNE - ).batch(self.batch_size) + self.traindataset = ( + tf.data.Dataset.from_tensor_slices((Xt, Yt)) + .map(preprocess_input, num_parallel_calls=tf.data.experimental.AUTOTUNE) + .batch(self.batch_size) + ) # TensorFlow Dataset object for validation - self.validdataset = tf.data.Dataset.from_tensor_slices((Xv, Yv)).map( - preprocess_input, - num_parallel_calls=tf.data.experimental.AUTOTUNE - ).batch(self.batch_size) + self.validdataset = ( + tf.data.Dataset.from_tensor_slices((Xv, Yv)) + .map(preprocess_input, num_parallel_calls=tf.data.experimental.AUTOTUNE) + .batch(self.batch_size) + ) # loss function self.loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True) # Adam optimizer - self.optimizer = tf.keras.optimizers.Adam( - learning_rate=self.learning_rate - ) + self.optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate) # Categorical accuracy metric self.metrics = [tf.keras.metrics.CategoricalAccuracy()] @@ -92,13 +89,15 @@ class ClusteredModel(TFMOTOptimizedModel): """ This tester tests the performance of the clustered model. """ + def __init__( - self, - dataset: PetDataset, - modelpath: Path, - originalmodel: Optional[Path] = None, - logdir: Optional[Path] = None, - num_clusters: int = 16): + self, + dataset: PetDataset, + modelpath: Path, + originalmodel: Optional[Path] = None, + logdir: Optional[Path] = None, + num_clusters: int = 16, + ): """ Initializer for ClusteredModel. @@ -131,13 +130,15 @@ class PrunedModel(TFMOTOptimizedModel): """ This tester tests the performance of the pruned model. """ + def __init__( - self, - dataset: PetDataset, - modelpath: Path, - originalmodel: Optional[Path] = None, - logdir: Optional[Path] = None, - target_sparsity: float = 0.3): + self, + dataset: PetDataset, + modelpath: Path, + originalmodel: Optional[Path] = None, + logdir: Optional[Path] = None, + target_sparsity: float = 0.3, + ): """ Initializer for PrunedModel. 
@@ -163,40 +164,26 @@ def __init__( def compress_and_fine_tune(self, originalmodel): self.epochs = 4 self.sched = tfmot.sparsity.keras.ConstantSparsity( - self.target_sparsity, - begin_step=0, - end_step=1, - frequency=1) + self.target_sparsity, begin_step=0, end_step=1, frequency=1 + ) # TODO implement model pruning and fine-tuning -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser() + parser.add_argument("--model-path", help="Path to the model file", type=Path) + parser.add_argument("--dataset-root", help="Path to the dataset file", type=Path) parser.add_argument( - '--model-path', - help='Path to the model file', - type=Path - ) - parser.add_argument( - '--dataset-root', - help='Path to the dataset file', - type=Path - ) - parser.add_argument( - '--download-dataset', - help='Download the dataset before training', - action='store_true' - ) - parser.add_argument( - '--results-path', - help='Path to the results', - type=Path + "--download-dataset", + help="Download the dataset before training", + action="store_true", ) + parser.add_argument("--results-path", help="Path to the results", type=Path) parser.add_argument( - '--test-dataset-fraction', - help='What fraction of the test dataset should be used for evaluation', + "--test-dataset-fraction", + help="What fraction of the test dataset should be used for evaluation", type=float, - default=1.0 + default=1.0, ) args = parser.parse_args() diff --git a/dl_in_iot_course/l03_pruning_clustering/structured_pruning_experiments.py b/dl_in_iot_course/l03_pruning_clustering/structured_pruning_experiments.py index 22577c4..923ee67 100644 --- a/dl_in_iot_course/l03_pruning_clustering/structured_pruning_experiments.py +++ b/dl_in_iot_course/l03_pruning_clustering/structured_pruning_experiments.py @@ -19,7 +19,7 @@ import tensorflow as tf # noqa: F401 # For training use 'cuda', for evaluation purposes use 'cpu' -DEVICE = 'cpu' +DEVICE = "cpu" # Initial learning rate for Adam optimizer TRAINING_LEARNING_RATE = 0.001 FINETUNE_LEARNING_RATE = 0.0001 @@ -40,6 +40,7 @@ class FashionClassifier(nn.Module): PyTorch module containing a simple classifier for Fashion MNIST dataset. """ + def __init__(self): """ Creates all model layers and structures. @@ -71,14 +72,15 @@ def forward(self, x): return x def train_model( - self, - optimizer, - criterion, - epochs, - trainloader, - valloader=None, - lastbestmodelpath=None, - evaluate_model=True): + self, + optimizer, + criterion, + epochs, + trainloader, + valloader=None, + lastbestmodelpath=None, + evaluate_model=True, + ): """ Trains the model on given training dataset. 
@@ -119,13 +121,17 @@ def train_model( losssum += loss losscount += 1 - bar.set_description(f'train epoch: {epoch:3}') - print(f'Mean loss for epoch {epoch}: {losssum.data.cpu().numpy() / losscount}') # noqa: E501 + bar.set_description(f"train epoch: {epoch:3}") + print( + f"Mean loss for epoch {epoch}: {losssum.data.cpu().numpy() / losscount}" + ) # noqa: E501 if evaluate_model: acc = self.evaluate(valloader) - print(f'Val accuracy for epoch {epoch}: {acc}') + print(f"Val accuracy for epoch {epoch}: {acc}") if acc > best_acc: - print(f'ACCURACY improved for epoch {epoch}: prev={best_acc}, curr={acc}') # noqa: E501 + print( + f"ACCURACY improved for epoch {epoch}: prev={best_acc}, curr={acc}" + ) # noqa: E501 best_acc = acc if lastbestmodelpath: torch.save(self.state_dict(), lastbestmodelpath) @@ -154,7 +160,7 @@ def evaluate(self, dataloader): numinferences = 0 with torch.no_grad(): bar = tqdm(dataloader) - for (images, labels) in bar: + for images, labels in bar: images = images.to(self.device) labels = labels.to(self.device) start = time.perf_counter() @@ -164,11 +170,11 @@ def evaluate(self, dataloader): _, predicted = torch.max(outputs, 1) total += labels.size(0) correct += (predicted == labels).sum().item() - bar.set_description(f'valid [correct={correct}, total={total}') + bar.set_description(f"valid [correct={correct}, total={total}]") acc = 100 * correct / total meaninference = 1000.0 * inferencetimesum / numinferences - print(f'Achieved accuracy: {acc} %') - print(f'Mean inference time: {meaninference} ms') + print(f"Achieved accuracy: {acc} %") + print(f"Mean inference time: {meaninference} ms") return acc def convert_to_onnx(self, outputpath): @@ -202,50 +208,41 @@ def convert_onnx_to_tflite(onnx_file, tflite_file): def main(): parser = argparse.ArgumentParser() parser.add_argument( - '--input-model', - type=Path, - help='Path to the PyTorch model', - required=True - ) - parser.add_argument( - '--backup-model', - type=Path, - help='Path where the best current model will be saved', - required=True + "--input-model", type=Path, help="Path to the PyTorch model", required=True ) parser.add_argument( - '--final-model', + "--backup-model", type=Path, - help='Path where the final model will be saved', - required=True + help="Path where the best current model will be saved", + required=True, ) parser.add_argument( - '--onnx-model', + "--final-model", type=Path, - help='Path to ONNX file with model' + help="Path where the final model will be saved", + required=True, ) + parser.add_argument("--onnx-model", type=Path, help="Path to ONNX file with model") parser.add_argument( - '--tflite-model', - type=Path, - help='Path to TFLite file with model' + "--tflite-model", type=Path, help="Path to TFLite file with model" ) parser.add_argument( - '--dataset-path', + "--dataset-path", type=Path, - help='Path where train and test dataset should be stored', - required=True + help="Path where train and test dataset should be stored", + required=True, ) parser.add_argument( - '--train-model', - action='store_true', - help='Trains the model from scratch and saves it to input_model path' + "--train-model", + action="store_true", + help="Trains the model from scratch and saves it to input_model path", ) args = parser.parse_args() # create train/test dataset paths - traindatasetpath = args.dataset_path / 'train' - testdatasetpath = args.dataset_path / 'test' + traindatasetpath = args.dataset_path / "train" + testdatasetpath = args.dataset_path / "test" traindatasetpath.mkdir(parents=True, exist_ok=True)
testdatasetpath.mkdir(parents=True, exist_ok=True) @@ -258,7 +255,7 @@ def main(): traindatasetpath, train=True, download=True, - transform=transforms.Compose([transforms.ToTensor()]) + transform=transforms.Compose([transforms.ToTensor()]), ) # compute mean/std for the train dataset @@ -269,12 +266,14 @@ def main(): # add transforms for dataset data # introduce basic data augmentations - dataset.transform = transforms.Compose([ - transforms.ToTensor(), - transforms.RandomHorizontalFlip(), - transforms.RandomAffine(5, scale=(0.95, 1.05)), - transforms.Normalize(mean, std) - ]) + dataset.transform = transforms.Compose( + [ + transforms.ToTensor(), + transforms.RandomHorizontalFlip(), + transforms.RandomAffine(5, scale=(0.95, 1.05)), + transforms.Normalize(mean, std), + ] + ) # split training dataset into training and validation dataset trainset, valset = torch.utils.data.random_split(dataset, [40000, 20000]) @@ -284,32 +283,24 @@ def main(): testdatasetpath, train=False, download=True, - transform=transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize(mean, std) - ]) + transform=transforms.Compose( + [transforms.ToTensor(), transforms.Normalize(mean, std)] + ), ) - print(f'No. of samples: train={len(trainset)}, val={len(valset)}, test={len(tdataset)}') # noqa: E501 + print( + f"No. of samples: train={len(trainset)}, val={len(valset)}, test={len(tdataset)}" + ) # noqa: E501 # define dataloaders for each dataset trainloader = torch.utils.data.DataLoader( - trainset, - batch_size=BATCH_SIZE, - num_workers=0, - shuffle=True + trainset, batch_size=BATCH_SIZE, num_workers=0, shuffle=True ) valloader = torch.utils.data.DataLoader( - valset, - batch_size=1, - num_workers=0, - shuffle=False + valset, batch_size=1, num_workers=0, shuffle=False ) testloader = torch.utils.data.DataLoader( - tdataset, - batch_size=1, - num_workers=0, - shuffle=False + tdataset, batch_size=1, num_workers=0, shuffle=False ) # define loss @@ -317,10 +308,7 @@ def main(): # train the model or load from file if args.train_model: - toptimizer = torch.optim.Adam( - model.parameters(), - lr=TRAINING_LEARNING_RATE - ) + toptimizer = torch.optim.Adam(model.parameters(), lr=TRAINING_LEARNING_RATE) model.train_model( toptimizer, criterion, @@ -328,22 +316,19 @@ def main(): trainloader, valloader, args.backup_model, - True + True, ) # use the model with the highest accuracy shutil.copy(str(args.backup_model), str(args.input_model)) # load the model - input_data = torch.load( - args.input_model, - map_location=torch.device(DEVICE) - ) + input_data = torch.load(args.input_model, map_location=torch.device(DEVICE)) model.load_state_dict(input_data, strict=False) # print the model - print('ORIGINAL MODEL') + print("ORIGINAL MODEL") print(model) - print('ORIGINAL MODEL QUALITY') + print("ORIGINAL MODEL QUALITY") model.evaluate(testloader) # create a NNI-traced optimizer using the Adam optimizer @@ -356,13 +341,7 @@ def main(): def trainer(mod, opt, crit): model.train_model( - opt, - crit, - MEASUREMENTS_EPOCHS, - trainloader, - valloader, - None, - False + opt, crit, MEASUREMENTS_EPOCHS, trainloader, valloader, None, False ) # define APoZRankPruner @@ -374,18 +353,18 @@ def trainer(mod, opt, crit): _, masks = pruner.compress() # show pruned weights - print('Pruned weights:') + print("Pruned weights:") pruner.show_pruned_weights() - print('Unwrapping the model...') + print("Unwrapping the model...") pruner._unwrap_model() - print('Unwrapped model') + print("Unwrapped model") # TODO create ModelSpeedup object with model, masks 
# dummy_input and run speedup_model - print('MODEL AFTER PRUNING') + print("MODEL AFTER PRUNING") print(model) - print('PRUNED MODEL QUALITY BEFORE FINE-TUNING') + print("PRUNED MODEL QUALITY BEFORE FINE-TUNING") model.evaluate(testloader) # TODO define fine-tune optimizer @@ -397,12 +376,12 @@ def trainer(mod, opt, crit): FINE_TUNE_EPOCHS, trainloader, valloader, - args.backup_model + args.backup_model, ) torch.save(model.state_dict(), args.final_model) - print('PRUNED MODEL QUALITY AFTER FINE-TUNING') + print("PRUNED MODEL QUALITY AFTER FINE-TUNING") model.evaluate(testloader) if args.onnx_model: @@ -412,5 +391,5 @@ def trainer(mod, opt, crit): convert_onnx_to_tflite(args.onnx_model, args.tflite_model) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/dl_in_iot_course/l04_tvm/tvm_experiments.py b/dl_in_iot_course/l04_tvm/tvm_experiments.py index c06f3c3..a43f2cd 100644 --- a/dl_in_iot_course/l04_tvm/tvm_experiments.py +++ b/dl_in_iot_course/l04_tvm/tvm_experiments.py @@ -14,15 +14,16 @@ class TVMModel(ModelTester): def __init__( - self, - dataset: PetDataset, - modelpath: Path, - originalmodel: Optional[Path] = None, - logdir: Optional[Path] = None, - target: str = 'llvm', - target_host: Optional[str] = None, - opt_level: int = 3, - use_nchw_layout: bool = False): + self, + dataset: PetDataset, + modelpath: Path, + originalmodel: Optional[Path] = None, + logdir: Optional[Path] = None, + target: str = "llvm", + target_host: Optional[str] = None, + opt_level: int = 3, + use_nchw_layout: bool = False, + ): """ Initializer for ModelTester. @@ -70,7 +71,7 @@ def run_inference(self): pass def optimize_model(self, originalmodel: Path): - with open(originalmodel, 'rb') as f: + with open(originalmodel, "rb") as f: modelfile = f.read() tflite_model = tflite.Model.GetRootAsModel(modelfile, 0) # noqa: F841 @@ -81,24 +82,24 @@ def optimize_model(self, originalmodel: Path): input_details = interpreter.get_input_details()[0] output_details = interpreter.get_output_details()[0] - if input_details['dtype'] in [np.int8, np.uint8]: + if input_details["dtype"] in [np.int8, np.uint8]: self.quantized = True - self.input_dtype = input_details['dtype'] - self.in_scale, self.in_zero_point = input_details['quantization'] - self.output_dtype = output_details['dtype'] - self.out_scale, self.out_zero_point = output_details[ - 'quantization' - ] + self.input_dtype = input_details["dtype"] + self.in_scale, self.in_zero_point = input_details["quantization"] + self.output_dtype = output_details["dtype"] + self.out_scale, self.out_zero_point = output_details["quantization"] transforms = [relay.transform.RemoveUnusedFunctions()] if self.use_nchw_layout: transforms.append( - relay.transform.ConvertLayout({ - "nn.conv2d": ['NCHW', 'default'], - # TODO add support for converting layout in quantized - # network - }) + relay.transform.ConvertLayout( + { + "nn.conv2d": ["NCHW", "default"], + # TODO add support for converting layout in quantized + # network + } + ) ) seq = transform.Sequential(transforms) # noqa: F841 @@ -107,54 +108,41 @@ def optimize_model(self, originalmodel: Path): pass -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( - '--fp32-model-path', - help='Path to the FP32 TFLite model file', + "--fp32-model-path", + help="Path to the FP32 TFLite model file", type=Path, - required=True + required=True, ) parser.add_argument( - '--int8-model-path', - help='Path to the INT8 TFLite model file', + "--int8-model-path", + help="Path 
to the INT8 TFLite model file", type=Path, - required=True + required=True, ) parser.add_argument( - '--dataset-root', - help='Path to the dataset file', - type=Path, - required=True + "--dataset-root", help="Path to the dataset file", type=Path, required=True ) parser.add_argument( - '--download-dataset', - help='Download the dataset before training', - action='store_true' + "--download-dataset", + help="Download the dataset before training", + action="store_true", ) parser.add_argument( - '--results-path', - help='Path to the results', - type=Path, - required=True + "--results-path", help="Path to the results", type=Path, required=True ) parser.add_argument( - '--test-dataset-fraction', - help='What fraction of the test dataset should be used for evaluation', + "--test-dataset-fraction", + help="What fraction of the test dataset should be used for evaluation", type=float, - default=1.0 - ) - parser.add_argument( - '--target', - help='The device to run the model on', - type=str, - default='llvm' + default=1.0, ) parser.add_argument( - '--target-host', - help='The host CPU type', - default=None + "--target", help="The device to run the model on", type=str, default="llvm" ) + parser.add_argument("--target-host", help="The host CPU type", default=None) args = parser.parse_args() diff --git a/dl_in_iot_course/l05_tflite_delegate/delegate_experiment.py b/dl_in_iot_course/l05_tflite_delegate/delegate_experiment.py index 6510470..3585f83 100644 --- a/dl_in_iot_course/l05_tflite_delegate/delegate_experiment.py +++ b/dl_in_iot_course/l05_tflite_delegate/delegate_experiment.py @@ -38,54 +38,40 @@ def convert_to_tflite(tfpath: Path, tflitepath: Path): pass -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( - '--input-onnx-model-path', - help='Path to the ONNX model file', + "--input-onnx-model-path", + help="Path to the ONNX model file", type=Path, - required=True + required=True, ) parser.add_argument( - '--tensorflow-model-path', - help='Path to the compiled model file', + "--tensorflow-model-path", + help="Path to the TensorFlow model file", type=Path, - required=True + required=True, ) parser.add_argument( - '--compiled-model-path', - help='Path to the compiled model file', + "--compiled-model-path", + help="Path to the compiled model file", type=Path, - required=True + required=True, ) parser.add_argument( - '--delegate-path', - help='Path to the model delegate', - type=Path, - required=True + "--delegate-path", help="Path to the model delegate", type=Path, required=True ) parser.add_argument( - '--num-tests', - help='Number of tests to conduct', - type=int, - default=1000 + "--num-tests", help="Number of tests to conduct", type=int, default=1000 ) args = parser.parse_args() - convert_onnx_to_tensorflow( - args.input_onnx_model_path, - args.tensorflow_model_path - ) + convert_onnx_to_tensorflow(args.input_onnx_model_path, args.tensorflow_model_path) - convert_to_tflite( - args.tensorflow_model_path, - args.compiled_model_path - ) + convert_to_tflite(args.tensorflow_model_path, args.compiled_model_path) - nodelegate = tf.lite.Interpreter( - str(args.compiled_model_path) - ) + nodelegate = tf.lite.Interpreter(str(args.compiled_model_path)) nodelegate.allocate_tensors() delegate = tf.lite.Interpreter( @@ -95,18 +81,16 @@ def convert_to_tflite(tfpath: Path, tflitepath: Path): delegate.allocate_tensors() for _ in tqdm(range(args.num_tests)): - x = np.random.randint(-200, 200, size=(1, 4)).astype('float32') - y = np.random.randint(-200, 200,
size=(1, 3)).astype('float32') - nodelegate.set_tensor(nodelegate.get_input_details()[0]['index'], x) - nodelegate.set_tensor(nodelegate.get_input_details()[1]['index'], y) + x = np.random.randint(-200, 200, size=(1, 4)).astype("float32") + y = np.random.randint(-200, 200, size=(1, 3)).astype("float32") + nodelegate.set_tensor(nodelegate.get_input_details()[0]["index"], x) + nodelegate.set_tensor(nodelegate.get_input_details()[1]["index"], y) nodelegate.invoke() nodelegateres = nodelegate.get_tensor( - nodelegate.get_output_details()[0]['index'] + nodelegate.get_output_details()[0]["index"] ) - delegate.set_tensor(delegate.get_input_details()[0]['index'], x) - delegate.set_tensor(delegate.get_input_details()[1]['index'], y) + delegate.set_tensor(delegate.get_input_details()[0]["index"], x) + delegate.set_tensor(delegate.get_input_details()[1]["index"], y) delegate.invoke() - delegateres = delegate.get_tensor( - delegate.get_output_details()[0]['index'] - ) + delegateres = delegate.get_tensor(delegate.get_output_details()[0]["index"]) assert_almost_equal(nodelegateres, delegateres, 0.01) diff --git a/dl_in_iot_course/l06_tvm_fine_tuning/fine_tuning_experiments.py b/dl_in_iot_course/l06_tvm_fine_tuning/fine_tuning_experiments.py index 8ee761b..e1ab0f7 100644 --- a/dl_in_iot_course/l06_tvm_fine_tuning/fine_tuning_experiments.py +++ b/dl_in_iot_course/l06_tvm_fine_tuning/fine_tuning_experiments.py @@ -16,17 +16,18 @@ class TVMFineTunedModel(TVMModel): def __init__( - self, - dataset: PetDataset, - modelpath: Path, - optlogpath: Path, - graphoptlogpath: Path, - originalmodel: Optional[Path] = None, - logdir: Optional[Path] = None, - target: str = 'llvm', - target_host: Optional[str] = None, - opt_level: int = 3, - tunertype: str = 'xgb'): + self, + dataset: PetDataset, + modelpath: Path, + optlogpath: Path, + graphoptlogpath: Path, + originalmodel: Optional[Path] = None, + logdir: Optional[Path] = None, + target: str = "llvm", + target_host: Optional[str] = None, + opt_level: int = 3, + tunertype: str = "xgb", + ): """ Initializer for TVMFineTunedModel. 
@@ -67,7 +68,8 @@ def __init__( target, target_host, opt_level, - True) + True, + ) def get_tuner(self, task): """ @@ -82,14 +84,14 @@ def get_tuner(self, task): ------- tvm.autotvm.tuner.Tuner : Tuner for the task """ - assert self.tunertype in ['xgb', 'ga', 'random', 'gridsearch'] - if self.tunertype == 'xgb': - return XGBTuner(task, loss_type='rank') - elif self.tunertype == 'ga': + assert self.tunertype in ["xgb", "ga", "random", "gridsearch"] + if self.tunertype == "xgb": + return XGBTuner(task, loss_type="rank") + elif self.tunertype == "ga": return GATuner(task, pop_size=50) - elif self.tunertype == 'random': + elif self.tunertype == "random": return RandomTuner(task) - elif self.tunertype == 'gridsearch': + elif self.tunertype == "gridsearch": return GridSearchTuner(task) def tune_kernels(self, tasks, measure_option): @@ -123,7 +125,7 @@ def tune_graph(self, graph): raise NotImplementedError def optimize_model(self, originalmodel: Path): - with open(originalmodel, 'rb') as f: + with open(originalmodel, "rb") as f: modelfile = f.read() tflite_model = tflite.Model.GetRootAsModel(modelfile, 0) # noqa: F841 @@ -134,22 +136,22 @@ def optimize_model(self, originalmodel: Path): input_details = interpreter.get_input_details()[0] output_details = interpreter.get_output_details()[0] - self.input_dtype = input_details['dtype'] - self.input_shape = input_details['shape'] - self.input_name = input_details['name'] - self.output_dtype = output_details['dtype'] + self.input_dtype = input_details["dtype"] + self.input_shape = input_details["shape"] + self.input_name = input_details["name"] + self.output_dtype = output_details["dtype"] # we do not support quantized models in this converter - assert input_details['dtype'] not in [np.int8, np.uint8] + assert input_details["dtype"] not in [np.int8, np.uint8] - mod, params = relay.frontend.from_tflite( - tflite_model - ) + mod, params = relay.frontend.from_tflite(tflite_model) transforms = [relay.transform.RemoveUnusedFunctions()] transforms.append( - relay.transform.ConvertLayout({ - "nn.conv2d": ['NCHW', 'default'], - }) + relay.transform.ConvertLayout( + { + "nn.conv2d": ["NCHW", "default"], + } + ) ) seq = transform.Sequential(transforms) # noqa: F841 @@ -160,62 +162,50 @@ def optimize_model(self, originalmodel: Path): measure_option = autotvm.measure_option( # noqa: F841 builder=autotvm.LocalBuilder(), runner=autotvm.LocalRunner( - number=4, - repeat=10, - enable_cpu_cache_flush=True - ) + number=4, repeat=10, enable_cpu_cache_flush=True + ), ) # TODO finish implementation raise NotImplementedError -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( - '--fp32-model-path', - help='Path to the FP32 TFLite model file', + "--fp32-model-path", + help="Path to the FP32 TFLite model file", type=Path, - required=True + required=True, ) parser.add_argument( - '--dataset-root', - help='Path to the dataset file', - type=Path, - required=True + "--dataset-root", help="Path to the dataset file", type=Path, required=True ) parser.add_argument( - '--download-dataset', - help='Download the dataset before training', - action='store_true' + "--download-dataset", + help="Download the dataset before training", + action="store_true", ) parser.add_argument( - '--results-path', - help='Path to the results', - type=Path, - required=True + "--results-path", help="Path to the results", type=Path, required=True ) parser.add_argument( - '--test-dataset-fraction', - help='What fraction of the test dataset should be used for
evaluation', + "--test-dataset-fraction", + help="What fraction of the test dataset should be used for evaluation", type=float, - default=1.0 + default=1.0, ) parser.add_argument( - '--target', - help='The device to run the model on', + "--target", + help="The device to run the model on", type=str, - default='llvm -mcpu=core-avx2' - ) - parser.add_argument( - '--target-host', - help='The host CPU type', - default=None + default="llvm -mcpu=core-avx2", ) + parser.add_argument("--target-host", help="The host CPU type", default=None) parser.add_argument( - '--tuner-type', - help='Type of the tuner to use for kernel optimizations', - default='xgb' + "--tuner-type", + help="Type of the tuner to use for kernel optimizations", + default="xgb", ) args = parser.parse_args() @@ -226,18 +216,14 @@ def optimize_model(self, originalmodel: Path): tester = TVMFineTunedModel( dataset, - args.results_path / f'{args.fp32_model_path.stem}.tvm-tune.so', - args.results_path / f'{args.fp32_model_path.stem}.tvm-tune.kernellog', - args.results_path / f'{args.fp32_model_path.stem}.tvm-tune.graphlog', + args.results_path / f"{args.fp32_model_path.stem}.tvm-tune.so", + args.results_path / f"{args.fp32_model_path.stem}.tvm-tune.kernellog", + args.results_path / f"{args.fp32_model_path.stem}.tvm-tune.graphlog", args.fp32_model_path, - args.results_path / 'tvm-tune', + args.results_path / "tvm-tune", args.target, args.target_host, 3, - args.tuner_type - ) - tester.test_inference( - args.results_path, - 'tvm-tune', - args.test_dataset_fraction + args.tuner_type, ) + tester.test_inference(args.results_path, "tvm-tune", args.test_dataset_fraction) diff --git a/dl_in_iot_course/misc/draw.py b/dl_in_iot_course/misc/draw.py index 36933cb..5aedadc 100644 --- a/dl_in_iot_course/misc/draw.py +++ b/dl_in_iot_course/misc/draw.py @@ -12,13 +12,14 @@ def draw_confusion_matrix( - confusion_matrix: np.ndarray, - outpath: Optional[Path], - title: str, - class_names: List[str], - cmap=None, - figsize: Optional[Tuple] = None, - dpi: Optional[int] = None): + confusion_matrix: np.ndarray, + outpath: Optional[Path], + title: str, + class_names: List[str], + cmap=None, + figsize: Optional[Tuple] = None, + dpi: Optional[int] = None, +): """ Creates a confusion matrix plot. 
@@ -41,7 +42,7 @@ def draw_confusion_matrix( The dpi of the plot """ if cmap is None: - cmap = plt.get_cmap('BuPu') + cmap = plt.get_cmap("BuPu") confusion_matrix = np.array(confusion_matrix, dtype=np.float32, copy=True) @@ -50,8 +51,7 @@ def draw_confusion_matrix( correctactual = correctactual.reshape(1, len(class_names)) # compute precision - correctpredicted = \ - confusion_matrix.diagonal() / confusion_matrix.sum(axis=0) + correctpredicted = confusion_matrix.diagonal() / confusion_matrix.sum(axis=0) correctpredicted = correctpredicted.reshape(len(class_names), 1) # compute overall accuracy @@ -70,19 +70,15 @@ def draw_confusion_matrix( # create axes fig = plt.figure(figsize=figsize, dpi=dpi) gs = gridspec.GridSpec(len(class_names) + 1, len(class_names) + 1) - axConfMatrix = fig.add_subplot(gs[0:len(class_names), 0:len(class_names)]) + axConfMatrix = fig.add_subplot(gs[0 : len(class_names), 0 : len(class_names)]) axPredicted = fig.add_subplot( - gs[len(class_names), 0:len(class_names)], - sharex=axConfMatrix + gs[len(class_names), 0 : len(class_names)], sharex=axConfMatrix ) axActual = fig.add_subplot( - gs[0:len(class_names), len(class_names)], - sharey=axConfMatrix + gs[0 : len(class_names), len(class_names)], sharey=axConfMatrix ) axTotal = fig.add_subplot( - gs[len(class_names), len(class_names)], - sharex=axActual, - sharey=axPredicted + gs[len(class_names), len(class_names)], sharex=axActual, sharey=axPredicted ) # define ticks for classes @@ -90,112 +86,118 @@ def draw_confusion_matrix( # configure and draw confusion matrix axConfMatrix.set_xticks(ticks) - axConfMatrix.set_xticklabels(class_names, fontsize='large', rotation=90) + axConfMatrix.set_xticklabels(class_names, fontsize="large", rotation=90) axConfMatrix.set_yticks(ticks) - axConfMatrix.set_yticklabels(class_names, fontsize='large') - axConfMatrix.set_xlabel('Actual class', fontsize='x-large') - axConfMatrix.set_ylabel('Predicted class', fontsize='x-large') + axConfMatrix.set_yticklabels(class_names, fontsize="large") + axConfMatrix.set_xlabel("Actual class", fontsize="x-large") + axConfMatrix.set_ylabel("Predicted class", fontsize="x-large") img = axConfMatrix.imshow( confusion_matrix, - interpolation='nearest', + interpolation="nearest", cmap=cmap, - aspect='auto', + aspect="auto", vmin=0.0, - vmax=1.0 + vmax=1.0, ) - axConfMatrix.xaxis.set_ticks_position('top') - axConfMatrix.xaxis.set_label_position('top') + axConfMatrix.xaxis.set_ticks_position("top") + axConfMatrix.xaxis.set_label_position("top") # add percentages for confusion matrix - for i, j in itertools.product( - range(len(class_names)), - range(len(class_names))): + for i, j in itertools.product(range(len(class_names)), range(len(class_names))): txt = axConfMatrix.text( - j, i, - ('100' if confusion_matrix[i, j] == 1.0 - else f'{100.0 * confusion_matrix[i,j]:3.1f}'), - ha='center', - va='center', - color='black', - fontsize='medium') - txt.set_path_effects([ - patheffects.withStroke(linewidth=5, foreground='w') - ]) + j, + i, + ( + "100" + if confusion_matrix[i, j] == 1.0 + else f"{100.0 * confusion_matrix[i,j]:3.1f}" + ), + ha="center", + va="center", + color="black", + fontsize="medium", + ) + txt.set_path_effects([patheffects.withStroke(linewidth=5, foreground="w")]) # configure and draw sensitivity percentages axPredicted.set_xticks(ticks) axPredicted.set_yticks([0]) - axPredicted.set_xlabel('Sensitivity', fontsize='large') + axPredicted.set_xlabel("Sensitivity", fontsize="large") axPredicted.imshow( correctactual, - interpolation='nearest', - 
cmap='RdYlGn', - aspect='auto', + interpolation="nearest", + cmap="RdYlGn", + aspect="auto", vmin=0.0, - vmax=1.0 + vmax=1.0, ) for i in range(len(class_names)): txt = axPredicted.text( - i, 0, - ('100' if correctactual[0, i] == 1.0 - else f'{100.0 * correctactual[0, i]:3.1f}'), - ha='center', - va='center', - color='black', - fontsize='medium') - txt.set_path_effects([ - patheffects.withStroke(linewidth=5, foreground='w') - ]) + i, + 0, + ( + "100" + if correctactual[0, i] == 1.0 + else f"{100.0 * correctactual[0, i]:3.1f}" + ), + ha="center", + va="center", + color="black", + fontsize="medium", + ) + txt.set_path_effects([patheffects.withStroke(linewidth=5, foreground="w")]) # configure and draw precision percentages axActual.set_xticks([0]) axActual.set_yticks(ticks) - axActual.set_ylabel('Precision', fontsize='large') - axActual.yaxis.set_label_position('right') + axActual.set_ylabel("Precision", fontsize="large") + axActual.yaxis.set_label_position("right") axActual.imshow( correctpredicted, - interpolation='nearest', - cmap='RdYlGn', - aspect='auto', + interpolation="nearest", + cmap="RdYlGn", + aspect="auto", vmin=0.0, - vmax=1.0 + vmax=1.0, ) for i in range(len(class_names)): txt = axActual.text( - 0, i, - ('100' if correctpredicted[i, 0] == 1.0 - else f'{100.0 * correctpredicted[i, 0]:3.1f}'), - ha='center', - va='center', - color='black', - fontsize='medium') - txt.set_path_effects([ - patheffects.withStroke(linewidth=5, foreground='w') - ]) + 0, + i, + ( + "100" + if correctpredicted[i, 0] == 1.0 + else f"{100.0 * correctpredicted[i, 0]:3.1f}" + ), + ha="center", + va="center", + color="black", + fontsize="medium", + ) + txt.set_path_effects([patheffects.withStroke(linewidth=5, foreground="w")]) # configure and draw total accuracy axTotal.set_xticks([0]) axTotal.set_yticks([0]) - axTotal.set_xlabel('Accuracy', fontsize='large') + axTotal.set_xlabel("Accuracy", fontsize="large") axTotal.imshow( np.array([[accuracy]]), - interpolation='nearest', - cmap='RdYlGn', - aspect='auto', + interpolation="nearest", + cmap="RdYlGn", + aspect="auto", vmin=0.0, - vmax=1.0 + vmax=1.0, ) txt = axTotal.text( - 0, 0, - f'{100 * accuracy:3.1f}', - ha='center', - va='center', - color='black', - fontsize='medium' + 0, + 0, + f"{100 * accuracy:3.1f}", + ha="center", + va="center", + color="black", + fontsize="medium", ) - txt.set_path_effects([ - patheffects.withStroke(linewidth=5, foreground='w') - ]) + txt.set_path_effects([patheffects.withStroke(linewidth=5, foreground="w")]) # disable axes for other matrices than confusion matrix for a in (axPredicted, axActual, axTotal): @@ -204,24 +206,18 @@ def draw_confusion_matrix( # draw colorbar for confusion matrix cbar = fig.colorbar( - img, - ax=[axPredicted, axConfMatrix, axActual, axTotal], - shrink=0.5, - pad=0.1 + img, ax=[axPredicted, axConfMatrix, axActual, axTotal], shrink=0.5, pad=0.1 ) for t in cbar.ax.get_yticklabels(): - t.set_fontsize('medium') - suptitlehandle = fig.suptitle( - f'{title} (ACC={accuracy:.5f})', - fontsize='xx-large' - ) + t.set_fontsize("medium") + suptitlehandle = fig.suptitle(f"{title} (ACC={accuracy:.5f})", fontsize="xx-large") if outpath is None: plt.show() else: plt.savefig( outpath, dpi=dpi, - bbox_inches='tight', + bbox_inches="tight", bbox_extra_artists=[suptitlehandle], - pad_inches=0.1 + pad_inches=0.1, ) diff --git a/dl_in_iot_course/misc/metrics.py b/dl_in_iot_course/misc/metrics.py index ce472f8..1529968 100644 --- a/dl_in_iot_course/misc/metrics.py +++ b/dl_in_iot_course/misc/metrics.py @@ -30,10 +30,7 @@ def 
mean_precision(confusion_matrix: np.ndarray): ------- float : mean precision value """ - return np.mean( - confusion_matrix.diagonal() / - np.sum(confusion_matrix, axis=1) - ) + return np.mean(confusion_matrix.diagonal() / np.sum(confusion_matrix, axis=1)) def mean_sensitivity(confusion_matrix: np.ndarray): @@ -49,10 +46,7 @@ def mean_sensitivity(confusion_matrix: np.ndarray): ------- float : Mean sensitivity """ - return np.mean( - confusion_matrix.diagonal() / - np.sum(confusion_matrix, axis=0) - ) + return np.mean(confusion_matrix.diagonal() / np.sum(confusion_matrix, axis=0)) def g_mean(confusion_matrix: np.ndarray): @@ -68,7 +62,9 @@ def g_mean(confusion_matrix: np.ndarray): ------- float : G-Mean value """ - return np.float_power(np.prod( - np.array(confusion_matrix).diagonal() / - np.sum(confusion_matrix, axis=0) - ), 1.0 / np.array(confusion_matrix).shape[0]) + return np.float_power( + np.prod( + np.array(confusion_matrix).diagonal() / np.sum(confusion_matrix, axis=0) + ), + 1.0 / np.array(confusion_matrix).shape[0], + ) diff --git a/dl_in_iot_course/misc/modeltester.py b/dl_in_iot_course/misc/modeltester.py index 2b94c38..ccee6b7 100644 --- a/dl_in_iot_course/misc/modeltester.py +++ b/dl_in_iot_course/misc/modeltester.py @@ -15,11 +15,12 @@ class ModelTester(object): """ def __init__( - self, - dataset: PetDataset, - modelpath: Path, - originalmodel: Optional[Path] = None, - logdir: Optional[Path] = None): + self, + dataset: PetDataset, + modelpath: Path, + originalmodel: Optional[Path] = None, + logdir: Optional[Path] = None, + ): """ Initializer for ModelTester. @@ -68,10 +69,8 @@ def _run_inference(self) -> Optional[Any]: return result def test_inference( - self, - resultspath: Path, - prefix: str, - testdatasetpercentage: float = 0.3): + self, resultspath: Path, prefix: str, testdatasetpercentage: float = 0.3 + ): """ Runs inference on test data and evaluates the model. 
@@ -92,11 +91,10 @@ def test_inference( dataY = self.dataset.testY else: _, dataX, _, dataY = self.dataset.split_dataset( - percentage=testdatasetpercentage, - usetest=True + percentage=testdatasetpercentage, usetest=True ) # for each entry in the test dataset - for X, y in tqdm(list(zip(dataX, dataY)), desc=f'evaluating {prefix}'): + for X, y in tqdm(list(zip(dataX, dataY)), desc=f"evaluating {prefix}"): # preprocess data Xp = self.dataset.prepare_input_sample(X) yp = self.dataset.prepare_output_sample(y) @@ -109,22 +107,24 @@ def test_inference( # draw the final confusion matrix draw_confusion_matrix( self.dataset.confusion_matrix, - resultspath / f'{prefix}-confusion-matrix.png', - 'Confusion matrix', - self.dataset.classnames + resultspath / f"{prefix}-confusion-matrix.png", + "Confusion matrix", + self.dataset.classnames, ) # generate a file - with open(resultspath / f'{prefix}-metrics.md', 'w') as metfile: + with open(resultspath / f"{prefix}-metrics.md", "w") as metfile: conf_matrix = self.dataset.confusion_matrix - metfile.writelines([ - f'Model type: {prefix}\n\n', - f'* Accuracy: {metrics.accuracy(conf_matrix)}\n', - f'* Mean precision: {metrics.mean_precision(conf_matrix)}\n', - f'* Mean sensitivity: {metrics.mean_sensitivity(conf_matrix)}\n', # noqa: E501 - f'* G-Mean: {metrics.g_mean(conf_matrix)}\n', - f'* Mean inference time: {np.mean(self.timemeasurements[1:])} ms\n' # noqa: E501 - f'* Top-5 percentage: {self.dataset.top_5_count / self.dataset.total}\n' # noqa: E501 - ]) + metfile.writelines( + [ + f"Model type: {prefix}\n\n", + f"* Accuracy: {metrics.accuracy(conf_matrix)}\n", + f"* Mean precision: {metrics.mean_precision(conf_matrix)}\n", + f"* Mean sensitivity: {metrics.mean_sensitivity(conf_matrix)}\n", # noqa: E501 + f"* G-Mean: {metrics.g_mean(conf_matrix)}\n", + f"* Mean inference time: {np.mean(self.timemeasurements[1:])} ms\n" # noqa: E501 + f"* Top-5 percentage: {self.dataset.top_5_count / self.dataset.total}\n", # noqa: E501 + ] + ) def preprocess_input(self, X: Any): """ diff --git a/dl_in_iot_course/misc/pet_dataset.py b/dl_in_iot_course/misc/pet_dataset.py index 2d29502..46aa30b 100644 --- a/dl_in_iot_course/misc/pet_dataset.py +++ b/dl_in_iot_course/misc/pet_dataset.py @@ -40,10 +40,8 @@ class PetDataset(object): The affinity of images to classes is taken from annotations, but the class IDs are starting from 0 instead of 1, as in the annotations. """ - def __init__( - self, - root: Path, - download_dataset: bool = False): + + def __init__(self, root: Path, download_dataset: bool = False): """ Prepares all structures and data required for providing data samples. @@ -75,28 +73,24 @@ def prepare(self): Those lists will store file paths and classes for objects. 
""" - with open(self.root / 'annotations' / 'trainval.txt', 'r') as datadesc: + with open(self.root / "annotations" / "trainval.txt", "r") as datadesc: for line in datadesc: - if line.startswith('#'): + if line.startswith("#"): continue - fields = line.split(' ') - self.dataX.append( - str(self.root / 'images' / (fields[0] + '.jpg')) - ) + fields = line.split(" ") + self.dataX.append(str(self.root / "images" / (fields[0] + ".jpg"))) self.dataY.append(int(fields[1]) - 1) - clsname = fields[0].rsplit('_', 1)[0] - if not self.dataY[-1] in self.classnames: + clsname = fields[0].rsplit("_", 1)[0] + if self.dataY[-1] not in self.classnames: self.classnames[self.dataY[-1]] = clsname assert self.classnames[self.dataY[-1]] == clsname self.numclasses = len(self.classnames) - with open(self.root / 'annotations' / 'test.txt', 'r') as datadesc: + with open(self.root / "annotations" / "test.txt", "r") as datadesc: for line in datadesc: - if line.startswith('#'): + if line.startswith("#"): continue - fields = line.split(' ') - self.testX.append( - str(self.root / 'images' / (fields[0] + '.jpg')) - ) + fields = line.split(" ") + self.testX.append(str(self.root / "images" / (fields[0] + ".jpg"))) self.testY.append(int(fields[1]) - 1) self.reset_metrics() self.mean, self.std = self.get_input_mean_std() @@ -115,11 +109,11 @@ def download_dataset(self): Downloads the dataset to the root directory defined in the constructor. """ self.root.mkdir(parents=True, exist_ok=True) - imgs = 'https://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz' - anns = 'https://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz' # noqa: E501 + imgs = "https://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz" + anns = "https://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz" # noqa: E501 with tempfile.TemporaryDirectory() as tmpdir: - tarimgspath = Path(tmpdir) / 'dataset.tar.gz' - tarannspath = Path(tmpdir) / 'annotations.tar.gz' + tarimgspath = Path(tmpdir) / "dataset.tar.gz" + tarannspath = Path(tmpdir) / "annotations.tar.gz" download_url(imgs, tarimgspath) download_url(anns, tarannspath) tf = tarfile.open(tarimgspath) @@ -134,10 +128,8 @@ def get_data(self) -> Tuple[List, List]: return (self.dataX, self.dataY) def split_dataset( - self, - percentage: float = 0.25, - seed: int = 12345, - usetest: bool = False): + self, percentage: float = 0.25, seed: int = 12345, usetest: bool = False + ): """ Extracts validation dataset from the train dataset. @@ -161,14 +153,13 @@ def split_dataset( test_size=percentage, random_state=seed, shuffle=True, - stratify=self.testY if usetest else self.dataY + stratify=self.testY if usetest else self.dataY, ) return (dataxtrain, dataxvalid, dataytrain, datayvalid) def calibration_dataset_generator( - self, - percentage: float = 0.25, - seed: int = 12345): + self, percentage: float = 0.25, seed: int = 12345 + ): """ Creates generator for the calibration data. @@ -180,7 +171,7 @@ def calibration_dataset_generator( The seed for random state """ _, X, _, _ = self.split_dataset(percentage, seed) - for x in tqdm(X, desc='calibration'): + for x in tqdm(X, desc="calibration"): yield [self.prepare_input_sample(x)] def evaluate(self, predictions: List, truth: List): @@ -217,7 +208,9 @@ def get_input_mean_std(self) -> Tuple[Any, Any]: the standardization values for a given train dataset. Tuple of two variables describing mean and std values. 
""" - return np.array([0.485, 0.456, 0.406], dtype='float32'), np.array([0.229, 0.224, 0.225], dtype='float32') # noqa: E501 + return np.array([0.485, 0.456, 0.406], dtype="float32"), np.array( + [0.229, 0.224, 0.225], dtype="float32" + ) # noqa: E501 def get_class_names(self) -> List[str]: """ @@ -243,7 +236,7 @@ def prepare_input_sample(self, sample: Path) -> np.ndarray: np.ndarray : Preprocessed input """ img = Image.open(sample) - img = img.convert('RGB') + img = img.convert("RGB") img = img.resize((224, 224)) npimg = np.array(img).astype(np.float32) / 255.0 npimg = (npimg - self.mean) / self.std diff --git a/dl_in_iot_course/misc/utils.py b/dl_in_iot_course/misc/utils.py index 118d29d..e517386 100644 --- a/dl_in_iot_course/misc/utils.py +++ b/dl_in_iot_course/misc/utils.py @@ -11,12 +11,6 @@ def update_to(self, b=1, bsize=1, tsize=None): def download_url(url, output_path): with DownloadProgressBar( - unit='B', - unit_scale=True, - miniters=1, - desc=url.split('/')[-1]) as t: - urllib.request.urlretrieve( - url, - filename=output_path, - reporthook=t.update_to - ) + unit="B", unit_scale=True, miniters=1, desc=url.split("/")[-1] + ) as t: + urllib.request.urlretrieve(url, filename=output_path, reporthook=t.update_to) diff --git a/environments/Dockerfile b/environments/Dockerfile index 18beefa..522814c 100644 --- a/environments/Dockerfile +++ b/environments/Dockerfile @@ -1,73 +1,51 @@ -FROM archlinux:latest +FROM debian:bookworm -ENV PIPINST 'python -m pip install --no-cache-dir --upgrade' +ENV INST 'env DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends' +ENV PIPINST 'python3 -m pip install --no-cache-dir --upgrade' -RUN pacman --noconfirm -Syu -RUN pacman --noconfirm -Sy \ - blas \ - cblas \ - clang \ +RUN apt-get update && $INST \ + build-essential \ cmake \ curl \ - fd \ - flake8 \ + fd-find \ + ffmpeg \ + fonts-lato \ + g++ \ gcc \ git \ - gtest \ - ipython \ + git-lfs \ + libglfw3 \ + libglfw3-dev \ + libglib2.0-0 \ + libgomp1 \ + libopenblas-dev \ + libprotoc-dev \ + libpython3-dev \ + libtinfo5 \ + libtinfo-dev \ llvm \ - make \ - protobuf \ - pybind11 \ - python-cffi \ - python-cloudpickle \ - python-jinja \ - python-matplotlib \ - python-numpy \ - python-opencv \ - python-pip \ - python-psutil \ - python-pytest \ - python-pytorch \ - python-scikit-learn \ - python-setuptools \ - python-tornado \ - python-tqdm \ - tmux \ - vim \ + llvm-dev \ + mono-complete \ + procps \ + protobuf-compiler \ + python3 \ + python3-dev \ + python3-opencv \ + python3-pip \ + python3-venv \ wget \ - zip + zip \ + && apt-get autoremove && apt-get clean && rm -rf /var/lib/apt/lists/* -RUN $PIPINST \ - cloudpickle \ - flatbuffers \ - jupyterlab \ - netron \ - nni \ - notebook \ - onnx \ - ptipython \ - tensorflow==2.10 \ - tensorflow-addons==0.18.0 \ - tensorflow-model-optimization==0.7.2 \ - tensorflow-probability==0.18.0 \ - tflite \ - torchvision==0.13.1 \ - "xgboost>=1.1.0,<1.6.0" +RUN rm /usr/lib/python3.11/EXTERNALLY-MANAGED +RUN $PIPINST pip setuptools PyYAML cmake flit_core -RUN $PIPINST git+https://github.com/onnx/onnx-tensorflow.git@v1.10.0 -RUN $PIPINST git+https://github.com/onnx/tensorflow-onnx.git@v1.13.0 +RUN $PIPINST \ + flake8 \ + pre-commit \ + pydocstyle \ + ruff -COPY cpu-config.cmake /tvm-config.cmake -RUN cd / && git clone --recursive https://github.com/apache/tvm.git -b v0.10.0 /tvm && \ - cd /tvm && mkdir build && cd build && \ - mv /tvm-config.cmake /tvm/build/config.cmake && \ - cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr/local .. 
&& \ - make -j`nproc` && \ - make install && \ - ldconfig && \ - CONDA_BUILD=yes $PIPINST -e /tvm/python && \ - cd / && \ - rm -rf /tvm/build +COPY requirements.txt /tmp/requirements.txt -ENV LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH +RUN $PIPINST -r /tmp/requirements.txt diff --git a/environments/cfu-playground-environment.def b/environments/cfu-playground-environment.def deleted file mode 100644 index 12497f9..0000000 --- a/environments/cfu-playground-environment.def +++ /dev/null @@ -1,76 +0,0 @@ -BootStrap: docker -From: debian:bullseye - -%post - export INST='env DEBIAN_FRONTEND=noninteractive apt-get install -y' - export PIPINST='python3 -m pip install --no-cache-dir --upgrade' - - apt_wrapper () { - apt-get update - $INST $@ - apt-get -y autoremove && apt-get -y clean && rm -rf /var/lib/apt/lists - ldconfig - } - - apt_wrapper \ - autoconf \ - automake \ - build-essential \ - ccache \ - cmake \ - coreutils \ - curl \ - expect \ - fd-find \ - g++ \ - gcc \ - git \ - gtk-sharp2 \ - libevent-dev \ - libftdi1-dev \ - libgtk2.0-dev \ - libjson-c-dev \ - libopenblas-dev \ - libprotoc-dev \ - libtinfo-dev \ - libtool \ - libusb-1.0.0-dev \ - llvm \ - llvm-dev \ - make \ - ninja-build \ - openocd \ - policykit-1 \ - protobuf-compiler \ - python3 \ - python3-dev \ - python3-pip \ - tar \ - tmux \ - uml-utilities \ - verilator \ - vim \ - wget \ - yosys \ - zip - - $PIPINST pip setuptools - $PIPINST \ - amaranth-yosys \ - cffi \ - cloudpickle \ - cmake \ - numpy \ - opencv-python \ - psutil \ - pytest \ - tornado \ - tqdm - - cd / - wget https://static.dev.sifive.com/dev-tools/freedom-tools/v2020.08/riscv64-unknown-elf-gcc-10.1.0-2020.08.2-x86_64-linux-ubuntu14.tar.gz - tar xvzf /riscv64-unknown-elf-gcc-10.1.0-2020.08.2-x86_64-linux-ubuntu14.tar.gz - rm /riscv64-unknown-elf-gcc-10.1.0-2020.08.2-x86_64-linux-ubuntu14.tar.gz - -%environment - PATH=$PATH:/riscv64-unknown-elf-gcc-10.1.0-2020.08.2-x86_64-linux-ubuntu14/bin diff --git a/environments/cpu-config.cmake b/environments/cpu-config.cmake deleted file mode 100644 index 4f3fa5e..0000000 --- a/environments/cpu-config.cmake +++ /dev/null @@ -1,356 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -#-------------------------------------------------------------------- -# Template custom cmake configuration for compiling -# -# This file is used to override the build options in build. -# If you want to change the configuration, please use the following -# steps. Assume you are on the root directory. First copy the this -# file so that any local changes will be ignored by git -# -# $ mkdir build -# $ cp cmake/config.cmake build -# -# Next modify the according entries, and then compile by -# -# $ cd build -# $ cmake .. 
-# -# Then build in parallel with 8 threads -# -# $ make -j8 -#-------------------------------------------------------------------- - -#--------------------------------------------- -# Backend runtimes. -#--------------------------------------------- - -# Whether enable CUDA during compile, -# -# Possible values: -# - ON: enable CUDA with cmake's auto search -# - OFF: disable CUDA -# - /path/to/cuda: use specific path to cuda toolkit -set(USE_CUDA OFF) - -# Whether enable ROCM runtime -# -# Possible values: -# - ON: enable ROCM with cmake's auto search -# - OFF: disable ROCM -# - /path/to/rocm: use specific path to rocm -set(USE_ROCM OFF) - -# Whether enable SDAccel runtime -set(USE_SDACCEL OFF) - -# Whether enable Intel FPGA SDK for OpenCL (AOCL) runtime -set(USE_AOCL OFF) - -# Whether enable OpenCL runtime -# -# Possible values: -# - ON: enable OpenCL with cmake's auto search -# - OFF: disable OpenCL -# - /path/to/opencl-sdk: use specific path to opencl-sdk -set(USE_OPENCL OFF) - -# Whether enable Metal runtime -set(USE_METAL OFF) - -# Whether enable Vulkan runtime -# -# Possible values: -# - ON: enable Vulkan with cmake's auto search -# - OFF: disable vulkan -# - /path/to/vulkan-sdk: use specific path to vulkan-sdk -set(USE_VULKAN OFF) - -# Whether enable OpenGL runtime -set(USE_OPENGL ON) - -# Whether enable MicroTVM runtime -set(USE_MICRO OFF) - -# Whether enable RPC runtime -set(USE_RPC ON) - -# Whether to build the C++ RPC server binary -set(USE_CPP_RPC OFF) - -# Whether to build the iOS RPC server application -set(USE_IOS_RPC OFF) - -# Whether embed stackvm into the runtime -set(USE_STACKVM_RUNTIME OFF) - -# Whether enable tiny embedded graph executor. -set(USE_GRAPH_EXECUTOR ON) - -# Whether enable tiny graph executor with CUDA Graph -set(USE_GRAPH_EXECUTOR_CUDA_GRAPH OFF) - -# Whether enable pipeline executor. -set(USE_PIPELINE_EXECUTOR OFF) - -# Whether to enable the profiler for the graph executor and vm -set(USE_PROFILER ON) - -# Whether enable microTVM standalone runtime -set(USE_MICRO_STANDALONE_RUNTIME OFF) - -# Whether build with LLVM support -# Requires LLVM version >= 4.0 -# -# Possible values: -# - ON: enable llvm with cmake's find search -# - OFF: disable llvm, note this will disable CPU codegen -# which is needed for most cases -# - /path/to/llvm-config: enable specific LLVM when multiple llvm-dev is available. 
-set(USE_LLVM ON) - -#--------------------------------------------- -# Contrib libraries -#--------------------------------------------- -# Whether to build with BYODT software emulated posit custom datatype -# -# Possible values: -# - ON: enable BYODT posit, requires setting UNIVERSAL_PATH -# - OFF: disable BYODT posit -# -# set(UNIVERSAL_PATH /path/to/stillwater-universal) for ON -set(USE_BYODT_POSIT OFF) - -# Whether use BLAS, choices: openblas, atlas, apple -set(USE_BLAS none) - -# Whether to use MKL -# Possible values: -# - ON: Enable MKL -# - /path/to/mkl: mkl root path -# - OFF: Disable MKL -# set(USE_MKL /opt/intel/mkl) for UNIX -# set(USE_MKL ../IntelSWTools/compilers_and_libraries_2018/windows/mkl) for WIN32 -# set(USE_MKL ) if using `pip install mkl` -set(USE_MKL OFF) - -# Whether use MKLDNN library, choices: ON, OFF, path to mkldnn library -set(USE_MKLDNN OFF) - -# Whether use OpenMP thread pool, choices: gnu, intel -# Note: "gnu" uses gomp library, "intel" uses iomp5 library -set(USE_OPENMP gnu) - -# Whether use contrib.random in runtime -set(USE_RANDOM ON) - -# Whether use NNPack -set(USE_NNPACK OFF) - -# Possible values: -# - ON: enable tflite with cmake's find search -# - OFF: disable tflite -# - /path/to/libtensorflow-lite.a: use specific path to tensorflow lite library -set(USE_TFLITE OFF) - -# /path/to/tensorflow: tensorflow root path when use tflite library -set(USE_TENSORFLOW_PATH none) - -# Required for full builds with TFLite. Not needed for runtime with TFLite. -# /path/to/flatbuffers: flatbuffers root path when using tflite library -set(USE_FLATBUFFERS_PATH none) - -# Possible values: -# - OFF: disable tflite support for edgetpu -# - /path/to/edgetpu: use specific path to edgetpu library -set(USE_EDGETPU OFF) - -# Possible values: -# - ON: enable cuDNN with cmake's auto search in CUDA directory -# - OFF: disable cuDNN -# - /path/to/cudnn: use specific path to cuDNN path -set(USE_CUDNN OFF) - -# Whether use cuBLAS -set(USE_CUBLAS OFF) - -# Whether use MIOpen -set(USE_MIOPEN OFF) - -# Whether use MPS -set(USE_MPS OFF) - -# Whether use rocBlas -set(USE_ROCBLAS OFF) - -# Whether use contrib sort -set(USE_SORT ON) - -# Whether use MKL-DNN (DNNL) codegen -set(USE_DNNL_CODEGEN OFF) - -# Whether to use Arm Compute Library (ACL) codegen -# We provide 2 separate flags since we cannot build the ACL runtime on x86. -# This is useful for cases where you want to cross-compile a relay graph -# on x86 then run on AArch. -# -# An example of how to use this can be found here: docs/deploy/arm_compute_lib.rst. -# -# USE_ARM_COMPUTE_LIB - Support for compiling a relay graph offloading supported -# operators to Arm Compute Library. OFF/ON -# USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR - Run Arm Compute Library annotated functions via the ACL -# runtime. OFF/ON/"path/to/ACL" -set(USE_ARM_COMPUTE_LIB OFF) -set(USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR OFF) - -# Whether to build with Arm Ethos-N support -# Possible values: -# - OFF: disable Arm Ethos-N support -# - path/to/arm-ethos-N-stack: use a specific version of the -# Ethos-N driver stack -set(USE_ETHOSN OFF) -# If USE_ETHOSN is enabled, use ETHOSN_HW (ON) if Ethos-N hardware is available on this machine -# otherwise use ETHOSN_HW (OFF) to use the software test infrastructure -set(USE_ETHOSN_HW OFF) - -# Whether to build with Arm(R) Ethos(TM)-U NPU codegen support -set(USE_ETHOSU OFF) - -# Whether to build with TensorRT codegen or runtime -# Examples are available here: docs/deploy/tensorrt.rst. 
-# -# USE_TENSORRT_CODEGEN - Support for compiling a relay graph where supported operators are -# offloaded to TensorRT. OFF/ON -# USE_TENSORRT_RUNTIME - Support for running TensorRT compiled modules, requires presense of -# TensorRT library. OFF/ON/"path/to/TensorRT" -set(USE_TENSORRT_CODEGEN OFF) -set(USE_TENSORRT_RUNTIME OFF) - -# Whether use VITIS-AI codegen -set(USE_VITIS_AI OFF) - -# Build Verilator codegen and runtime -set(USE_VERILATOR OFF) - -# Build ANTLR parser for Relay text format -# Possible values: -# - ON: enable ANTLR by searching default locations (cmake find_program for antlr4 and /usr/local for jar) -# - OFF: disable ANTLR -# - /path/to/antlr-*-complete.jar: path to specific ANTLR jar file -set(USE_ANTLR OFF) - -# Whether use Relay debug mode -set(USE_RELAY_DEBUG OFF) - -# Whether to build fast VTA simulator driver -set(USE_VTA_FSIM OFF) - -# Whether to build cycle-accurate VTA simulator driver -set(USE_VTA_TSIM OFF) - -# Whether to build VTA FPGA driver (device side only) -set(USE_VTA_FPGA OFF) - -# Whether use Thrust -set(USE_THRUST OFF) - -# Whether to build the TensorFlow TVMDSOOp module -set(USE_TF_TVMDSOOP OFF) - -# Whether to build the PyTorch custom class module -set(USE_PT_TVMDSOOP OFF) - -# Whether to use STL's std::unordered_map or TVM's POD compatible Map -set(USE_FALLBACK_STL_MAP OFF) - -# Whether to use hexagon device -set(USE_HEXAGON_DEVICE OFF) -set(USE_HEXAGON_SDK /path/to/sdk) - -# Whether to build the hexagon launcher -set(USE_HEXAGON_LAUNCHER OFF) - -# Hexagon architecture to target when compiling TVM itself (not the target for -# compiling _by_ TVM). This applies to components like the TVM runtime, but is -# also used to select correct include/library paths from the Hexagon SDK when -# building offloading runtime for Android. -# Valid values are v60, v62, v65, v66, v68. -set(USE_HEXAGON_ARCH "v66") - -# Whether to use ONNX codegen -set(USE_TARGET_ONNX ON) - -# Whether enable BNNS runtime -set(USE_BNNS OFF) - -# Whether to use libbacktrace -# Libbacktrace provides line and column information on stack traces from errors. -# It is only supported on linux and macOS. -# Possible values: -# - AUTO: auto set according to system information and feasibility -# - ON: enable libbacktrace -# - OFF: disable libbacktrace -set(USE_LIBBACKTRACE AUTO) - -# Whether to build static libtvm_runtime.a, the default is to build the dynamic -# version: libtvm_runtime.so. -# -# The static runtime library needs to be linked into executables with the linker -# option --whole-archive (or its equivalent). The reason is that the TVM registry -# mechanism relies on global constructors being executed at program startup. -# Global constructors alone are not sufficient for the linker to consider a -# library member to be used, and some of such library members (object files) may -# not be included in the final executable. This would make the corresponding -# runtime functions to be unavailable to the program. -set(BUILD_STATIC_RUNTIME OFF) - - -# Caches the build so that building is faster when switching between branches. -# If you switch branches, build and then encounter a linking error, you may -# need to regenerate the build tree through "make .." (the cache will -# still provide significant speedups). -# Possible values: -# - AUTO: search for path to ccache, disable if not found. 
-# - ON: enable ccache by searching for the path to ccache, report an error if not found -# - OFF: disable ccache -# - /path/to/ccache: use specific path to ccache -set(USE_CCACHE AUTO) - -# Whether to enable PAPI support in profiling. PAPI provides access to hardware -# counters while profiling. -# Possible values: -# - ON: enable PAPI support. Will search PKG_CONFIG_PATH for a papi.pc -# - OFF: disable PAPI support. -# - /path/to/folder/containing/: Path to folder containing papi.pc. -set(USE_PAPI OFF) - -# Whether to use GoogleTest for C++ unit tests. When enabled, the generated -# build file (e.g. Makefile) will have a target "cpptest". -# Possible values: -# - ON: enable GoogleTest. The package `GTest` will be required for cmake -# to succeed. -# - OFF: disable GoogleTest. -# - AUTO: cmake will attempt to find the GTest package, if found GTest will -# be enabled, otherwise it will be disabled. -# Note that cmake will use `find_package` to find GTest. Please use cmake's -# predefined variables to specify the path to the GTest package if needed. -set(USE_GTEST AUTO) - -# Enable using CUTLASS as a BYOC backend -# Need to have USE_CUDA=ON -set(USE_CUTLASS OFF) diff --git a/environments/development-environment-gpu.def b/environments/development-environment-gpu.def deleted file mode 100644 index ca48adb..0000000 --- a/environments/development-environment-gpu.def +++ /dev/null @@ -1,95 +0,0 @@ -BootStrap: docker -From: nvidia/cuda:11.4.2-cudnn8-devel-ubuntu20.04 - -%files - gpu-config.cmake /tvm-config.cmake -%post - export INST='env DEBIAN_FRONTEND=noninteractive apt-get install -y' - export PIPINST='python3 -m pip install --no-cache-dir --upgrade' - - apt_wrapper () { - apt-get update - $INST $@ - apt-get -y autoremove && apt-get -y clean && rm -rf /var/lib/apt/lists - ldconfig - } - - apt_wrapper \ - build-essential \ - curl \ - fd-find \ - g++ \ - gcc \ - git \ - libglfw3 \ - libglfw3-dev \ - libgomp1 \ - libopenblas-dev \ - libprotoc-dev \ - libtinfo-dev \ - llvm \ - llvm-dev \ - protobuf-compiler \ - python3 \ - python3-dev \ - python3-pip \ - tmux \ - vim \ - wget \ - zip - - $PIPINST pip setuptools pyyaml - $PIPINST \ - cffi \ - cloudpickle \ - cmake \ - flake8 \ - ipython \ - Jinja2 \ - jupyterlab \ - matplotlib \ - netron \ - nni \ - notebook \ - numpy \ - onnx \ - opencv-python \ - psutil \ - ptipython \ - pytest \ - scikit-learn \ - tensorflow==2.10 \ - tensorflow-addons==0.18.0 \ - tensorflow-model-optimization==0.7.2 \ - tensorflow-probability \ - tflite \ - torch==1.12.1 \ - torchvision==0.13.1 \ - tornado \ - tqdm \ - "xgboost>=1.1.0,<1.6.0" - - $PIPINST git+https://github.com/onnx/onnx-tensorflow.git@v1.10.0 - $PIPINST git+https://github.com/onnx/tensorflow-onnx.git@v1.13.0 - - export LD_LIBRARY_PATH=/usr/local/lib:/usr/local/cuda/lib64/:/usr/local/cuda/compat/:/usr/local/nvidia/lib:/usr/local/nvidia/lib64:$LD_LIBRARY_PATH - - ldconfig - - git clone --recursive https://github.com/apache/tvm.git -b v0.10.0 /tvm - cd /tvm - mkdir build && cd build - mv /tvm-config.cmake /tvm/build/config.cmake - cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr/local .. 
- make -j`nproc` - make install - - ldconfig - - CONDA_BUILD=yes $PIPINST -e /tvm/python - cd / - rm -rf /tvm/build - -%environment - LD_LIBRARY_PATH=/usr/local/lib:/usr/local/cuda/lib64/:/usr/local/cuda/compat/:/usr/local/nvidia/lib:/usr/local/nvidia/lib64:$LD_LIBRARY_PATH - PYTHONPATH=/tvm/python:$PYTHONPATH diff --git a/environments/development-environment.def b/environments/development-environment.def deleted file mode 100644 index 14d92f4..0000000 --- a/environments/development-environment.def +++ /dev/null @@ -1,82 +0,0 @@ -BootStrap: docker -From: archlinux:latest - -%files - cpu-config.cmake /tvm-config.cmake - -%post - export PIPINST='python3 -m pip install --no-cache-dir --upgrade' - - pacman --noconfirm -Syu - pacman --noconfirm -Sy \ - blas \ - cblas \ - clang \ - cmake \ - curl \ - fd \ - flake8 \ - gcc \ - git \ - gtest \ - ipython \ - llvm \ - make \ - protobuf \ - pybind11 \ - python-cffi \ - python-cloudpickle \ - python-jinja \ - python-matplotlib \ - python-numpy \ - python-opencv \ - python-pip \ - python-psutil \ - python-pytest \ - python-pytorch \ - python-scikit-learn \ - python-setuptools \ - python-tornado \ - python-tqdm \ - tmux \ - vim \ - wget \ - zip - - $PIPINST \ - cloudpickle \ - flatbuffers \ - jupyterlab \ - netron \ - nni \ - notebook \ - onnx \ - ptipython \ - tensorflow==2.10 \ - tensorflow-addons==0.18.0 \ - tensorflow-model-optimization==0.7.2 \ - tensorflow-probability==0.18.0 \ - tflite \ - torchvision==0.13.1 \ - "xgboost>=1.1.0,<1.6.0" - - $PIPINST git+https://github.com/onnx/onnx-tensorflow.git@v1.10.0 - $PIPINST git+https://github.com/onnx/tensorflow-onnx.git@v1.13.0 - - git clone --recursive https://github.com/apache/tvm.git -b v0.10.0 /tvm - cd /tvm - mkdir build && cd build - mv /tvm-config.cmake /tvm/build/config.cmake - cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr/local .. - make -j`nproc` - make install - - ldconfig - - CONDA_BUILD=yes $PIPINST -e /tvm/python - cd / - rm -rf /tvm/build - -%environment - LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH - PYTHONPATH=/tvm/python:$PYTHONPATH diff --git a/environments/gpu-config.cmake b/environments/gpu-config.cmake deleted file mode 100644 index 9c8d141..0000000 --- a/environments/gpu-config.cmake +++ /dev/null @@ -1,356 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -#-------------------------------------------------------------------- -# Template custom cmake configuration for compiling -# -# This file is used to override the build options in build. -# If you want to change the configuration, please use the following -# steps. Assume you are on the root directory. 
First copy the this -# file so that any local changes will be ignored by git -# -# $ mkdir build -# $ cp cmake/config.cmake build -# -# Next modify the according entries, and then compile by -# -# $ cd build -# $ cmake .. -# -# Then build in parallel with 8 threads -# -# $ make -j8 -#-------------------------------------------------------------------- - -#--------------------------------------------- -# Backend runtimes. -#--------------------------------------------- - -# Whether enable CUDA during compile, -# -# Possible values: -# - ON: enable CUDA with cmake's auto search -# - OFF: disable CUDA -# - /path/to/cuda: use specific path to cuda toolkit -set(USE_CUDA ON) - -# Whether enable ROCM runtime -# -# Possible values: -# - ON: enable ROCM with cmake's auto search -# - OFF: disable ROCM -# - /path/to/rocm: use specific path to rocm -set(USE_ROCM OFF) - -# Whether enable SDAccel runtime -set(USE_SDACCEL OFF) - -# Whether enable Intel FPGA SDK for OpenCL (AOCL) runtime -set(USE_AOCL OFF) - -# Whether enable OpenCL runtime -# -# Possible values: -# - ON: enable OpenCL with cmake's auto search -# - OFF: disable OpenCL -# - /path/to/opencl-sdk: use specific path to opencl-sdk -set(USE_OPENCL OFF) - -# Whether enable Metal runtime -set(USE_METAL OFF) - -# Whether enable Vulkan runtime -# -# Possible values: -# - ON: enable Vulkan with cmake's auto search -# - OFF: disable vulkan -# - /path/to/vulkan-sdk: use specific path to vulkan-sdk -set(USE_VULKAN OFF) - -# Whether enable OpenGL runtime -set(USE_OPENGL ON) - -# Whether enable MicroTVM runtime -set(USE_MICRO OFF) - -# Whether enable RPC runtime -set(USE_RPC ON) - -# Whether to build the C++ RPC server binary -set(USE_CPP_RPC OFF) - -# Whether to build the iOS RPC server application -set(USE_IOS_RPC OFF) - -# Whether embed stackvm into the runtime -set(USE_STACKVM_RUNTIME OFF) - -# Whether enable tiny embedded graph executor. -set(USE_GRAPH_EXECUTOR ON) - -# Whether enable tiny graph executor with CUDA Graph -set(USE_GRAPH_EXECUTOR_CUDA_GRAPH OFF) - -# Whether enable pipeline executor. -set(USE_PIPELINE_EXECUTOR OFF) - -# Whether to enable the profiler for the graph executor and vm -set(USE_PROFILER ON) - -# Whether enable microTVM standalone runtime -set(USE_MICRO_STANDALONE_RUNTIME OFF) - -# Whether build with LLVM support -# Requires LLVM version >= 4.0 -# -# Possible values: -# - ON: enable llvm with cmake's find search -# - OFF: disable llvm, note this will disable CPU codegen -# which is needed for most cases -# - /path/to/llvm-config: enable specific LLVM when multiple llvm-dev is available. 
-set(USE_LLVM ON) - -#--------------------------------------------- -# Contrib libraries -#--------------------------------------------- -# Whether to build with BYODT software emulated posit custom datatype -# -# Possible values: -# - ON: enable BYODT posit, requires setting UNIVERSAL_PATH -# - OFF: disable BYODT posit -# -# set(UNIVERSAL_PATH /path/to/stillwater-universal) for ON -set(USE_BYODT_POSIT OFF) - -# Whether use BLAS, choices: openblas, atlas, apple -set(USE_BLAS none) - -# Whether to use MKL -# Possible values: -# - ON: Enable MKL -# - /path/to/mkl: mkl root path -# - OFF: Disable MKL -# set(USE_MKL /opt/intel/mkl) for UNIX -# set(USE_MKL ../IntelSWTools/compilers_and_libraries_2018/windows/mkl) for WIN32 -# set(USE_MKL ) if using `pip install mkl` -set(USE_MKL OFF) - -# Whether use MKLDNN library, choices: ON, OFF, path to mkldnn library -set(USE_MKLDNN OFF) - -# Whether use OpenMP thread pool, choices: gnu, intel -# Note: "gnu" uses gomp library, "intel" uses iomp5 library -set(USE_OPENMP gnu) - -# Whether use contrib.random in runtime -set(USE_RANDOM ON) - -# Whether use NNPack -set(USE_NNPACK OFF) - -# Possible values: -# - ON: enable tflite with cmake's find search -# - OFF: disable tflite -# - /path/to/libtensorflow-lite.a: use specific path to tensorflow lite library -set(USE_TFLITE OFF) - -# /path/to/tensorflow: tensorflow root path when use tflite library -set(USE_TENSORFLOW_PATH none) - -# Required for full builds with TFLite. Not needed for runtime with TFLite. -# /path/to/flatbuffers: flatbuffers root path when using tflite library -set(USE_FLATBUFFERS_PATH none) - -# Possible values: -# - OFF: disable tflite support for edgetpu -# - /path/to/edgetpu: use specific path to edgetpu library -set(USE_EDGETPU OFF) - -# Possible values: -# - ON: enable cuDNN with cmake's auto search in CUDA directory -# - OFF: disable cuDNN -# - /path/to/cudnn: use specific path to cuDNN path -set(USE_CUDNN ON) - -# Whether use cuBLAS -set(USE_CUBLAS ON) - -# Whether use MIOpen -set(USE_MIOPEN OFF) - -# Whether use MPS -set(USE_MPS OFF) - -# Whether use rocBlas -set(USE_ROCBLAS OFF) - -# Whether use contrib sort -set(USE_SORT ON) - -# Whether use MKL-DNN (DNNL) codegen -set(USE_DNNL_CODEGEN OFF) - -# Whether to use Arm Compute Library (ACL) codegen -# We provide 2 separate flags since we cannot build the ACL runtime on x86. -# This is useful for cases where you want to cross-compile a relay graph -# on x86 then run on AArch. -# -# An example of how to use this can be found here: docs/deploy/arm_compute_lib.rst. -# -# USE_ARM_COMPUTE_LIB - Support for compiling a relay graph offloading supported -# operators to Arm Compute Library. OFF/ON -# USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR - Run Arm Compute Library annotated functions via the ACL -# runtime. OFF/ON/"path/to/ACL" -set(USE_ARM_COMPUTE_LIB OFF) -set(USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR OFF) - -# Whether to build with Arm Ethos-N support -# Possible values: -# - OFF: disable Arm Ethos-N support -# - path/to/arm-ethos-N-stack: use a specific version of the -# Ethos-N driver stack -set(USE_ETHOSN OFF) -# If USE_ETHOSN is enabled, use ETHOSN_HW (ON) if Ethos-N hardware is available on this machine -# otherwise use ETHOSN_HW (OFF) to use the software test infrastructure -set(USE_ETHOSN_HW OFF) - -# Whether to build with Arm(R) Ethos(TM)-U NPU codegen support -set(USE_ETHOSU OFF) - -# Whether to build with TensorRT codegen or runtime -# Examples are available here: docs/deploy/tensorrt.rst. 
-# -# USE_TENSORRT_CODEGEN - Support for compiling a relay graph where supported operators are -# offloaded to TensorRT. OFF/ON -# USE_TENSORRT_RUNTIME - Support for running TensorRT compiled modules, requires presense of -# TensorRT library. OFF/ON/"path/to/TensorRT" -set(USE_TENSORRT_CODEGEN OFF) -set(USE_TENSORRT_RUNTIME OFF) - -# Whether use VITIS-AI codegen -set(USE_VITIS_AI OFF) - -# Build Verilator codegen and runtime -set(USE_VERILATOR OFF) - -# Build ANTLR parser for Relay text format -# Possible values: -# - ON: enable ANTLR by searching default locations (cmake find_program for antlr4 and /usr/local for jar) -# - OFF: disable ANTLR -# - /path/to/antlr-*-complete.jar: path to specific ANTLR jar file -set(USE_ANTLR OFF) - -# Whether use Relay debug mode -set(USE_RELAY_DEBUG OFF) - -# Whether to build fast VTA simulator driver -set(USE_VTA_FSIM OFF) - -# Whether to build cycle-accurate VTA simulator driver -set(USE_VTA_TSIM OFF) - -# Whether to build VTA FPGA driver (device side only) -set(USE_VTA_FPGA OFF) - -# Whether use Thrust -set(USE_THRUST OFF) - -# Whether to build the TensorFlow TVMDSOOp module -set(USE_TF_TVMDSOOP OFF) - -# Whether to build the PyTorch custom class module -set(USE_PT_TVMDSOOP OFF) - -# Whether to use STL's std::unordered_map or TVM's POD compatible Map -set(USE_FALLBACK_STL_MAP OFF) - -# Whether to use hexagon device -set(USE_HEXAGON_DEVICE OFF) -set(USE_HEXAGON_SDK /path/to/sdk) - -# Whether to build the hexagon launcher -set(USE_HEXAGON_LAUNCHER OFF) - -# Hexagon architecture to target when compiling TVM itself (not the target for -# compiling _by_ TVM). This applies to components like the TVM runtime, but is -# also used to select correct include/library paths from the Hexagon SDK when -# building offloading runtime for Android. -# Valid values are v60, v62, v65, v66, v68. -set(USE_HEXAGON_ARCH "v66") - -# Whether to use ONNX codegen -set(USE_TARGET_ONNX ON) - -# Whether enable BNNS runtime -set(USE_BNNS OFF) - -# Whether to use libbacktrace -# Libbacktrace provides line and column information on stack traces from errors. -# It is only supported on linux and macOS. -# Possible values: -# - AUTO: auto set according to system information and feasibility -# - ON: enable libbacktrace -# - OFF: disable libbacktrace -set(USE_LIBBACKTRACE AUTO) - -# Whether to build static libtvm_runtime.a, the default is to build the dynamic -# version: libtvm_runtime.so. -# -# The static runtime library needs to be linked into executables with the linker -# option --whole-archive (or its equivalent). The reason is that the TVM registry -# mechanism relies on global constructors being executed at program startup. -# Global constructors alone are not sufficient for the linker to consider a -# library member to be used, and some of such library members (object files) may -# not be included in the final executable. This would make the corresponding -# runtime functions to be unavailable to the program. -set(BUILD_STATIC_RUNTIME OFF) - - -# Caches the build so that building is faster when switching between branches. -# If you switch branches, build and then encounter a linking error, you may -# need to regenerate the build tree through "make .." (the cache will -# still provide significant speedups). -# Possible values: -# - AUTO: search for path to ccache, disable if not found. 
-# - ON: enable ccache by searching for the path to ccache, report an error if not found -# - OFF: disable ccache -# - /path/to/ccache: use specific path to ccache -set(USE_CCACHE AUTO) - -# Whether to enable PAPI support in profiling. PAPI provides access to hardware -# counters while profiling. -# Possible values: -# - ON: enable PAPI support. Will search PKG_CONFIG_PATH for a papi.pc -# - OFF: disable PAPI support. -# - /path/to/folder/containing/: Path to folder containing papi.pc. -set(USE_PAPI OFF) - -# Whether to use GoogleTest for C++ unit tests. When enabled, the generated -# build file (e.g. Makefile) will have a target "cpptest". -# Possible values: -# - ON: enable GoogleTest. The package `GTest` will be required for cmake -# to succeed. -# - OFF: disable GoogleTest. -# - AUTO: cmake will attempt to find the GTest package, if found GTest will -# be enabled, otherwise it will be disabled. -# Note that cmake will use `find_package` to find GTest. Please use cmake's -# predefined variables to specify the path to the GTest package if needed. -set(USE_GTEST AUTO) - -# Enable using CUTLASS as a BYOC backend -# Need to have USE_CUDA=ON -set(USE_CUTLASS OFF) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..58005a6 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,14 @@ +apache-tvm +matplotlib +nni +numpy +onnx +onnx_tf +Pillow +scikit_learn +tensorflow==2.13.0 +tensorflow_model_optimization +tflite +torch +torchvision +tqdm
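The metrics written to `<prefix>-metrics.md` in `test_inference` above come from the repository's `misc.metrics` helpers, which this diff does not touch. As a rough reference only, the standard confusion-matrix definitions those helpers are assumed to follow look like this (assuming rows index ground-truth classes and columns index predictions):

```python
import numpy as np


def accuracy(conf_matrix: np.ndarray) -> float:
    # Correctly classified samples (the diagonal) over all samples.
    return float(np.trace(conf_matrix) / np.sum(conf_matrix))


def mean_sensitivity(conf_matrix: np.ndarray) -> float:
    # Mean per-class recall: diagonal entries over ground-truth row sums.
    return float(np.mean(np.diag(conf_matrix) / np.sum(conf_matrix, axis=1)))


def g_mean(conf_matrix: np.ndarray) -> float:
    # Geometric mean of per-class sensitivities; it drops to zero when any
    # class is never predicted correctly, making it informative on
    # imbalanced datasets.
    sens = np.diag(conf_matrix) / np.sum(conf_matrix, axis=1)
    return float(sens.prod() ** (1.0 / len(sens)))
```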
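Likewise, a minimal sketch of how the reformatted `PetDataset` API is typically driven; the `root` path, the percentages, and the way the calibration samples are consumed are illustrative assumptions, not values mandated by this diff:

```python
from pathlib import Path

from dl_in_iot_course.misc.pet_dataset import PetDataset

# Download (when missing) and parse the Oxford-IIIT Pet annotations;
# class IDs are remapped to start from 0, as documented in the class.
dataset = PetDataset(root=Path("build/pet-dataset"), download_dataset=True)

# Stratified split: carve a 25% validation subset out of the train data
# (pass usetest=True to split the test list instead).
trainX, validX, trainY, validY = dataset.split_dataset(percentage=0.25)

# Each calibration sample is yielded preprocessed and wrapped in a
# one-element list, matching e.g. a TFLite representative-dataset hook.
for sample in dataset.calibration_dataset_generator(percentage=0.1):
    pass  # hand `sample` over to the quantizer here
```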