From 6e4547a08e1dd4060569cdf510e06b323544234c Mon Sep 17 00:00:00 2001 From: Emre Sahin Date: Fri, 1 Dec 2023 23:06:42 +0300 Subject: [PATCH] Updated train script --- book/src/how-to/create-a-data-pipeline.md | 148 +++++++++++----------- 1 file changed, 76 insertions(+), 72 deletions(-) diff --git a/book/src/how-to/create-a-data-pipeline.md b/book/src/how-to/create-a-data-pipeline.md index 8222d85c5..44e28c7be 100644 --- a/book/src/how-to/create-a-data-pipeline.md +++ b/book/src/how-to/create-a-data-pipeline.md @@ -35,7 +35,7 @@ total 21112 -rw-r--r-- 1 iex staff 10792680 Nov 17 19:46 chinese_mnist.zip -rw-r--r-- 1 iex staff 1124 Nov 28 14:27 image_to_numpy_array.py -rw-r--r-- 1 iex staff 40 Dec 1 11:59 requirements.txt --rw-r--r-- 1 iex staff 4360 Dec 1 21:57 train.py +-rw-r--r-- 1 iex staff 4436 Dec 1 22:52 train.py ``` Let's start by tracking the data file with Xvc. @@ -52,10 +52,10 @@ data file, we'll only read from it, so we set the recheck type as symlink. ```console $ ls -l total 32 -lrwxr-xr-x 1 iex staff 195 Dec 1 21:57 chinese_mnist.zip -> [CWD]/.xvc/b3/b24/2c9/422f91b804ea3008bc0bc025e97bf50c1d902ae7a0f13588b84f59023d/0.zip +lrwxr-xr-x 1 iex staff 195 Dec 1 22:52 chinese_mnist.zip -> [CWD]/.xvc/b3/b24/2c9/422f91b804ea3008bc0bc025e97bf50c1d902ae7a0f13588b84f59023d/0.zip -rw-r--r-- 1 iex staff 1124 Nov 28 14:27 image_to_numpy_array.py -rw-r--r-- 1 iex staff 40 Dec 1 11:59 requirements.txt --rw-r--r-- 1 iex staff 4360 Dec 1 21:57 train.py +-rw-r--r-- 1 iex staff 4436 Dec 1 22:52 train.py ``` @@ -68,11 +68,11 @@ $ unzip -q chinese_mnist.zip $ ls -l total 32 -lrwxr-xr-x 1 iex staff 195 Dec 1 21:57 chinese_mnist.zip -> [CWD]/.xvc/b3/b24/2c9/422f91b804ea3008bc0bc025e97bf50c1d902ae7a0f13588b84f59023d/0.zip +lrwxr-xr-x 1 iex staff 195 Dec 1 22:52 chinese_mnist.zip -> [CWD]/.xvc/b3/b24/2c9/422f91b804ea3008bc0bc025e97bf50c1d902ae7a0f13588b84f59023d/0.zip drwxr-xr-x 4 iex staff 128 Nov 17 19:45 data -rw-r--r-- 1 iex staff 1124 Nov 28 14:27 image_to_numpy_array.py -rw-r--r-- 1 iex staff 40 Dec 1 11:59 requirements.txt --rw-r--r-- 1 iex staff 4360 Dec 1 21:57 train.py +-rw-r--r-- 1 iex staff 4436 Dec 1 22:52 train.py ``` @@ -309,32 +309,32 @@ Let's run the pipeline at this point to test. ```console $ xvc -vv pipeline run -[INFO] Found explicit dependency: XvcStep { name: "install-requirements" } -> Step(StepDep { name: "init-venv" }) -[INFO] Found explicit dependency: XvcStep { name: "create-test-array" } -> Step(StepDep { name: "install-requirements" }) [INFO] Found explicit dependency: XvcStep { name: "create-train-array" } -> Step(StepDep { name: "install-requirements" }) +[INFO] Found explicit dependency: XvcStep { name: "install-requirements" } -> Step(StepDep { name: "init-venv" }) [INFO] Found explicit dependency: XvcStep { name: "create-validate-array" } -> Step(StepDep { name: "install-requirements" }) +[INFO] Found explicit dependency: XvcStep { name: "create-test-array" } -> Step(StepDep { name: "install-requirements" }) [INFO][pipeline/src/pipeline/mod.rs::343] Pipeline Graph: digraph { - 0 [ label = "(30018, 14524638717440636931)" ] - 1 [ label = "(30016, 17100650958363218553)" ] - 2 [ label = "(30009, 3990512960304683731)" ] - 3 [ label = "(30011, 10670284972343587248)" ] - 4 [ label = "(30010, 5124195226955327635)" ] - 5 [ label = "(30012, 13164969405306743296)" ] - 0 -> 1 [ label = "Step" ] - 3 -> 0 [ label = "Step" ] - 4 -> 0 [ label = "Step" ] - 5 -> 0 [ label = "Step" ] + 0 [ label = "(30010, 4200972040571775892)" ] + 1 [ label = "(30016, 9393516097102918615)" ] + 2 [ label = "(30018, 11297886604680049042)" ] + 3 [ label = "(30009, 3311159181882078797)" ] + 4 [ label = "(30012, 3998576099235507000)" ] + 5 [ label = "(30011, 4322586287807154762)" ] + 0 -> 2 [ label = "Step" ] + 2 -> 1 [ label = "Step" ] + 4 -> 2 [ label = "Step" ] + 5 -> 2 [ label = "Step" ] } -[INFO] Waiting for dependency steps for step create-validate-array [INFO] Waiting for dependency steps for step create-test-array +[INFO] No dependency steps for step init-venv +[INFO] Waiting for dependency steps for step create-train-array [INFO] Waiting for dependency steps for step install-requirements [INFO] No dependency steps for step recheck-data -[INFO] Waiting for dependency steps for step create-train-array [INFO] [recheck-data] Dependencies has changed -[INFO] No dependency steps for step init-venv +[INFO] Waiting for dependency steps for step create-validate-array [INFO] [init-venv] Dependencies has changed [DONE] recheck-data (xvc file recheck data/train/ data/validate/ data/test/) [DONE] init-venv (python3 -m venv .venv) @@ -389,14 +389,14 @@ Installing collected packages: mpmath, typing-extensions, threadpoolctl, sympy, Successfully installed MarkupSafe-2.1.3 filelock-3.13.1 fsspec-2023.10.0 jinja2-3.1.2 joblib-1.3.2 mpmath-1.3.0 networkx-3.2.1 numpy-1.26.2 opencv-python-4.8.1.78 pyyaml-6.0.1 scikit-learn-1.3.2 scipy-1.11.4 sympy-1.12 threadpoolctl-3.2.0 torch-2.1.1 typing-extensions-4.8.0 [DONE] install-requirements (.venv/bin/python3 -m pip install -r requirements.txt) -[INFO] Dependency steps completed successfully for step create-validate-array -[INFO] Dependency steps completed successfully for step create-train-array [INFO] Dependency steps completed successfully for step create-test-array -[INFO] [create-validate-array] Dependencies has changed +[INFO] Dependency steps completed successfully for step create-train-array +[INFO] Dependency steps completed successfully for step create-validate-array [INFO] [create-test-array] Dependencies has changed [INFO] [create-train-array] Dependencies has changed -[DONE] create-validate-array (.venv/bin/python3 image_to_numpy_array.py --dir data/validate/) +[INFO] [create-validate-array] Dependencies has changed [DONE] create-test-array (.venv/bin/python3 image_to_numpy_array.py --dir data/test/) +[DONE] create-validate-array (.venv/bin/python3 image_to_numpy_array.py --dir data/validate/) [DONE] create-train-array (.venv/bin/python3 image_to_numpy_array.py --dir data/train/) ``` @@ -405,16 +405,16 @@ Now, when we take a look at the data directories, we find `images.npy` and `clas ```console $ zsh -cl 'ls -l data/train/*.npy' --rw-r--r-- 1 iex staff 72128 Dec 1 21:59 data/train/classes.npy --rw-r--r-- 1 iex staff 110592128 Dec 1 21:59 data/train/images.npy +-rw-r--r-- 1 iex staff 72128 Dec 1 22:53 data/train/classes.npy +-rw-r--r-- 1 iex staff 110592128 Dec 1 22:53 data/train/images.npy $ zsh -cl 'ls -l data/test/*.npy' --rw-r--r-- 1 iex staff 24128 Dec 1 21:59 data/test/classes.npy --rw-r--r-- 1 iex staff 36864128 Dec 1 21:59 data/test/images.npy +-rw-r--r-- 1 iex staff 24128 Dec 1 22:53 data/test/classes.npy +-rw-r--r-- 1 iex staff 36864128 Dec 1 22:53 data/test/images.npy $ zsh -cl 'ls -l data/validate/*.npy' --rw-r--r-- 1 iex staff 24128 Dec 1 21:59 data/validate/classes.npy --rw-r--r-- 1 iex staff 36864128 Dec 1 21:59 data/validate/images.npy +-rw-r--r-- 1 iex staff 24128 Dec 1 22:53 data/validate/classes.npy +-rw-r--r-- 1 iex staff 36864128 Dec 1 22:53 data/validate/images.npy ``` @@ -527,43 +527,43 @@ We're ready to run the pipeline and train the model. ```console $ xvc -vv pipeline run -[INFO] Found explicit dependency: XvcStep { name: "create-train-array" } -> Step(StepDep { name: "install-requirements" }) -[INFO] Found explicit dependency: XvcStep { name: "create-validate-array" } -> Step(StepDep { name: "install-requirements" }) [INFO] Found explicit dependency: XvcStep { name: "install-requirements" } -> Step(StepDep { name: "init-venv" }) +[INFO] Found explicit dependency: XvcStep { name: "create-train-array" } -> Step(StepDep { name: "install-requirements" }) [INFO] Found explicit dependency: XvcStep { name: "create-test-array" } -> Step(StepDep { name: "install-requirements" }) -[INFO][pipeline/src/pipeline/mod.rs::151] Found implicit dependency: XvcStep { name: "train-model" } -> XvcStep { name: "create-train-array" } (via XvcPath("data/train/images.npy")) +[INFO] Found explicit dependency: XvcStep { name: "create-validate-array" } -> Step(StepDep { name: "install-requirements" }) [INFO][pipeline/src/pipeline/mod.rs::151] Found implicit dependency: XvcStep { name: "train-model" } -> XvcStep { name: "create-train-array" } (via XvcPath("data/train/classes.npy")) -[INFO][pipeline/src/pipeline/mod.rs::151] Found implicit dependency: XvcStep { name: "train-model" } -> XvcStep { name: "create-validate-array" } (via XvcPath("data/validate/classes.npy")) -[INFO][pipeline/src/pipeline/mod.rs::151] Found implicit dependency: XvcStep { name: "train-model" } -> XvcStep { name: "create-validate-array" } (via XvcPath("data/validate/images.npy")) +[INFO][pipeline/src/pipeline/mod.rs::151] Found implicit dependency: XvcStep { name: "train-model" } -> XvcStep { name: "create-train-array" } (via XvcPath("data/train/images.npy")) [INFO][pipeline/src/pipeline/mod.rs::151] Found implicit dependency: XvcStep { name: "train-model" } -> XvcStep { name: "create-test-array" } (via XvcPath("data/test/classes.npy")) [INFO][pipeline/src/pipeline/mod.rs::151] Found implicit dependency: XvcStep { name: "train-model" } -> XvcStep { name: "create-test-array" } (via XvcPath("data/test/images.npy")) +[INFO][pipeline/src/pipeline/mod.rs::151] Found implicit dependency: XvcStep { name: "train-model" } -> XvcStep { name: "create-validate-array" } (via XvcPath("data/validate/classes.npy")) +[INFO][pipeline/src/pipeline/mod.rs::151] Found implicit dependency: XvcStep { name: "train-model" } -> XvcStep { name: "create-validate-array" } (via XvcPath("data/validate/images.npy")) [INFO][pipeline/src/pipeline/mod.rs::343] Pipeline Graph: digraph { - 0 [ label = "(30016, 17100650958363218553)" ] - 1 [ label = "(30009, 3990512960304683731)" ] - 2 [ label = "(30010, 5124195226955327635)" ] - 3 [ label = "(30012, 13164969405306743296)" ] - 4 [ label = "(30018, 14524638717440636931)" ] - 5 [ label = "(30024, 5581936731820059836)" ] - 6 [ label = "(30011, 10670284972343587248)" ] - 2 -> 4 [ label = "Step" ] - 3 -> 4 [ label = "Step" ] + 0 [ label = "(30018, 11297886604680049042)" ] + 1 [ label = "(30010, 4200972040571775892)" ] + 2 [ label = "(30011, 4322586287807154762)" ] + 3 [ label = "(30009, 3311159181882078797)" ] + 4 [ label = "(30012, 3998576099235507000)" ] + 5 [ label = "(30016, 9393516097102918615)" ] + 6 [ label = "(30024, 1564924864017263034)" ] + 0 -> 5 [ label = "Step" ] + 1 -> 0 [ label = "Step" ] + 2 -> 0 [ label = "Step" ] 4 -> 0 [ label = "Step" ] - 6 -> 4 [ label = "Step" ] - 5 -> 2 [ label = "File" ] - 5 -> 3 [ label = "File" ] - 5 -> 6 [ label = "File" ] + 6 -> 1 [ label = "File" ] + 6 -> 2 [ label = "File" ] + 6 -> 4 [ label = "File" ] } -[INFO] No dependency steps for step init-venv -[INFO] Waiting for dependency steps for step install-requirements -[INFO] Waiting for dependency steps for step create-train-array [INFO] Waiting for dependency steps for step train-model -[INFO] Waiting for dependency steps for step create-validate-array -[INFO] No dependency steps for step recheck-data +[INFO] Waiting for dependency steps for step create-train-array +[INFO] Waiting for dependency steps for step install-requirements [INFO] Waiting for dependency steps for step create-test-array +[INFO] No dependency steps for step init-venv +[INFO] No dependency steps for step recheck-data [INFO] [recheck-data] Dependencies has changed +[INFO] Waiting for dependency steps for step create-validate-array [INFO] [init-venv] No changed dependencies. Skipping thorough comparison. [INFO] [init-venv] No missing Outputs and no changed dependencies [INFO] Dependency steps completed successfully for step install-requirements @@ -572,32 +572,36 @@ digraph { [INFO] Dependency steps completed successfully for step create-validate-array [INFO] Dependency steps completed successfully for step create-test-array [INFO] Dependency steps completed successfully for step create-train-array -[INFO] [create-validate-array] No changed dependencies. Skipping thorough comparison. -[INFO] [create-validate-array] No missing Outputs and no changed dependencies [INFO] [create-test-array] No changed dependencies. Skipping thorough comparison. [INFO] [create-test-array] No missing Outputs and no changed dependencies +[INFO] [create-validate-array] No changed dependencies. Skipping thorough comparison. +[INFO] [create-validate-array] No missing Outputs and no changed dependencies [INFO] [create-train-array] No changed dependencies. Skipping thorough comparison. [INFO] [create-train-array] No missing Outputs and no changed dependencies [INFO] Dependency steps completed successfully for step train-model [DONE] recheck-data (xvc file recheck data/train/ data/validate/ data/test/) [INFO] [train-model] Dependencies has changed -[WARN] [ERR] [train-model] Traceback (most recent call last): - File "[CWD]/train.py", line 76, in - outputs = model(inputs) - ^^^^^^^^^^^^^ - File "[CWD]/.venv/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl - return self._call_impl(*args, **kwargs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "[CWD]/.venv/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl - return forward_call(*args, **kwargs) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - File "[CWD]/train.py", line 27, in forward - x = x.view(-1, 16 * 13 * 13) - ^^^^^^^^^^^^^^^^^^^^^^^^ -RuntimeError: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead. - -[OUT] [train-model] torch.Size([4, 64, 64, 3]) +[OUT] [train-model] [1, 2000] loss: 1.036 +Accuracy of the network on the validation images: 78 % +[2, 2000] loss: 0.468 +Accuracy of the network on the validation images: 76 % +Confusion Matrix: +[[158 0 0 1 6 1 3 9 1 4 0 6 4 1 6] + [ 0 167 28 0 0 0 0 0 0 0 0 0 3 0 2] + [ 0 7 175 10 0 1 0 6 0 0 0 0 1 0 0] + [ 4 2 104 86 0 2 1 0 0 1 0 0 0 0 0] + [ 0 2 1 0 190 0 0 3 0 0 0 4 0 0 0] + [ 0 3 36 24 2 91 0 29 0 2 0 2 4 0 7] + [ 0 8 6 2 1 0 149 5 15 3 4 4 2 1 0] + [ 0 5 11 0 3 1 0 148 0 9 2 0 1 0 20] + [ 0 2 1 0 2 0 0 6 183 0 0 1 1 2 2] + [ 0 1 3 0 1 0 0 19 5 132 0 2 1 2 34] + [ 0 10 0 0 0 0 1 3 0 0 168 0 18 0 0] + [ 1 0 4 0 43 1 0 22 0 0 0 120 6 0 3] + [ 0 12 3 5 1 1 1 5 0 0 66 1 105 0 0] + [ 2 1 0 0 6 3 1 8 0 4 13 18 5 137 2] + [ 0 0 3 0 8 0 0 5 0 20 0 2 3 2 157]] -[ERROR] Step train-model finished UNSUCCESSFULLY with command .venv/bin/python3 train.py --train_dir data/train/ --val_dir data/validate --test_dir data/test +[DONE] train-model (.venv/bin/python3 train.py --train_dir data/train/ --val_dir data/validate --test_dir data/test) ```