Integrate skin disease classification task

epfml · May 31, 2024 · a4d6479 · a4d6479
1 parent 0eb9626
commit a4d6479
Show file tree

Hide file tree

Showing 4 changed files with 150 additions and 2 deletions.
diff --git a/discojs/src/default_tasks/index.ts b/discojs/src/default_tasks/index.ts
@@ -1,5 +1,6 @@
 export { cifar10 } from './cifar10/index.js'
 export { lusCovid } from './lus_covid.js'
+export { skinMnist } from './skin_mnist.js'
 export { mnist } from './mnist.js'
 export { simpleFace } from './simple_face/index.js'
 export { titanic } from './titanic.js'

diff --git a/discojs/src/default_tasks/lus_covid.ts b/discojs/src/default_tasks/lus_covid.ts
@@ -11,7 +11,7 @@ export const lusCovid: TaskProvider = {
         taskTitle: 'COVID Lung Ultrasound',
         summary: {
           preview: 'Do you have a data of lung ultrasound images on patients <b>suspected of Lower Respiratory Tract infection (LRTI) during the COVID pandemic</b>? <br> Learn how to discriminate between COVID positive and negative patients by joining this task.',
-          overview: "Don’t have a dataset of your own? Download a sample of a few cases <a class='underline' href='https://drive.switch.ch/index.php/s/zM5ZrUWK3taaIly' target='_blank'>here</a>."
+          overview: "Don't have a dataset of your own? Download a sample of a few cases <a class='underline' href='https://drive.switch.ch/index.php/s/zM5ZrUWK3taaIly' target='_blank'>here</a>."
         },
         model: "We use a simplified* version of the <b>DeepChest model</b>: A deep learning model developed in our lab (<a class='underline' href='https://www.epfl.ch/labs/mlo/igh-intelligent-global-health/'>intelligent Global Health</a>.). On a cohort of 400 Swiss patients suspected of LRTI, the model obtained over 90% area under the ROC curve for this task. <br><br>*Simplified to ensure smooth running on your browser, the performance is minimally affected. Details of the adaptations are below <br>- <b>Removed</b>: positional embedding (i.e. we don’t take the anatomic position into consideration). Rather, the model now does mean pooling over the feature vector of the images for each patient <br>- <b>Replaced</b>: ResNet18 by Mobilenet",
         dataFormatInformation: 'This model takes as input an image dataset. It consists on a set of lung ultrasound images per patient with its corresponding label of covid positive or negative. Moreover, to identify the images per patient you have to follow the follwing naming pattern: "patientId_*.png"',

diff --git a/discojs/src/default_tasks/skin_mnist.ts b/discojs/src/default_tasks/skin_mnist.ts
@@ -0,0 +1,147 @@
+import * as tf from '@tensorflow/tfjs'
+
+import type { Model, Task, TaskProvider } from '../index.js'
+import { data, models } from '../index.js'
+
+/**
+ * Task provider for the "Skin Disease Classification" task: a 7-class image
+ * classification problem over 28x28 dermatoscopic images (the task overview
+ * text refers to the HAM10000 dataset).
+ */
+export const skinMnist: TaskProvider = {
+  /**
+   * Describes the task: the strings shown to the user and the training
+   * settings (federated scheme, image preprocessing, label list).
+   *
+   * @returns the static task definition
+   */
+  getTask (): Task {
+    return {
+      id: 'skin_mnist',
+      displayInformation: {
+        taskTitle: 'Skin Disease Classification',
+        summary: {
+          preview: 'Can you determine the skin disease from the dermatoscopic images?',
+          overview: 'HAM10000 "Human Against Machine with 10000 training images" dataset is a large collection of multi-source dermatoscopic images of pigmented lesions from Kaggle'
+        },
+        // NOTE(review): left empty; consider describing the expected input format.
+        dataFormatInformation: '',
+        dataExampleText: 'Below you find an example',
+        // NOTE(review): served over plain http from an external host; confirm
+        // this loads on an https deployment (mixed content) and stays available.
+        dataExampleImage: 'http://walidbn.com/ISIC_0024306.jpg'
+      },
+      trainingInformation: {
+        modelID: 'skin-mnist-model',
+        epochs: 50,
+        roundDuration: 2,
+        validationSplit: 0.2,
+        batchSize: 5,
+        preprocessingFunctions: [data.ImagePreprocessing.Resize, data.ImagePreprocessing.Normalize],
+        dataType: 'image',
+        // Same 28x28 size as the model's input shape in getModel.
+        IMAGE_H: 28,
+        IMAGE_W: 28,
+        // Seven labels: getModel's output layer has one unit per entry.
+        LABEL_LIST: ['nv', 'vasc', 'mel', 'bkl', 'df', 'akiec', 'bcc'],
+        scheme: 'federated',
+        noiseScale: undefined,
+        clippingRadius: undefined
+      }
+    }
+  },
+
+  /**
+   * Builds and compiles the classifier: three stages of
+   * conv2d (3x3, relu) -> maxPooling2d (2x2) -> dropout (0.3) with 256, 128
+   * and 64 filters, followed by flatten, a 32-unit dense layer and a softmax
+   * output layer. Compiled with Adam (learning rate 1e-5), categorical
+   * cross-entropy loss and the accuracy metric.
+   *
+   * @returns the compiled model wrapped in `models.TFJS`
+   */
+  async getModel (): Promise<Model> {
+    // Keep in sync with IMAGE_H / IMAGE_W in getTask.
+    const imageHeight = 28
+    const imageWidth = 28
+    const imageChannels = 3
+    // Keep in sync with the length of LABEL_LIST in getTask.
+    const numOutputClasses = 7
+
+    const model = tf.sequential()
+
+    // Stage 1: only the first layer declares the input shape.
+    model.add(
+      tf.layers.conv2d({
+        inputShape: [imageHeight, imageWidth, imageChannels],
+        filters: 256,
+        kernelSize: 3,
+        activation: 'relu'
+      })
+    )
+    model.add(tf.layers.maxPooling2d({ poolSize: [2, 2] }))
+    model.add(tf.layers.dropout({ rate: 0.3 }))
+
+    // Stage 2
+    model.add(
+      tf.layers.conv2d({
+        filters: 128,
+        kernelSize: 3,
+        activation: 'relu'
+      })
+    )
+    model.add(tf.layers.maxPooling2d({ poolSize: [2, 2] }))
+    model.add(tf.layers.dropout({ rate: 0.3 }))
+
+    // Stage 3
+    model.add(
+      tf.layers.conv2d({
+        filters: 64,
+        kernelSize: 3,
+        activation: 'relu'
+      })
+    )
+    model.add(tf.layers.maxPooling2d({ poolSize: [2, 2] }))
+    model.add(tf.layers.dropout({ rate: 0.3 }))
+
+    // Classification head.
+    model.add(tf.layers.flatten())
+    // NOTE(review): no activation is set on this layer, so it is linear;
+    // confirm that is intended rather than a missing 'relu'.
+    model.add(tf.layers.dense({ units: 32 }))
+    model.add(tf.layers.dense({ units: numOutputClasses, activation: 'softmax' }))
+
+    model.compile({
+      optimizer: tf.train.adam(0.00001),
+      loss: 'categoricalCrossentropy',
+      metrics: ['accuracy']
+    })
+
+    // An async function already wraps its return value in a Promise.
+    return new models.TFJS(model)
+  }
+}
diff --git a/webapp/src/components/data/dataset_input/DatasetInput.vue b/webapp/src/components/data/dataset_input/DatasetInput.vue
@@ -208,7 +208,7 @@ const addFiles = (files: FileList, label?: string) => {
           // Match the selected files with the csv file names and label
           const imageFile = filesArray.find(file => row.filename === file.name.split('.').slice(0, -1).join('.'))
           if (imageFile === undefined) {
-            toaster.error("An image was not found in the CSV file, make sure the CSV filenames don't include file extensions.")
+            toaster.error("Images specified in the CSV file are missing, make sure the CSV filenames don't include file extensions.")
             throw new Error("Image not found in the CSV file")
           } else if (imageFile) {
             props.datasetBuilder.addFiles([imageFile], row.label)