diff --git a/.github/workflows/lint-test-build.yml b/.github/workflows/lint-test-build.yml
index 1b76d8f17..41eaf42ea 100644
--- a/.github/workflows/lint-test-build.yml
+++ b/.github/workflows/lint-test-build.yml
@@ -332,7 +332,7 @@ jobs:
           cache: npm
       - run: npm ci
       - run: npm --workspace={./discojs/discojs-{core,node},./server} run build
-      - run: npm --workspace=./cli start -- -t cifar10 -u 1 -e 1
+      - run: npm --workspace=./cli start -- -t cifar10 -u 3 -e 1
 
   test-docs-examples:
     needs: [build-lib-core, build-lib-node, build-server, download-datasets]
diff --git a/cli/src/data.ts b/cli/src/data.ts
index 74b50120a..8537ebb52 100644
--- a/cli/src/data.ts
+++ b/cli/src/data.ts
@@ -1,4 +1,4 @@
-import { Range } from 'immutable'
+import { Range, Repeat } from 'immutable'
 import fs from 'node:fs/promises'
 import path from 'node:path'
 
@@ -23,7 +23,7 @@ async function simplefaceData (task: Task): Promise {
 async function cifar10Data (cifar10: Task): Promise {
   const dir = '../datasets/CIFAR10/'
   const files = (await fs.readdir(dir)).map((file) => path.join(dir, file))
-  const labels = Range(0, 24).map((label) => (label % 10).toString()).toArray()
+  const labels = Repeat('airplane', 24).toArray() // TODO read labels in csv
   return await new NodeImageLoader(cifar10).loadAll(files, { labels })
 }
diff --git a/discojs/discojs-core/src/dataset/data/image_data.spec.ts b/discojs/discojs-core/src/dataset/data/image_data.spec.ts
index db4fe0392..72386914e 100644
--- a/discojs/discojs-core/src/dataset/data/image_data.spec.ts
+++ b/discojs/discojs-core/src/dataset/data/image_data.spec.ts
@@ -2,21 +2,13 @@ import { assert, expect } from 'chai'
 import * as tf from '@tensorflow/tfjs'
 
 import { ImageData } from './image_data.js'
-import type { Task } from '../../index.js'
+import { defaultTasks } from '../../index.js'
 
 describe('image data checks', () => {
-  const simplefaceMock: Task = {
-    id: 'simpleface',
-    displayInformation: {},
-    trainingInformation: {
-      IMAGE_H: 200,
-      IMAGE_W: 200
-    }
-  } as unknown as Task
-
+  const simpleFaceTask = defaultTasks.simpleFace.getTask()
   it('throw an error on incorrectly formatted data', async () => {
     try {
-      await ImageData.init(tf.data.array([tf.zeros([150, 150, 3]), tf.zeros([150, 150, 3])]), simplefaceMock, 3)
+      await ImageData.init(tf.data.array([tf.zeros([150, 150, 3]), tf.zeros([150, 150, 3])]), simpleFaceTask, 3)
     } catch (e) {
       expect(e).to.be.an.instanceOf(Error)
       return
@@ -26,6 +18,6 @@
   })
 
   it('do nothing on correctly formatted data', async () => {
-    await ImageData.init(tf.data.array([tf.zeros([200, 200, 3]), tf.zeros([200, 200, 3])]), simplefaceMock, 3)
+    await ImageData.init(tf.data.array([tf.zeros([200, 200, 3]), tf.zeros([200, 200, 3])]), simpleFaceTask, 3)
   })
 })
diff --git a/discojs/discojs-core/src/dataset/data/preprocessing/text_preprocessing.spec.ts b/discojs/discojs-core/src/dataset/data/preprocessing/text_preprocessing.spec.ts
index c6b40e925..794626ea1 100644
--- a/discojs/discojs-core/src/dataset/data/preprocessing/text_preprocessing.spec.ts
+++ b/discojs/discojs-core/src/dataset/data/preprocessing/text_preprocessing.spec.ts
@@ -10,7 +10,10 @@ describe('text preprocessing', function () {
   function initMockTask(): Task {
     return {
       id: 'mock-task-id',
-      displayInformation: {},
+      displayInformation: {
+        taskTitle: 'mock title',
+        summary: { overview: '', preview: '' }
+      },
       trainingInformation: {
         modelID: 'model-id',
         epochs: 1,
diff --git a/discojs/discojs-core/src/dataset/data/tabular_data.spec.ts b/discojs/discojs-core/src/dataset/data/tabular_data.spec.ts
index a0d727570..40040c3e7 100644
--- a/discojs/discojs-core/src/dataset/data/tabular_data.spec.ts
+++ b/discojs/discojs-core/src/dataset/data/tabular_data.spec.ts
@@ -3,37 +3,16 @@ import { Map, Set } from 'immutable'
 import * as tf from '@tensorflow/tfjs'
 
 import { TabularData } from './tabular_data.js'
-import type { Task } from '../../index.js'
+import { defaultTasks } from '../../index.js'
+
 describe('tabular data checks', () => {
-  const titanicMock: Task = {
-    id: 'titanic',
-    displayInformation: {},
-    trainingInformation: {
-      modelID: 'titanic',
-      epochs: 1,
-      roundDuration: 1,
-      validationSplit: 0,
-      batchSize: 1,
-      dataType: 'tabular',
-      scheme: 'federated',
-      inputColumns: [
-        'PassengerId',
-        'Age',
-        'SibSp',
-        'Parch',
-        'Fare',
-        'Pclass'
-      ],
-      outputColumns: [
-        'Survived'
-      ]
-    }
-  }
+  const titanicTask = defaultTasks.titanic.getTask()
+
   const dataConfig = {
-    features: titanicMock.trainingInformation.inputColumns,
-    labels: titanicMock.trainingInformation.outputColumns
+    features: titanicTask.trainingInformation.inputColumns,
+    labels: titanicTask.trainingInformation.outputColumns
   }
 
   const columnConfigs = Map(
@@ -51,7 +30,7 @@ describe('tabular data checks', () => {
   it('throw an error on incorrectly formatted data', async () => {
     try {
-      await TabularData.init(tf.data.csv('file://../../datasets/cifar10-labels.csv', csvConfig), titanicMock, 3)
+      await TabularData.init(tf.data.csv('file://../../datasets/cifar10-labels.csv', csvConfig), titanicTask, 3)
     } catch (e) {
       expect(e).to.be.an.instanceOf(Error)
       return
@@ -61,6 +40,6 @@
   })
 
   it('do nothing on correctly formatted data', async () => {
-    await TabularData.init(tf.data.csv('file://../../datasets/titanic_train.csv', csvConfig), titanicMock, 3)
+    await TabularData.init(tf.data.csv('file://../../datasets/titanic_train.csv', csvConfig), titanicTask, 3)
   })
 })
diff --git a/discojs/discojs-core/src/dataset/dataset_builder.ts b/discojs/discojs-core/src/dataset/dataset_builder.ts
index 6319711e3..991ee1f1d 100644
--- a/discojs/discojs-core/src/dataset/dataset_builder.ts
+++ b/discojs/discojs-core/src/dataset/dataset_builder.ts
@@ -98,7 +98,7 @@ export class DatasetBuilder {
   async build (config?: DataConfig): Promise {
     // Require that at least one source collection is non-empty, but not both
     if ((this._sources.length > 0) === (this.labelledSources.size > 0)) {
-      throw new Error('Please provide dataset input files')
+      throw new Error('Please provide dataset input files') // This error message is parsed in DatasetInput.vue
     }
 
     let dataTuple: DataSplit
diff --git a/discojs/discojs-core/src/default_tasks/cifar10.ts b/discojs/discojs-core/src/default_tasks/cifar10.ts
index 413f26baa..8202e94a9 100644
--- a/discojs/discojs-core/src/default_tasks/cifar10.ts
+++ b/discojs/discojs-core/src/default_tasks/cifar10.ts
@@ -13,11 +13,10 @@ export const cifar10: TaskProvider = {
         preview: 'In this challenge, we ask you to classify images into categories based on the objects shown on the image.',
         overview: 'The CIFAR-10 dataset is a collection of images that are commonly used to train machine learning and computer vision algorithms. It is one of the most widely used datasets for machine learning research.'
       },
-      limitations: 'The training data is limited to small images of size 32x32.',
-      tradeoffs: 'Training success strongly depends on label distribution',
       dataFormatInformation: 'Images should be of .png format and of size 32x32.
The label file should be .csv, where each row contains a file_name, class.

e.g. if you have images: 0.png (of a frog) and 1.png (of a car)
labels.csv contains: (Note that no header is needed)
0.png, frog
1.png, car', dataExampleText: 'Below you can find 10 random examples from each of the 10 classes in the dataset.', - dataExampleImage: 'https://storage.googleapis.com/deai-313515.appspot.com/example_training_data/cifar10-example.png' + dataExampleImage: 'https://storage.googleapis.com/deai-313515.appspot.com/example_training_data/cifar10-example.png', + sampleDatasetLink: 'https://www.kaggle.com/competitions/cifar-10/data' }, trainingInformation: { modelID: 'cifar10-model', @@ -29,7 +28,7 @@ export const cifar10: TaskProvider = { preprocessingFunctions: [data.ImagePreprocessing.Resize], IMAGE_H: 224, IMAGE_W: 224, - LABEL_LIST: ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'], + LABEL_LIST: ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'], scheme: 'decentralized', noiseScale: undefined, clippingRadius: 20, diff --git a/discojs/discojs-core/src/default_tasks/geotags.ts b/discojs/discojs-core/src/default_tasks/geotags.ts deleted file mode 100644 index 14e24c004..000000000 --- a/discojs/discojs-core/src/default_tasks/geotags.ts +++ /dev/null @@ -1,73 +0,0 @@ -import { Range } from 'immutable' -import * as tf from '@tensorflow/tfjs' - -import type { Model, Task, TaskProvider } from '../index.js' -import { data, models } from '../index.js' -import { LabelTypeEnum } from '../task/label_type.js' - -export const geotags: TaskProvider = { - getTask (): Task { - return { - id: 'geotags', - displayInformation: { - taskTitle: 'GeoTags', - summary: { - preview: 'In this challenge, we predict the geo-location of a photo given its pixels in terms of a cell number of a grid built on top of Switzerland', - overview: 'The geotags dataset is a collection of images with geo-location information used to train a machine learning algorithm to predict the location of a photo given its pixels.' - }, - limitations: 'The training data is limited to images of size 224x224.', - tradeoffs: 'Training success strongly depends on label distribution', - dataFormatInformation: 'Images should be of .png format and of size 224x224.
The label file should be .csv, where each row contains a file_name, class. The class is the cell number of a the given grid of Switzerland. ', - labelDisplay: { - labelType: LabelTypeEnum.POLYGON_MAP, - mapBaseUrl: 'https://disco-polygon.web.app/' - } - }, - trainingInformation: { - modelID: 'geotags-model', - epochs: 10, - roundDuration: 10, - validationSplit: 0.2, - batchSize: 10, - dataType: 'image', - IMAGE_H: 224, - IMAGE_W: 224, - preprocessingFunctions: [data.ImagePreprocessing.Resize], - LABEL_LIST: Range(0, 127).map(String).toArray(), - scheme: 'federated', - noiseScale: undefined, - clippingRadius: 20, - decentralizedSecure: true, - minimumReadyPeers: 3, - maxShareValue: 100 - } - } - }, - - async getModel (): Promise { - const pretrainedModel = await tf.loadLayersModel( - 'https://storage.googleapis.com/deai-313515.appspot.com/models/geotags/model.json' - ) - - const numLayers = pretrainedModel.layers.length - - pretrainedModel.layers.forEach(layer => { layer.trainable = false }) - pretrainedModel.layers[numLayers - 1].trainable = true - - const model = tf.sequential({ - layers: [ - tf.layers.inputLayer({ inputShape: [224, 224, 3] }), - tf.layers.rescaling({ scale: 1 / 127.5, offset: -1 }), // Rescaling input between -1 and 1 - pretrainedModel - ] - }) - - model.compile({ - optimizer: 'adam', - loss: 'categoricalCrossentropy', - metrics: ['accuracy'] - }) - - return new models.TFJS(model) - } -} diff --git a/discojs/discojs-core/src/default_tasks/index.ts b/discojs/discojs-core/src/default_tasks/index.ts index e85e0f216..7ee583f1f 100644 --- a/discojs/discojs-core/src/default_tasks/index.ts +++ b/discojs/discojs-core/src/default_tasks/index.ts @@ -1,8 +1,6 @@ export { cifar10 } from './cifar10.js' -export { geotags } from './geotags.js' export { lusCovid } from './lus_covid.js' export { mnist } from './mnist.js' export { simpleFace } from './simple_face.js' -export { skinMnist } from './skin_mnist.js' export { titanic } from './titanic.js' export { wikitext } from './wikitext.js' diff --git a/discojs/discojs-core/src/default_tasks/lus_covid.ts b/discojs/discojs-core/src/default_tasks/lus_covid.ts index f2404c437..a0fa1366a 100644 --- a/discojs/discojs-core/src/default_tasks/lus_covid.ts +++ b/discojs/discojs-core/src/default_tasks/lus_covid.ts @@ -14,16 +14,16 @@ export const lusCovid: TaskProvider = { overview: "Don’t have a dataset of your own? Download a sample of a few cases here." }, model: "We use a simplified* version of the DeepChest model: A deep learning model developed in our lab (intelligent Global Health.). On a cohort of 400 Swiss patients suspected of LRTI, the model obtained over 90% area under the ROC curve for this task.

*Simplified to ensure smooth running on your browser, the performance is minimally affected. Details of the adaptations are below
- Removed: positional embedding (i.e. we don’t take the anatomic position into consideration). Rather, the model now does mean pooling over the feature vector of the images for each patient
- Replaced: ResNet18 by Mobilenet", - tradeoffs: 'We are using a simpler version of DeepChest in order to be able to run it on the browser.', dataFormatInformation: 'This model takes as input an image dataset. It consists on a set of lung ultrasound images per patient with its corresponding label of covid positive or negative. Moreover, to identify the images per patient you have to follow the follwing naming pattern: "patientId_*.png"', dataExampleText: 'Below you can find an example of an expected lung image for patient 2 named: 2_QAID_1.masked.reshaped.squared.224.png', - dataExampleImage: 'https://storage.googleapis.com/deai-313515.appspot.com/example_training_data/2_QAID_1.masked.reshaped.squared.224.png' + dataExampleImage: 'https://storage.googleapis.com/deai-313515.appspot.com/example_training_data/2_QAID_1.masked.reshaped.squared.224.png', + sampleDatasetLink: 'https://drive.switch.ch/index.php/s/zM5ZrUWK3taaIly' }, trainingInformation: { modelID: 'lus-covid-model', epochs: 50, roundDuration: 2, - validationSplit: 0, + validationSplit: 0.2, batchSize: 5, IMAGE_H: 100, IMAGE_W: 100, diff --git a/discojs/discojs-core/src/default_tasks/mnist.ts b/discojs/discojs-core/src/default_tasks/mnist.ts index 4e0b62ee6..5c285b4c2 100644 --- a/discojs/discojs-core/src/default_tasks/mnist.ts +++ b/discojs/discojs-core/src/default_tasks/mnist.ts @@ -14,7 +14,6 @@ export const mnist: TaskProvider = { overview: 'The MNIST handwritten digit classification problem is a standard dataset used in computer vision and deep learning. Although the dataset is effectively solved, we use it to test our Decentralised Learning algorithms and platform.' }, model: 'The current model is a very simple CNN and its main goal is to test the app and the Decentralizsed Learning functionality.', - tradeoffs: 'We are using a simple model, first a 2d convolutional layer > max pooling > 2d convolutional layer > max pooling > convolutional layer > 2 dense layers.', dataFormatInformation: 'This model is trained on images corresponding to digits 0 to 9. You can upload each digit image of your dataset in the box corresponding to its label. 
The model taskes images of size 28x28 as input.', dataExampleText: 'Below you can find an example of an expected image representing the digit 9.', dataExampleImage: 'http://storage.googleapis.com/deai-313515.appspot.com/example_training_data/9-mnist-example.png' diff --git a/discojs/discojs-core/src/default_tasks/simple_face.ts b/discojs/discojs-core/src/default_tasks/simple_face.ts index 8d5df19b3..82be26e72 100644 --- a/discojs/discojs-core/src/default_tasks/simple_face.ts +++ b/discojs/discojs-core/src/default_tasks/simple_face.ts @@ -13,8 +13,6 @@ export const simpleFace: TaskProvider = { preview: 'Can you detect if the person in a picture is a child or an adult?', overview: 'Simple face is a small subset of face_task from Kaggle' }, - limitations: 'The training data is limited to small images of size 200x200.', - tradeoffs: 'Training success strongly depends on label distribution', dataFormatInformation: '', dataExampleText: 'Below you find an example', dataExampleImage: 'https://storage.googleapis.com/deai-313515.appspot.com/example_training_data/simple_face-example.png' diff --git a/discojs/discojs-core/src/default_tasks/skin_mnist.ts b/discojs/discojs-core/src/default_tasks/skin_mnist.ts deleted file mode 100644 index 73a682ab4..000000000 --- a/discojs/discojs-core/src/default_tasks/skin_mnist.ts +++ /dev/null @@ -1,103 +0,0 @@ -import * as tf from '@tensorflow/tfjs' - -import type { Model, Task, TaskProvider } from '../index.js' -import { data, models } from '../index.js' - -export const skinMnist: TaskProvider = { - getTask (): Task { - return { - id: 'skin_mnist', - displayInformation: { - taskTitle: 'Skin disease classification', - summary: { - preview: 'Can you determine the skin disease from the dermatoscopic images?', - overview: - 'HAM10000 "Human Against Machine with 10000 training images" dataset is a large collection of multi-source dermatoscopic images of pigmented lesions from Kaggle' - }, - limitations: - 'The training data is limited to small images of size 28x28, similarly to the MNIST dataset.', - tradeoffs: 'Training success strongly depends on label distribution', - dataFormatInformation: '', - dataExampleText: 'Below you find an example', - dataExampleImage: 'http://walidbn.com/ISIC_0024306.jpg' - }, - trainingInformation: { - modelID: 'skin_mnist-model', - epochs: 50, - roundDuration: 1, - validationSplit: 0.1, - batchSize: 32, - preprocessingFunctions: [data.ImagePreprocessing.Normalize], - dataType: 'image', - IMAGE_H: 28, - IMAGE_W: 28, - LABEL_LIST: [ - 'Melanocytic nevi', - 'Melanoma', - 'Benign keratosis-like lesions', - 'Basal cell carcinoma', - 'Actinic keratoses', - 'Vascular lesions', - 'Dermatofibroma' - ], - scheme: 'federated', - noiseScale: undefined, - clippingRadius: undefined - } - } - }, - - getModel (): Promise { - const numClasses = 7 - const size = 28 - - const model = tf.sequential() - - model.add( - tf.layers.conv2d({ - inputShape: [size, size, 3], - filters: 256, - kernelSize: 3, - activation: 'relu' - }) - ) - - model.add(tf.layers.maxPooling2d({ poolSize: [2, 2] })) - model.add(tf.layers.dropout({ rate: 0.3 })) - - model.add( - tf.layers.conv2d({ - filters: 128, - kernelSize: 3, - activation: 'relu' - }) - ) - - model.add(tf.layers.maxPooling2d({ poolSize: [2, 2] })) - model.add(tf.layers.dropout({ rate: 0.3 })) - - model.add( - tf.layers.conv2d({ - filters: 64, - kernelSize: 3, - activation: 'relu' - }) - ) - - model.add(tf.layers.maxPooling2d({ poolSize: [2, 2] })) - model.add(tf.layers.dropout({ rate: 0.3 })) - - 
model.add(tf.layers.flatten()) - - model.add(tf.layers.dense({ units: 32 })) - model.add(tf.layers.dense({ units: numClasses, activation: 'softmax' })) - - model.compile({ - optimizer: tf.train.adam(0.001), - loss: 'categoricalCrossentropy', - metrics: ['accuracy'] - }) - - return Promise.resolve(new models.TFJS(model)) - } -} diff --git a/discojs/discojs-core/src/default_tasks/titanic.ts b/discojs/discojs-core/src/default_tasks/titanic.ts index fba223e32..9f23e2c93 100644 --- a/discojs/discojs-core/src/default_tasks/titanic.ts +++ b/discojs/discojs-core/src/default_tasks/titanic.ts @@ -14,7 +14,6 @@ export const titanic: TaskProvider = { overview: 'We all know the unfortunate story of the Titanic: this flamboyant new transatlantic boat that sunk in 1912 in the North Atlantic Ocean. Today, we revist this tragedy by trying to predict the survival odds of the passenger given some basic features.' }, model: 'The current model does not normalize the given data and applies only a very simple pre-processing of the data.', - tradeoffs: 'We are using a small model for this task: 4 fully connected layers with few neurons. This allows fast training but can yield to reduced accuracy.', dataFormatInformation: 'This model takes as input a CSV file with 12 columns. The features are general information about the passenger (sex, age, name, etc.) and specific related Titanic data such as the ticket class bought by the passenger, its cabin number, etc.

pclass: A proxy for socio-economic status (SES)
1st = Upper
2nd = Middle
3rd = Lower

age: Age is fractional if less than 1. If the age is estimated, it is in the form of xx.5

sibsp: The dataset defines family relations in this way:
Sibling = brother, sister, stepbrother, stepsister
Spouse = husband, wife (mistresses and fiancés were ignored)

parch: The dataset defines family relations in this way:
Parent = mother, father
Child = daughter, son, stepdaughter, stepson
Some children travelled only with a nanny, therefore parch=0 for them.

The first line of the CSV contains the header:
PassengerId, Survived, Pclass, Name, Sex, Age, SibSp, Parch, Ticket, Fare, Cabin, Embarked

Each susequent row contains the corresponding data.', dataExampleText: 'Below one can find an example of a datapoint taken as input by our model. In this datapoint, the person is young man named Owen Harris that unfortunnalty perished with the Titanic. He boarded the boat in South Hamptons and was a 3rd class passenger. On the testing & validation page, the data should not contain the label column (Survived).', dataExample: [ diff --git a/discojs/discojs-core/src/default_tasks/wikitext.ts b/discojs/discojs-core/src/default_tasks/wikitext.ts index 7f80f2db5..deb657c32 100644 --- a/discojs/discojs-core/src/default_tasks/wikitext.ts +++ b/discojs/discojs-core/src/default_tasks/wikitext.ts @@ -11,10 +11,9 @@ export const wikitext: TaskProvider = { preview: 'In this challenge, we ask you to do next word prediction on a dataset of Wikipedia articles.', overview: 'Wikitext-103-raw is a dataset comprising unprocessed text excerpts from Wikipedia articles, designed for tasks related to natural language processing and language modeling.' }, - limitations: 'The dataset may contain noise, inconsistencies, and unstructured content due to its raw nature, potentially posing challenges for certain NLP tasks.', - tradeoffs: 'The raw format may lack structured annotations and may require additional preprocessing for specific applications.', dataFormatInformation: 'The dataset is organized as a large text file, with each line representing a segment of raw text from Wikipedia articles.', - dataExampleText: 'An example excerpt from the dataset could be: "The history of artificial intelligence dates back to ancient times, with philosophical discussions on the nature of thought and reasoning."' + dataExampleText: 'An example excerpt from the dataset could be: "The history of artificial intelligence dates back to ancient times, with philosophical discussions on the nature of thought and reasoning."', + sampleDatasetLink: 'https://dax-cdn.cdn.appdomain.cloud/dax-wikitext-103/1.0.1/wikitext-103.tar.gz' }, trainingInformation: { dataType: 'text', diff --git a/discojs/discojs-core/src/task/display_information.ts b/discojs/discojs-core/src/task/display_information.ts index f8daa730c..2fa5a0e30 100644 --- a/discojs/discojs-core/src/task/display_information.ts +++ b/discojs/discojs-core/src/task/display_information.ts @@ -1,11 +1,9 @@ import { type Summary, isSummary } from './summary.js' import { type DataExample, isDataExample } from './data_example.js' -import { type LabelType, isLabelType } from './label_type.js' export interface DisplayInformation { - taskTitle?: string - summary?: Summary - tradeoffs?: string + taskTitle: string + summary: Summary dataFormatInformation?: string // TODO merge dataExample dataExampleText?: string @@ -14,9 +12,10 @@ export interface DisplayInformation { dataExample?: DataExample[] // TODO no need for undefined headers?: string[] + // Displays the image at this URL in the UI as an example when connecting data dataExampleImage?: string - limitations?: string - labelDisplay?: LabelType + // URL to download a dataset for the task, is displayed in the UI when asking to connect data + sampleDatasetLink?: string } export function isDisplayInformation (raw: unknown): raw is DisplayInformation { @@ -29,31 +28,41 @@ export function isDisplayInformation (raw: unknown): raw is DisplayInformation { dataExampleImage, dataExampleText, dataFormatInformation, + sampleDatasetLink, headers, - labelDisplay, - limitations, model, summary, taskTitle, - tradeoffs }: Partial> = raw if ( typeof taskTitle !== 
'string' || (dataExampleText !== undefined && typeof dataExampleText !== 'string') || + (sampleDatasetLink !== undefined && typeof sampleDatasetLink !== 'string') || (dataFormatInformation !== undefined && typeof dataFormatInformation !== 'string') || - (tradeoffs !== undefined && typeof tradeoffs !== 'string') || (model !== undefined && typeof model !== 'string') || - (dataExampleImage !== undefined && typeof dataExampleImage !== 'string') || - (labelDisplay !== undefined && !isLabelType(labelDisplay)) || - (limitations !== undefined && typeof limitations !== 'string') + (dataExampleImage !== undefined && typeof dataExampleImage !== 'string') ) { return false } - if (summary !== undefined && !isSummary(summary)) { + if (!isSummary(summary)) { return false } + if (sampleDatasetLink !== undefined) { + try { + new URL(sampleDatasetLink) + } catch { + return false + } + } + if (dataExampleImage !== undefined) { + try { + new URL(dataExampleImage) + } catch { + return false + } + } if ( dataExample !== undefined && !( @@ -75,13 +84,11 @@ export function isDisplayInformation (raw: unknown): raw is DisplayInformation { dataExampleImage, dataExampleText, dataFormatInformation, + sampleDatasetLink, headers, - labelDisplay, - limitations, model, summary, taskTitle, - tradeoffs, } const _correct: DisplayInformation = repack const _total: Record = repack diff --git a/discojs/discojs-core/src/task/index.ts b/discojs/discojs-core/src/task/index.ts index 64cd69aa6..ed2c3c873 100644 --- a/discojs/discojs-core/src/task/index.ts +++ b/discojs/discojs-core/src/task/index.ts @@ -4,4 +4,3 @@ export { isDigest, type Digest } from './digest.js' export { isDisplayInformation, type DisplayInformation } from './display_information.js' export type { TrainingInformation } from './training_information.js' export { pushTask, fetchTasks } from './task_handler.js' -export { LabelTypeEnum } from './label_type.js' diff --git a/discojs/discojs-core/src/task/label_type.ts b/discojs/discojs-core/src/task/label_type.ts deleted file mode 100644 index db31ff247..000000000 --- a/discojs/discojs-core/src/task/label_type.ts +++ /dev/null @@ -1,41 +0,0 @@ -export interface LabelType { - labelType: LabelTypeEnum - mapBaseUrl?: string -} - -export enum LabelTypeEnum { - TEXT, POLYGON_MAP -} - -function isLabelTypeEnum(raw: unknown): raw is LabelTypeEnum { - switch (raw) { - case LabelTypeEnum.TEXT: break - case LabelTypeEnum.POLYGON_MAP: break - default: return false - } - - const _: LabelTypeEnum = raw - - return true -} - -export function isLabelType(raw: unknown): raw is LabelType { - if (typeof raw !== 'object' || raw === null) { - return false - } - - const { labelType, mapBaseUrl }: Partial> = raw - - if ( - !isLabelTypeEnum(labelType) || - (mapBaseUrl !== undefined && typeof mapBaseUrl !== 'string') - ) { - return false - } - - const repack = { labelType, mapBaseUrl } - const _correct: LabelType = repack - const _total: Record = repack - - return true -} diff --git a/discojs/discojs-node/src/data/image_loader.spec.ts b/discojs/discojs-node/src/data/image_loader.spec.ts index 63009b645..dd0a15074 100644 --- a/discojs/discojs-node/src/data/image_loader.spec.ts +++ b/discojs/discojs-node/src/data/image_loader.spec.ts @@ -1,5 +1,5 @@ import { assert, expect } from 'chai' -import { List, Range } from 'immutable' +import { List, Range, Repeat } from 'immutable' import fs from 'node:fs/promises' import * as tf from '@tensorflow/tfjs' import { node as tfNode } from '@tensorflow/tfjs-node' @@ -84,8 +84,8 @@ describe('image loader', () 
=> { }) it('loads multiple samples with labels', async () => { - const labels = Range(0, 24).map((label) => (label % 10)) - const stringLabels = labels.map((label) => label.toString()) + const labels = Repeat(3, 24) //internally, disco maps string labels to their index in the task LABEL_LIST + const stringLabels = labels.map(_ => 'cat') // so cat is mapped to integer 3 const oneHotLabels = List(tf.oneHot(labels.toArray(), 10).arraySync() as number[]) const datasetContent = List(await (await LOADERS.CIFAR10 diff --git a/discojs/discojs-node/src/data/tabular_loader.spec.ts b/discojs/discojs-node/src/data/tabular_loader.spec.ts index 81dcc8fba..ae41d5159 100644 --- a/discojs/discojs-node/src/data/tabular_loader.spec.ts +++ b/discojs/discojs-node/src/data/tabular_loader.spec.ts @@ -1,44 +1,22 @@ import { assert, expect } from 'chai' import * as tf from '@tensorflow/tfjs' -import type { Task } from '@epfml/discojs-core' +import { defaultTasks } from '@epfml/discojs-core' import { TabularLoader } from './tabular_loader.js' -const inputFiles = ['../../datasets/titanic_train.csv'] - -const titanicMock: Task = { - id: 'titanic', - displayInformation: {}, - trainingInformation: { - modelID: 'titanic', - epochs: 1, - roundDuration: 1, - validationSplit: 0, - batchSize: 1, - dataType: 'tabular', - scheme: 'federated', - inputColumns: [ - 'PassengerId', - 'Age', - 'SibSp', - 'Parch', - 'Fare', - 'Pclass' - ], - outputColumns: [ - 'Survived' - ] - } -} describe('tabular loader', () => { + const inputFiles = ['../../datasets/titanic_train.csv'] + + const titanicTask = defaultTasks.titanic.getTask() + it('loads a single sample', async () => { - const loaded = new TabularLoader(titanicMock, ',').loadAll( + const loaded = new TabularLoader(titanicTask, ',').loadAll( inputFiles, { - features: titanicMock.trainingInformation?.inputColumns, - labels: titanicMock.trainingInformation?.outputColumns, + features: titanicTask.trainingInformation?.inputColumns, + labels: titanicTask.trainingInformation?.outputColumns, shuffle: false } ) @@ -50,7 +28,7 @@ describe('tabular loader', () => { */ expect(sample).to.eql({ value: { - xs: [1, 3, 22, 1, 0, 7.25], + xs: [3, 22, 1, 0, 7.25], ys: [0] }, done: false @@ -58,7 +36,7 @@ describe('tabular loader', () => { }) it('shuffles samples', async () => { - const titanic = titanicMock + const titanic = titanicTask const loader = new TabularLoader(titanic, ',') const config = { features: titanic.trainingInformation?.inputColumns, diff --git a/docs/VUEJS.md b/docs/VUEJS.md index 95e180a32..1047eaaad 100644 --- a/docs/VUEJS.md +++ b/docs/VUEJS.md @@ -137,11 +137,10 @@ For now a template that shows how to create tasks can be found. 
### Main front-end packages -| Name | Keyword | Description | -| ---------------------------------------------------------------------------- | :-------------: | :------------------------------------------------------------------------ | -| [vee-validate](https://vee-validate.logaretm.com/v4/) | `Form` | Form Validation for Vue.js | -| [vue-toast-notification](https://github.com/ankurk91/vue-toast-notification) | `Notifications` | Toast notification plugin for Vue.js | -| [tippy](https://atomiks.github.io/tippyjs/) | `Menu` | Plugin to build menu / side bars | -| [vue-i18n](https://vue-i18n.intlify.dev/) | `Internation.` | Internationalization plugin for Vue.js | -| [vue-router](https://router.vuejs.org/) | `Routing` | Official router plugin for Vue.js | -| [yup](https://github.com/jquense/yup) | `Form` | Schema builder for runtime value parsing and validation (forms). | +| Name | Keyword | Description | +| ---------------------------------------------------------------------------- | :-------------: | :--------------------------------------------------------------- | +| [vee-validate](https://vee-validate.logaretm.com/v4/) | `Form` | Form Validation for Vue.js | +| [vue-toast-notification](https://github.com/ankurk91/vue-toast-notification) | `Notifications` | Toast notification plugin for Vue.js | +| [tippy](https://atomiks.github.io/tippyjs/) | `Menu` | Plugin to build menu / side bars | +| [vue-router](https://router.vuejs.org/) | `Routing` | Official router plugin for Vue.js | +| [yup](https://github.com/jquense/yup) | `Form` | Schema builder for runtime value parsing and validation (forms). | diff --git a/docs/examples/custom_task.ts b/docs/examples/custom_task.ts index 064813ba6..873fb4e38 100644 --- a/docs/examples/custom_task.ts +++ b/docs/examples/custom_task.ts @@ -10,7 +10,11 @@ const customTask: TaskProvider = { return { id: 'custom-task', displayInformation: { - taskTitle: 'Custom task' + taskTitle: 'Custom task', + summary: { + preview: 'task preview', + overview: 'task overview' + } }, trainingInformation: { modelID: 'custom-model', diff --git a/package-lock.json b/package-lock.json index 3aac6be98..6d5c48b6a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -773,47 +773,6 @@ "integrity": "sha512-93zYdMES/c1D69yZiKDBj0V24vqNzB/koF26KPaagAfd3P/4gUlh3Dys5ogAK+Exi9QyzlD8x/08Zt7wIKcDcA==", "dev": true }, - "node_modules/@intlify/core-base": { - "version": "9.13.1", - "resolved": "https://registry.npmjs.org/@intlify/core-base/-/core-base-9.13.1.tgz", - "integrity": "sha512-+bcQRkJO9pcX8d0gel9ZNfrzU22sZFSA0WVhfXrf5jdJOS24a+Bp8pozuS9sBI9Hk/tGz83pgKfmqcn/Ci7/8w==", - "dependencies": { - "@intlify/message-compiler": "9.13.1", - "@intlify/shared": "9.13.1" - }, - "engines": { - "node": ">= 16" - }, - "funding": { - "url": "https://github.com/sponsors/kazupon" - } - }, - "node_modules/@intlify/message-compiler": { - "version": "9.13.1", - "resolved": "https://registry.npmjs.org/@intlify/message-compiler/-/message-compiler-9.13.1.tgz", - "integrity": "sha512-SKsVa4ajYGBVm7sHMXd5qX70O2XXjm55zdZB3VeMFCvQyvLew/dLvq3MqnaIsTMF1VkkOb9Ttr6tHcMlyPDL9w==", - "dependencies": { - "@intlify/shared": "9.13.1", - "source-map-js": "^1.0.2" - }, - "engines": { - "node": ">= 16" - }, - "funding": { - "url": "https://github.com/sponsors/kazupon" - } - }, - "node_modules/@intlify/shared": { - "version": "9.13.1", - "resolved": "https://registry.npmjs.org/@intlify/shared/-/shared-9.13.1.tgz", - "integrity": 
"sha512-u3b6BKGhE6j/JeRU6C/RL2FgyJfy6LakbtfeVF8fJXURpZZTzfh3e05J0bu0XPw447Q6/WUp3C4ajv4TMS4YsQ==", - "engines": { - "node": ">= 16" - }, - "funding": { - "url": "https://github.com/sponsors/kazupon" - } - }, "node_modules/@isaacs/cliui": { "version": "8.0.2", "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz", @@ -11973,25 +11932,6 @@ "eslint": ">=6.0.0" } }, - "node_modules/vue-i18n": { - "version": "9.13.1", - "resolved": "https://registry.npmjs.org/vue-i18n/-/vue-i18n-9.13.1.tgz", - "integrity": "sha512-mh0GIxx0wPtPlcB1q4k277y0iKgo25xmDPWioVVYanjPufDBpvu5ySTjP5wOrSvlYQ2m1xI+CFhGdauv/61uQg==", - "dependencies": { - "@intlify/core-base": "9.13.1", - "@intlify/shared": "9.13.1", - "@vue/devtools-api": "^6.5.0" - }, - "engines": { - "node": ">= 16" - }, - "funding": { - "url": "https://github.com/sponsors/kazupon" - }, - "peerDependencies": { - "vue": "^3.0.0" - } - }, "node_modules/vue-router": { "version": "4.3.2", "resolved": "https://registry.npmjs.org/vue-router/-/vue-router-4.3.2.tgz", @@ -12477,7 +12417,6 @@ "tippy.js": "6", "vee-validate": "4", "vue": "3", - "vue-i18n": "9", "vue-router": "4", "vue-toast-notification": "3", "vue3-apexcharts": "1", diff --git a/server/tests/client/federated.spec.ts b/server/tests/client/federated.spec.ts index 61d5fa884..6b768632d 100644 --- a/server/tests/client/federated.spec.ts +++ b/server/tests/client/federated.spec.ts @@ -37,7 +37,10 @@ describe("federated client", function () { url, { id: "nonValidTask", - displayInformation: {}, + displayInformation: { + taskTitle: 'mock title', + summary: { overview: '', preview: '' } + }, trainingInformation: { modelID: "irrelevant", epochs: 1, diff --git a/server/tests/e2e/federated.spec.ts b/server/tests/e2e/federated.spec.ts index aa61ec29d..e7976b947 100644 --- a/server/tests/e2e/federated.spec.ts +++ b/server/tests/e2e/federated.spec.ts @@ -1,7 +1,7 @@ import fs from 'node:fs/promises' import path from 'node:path' import type { Server } from 'node:http' -import { List, Range } from 'immutable' +import { List, Repeat } from 'immutable' import { assert, expect } from 'chai' import type { RoundLogs, WeightsContainer } from '@epfml/discojs-core' @@ -29,7 +29,7 @@ describe("end-to-end federated", function () { async function cifar10user (): Promise { const dir = DATASET_DIR + 'CIFAR10/' const files = (await fs.readdir(dir)).map((file) => path.join(dir, file)) - const labels = Range(0, 24).map((label) => (label % 10).toString()).toArray() + const labels = Repeat('cat', 24).toArray() // TODO read labels in csv const cifar10Task = defaultTasks.cifar10.getTask() diff --git a/web-client/cypress/e2e/tasks.cy.ts b/web-client/cypress/e2e/tasks.cy.ts index 6a3ea672a..9025a3f5b 100644 --- a/web-client/cypress/e2e/tasks.cy.ts +++ b/web-client/cypress/e2e/tasks.cy.ts @@ -5,11 +5,12 @@ describe("tasks page", () => { cy.intercept({ hostname: "server", pathname: "tasks" }, [ defaultTasks.titanic.getTask(), defaultTasks.mnist.getTask(), - defaultTasks.geotags.getTask(), + defaultTasks.cifar10.getTask(), ]); cy.visit("/#/list"); - cy.get('div[id="tasks"]').children().should("have.length", 3); + // Length 4 = 3 tasks and 1 div for text description + cy.get('div[id="tasks"]').children().should("have.length", 4); }); it("redirects to training", () => { diff --git a/web-client/cypress/e2e/training.cy.ts b/web-client/cypress/e2e/training.cy.ts index 7241f68fb..37010dcd1 100644 --- a/web-client/cypress/e2e/training.cy.ts +++ b/web-client/cypress/e2e/training.cy.ts @@ -9,7 +9,7 @@ describe("training page", () 
=> { cy.contains("button", "get started").click(); cy.contains("button", "train").click(); - cy.contains("button", "join").click(); + cy.contains("button", "participate").click(); const navigationButtons = 3; for (let i = 0; i < navigationButtons; i++) { @@ -26,7 +26,7 @@ describe("training page", () => { ]); // cypress really wants to JSON encode our buffer. - // to avoid that, we are replacing it directly in the reponse + // to avoid that, we are replacing it directly in the response cy.intercept( { hostname: "server", pathname: "/tasks/titanic/model.json" }, { statusCode: 200 }, @@ -47,7 +47,7 @@ describe("training page", () => { cy.contains("button", "get started").click(); cy.contains("button", "train").click(); - cy.contains("button", "join").click(); + cy.contains("button", "participate").click(); cy.contains("button", "next").click(); cy.contains("label", "select file").selectFile( diff --git a/web-client/package.json b/web-client/package.json index a44cb7c5b..57931c9d0 100644 --- a/web-client/package.json +++ b/web-client/package.json @@ -21,7 +21,6 @@ "tippy.js": "6", "vee-validate": "4", "vue": "3", - "vue-i18n": "9", "vue-router": "4", "vue-toast-notification": "3", "vue3-apexcharts": "1", diff --git a/web-client/src/assets/svg/CreateIcon.vue b/web-client/src/assets/svg/CreateIcon.vue new file mode 100644 index 000000000..bd6163c4f --- /dev/null +++ b/web-client/src/assets/svg/CreateIcon.vue @@ -0,0 +1,24 @@ + + diff --git a/web-client/src/assets/svg/EvaluateIcon.vue b/web-client/src/assets/svg/EvaluateIcon.vue new file mode 100644 index 000000000..3b3365ed3 --- /dev/null +++ b/web-client/src/assets/svg/EvaluateIcon.vue @@ -0,0 +1,23 @@ + + diff --git a/web-client/src/charts.ts b/web-client/src/charts.ts index 42883a3fc..4e83049e3 100644 --- a/web-client/src/charts.ts +++ b/web-client/src/charts.ts @@ -51,7 +51,10 @@ const chartOptions = { max: 100, min: 0, labels: { - show: false + show: true, + formatter: function (value: number) { + return value.toFixed(0); + } } }, xaxis: { @@ -63,7 +66,7 @@ const chartOptions = { show: false }, tooltip: { - enabled: false + enabled: true } } diff --git a/web-client/src/components/containers/ButtonCard.vue b/web-client/src/components/containers/ButtonCard.vue index f5e6f6994..367c5f221 100644 --- a/web-client/src/components/containers/ButtonCard.vue +++ b/web-client/src/components/containers/ButtonCard.vue @@ -22,16 +22,18 @@
diff --git a/web-client/src/components/containers/IconCard.vue b/web-client/src/components/containers/IconCard.vue
index 8b1b98234..d7247bd10 100644
--- a/web-client/src/components/containers/IconCard.vue
+++ b/web-client/src/components/containers/IconCard.vue
@@ -1,7 +1,6 @@