update api

openproblems-bio · Nov 5, 2024 · 1ca2338 · 1ca2338
1 parent d9de3b5
commit 1ca2338
Show file tree

Hide file tree

Showing 9 changed files with 78 additions and 173 deletions.
diff --git a/README.md b/README.md
@@ -38,28 +38,25 @@ should convince readers of the significance and relevance of your task.
 flowchart TB
   file_common_dataset("<a href='https://github.com/openproblems-bio/task_cyto_batch_integration#file-format-common-dataset'>Common Dataset</a>")
   comp_data_processor[/"<a href='https://github.com/openproblems-bio/task_cyto_batch_integration#component-type-data-processor'>Data processor</a>"/]
-  file_solution("<a href='https://github.com/openproblems-bio/task_cyto_batch_integration#file-format-solution'>Solution</a>")
-  file_test("<a href='https://github.com/openproblems-bio/task_cyto_batch_integration#file-format-test-data'>Test data</a>")
-  file_train("<a href='https://github.com/openproblems-bio/task_cyto_batch_integration#file-format-training-data'>Training data</a>")
+  file_unintegrated("<a href='https://github.com/openproblems-bio/task_cyto_batch_integration#file-format-unintegrated'>Unintegrated</a>")
+  file_validation("<a href='https://github.com/openproblems-bio/task_cyto_batch_integration#file-format-validation'>Validation</a>")
   comp_control_method[/"<a href='https://github.com/openproblems-bio/task_cyto_batch_integration#component-type-control-method'>Control Method</a>"/]
-  comp_metric[/"<a href='https://github.com/openproblems-bio/task_cyto_batch_integration#component-type-metric'>Metric</a>"/]
   comp_method[/"<a href='https://github.com/openproblems-bio/task_cyto_batch_integration#component-type-method'>Method</a>"/]
-  file_prediction("<a href='https://github.com/openproblems-bio/task_cyto_batch_integration#file-format-predicted-data'>Predicted data</a>")
+  comp_metric[/"<a href='https://github.com/openproblems-bio/task_cyto_batch_integration#component-type-metric'>Metric</a>"/]
+  file_integrated("<a href='https://github.com/openproblems-bio/task_cyto_batch_integration#file-format-integrated'>Integrated</a>")
   file_score("<a href='https://github.com/openproblems-bio/task_cyto_batch_integration#file-format-score'>Score</a>")
   file_common_dataset---comp_data_processor
-  comp_data_processor-->file_solution
-  comp_data_processor-->file_test
-  comp_data_processor-->file_train
-  file_solution---comp_control_method
-  file_solution---comp_metric
-  file_test---comp_control_method
-  file_test---comp_method
-  file_train---comp_control_method
-  file_train---comp_method
-  comp_control_method-->file_prediction
+  comp_data_processor-->file_unintegrated
+  comp_data_processor-->file_validation
+  file_unintegrated---comp_control_method
+  file_unintegrated---comp_method
+  file_unintegrated---comp_metric
+  file_validation---comp_control_method
+  file_validation---comp_metric
+  comp_control_method-->file_integrated
+  comp_method-->file_integrated
   comp_metric-->file_score
-  comp_method-->file_prediction
-  file_prediction---comp_metric
+  file_integrated---comp_metric
 ```
 
 ## File format: Common Dataset
@@ -116,21 +113,20 @@ Arguments:
 
 <div class="small">
 
-| Name | Type | Description |
-|:---|:---|:---|
-| `--input` | `file` | A subset of the common dataset. |
-| `--output_train` | `file` | (*Output*) The training data in h5ad format. |
-| `--output_test` | `file` | (*Output*) The subset of molecules used for the test dataset. |
-| `--output_solution` | `file` | (*Output*) The solution for the test data. |
+| Name                    | Type   | Description                      |
+|:------------------------|:-------|:---------------------------------|
+| `--input`               | `file` | A subset of the common dataset.  |
+| `--output_unintegrated` | `file` | (*Output*) Unintegrated dataset. |
+| `--output_validation`   | `file` | (*Output*) Validation dataset.   |
 
 </div>
 
-## File format: Solution
+## File format: Unintegrated
 
-The solution for the test data
+Unintegrated dataset
 
 Example file:
-`resources_test/task_cyto_batch_integration/cxg_mouse_pancreas_atlas/solution.h5ad`
+`resources_test/task_cyto_batch_integration/cxg_mouse_pancreas_atlas/train.h5ad`
 
 Format:
 
@@ -141,50 +137,6 @@ Format:
      var: 'hvg', 'hvg_score'
      obsm: 'X_pca'
      layers: 'counts', 'normalized'
-     uns: 'dataset_id', 'dataset_name', 'dataset_url', 'dataset_reference', 'dataset_summary', 'dataset_description', 'dataset_organism', 'normalization_id'
-
-</div>
-
-Data structure:
-
-<div class="small">
-
-| Slot | Type | Description |
-|:---|:---|:---|
-| `obs["label"]` | `string` | Ground truth cell type labels. |
-| `obs["batch"]` | `string` | Batch information. |
-| `var["hvg"]` | `boolean` | Whether or not the feature is considered to be a ‘highly variable gene’. |
-| `var["hvg_score"]` | `double` | A ranking of the features by hvg. |
-| `obsm["X_pca"]` | `double` | The resulting PCA embedding. |
-| `layers["counts"]` | `integer` | Raw counts. |
-| `layers["normalized"]` | `double` | Normalized counts. |
-| `uns["dataset_id"]` | `string` | A unique identifier for the dataset. |
-| `uns["dataset_name"]` | `string` | Nicely formatted name. |
-| `uns["dataset_url"]` | `string` | (*Optional*) Link to the original source of the dataset. |
-| `uns["dataset_reference"]` | `string` | (*Optional*) Bibtex reference of the paper in which the dataset was published. |
-| `uns["dataset_summary"]` | `string` | Short description of the dataset. |
-| `uns["dataset_description"]` | `string` | Long description of the dataset. |
-| `uns["dataset_organism"]` | `string` | (*Optional*) The organism of the sample in the dataset. |
-| `uns["normalization_id"]` | `string` | Which normalization was used. |
-
-</div>
-
-## File format: Test data
-
-The subset of molecules used for the test dataset
-
-Example file:
-`resources_test/task_cyto_batch_integration/cxg_mouse_pancreas_atlas/test.h5ad`
-
-Format:
-
-<div class="small">
-
-    AnnData object
-     obs: 'batch'
-     var: 'hvg', 'hvg_score'
-     obsm: 'X_pca'
-     layers: 'counts', 'normalized'
      uns: 'dataset_id', 'normalization_id'
 
 </div>
@@ -195,6 +147,7 @@ Data structure:
 
 | Slot | Type | Description |
 |:---|:---|:---|
+| `obs["label"]` | `string` | Ground truth cell type labels. |
 | `obs["batch"]` | `string` | Batch information. |
 | `var["hvg"]` | `boolean` | Whether or not the feature is considered to be a ‘highly variable gene’. |
 | `var["hvg_score"]` | `double` | A ranking of the features by hvg. |
@@ -206,12 +159,12 @@ Data structure:
 
 </div>
 
-## File format: Training data
+## File format: Validation
 
-The training data in h5ad format
+Validation dataset
 
 Example file:
-`resources_test/task_cyto_batch_integration/cxg_mouse_pancreas_atlas/train.h5ad`
+`resources_test/task_cyto_batch_integration/cxg_mouse_pancreas_atlas/solution.h5ad`
 
 Format:
 
@@ -222,7 +175,7 @@ Format:
      var: 'hvg', 'hvg_score'
      obsm: 'X_pca'
      layers: 'counts', 'normalized'
-     uns: 'dataset_id', 'normalization_id'
+     uns: 'dataset_id', 'dataset_name', 'dataset_url', 'dataset_reference', 'dataset_summary', 'dataset_description', 'dataset_organism', 'normalization_id'
 
 </div>
 
@@ -240,6 +193,12 @@ Data structure:
 | `layers["counts"]` | `integer` | Raw counts. |
 | `layers["normalized"]` | `double` | Normalized counts. |
 | `uns["dataset_id"]` | `string` | A unique identifier for the dataset. |
+| `uns["dataset_name"]` | `string` | Nicely formatted name. |
+| `uns["dataset_url"]` | `string` | (*Optional*) Link to the original source of the dataset. |
+| `uns["dataset_reference"]` | `string` | (*Optional*) Bibtex reference of the paper in which the dataset was published. |
+| `uns["dataset_summary"]` | `string` | Short description of the dataset. |
+| `uns["dataset_description"]` | `string` | Long description of the dataset. |
+| `uns["dataset_organism"]` | `string` | (*Optional*) The organism of the sample in the dataset. |
 | `uns["normalization_id"]` | `string` | Which normalization was used. |
 
 </div>
@@ -252,50 +211,49 @@ Arguments:
 
 <div class="small">
 
-| Name | Type | Description |
-|:---|:---|:---|
-| `--input_train` | `file` | The training data in h5ad format. |
-| `--input_test` | `file` | The subset of molecules used for the test dataset. |
-| `--input_solution` | `file` | The solution for the test data. |
-| `--output` | `file` | (*Output*) A predicted dataset as output by a method. |
+| Name                   | Type   | Description                    |
+|:-----------------------|:-------|:-------------------------------|
+| `--input_unintegrated` | `file` | Unintegrated dataset.          |
+| `--input_validation`   | `file` | Validation dataset.            |
+| `--output`             | `file` | (*Output*) Integrated dataset. |
 
 </div>
 
-## Component type: Metric
+## Component type: Method
 
-A task template metric.
+A method.
 
 Arguments:
 
 <div class="small">
 
-| Name | Type | Description |
-|:---|:---|:---|
-| `--input_solution` | `file` | The solution for the test data. |
-| `--input_prediction` | `file` | A predicted dataset as output by a method. |
-| `--output` | `file` | (*Output*) File indicating the score of a metric. |
+| Name       | Type   | Description                    |
+|:-----------|:-------|:-------------------------------|
+| `--input`  | `file` | Unintegrated dataset.          |
+| `--output` | `file` | (*Output*) Integrated dataset. |
 
 </div>
 
-## Component type: Method
+## Component type: Metric
 
-A method.
+A task template metric.
 
 Arguments:
 
 <div class="small">
 
 | Name | Type | Description |
 |:---|:---|:---|
-| `--input_train` | `file` | The training data in h5ad format. |
-| `--input_test` | `file` | The subset of molecules used for the test dataset. |
-| `--output` | `file` | (*Output*) A predicted dataset as output by a method. |
+| `--input_validation` | `file` | Validation dataset. |
+| `--input_unintegrated` | `file` | Unintegrated dataset. |
+| `--input_integrated` | `file` | Integrated dataset. |
+| `--output` | `file` | (*Output*) File indicating the score of a metric. |
 
 </div>
 
-## File format: Predicted data
+## File format: Integrated
 
-A predicted dataset as output by a method.
+Integrated dataset
 
 Example file:
 `resources_test/task_cyto_batch_integration/cxg_mouse_pancreas_atlas/prediction.h5ad`

diff --git a/src/api/comp_control_method.yaml b/src/api/comp_control_method.yaml
@@ -12,20 +12,16 @@ info:
       the task, and also as a quality control for the metrics defined
       in the task.
 arguments:
-  - name: --input_train
-    __merge__: file_train.yaml
+  - name: --input_unintegrated
+    __merge__: file_unintegrated.yaml
     required: true
     direction: input
-  - name: --input_test
-    __merge__: file_test.yaml
+  - name: --input_validation
+    __merge__: file_validation.yaml
     required: true
     direction: input
-  - name: "--input_solution"
-    __merge__: file_solution.yaml
-    direction: input
-    required: true
   - name: --output
-    __merge__: file_prediction.yaml
+    __merge__: file_integrated.yaml
     required: true
     direction: output
 # test_resources:

diff --git a/src/api/comp_data_processor.yaml b/src/api/comp_data_processor.yaml
@@ -11,16 +11,12 @@ arguments:
     __merge__: file_common_dataset.yaml
     direction: input
     required: true
-  - name: "--output_train"
-    __merge__: file_train.yaml
+  - name: "--output_unintegrated"
+    __merge__: file_unintegrated.yaml
     direction: output
     required: true
-  - name: "--output_test"
-    __merge__: file_test.yaml
-    direction: output
-    required: true
-  - name: "--output_solution"
-    __merge__: file_solution.yaml
+  - name: "--output_validation"
+    __merge__: file_validation.yaml
     direction: output
     required: true
 # test_resources:

diff --git a/src/api/comp_method.yaml b/src/api/comp_method.yaml
@@ -7,16 +7,12 @@ info:
     description: |
       A method to predict the task effects.
 arguments:
-  - name: --input_train
-    __merge__: file_train.yaml
+  - name: --input
+    __merge__: file_unintegrated.yaml
     required: true
     direction: input
-  - name: "--input_test"
-    __merge__: file_test.yaml
-    direction: input
-    required: true
   - name: --output
-    __merge__: file_prediction.yaml
+    __merge__: file_integrated.yaml
     required: true
     direction: output
 # test_resources:

diff --git a/src/api/comp_metric.yaml b/src/api/comp_metric.yaml
@@ -7,12 +7,16 @@ info:
     description: |
       A metric for evaluating method predictions.
 arguments:
-  - name: "--input_solution"
-    __merge__: file_solution.yaml
+  - name: "--input_validation"
+    __merge__: file_validation.yaml
     direction: input
     required: true
-  - name: "--input_prediction"
-    __merge__: file_prediction.yaml
+  - name: "--input_unintegrated"
+    __merge__: file_unintegrated.yaml
+    direction: input
+    required: true
+  - name: "--input_integrated"
+    __merge__: file_integrated.yaml
     direction: input
     required: true
   - name: "--output"

diff --git a/src/api/file_prediction.yaml b/src/api/file_integrated.yaml
@@ -1,8 +1,8 @@
 #TODO: Change to the required and/or optional fields of the anndata
 type: file
 example: "resources_test/task_cyto_batch_integration/cxg_mouse_pancreas_atlas/prediction.h5ad"
-label: "Predicted data"
-summary: A predicted dataset as output by a method.
+label: Integrated
+summary: "Integrated dataset"
 info:
   format:
     type: h5ad

diff --git a/src/api/file_test.yaml b/src/api/file_test.yaml
diff --git a/src/api/file_train.yaml b/src/api/file_unintegrated.yaml
@@ -1,8 +1,8 @@
 #TODO: Change to the required and/or optional fields of the anndata
 type: file
 example: "resources_test/task_cyto_batch_integration/cxg_mouse_pancreas_atlas/train.h5ad"
-label: "Training data"
-summary: "The training data in h5ad format"
+label: "Unintegrated"
+summary: "Unintegrated dataset"
 info:
   format:
     type: h5ad