-
Notifications
You must be signed in to change notification settings - Fork 121
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add the KFP v2 example and update the tutorial (#1587)
* Add the KFP v2 benchmark test. * Update the tutorial. Fixes #1586 Signed-off-by: Ye Cao <[email protected]>
- Loading branch information
Showing
20 changed files
with
901 additions
and
176 deletions.
There are no files selected for viewing
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
406 changes: 327 additions & 79 deletions
406
...ials/kubernetes/efficient-data-sharing-in-kubeflow-with-vineyard-csi-driver.rst
Large diffs are not rendered by default.
Oops, something went wrong.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,22 +1,23 @@ | ||
REGISTRY := "ghcr.io/v6d-io/v6d/kubeflow-example" | ||
docker-build: | ||
docker build prepare-data/ -f Dockerfile \ | ||
--build-arg APP=prepare-data.py \ | ||
-t prepare-data | ||
-t $(REGISTRY)/prepare-data | ||
|
||
docker build preprocess/ -f Dockerfile \ | ||
--build-arg APP=preprocess.py \ | ||
-t preprocess-data | ||
-t $(REGISTRY)/preprocess-data | ||
|
||
docker build train/ -f Dockerfile \ | ||
--build-arg APP=train.py \ | ||
-t train-data | ||
-t $(REGISTRY)/train-data | ||
|
||
docker build test/ -f Dockerfile \ | ||
--build-arg APP=test.py \ | ||
-t test-data | ||
-t $(REGISTRY)/test-data | ||
|
||
load-images: | ||
kind load docker-image prepare-data | ||
kind load docker-image preprocess-data | ||
kind load docker-image train-data | ||
kind load docker-image test-data | ||
push-images: | ||
docker push $(REGISTRY)/prepare-data | ||
docker push $(REGISTRY)/preprocess-data | ||
docker push $(REGISTRY)/train-data | ||
docker push $(REGISTRY)/test-data |
64 changes: 64 additions & 0 deletions
64
k8s/examples/vineyard-csidriver/pipeline-kfp-v2-with-vineyard.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
from kfp import dsl | ||
from kfp import kubernetes | ||
|
||
# Preprocessing step: runs preprocess.py inside the preprocess-data image,
# forwarding the data multiplier and passing --with_vineyard=True.
@dsl.container_component
def PreProcess(data_multiplier: int):
    multiplier_flag = f'--data_multiplier={data_multiplier}'
    return dsl.ContainerSpec(
        image='ghcr.io/v6d-io/v6d/kubeflow-example/preprocess-data',
        command=['python3', 'preprocess.py'],
        args=[multiplier_flag, '--with_vineyard=True'],
    )
|
||
# Training step: runs train.py inside the train-data image with
# --with_vineyard=True.
@dsl.container_component
def Train():
    train_container = dsl.ContainerSpec(
        image='ghcr.io/v6d-io/v6d/kubeflow-example/train-data',
        command=['python3', 'train.py'],
        args=['--with_vineyard=True'],
    )
    return train_container
|
||
# Testing step: runs test.py inside the test-data image with
# --with_vineyard=True.
@dsl.container_component
def Test():
    test_container = dsl.ContainerSpec(
        image='ghcr.io/v6d-io/v6d/kubeflow-example/test-data',
        command=['python3', 'test.py'],
        args=['--with_vineyard=True'],
    )
    return test_container
|
||
def mount_pvc(component, pvc_name):
    """Mount the data PVC and the vineyard objects PVC on a task.

    Args:
        component: The pipeline task to attach the volumes to.
        pvc_name: Name of the PVC mounted at ``/data``.

    The PVC named ``vineyard-objects`` is always mounted at
    ``/vineyard/data`` in addition to the caller-supplied one.
    """
    # (claim name, mount path) pairs, applied in this order.
    mounts = (
        (pvc_name, '/data'),
        ("vineyard-objects", '/vineyard/data'),
    )
    for claim, path in mounts:
        kubernetes.mount_pvc(
            component,
            pvc_name=claim,
            mount_path=path,
        )
|
||
# Pipeline: create the vineyard-objects PVC, run preprocess -> train -> test
# in sequence with both PVCs mounted on every step, then delete the PVC.
@dsl.pipeline(
    name='Machine Learning Pipeline With Vineyard',
    description='An example pipeline that trains and logs a regression model.',
)
def pipeline(data_multiplier: int):
    # The PVC is created under the fixed name 'vineyard-objects', which is
    # the same name mount_pvc() and the DeletePVC task below refer to.
    create_pvc_task = kubernetes.CreatePVC(
        pvc_name='vineyard-objects',
        access_modes=['ReadWriteMany'],
        # The requested size is irrelevant here, but it must not be empty.
        size='1Mi',
        # NOTE(review): presumably the storage class exposed by the vineyard
        # CSI driver for the "vineyardd-sample" deployment -- confirm.
        storage_class_name='vineyard-system.vineyardd-sample.csi',
    )

    data_pvc = "benchmark-data"

    # Preprocessing must wait until the vineyard PVC exists.
    preprocess_task = PreProcess(data_multiplier=data_multiplier)
    preprocess_task.after(create_pvc_task)
    mount_pvc(preprocess_task, data_pvc)

    train_task = Train()
    train_task.after(preprocess_task)
    mount_pvc(train_task, data_pvc)

    test_task = Test()
    test_task.after(train_task)
    mount_pvc(test_task, data_pvc)

    # Clean up the PVC only after the last step has finished.
    delete_pvc_task = kubernetes.DeletePVC(pvc_name="vineyard-objects")
    delete_pvc_task.after(test_task)
|
||
if __name__ == '__main__':
    import os

    from kfp import compiler

    # Write the compiled spec next to this script, replacing the script's
    # extension with ``.yaml``. splitext is used instead of slicing off a
    # fixed three characters so the output name is right for any extension,
    # not only ``.py``.
    output_path = os.path.splitext(__file__)[0] + '.yaml'
    compiler.Compiler().compile(pipeline, output_path)
220 changes: 220 additions & 0 deletions
220
k8s/examples/vineyard-csidriver/pipeline-kfp-v2-with-vineyard.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,220 @@ | ||
# PIPELINE DEFINITION | ||
# Name: machine-learning-pipeline-with-vineyard | ||
# Description: An example pipeline that trains and logs a regression model. | ||
# Inputs: | ||
# data_multiplier: int | ||
components: | ||
comp-createpvc: | ||
executorLabel: exec-createpvc | ||
inputDefinitions: | ||
parameters: | ||
access_modes: | ||
description: 'AccessModes to request for the provisioned PVC. May | ||
be one or more of ``''ReadWriteOnce''``, ``''ReadOnlyMany''``, ``''ReadWriteMany''``, | ||
or | ||
``''ReadWriteOncePod''``. Corresponds to `PersistentVolumeClaim.spec.accessModes | ||
<https://kubernetes.io/docs/concepts/storage/persistent-volumes/#access-modes>`_.' | ||
parameterType: LIST | ||
annotations: | ||
description: Annotations for the PVC's metadata. Corresponds to `PersistentVolumeClaim.metadata.annotations | ||
<https://kubernetes.io/docs/reference/kubernetes-api/config-and-storage-resources/persistent-volume-claim-v1/#PersistentVolumeClaim>`_. | ||
isOptional: true | ||
parameterType: STRUCT | ||
pvc_name: | ||
description: 'Name of the PVC. Corresponds to `PersistentVolumeClaim.metadata.name | ||
<https://kubernetes.io/docs/reference/kubernetes-api/config-and-storage-resources/persistent-volume-claim-v1/#PersistentVolumeClaim>`_. | ||
Only one of ``pvc_name`` and ``pvc_name_suffix`` can | ||
be provided.' | ||
isOptional: true | ||
parameterType: STRING | ||
pvc_name_suffix: | ||
description: 'Prefix to use for a dynamically generated name, which | ||
will take the form ``<argo-workflow-name>-<pvc_name_suffix>``. Only one | ||
of ``pvc_name`` and ``pvc_name_suffix`` can be provided.' | ||
isOptional: true | ||
parameterType: STRING | ||
size: | ||
description: The size of storage requested by the PVC that will be provisioned. | ||
For example, ``'5Gi'``. Corresponds to `PersistentVolumeClaim.spec.resources.requests.storage | ||
<https://kubernetes.io/docs/reference/kubernetes-api/config-and-storage-resources/persistent-volume-claim-v1/#PersistentVolumeClaimSpec>`_. | ||
parameterType: STRING | ||
storage_class_name: | ||
defaultValue: '' | ||
description: 'Name of StorageClass from which to provision the PV | ||
to back the PVC. ``None`` indicates to use the cluster''s default | ||
storage_class_name. Set to ``''''`` for a statically specified PVC.' | ||
isOptional: true | ||
parameterType: STRING | ||
volume_name: | ||
description: 'Pre-existing PersistentVolume that should back the | ||
provisioned PersistentVolumeClaim. Used for statically | ||
specified PV only. Corresponds to `PersistentVolumeClaim.spec.volumeName | ||
<https://kubernetes.io/docs/reference/kubernetes-api/config-and-storage-resources/persistent-volume-claim-v1/#PersistentVolumeClaimSpec>`_.' | ||
isOptional: true | ||
parameterType: STRING | ||
outputDefinitions: | ||
parameters: | ||
name: | ||
parameterType: STRING | ||
comp-deletepvc: | ||
executorLabel: exec-deletepvc | ||
inputDefinitions: | ||
parameters: | ||
pvc_name: | ||
description: Name of the PVC to delete. Supports passing a runtime-generated | ||
name, such as a name provided by ``kubernetes.CreatePvcOp().outputs['name']``. | ||
parameterType: STRING | ||
comp-preprocess: | ||
executorLabel: exec-preprocess | ||
inputDefinitions: | ||
parameters: | ||
data_multiplier: | ||
parameterType: NUMBER_INTEGER | ||
comp-test: | ||
executorLabel: exec-test | ||
comp-train: | ||
executorLabel: exec-train | ||
deploymentSpec: | ||
executors: | ||
exec-createpvc: | ||
container: | ||
image: argostub/createpvc | ||
exec-deletepvc: | ||
container: | ||
image: argostub/deletepvc | ||
exec-preprocess: | ||
container: | ||
args: | ||
- --data_multiplier={{$.inputs.parameters['data_multiplier']}} | ||
- --with_vineyard=True | ||
command: | ||
- python3 | ||
- preprocess.py | ||
image: ghcr.io/v6d-io/v6d/kubeflow-example/preprocess-data | ||
exec-test: | ||
container: | ||
args: | ||
- --with_vineyard=True | ||
command: | ||
- python3 | ||
- test.py | ||
image: ghcr.io/v6d-io/v6d/kubeflow-example/test-data | ||
exec-train: | ||
container: | ||
args: | ||
- --with_vineyard=True | ||
command: | ||
- python3 | ||
- train.py | ||
image: ghcr.io/v6d-io/v6d/kubeflow-example/train-data | ||
pipelineInfo: | ||
description: An example pipeline that trains and logs a regression model. | ||
name: machine-learning-pipeline-with-vineyard | ||
root: | ||
dag: | ||
tasks: | ||
createpvc: | ||
cachingOptions: | ||
enableCache: true | ||
componentRef: | ||
name: comp-createpvc | ||
inputs: | ||
parameters: | ||
access_modes: | ||
runtimeValue: | ||
constant: | ||
- ReadWriteMany | ||
pvc_name: | ||
runtimeValue: | ||
constant: vineyard-objects | ||
size: | ||
runtimeValue: | ||
constant: 1Mi | ||
storage_class_name: | ||
runtimeValue: | ||
constant: vineyard-system.vineyardd-sample.csi | ||
taskInfo: | ||
name: createpvc | ||
deletepvc: | ||
cachingOptions: | ||
enableCache: true | ||
componentRef: | ||
name: comp-deletepvc | ||
dependentTasks: | ||
- test | ||
inputs: | ||
parameters: | ||
pvc_name: | ||
runtimeValue: | ||
constant: vineyard-objects | ||
taskInfo: | ||
name: deletepvc | ||
preprocess: | ||
cachingOptions: | ||
enableCache: true | ||
componentRef: | ||
name: comp-preprocess | ||
dependentTasks: | ||
- createpvc | ||
inputs: | ||
parameters: | ||
data_multiplier: | ||
componentInputParameter: data_multiplier | ||
taskInfo: | ||
name: preprocess | ||
test: | ||
cachingOptions: | ||
enableCache: true | ||
componentRef: | ||
name: comp-test | ||
dependentTasks: | ||
- train | ||
taskInfo: | ||
name: test | ||
train: | ||
cachingOptions: | ||
enableCache: true | ||
componentRef: | ||
name: comp-train | ||
dependentTasks: | ||
- preprocess | ||
taskInfo: | ||
name: train | ||
inputDefinitions: | ||
parameters: | ||
data_multiplier: | ||
parameterType: NUMBER_INTEGER | ||
schemaVersion: 2.1.0 | ||
sdkVersion: kfp-2.3.0 | ||
--- | ||
platforms: | ||
kubernetes: | ||
deploymentSpec: | ||
executors: | ||
exec-preprocess: | ||
pvcMount: | ||
- constant: benchmark-data | ||
mountPath: /data | ||
- constant: vineyard-objects | ||
mountPath: /vineyard/data | ||
exec-test: | ||
pvcMount: | ||
- constant: benchmark-data | ||
mountPath: /data | ||
- constant: vineyard-objects | ||
mountPath: /vineyard/data | ||
exec-train: | ||
pvcMount: | ||
- constant: benchmark-data | ||
mountPath: /data | ||
- constant: vineyard-objects | ||
mountPath: /vineyard/data |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
from kfp import dsl | ||
from kfp import kubernetes | ||
|
||
# Preprocessing step: runs preprocess.py inside the preprocess-data image,
# forwarding the data multiplier on the command line.
@dsl.container_component
def PreProcess(data_multiplier: int):
    multiplier_flag = f'--data_multiplier={data_multiplier}'
    return dsl.ContainerSpec(
        image='ghcr.io/v6d-io/v6d/kubeflow-example/preprocess-data',
        command=['python3', 'preprocess.py'],
        args=[multiplier_flag],
    )
|
||
# Training step: runs train.py inside the train-data image with no extra
# arguments.
@dsl.container_component
def Train():
    train_container = dsl.ContainerSpec(
        image='ghcr.io/v6d-io/v6d/kubeflow-example/train-data',
        command=['python3', 'train.py'],
    )
    return train_container
|
||
# Testing step: runs test.py inside the test-data image with no extra
# arguments.
@dsl.container_component
def Test():
    test_container = dsl.ContainerSpec(
        image='ghcr.io/v6d-io/v6d/kubeflow-example/test-data',
        command=['python3', 'test.py'],
    )
    return test_container
|
||
def mount_pvc(component, pvc_name):
    """Mount the named PVC at ``/data`` on the given task.

    Args:
        component: The pipeline task to attach the volume to.
        pvc_name: Name of the PVC to mount.
    """
    mount_options = {'pvc_name': pvc_name, 'mount_path': '/data'}
    kubernetes.mount_pvc(component, **mount_options)
|
||
# Pipeline: run preprocess -> train -> test in sequence, with the
# benchmark-data PVC mounted at /data on every step.
@dsl.pipeline(
    name='Machine Learning Pipeline',
    description='An example pipeline that trains and logs a regression model.',
)
def pipeline(data_multiplier: int):
    data_pvc = "benchmark-data"

    preprocess_task = PreProcess(data_multiplier=data_multiplier)
    mount_pvc(preprocess_task, data_pvc)

    # Each later step is ordered explicitly after the previous one.
    train_task = Train()
    train_task.after(preprocess_task)
    mount_pvc(train_task, data_pvc)

    test_task = Test()
    test_task.after(train_task)
    mount_pvc(test_task, data_pvc)
|
||
if __name__ == '__main__':
    import os

    from kfp import compiler

    # Write the compiled spec next to this script, replacing the script's
    # extension with ``.yaml``. splitext is used instead of slicing off a
    # fixed three characters so the output name is right for any extension,
    # not only ``.py``.
    output_path = os.path.splitext(__file__)[0] + '.yaml'
    compiler.Compiler().compile(pipeline, output_path)
Oops, something went wrong.