Skip to content

Commit 6a43a07

Browse files
authored
Initial the codes of fate-operator (kubeflow#7)
* Add LaynePeng as fate-operator project owner. Signed-off-by: Layne Peng <[email protected]> * Add readme file to introduce the project. Signed-off-by: Layne Peng <[email protected]> * fate-operator WIP version. Signed-off-by: Layne Peng <[email protected]>
1 parent f201e20 commit 6a43a07

File tree

74 files changed

+6900
-3
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

74 files changed

+6900
-3
lines changed

.gitignore

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Binaries for programs and plugins
2+
*.exe
3+
*.exe~
4+
*.dll
5+
*.so
6+
*.dylib
7+
*.idea
8+
9+
# Test binary, built with `go test -c`
10+
*.test
11+
12+
# Output of the go coverage tool, specifically when used with LiteIDE
13+
*.out
14+
15+
# Dependency directories (remove the comment below to include it)
16+
# vendor/
17+
bin/

Dockerfile

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Stage 1: build the manager binary
FROM golang:1.13 AS builder

WORKDIR /workspace
# Copy the Go Modules manifests
COPY go.mod go.mod
COPY go.sum go.sum
# Cache deps before building and copying source so that we don't need to re-download as much
# and so that source changes don't invalidate our downloaded layer
RUN go mod download

# Copy the go source
COPY main.go main.go
COPY api/ api/
COPY controllers/ controllers/

# Build a static linux/amd64 binary (CGO disabled) named "manager"
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -a -o manager main.go

# Stage 2: use distroless as minimal base image to package the manager binary
# Refer to https://github.com/GoogleContainerTools/distroless for more details
FROM gcr.io/distroless/static:nonroot
WORKDIR /
COPY --from=builder /workspace/manager .
USER nonroot:nonroot

ENTRYPOINT ["/manager"]

Makefile

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
2+
# Image URL used by every target that builds, pushes or deploys the controller image
IMG ?= federatedai/fate-controller:latest
# Produce CRDs that work back to Kubernetes 1.11 (no version conversion)
CRD_OPTIONS ?= "crd:trivialVersions=true"

# Resolve the Go binary install directory once, at parse time: use GOBIN when
# `go env` reports one, otherwise fall back to GOPATH/bin. Simple (:=) assignment
# keeps the shell from being re-run on every expansion of $(GOBIN).
GOBIN := $(shell go env GOBIN)
ifeq (,$(strip $(GOBIN)))
GOBIN := $(shell go env GOPATH)/bin
endif
13+
14+
# Default goal: build the manager binary.
# Declared phony so a file named "all" can never mask it.
.PHONY: all
all: manager
15+
16+
# Run tests for all packages and write the coverage profile to cover.out.
# Code generation, formatting, vetting and manifest generation run first.
.PHONY: test
test: generate fmt vet manifests
	go test ./... -coverprofile cover.out
19+
20+
# Build manager binary into bin/manager.
# Phony because the recipe's output is bin/manager, not a file named "manager".
.PHONY: manager
manager: generate fmt vet
	go build -o bin/manager main.go
23+
24+
# Run the manager from source against the Kubernetes cluster configured in ~/.kube/config
.PHONY: run
run: generate fmt vet manifests
	go run ./main.go
27+
28+
# Install CRDs into a cluster: render config/crd with kustomize and apply the result
.PHONY: install
install: manifests
	kustomize build config/crd | kubectl apply -f -
31+
32+
# Uninstall CRDs from a cluster: render config/crd with kustomize and delete the result
.PHONY: uninstall
uninstall: manifests
	kustomize build config/crd | kubectl delete -f -
35+
36+
# Deploy controller in the configured Kubernetes cluster in ~/.kube/config.
# The first step rewrites the "controller" image in config/manager to $(IMG);
# the cd is chained with && because each recipe line runs in its own shell.
.PHONY: deploy
deploy: manifests
	cd config/manager && kustomize edit set image controller=${IMG}
	kustomize build config/default | kubectl apply -f -
40+
41+
# Generate manifests e.g. CRD, RBAC etc. CRDs are written to config/crd/bases.
.PHONY: manifests
manifests: controller-gen
	$(CONTROLLER_GEN) $(CRD_OPTIONS) rbac:roleName=manager-role webhook paths="./..." output:crd:artifacts:config=config/crd/bases
44+
45+
# Run go fmt against all packages
.PHONY: fmt
fmt:
	go fmt ./...
48+
49+
# Run go vet against all packages
.PHONY: vet
vet:
	go vet ./...
52+
53+
# Generate code with controller-gen's "object" generator, using
# hack/boilerplate.go.txt as the header of the generated files
.PHONY: generate
generate: controller-gen
	$(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths="./..."
56+
57+
# Build the docker image tagged $(IMG); the test target must pass first
.PHONY: docker-build
docker-build: test
	docker build . -t ${IMG}
60+
61+
# Push the docker image tagged $(IMG)
.PHONY: docker-push
docker-push:
	docker push ${IMG}
64+
65+
# Find controller-gen on PATH, or download it into $(GOBIN) if necessary.
# The ifeq is evaluated at parse time, so CONTROLLER_GEN is defined before any
# recipe that references it is expanded.
# NOTE(review): the pinned version was garbled in the copy of this file under
# review; v0.2.5 is an assumption - confirm against the repository.
CONTROLLER_GEN_VERSION ?= v0.2.5

.PHONY: controller-gen
controller-gen:
ifeq (, $(shell which controller-gen))
	@{ \
	set -e ;\
	CONTROLLER_GEN_TMP_DIR=$$(mktemp -d) ;\
	trap 'rm -rf "$$CONTROLLER_GEN_TMP_DIR"' EXIT ;\
	cd "$$CONTROLLER_GEN_TMP_DIR" ;\
	go mod init tmp ;\
	go get sigs.k8s.io/controller-tools/cmd/controller-gen@$(CONTROLLER_GEN_VERSION) ;\
	}
CONTROLLER_GEN := $(GOBIN)/controller-gen
else
CONTROLLER_GEN := $(shell which controller-gen)
endif

PROJECT

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
domain: kubefate.net
2+
repo: fate-operator
3+
resources:
4+
- group: app
5+
kind: FateJob
6+
version: v1beta1
7+
- group: app
8+
kind: FateCluster
9+
version: v1beta1
10+
- group: app
11+
kind: Kubefate
12+
version: v1beta1
13+
version: "2"

README.md

Lines changed: 64 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
# fate-operator: Kubernetes Operator for FATE (Federated AI Technology Enabler)
22
## Overview
33

4-
**fate-operator will be ready soon.**
4+
The FATE-Operator makes it easy to deploy and run federated machine learning (FML) jobs on Kubernetes. It is an early version; all suggestions and feature requests are valuable to us, and raising issues is appreciated.
55

6-
All suggestions are welcome. If you willing to disucss fedearted machine learning or know anything about this project, raise issue or email me: [email protected]
6+
## Background
7+
8+
FML is a machine learning setting where many clients (e.g. mobile devices or organizations) collaboratively train a model under the coordination of a central server while keeping the training data decentralized. Only the encrypted intermediate parameters are exchanged between clients, using MPC or homomorphic encryption.
79

810
## Background
911

@@ -20,6 +22,65 @@ More technologies of Federated machine learning, please refer to [Reference sect
2022

2123
**The design proposal of fate-operator can be found in [kubeflow/community/fate-operator-proposal.md](https://github.com/kubeflow/community/blob/master/proposals/fate-operator-proposal.md).**
2224

25+
## Quick user guide
26+
27+
### Installation
28+
The fate-operator can be installed with the following steps.
29+
30+
#### Install CRDs to Kubernetes
31+
```bash
32+
$ make install
33+
```
34+
35+
#### Uninstall CRDs from Kubernetes
36+
```bash
37+
$ make uninstall
38+
```
39+
40+
#### Building controller images
41+
The Docker images are built and pushed to Dockerhub.
42+
[fate-operator](https://hub.docker.com/r/federatedai/fate-controller)
43+
44+
Alternatively, we can build the images manually with the following command:
45+
```bash
46+
$ make docker-build
47+
```
48+
49+
#### Deploying controller
50+
```bash
51+
$ make deploy
52+
```
53+
54+
### Deploying KubeFATE
55+
KubeFATE is the infrastructure management service for multiple FATE clusters in one organization. It only needs to be deployed once. To deploy a KubeFATE service, use a YAML file such as [app_v1beta1_kubefate.yaml](./config/samples/app_v1beta1_kubefate.yaml) as an example:
56+
57+
```bash
58+
$ kubectl create -f ./config/samples/app_v1beta1_kubefate.yaml
59+
```
60+
61+
### Deploying FATE
62+
FATE is the cluster on which we run FML jobs. To deploy a FATE cluster, use a YAML file such as [app_v1beta1_fatecluster.yaml](./config/samples/app_v1beta1_fatecluster.yaml) as an example:
63+
```bash
64+
$ kubectl create -f ./config/samples/app_v1beta1_fatecluster.yaml
65+
```
66+
67+
### Submitting an FML Job
68+
Once KubeFATE and the FATE cluster are deployed, we can submit an FML job with a `FateJob` config file. In the current version, a FATE job is defined by two parts: the `DSL pipeline` and the `Module Config`. For details, refer to [app_v1beta1_fatejob.yaml](./config/samples/app_v1beta1_fatejob.yaml).
69+
```bash
70+
kubectl create -f ./config/samples/app_v1beta1_fatejob.yaml
71+
```
72+
In this example, only a `secure add` operation will be processed. For more examples, refer to [FATE Examples](https://github.com/FederatedAI/FATE/tree/master/examples/federatedml-1.x-examples). In each example, the file ending with `_dsl`, e.g. https://github.com/FederatedAI/FATE/blob/master/examples/federatedml-1.x-examples/hetero_linear_regression/test_hetero_linr_cv_job_dsl.json, is the job pipeline and should be put in the `pipeline` field; the file ending with `_conf`, e.g. https://github.com/FederatedAI/FATE/blob/master/examples/federatedml-1.x-examples/hetero_linear_regression/test_hetero_linr_cv_job_conf.json, is the configuration of each component and should be put in the `modulesConf` field.
73+
74+
### Checking created resource status
75+
The status of the created resources can be monitored with the `kubectl get` command:
76+
```bash
77+
$ kubectl get kubefate,fatecluster,fatejob -A -o yaml
78+
```
79+
80+
## Reference
81+
1. Qiang Yang, Yang Liu, Tianjian Chen, and Yongxin Tong. Federated machine learning: Concept and applications. CoRR, abs/1902.04885, 2019. URL http://arxiv.org/abs/1902.04885
82+
2. Peter Kairouz et al. Advances and open problems in federated learning. arXiv preprint arXiv:1912.04977
83+
2384
## Reference
2485
1. Qiang Yang, Yang Liu, Tianjian Chen, and Yongxin Tong. Federated machine learning: Concept and applications. CoRR, abs/1902.04885, 2019. URL http://arxiv.org/abs/1902.04885
25-
2. Peter Kairouz et al. Advances and open problems in federated learning. arXiv preprint arXiv:1912.04977
86+
2. Peter Kairouz et al. Advances and open problems in federated learning. arXiv preprint arXiv:1912.04977

api/v1beta1/fatecluster_types.go

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
/*
2+
* Copyright 2019-2020 VMware, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
* Unless required by applicable law or agreed to in writing, software
9+
* distributed under the License is distributed on an "AS IS" BASIS,
10+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
* See the License for the specific language governing permissions and
12+
* limitations under the License.
13+
*/
14+
15+
package v1beta1
16+
17+
import (
18+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
19+
)
20+
21+
// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
22+
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.
23+
24+
// FateClusterSpec defines the desired state of FateCluster
25+
type FateClusterSpec struct {
26+
// INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
27+
// Important: Run "make" to regenerate code after modifying this file
28+
29+
ClusterSpec *ClusterSpec `json:"clusterSpec,omitempty"`
30+
Kubefate KubefateNamespacedName `json:"kubefate,omitempty"`
31+
ClusterData string `json:"clusterData,omitempty"`
32+
}
33+
34+
// KubefateNamespacedName identifies a resource by its name and namespace.
type KubefateNamespacedName struct {
	// Name of the referenced resource.
	Name string `json:"name,omitempty"`
	// Namespace of the referenced resource.
	Namespace string `json:"namespace,omitempty"`
}
38+
39+
// FateClusterStatus defines the observed state of FateCluster
type FateClusterStatus struct {
	// INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
	// Important: Run "make" to regenerate code after modifying this file

	// Status is the status string of the cluster.
	Status string `json:"status,omitempty"`
	// KubefateJobId is serialized as "jobId".
	KubefateJobId string `json:"jobId,omitempty"`
	// KubefateClusterId is serialized as "clusterId".
	KubefateClusterId string `json:"clusterId,omitempty"`
}
47+
48+
// +kubebuilder:object:root=true
49+
// +kubebuilder:printcolumn:name="PartyId",type=string,JSONPath=`.spec.clusterSpec.partyId`
50+
// +kubebuilder:printcolumn:name="status",type=string,JSONPath=`.status.status`
51+
52+
// FateCluster is the Schema for the fateclusters API
53+
type FateCluster struct {
54+
metav1.TypeMeta `json:",inline"`
55+
metav1.ObjectMeta `json:"metadata,omitempty"`
56+
57+
Spec FateClusterSpec `json:"spec,omitempty"`
58+
Status FateClusterStatus `json:"status,omitempty"`
59+
}
60+
61+
// +kubebuilder:object:root=true
62+
63+
// FateClusterList contains a list of FateCluster
64+
type FateClusterList struct {
65+
metav1.TypeMeta `json:",inline"`
66+
metav1.ListMeta `json:"metadata,omitempty"`
67+
Items []FateCluster `json:"items"`
68+
}
69+
70+
func init() {
71+
SchemeBuilder.Register(&FateCluster{}, &FateClusterList{})
72+
}
73+
74+
// Istio holds the Istio toggle of a cluster.
type Istio struct {
	// Enabled is the on/off flag; false is omitted from the serialized form.
	Enabled bool `json:"enabled,omitempty"`
}
77+
78+
// Exchange is an IP/port pair used by the Rollsite settings.
type Exchange struct {
	// IP is the address part of the pair.
	IP string `json:"ip,omitempty"`
	// Port is the port part of the pair.
	Port int32 `json:"port,omitempty"`
}
82+
83+
// PartyList is one entry of Rollsite.PartyList: a party ID together with an
// IP and a port.
type PartyList struct {
	// PartyID is the numeric ID of the party.
	PartyID int32 `json:"partyId,omitempty"`
	// PartyIP is the IP recorded for the party.
	PartyIP string `json:"partyIp,omitempty"`
	// PartyPort is the port recorded for the party.
	PartyPort int32 `json:"partyPort,omitempty"`
}
88+
89+
// NodeSelector is a placeholder type; it currently declares no fields.
type NodeSelector struct{}
91+
92+
type Rollsite struct {
93+
Type string `json:"type,omitempty"`
94+
NodePort int32 `json:"nodePort,omitempty"`
95+
Exchange Exchange `json:"exchange,omitempty"`
96+
PartyList []PartyList `json:"partyList,omitempty"`
97+
NodeSelector NodeSelector `json:"nodeSelector,omitempty"`
98+
}
99+
100+
type List struct {
101+
Name string `json:"name,omitempty"`
102+
NodeSelector NodeSelector `json:"nodeSelector,omitempty"`
103+
SessionProcessorsPerNode int32 `json:"sessionProcessorsPerNode,omitempty"`
104+
SubPath string `json:"subPath,omitempty"`
105+
ExistingClaim string `json:"existingClaim,omitempty"`
106+
StorageClass string `json:"storageClass,omitempty"`
107+
AccessMode string `json:"accessMode,omitempty"`
108+
Size string `json:"size,omitempty"`
109+
}
110+
111+
type Nodemanager struct {
112+
Count int32 `json:"count,omitempty"`
113+
SessionProcessorsPerNode int32 `json:"sessionProcessorsPerNode"`
114+
List []List `json:"list,omitempty"`
115+
}
116+
117+
type Python struct {
118+
FateflowType string `json:"fateflowType,omitempty"`
119+
FateflowNodePort int32 `json:"fateflowNodePort,omitempty"`
120+
// +kubebuilder:default:={}
121+
NodeSelector NodeSelector `json:"nodeSelector,omitempty"`
122+
}
123+
124+
type Mysql struct {
125+
NodeSelector NodeSelector `json:"nodeSelector,omitempty"`
126+
IP string `json:"ip,omitempty"`
127+
Port int32 `json:"port,omitempty"`
128+
Database string `json:"database,omitempty"`
129+
User string `json:"user,omitempty"`
130+
Password string `json:"password,omitempty"`
131+
SubPath string `json:"subPath,omitempty"`
132+
ExistingClaim string `json:"existingClaim,omitempty"`
133+
StorageClass string `json:"storageClass,omitempty"`
134+
AccessMode string `json:"accessMode,omitempty"`
135+
Size string `json:"size,omitempty"`
136+
}
137+
type ClusterSpec struct {
138+
Name string `json:"name,omitempty"`
139+
Namespace string `json:"namespace,omitempty"`
140+
ChartName string `json:"chartName"`
141+
ChartVersion string `json:"chartVersion"`
142+
PartyID int32 `json:"partyId"`
143+
Registry string `json:"registry,omitempty"`
144+
PullPolicy string `json:"pullPolicy,omitempty"`
145+
Persistence bool `json:"persistence,omitempty"`
146+
Istio Istio `json:"istio,omitempty"`
147+
Modules []string `json:"modules,omitempty"`
148+
Rollsite Rollsite `json:"rollsite,omitempty"`
149+
Nodemanager Nodemanager `json:"nodemanager"`
150+
Python Python `json:"python,omitempty"`
151+
Mysql Mysql `json:"mysql,omitempty"`
152+
ServingIP string `json:"servingIp,omitempty"`
153+
ServingPort int32 `json:"servingPort,omitempty"`
154+
}

0 commit comments

Comments
 (0)