Aggregated Throughput Anomaly Detection
This PR does the following:
- Implements the "agg-flow" and "p2p-label" arguments for aggregated flows.
- Aggregated flows cover Pod-to-external, Pod-to-Pod (grouped by Pod labels), and Pod-to-Service flows.
- Adds a new retrieve table for aggregated TAD.
- Modifies the retrieve table for TAD; the new agg_type and algo_type fields make the aggregation and algorithm used easier to understand.
- The TAD delete command can now take multiple TAD IDs.

partially solves: #168
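
As a rough illustration of how these new arguments surface in the CRD spec (see the `types.go` hunk at the end of this diff), here is a minimal Go sketch. The import path, the namespace list, and the "agg-flow"/"p2p-label" values are assumptions for illustration only, not values defined by this PR.

```go
// Illustrative sketch only: populating the new spec fields added in this PR.
package main

import (
	"fmt"

	crdv1alpha1 "antrea.io/theia/pkg/apis/crd/v1alpha1" // assumed module path for the types shown below
)

func main() {
	spec := crdv1alpha1.ThroughputAnomalyDetectorSpec{
		NSIgnoreList:      []string{"kube-system", "flow-visibility"}, // example namespaces to ignore
		AggregatedFlow:    "pod2pod",                                  // hypothetical value passed via the new "agg-flow" argument
		Pod2PodLabel:      "app=my-service",                           // hypothetical Pod label passed via the new "p2p-label" argument
		ExecutorInstances: 1,
	}
	fmt.Printf("%+v\n", spec)
}
```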

Signed-off-by: Tushar Tathgur <[email protected]>
Tushar Tathgur authored and Tushar Tathgur committed Mar 21, 2023
1 parent 3c3d7c3 commit 66bd428
Showing 19 changed files with 726 additions and 416 deletions.
2 changes: 1 addition & 1 deletion build/charts/theia/README.md
@@ -73,7 +73,7 @@ Kubernetes: `>= 1.16.0-0`
| grafana.storage.createPersistentVolume.type | string | `"HostPath"` | Type of PersistentVolume. Can be set to "HostPath", "Local" or "NFS". Please set this value to use a PersistentVolume created by Theia. |
| grafana.storage.persistentVolumeClaimSpec | object | `{}` | Specification for PersistentVolumeClaim. This is ignored if createPersistentVolume.type is non-empty. To use a custom PersistentVolume, please set storageClassName: "" volumeName: "<my-pv>". To dynamically provision a PersistentVolume, please set storageClassName: "<my-storage-class>". HostPath storage is used if both createPersistentVolume.type and persistentVolumeClaimSpec are empty. |
| grafana.storage.size | string | `"1Gi"` | Grafana storage size. It is used to store Grafana configuration files. Can be a plain integer or as a fixed-point number using one of these quantity suffixes: E, P, T, G, M, K. Or the power-of-two equivalents: Ei, Pi, Ti, Gi, Mi, Ki. |
| sparkOperator.enable | bool | `true` | Determine whether to install Spark Operator. It is required to run Network Policy Recommendation and Throughput Anomaly Detection jobs. |
| sparkOperator.enable | bool | `false` | Determine whether to install Spark Operator. It is required to run Network Policy Recommendation and Throughput Anomaly Detection jobs. |
| sparkOperator.image | object | `{"pullPolicy":"IfNotPresent","repository":"projects.registry.vmware.com/antrea/theia-spark-operator","tag":"v1beta2-1.3.3-3.1.1"}` | Container image used by Spark Operator. |
| sparkOperator.name | string | `"theia"` | Name of Spark Operator. |
| theiaManager.apiServer.apiPort | int | `11347` | The port for the Theia Manager APIServer to serve on. |
4 changes: 4 additions & 0 deletions build/charts/theia/crds/anomaly-detector-crd.yaml
@@ -33,6 +33,10 @@ spec:
type: array
items:
type: string
aggflow:
type: string
pod2podlabel:
type: string
executorInstances:
type: integer
driverCoreRequest:
10 changes: 8 additions & 2 deletions build/charts/theia/provisioning/datasources/create_table.sh
@@ -285,23 +285,29 @@ clickhouse client -n -h 127.0.0.1 <<-EOSQL
) engine=ReplicatedMergeTree('/clickhouse/tables/{shard}/{database}/{table}', '{replica}')
ORDER BY (timeCreated);
--Create a table to store the Throughput Anomaly Detector results
--Create a table to store the endpoint to endpoint Throughput Anomaly Detector results
CREATE TABLE IF NOT EXISTS tadetector_local (
sourceIP String,
sourceTransportPort UInt16,
destinationIP String,
destinationTransportPort UInt16,
protocolIdentifier UInt16,
flowStartSeconds DateTime,
sourcePodNamespace String,
sourcePodLabels String,
destinationPodNamespace String,
destinationPodLabels String,
destinationServicePortName String,
flowEndSeconds DateTime,
throughputStandardDeviation Float64,
aggType String,
algoType String,
algoCalc Float64,
throughput Float64,
anomaly String,
id String
) engine=ReplicatedMergeTree('/clickhouse/tables/{shard}/{database}/{table}', '{replica}')
ORDER BY (flowStartSeconds);
ORDER BY (flowEndSeconds);
--Create distributed tables for cluster
CREATE TABLE IF NOT EXISTS flows AS flows_local
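
To make the schema change concrete, the sketch below reads the new `aggType`, `algoType`, and `algoCalc` columns back out of the results table. It is a hedged example only: the clickhouse-go driver, the DSN/service address, and querying the distributed `tadetector` table are assumptions about a typical Theia deployment and are not part of this change.

```go
// Hedged sketch: reading the new TAD result columns from ClickHouse.
package main

import (
	"database/sql"
	"fmt"
	"log"

	_ "github.com/ClickHouse/clickhouse-go/v2" // assumed driver; registers the "clickhouse" database/sql driver
)

func main() {
	// The service address and database name are placeholders for a typical flow-visibility deployment.
	db, err := sql.Open("clickhouse", "clickhouse://clickhouse-clickhouse.flow-visibility.svc:9000/default")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	rows, err := db.Query(`
		SELECT id, aggType, algoType, algoCalc, throughput, anomaly
		FROM tadetector
		ORDER BY flowEndSeconds DESC
		LIMIT 10`)
	if err != nil {
		log.Fatal(err)
	}
	defer rows.Close()

	for rows.Next() {
		var id, aggType, algoType, anomaly string
		var algoCalc, throughput float64
		if err := rows.Scan(&id, &aggType, &algoType, &algoCalc, &throughput, &anomaly); err != nil {
			log.Fatal(err)
		}
		fmt.Println(id, aggType, algoType, algoCalc, throughput, anomaly)
	}
	if err := rows.Err(); err != nil {
		log.Fatal(err)
	}
}
```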
2 changes: 1 addition & 1 deletion build/charts/theia/values.yaml
@@ -237,7 +237,7 @@ grafana:
sparkOperator:
# -- Determine whether to install Spark Operator. It is required to run Network
# Policy Recommendation and Throughput Anomaly Detection jobs.
enable: true
enable: false
# -- Name of Spark Operator.
name: "theia"
# -- Container image used by Spark Operator.
238 changes: 9 additions & 229 deletions build/yamls/flow-visibility.yml
@@ -34,26 +34,6 @@ metadata:
name: theia-manager
namespace: flow-visibility
---
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
app.kubernetes.io/name: spark-operator
name: theia-spark
namespace: flow-visibility
---
apiVersion: v1
kind: ServiceAccount
metadata:
annotations:
helm.sh/hook: pre-install, pre-upgrade
helm.sh/hook-delete-policy: hook-failed, before-hook-creation
helm.sh/hook-weight: "-10"
labels:
app.kubernetes.io/name: spark-operator
name: theia-spark-operator
namespace: flow-visibility
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
@@ -72,39 +52,6 @@ rules:
- watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
labels:
app.kubernetes.io/name: spark-operator
name: spark-role
namespace: flow-visibility
rules:
- apiGroups:
- ""
resources:
- pods
verbs:
- '*'
- apiGroups:
- ""
resources:
- services
verbs:
- '*'
- apiGroups:
- ""
resources:
- configmaps
verbs:
- '*'
- apiGroups:
- ""
resources:
- persistentvolumeclaims
verbs:
- '*'
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
@@ -243,94 +190,6 @@ rules:
- list
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
annotations:
helm.sh/hook: pre-install, pre-upgrade
helm.sh/hook-delete-policy: hook-failed, before-hook-creation
helm.sh/hook-weight: "-10"
labels:
app.kubernetes.io/name: spark-operator
name: theia-spark-operator
rules:
- apiGroups:
- ""
resources:
- pods
verbs:
- '*'
- apiGroups:
- ""
resources:
- services
- configmaps
- secrets
verbs:
- create
- get
- delete
- update
- apiGroups:
- extensions
- networking.k8s.io
resources:
- ingresses
verbs:
- create
- get
- delete
- apiGroups:
- ""
resources:
- nodes
verbs:
- get
- apiGroups:
- ""
resources:
- events
verbs:
- create
- update
- patch
- apiGroups:
- ""
resources:
- resourcequotas
verbs:
- get
- list
- watch
- apiGroups:
- apiextensions.k8s.io
resources:
- customresourcedefinitions
verbs:
- create
- get
- update
- delete
- apiGroups:
- admissionregistration.k8s.io
resources:
- mutatingwebhookconfigurations
- validatingwebhookconfigurations
verbs:
- create
- get
- update
- delete
- apiGroups:
- sparkoperator.k8s.io
resources:
- sparkapplications
- sparkapplications/status
- scheduledsparkapplications
- scheduledsparkapplications/status
verbs:
- '*'
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
labels:
@@ -347,22 +206,6 @@ subjects:
namespace: flow-visibility
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
labels:
app.kubernetes.io/name: spark-operator
name: spark
namespace: flow-visibility
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: spark-role
subjects:
- kind: ServiceAccount
name: theia-spark
namespace: flow-visibility
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
@@ -392,25 +235,6 @@ subjects:
name: theia-manager
namespace: flow-visibility
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
annotations:
helm.sh/hook: pre-install, pre-upgrade
helm.sh/hook-delete-policy: hook-failed, before-hook-creation
helm.sh/hook-weight: "-10"
labels:
app.kubernetes.io/name: spark-operator
name: theia-spark-operator
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: theia-spark-operator
subjects:
- kind: ServiceAccount
name: theia-spark-operator
namespace: flow-visibility
---
apiVersion: v1
data:
000001_0-1-0.down.sql: ""
@@ -843,23 +667,29 @@ data:
) engine=ReplicatedMergeTree('/clickhouse/tables/{shard}/{database}/{table}', '{replica}')
ORDER BY (timeCreated);
--Create a table to store the Throughput Anomaly Detector results
--Create a table to store the endpoint to endpoint Throughput Anomaly Detector results
CREATE TABLE IF NOT EXISTS tadetector_local (
sourceIP String,
sourceTransportPort UInt16,
destinationIP String,
destinationTransportPort UInt16,
protocolIdentifier UInt16,
flowStartSeconds DateTime,
sourcePodNamespace String,
sourcePodLabels String,
destinationPodNamespace String,
destinationPodLabels String,
destinationServicePortName String,
flowEndSeconds DateTime,
throughputStandardDeviation Float64,
aggType String,
algoType String,
algoCalc Float64,
throughput Float64,
anomaly String,
id String
) engine=ReplicatedMergeTree('/clickhouse/tables/{shard}/{database}/{table}', '{replica}')
ORDER BY (flowStartSeconds);
ORDER BY (flowEndSeconds);
--Create distributed tables for cluster
CREATE TABLE IF NOT EXISTS flows AS flows_local
@@ -6679,6 +6509,7 @@ spec:
- --alsologtostderr
- --log_file_max_size=100
- --log_file_max_num=4
- --v=11
env:
- name: POD_NAME
valueFrom:
@@ -6733,57 +6564,6 @@ spec:
name: host-var-log-antrea-theia-manager
---
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app.kubernetes.io/name: spark-operator
name: theia-spark-operator
namespace: flow-visibility
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: spark-operator
strategy:
type: Recreate
template:
metadata:
annotations:
prometheus.io/path: /metrics
prometheus.io/port: "10254"
prometheus.io/scrape: "true"
labels:
app.kubernetes.io/name: spark-operator
spec:
containers:
- args:
- -v=2
- -logtostderr
- -namespace=
- -enable-ui-service=true
- -ingress-url-format=
- -controller-threads=10
- -resync-interval=30
- -enable-batch-scheduler=false
- -label-selector-filter=
- -enable-metrics=true
- -metrics-labels=app_type
- -metrics-port=10254
- -metrics-endpoint=/metrics
- -metrics-prefix=
- -enable-resource-quota-enforcement=false
image: projects.registry.vmware.com/antrea/theia-spark-operator:v1beta2-1.3.3-3.1.1
imagePullPolicy: null
name: spark-operator
ports:
- containerPort: 10254
name: metrics
resources: {}
securityContext: {}
securityContext: {}
serviceAccountName: theia-spark-operator
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
labels:
2 changes: 2 additions & 0 deletions pkg/apis/crd/v1alpha1/types.go
@@ -98,6 +98,8 @@ type ThroughputAnomalyDetectorSpec struct {
StartInterval metav1.Time `json:"startInterval,omitempty"`
EndInterval metav1.Time `json:"endInterval,omitempty"`
NSIgnoreList []string `json:"nsIgnoreList,omitempty"`
AggregatedFlow string `json:"aggflow,omitempty"`
Pod2PodLabel string `json:"pod2podlabel,omitempty"`
ExecutorInstances int `json:"executorInstances,omitempty"`
DriverCoreRequest string `json:"driverCoreRequest,omitempty"`
DriverMemory string `json:"driverMemory,omitempty"`
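
A tiny self-contained Go sketch confirming that the new fields marshal to the `aggflow` and `pod2podlabel` keys declared in the CRD hunk earlier; the struct mirrors only the two new fields, and the values are hypothetical.

```go
// Self-contained illustration of the JSON wire form of the two new spec fields.
package main

import (
	"encoding/json"
	"fmt"
)

// tadSpecSubset mirrors only the two fields added in this PR, for illustration.
type tadSpecSubset struct {
	AggregatedFlow string `json:"aggflow,omitempty"`
	Pod2PodLabel   string `json:"pod2podlabel,omitempty"`
}

func main() {
	out, _ := json.Marshal(tadSpecSubset{
		AggregatedFlow: "pod2pod",        // hypothetical aggregation type
		Pod2PodLabel:   "app=my-service", // hypothetical label filter
	})
	fmt.Println(string(out)) // {"aggflow":"pod2pod","pod2podlabel":"app=my-service"}
}
```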
