Skip to content

Commit

Permalink
fixed cleanup, one last step left to delete after finished
Browse files Browse the repository at this point in the history
Signed-off-by: Dhruv-J <[email protected]>
  • Loading branch information
Dhruv-J committed Jul 21, 2023
1 parent 5c420fb commit fe62781
Show file tree
Hide file tree
Showing 8 changed files with 43 additions and 214 deletions.
1 change: 0 additions & 1 deletion build/charts/theia/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ Kubernetes: `>= 1.16.0-0`
| clickhouse.monitor.deletePercentage | float | `0.5` | The percentage of records in ClickHouse that will be deleted when the storage grows above threshold. Vary from 0 to 1. |
| clickhouse.monitor.enable | bool | `true` | Determine whether to run a monitor to periodically check the ClickHouse memory usage and clean data. |
| clickhouse.monitor.execInterval | string | `"1m"` | The time interval between two rounds of monitoring. Can be a plain integer using one of these unit suffixes ns, us (or µs), ms, s, m, h. |
| clickhouse.monitor.gocoverdir | string | `"clickhouse-monitor-coverage"` | coverage directory to be used |
| clickhouse.monitor.image | object | `{"pullPolicy":"IfNotPresent","repository":"projects.registry.vmware.com/antrea/theia-clickhouse-monitor","tag":""}` | Container image used by the ClickHouse Monitor. |
| clickhouse.monitor.skipRoundsNum | int | `3` | The number of rounds for the monitor to stop after a deletion to wait for the ClickHouse MergeTree Engine to release memory. |
| clickhouse.monitor.threshold | float | `0.5` | The storage percentage at which the monitor starts to delete old records. Vary from 0 to 1. |
Expand Down
5 changes: 1 addition & 4 deletions build/charts/theia/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@ clickhouse:
# -- The number of rounds for the monitor to stop after a deletion to wait for
# the ClickHouse MergeTree Engine to release memory.
skipRoundsNum: 3
# -- coverage directory to be used
gocoverdir: clickhouse-monitor-coverage
# -- Container image used by the ClickHouse Monitor.
image:
repository: "projects.registry.vmware.com/antrea/theia-clickhouse-monitor"
Expand Down Expand Up @@ -271,5 +269,4 @@ theiaManager:
# -- TLS min version from: VersionTLS10, VersionTLS11, VersionTLS12, VersionTLS13.
tlsMinVersion: ""
# -- Log verbosity switch for Theia Manager.
logVerbosity: 0
# -- coverage directory to be used
logVerbosity: 0
6 changes: 2 additions & 4 deletions ci/kind/test-e2e-kind.sh
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,8 @@ function run_test {
sed -i -e "s/activeFlowExportTimeout: \"5s\"/activeFlowExportTimeout: \"2s\"/g" $TMP_DIR/antrea.yml
sed -i -e "s/idleFlowExportTimeout: \"15s\"/idleFlowExportTimeout: \"1s\"/g" $TMP_DIR/antrea.yml

curl -o $TMP_DIR/flow-aggregator.yml https://raw.githubusercontent.com/antrea-io/antrea/main/build/yamls/flow-aggregator.yml
#cp ~/go/src/github.com/antrea/build/yamls/flow-aggregator.yml $TMP_DIR/flow-aggregator.yml
#curl -o $TMP_DIR/flow-aggregator.yml https://raw.githubusercontent.com/antrea-io/antrea/main/build/yamls/flow-aggregator.yml
cp ~/go/src/github.com/antrea/build/yamls/flow-aggregator.yml $TMP_DIR/flow-aggregator.yml
sed -i -e "s|image: projects.registry.vmware.com/antrea/flow-aggregator:latest|image: antrea/flow-aggregator:latest|g" $TMP_DIR/flow-aggregator.yml
perl -i -p0e 's/ # Enable is the switch to enable exporting flow records to ClickHouse.\n enable: false/ # Enable is the switch to enable exporting flow records to ClickHouse.\n enable: true/' $TMP_DIR/flow-aggregator.yml
sed -i -e "s/ activeFlowRecordTimeout: 60s/ activeFlowRecordTimeout: 3500ms/g" $TMP_DIR/flow-aggregator.yml
Expand All @@ -188,13 +188,11 @@ function run_test {
sleep 1

if $coverage; then
echo "STARTED_RUNNING"
go test -v -timeout=30m antrea.io/theia/test/e2e -provider=kind --logs-export-dir=$ANTREA_LOG_DIR -cover -coverprofile=.coverage/.cov.out -covermode=atomic --skip=$skiplist -test.gocoverdir=.coverage/kind-e2e-coverage
else
go test -v -timeout=30m antrea.io/theia/test/e2e -provider=kind --logs-export-dir=$ANTREA_LOG_DIR --skip=$skiplist
fi

echo "FINISHED_RUNNING"
}

function coverage_and_cleanup_test {
Expand Down
1 change: 0 additions & 1 deletion pkg/theia/commands/anomaly_detection_delete_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,5 +144,4 @@ func TestAnomalyDetectionDelete(t *testing.T) {
}
})
}
fmt.Println("done running AD delete test")
}
6 changes: 2 additions & 4 deletions plugins/clickhouse-monitor/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,10 +106,8 @@ func main() {

func startMonitor(connect *sql.DB) {
stopCh := signals.RegisterSignalHandlers()
klog.InfoS("----------registered stop handler inside loop")
// Set up signal capture: the first SIGTERM / SIGINT signal is handled gracefully and will
// cause the stopCh channel to be closed; if another signal is received before the program
// exits, we will force exit.
// Set up signal capture: the first SIGINT is expected to be sent intentionally
// so that coverage data can be collected.
runUntil(func() {
// The monitor stops working for several rounds after a deletion
// as the release of memory space by the ClickHouse MergeTree engine requires time
Expand Down
1 change: 0 additions & 1 deletion plugins/clickhouse-monitor/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ import (
)

func TestMonitorWithMockDB(t *testing.T) {
klog.InfoS("into function TestMonitorWithMockDB")
db, mock, err := sqlmock.New(sqlmock.QueryMatcherOption(sqlmock.QueryMatcherEqual))
if err != nil {
t.Fatalf("an error '%s' was not expected when opening a stub database connection", err)
Expand Down
48 changes: 38 additions & 10 deletions test/e2e/e2e_coverage_and_cleanup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ func copyCovFolder(nodeName, covDir, covPrefix string) error {
if err != nil {
return fmt.Errorf("copyCovFolder: error creating absolute file path: %v", err)
}
cmd := exec.Command("docker", "cp", nodeName+":"+"/var/log/"+covPrefix+"-coverage/.", covDirAbs)
pathOnNode := nodeName + ":" + "/var/log/" + covPrefix + "-coverage/."
cmd := exec.Command("docker", "cp", pathOnNode, covDirAbs)
var errb bytes.Buffer
cmd.Stderr = &errb
// TODO add stdout and stderr reader
if err := cmd.Run(); err != nil {
errStr := errb.String()
fmt.Printf("err: %v | stderr: %s\n", err, errStr)
Expand All @@ -49,7 +49,7 @@ func copyCovFolder(nodeName, covDir, covPrefix string) error {
return nil
}

func copyCovFilesBothNodes(namespace, covDir, covPrefix string) error {
func copyCovFilesBothNodes(covPrefix string) error {
log.Infof("Copying coverage files from worker nodes kind-worker and kind-worker2")
if err := copyCovFolder(workerNodeA, cmCovDir, covPrefix); err != nil {
return err
Expand All @@ -66,19 +66,47 @@ func copyCovFilesBothNodes(namespace, covDir, covPrefix string) error {
return nil
}

// func clearCovFilesBothNodes() error {
// return nil
// }
// clearCovFolder deletes everything under /var/log/<covPrefix>-coverage/ inside
// the container named nodeName by running `rm -rf` through `docker exec`.
//
// A failure whose stderr contains "not found" is logged and ignored; any other
// failure is returned with both the underlying error and the captured stderr.
func clearCovFolder(nodeName, covPrefix string) error {
	// Hand the command to the shell as-is. Wrapping it in backticks would turn
	// it into a command substitution, making the shell try to execute whatever
	// rm prints on stdout.
	rmCmd := "rm -rf /var/log/" + covPrefix + "-coverage/*"
	cmd := exec.Command("docker", "exec", nodeName, "sh", "-c", rmCmd)
	var errb bytes.Buffer
	cmd.Stderr = &errb
	if err := cmd.Run(); err != nil {
		errStr := errb.String()
		fmt.Printf("cmd: %v | err: %v | stderr: %s\n", cmd, err, errStr)
		if !strings.Contains(errStr, "not found") {
			// Include err itself: stderr is empty when the docker binary
			// cannot be started at all.
			return fmt.Errorf("error while running docker exec command[%v] from node: %s: %w, stderr: %s", cmd, nodeName, err, errStr)
		}
	}
	return nil
}

// clearCovFilesBothNodes clears the <covPrefix>-coverage folder on workerNodeA
// and then on workerNodeB, stopping at and returning the first error.
func clearCovFilesBothNodes(covPrefix string) error {
	log.Infof("Clearing coverage files from worker nodes kind-worker and kind-worker2")
	for _, node := range []string{workerNodeA, workerNodeB} {
		if err := clearCovFolder(node, covPrefix); err != nil {
			return err
		}
	}
	return nil
}

func TestCoverageAndCleanup(t *testing.T) {
if os.Getenv("COVERAGE") == "" {
t.Skip()
}
fmt.Println("RUNNING FINAL COVERAGE STUFF")
if err := copyCovFilesBothNodes("flow-visibility", ".coverage", "cm"); err != nil {
t.Fatalf("error running TestCoverageAndCleanup: %v", err)
if err := copyCovFilesBothNodes("cm"); err != nil {
t.Fatalf("error running TestCoverageAndCleanup clickhouse-monitor copy files: %v", err)
}
if err := copyCovFilesBothNodes("tm"); err != nil {
t.Fatalf("error running TestCoverageAndCleanup theia-manager copy files: %v", err)
}
if err := clearCovFilesBothNodes("cm"); err != nil {
t.Fatalf("error running TestCoverageAndCleanup clickhouse-monitor clear files: %v", err)
}
if err := copyCovFilesBothNodes("flow-visibility", ".coverage", "tm"); err != nil {
t.Fatalf("error running TestCoverageAndCleanup: %v", err)
if err := clearCovFilesBothNodes("tm"); err != nil {
t.Fatalf("error running TestCoverageAndCleanup theia-manager clear files: %v", err)
}
}
189 changes: 0 additions & 189 deletions test/e2e/framework.go
Original file line number Diff line number Diff line change
Expand Up @@ -1406,9 +1406,6 @@ func (data *TestData) killProcesses(namespace, podName, containerName, processNa
return fmt.Errorf("error when getting pid of '%s', stderr: <%v>, err: <%v>", processName, stderr, err)
}
cmds = []string{"kill", "-SIGINT", strings.TrimSpace(stdout)}
// TOREMOVE vvv
log.Infof("TOREMOVE Sending SIGINT to '%s' with cmd 'kill -SIGINT %s'", processName, strings.TrimSpace(stdout))
fmt.Println("sending SIGINT")
_, stderr, err = data.RunCommandFromPod(namespace, podName, containerName, cmds)
if err != nil {
return fmt.Errorf("error when sending SIGINT signal to '%s', stderr: <%v>, err: <%v>", processName, stderr, err)
Expand All @@ -1431,11 +1428,8 @@ func (data *TestData) killProcessesOnPods() error {
err = data.killProcesses("flow-visibility", pod.Name, clickHouseMonitorContName, clickHouseMonitorContName)
} else if strings.Contains(podName, "theia-manager") {
err = data.killProcesses("flow-visibility", pod.Name, theiaManagerContName, theiaManagerContName)
} else {
fmt.Println("no coverage files to look for")
}
if err != nil {
fmt.Println(fmt.Sprintf("copyCovFilesFromPods: error copying node files: %v", err))
return fmt.Errorf("error when copying coverage files from pods: copy pod files out, error:%v", err)
}
}
Expand Down Expand Up @@ -1580,190 +1574,7 @@ func (data *TestData) Cleanup(namespaces []string) {
}
}

// func (data *TestData) copyPodFiles(podName string, containerName string, nsName string, fileName string, covDir string) error {
// fmt.Println("_______________ copyPodFiles")
// // getPodWriter creates the file with name podName-fileName-suffix. It returns nil if the
// // file cannot be created. File must be closed by the caller.
// getPodWriter := func(podName, fileName string) *os.File {
// destFile, err := filepath.Abs(filepath.Join("../.././", covDir, fileName))
// if err != nil {
// fmt.Println(fmt.Sprintf("FILE PATH ERROR IS NOT NIL: %v", err))
// return nil
// }
// fmt.Println("QWERTY destination file is: " + destFile)
// f, err := os.Create(destFile)
// if err != nil {
// fmt.Println(fmt.Sprintf("FILE CREATION ERROR IS NOT NIL: %v", err))
// _ = fmt.Errorf("error when creating destination file '%s': %v", destFile, err)
// return nil
// }
// if f == nil {
// fmt.Println("NILLED F MID")
// }
// return f
// }

// // dump the file from Theia Pods to disk.
// w := getPodWriter(podName, fileName)
// if w == nil {
// return nil
// }
// defer w.Close()
// cmd := []string{"cat", fileName}
// stdout, stderr, err := data.RunCommandFromPod(nsName, podName, containerName, cmd)
// if err != nil {
// return fmt.Errorf("cannot retrieve content of file '%s' from Pod '%s', stderr: <%v>, err: <%v>", fileName, podName, stderr, err)
// }
// if stdout == "" {
// return nil
// }
// w.WriteString(stdout)
// return nil
// }

// func (data *TestData) copyNodeFiles(nodeName, fileName, covDir, covPrefix string) error {
// fmt.Println("-------------- copyNodeFiles: node " + nodeName + " has file: " + fileName)
// // getNodeWriter creates the file with name nodeName-suffix. It returns nil if the file
// // cannot be created. File must be closed by the caller.
// getNodeWriter := func(nodeName, fileName, suffix string) *os.File {
// covdirabs, err := filepath.Abs("../../" + covDir)
// if err != nil {
// log.Infof("copyNodeFiles: error creating absolute file path: %v", err)
// return nil
// }
// covFile := filepath.Join(covdirabs, fmt.Sprintf("%s-%s-%s", fileName, nodeName, suffix))
// // fmt.Println("_______________ copyNodeFiles covFile: " + covFile)
// f, err := os.Create(covFile)
// if err != nil {
// _ = fmt.Errorf("error when creating coverage file '%s': %v", covFile, err)
// // fmt.Println(fmt.Sprintf("_______________ copyNodeFiles: error creating coverage file: %v", err))
// return nil
// }
// if f != nil {
// stat, err := f.Stat()
// if err != nil && stat.Size() == 0 {
// fmt.Println("_______________ copyNodeFiles: file is empty")
// return nil
// }
// fmt.Println("_______________ copyNodeFiles: file is not empty")
// } else {
// fmt.Println("_______________ copyNodeFiles: file is nil")
// }
// return f
// }

// // dump the file from Antrea Pods to disk.
// // a filepath-friendly timestamp format.
// const timeFormat = "Jan02-15-04-05"
// timeStamp := time.Now().Format(timeFormat)
// w := getNodeWriter(nodeName, fileName, timeStamp)
// if w == nil {
// return fmt.Errorf("error in copyNodeFiles: nodewriter file is null")
// } else {
// stat, err := w.Stat()
// if err != nil && stat.Size() == 0 {
// return fmt.Errorf("error in copyNodeFiles: nodewriter file is empty")
// }
// }
// defer w.Close()
// fileName = "/var/log/" + covPrefix + "-coverage/" + fileName
// cmd := fmt.Sprintf("cat %s", fileName)
// fmt.Println("_______________ copyNodeFiles: cat command set")
// rc, stdout, stderr, err := data.RunCommandOnNode(nodeName, cmd)
// fmt.Printf("|||||| copyNodeFiles stderr: %v \n", stderr)
// if err != nil {
// return fmt.Errorf("error in copyNodeFiles: %v", err)
// }
// if rc != 0 && stdout == "" {
// fmt.Printf("_______________ copyNodeFiles: cat command stdout empty\n")
// os.Remove(w.Name())
// return nil
// }
// w.WriteString(stdout)
// fmt.Println("_______________ copyNodeFiles: finished writing string")
// return nil
// }

// func (data *TestData) findAndCopyCovFiles(nodeName, podName, covDir, covPrefix string) error {
// cmd := "/bin/sh -c find / -name 'covmeta.*' -exec basename {} ';'"
// rc, stdout, stderr, err := data.RunCommandOnNode(nodeName, cmd)
// var files []string
// if err != nil || rc != 0 {
// if err != nil && !strings.Contains(err.Error(), "No such file or directory") {
// return fmt.Errorf("error when running this find command (for coverage file) '%s' on Node %s, Pod %s, stderr: <%v>, err: <%v>", cmd, nodeName, podName, stderr, err)
// }
// } else {
// stdout = strings.TrimSpace(stdout)
// files = strings.Split(stdout, "\n")
// fmt.Printf("found meta file at: %v with stdout: %v\n", files, stdout)
// }
// cmd = "/bin/sh -c find / -name 'covcounters.*' -exec basename {} ';'"
// rc, stdout, stderr, err = data.RunCommandOnNode(nodeName, cmd)
// if err != nil || rc != 0 {
// if err != nil && !strings.Contains(err.Error(), "No such file or directory") {
// return fmt.Errorf("error when running this find command (for coverage file) '%s' on Node %s, Pod %s, stderr: <%v>, err: <%v>", cmd, nodeName, podName, stderr, err)
// }
// } else {
// stdout = strings.TrimSpace(stdout)
// files = append(files, strings.Split(stdout, "\n")...)
// fmt.Printf("found cover file at: %v with stdout: %v\n", strings.Split(stdout, "\n"), stdout)
// }
// for _, file := range files {
// if len(file) == 0 {
// continue
// }
// err = data.copyNodeFiles(nodeName, file, covDir, covPrefix)
// if err != nil {
// return fmt.Errorf("error when copying coverage files from Pod '%s' to coverage directory '%s': %v", podName, covDir, err)
// }
// }
// return nil
// }

// func (data *TestData) killProcessesAndCollectCovFiles(namespace, podName, containerName, processName, covDir, covPrefix string) error {
// cmds := []string{"pgrep", "-f", processName}
// stdout, stderr, err := data.RunCommandFromPod(namespace, podName, containerName, cmds)
// if err != nil {
// return fmt.Errorf("error when getting pid of '%s', stderr: <%v>, err: <%v>", processName, stderr, err)
// }
// cmds = []string{"kill", "-SIGINT", strings.TrimSpace(stdout)}
// log.Infof("Sending SIGINT to '%s' with cmd 'kill -SIGINT %s'", processName, strings.TrimSpace(stdout))
// fmt.Println("sending SIGINT")
// _, stderr, err = data.RunCommandFromPod(namespace, podName, containerName, cmds)
// if err != nil {
// return fmt.Errorf("error when sending SIGINT signal to '%s', stderr: <%v>, err: <%v>", processName, stderr, err)
// }
// log.Infof("Copying coverage files from worker nodes kind-worker and kind-worker2")
// if err = data.findAndCopyCovFiles("kind-worker", podName, covDir, covPrefix); err != nil {
// fmt.Println("errored out looking in kind-worker")
// return err
// }
// fmt.Println("did not error out looking in kind-worker")
// if err = data.findAndCopyCovFiles("kind-worker2", podName, covDir, covPrefix); err != nil {
// fmt.Println("errored out looking in kind-worker2")
// return err
// }
// fmt.Println("did not error out looking in kind-worker2")
// return nil
// }

// docker cp kind-worker:/var/log/cm-coverage/. .coverage/clickhouse-monitor-coverage/
// func (data *TestData) copyCovFolder(nodeName, covDir, covPrefix string) error {
// covDirAbs, err := filepath.Abs("../../" + covDir)
// if err != nil {
// return fmt.Errorf("copyCovFolder: error creating absolute file path: %v", err)
// }
// cmd := exec.Command("docker", "cp", nodeName+":"+"/var/log/"+covPrefix+"-coverage/.", covDirAbs);
// if err := cmd.Run(); err != nil {
// if !strings.Contains(err.Error(), "exit status 1") {
// return fmt.Errorf("error while running docker cp command[%v] from node: %s: %v", cmd, nodeName, err)
// }
// }
// return nil
// }

func flowVisibilityCleanup(tb testing.TB, data *TestData, config FlowVisibilitySetUpConfig) {
// TODO potentially check for files here
teardownTest(tb, data)
teardownFlowVisibility(tb, data, config)
}
Expand Down

0 comments on commit fe62781

Please sign in to comment.