fixed cleanup, one last step left to delete after finished

Signed-off-by: Dhruv-J <[email protected]>
antrea-io · Jul 21, 2023 · fe62781 · fe62781
1 parent 5c420fb
commit fe62781
Show file tree

Hide file tree

Showing 8 changed files with 43 additions and 214 deletions.
diff --git a/build/charts/theia/README.md b/build/charts/theia/README.md
@@ -34,7 +34,6 @@ Kubernetes: `>= 1.16.0-0`
 | clickhouse.monitor.deletePercentage | float | `0.5` | The percentage of records in ClickHouse that will be deleted when the storage grows above threshold. Vary from 0 to 1. |
 | clickhouse.monitor.enable | bool | `true` | Determine whether to run a monitor to periodically check the ClickHouse memory usage and clean data. |
 | clickhouse.monitor.execInterval | string | `"1m"` | The time interval between two rounds of monitoring. Can be a plain integer using one of these unit suffixes ns, us (or µs), ms, s, m, h. |
-| clickhouse.monitor.gocoverdir | string | `"clickhouse-monitor-coverage"` | coverage directory to be used |
 | clickhouse.monitor.image | object | `{"pullPolicy":"IfNotPresent","repository":"projects.registry.vmware.com/antrea/theia-clickhouse-monitor","tag":""}` | Container image used by the ClickHouse Monitor. |
 | clickhouse.monitor.skipRoundsNum | int | `3` | The number of rounds for the monitor to stop after a deletion to wait for the ClickHouse MergeTree Engine to release memory. |
 | clickhouse.monitor.threshold | float | `0.5` | The storage percentage at which the monitor starts to delete old records. Vary from 0 to 1. |

diff --git a/build/charts/theia/values.yaml b/build/charts/theia/values.yaml
@@ -28,8 +28,6 @@ clickhouse:
     # -- The number of rounds for the monitor to stop after a deletion to wait for
     # the ClickHouse MergeTree Engine to release memory.
     skipRoundsNum: 3
-    # -- coverage directory to be used
-    gocoverdir: clickhouse-monitor-coverage
     # -- Container image used by the ClickHouse Monitor.
     image:
       repository: "projects.registry.vmware.com/antrea/theia-clickhouse-monitor"
@@ -271,5 +269,4 @@ theiaManager:
     # -- TLS min version from: VersionTLS10, VersionTLS11, VersionTLS12, VersionTLS13.
     tlsMinVersion: ""
   # -- Log verbosity switch for Theia Manager.
-  logVerbosity: 0
-  # -- coverage directory to be used
+  logVerbosity: 0
diff --git a/ci/kind/test-e2e-kind.sh b/ci/kind/test-e2e-kind.sh
@@ -166,8 +166,8 @@ function run_test {
   sed -i -e "s/activeFlowExportTimeout: \"5s\"/activeFlowExportTimeout: \"2s\"/g" $TMP_DIR/antrea.yml
   sed -i -e "s/idleFlowExportTimeout: \"15s\"/idleFlowExportTimeout: \"1s\"/g" $TMP_DIR/antrea.yml
 
-  curl -o $TMP_DIR/flow-aggregator.yml https://raw.githubusercontent.com/antrea-io/antrea/main/build/yamls/flow-aggregator.yml
-  #cp ~/go/src/github.com/antrea/build/yamls/flow-aggregator.yml $TMP_DIR/flow-aggregator.yml
+  #curl -o $TMP_DIR/flow-aggregator.yml https://raw.githubusercontent.com/antrea-io/antrea/main/build/yamls/flow-aggregator.yml
+  cp ~/go/src/github.com/antrea/build/yamls/flow-aggregator.yml $TMP_DIR/flow-aggregator.yml
   sed -i -e "s|image: projects.registry.vmware.com/antrea/flow-aggregator:latest|image: antrea/flow-aggregator:latest|g" $TMP_DIR/flow-aggregator.yml
   perl -i -p0e 's/      # Enable is the switch to enable exporting flow records to ClickHouse.\n      enable: false/      # Enable is the switch to enable exporting flow records to ClickHouse.\n      enable: true/' $TMP_DIR/flow-aggregator.yml
   sed -i -e "s/    activeFlowRecordTimeout: 60s/    activeFlowRecordTimeout: 3500ms/g" $TMP_DIR/flow-aggregator.yml
@@ -188,13 +188,11 @@ function run_test {
   sleep 1
 
   if $coverage; then
-    echo "STARTED_RUNNING"
     go test -v -timeout=30m antrea.io/theia/test/e2e -provider=kind --logs-export-dir=$ANTREA_LOG_DIR -cover -coverprofile=.coverage/.cov.out -covermode=atomic --skip=$skiplist -test.gocoverdir=.coverage/kind-e2e-coverage
   else
     go test -v -timeout=30m antrea.io/theia/test/e2e -provider=kind --logs-export-dir=$ANTREA_LOG_DIR --skip=$skiplist
   fi
 
-  echo "FINISHED_RUNNING"
 }
 
 function coverage_and_cleanup_test {

diff --git a/pkg/theia/commands/anomaly_detection_delete_test.go b/pkg/theia/commands/anomaly_detection_delete_test.go
@@ -144,5 +144,4 @@ func TestAnomalyDetectionDelete(t *testing.T) {
 			}
 		})
 	}
-	fmt.Println("done running AD delete test")
 }
diff --git a/plugins/clickhouse-monitor/main.go b/plugins/clickhouse-monitor/main.go
@@ -106,10 +106,8 @@ func main() {
 
 func startMonitor(connect *sql.DB) {
 	stopCh := signals.RegisterSignalHandlers()
-	klog.InfoS("----------registered stop handler inside loop")
-	// Set up signal capture: the first SIGTERM / SIGINT signal is handled gracefully and will
-	// cause the stopCh channel to be closed; if another signal is received before the program
-	// exits, we will force exit.
+	// Set up signal capture: the first SIGINT received is expected to be one that was
+	// sent intentionally in order to collect coverage.
 	runUntil(func() {
 		// The monitor stops working for several rounds after a deletion
 		// as the release of memory space by the ClickHouse MergeTree engine requires time

diff --git a/plugins/clickhouse-monitor/main_test.go b/plugins/clickhouse-monitor/main_test.go
@@ -27,7 +27,6 @@ import (
 )
 
 func TestMonitorWithMockDB(t *testing.T) {
-	klog.InfoS("into function TestMonitorWithMockDB")
 	db, mock, err := sqlmock.New(sqlmock.QueryMatcherOption(sqlmock.QueryMatcherEqual))
 	if err != nil {
 		t.Fatalf("an error '%s' was not expected when opening a stub database connection", err)

diff --git a/test/e2e/e2e_coverage_and_cleanup_test.go b/test/e2e/e2e_coverage_and_cleanup_test.go
@@ -35,10 +35,10 @@ func copyCovFolder(nodeName, covDir, covPrefix string) error {
 	if err != nil {
 		return fmt.Errorf("copyCovFolder: error creating absolute file path: %v", err)
 	}
-	cmd := exec.Command("docker", "cp", nodeName+":"+"/var/log/"+covPrefix+"-coverage/.", covDirAbs)
+	pathOnNode := nodeName + ":" + "/var/log/" + covPrefix + "-coverage/."
+	cmd := exec.Command("docker", "cp", pathOnNode, covDirAbs)
 	var errb bytes.Buffer
 	cmd.Stderr = &errb
-	// TODO add stdout and stderr reader
 	if err := cmd.Run(); err != nil {
 		errStr := errb.String()
 		fmt.Printf("err: %v | stderr: %s\n", err, errStr)
@@ -49,7 +49,7 @@ func copyCovFolder(nodeName, covDir, covPrefix string) error {
 	return nil
 }
 
-func copyCovFilesBothNodes(namespace, covDir, covPrefix string) error {
+func copyCovFilesBothNodes(covPrefix string) error {
 	log.Infof("Copying coverage files from worker nodes kind-worker and kind-worker2")
 	if err := copyCovFolder(workerNodeA, cmCovDir, covPrefix); err != nil {
 		return err
@@ -66,19 +66,47 @@ func copyCovFilesBothNodes(namespace, covDir, covPrefix string) error {
 	return nil
 }
 
-// func clearCovFilesBothNodes() error {
-// 	return nil
-// }
+// clearCovFolder empties /var/log/<covPrefix>-coverage inside the container nodeName
+// using "docker exec". A failure whose stderr contains "not found" is tolerated.
+func clearCovFolder(nodeName, covPrefix string) error {
+	// No backticks around the command: they made sh run rm in a command substitution
+	// and then try to execute whatever rm printed.
+	rmCmd := "rm -rf /var/log/" + covPrefix + "-coverage/*"
+	cmd := exec.Command("docker", "exec", nodeName, "sh", "-c", rmCmd)
+	var errb bytes.Buffer
+	cmd.Stderr = &errb
+	if err := cmd.Run(); err != nil && !strings.Contains(errb.String(), "not found") {
+		return fmt.Errorf("error while running docker exec command[%v] on node %s: %v, stderr: %s", cmd, nodeName, err, errb.String())
+	}
+	return nil
+}
+
+// clearCovFilesBothNodes clears workerNodeA's coverage folder, then workerNodeB's.
+func clearCovFilesBothNodes(covPrefix string) error {
+	log.Infof("Clearing coverage files from worker nodes kind-worker and kind-worker2")
+	for _, node := range []string{workerNodeA, workerNodeB} {
+		if err := clearCovFolder(node, covPrefix); err != nil {
+			return err // stop at the first node that fails
+		}
+	}
+	return nil
+}
 
 func TestCoverageAndCleanup(t *testing.T) {
 	if os.Getenv("COVERAGE") == "" {
 		t.Skip()
 	}
 	fmt.Println("RUNNING FINAL COVERAGE STUFF")
-	if err := copyCovFilesBothNodes("flow-visibility", ".coverage", "cm"); err != nil {
-		t.Fatalf("error running TestCoverageAndCleanup: %v", err)
+	if err := copyCovFilesBothNodes("cm"); err != nil {
+		t.Fatalf("error running TestCoverageAndCleanup clickhouse-monitor copy files: %v", err)
+	}
+	if err := copyCovFilesBothNodes("tm"); err != nil {
+		t.Fatalf("error running TestCoverageAndCleanup theia-manager copy files: %v", err)
+	}
+	if err := clearCovFilesBothNodes("cm"); err != nil {
+		t.Fatalf("error running TestCoverageAndCleanup clickhouse-monitor clear files: %v", err)
 	}
-	if err := copyCovFilesBothNodes("flow-visibility", ".coverage", "tm"); err != nil {
-		t.Fatalf("error running TestCoverageAndCleanup: %v", err)
+	if err := clearCovFilesBothNodes("tm"); err != nil {
+		t.Fatalf("error running TestCoverageAndCleanup theia-manager clear files: %v", err)
 	}
 }
diff --git a/test/e2e/framework.go b/test/e2e/framework.go
@@ -1406,9 +1406,6 @@ func (data *TestData) killProcesses(namespace, podName, containerName, processNa
 		return fmt.Errorf("error when getting pid of '%s', stderr: <%v>, err: <%v>", processName, stderr, err)
 	}
 	cmds = []string{"kill", "-SIGINT", strings.TrimSpace(stdout)}
-	// TOREMOVE vvv
-	log.Infof("TOREMOVE Sending SIGINT to '%s' with cmd 'kill -SIGINT %s'", processName, strings.TrimSpace(stdout))
-	fmt.Println("sending SIGINT")
 	_, stderr, err = data.RunCommandFromPod(namespace, podName, containerName, cmds)
 	if err != nil {
 		return fmt.Errorf("error when sending SIGINT signal to '%s', stderr: <%v>, err: <%v>", processName, stderr, err)
@@ -1431,11 +1428,8 @@ func (data *TestData) killProcessesOnPods() error {
 			err = data.killProcesses("flow-visibility", pod.Name, clickHouseMonitorContName, clickHouseMonitorContName)
 		} else if strings.Contains(podName, "theia-manager") {
 			err = data.killProcesses("flow-visibility", pod.Name, theiaManagerContName, theiaManagerContName)
-		} else {
-			fmt.Println("no coverage files to look for")
 		}
 		if err != nil {
-			fmt.Println(fmt.Sprintf("copyCovFilesFromPods: error copying node files: %v", err))
 			return fmt.Errorf("error when copying coverage files from pods: copy pod files out, error:%v", err)
 		}
 	}
@@ -1580,190 +1574,7 @@ func (data *TestData) Cleanup(namespaces []string) {
 	}
 }
 
-// func (data *TestData) copyPodFiles(podName string, containerName string, nsName string, fileName string, covDir string) error {
-// 	fmt.Println("_______________ copyPodFiles")
-// 	// getPodWriter creates the file with name podName-fileName-suffix. It returns nil if the
-// 	// file cannot be created. File must be closed by the caller.
-// 	getPodWriter := func(podName, fileName string) *os.File {
-// 		destFile, err := filepath.Abs(filepath.Join("../.././", covDir, fileName))
-// 		if err != nil {
-// 			fmt.Println(fmt.Sprintf("FILE PATH ERROR IS NOT NIL: %v", err))
-// 			return nil
-// 		}
-// 		fmt.Println("QWERTY destination file is: " + destFile)
-// 		f, err := os.Create(destFile)
-// 		if err != nil {
-// 			fmt.Println(fmt.Sprintf("FILE CREATION ERROR IS NOT NIL: %v", err))
-// 			_ = fmt.Errorf("error when creating destination file '%s': %v", destFile, err)
-// 			return nil
-// 		}
-// 		if f == nil {
-// 			fmt.Println("NILLED F MID")
-// 		}
-// 		return f
-// 	}
-
-// 	// dump the file from Theia Pods to disk.
-// 	w := getPodWriter(podName, fileName)
-// 	if w == nil {
-// 		return nil
-// 	}
-// 	defer w.Close()
-// 	cmd := []string{"cat", fileName}
-// 	stdout, stderr, err := data.RunCommandFromPod(nsName, podName, containerName, cmd)
-// 	if err != nil {
-// 		return fmt.Errorf("cannot retrieve content of file '%s' from Pod '%s', stderr: <%v>, err: <%v>", fileName, podName, stderr, err)
-// 	}
-// 	if stdout == "" {
-// 		return nil
-// 	}
-// 	w.WriteString(stdout)
-// 	return nil
-// }
-
-// func (data *TestData) copyNodeFiles(nodeName, fileName, covDir, covPrefix string) error {
-// 	fmt.Println("-------------- copyNodeFiles: node " + nodeName + " has file: " + fileName)
-// 	// getNodeWriter creates the file with name nodeName-suffix. It returns nil if the file
-// 	// cannot be created. File must be closed by the caller.
-// 	getNodeWriter := func(nodeName, fileName, suffix string) *os.File {
-// 		covdirabs, err := filepath.Abs("../../" + covDir)
-// 		if err != nil {
-// 			log.Infof("copyNodeFiles: error creating absolute file path: %v", err)
-// 			return nil
-// 		}
-// 		covFile := filepath.Join(covdirabs, fmt.Sprintf("%s-%s-%s", fileName, nodeName, suffix))
-// 		// fmt.Println("_______________ copyNodeFiles covFile: " + covFile)
-// 		f, err := os.Create(covFile)
-// 		if err != nil {
-// 			_ = fmt.Errorf("error when creating coverage file '%s': %v", covFile, err)
-// 			// fmt.Println(fmt.Sprintf("_______________ copyNodeFiles: error creating coverage file: %v", err))
-// 			return nil
-// 		}
-// 		if f != nil {
-// 			stat, err := f.Stat()
-// 			if err != nil && stat.Size() == 0 {
-// 				fmt.Println("_______________ copyNodeFiles: file is empty")
-// 				return nil
-// 			}
-// 			fmt.Println("_______________ copyNodeFiles: file is not empty")
-// 		} else {
-// 			fmt.Println("_______________ copyNodeFiles: file is nil")
-// 		}
-// 		return f
-// 	}
-
-// 	// dump the file from Antrea Pods to disk.
-// 	// a filepath-friendly timestamp format.
-// 	const timeFormat = "Jan02-15-04-05"
-// 	timeStamp := time.Now().Format(timeFormat)
-// 	w := getNodeWriter(nodeName, fileName, timeStamp)
-// 	if w == nil {
-// 		return fmt.Errorf("error in copyNodeFiles: nodewriter file is null")
-// 	} else {
-// 		stat, err := w.Stat()
-// 		if err != nil && stat.Size() == 0 {
-// 			return fmt.Errorf("error in copyNodeFiles: nodewriter file is empty")
-// 		}
-// 	}
-// 	defer w.Close()
-// 	fileName = "/var/log/" + covPrefix + "-coverage/" + fileName
-// 	cmd := fmt.Sprintf("cat %s", fileName)
-// 	fmt.Println("_______________ copyNodeFiles: cat command set")
-// 	rc, stdout, stderr, err := data.RunCommandOnNode(nodeName, cmd)
-// 	fmt.Printf("|||||| copyNodeFiles stderr: %v \n", stderr)
-// 	if err != nil {
-// 		return fmt.Errorf("error in copyNodeFiles: %v", err)
-// 	}
-// 	if rc != 0 && stdout == "" {
-// 		fmt.Printf("_______________ copyNodeFiles: cat command stdout empty\n")
-// 		os.Remove(w.Name())
-// 		return nil
-// 	}
-// 	w.WriteString(stdout)
-// 	fmt.Println("_______________ copyNodeFiles: finished writing string")
-// 	return nil
-// }
-
-// func (data *TestData) findAndCopyCovFiles(nodeName, podName, covDir, covPrefix string) error {
-// 	cmd := "/bin/sh -c find / -name 'covmeta.*' -exec basename {} ';'"
-// 	rc, stdout, stderr, err := data.RunCommandOnNode(nodeName, cmd)
-// 	var files []string
-// 	if err != nil || rc != 0 {
-// 		if err != nil && !strings.Contains(err.Error(), "No such file or directory") {
-// 			return fmt.Errorf("error when running this find command (for coverage file) '%s' on Node %s, Pod %s, stderr: <%v>, err: <%v>", cmd, nodeName, podName, stderr, err)
-// 		}
-// 	} else {
-// 		stdout = strings.TrimSpace(stdout)
-// 		files = strings.Split(stdout, "\n")
-// 		fmt.Printf("found meta file at: %v with stdout: %v\n", files, stdout)
-// 	}
-// 	cmd = "/bin/sh -c find / -name 'covcounters.*' -exec basename {} ';'"
-// 	rc, stdout, stderr, err = data.RunCommandOnNode(nodeName, cmd)
-// 	if err != nil || rc != 0 {
-// 		if err != nil && !strings.Contains(err.Error(), "No such file or directory") {
-// 			return fmt.Errorf("error when running this find command (for coverage file) '%s' on Node %s, Pod %s, stderr: <%v>, err: <%v>", cmd, nodeName, podName, stderr, err)
-// 		}
-// 	} else {
-// 		stdout = strings.TrimSpace(stdout)
-// 		files = append(files, strings.Split(stdout, "\n")...)
-// 		fmt.Printf("found cover file at: %v with stdout: %v\n", strings.Split(stdout, "\n"), stdout)
-// 	}
-// 	for _, file := range files {
-// 		if len(file) == 0 {
-// 			continue
-// 		}
-// 		err = data.copyNodeFiles(nodeName, file, covDir, covPrefix)
-// 		if err != nil {
-// 			return fmt.Errorf("error when copying coverage files from Pod '%s' to coverage directory '%s': %v", podName, covDir, err)
-// 		}
-// 	}
-// 	return nil
-// }
-
-// func (data *TestData) killProcessesAndCollectCovFiles(namespace, podName, containerName, processName, covDir, covPrefix string) error {
-// 	cmds := []string{"pgrep", "-f", processName}
-// 	stdout, stderr, err := data.RunCommandFromPod(namespace, podName, containerName, cmds)
-// 	if err != nil {
-// 		return fmt.Errorf("error when getting pid of '%s', stderr: <%v>, err: <%v>", processName, stderr, err)
-// 	}
-// 	cmds = []string{"kill", "-SIGINT", strings.TrimSpace(stdout)}
-// 	log.Infof("Sending SIGINT to '%s' with cmd 'kill -SIGINT %s'", processName, strings.TrimSpace(stdout))
-// 	fmt.Println("sending SIGINT")
-// 	_, stderr, err = data.RunCommandFromPod(namespace, podName, containerName, cmds)
-// 	if err != nil {
-// 		return fmt.Errorf("error when sending SIGINT signal to '%s', stderr: <%v>, err: <%v>", processName, stderr, err)
-// 	}
-// 	log.Infof("Copying coverage files from worker nodes kind-worker and kind-worker2")
-// 	if err = data.findAndCopyCovFiles("kind-worker", podName, covDir, covPrefix); err != nil {
-// 		fmt.Println("errored out looking in kind-worker")
-// 		return err
-// 	}
-// 	fmt.Println("did not error out looking in kind-worker")
-// 	if err = data.findAndCopyCovFiles("kind-worker2", podName, covDir, covPrefix); err != nil {
-// 		fmt.Println("errored out looking in kind-worker2")
-// 		return err
-// 	}
-// 	fmt.Println("did not error out looking in kind-worker2")
-// 	return nil
-// }
-
-// docker cp kind-worker:/var/log/cm-coverage/. .coverage/clickhouse-monitor-coverage/
-// func (data *TestData) copyCovFolder(nodeName, covDir, covPrefix string) error {
-// 	covDirAbs, err := filepath.Abs("../../" + covDir)
-// 	if err != nil {
-// 		return fmt.Errorf("copyCovFolder: error creating absolute file path: %v", err)
-// 	}
-// 	cmd := exec.Command("docker", "cp", nodeName+":"+"/var/log/"+covPrefix+"-coverage/.", covDirAbs);
-// 	if err := cmd.Run(); err != nil {
-// 		if !strings.Contains(err.Error(), "exit status 1") {
-// 			return fmt.Errorf("error while running docker cp command[%v] from node: %s: %v", cmd, nodeName, err)
-// 		}
-// 	}
-// 	return nil
-// }
-
 func flowVisibilityCleanup(tb testing.TB, data *TestData, config FlowVisibilitySetUpConfig) {
-	// TODO potentially check for files here
 	teardownTest(tb, data)
 	teardownFlowVisibility(tb, data, config)
 }