Skip to content

Commit

Permalink
feat: add parallel run to quantitative testing (#383)
Browse files Browse the repository at this point in the history
* feat: run quantitative tests in parallel

Signed-off-by: Felipe Zipitria <[email protected]>

* fix: rebase

Signed-off-by: Felipe Zipitria <[email protected]>

* should fix

---------

Signed-off-by: Felipe Zipitria <[email protected]>
Co-authored-by: Matteo Pace <[email protected]>
  • Loading branch information
fzipi and M4tteoP authored Nov 3, 2024
1 parent f5a9644 commit 27ea76a
Show file tree
Hide file tree
Showing 8 changed files with 80 additions and 44 deletions.
47 changes: 28 additions & 19 deletions cmd/quantitative.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"fmt"
"os"

"github.com/rs/zerolog"
"github.com/spf13/cobra"

"github.com/coreruleset/go-ftw/experimental/corpus"
Expand All @@ -25,17 +26,18 @@ func NewQuantitativeCmd() *cobra.Command {
RunE: runQuantitativeE,
}

runCmd.Flags().IntP("lines", "l", 0, "Number of lines of input to process before stopping")
runCmd.Flags().IntP("paranoia-level", "P", 1, "Paranoia level used to run the quantitative tests")
runCmd.Flags().IntP("corpus-line", "n", 0, "Number is the payload line from the corpus to exclusively send")
runCmd.Flags().IntP("lines", "l", 0, "Number of lines of input to process before stopping.")
runCmd.Flags().IntP("paranoia-level", "P", 1, "Paranoia level used to run the quantitative tests.")
runCmd.Flags().IntP("corpus-line", "n", 0, "Number is the payload line from the corpus to exclusively send.")
runCmd.Flags().StringP("payload", "p", "", "Payload is a string you want to test using quantitative tests. Will not use the corpus.")
runCmd.Flags().IntP("rule", "r", 0, "Rule ID of interest: only show false positives for specified rule ID")
runCmd.Flags().StringP("corpus", "c", "leipzig", "Corpus to use for the quantitative tests")
runCmd.Flags().StringP("corpus-lang", "L", "eng", "Corpus language to use for the quantitative tests")
runCmd.Flags().IntP("rule", "r", 0, "Rule ID of interest: only show false positives for specified rule ID.")
runCmd.Flags().IntP("max-concurrency", "", 10, "maximum number of goroutines. Defaults to 10, or 1 if log level is debug/trace.")
runCmd.Flags().StringP("corpus", "c", "leipzig", "Corpus to use for the quantitative tests.")
runCmd.Flags().StringP("corpus-lang", "L", "eng", "Corpus language to use for the quantitative tests.")
runCmd.Flags().StringP("corpus-size", "s", "100K", "Corpus size to use for the quantitative tests. Most corpora will have sizes like \"100K\", \"1M\", etc.")
runCmd.Flags().StringP("corpus-year", "y", "2023", "Corpus year to use for the quantitative tests. Most corpus will have a year like \"2023\", \"2022\", etc.")
runCmd.Flags().StringP("corpus-source", "S", "news", "Corpus source to use for the quantitative tests. Most corpus will have a source like \"news\", \"web\", \"wikipedia\", etc.")
runCmd.Flags().StringP("crs-path", "C", ".", "Path to top folder of local CRS installation")
runCmd.Flags().StringP("crs-path", "C", ".", "Path to top folder of local CRS installation.")
runCmd.Flags().StringP("file", "f", "", "Output file path for quantitative tests. Prints to standard output by default.")
runCmd.Flags().StringP("output", "o", "normal", "Output type for quantitative tests. \"normal\" is the default.")

Expand All @@ -59,6 +61,12 @@ func runQuantitativeE(cmd *cobra.Command, _ []string) error {
number, _ := cmd.Flags().GetInt("number")
rule, _ := cmd.Flags().GetInt("rule")
wantedOutput, _ := cmd.Flags().GetString("output")
maxConcurrency, _ := cmd.Flags().GetInt("max-concurrency")

// --max-concurrency defaults to 1 if debug/trace is enabled, but if set explicitly, it should override this
if !cmd.Flags().Changed("max-concurrency") && zerolog.GlobalLevel() <= zerolog.DebugLevel {
maxConcurrency = 1
}

if paranoiaLevel > 1 && rule > 0 {
return fmt.Errorf("paranoia level and rule ID cannot be used together")
Expand Down Expand Up @@ -86,18 +94,19 @@ func runQuantitativeE(cmd *cobra.Command, _ []string) error {
}

params := quantitative.Params{
Corpus: corpusType,
CorpusSize: corpusSize,
CorpusYear: corpusYear,
CorpusLang: corpusLang,
CorpusSource: corpusSource,
Directory: directory,
Fast: fast,
Lines: lines,
ParanoiaLevel: paranoiaLevel,
Number: number,
Payload: payload,
Rule: rule,
Corpus: corpusType,
CorpusSize: corpusSize,
CorpusYear: corpusYear,
CorpusLang: corpusLang,
CorpusSource: corpusSource,
Directory: directory,
Fast: fast,
Lines: lines,
ParanoiaLevel: paranoiaLevel,
Number: number,
Payload: payload,
Rule: rule,
MaxConcurrency: maxConcurrency,
}

return quantitative.RunQuantitativeTests(params, out)
Expand Down
3 changes: 1 addition & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,14 @@ require (
github.com/go-logr/zerologr v1.2.3
github.com/google/uuid v1.6.0
github.com/hashicorp/go-getter v1.7.6
github.com/hashicorp/go-getter/v2 v2.2.3
github.com/icza/backscanner v0.0.0-20240328210400-b40c3a86dec5
github.com/knadh/koanf/parsers/yaml v0.1.0
github.com/knadh/koanf/providers/env v1.0.0
github.com/knadh/koanf/providers/file v1.1.2
github.com/knadh/koanf/providers/rawbytes v0.1.0
github.com/knadh/koanf/v2 v2.1.1
github.com/kyokomi/emoji/v2 v2.2.13
github.com/magefile/mage v1.15.0
github.com/magefile/mage v1.15.1-0.20230912152418-9f54e0f83e2a
github.com/rs/zerolog v1.33.0
github.com/spf13/cobra v1.8.1
github.com/stretchr/testify v1.9.0
Expand Down
15 changes: 2 additions & 13 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -182,8 +182,6 @@ cloud.google.com/go/webrisk v1.4.0/go.mod h1:Hn8X6Zr+ziE2aNd8SliSDWpEnSS1u4R9+xX
cloud.google.com/go/webrisk v1.5.0/go.mod h1:iPG6fr52Tv7sGk0H6qUFzmL3HHZev1htXuWDEEsqMTg=
cloud.google.com/go/workflows v1.6.0/go.mod h1:6t9F5h/unJz41YqfBmqSASJSXccBLtD1Vwf+KmJENM0=
cloud.google.com/go/workflows v1.7.0/go.mod h1:JhSrZuVZWuiDfKEFxU0/F1PQjmpnpcoISEXH2bcHC3M=
code.gitea.io/sdk/gitea v0.18.0 h1:+zZrwVmujIrgobt6wVBWCqITz6bn1aBjnCUHmpZrerI=
code.gitea.io/sdk/gitea v0.18.0/go.mod h1:IG9xZJoltDNeDSW0qiF2Vqx5orMWa7OhVWrjvrd5NpI=
code.gitea.io/sdk/gitea v0.19.0 h1:8I6s1s4RHgzxiPHhOQdgim1RWIRcr0LVMbHBjBFXq4Y=
code.gitea.io/sdk/gitea v0.19.0/go.mod h1:IG9xZJoltDNeDSW0qiF2Vqx5orMWa7OhVWrjvrd5NpI=
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
Expand All @@ -193,8 +191,6 @@ github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJ
github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=
github.com/Masterminds/semver v1.5.0 h1:H65muMkzWKEuNDnfl9d70GUjFniHKHRbFPGBuZ3QEww=
github.com/Masterminds/semver v1.5.0/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y=
github.com/Masterminds/semver/v3 v3.2.1 h1:RN9w6+7QoMeJVGyfmbcgs28Br8cvmnucEXnY0rYXWg0=
github.com/Masterminds/semver/v3 v3.2.1/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ=
github.com/Masterminds/semver/v3 v3.3.0 h1:B8LGeaivUe71a5qox1ICM/JLl0NqZSW5CHyL+hmvYS0=
github.com/Masterminds/semver/v3 v3.3.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
github.com/Masterminds/sprig v2.22.0+incompatible h1:z4yfnGrZ7netVz+0EDJ0Wi+5VZCSYp4Z0m2dk6cEM60=
Expand Down Expand Up @@ -234,8 +230,6 @@ github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSV
github.com/coreruleset/ftw-tests-schema/v2 v2.1.0 h1:2ilKzKRG5UzzxBcrJLXFtPalStdQ9jzzaYFuFk0OEk0=
github.com/coreruleset/ftw-tests-schema/v2 v2.1.0/go.mod h1:ZHVFX5ses4+5IxUP0ufCNg/VqRWxziH6ZuUca092Hxo=
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creativeprojects/go-selfupdate v1.3.0 h1:Hs+14cJ7Um7xbRpmkmrKEfkZGwec3vZLPWTVEFwYXBc=
github.com/creativeprojects/go-selfupdate v1.3.0/go.mod h1:uYJeumb6ECaI6bhc5dvH3Htjz1KoMArQjFt1P8m71Tc=
github.com/creativeprojects/go-selfupdate v1.4.0 h1:4ePPd2CPCNl/YoPXeVxpuBLDUZh8rMEKP5ac+1Y/r5c=
github.com/creativeprojects/go-selfupdate v1.4.0/go.mod h1:oPG7LmzEmS6OxfqEm620k5VKxP45xFZNKMkp4V5qqUY=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
Expand Down Expand Up @@ -389,7 +383,6 @@ github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9n
github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48=
github.com/hashicorp/go-getter v1.7.6 h1:5jHuM+aH373XNtXl9TNTUH5Qd69Trve11tHIrB+6yj4=
github.com/hashicorp/go-getter v1.7.6/go.mod h1:W7TalhMmbPmsSMdNjD0ZskARur/9GJ17cfHTRtXV744=
github.com/hashicorp/go-getter/v2 v2.2.3/go.mod h1:hp5Yy0GMQvwWVUmwLs3ygivz1JSLI323hdIE9J9m7TY=
github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k=
github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M=
github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU=
Expand Down Expand Up @@ -444,8 +437,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kyokomi/emoji/v2 v2.2.13 h1:GhTfQa67venUUvmleTNFnb+bi7S3aocF7ZCXU9fSO7U=
github.com/kyokomi/emoji/v2 v2.2.13/go.mod h1:JUcn42DTdsXJo1SWanHh4HKDEyPaR5CqkmoirZZP9qE=
github.com/magefile/mage v1.15.0 h1:BvGheCMAsG3bWUDbZ8AyXXpCNwU9u5CB6sM+HNb9HYg=
github.com/magefile/mage v1.15.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A=
github.com/magefile/mage v1.15.1-0.20230912152418-9f54e0f83e2a h1:tdPcGgyiH0K+SbsJBBm2oPyEIOTAvLBwD9TuUwVtZho=
github.com/magefile/mage v1.15.1-0.20230912152418-9f54e0f83e2a/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A=
github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
Expand Down Expand Up @@ -508,8 +501,6 @@ github.com/tonglil/buflogr v1.1.1/go.mod h1:WLLtPRLqcFYWQLbA+ytXy5WrFTYnfA+beg1M
github.com/ulikunitz/xz v0.5.10/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
github.com/ulikunitz/xz v0.5.12 h1:37Nm15o69RwBkXM0J6A5OlE67RZTfzUxTj8fB3dfcsc=
github.com/ulikunitz/xz v0.5.12/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
github.com/xanzy/go-gitlab v0.106.0 h1:EDfD03K74cIlQo2EducfiupVrip+Oj02bq9ofw5F8sA=
github.com/xanzy/go-gitlab v0.106.0/go.mod h1:ETg8tcj4OhrB84UEgeE8dSuV/0h4BBL1uOV/qK0vlyI=
github.com/xanzy/go-gitlab v0.112.0 h1:6Z0cqEooCvBMfBIHw+CgO4AKGRV8na/9781xOb0+DKw=
github.com/xanzy/go-gitlab v0.112.0/go.mod h1:wKNKh3GkYDMOsGmnfuX+ITCmDuSDWFO0G+C4AygL9RY=
github.com/yargevad/filepathx v1.0.0 h1:SYcT+N3tYGi+NvazubCNlvgIPbzAk7i7y2dwg3I5FYc=
Expand Down Expand Up @@ -664,8 +655,6 @@ golang.org/x/oauth2 v0.0.0-20220822191816-0ebed06d0094/go.mod h1:h4gKUeWbJ4rQPri
golang.org/x/oauth2 v0.0.0-20220909003341-f21342109be1/go.mod h1:h4gKUeWbJ4rQPri7E0u6Gs4e9Ri2zaLxzw5DI5XGrYg=
golang.org/x/oauth2 v0.0.0-20221014153046-6fdb5e3db783/go.mod h1:h4gKUeWbJ4rQPri7E0u6Gs4e9Ri2zaLxzw5DI5XGrYg=
golang.org/x/oauth2 v0.1.0/go.mod h1:G9FE4dLTsbXUu90h/Pf85g4w1D+SSAgR+q46nJZ8M4A=
golang.org/x/oauth2 v0.21.0 h1:tsimM75w1tF/uws5rbeHzIWxEqElMehnc+iW793zsZs=
golang.org/x/oauth2 v0.21.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs=
golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
Expand Down
18 changes: 11 additions & 7 deletions internal/quantitative/local_engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,16 +79,14 @@ func (e *localEngine) Create(prefix string, paranoia int) LocalEngine {
}

// CrsCall benchmarks the CRS WAF with a GET request
// payload: the string to be passed in the request body
// payload: the string to be passed as a query parameter
// returns a map from each matched rule ID to the data that matched.
func (e *localEngine) CrsCall(payload string) map[int]string {
var matchedRules = make(map[int]string)

if e.waf == nil {
log.Fatal().Msg("local engine not initialized")
}
// we use the payload in the URI so rules in phase 1 can catch it
uri := fmt.Sprintf("/get?payload=%s", url.QueryEscape(payload))
uri := fmt.Sprintf("/get?uri_payload=%s", url.QueryEscape(payload))

tx := e.waf.NewTransaction()
tx.ProcessConnection("127.0.0.1", 8080, "127.0.0.1", 8080)
Expand All @@ -104,7 +102,7 @@ func (e *localEngine) CrsCall(payload string) map[int]string {
log.Error().Err(err).Msg("failed to process request body")
}

matchedRules = getMatchedRules(tx)
matchedRules := getMatchedRules(tx)

// We don't care about the response body for now, nor logging.
if err := tx.Close(); err != nil {
Expand Down Expand Up @@ -165,9 +163,15 @@ func getMatchedRules(tx types.Transaction) map[int]string {
if needToDiscardAdminRule(id) {
continue
}
matchedRules[id] = rule.Data()
var logData strings.Builder
for i, matchData := range rule.MatchedDatas() {
logData.WriteString(" chain#")
logData.WriteString(strconv.Itoa(i))
logData.WriteString(": ")
logData.WriteString(matchData.Value())
}
matchedRules[id] = logData.String()
}

return matchedRules
}

Expand Down
16 changes: 14 additions & 2 deletions internal/quantitative/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
package quantitative

import (
"sync"
"time"

"github.com/rs/zerolog/log"
Expand Down Expand Up @@ -39,12 +40,14 @@ type Params struct {
CorpusYear string
// CorpusSource is the source of the corpus: e.g. most corpus will have a source like "news", "web", "wikipedia", etc.
CorpusSource string
// MaxConcurrency is the maximum number of goroutines spawned
MaxConcurrency int
}

// RunQuantitativeTests runs all quantitative tests
func RunQuantitativeTests(params Params, out *output.Output) error {
var lc corpus.File
out.Println(":hourglass: Running quantitative tests")
out.Println(":hourglass: Running quantitative tests with %d goroutines", params.MaxConcurrency)
log.Trace().Msgf("Rule: %d", params.Rule)
log.Trace().Msgf("Payload: %s", params.Payload)
log.Trace().Msgf("Directory: %s", params.Directory)
Expand Down Expand Up @@ -101,6 +104,9 @@ func RunQuantitativeTests(params Params, out *output.Output) error {

// iterate over the corpus
log.Trace().Msgf("Iterating over corpus")
var wg sync.WaitGroup
ch := make(chan int, params.MaxConcurrency)

for iter := corpusRunner.GetIterator(lc); iter.HasNext(); {
payload := iter.Next()
stats.incrementRun()
Expand All @@ -116,8 +122,14 @@ func RunQuantitativeTests(params Params, out *output.Output) error {
if params.Lines > 0 && stats.Count() >= params.Lines {
break
}
doEngineCall(runner, payload, params.Rule, stats)
wg.Add(1)
ch <- 1
go func(runner LocalEngine, payload corpus.Payload, rule int, stats *QuantitativeRunStats) {
defer func() { wg.Done(); <-ch }()
doEngineCall(runner, payload, rule, stats)
}(runner, payload, params.Rule, stats)
}
wg.Wait()

stats.SetTotalTime(time.Since(startTime))
stats.printSummary(out)
Expand Down
6 changes: 6 additions & 0 deletions internal/quantitative/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package quantitative
import (
"encoding/json"
"sort"
"sync"
"time"

"github.com/rs/zerolog/log"
Expand All @@ -23,6 +24,8 @@ type QuantitativeRunStats struct {
falsePositives int
// falsePositivesPerRule is the aggregated false positives per rule
falsePositivesPerRule map[int]int
// mu guards falsePositives and falsePositivesPerRule against concurrent updates
mu sync.Mutex
}

// NewQuantitativeStats returns a new empty stats
Expand All @@ -32,6 +35,7 @@ func NewQuantitativeStats() *QuantitativeRunStats {
falsePositives: 0,
falsePositivesPerRule: make(map[int]int),
totalTime: 0,
mu: sync.Mutex{},
}
}

Expand Down Expand Up @@ -67,8 +71,10 @@ func (s *QuantitativeRunStats) printSummary(out *output.Output) {

// addFalsePositive records one false positive for the given rule ID by
// incrementing both the overall false positive counter and the per-rule
// counter. Both updates happen while holding s.mu, so the method can be
// called from several goroutines at once.
func (s *QuantitativeRunStats) addFalsePositive(rule int) {
	s.mu.Lock()
	// Deferred so the mutex is released even if the map write below panics
	// (e.g. a nil falsePositivesPerRule); otherwise later callers would block forever.
	defer s.mu.Unlock()

	s.falsePositives++
	s.falsePositivesPerRule[rule]++
}

// FalsePositives returns the total false positives detected
Expand Down
17 changes: 17 additions & 0 deletions internal/quantitative/stats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package quantitative

import (
"bytes"
"sync"
"testing"
"time"

Expand Down Expand Up @@ -129,3 +130,19 @@ func (s *statsTestSuite) TestQuantitativeRunStats_printSummary() {
q.printSummary(out)
s.Require().Equal("Run 1 payloads in 0s\nTotal False positive ratio: 1/1 = 1.0000\nFalse positives per rule id:\n 920100: 1 false positives\n", b.String())
}

// TestAddFalsePositiveRace calls addFalsePositive from many goroutines at once.
// Under `go test -race` it flags unsynchronized access to the counters; the
// count assertions at the end additionally detect lost updates when the race
// detector is not enabled.
func TestAddFalsePositiveRace(t *testing.T) {
	const (
		calls = 100
		rules = 10
	)

	stats := &QuantitativeRunStats{
		falsePositivesPerRule: make(map[int]int),
	}

	var wg sync.WaitGroup
	for i := 0; i < calls; i++ {
		wg.Add(1)
		go func(rule int) {
			defer wg.Done()
			stats.addFalsePositive(rule)
		}(i % rules) // Few rules are getting hit to make the concurrency issue more likely
	}
	wg.Wait()

	// wg.Wait() orders every update before the reads below, so no locking is needed here.
	if stats.falsePositives != calls {
		t.Errorf("total false positives = %d, want %d", stats.falsePositives, calls)
	}
	if got := len(stats.falsePositivesPerRule); got != rules {
		t.Errorf("distinct rules = %d, want %d", got, rules)
	}
	for rule, got := range stats.falsePositivesPerRule {
		if want := calls / rules; got != want {
			t.Errorf("rule %d: false positives = %d, want %d", rule, got, want)
		}
	}
}
2 changes: 1 addition & 1 deletion magefile.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ func Lint() error {

// Test runs all tests.
func Test() error {
if err := sh.RunV("go", "test", "-v", "./..."); err != nil {
if err := sh.RunV("go", "test", "-v", "./...", "-race"); err != nil {
return err
}

Expand Down

0 comments on commit 27ea76a

Please sign in to comment.