Skip to content

Commit

Permalink
Improve key filtering (#4338)
Browse files Browse the repository at this point in the history
## Summary

<!--
Ideally, there is an attached Linear ticket that will describe the
"why".

If relevant, use this section to call out any additional information
you'd like to _highlight_ to the reviewer.
-->

Currently, we can only do a direct match for key attributes (e.g.
`workspace_id:1`).

This PR improves the matching, namely:

* Add wildcard support (`service:*foo*` will match `service:barfoobaz`)
* Add multi-word support (`service:'image processor'` will match
`service:image processor`)

Additionally this PR improves code quality:

* Use a true query builder instead of rolling our own. I chose to use
[squirrel](https://github.com/Masterminds/squirrel). This makes
conditional queries a lot easier and removes usages of `fmt.Sprintf`,
which is ripe for SQL injection.
* Drop support for [testify
suite](https://github.com/stretchr/testify#suite-package). This is
currently unusable with vscode
(golang/vscode-go#2414).

## How did you test this change?

Verified space search works
![Screenshot 2023-02-21 at 10 50 49
AM](https://user-images.githubusercontent.com/58678/220422792-fb2dcd79-5ad8-453f-9feb-81f354f539ab.png)

Verified wildcard search works
![Screenshot 2023-02-21 at 10 56 00
AM](https://user-images.githubusercontent.com/58678/220422942-34fdf347-268a-4372-8fee-19280e2ef39b.png)


Verified we can search with body as well
![Screenshot 2023-02-21 at 10 56 36
AM](https://user-images.githubusercontent.com/58678/220423058-e8770d97-7a12-437a-bbce-b3a3a340895c.png)



## Are there any deployment considerations?

<!--
 Backend - Do we need to consider migrations or backfilling data?
-->

N/A
  • Loading branch information
et committed Feb 22, 2023
1 parent 40f5986 commit 2e21dde
Show file tree
Hide file tree
Showing 7 changed files with 272 additions and 224 deletions.
176 changes: 132 additions & 44 deletions backend/clickhouse/logs.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ import (
"strings"
"time"

sq "github.com/Masterminds/squirrel"
modelInputs "github.com/highlight-run/highlight/backend/private-graph/graph/model"
e "github.com/pkg/errors"
log "github.com/sirupsen/logrus"
)

type LogRow struct {
Expand All @@ -26,14 +26,8 @@ type LogRow struct {
SecureSessionId string
}

const LogsTable = "logs"

func (client *Client) BatchWriteLogRows(ctx context.Context, logRows []*LogRow) error {
query := fmt.Sprintf(`
INSERT INTO %s
`, LogsTable)

batch, err := client.conn.PrepareBatch(ctx, query)
batch, err := client.conn.PrepareBatch(ctx, "INSERT INTO logs")

if err != nil {
return e.Wrap(err, "failed to create logs batch")
Expand All @@ -49,20 +43,20 @@ func (client *Client) BatchWriteLogRows(ctx context.Context, logRows []*LogRow)
}

func (client *Client) ReadLogs(ctx context.Context, projectID int, params modelInputs.LogsParamsInput) ([]*modelInputs.LogLine, error) {
whereClause := buildWhereClause(projectID, params)
query := makeSelectQuery("Timestamp, SeverityText, Body, LogAttributes", projectID, params)
query = query.Limit(100)

query := fmt.Sprintf(`
SELECT Timestamp, SeverityText, Body, LogAttributes FROM %s
%s
LIMIT 100
`, LogsTable, whereClause)

log.WithContext(ctx).Info(query)
sql, args, err := query.ToSql()
if err != nil {
return nil, err
}

rows, err := client.conn.Query(
ctx,
query,
sql,
args...,
)

if err != nil {
return nil, err
}
Expand Down Expand Up @@ -97,32 +91,37 @@ func (client *Client) ReadLogs(ctx context.Context, projectID int, params modelI
}

// ReadLogsTotalCount returns the number of log rows that match projectID and
// params, or the error from building or running the query.
//
// NOTE(review): this hunk is a diff rendering, so it shows both the older
// fmt.Sprintf/buildWhereClause lines and the makeSelectQuery/ToSql lines that
// appear to replace them — confirm against the checked-in file.
func (client *Client) ReadLogsTotalCount(ctx context.Context, projectID int, params modelInputs.LogsParamsInput) (uint64, error) {
whereClause := buildWhereClause(projectID, params)

query := fmt.Sprintf(`SELECT COUNT(*) FROM %s %s`, LogsTable, whereClause)

log.WithContext(ctx).Info(query)
// Build the COUNT(*) query with the same filters makeSelectQuery applies,
// then render it to SQL text plus positional arguments.
query := makeSelectQuery("COUNT(*)", projectID, params)
sql, args, err := query.ToSql()
if err != nil {
return 0, err
}

var count uint64
err := client.conn.QueryRow(
// Execute the rendered SQL with its bound arguments and scan the single
// result column into count.
err = client.conn.QueryRow(
ctx,
query,
sql,
args...,
).Scan(&count)

return count, err
}

func (client *Client) LogsKeys(ctx context.Context, projectID int) ([]*modelInputs.LogKey, error) {
rows, err := client.conn.Query(ctx,
`
SELECT arrayJoin(LogAttributes.keys) as key, count() as cnt
FROM logs
WHERE ProjectId = ?
GROUP BY key
ORDER BY cnt DESC
LIMIT 50;`,
projectID,
)
query := sq.Select("arrayJoin(LogAttributes.keys) as key, count() as cnt").
From("logs").
Where(sq.Eq{"ProjectId": projectID}).
GroupBy("key").
OrderBy("cnt DESC").
Limit(50)

sql, args, err := query.ToSql()

if err != nil {
return nil, err
}

rows, err := client.conn.Query(ctx, sql, args...)

if err != nil {
return nil, err
Expand Down Expand Up @@ -150,16 +149,23 @@ func (client *Client) LogsKeys(ctx context.Context, projectID int) ([]*modelInpu
}

func (client *Client) LogsKeyValues(ctx context.Context, projectID int, keyName string) ([]string, error) {
rows, err := client.conn.Query(ctx,
`
SELECT LogAttributes[?] as value, count() as cnt FROM logs
WHERE ProjectId = ?
GROUP BY value
ORDER BY cnt DESC
LIMIT 50;`,
keyName,
projectID,
)
query := sq.Select("LogAttributes[?] as value, count() as cnt").
From("logs").
Where(sq.Eq{"ProjectId": projectID}).
GroupBy("value").
OrderBy("cnt DESC").
Limit(50)

sql, args, err := query.ToSql()

// Injects `keyName` into LogAttributes[?]
argsWithKeyName := append([]interface{}{keyName}, args...)

if err != nil {
return nil, err
}

rows, err := client.conn.Query(ctx, sql, argsWithKeyName...)

if err != nil {
return nil, err
Expand Down Expand Up @@ -217,5 +223,87 @@ func makeSeverityText(severityText string) modelInputs.SeverityText {
default:
return modelInputs.SeverityTextInfo
}
}

// makeSelectQuery builds the filtered SELECT over the logs table that the log
// readers in this file share.
//
// selectStr is the raw column list (or aggregate) to select. The query is
// always restricted to projectID and to the inclusive
// [params.DateRange.StartDate, params.DateRange.EndDate] window, compared as
// Unix seconds. params.Query is parsed with makeFilters:
//
//   - a non-empty body filter becomes a case-insensitive ILIKE on Body;
//   - each attribute filter becomes a predicate on LogAttributes[key], using
//     LIKE when the value contains a % wildcard and equality otherwise.
//
// The attribute key originates from the user's search string, so it is bound
// as a query argument instead of being formatted into the SQL text.
func makeSelectQuery(selectStr string, projectID int, params modelInputs.LogsParamsInput) sq.SelectBuilder {
	query := sq.Select(selectStr).
		From("logs").
		Where(sq.Eq{"ProjectId": projectID}).
		Where(sq.LtOrEq{"toUInt64(toDateTime(Timestamp))": uint64(params.DateRange.EndDate.Unix())}).
		Where(sq.GtOrEq{"toUInt64(toDateTime(Timestamp))": uint64(params.DateRange.StartDate.Unix())})

	parsed := makeFilters(params.Query)

	if len(parsed.body) > 0 {
		query = query.Where(sq.ILike{"Body": parsed.body})
	}

	for key, value := range parsed.attributes {
		operator := "="
		if strings.Contains(value, "%") {
			operator = "LIKE"
		}

		// Only the fixed operator keyword is formatted into the SQL text;
		// the user-supplied key and value are both passed as bound arguments.
		predicate := fmt.Sprintf("LogAttributes[?] %s ?", operator)
		query = query.Where(predicate, key, value)
	}

	return query
}

// filters is the parsed form of a log search query, as built by makeFilters
// and consumed by makeSelectQuery.
type filters struct {
// body is the pattern matched against the log Body column; it is empty when
// the query has no free-text terms.
body string
// attributes maps a log attribute key to the value to match, with any `*`
// from the query already rewritten to `%`.
attributes map[string]string
}

// makeFilters parses a log search query into a body pattern and a set of
// attribute filters.
//
// The query is tokenized with splitQuery. A token containing a colon is an
// attribute filter: everything before the first colon is the key and
// everything after it is the value, so values that themselves contain colons
// (URLs, timestamps) are kept intact. Any other non-empty token is a body
// term. In both cases `*` is rewritten to the SQL wildcard `%`.
//
// Body terms are joined with a single space in query order. When the
// resulting body has no wildcard of its own it is wrapped as `%body%` so that
// it matches as a substring.
func makeFilters(query string) filters {
	result := filters{
		body:       "",
		attributes: make(map[string]string),
	}

	var bodyTerms []string
	for _, token := range splitQuery(query) {
		// Split on the first colon only; a plain Split would discard tokens
		// such as `url:http://example.com` because they yield more than two parts.
		parts := strings.SplitN(token, ":", 2)
		if len(parts) == 2 {
			result.attributes[parts[0]] = strings.ReplaceAll(parts[1], "*", "%")
			continue
		}

		if len(token) > 0 {
			bodyTerms = append(bodyTerms, strings.ReplaceAll(token, "*", "%"))
		}
	}

	// Keep the words separated; gluing them together would turn `foo bar`
	// into the pattern `%foobar%`.
	result.body = strings.Join(bodyTerms, " ")

	if len(result.body) > 0 && !strings.Contains(result.body, "%") {
		result.body = "%" + result.body + "%"
	}

	return result
}

// splitQuery splits query on spaces, except for spaces that sit inside single
// quotes. Each token has its surrounding spaces trimmed and its single quotes
// removed.
//
//	"some thing" => ["some", "thing"]
//	"some thing 'spaced string' else" => ["some", "thing", "spaced string", "else"]
//
// Consecutive spaces yield empty tokens, and an empty query yields a single
// empty token.
func splitQuery(query string) []string {
	var result []string
	inquote := false
	start := 0
	for j, c := range query {
		switch {
		case c == '\'':
			inquote = !inquote
		case c == ' ' && !inquote:
			result = append(result, unquoteAndTrim(query[start:j]))
			// The next token begins right after this one-byte space. Adding
			// the previous start here instead would skip input or run past
			// the end of the string.
			start = j + 1
		}
	}
	return append(result, unquoteAndTrim(query[start:]))
}

// unquoteAndTrim trims leading and trailing spaces from s and removes every
// single-quote character from it.
func unquoteAndTrim(s string) string {
	return strings.ReplaceAll(strings.Trim(s, " "), "'", "")
}
Loading

0 comments on commit 2e21dde

Please sign in to comment.