Skip to content

Commit 2e21dde

Browse files
authored
Improve key filtering (#4338)
## Summary <!-- Ideally, there is an attached Linear ticket that will describe the "why". If relevant, use this section to call out any additional information you'd like to _highlight_ to the reviewer. --> Currently, we can only do a direct match for key attributes (e.g. `workspace_id:1`). This PR improves the matching, namely: * Add wildcard support (`service:*foo*` will match `service:barfoobaz`) * Add multi-word support `service:'image processor'` will match `service:image processor`) Additionally this PR improves code quality: * Use a true query builder instead of rolling our code. I chose to use [squirrel](https://github.com/Masterminds/squirrel). This makes conditional queries a lot easier and removes usages of `fmt.Sprintf` which is just ripe for a sql injection. * Drop support for [testify suite](https://github.com/stretchr/testify#suite-package). This is currently unusable with vscode (golang/vscode-go#2414). ## How did you test this change? Verified space search works ![Screenshot 2023-02-21 at 10 50 49 AM](https://user-images.githubusercontent.com/58678/220422792-fb2dcd79-5ad8-453f-9feb-81f354f539ab.png) Verified wildcard search works ![Screenshot 2023-02-21 at 10 56 00 AM](https://user-images.githubusercontent.com/58678/220422942-34fdf347-268a-4372-8fee-19280e2ef39b.png) Verified we can search with body as well ![Screenshot 2023-02-21 at 10 56 36 AM](https://user-images.githubusercontent.com/58678/220423058-e8770d97-7a12-437a-bbce-b3a3a340895c.png) ## Are there any deployment considerations? <!-- Backend - Do we need to consider migrations or backfilling data? --> N/A
1 parent 40f5986 commit 2e21dde

File tree

7 files changed

+272
-224
lines changed

7 files changed

+272
-224
lines changed

backend/clickhouse/logs.go

+132-44
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@ import (
66
"strings"
77
"time"
88

9+
sq "github.com/Masterminds/squirrel"
910
modelInputs "github.com/highlight-run/highlight/backend/private-graph/graph/model"
1011
e "github.com/pkg/errors"
11-
log "github.com/sirupsen/logrus"
1212
)
1313

1414
type LogRow struct {
@@ -26,14 +26,8 @@ type LogRow struct {
2626
SecureSessionId string
2727
}
2828

29-
const LogsTable = "logs"
30-
3129
func (client *Client) BatchWriteLogRows(ctx context.Context, logRows []*LogRow) error {
32-
query := fmt.Sprintf(`
33-
INSERT INTO %s
34-
`, LogsTable)
35-
36-
batch, err := client.conn.PrepareBatch(ctx, query)
30+
batch, err := client.conn.PrepareBatch(ctx, "INSERT INTO logs")
3731

3832
if err != nil {
3933
return e.Wrap(err, "failed to create logs batch")
@@ -49,20 +43,20 @@ func (client *Client) BatchWriteLogRows(ctx context.Context, logRows []*LogRow)
4943
}
5044

5145
func (client *Client) ReadLogs(ctx context.Context, projectID int, params modelInputs.LogsParamsInput) ([]*modelInputs.LogLine, error) {
52-
whereClause := buildWhereClause(projectID, params)
46+
query := makeSelectQuery("Timestamp, SeverityText, Body, LogAttributes", projectID, params)
47+
query = query.Limit(100)
5348

54-
query := fmt.Sprintf(`
55-
SELECT Timestamp, SeverityText, Body, LogAttributes FROM %s
56-
%s
57-
LIMIT 100
58-
`, LogsTable, whereClause)
59-
60-
log.WithContext(ctx).Info(query)
49+
sql, args, err := query.ToSql()
50+
if err != nil {
51+
return nil, err
52+
}
6153

6254
rows, err := client.conn.Query(
6355
ctx,
64-
query,
56+
sql,
57+
args...,
6558
)
59+
6660
if err != nil {
6761
return nil, err
6862
}
@@ -97,32 +91,37 @@ func (client *Client) ReadLogs(ctx context.Context, projectID int, params modelI
9791
}
9892

9993
func (client *Client) ReadLogsTotalCount(ctx context.Context, projectID int, params modelInputs.LogsParamsInput) (uint64, error) {
100-
whereClause := buildWhereClause(projectID, params)
101-
102-
query := fmt.Sprintf(`SELECT COUNT(*) FROM %s %s`, LogsTable, whereClause)
103-
104-
log.WithContext(ctx).Info(query)
94+
query := makeSelectQuery("COUNT(*)", projectID, params)
95+
sql, args, err := query.ToSql()
96+
if err != nil {
97+
return 0, err
98+
}
10599

106100
var count uint64
107-
err := client.conn.QueryRow(
101+
err = client.conn.QueryRow(
108102
ctx,
109-
query,
103+
sql,
104+
args...,
110105
).Scan(&count)
111106

112107
return count, err
113108
}
114109

115110
func (client *Client) LogsKeys(ctx context.Context, projectID int) ([]*modelInputs.LogKey, error) {
116-
rows, err := client.conn.Query(ctx,
117-
`
118-
SELECT arrayJoin(LogAttributes.keys) as key, count() as cnt
119-
FROM logs
120-
WHERE ProjectId = ?
121-
GROUP BY key
122-
ORDER BY cnt DESC
123-
LIMIT 50;`,
124-
projectID,
125-
)
111+
query := sq.Select("arrayJoin(LogAttributes.keys) as key, count() as cnt").
112+
From("logs").
113+
Where(sq.Eq{"ProjectId": projectID}).
114+
GroupBy("key").
115+
OrderBy("cnt DESC").
116+
Limit(50)
117+
118+
sql, args, err := query.ToSql()
119+
120+
if err != nil {
121+
return nil, err
122+
}
123+
124+
rows, err := client.conn.Query(ctx, sql, args...)
126125

127126
if err != nil {
128127
return nil, err
@@ -150,16 +149,23 @@ func (client *Client) LogsKeys(ctx context.Context, projectID int) ([]*modelInpu
150149
}
151150

152151
func (client *Client) LogsKeyValues(ctx context.Context, projectID int, keyName string) ([]string, error) {
153-
rows, err := client.conn.Query(ctx,
154-
`
155-
SELECT LogAttributes[?] as value, count() as cnt FROM logs
156-
WHERE ProjectId = ?
157-
GROUP BY value
158-
ORDER BY cnt DESC
159-
LIMIT 50;`,
160-
keyName,
161-
projectID,
162-
)
152+
query := sq.Select("LogAttributes[?] as value, count() as cnt").
153+
From("logs").
154+
Where(sq.Eq{"ProjectId": projectID}).
155+
GroupBy("value").
156+
OrderBy("cnt DESC").
157+
Limit(50)
158+
159+
sql, args, err := query.ToSql()
160+
161+
// Injects `keyName` into LogAttributes[?]
162+
argsWithKeyName := append([]interface{}{keyName}, args...)
163+
164+
if err != nil {
165+
return nil, err
166+
}
167+
168+
rows, err := client.conn.Query(ctx, sql, argsWithKeyName...)
163169

164170
if err != nil {
165171
return nil, err
@@ -217,5 +223,87 @@ func makeSeverityText(severityText string) modelInputs.SeverityText {
217223
default:
218224
return modelInputs.SeverityTextInfo
219225
}
226+
}
227+
228+
func makeSelectQuery(selectStr string, projectID int, params modelInputs.LogsParamsInput) sq.SelectBuilder {
229+
query := sq.Select(selectStr).
230+
From("logs").
231+
Where(sq.Eq{"ProjectId": projectID}).
232+
Where(sq.LtOrEq{"toUInt64(toDateTime(Timestamp))": uint64(params.DateRange.EndDate.Unix())}).
233+
Where(sq.GtOrEq{"toUInt64(toDateTime(Timestamp))": uint64(params.DateRange.StartDate.Unix())})
234+
235+
filters := makeFilters(params.Query)
236+
237+
if len(filters.body) > 0 {
238+
query = query.Where(sq.ILike{"Body": filters.body})
239+
}
240+
241+
for key, value := range filters.attributes {
242+
column := fmt.Sprintf("LogAttributes['%s']", key)
243+
if strings.Contains(value, "%") {
244+
query = query.Where(sq.Like{column: value})
245+
246+
} else {
247+
query = query.Where(sq.Eq{column: value})
248+
}
249+
}
250+
251+
return query
252+
}
253+
254+
type filters struct {
255+
body string
256+
attributes map[string]string
257+
}
258+
259+
func makeFilters(query string) filters {
260+
filters := filters{
261+
body: "",
262+
attributes: make(map[string]string),
263+
}
264+
265+
queries := splitQuery(query)
266+
267+
for _, q := range queries {
268+
parts := strings.Split(q, ":")
269+
270+
if len(parts) == 1 && len(parts[0]) > 0 {
271+
body := parts[0]
272+
if strings.Contains(body, "*") {
273+
body = strings.ReplaceAll(body, "*", "%")
274+
}
275+
filters.body = filters.body + body
276+
} else if len(parts) == 2 {
277+
wildcardValue := strings.ReplaceAll(parts[1], "*", "%")
278+
filters.attributes[parts[0]] = wildcardValue
279+
}
280+
}
281+
282+
if len(filters.body) > 0 && !strings.Contains(filters.body, "%") {
283+
filters.body = "%" + filters.body + "%"
284+
}
285+
286+
return filters
287+
}
288+
289+
// Splits the query by spaces _unless_ it is quoted
290+
// "some thing" => ["some", "thing"]
291+
// "some thing 'spaced string' else" => ["some", "thing", "spaced string", "else"]
292+
func splitQuery(query string) []string {
293+
var result []string
294+
inquote := false
295+
i := 0
296+
for j, c := range query {
297+
if c == '\'' {
298+
inquote = !inquote
299+
} else if c == ' ' && !inquote {
300+
result = append(result, unquoteAndTrim(query[i:j]))
301+
i = j + i
302+
}
303+
}
304+
return append(result, unquoteAndTrim(query[i:]))
305+
}
220306

307+
func unquoteAndTrim(s string) string {
308+
return strings.ReplaceAll(strings.Trim(s, " "), "'", "")
221309
}

0 commit comments

Comments
 (0)