Skip to content

Commit

Permalink
Merge pull request #168 from m-lab/sandbox-ptdedup
Browse files Browse the repository at this point in the history
update traceroute dedup sql
  • Loading branch information
yachang authored Jun 28, 2019
2 parents 8b8b19e + 1bbd5d4 commit 7c4e1cb
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 4 deletions.
5 changes: 2 additions & 3 deletions cloud/bq/dedup.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,11 +177,10 @@ var dedupTemplateSwitch = `
// as mlab-oti:batch.traceroute_20170601.
//
// The query numbers the rows inside each group that shares the same
// (TestTime, Source.IP, DESTINATION.IP) key and keeps only the row numbered 1,
// dropping the helper row_number column from the result. The window has no
// ORDER BY, so which of several duplicate rows survives is not specified by
// the query itself.
//
// The single %s verb is the placeholder for the backtick-quoted table name;
// presumably it is filled in with fmt.Sprintf by the caller (not visible
// here -- confirm).
var dedupTemplateTraceroute = `
#standardSQL
# Select single row based on TestTime, client_ip, server_ip
SELECT * EXCEPT (row_number)
FROM ( SELECT *, ROW_NUMBER() OVER (
PARTITION BY CONCAT(STRING(TestTime), Source.IP, DESTINATION.IP)
) row_number
FROM ` + "`%s`" + `)
WHERE row_number = 1`
Expand Down
12 changes: 11 additions & 1 deletion cloud/bq/sanity.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,8 @@ func (at *AnnotatedTable) CheckIsRegular(ctx context.Context) error {
}

// GetTableDetail fetches more detailed info about a partition or table.
// Expects table to have test_id, and task_filename fields.
// Expects legacy tables to have test_id and task_filename fields. New
// traceroute tables do not have them; they are queried through
// ParseInfo.TaskFileName instead.
func GetTableDetail(ctx context.Context, dsExt *dataset.Dataset, table bqiface.Table) (*Detail, error) {
// If table is a partition, then we have to separate out the partition part for the query.
parts := strings.Split(table.TableID(), "$")
Expand Down Expand Up @@ -175,6 +176,13 @@ func GetTableDetail(ctx context.Context, dsExt *dataset.Dataset, table bqiface.T
FROM `+"`%s.%s`"+`
%s -- where clause`,
dataset, tableName, where)

tracerouteQuery := fmt.Sprintf(`
#standardSQL
SELECT COUNT(DISTINCT ParseInfo.TaskFileName) AS TestCount, COUNT(DISTINCT ParseInfo.TaskFileName) AS TaskFileCount
FROM `+"`%s.%s`"+`
%s -- where clause`,
dataset, tableName, where)

legacyNDTQuery := fmt.Sprintf(`
#standardSQL
Expand All @@ -190,6 +198,8 @@ func GetTableDetail(ctx context.Context, dsExt *dataset.Dataset, table bqiface.T
query = tcpinfoQuery
} else if parts[0] == "legacy" {
query = legacyNDTQuery
} else if parts[0] == "traceroute" {
query = tracerouteQuery
}
err := dsExt.QueryAndParse(ctx, query, &detail)
if err != nil {
Expand Down

0 comments on commit 7c4e1cb

Please sign in to comment.