Skip to content

Commit 3098e27

Browse files
authored
Add flag to use cquery (#178)
* Add flag to use cquery This PR adds two flags `--useCquery` and `--cqueryCommandOptions` which enables using `bazel cquery` instead of `bazel query` when computing build graph. This creates more accurate build graph and hence hash computation is more accurate with less false positives. A e2e test is added to demonstrate this change. * use deps() for cquery * add test covering source code change * Update E2ETest.kt fix comment
1 parent 71622f4 commit 3098e27

18 files changed

+396
-66
lines changed

README.md

+24-3
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,14 @@ Commands:
8181
### `generate-hashes` command
8282

8383
```terminal
84-
Usage: bazel-diff generate-hashes [-hkvV] -b=<bazelPath> [-s=<seedFilepaths>]
85-
-w=<workspacePath>
84+
Usage: bazel-diff generate-hashes [-hkvV] [--[no-]useCquery] [-b=<bazelPath>]
85+
[--contentHashPath=<contentHashPath>]
86+
[-s=<seedFilepaths>] -w=<workspacePath>
8687
[-co=<bazelCommandOptions>]...
88+
[--cqueryCommandOptions=<cqueryCommandOptions>
89+
]...
90+
[--fineGrainedHashExternalRepos=<fineGrainedHa
91+
shExternalRepos>]...
8792
[-so=<bazelStartupOptions>]... <outputPath>
8893
Writes to a file the SHA256 hashes for each Bazel Target in the provided
8994
workspace.
@@ -95,12 +100,16 @@ workspace.
95100
binary available in PATH will be used.
96101
-co, --bazelCommandOptions=<bazelCommandOptions>
97102
Additional space separated Bazel command options used
98-
when invoking Bazel
103+
when invoking `bazel query`
99104
--contentHashPath=<contentHashPath>
100105
Path to content hash json file. It's a map which maps
101106
relative file path from workspace path to its
102107
content hash. Files in this map will skip content
103108
hashing and use provided value
109+
--cqueryCommandOptions=<cqueryCommandOptions>
110+
Additional space separated Bazel command options used
111+
when invoking `bazel cquery`. This flag is has no
112+
effect if `--useCquery`is false.
104113
--fineGrainedHashExternalRepos=<fineGrainedHashExternalRepos>
105114
Comma separate list of external repos in which
106115
fine-grained hashes are computed for the targets.
@@ -124,12 +133,24 @@ workspace.
124133
-so, --bazelStartupOptions=<bazelStartupOptions>
125134
Additional space separated Bazel client startup
126135
options used when invoking Bazel
136+
--[no-]useCquery If true, use cquery instead of query when generating
137+
dependency graphs. Using cquery would yield more
138+
accurate build graph at the cost of slower query
139+
execution. When this is set, one usually also wants
140+
to set `--cqueryCommandOptions` to specify a
141+
targeting platform. Note that this flag only works
142+
with Bazel 6.2.0 or above because lower versions
143+
does not support `--query_file` flag.
127144
-v, --verbose Display query string, missing files and elapsed time
128145
-V, --version Print version information and exit.
129146
-w, --workspacePath=<workspacePath>
130147
Path to Bazel workspace directory.
131148
```
132149

150+
**Note**: `--useCquery` flag may not work with very large repos due to limitation
151+
of Bazel. You may want to fallback to use normal query mode in that case.
152+
See https://github.com/bazelbuild/bazel/issues/17743 for more details.
153+
133154
### What does the SHA256 value of `generate-hashes` represent?
134155

135156
`generate-hashes` is a canonical SHA256 value representing all attributes and inputs into a target. These inputs

cli/BUILD

+2-18
Original file line numberDiff line numberDiff line change
@@ -25,34 +25,18 @@ kt_jvm_library(
2525
name = "cli-lib",
2626
srcs = glob(["src/main/kotlin/**/*.kt"]),
2727
deps = [
28-
":build_java_proto",
2928
"@bazel_diff_maven//:com_google_code_gson_gson",
3029
"@bazel_diff_maven//:com_google_guava_guava",
3130
"@bazel_diff_maven//:info_picocli_picocli",
3231
"@bazel_diff_maven//:io_insert_koin_koin_core_jvm",
3332
"@bazel_diff_maven//:org_apache_commons_commons_pool2",
3433
"@bazel_diff_maven//:org_jetbrains_kotlinx_kotlinx_coroutines_core_jvm",
34+
"@bazel_tools//src/main/protobuf:analysis_v2_java_proto",
35+
"@bazel_tools//src/main/protobuf:build_java_proto",
3536
"@com_google_protobuf//:protobuf_java",
3637
],
3738
)
3839

39-
java_proto_library(
40-
name = "build_java_proto",
41-
deps = [":build_proto"],
42-
)
43-
44-
proto_library(
45-
name = "build_proto",
46-
srcs = [":build_proto_gen"],
47-
)
48-
49-
genrule(
50-
name = "build_proto_gen",
51-
srcs = ["@bazel_tools//src/main/protobuf:build.proto"],
52-
outs = ["build.proto"],
53-
cmd = "cp $< $@",
54-
)
55-
5640
kt_jvm_test(
5741
name = "BuildGraphHasherTest",
5842
test_class = "com.bazel_diff.hash.BuildGraphHasherTest",

cli/src/main/kotlin/com/bazel_diff/bazel/BazelClient.kt

+21-3
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,33 @@ import org.koin.core.component.KoinComponent
66
import org.koin.core.component.inject
77
import java.util.*
88

9-
class BazelClient(private val fineGrainedHashExternalRepos: Set<String>) : KoinComponent {
9+
class BazelClient(private val useCquery: Boolean, private val fineGrainedHashExternalRepos: Set<String>) : KoinComponent {
1010
private val logger: Logger by inject()
1111
private val queryService: BazelQueryService by inject()
1212

1313
suspend fun queryAllTargets(): List<BazelTarget> {
1414
val queryEpoch = Calendar.getInstance().getTimeInMillis()
1515

16-
val query = listOf("//external:all-targets", "//...:all-targets") + fineGrainedHashExternalRepos.map { "@$it//...:all-targets" }
17-
val targets = queryService.query(query.joinToString(" + ") { "'$it'" })
16+
val repoTargetsQuery = listOf("//external:all-targets")
17+
val targets = if (useCquery) {
18+
// Explicitly listing external repos here sometimes causes issues mentioned at
19+
// https://bazel.build/query/cquery#recursive-target-patterns. Hence, we query all dependencies with `deps`
20+
// instead. However, we still need to append all "//external:*" targets because fine-grained hash
21+
// computation depends on hashing of source files in external repos as well, which is limited to repos
22+
// explicitly mentioned in `fineGrainedHashExternalRepos` flag. Therefore, for any repos not mentioned there
23+
// we are still relying on the repo-generation target under `//external` to compute the hash.
24+
//
25+
// In addition, we must include all source dependencies in this query in order for them to show up in
26+
// `configuredRuleInput`. Hence, one must not filter them out with `kind(rule, deps(..))`. However, these
27+
// source targets are omitted inside BazelQueryService with the custom starlark function used to print
28+
// labels.
29+
(queryService.query("deps(//...:all-targets)", useCquery = true) +
30+
queryService.query(repoTargetsQuery.joinToString(" + ") { "'$it'" }))
31+
.distinctBy { it.rule.name }
32+
} else {
33+
val buildTargetsQuery = listOf("//...:all-targets") + fineGrainedHashExternalRepos.map { "@$it//...:all-targets" }
34+
queryService.query((repoTargetsQuery + buildTargetsQuery).joinToString(" + ") { "'$it'" })
35+
}
1836
val queryDuration = Calendar.getInstance().getTimeInMillis() - queryEpoch
1937
logger.i { "All targets queried in $queryDuration" }
2038
return targets.mapNotNull { target: Build.Target ->

cli/src/main/kotlin/com/bazel_diff/bazel/BazelQueryService.kt

+93-27
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,13 @@ package com.bazel_diff.bazel
33
import com.bazel_diff.log.Logger
44
import com.bazel_diff.process.Redirect
55
import com.bazel_diff.process.process
6+
import com.google.devtools.build.lib.analysis.AnalysisProtosV2
67
import com.google.devtools.build.lib.query2.proto.proto2api.Build
78
import kotlinx.coroutines.ExperimentalCoroutinesApi
89
import kotlinx.coroutines.runBlocking
910
import org.koin.core.component.KoinComponent
1011
import org.koin.core.component.inject
11-
import java.nio.charset.StandardCharsets
12+
import java.io.File
1213
import java.nio.file.Files
1314
import java.nio.file.Path
1415

@@ -17,16 +18,53 @@ class BazelQueryService(
1718
private val bazelPath: Path,
1819
private val startupOptions: List<String>,
1920
private val commandOptions: List<String>,
20-
private val keepGoing: Boolean?,
21+
private val cqueryOptions: List<String>,
22+
private val keepGoing: Boolean,
2123
private val noBazelrc: Boolean,
2224
) : KoinComponent {
2325
private val logger: Logger by inject()
2426

27+
suspend fun query(query: String, useCquery: Boolean = false): List<Build.Target> {
28+
// Unfortunately, there is still no direct way to tell if a target is compatible or not with the proto output
29+
// by itself. So we do an extra cquery with the trick at
30+
// https://bazel.build/extending/platforms#cquery-incompatible-target-detection to first find all compatible
31+
// targets.
32+
val compatibleTargetSet =
33+
if (useCquery) {
34+
runQuery(query, useCquery = true, outputCompatibleTargets = true).useLines {
35+
it.filter { it.isNotBlank() }.toSet()
36+
}
37+
} else {
38+
emptySet()
39+
}
40+
val outputFile = runQuery(query, useCquery)
41+
42+
val targets = outputFile.inputStream().buffered().use { proto ->
43+
if (useCquery) {
44+
val cqueryResult = AnalysisProtosV2.CqueryResult.parseFrom(proto)
45+
cqueryResult.resultsList.filter { it.target.rule.name in compatibleTargetSet }.map { it.target }
46+
} else {
47+
mutableListOf<Build.Target>().apply {
48+
while (true) {
49+
val target = Build.Target.parseDelimitedFrom(proto) ?: break
50+
// EOF
51+
add(target)
52+
}
53+
}
54+
}
55+
}
56+
57+
return targets
58+
}
59+
2560
@OptIn(ExperimentalCoroutinesApi::class)
26-
suspend fun query(query: String): List<Build.Target> {
27-
val tempFile = Files.createTempFile(null, ".txt")
28-
val outputFile = Files.createTempFile(null, ".bin")
29-
Files.write(tempFile, query.toByteArray(StandardCharsets.UTF_8))
61+
private suspend fun runQuery(query: String, useCquery: Boolean, outputCompatibleTargets: Boolean = false): File {
62+
val queryFile = Files.createTempFile(null, ".txt").toFile()
63+
queryFile.deleteOnExit()
64+
val outputFile = Files.createTempFile(null, ".bin").toFile()
65+
outputFile.deleteOnExit()
66+
67+
queryFile.writeText(query)
3068
logger.i { "Executing Query: $query" }
3169

3270
val cmd: MutableList<String> = ArrayList<String>().apply {
@@ -35,41 +73,69 @@ class BazelQueryService(
3573
add("--bazelrc=/dev/null")
3674
}
3775
addAll(startupOptions)
38-
add("query")
76+
if (useCquery) {
77+
add("cquery")
78+
add("--transitions=lite")
79+
} else {
80+
add("query")
81+
}
3982
add("--output")
40-
add("streamed_proto")
41-
add("--order_output=no")
42-
if (keepGoing != null && keepGoing) {
83+
if (useCquery) {
84+
if (outputCompatibleTargets) {
85+
add("starlark")
86+
add("--starlark:file")
87+
val cqueryOutputFile = Files.createTempFile(null, ".cquery").toFile()
88+
cqueryOutputFile.deleteOnExit()
89+
cqueryOutputFile.writeText("""
90+
def format(target):
91+
if providers(target) == None:
92+
# skip printing non-target results. That is, source files and generated files won't be
93+
# printed
94+
return ""
95+
if "IncompatiblePlatformProvider" not in providers(target):
96+
label = str(target.label)
97+
if label.startswith("@//"):
98+
# normalize label to be consistent with content inside proto
99+
return label[1:]
100+
else:
101+
return label
102+
return ""
103+
""".trimIndent())
104+
add(cqueryOutputFile.toString())
105+
} else {
106+
// Unfortunately, cquery does not support streamed_proto yet.
107+
// See https://github.com/bazelbuild/bazel/issues/17743. This poses an issue for large monorepos.
108+
add("proto")
109+
}
110+
} else {
111+
add("streamed_proto")
112+
}
113+
if (!useCquery) {
114+
add("--order_output=no")
115+
}
116+
if (keepGoing) {
43117
add("--keep_going")
44118
}
45-
addAll(commandOptions)
119+
if (useCquery) {
120+
addAll(cqueryOptions)
121+
} else {
122+
addAll(commandOptions)
123+
}
46124
add("--query_file")
47-
add(tempFile.toString())
125+
add(queryFile.toString())
48126
}
49127

50128
val result = runBlocking {
51129
process(
52130
*cmd.toTypedArray(),
53-
stdout = Redirect.ToFile(outputFile.toFile()),
131+
stdout = Redirect.ToFile(outputFile),
54132
workingDirectory = workingDirectory.toFile(),
55133
stderr = Redirect.PRINT,
56134
destroyForcibly = true,
57135
)
58136
}
59137

60-
if(result.resultCode != 0) throw RuntimeException("Bazel query failed, exit code ${result.resultCode}")
61-
62-
val targets = mutableListOf<Build.Target>()
63-
outputFile.toFile().inputStream().buffered().use {stream ->
64-
while (true) {
65-
val target = Build.Target.parseDelimitedFrom(stream) ?: break
66-
// EOF
67-
targets.add(target)
68-
}
69-
}
70-
71-
Files.delete(tempFile)
72-
Files.delete(outputFile)
73-
return targets
138+
if (result.resultCode != 0) throw RuntimeException("Bazel query failed, exit code ${result.resultCode}")
139+
return outputFile
74140
}
75141
}

cli/src/main/kotlin/com/bazel_diff/bazel/BazelRule.kt

+10-2
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,16 @@ class BazelRule(private val rule: Build.Rule) {
2222
}
2323
}
2424

25-
fun ruleInputList(fineGrainedHashExternalRepos: Set<String>): List<String> {
26-
return rule.ruleInputList.map { ruleInput: String -> transformRuleInput(fineGrainedHashExternalRepos, ruleInput) }
25+
fun ruleInputList(useCquery: Boolean, fineGrainedHashExternalRepos: Set<String>): List<String> {
26+
return if (useCquery) {
27+
rule.configuredRuleInputList.map { it.label } +
28+
rule.ruleInputList.map { ruleInput: String -> transformRuleInput(fineGrainedHashExternalRepos, ruleInput) }
29+
// Only keep the non-fine-grained ones because the others are already covered by configuredRuleInputList
30+
.filter { it.startsWith("//external:") }
31+
.distinct()
32+
} else {
33+
rule.ruleInputList.map { ruleInput: String -> transformRuleInput(fineGrainedHashExternalRepos, ruleInput) }
34+
}
2735
}
2836

2937
val name: String = rule.name

cli/src/main/kotlin/com/bazel_diff/cli/GenerateHashesCommand.kt

+19-1
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ class GenerateHashesCommand : Callable<Int> {
5959

6060
@CommandLine.Option(
6161
names = ["-co", "--bazelCommandOptions"],
62-
description = ["Additional space separated Bazel command options used when invoking Bazel"],
62+
description = ["Additional space separated Bazel command options used when invoking `bazel query`"],
6363
scope = CommandLine.ScopeType.INHERIT,
6464
converter = [OptionsConverter::class],
6565
)
@@ -73,6 +73,22 @@ class GenerateHashesCommand : Callable<Int> {
7373
)
7474
var fineGrainedHashExternalRepos: Set<String> = emptySet()
7575

76+
@CommandLine.Option(
77+
names = ["--useCquery"],
78+
negatable = true,
79+
description = ["If true, use cquery instead of query when generating dependency graphs. Using cquery would yield more accurate build graph at the cost of slower query execution. When this is set, one usually also wants to set `--cqueryCommandOptions` to specify a targeting platform. Note that this flag only works with Bazel 6.2.0 or above because lower versions does not support `--query_file` flag."],
80+
scope = CommandLine.ScopeType.INHERIT
81+
)
82+
var useCquery = false
83+
84+
@CommandLine.Option(
85+
names = ["--cqueryCommandOptions"],
86+
description = ["Additional space separated Bazel command options used when invoking `bazel cquery`. This flag is has no effect if `--useCquery`is false."],
87+
scope = CommandLine.ScopeType.INHERIT,
88+
converter = [OptionsConverter::class],
89+
)
90+
var cqueryCommandOptions: List<String> = emptyList()
91+
7692
@CommandLine.Option(
7793
names = ["-k", "--keep_going"],
7894
negatable = true,
@@ -108,6 +124,8 @@ class GenerateHashesCommand : Callable<Int> {
108124
contentHashPath,
109125
bazelStartupOptions,
110126
bazelCommandOptions,
127+
cqueryCommandOptions,
128+
useCquery,
111129
keepGoing,
112130
fineGrainedHashExternalRepos,
113131
),

cli/src/main/kotlin/com/bazel_diff/di/Modules.kt

+6-3
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,9 @@ fun hasherModule(
2828
contentHashPath: File?,
2929
startupOptions: List<String>,
3030
commandOptions: List<String>,
31-
keepGoing: Boolean?,
31+
cqueryOptions: List<String>,
32+
useCquery: Boolean,
33+
keepGoing: Boolean,
3234
fineGrainedHashExternalRepos: Set<String>,
3335
): Module = module {
3436
val debug = System.getProperty("DEBUG", "false").equals("true")
@@ -38,14 +40,15 @@ fun hasherModule(
3840
bazelPath,
3941
startupOptions,
4042
commandOptions,
43+
cqueryOptions,
4144
keepGoing,
4245
debug
4346
)
4447
}
45-
single { BazelClient(fineGrainedHashExternalRepos) }
48+
single { BazelClient(useCquery, fineGrainedHashExternalRepos) }
4649
single { BuildGraphHasher(get()) }
4750
single { TargetHasher() }
48-
single { RuleHasher(fineGrainedHashExternalRepos) }
51+
single { RuleHasher(useCquery, fineGrainedHashExternalRepos) }
4952
single { SourceFileHasher(fineGrainedHashExternalRepos) }
5053
single(named("working-directory")) { workingDirectory }
5154
single(named("output-base")) {

0 commit comments

Comments
 (0)