Skip to content

Commit bc35aee

Browse files
feat(bazel-diff): improve performance (#120)
* Use ByteBufferPool for effective pooling of memory for file hashing
* Use parallelStream() to speed up IO operations related to file hashing
* Use Maps.difference from guava for getImpactedTargets

Co-authored-by: Maxwell Elliott <[email protected]>
1 parent f65e24b commit bc35aee

File tree

9 files changed

+276
-143
lines changed

9 files changed

+276
-143
lines changed

artifacts.bzl

+2-1
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,6 @@ BAZEL_DIFF_MAVEN_ARTIFACTS = [
77
maven.artifact("org.mockito", "mockito-core", "3.5.15", testonly = True),
88
"info.picocli:picocli:jar:4.3.2",
99
"com.google.code.gson:gson:jar:2.8.6",
10-
"com.google.guava:guava:29.0-jre"
10+
"com.google.guava:guava:29.0-jre",
11+
"org.apache.commons:commons-pool2:2.11.1",
1112
]

src/main/java/com/bazel_diff/BUILD

+2-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ java_library(
2727
"@com_google_protobuf//:protobuf_java",
2828
"@bazel_diff_maven//:info_picocli_picocli",
2929
"@bazel_diff_maven//:com_google_code_gson_gson",
30-
"@bazel_diff_maven//:com_google_guava_guava"
30+
"@bazel_diff_maven//:com_google_guava_guava",
31+
"@bazel_diff_maven//:org_apache_commons_commons_pool2"
3132
],
3233
javacopts = select({
3334
":enable_debug": ["-ADEBUG=true"],

src/main/java/com/bazel_diff/BazelClient.java

+65-29
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,27 @@
11
package com.bazel_diff;
22

3+
import com.google.common.hash.Hasher;
4+
import com.google.common.hash.Hashing;
35
import com.google.devtools.build.lib.query2.proto.proto2api.Build;
46

57
import java.io.*;
68
import java.nio.charset.StandardCharsets;
79
import java.io.IOException;
810
import java.nio.file.Files;
911
import java.nio.file.Path;
10-
import java.security.MessageDigest;
11-
import java.security.NoSuchAlgorithmException;
1212
import java.time.Duration;
1313
import java.time.Instant;
1414
import java.util.ArrayList;
15-
import java.util.HashMap;
1615
import java.util.List;
1716
import java.util.Map;
17+
import java.util.concurrent.atomic.AtomicReference;
1818
import java.util.stream.Collectors;
1919
import java.util.Arrays;
2020

2121
interface BazelClient {
2222
List<BazelTarget> queryAllTargets() throws IOException;
23-
Map<String, BazelSourceFileTarget> queryAllSourcefileTargets() throws IOException, NoSuchAlgorithmException;
23+
24+
Map<String, BazelSourceFileTarget> queryAllSourcefileTargets() throws Exception;
2425
}
2526

2627
class BazelClientImpl implements BazelClient {
@@ -43,8 +44,8 @@ class BazelClientImpl implements BazelClient {
4344
) {
4445
this.workingDirectory = workingDirectory.normalize();
4546
this.bazelPath = bazelPath;
46-
this.startupOptions = startupOptions != null ? Arrays.asList(startupOptions.split(" ")): new ArrayList<String>();
47-
this.commandOptions = commandOptions != null ? Arrays.asList(commandOptions.split(" ")): new ArrayList<String>();
47+
this.startupOptions = startupOptions != null ? Arrays.asList(startupOptions.split(" ")) : new ArrayList<String>();
48+
this.commandOptions = commandOptions != null ? Arrays.asList(commandOptions.split(" ")) : new ArrayList<String>();
4849
this.verbose = verbose;
4950
this.keepGoing = keepGoing;
5051
this.debug = debug;
@@ -59,11 +60,11 @@ public List<BazelTarget> queryAllTargets() throws IOException {
5960
long querySeconds = Duration.between(queryStartTime, queryEndTime).getSeconds();
6061
System.out.printf("BazelDiff: All targets queried in %d seconds%n", querySeconds);
6162
}
62-
return targets.stream().map( target -> new BazelTargetImpl(target)).collect(Collectors.toList());
63+
return targets.stream().map(target -> new BazelTargetImpl(target)).collect(Collectors.toList());
6364
}
6465

6566
@Override
66-
public Map<String, BazelSourceFileTarget> queryAllSourcefileTargets() throws IOException, NoSuchAlgorithmException {
67+
public Map<String, BazelSourceFileTarget> queryAllSourcefileTargets() throws Exception {
6768
Instant queryStartTime = Instant.now();
6869
List<Build.Target> targets = performBazelQuery("kind('source file', //...:all-targets)");
6970
Instant queryEndTime = Instant.now();
@@ -78,26 +79,59 @@ public Map<String, BazelSourceFileTarget> queryAllSourcefileTargets() throws IOE
7879
return sourceFileTargets;
7980
}
8081

81-
private Map<String, BazelSourceFileTarget> processBazelSourcefileTargets(List<Build.Target> targets, Boolean readSourcefileTargets) throws IOException, NoSuchAlgorithmException {
82-
Map<String, BazelSourceFileTarget> sourceTargets = new HashMap<>();
83-
for (Build.Target target : targets) {
84-
Build.SourceFile sourceFile = target.getSourceFile();
85-
if (sourceFile != null) {
86-
MessageDigest digest = MessageDigest.getInstance("SHA-256");
87-
digest.update(sourceFile.getNameBytes().toByteArray());
88-
for (String subinclude : sourceFile.getSubincludeList()) {
89-
digest.update(subinclude.getBytes());
90-
}
91-
BazelSourceFileTargetImpl sourceFileTarget = new BazelSourceFileTargetImpl(
92-
sourceFile.getName(),
93-
digest.digest().clone(),
94-
readSourcefileTargets ? workingDirectory : null,
95-
verbose
96-
);
97-
sourceTargets.put(sourceFileTarget.getName(), sourceFileTarget);
98-
}
82+
private Map<String, BazelSourceFileTarget> processBazelSourcefileTargets(List<Build.Target> targets, Boolean readSourcefileTargets) throws Exception {
83+
AtomicReference<Exception> exception = new AtomicReference(null);
84+
Map<String, BazelSourceFileTarget> result = targets.parallelStream().map((target -> {
85+
Build.SourceFile sourceFile = target.getSourceFile();
86+
if (sourceFile != null) {
87+
Hasher hasher = Hashing.sha256().newHasher();
88+
hasher.putBytes(sourceFile.getNameBytes().toByteArray());
89+
for (String subinclude : sourceFile.getSubincludeList()) {
90+
hasher.putBytes(subinclude.getBytes());
91+
}
92+
BazelSourceFileTargetImpl sourceFileTarget = null;
93+
try {
94+
sourceFileTarget = new BazelSourceFileTargetImpl(
95+
sourceFile.getName(),
96+
hasher.hash().asBytes().clone(),
97+
readSourcefileTargets ? workingDirectory : null,
98+
verbose
99+
);
100+
} catch (Exception e) {
101+
exception.set(e);
102+
}
103+
return new SourceTargetEntry(sourceFileTarget.getName(), sourceFileTarget);
104+
}
105+
return null;
106+
}))
107+
.filter(pair -> pair != null)
108+
.collect(Collectors.toMap(SourceTargetEntry::getKey, SourceTargetEntry::getValue));
109+
110+
//Rethrowing nested parallel exception
111+
Exception nestedException = exception.get();
112+
if (nestedException != null) {
113+
throw nestedException;
114+
}
115+
116+
return result;
117+
}
118+
119+
private static class SourceTargetEntry<K extends String, V extends BazelSourceFileTargetImpl> {
120+
private K key;
121+
private V value;
122+
123+
public SourceTargetEntry(K key, V value) {
124+
this.key = key;
125+
this.value = value;
126+
}
127+
128+
public K getKey() {
129+
return key;
130+
}
131+
132+
public V getValue() {
133+
return value;
99134
}
100-
return sourceTargets;
101135
}
102136

103137
private List<Build.Target> performBazelQuery(String query) throws IOException {
@@ -134,18 +168,20 @@ private List<Build.Target> performBazelQuery(String query) throws IOException {
134168
BufferedReader stdError = new BufferedReader(new InputStreamReader(process.getErrorStream()));
135169
Thread tStdError = new Thread(new Runnable() {
136170
String line = null;
171+
137172
public void run() {
138173
try {
139174
while ((line = stdError.readLine()) != null) {
140175
if (verbose) {
141176
System.out.println(line);
142177
}
143178

144-
if(Thread.currentThread().isInterrupted()) {
179+
if (Thread.currentThread().isInterrupted()) {
145180
return;
146181
}
147182
}
148-
} catch(IOException e) {}
183+
} catch (IOException e) {
184+
}
149185
}
150186
});
151187
tStdError.start();

src/main/java/com/bazel_diff/BazelRule.java

+11-10
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
package com.bazel_diff;
22

3-
import java.security.MessageDigest;
4-
import java.security.NoSuchAlgorithmException;
53
import java.util.List;
4+
5+
import com.google.common.hash.Hasher;
6+
import com.google.common.hash.Hashing;
67
import com.google.devtools.build.lib.query2.proto.proto2api.Build;
78
import java.util.stream.Collectors;
89

910
interface BazelRule {
10-
byte[] getDigest() throws NoSuchAlgorithmException;
11+
byte[] getDigest();
1112
List<String> getRuleInputList();
1213
String getName();
1314
}
@@ -20,15 +21,15 @@ class BazelRuleImpl implements BazelRule {
2021
}
2122

2223
@Override
23-
public byte[] getDigest() throws NoSuchAlgorithmException {
24-
MessageDigest digest = MessageDigest.getInstance("SHA-256");
25-
digest.update(rule.getRuleClassBytes().toByteArray());
26-
digest.update(rule.getNameBytes().toByteArray());
27-
digest.update(rule.getSkylarkEnvironmentHashCodeBytes().toByteArray());
24+
public byte[] getDigest() {
25+
Hasher hasher = Hashing.sha256().newHasher();
26+
hasher.putBytes(rule.getRuleClassBytes().toByteArray());
27+
hasher.putBytes(rule.getNameBytes().toByteArray());
28+
hasher.putBytes(rule.getSkylarkEnvironmentHashCodeBytes().toByteArray());
2829
for (Build.Attribute attribute : rule.getAttributeList()) {
29-
digest.update(attribute.toByteArray());
30+
hasher.putBytes(attribute.toByteArray());
3031
}
31-
return digest.digest();
32+
return hasher.hash().asBytes();
3233
}
3334

3435
@Override

src/main/java/com/bazel_diff/BazelSourceFileTarget.java

+20-32
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
11
package com.bazel_diff;
22

3+
import com.google.common.hash.Hasher;
4+
import com.google.common.hash.Hashing;
5+
36
import java.io.*;
47
import java.nio.ByteBuffer;
5-
import java.nio.channels.FileChannel;
68
import java.nio.file.Path;
79
import java.nio.file.Paths;
8-
import java.io.IOException;
9-
import java.security.MessageDigest;
10-
import java.security.NoSuchAlgorithmException;
1110

1211
interface BazelSourceFileTarget {
1312
String getName();
@@ -17,51 +16,40 @@ interface BazelSourceFileTarget {
1716
class BazelSourceFileTargetImpl implements BazelSourceFileTarget {
1817
private String name;
1918
private byte[] digest;
19+
static private ByteBufferPool pool = new ByteBufferPool(1024, 10240); //10kb
2020

21-
private void digestLargeFile(MessageDigest finalDigest, FileChannel inChannel) throws IOException {
22-
int bufferSize = 10240; // 10kb
23-
ByteBuffer buffer = ByteBuffer.allocate(bufferSize);
24-
while (inChannel.read(buffer) != -1) {
21+
private void digest(Hasher finalDigest, InputStream stream) throws Exception {
22+
ByteBuffer buffer = pool.borrow();
23+
byte[] array = buffer.array(); //Available for non-direct buffers
24+
Integer length = 0;
25+
while (true) {
26+
if (!((length = stream.read(array)) != -1)) break;
2527
buffer.flip();
26-
finalDigest.update(buffer);
28+
finalDigest.putBytes(array, 0, length);
2729
buffer.clear();
2830
}
31+
pool.recycle(buffer);
2932
}
3033

31-
private void digestSmallFile(MessageDigest finalDigest, FileChannel inChannel) throws IOException {
32-
long fileSize = inChannel.size();
33-
ByteBuffer buffer = ByteBuffer.allocate((int) fileSize);
34-
inChannel.read(buffer);
35-
buffer.flip();
36-
finalDigest.update(buffer);
37-
}
38-
39-
BazelSourceFileTargetImpl(String name, byte[] digest, Path workingDirectory, Boolean verbose)
40-
throws IOException, NoSuchAlgorithmException {
34+
BazelSourceFileTargetImpl(String name, byte[] digest, Path workingDirectory, Boolean verbose) throws Exception {
4135
this.name = name;
42-
MessageDigest finalDigest = MessageDigest.getInstance("SHA-256");
36+
Hasher hasher = Hashing.sha256().newHasher();
4337
if (workingDirectory != null && name.startsWith("//")) {
4438
String filenameSubstring = name.substring(2);
4539
String filenamePath = filenameSubstring.replaceFirst(":", "/");
4640
Path absoluteFilePath = Paths.get(workingDirectory.toString(), filenamePath);
47-
try (RandomAccessFile sourceFile = new RandomAccessFile(absoluteFilePath.toString(), "r")) {
48-
FileChannel inChannel = sourceFile.getChannel();
49-
if (inChannel.size() > 1048576) { // 1mb
50-
digestLargeFile(finalDigest, inChannel);
51-
} else {
52-
digestSmallFile(finalDigest, inChannel);
53-
}
54-
sourceFile.close();
55-
inChannel.close();
41+
42+
try (InputStream stream = new BufferedInputStream(new FileInputStream(absoluteFilePath.toString()))) {
43+
digest(hasher, stream);
5644
} catch (FileNotFoundException e) {
5745
if (verbose) {
5846
System.out.printf("BazelDiff: [Warning] file %s not found%n", absoluteFilePath);
5947
}
6048
}
6149
}
62-
finalDigest.update(digest);
63-
finalDigest.update(name.getBytes());
64-
this.digest = finalDigest.digest();
50+
hasher.putBytes(digest);
51+
hasher.putBytes(name.getBytes());
52+
this.digest = hasher.hash().asBytes();
6553
}
6654

6755
@Override
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
package com.bazel_diff;
2+
3+
import com.google.common.base.Supplier;
4+
import com.google.common.base.Suppliers;
5+
import org.apache.commons.pool2.PooledObject;
6+
import org.apache.commons.pool2.PooledObjectFactory;
7+
import org.apache.commons.pool2.impl.DefaultPooledObject;
8+
import org.apache.commons.pool2.impl.GenericObjectPool;
9+
import org.apache.commons.pool2.impl.GenericObjectPoolConfig;
10+
11+
import java.nio.ByteBuffer;
12+
13+
public class ByteBufferPool {
14+
private Integer poolSize;
15+
private Integer bufferSize;
16+
private Supplier<GenericObjectPool<ByteBuffer>> delegate = Suppliers.memoize(() -> {
17+
GenericObjectPoolConfig<ByteBuffer> config = new GenericObjectPoolConfig<>();
18+
config.setMaxTotal(poolSize);
19+
return new GenericObjectPool(new ByteBufferObjectFactory(bufferSize), config);
20+
});
21+
22+
public ByteBufferPool(Integer poolSize, Integer bufferSize) {
23+
this.poolSize = poolSize;
24+
this.bufferSize = bufferSize;
25+
}
26+
27+
public ByteBuffer borrow() throws Exception {
28+
return delegate.get().borrowObject();
29+
}
30+
31+
public void recycle(ByteBuffer buffer) {
32+
delegate.get().returnObject(buffer);
33+
}
34+
}
35+
36+
class ByteBufferObjectFactory implements PooledObjectFactory<ByteBuffer> {
37+
private Integer bufferSize;
38+
39+
public ByteBufferObjectFactory(Integer bufferSize) {
40+
this.bufferSize = bufferSize;
41+
}
42+
43+
@Override
44+
public void activateObject(PooledObject<ByteBuffer> p) throws Exception {
45+
p.getObject().clear();
46+
}
47+
48+
@Override
49+
public void destroyObject(PooledObject<ByteBuffer> p) throws Exception {
50+
p.getObject().clear();
51+
}
52+
53+
@Override
54+
public PooledObject<ByteBuffer> makeObject() throws Exception {
55+
return new DefaultPooledObject(ByteBuffer.allocate(bufferSize));
56+
}
57+
58+
@Override
59+
public void passivateObject(PooledObject<ByteBuffer> p) throws Exception {
60+
p.getObject().clear();
61+
}
62+
63+
@Override
64+
public boolean validateObject(PooledObject<ByteBuffer> p) {
65+
return p.getObject().capacity() == bufferSize && !p.getObject().isDirect();
66+
}
67+
}

0 commit comments

Comments
 (0)