Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix more Unicode bugs #24182

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/main/java/com/google/devtools/build/lib/analysis/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -1477,6 +1477,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/actions:artifacts",
"//src/main/java/com/google/devtools/build/lib/actions:commandline_item",
"//src/main/java/com/google/devtools/build/lib/collect/nestedset",
"//src/main/java/com/google/devtools/build/lib/unsafe:string",
"//src/main/java/com/google/devtools/build/lib/util",
"//src/main/java/net/starlark/java/eval",
"//third_party:jsr305",
Expand All @@ -1493,6 +1494,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/actions:artifact_expander",
"//src/main/java/com/google/devtools/build/lib/actions:artifacts",
"//src/main/java/com/google/devtools/build/lib/collect/nestedset",
"//src/main/java/com/google/devtools/build/lib/unsafe:string",
"//src/main/java/com/google/devtools/build/lib/util",
"//third_party:guava",
"//third_party:jsr305",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
import com.google.devtools.build.lib.packages.RuleTransitionData;
import com.google.devtools.build.lib.packages.WorkspaceFactory;
import com.google.devtools.build.lib.starlarkbuildapi.core.Bootstrap;
import com.google.devtools.build.lib.unsafe.StringUnsafe;
import com.google.devtools.build.lib.vfs.DigestHashFunction;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.PathFragment;
Expand Down Expand Up @@ -131,7 +132,10 @@ protected synchronized byte[] getFastDigest(PathFragment path) {

@Override
protected synchronized byte[] getDigest(PathFragment path) {
return getDigestFunction().getHashFunction().hashString(path.toString(), UTF_8).asBytes();
return getDigestFunction()
.getHashFunction()
.hashBytes(StringUnsafe.getInstance().getInternalStringBytes(path.getPathString()))
.asBytes();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableSortedMap.toImmutableSortedMap;
import static java.nio.charset.StandardCharsets.ISO_8859_1;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Comparator.comparing;

import com.github.benmanes.caffeine.cache.Caffeine;
Expand Down Expand Up @@ -146,7 +145,7 @@ protected void computeKey(
public String getFileContents(@Nullable EventHandler eventHandler) throws IOException {
ByteArrayOutputStream stream = new ByteArrayOutputStream();
newDeterministicWriter().writeOutputFile(stream);
return stream.toString(UTF_8);
return stream.toString(ISO_8859_1);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@

import static com.google.common.collect.ImmutableList.toImmutableList;
import static java.nio.charset.StandardCharsets.ISO_8859_1;
import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
Expand Down Expand Up @@ -213,7 +212,7 @@ public void writeOutputFile(OutputStream out, @Nullable EventHandler eventHandle
public String getFileContents(@Nullable EventHandler eventHandler) throws IOException {
ByteArrayOutputStream stream = new ByteArrayOutputStream();
writeOutputFile(stream, eventHandler);
return stream.toString(UTF_8);
return stream.toString(ISO_8859_1);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

package com.google.devtools.build.lib.analysis.actions;

import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.devtools.build.lib.actions.ActionExecutionContext;
import com.google.devtools.build.lib.actions.ActionKeyContext;
Expand All @@ -25,6 +24,7 @@
import com.google.devtools.build.lib.collect.nestedset.NestedSet;
import com.google.devtools.build.lib.collect.nestedset.NestedSetBuilder;
import com.google.devtools.build.lib.collect.nestedset.Order;
import com.google.devtools.build.lib.unsafe.StringUnsafe;
import com.google.devtools.build.lib.util.Fingerprint;
import javax.annotation.Nullable;
import net.starlark.java.eval.Tuple;
Expand All @@ -49,7 +49,8 @@ public LazyWriteNestedSetOfTupleAction(

@Override
public DeterministicWriter newDeterministicWriter(ActionExecutionContext ctx) {
return out -> out.write(getContents(delimiter).getBytes(UTF_8));
return out ->
out.write(StringUnsafe.getInstance().getInternalStringBytes(getContents(delimiter)));
}

/** Computes the Action key for this action by computing the fingerprint for the file contents. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@

package com.google.devtools.build.lib.analysis.actions;

import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.common.collect.ImmutableSet;
import com.google.devtools.build.lib.actions.ActionExecutionContext;
Expand All @@ -26,6 +25,7 @@
import com.google.devtools.build.lib.collect.nestedset.NestedSet;
import com.google.devtools.build.lib.collect.nestedset.NestedSetBuilder;
import com.google.devtools.build.lib.collect.nestedset.Order;
import com.google.devtools.build.lib.unsafe.StringUnsafe;
import com.google.devtools.build.lib.util.Fingerprint;
import java.util.function.Function;
import javax.annotation.Nullable;
Expand Down Expand Up @@ -71,7 +71,7 @@ public LazyWritePathsFileAction(

@Override
public DeterministicWriter newDeterministicWriter(ActionExecutionContext ctx) {
return out -> out.write(getContents().getBytes(UTF_8));
return out -> out.write(StringUnsafe.getInstance().getInternalStringBytes(getContents()));
}

/** Computes the Action key for this action by computing the fingerprint for the file contents. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

package com.google.devtools.build.lib.analysis.actions;

import static java.nio.charset.StandardCharsets.UTF_8;
import static java.nio.charset.StandardCharsets.ISO_8859_1;

import com.google.common.collect.ImmutableList;
import com.google.devtools.build.lib.actions.AbstractAction;
Expand Down Expand Up @@ -47,7 +47,8 @@ public ImmutableList<SpawnResult> expandTemplate(
final String expandedTemplate =
getExpandedTemplateUnsafe(
templateMetadata.template(), templateMetadata.substitutions(), ctx.getPathResolver());
DeterministicWriter deterministicWriter = out -> out.write(expandedTemplate.getBytes(UTF_8));
DeterministicWriter deterministicWriter =
out -> out.write(expandedTemplate.getBytes(ISO_8859_1));
return ctx.getContext(FileWriteActionContext.class)
.writeOutputToFile(
action,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
// limitations under the License.
package com.google.devtools.build.lib.analysis.actions;

import static java.nio.charset.StandardCharsets.UTF_8;
import static java.nio.charset.StandardCharsets.ISO_8859_1;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableMap;
Expand Down Expand Up @@ -349,10 +349,13 @@ protected void afterExecute(
for (Artifact input : allInputs.toList()) {
usedInputsByMappedPath.put(pathMapper.getMappedExecPathString(input), input);
}
// Bazel encodes file system paths as raw bytes stored in a Latin-1 encoded string, so we need
// to make sure to also decode the unused input list as Latin-1.
try (BufferedReader br =
new BufferedReader(
new InputStreamReader(
getUnusedInputListInputStream(actionExecutionContext, spawnResults), UTF_8))) {
getUnusedInputListInputStream(actionExecutionContext, spawnResults),
ISO_8859_1))) {
String line;
while ((line = br.readLine()) != null) {
line = line.trim();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

package com.google.devtools.build.lib.analysis.actions;

import static java.nio.charset.StandardCharsets.ISO_8859_1;

import com.google.common.annotations.VisibleForTesting;
import com.google.devtools.build.lib.actions.Artifact;
import com.google.devtools.build.lib.actions.ArtifactPathResolver;
Expand All @@ -22,16 +24,12 @@
import com.google.devtools.build.lib.vfs.FileSystemUtils;
import com.google.devtools.build.lib.vfs.Path;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import javax.annotation.Nullable;

/** A template that contains text content, or alternatively throws an {@link IOException}. */
@Immutable // all subclasses are immutable
public abstract class Template {

static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8;

/** We only allow subclasses in this file. */
private Template() {}

Expand Down Expand Up @@ -105,7 +103,8 @@ private static final class ArtifactTemplate extends Template {
public String getContent(ArtifactPathResolver resolver) throws IOException {
Path templatePath = resolver.toPath(templateArtifact);
try {
return FileSystemUtils.readContent(templatePath, DEFAULT_CHARSET);
// Bazel's internal encoding for strings is raw bytes as Latin-1
return FileSystemUtils.readContent(templatePath, ISO_8859_1);
} catch (IOException e) {
throw new IOException(
"failed to load template file '"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -977,13 +977,8 @@ public void expandTemplate(
ImmutableMap.Builder<String, Substitution> substitutionsBuilder = ImmutableMap.builder();
for (Map.Entry<String, String> substitution :
Dict.cast(substitutionsUnchecked, String.class, String.class, "substitutions").entrySet()) {
// Blaze calls ParserInput.fromLatin1 when reading BUILD files, which might
// contain UTF-8 encoded symbols as part of template substitution.
// As a quick fix, the substitution values are corrected before being passed on.
// In the long term, avoiding ParserInput.fromLatin would be a better approach.
substitutionsBuilder.put(
substitution.getKey(),
Substitution.of(substitution.getKey(), convertLatin1ToUtf8(substitution.getValue())));
substitution.getKey(), Substitution.of(substitution.getKey(), substitution.getValue()));
}
if (!Starlark.UNBOUND.equals(computedSubstitutions)) {
for (Substitution substitution : ((TemplateDict) computedSubstitutions).getAll()) {
Expand All @@ -1007,16 +1002,6 @@ public void expandTemplate(
registerAction(action);
}

/**
* Returns the proper UTF-8 representation of a String that was erroneously read using Latin1.
*
* @param latin1 Input string
* @return The input string, UTF8 encoded
*/
private static String convertLatin1ToUtf8(String latin1) {
return new String(latin1.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8);
}

@Override
public Args args(StarlarkThread thread) {
return Args.newArgs(thread.mutability(), getSemantics());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/authandtls/credentialhelper",
"//src/main/java/com/google/devtools/build/lib/concurrent",
"//src/main/java/com/google/devtools/build/lib/events",
"//src/main/java/com/google/devtools/build/lib/unsafe:string",
"//src/main/java/com/google/devtools/build/lib/vfs",
"//src/main/java/com/google/devtools/common/options",
"//third_party:auth",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@
// limitations under the License.
package com.google.devtools.build.lib.authandtls;

import com.google.common.base.Strings;
import java.nio.charset.Charset;
import com.google.devtools.build.lib.unsafe.StringUnsafe;
import java.util.Base64;

/**
Expand All @@ -26,16 +25,18 @@ public final class BasicHttpAuthenticationEncoder {

private BasicHttpAuthenticationEncoder() {}

/** Encode username and password into a token with given {@link Charset}. */
public static String encode(String username, String password, Charset charset) {
StringBuilder sb = new StringBuilder();
if (!Strings.isNullOrEmpty(username)) {
sb.append(username);
}
sb.append(":");
if (!Strings.isNullOrEmpty(password)) {
sb.append(password);
}
return "Basic " + Base64.getEncoder().encodeToString(sb.toString().getBytes(charset));
/**
* Encode username and password into a token.
*
* <p>username and password are expected to use Bazel's internal string encoding. The returned
* string is a regular Unicode string.
*/
public static String encode(String username, String password) {
// The raw bytes in the internal string are assumed to be UTF-8, which is the encoding used for
// basic authentication.
return "Basic "
+ Base64.getEncoder()
.encodeToString(
StringUnsafe.getInstance().getInternalStringBytes(username + ":" + password));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
// limitations under the License.
package com.google.devtools.build.lib.authandtls;

import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.auth.Credentials;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
Expand Down Expand Up @@ -58,7 +56,7 @@ public Map<String, List<String>> getRequestMetadata(URI uri) throws IOException
Credential credential = netrc.getCredential(uri.getHost());
if (credential != null) {
String token =
BasicHttpAuthenticationEncoder.encode(credential.login(), credential.password(), UTF_8);
BasicHttpAuthenticationEncoder.encode(credential.login(), credential.password());
return ImmutableMap.of("Authorization", ImmutableList.of(token));
} else {
return ImmutableMap.of();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
package com.google.devtools.build.lib.authandtls;

import static com.google.common.base.Predicates.not;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.nio.charset.StandardCharsets.ISO_8859_1;
import static java.util.Objects.requireNonNull;

import com.google.common.base.Strings;
Expand Down Expand Up @@ -77,7 +77,7 @@ private static class TokenStream implements Closeable {
private final Queue<Token> tokens = new ArrayDeque<>();

TokenStream(InputStream inputStream) throws IOException {
bufferedReader = new BufferedReader(new InputStreamReader(inputStream, UTF_8));
bufferedReader = new BufferedReader(new InputStreamReader(inputStream, ISO_8859_1));
processLine();
}

Expand Down Expand Up @@ -183,8 +183,7 @@ private static Credential parseCredentialForMachine(TokenStream tokenStream, Str
while (!done && tokenStream.hasNext()) {
// Peek rather than taking next token since we probably won't process it
Token token = tokenStream.peek();
if (token instanceof ItemToken itemToken) {
String item = itemToken.item();
if (token instanceof ItemToken(String item)) {
switch (item) {
case LOGIN -> {
tokenStream.next();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/skyframe:package_lookup_function",
"//src/main/java/com/google/devtools/build/lib/skyframe:package_lookup_value",
"//src/main/java/com/google/devtools/build/lib/skyframe:precomputed_value",
"//src/main/java/com/google/devtools/build/lib/util:string_encoding",
"//src/main/java/com/google/devtools/build/lib/vfs",
"//src/main/java/com/google/devtools/build/lib/vfs:pathfragment",
"//src/main/java/com/google/devtools/build/lib/vfs/inmemoryfs",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

package com.google.devtools.build.lib.bazel.bzlmod;

import static java.nio.charset.StandardCharsets.UTF_8;
import static java.nio.charset.StandardCharsets.ISO_8859_1;

import com.google.common.collect.ImmutableList;
import com.google.devtools.build.lib.actions.FileValue;
Expand All @@ -25,6 +25,7 @@
import com.google.devtools.build.lib.packages.VendorThreadContext;
import com.google.devtools.build.lib.rules.repository.RepositoryDelegatorFunction;
import com.google.devtools.build.lib.skyframe.PrecomputedValue;
import com.google.devtools.build.lib.util.StringEncoding;
import com.google.devtools.build.lib.vfs.FileSystemUtils;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.Root;
Expand Down Expand Up @@ -54,7 +55,8 @@
public class VendorFileFunction implements SkyFunction {

private static final String VENDOR_FILE_HEADER =
"""
StringEncoding.unicodeToInternal(
"""
###############################################################################
# This file is used to configure how external repositories are handled in vendor mode.
# ONLY the two following functions can be used:
Expand All @@ -67,7 +69,7 @@ public class VendorFileFunction implements SkyFunction {
# Note that Bazel will NOT update the vendored source for this repo while running vendor command
# unless it's unpinned. The user can modify and maintain the vendored source for this repo manually.
###############################################################################
""";
""");

private final BazelStarlarkEnvironment starlarkEnv;

Expand Down Expand Up @@ -138,8 +140,7 @@ private void createVendorFile(Path vendorPath, Path vendorFilePath)
throws VendorFileFunctionException {
try {
vendorPath.createDirectoryAndParents();
byte[] vendorFileContents = VENDOR_FILE_HEADER.getBytes(UTF_8);
FileSystemUtils.writeContent(vendorFilePath, vendorFileContents);
FileSystemUtils.writeContent(vendorFilePath, ISO_8859_1, VENDOR_FILE_HEADER);
} catch (IOException e) {
throw new VendorFileFunctionException(
new IOException("error creating VENDOR.bazel file", e), Transience.TRANSIENT);
Expand Down
Loading