Skip to content

Commit

Permalink
[manifest license] fully implement SPDX, plus general parsing stuff (#…
Browse files Browse the repository at this point in the history
…334)

* [ParserBase] get ready for parsing SPDX license expressions

* parse SPDX license expressions

* add error message tests

* more tests, plus minor error change

* format

Co-authored-by: nicole mazzuca <[email protected]>
  • Loading branch information
strega-nil-ms and strega-nil authored Feb 1, 2022
1 parent c76e0de commit 457b030
Show file tree
Hide file tree
Showing 18 changed files with 1,389 additions and 838 deletions.
9 changes: 4 additions & 5 deletions Generate-SpdxLicenseList.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,15 @@ function Transform-JsonFile {

$fileContent = @(
"// Data downloaded from $Uri",
"// Generated by scripts/Generate-SpdxLicenseList.ps1",
"{")
"// Generated by Generate-SpdxLicenseList.ps1")
$json.$OuterName |
Sort-Object -Property $Id -Culture '' |
ForEach-Object {
$fileContent += " `"$($_.$Id)`","
$fileContent += "`"$($_.$Id)`","
}
$fileContent += "}"

$fileContent -join "`n" | Out-File -FilePath $OutFile -Encoding 'utf8'
($fileContent -join "`n") + "`n" `
| Out-File -FilePath $OutFile -Encoding 'utf8' -NoNewline
}

$baseUrl = "https://raw.githubusercontent.com/$GithubRepository/$Commit/json"
Expand Down
15 changes: 15 additions & 0 deletions include/vcpkg/base/messages.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ namespace vcpkg::msg
LocalizedString() = default;
operator StringView() const { return m_data; }
const std::string& data() const { return m_data; }
std::string extract_data()
{
    // Move the stored text out to the caller, then reset the member so this
    // object is left holding an empty string.
    std::string extracted = std::move(m_data);
    m_data = "";
    return extracted;
}

static LocalizedString from_string_unchecked(std::string&& s)
{
Expand Down Expand Up @@ -87,6 +88,8 @@ namespace vcpkg::msg
}
};

// Exposes the C string of the LocalizedString's underlying std::string
// (presumably so it can be passed to printf-style formatting helpers -- confirm against callers).
// The returned pointer refers to storage owned by `s`; it must not outlive `s`.
inline const char* to_printf_arg(const msg::LocalizedString& s) { return s.data().c_str(); }

struct LocalizedStringMapLess
{
using is_transparent = void;
Expand All @@ -110,6 +113,16 @@ namespace vcpkg::msg

inline void print(Color c, const LocalizedString& s) { write_unlocalized_text_to_stdout(c, s); }
inline void print(const LocalizedString& s) { write_unlocalized_text_to_stdout(Color::none, s); }
// Writes `s` to stdout in color `c`, then writes a newline with Color::none.
inline void println(Color c, const LocalizedString& s)
{
    write_unlocalized_text_to_stdout(c, s);
    write_unlocalized_text_to_stdout(Color::none, "\n");
}
// Uncolored convenience overload: same as println(Color::none, s).
inline void println(const LocalizedString& s) { println(Color::none, s); }

template<class Message, class... Ts>
void print(Message m, Ts... args)
Expand Down Expand Up @@ -157,6 +170,8 @@ namespace vcpkg::msg
DECLARE_MSG_ARG(version);
DECLARE_MSG_ARG(list);
DECLARE_MSG_ARG(output);
DECLARE_MSG_ARG(row);
DECLARE_MSG_ARG(column);
#undef DECLARE_MSG_ARG

// These are `...` instead of
Expand Down
1 change: 1 addition & 0 deletions include/vcpkg/base/optional.h
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,7 @@ namespace vcpkg

return !rhs.m_base.has_value();
}
// Inequality is defined as the exact negation of operator==, so the two can never disagree.
friend bool operator!=(const Optional& lhs, const Optional& rhs) noexcept { return !(lhs == rhs); }

private:
details::OptionalStorage<T> m_base;
Expand Down
51 changes: 40 additions & 11 deletions include/vcpkg/base/parse.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#pragma once

#include <vcpkg/base/cstringview.h>
#include <vcpkg/base/messages.h>
#include <vcpkg/base/optional.h>
#include <vcpkg/base/stringview.h>
#include <vcpkg/base/unicode.h>
Expand Down Expand Up @@ -42,16 +43,36 @@ namespace vcpkg::Parse
virtual const std::string& get_message() const override;
};

struct ParserBase
struct SourceLoc
{
struct SourceLoc
{
Unicode::Utf8Decoder it;
Unicode::Utf8Decoder start_of_line;
int row;
int column;
};
Unicode::Utf8Decoder it;
Unicode::Utf8Decoder start_of_line;
int row;
int column;
};

// Severity of a parse diagnostic. Passed to ParseMessage::format together with the origin;
// NOTE(review): presumably selects the "warning:" / "error:" prefix -- confirm in the implementation.
enum class MessageKind
{
Warning,
Error,
};

// One diagnostic produced while parsing: a source location plus its localized text.
struct ParseMessage
{
// Location the message refers to; value-initialized by default.
SourceLoc location = {};
// Localized text of the diagnostic.
msg::LocalizedString message;

// Renders this message as a diagnostic of the given `kind` for text that came from `origin`.
// NOTE(review): defined out of line; the exact output layout is not visible from this header.
msg::LocalizedString format(StringView origin, MessageKind kind) const;
};

// Diagnostics accumulated by a parser: a single owned error slot plus any number of warnings.
struct ParseMessages
{
// The recorded error, owned by this struct.
// NOTE(review): presumably null while no error has been recorded -- confirm against add_error.
std::unique_ptr<ParseError> error;
// Recorded warnings (presumably in the order add_warning was called -- confirm).
std::vector<ParseMessage> warnings;
};

struct ParserBase
{
ParserBase(StringView text, StringView origin, TextRowCol init_rowcol = {});

// True only for the four ASCII whitespace characters: space, tab, carriage return, line feed.
static constexpr bool is_whitespace(char32_t ch)
{
    switch (ch)
    {
        case U' ':
        case U'\t':
        case U'\r':
        case U'\n': return true;
        default: return false;
    }
}
Expand Down Expand Up @@ -110,9 +131,17 @@ namespace vcpkg::Parse

// Records an error with `message` at the current location (cur_loc()).
void add_error(std::string message) { add_error(std::move(message), cur_loc()); }
// Records an error with `message` at `loc`; defined out of line.
void add_error(std::string message, const SourceLoc& loc);
// LocalizedString overloads: the text is taken out of `message` via extract_data()
// and forwarded to the std::string overloads above.
void add_error(msg::LocalizedString&& message) { add_error(message.extract_data(), cur_loc()); }
void add_error(msg::LocalizedString&& message, const SourceLoc& loc) { add_error(message.extract_data(), loc); }

// Records a warning with `message` at the current location (cur_loc()).
void add_warning(msg::LocalizedString&& message) { add_warning(std::move(message), cur_loc()); }
// Records a warning with `message` at `loc`; defined out of line.
void add_warning(msg::LocalizedString&& message, const SourceLoc& loc);

// Non-owning view of the recorded error: returns whatever m_messages.error currently points to.
const IParseError* get_error() const { return m_messages.error.get(); }
// Transfers ownership of the recorded error to the caller by moving it out of m_messages.error.
std::unique_ptr<IParseError> extract_error() { return std::move(m_messages.error); }

const Parse::IParseError* get_error() const { return m_err.get(); }
std::unique_ptr<Parse::IParseError> extract_error() { return std::move(m_err); }
const ParseMessages& messages() const { return m_messages; }
ParseMessages extract_messages() { return std::move(m_messages); }

private:
Unicode::Utf8Decoder m_it;
Expand All @@ -123,6 +152,6 @@ namespace vcpkg::Parse
StringView m_text;
StringView m_origin;

std::unique_ptr<IParseError> m_err;
ParseMessages m_messages;
};
}
41 changes: 26 additions & 15 deletions include/vcpkg/base/unicode.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,37 @@ namespace vcpkg::Unicode
StartFour = 4,
};

constexpr static char32_t end_of_file = 0xFFFF'FFFF;

// Error codes for UTF-8 decoding; NoError (0) means success.
// utf8_decode_code_point returns one of these, and make_error_code wraps them in a
// std::error_code using utf8_category().
// NOTE(review): the precise condition behind each enumerator is defined by the decoder
// implementation, which is not visible from this header.
enum class utf8_errc
{
NoError = 0,
InvalidCodeUnit = 1,
InvalidCodePoint = 2,
PairedSurrogates = 3,
UnexpectedContinue = 4,
UnexpectedStart = 5,
UnexpectedEof = 6,
};

const std::error_category& utf8_category() noexcept;

Utf8CodeUnitKind utf8_code_unit_kind(unsigned char code_unit) noexcept;
int utf8_code_unit_count(Utf8CodeUnitKind kind) noexcept;
int utf8_code_unit_count(char code_unit) noexcept;

int utf8_encode_code_point(char (&array)[4], char32_t code_point) noexcept;

// returns {after-current-code-point, error},
// and if error = NoError, then out = parsed code point.
// else, out = end_of_file.
std::pair<const char*, utf8_errc> utf8_decode_code_point(const char* first,
const char* last,
char32_t& out) noexcept;

// uses the C++20 definition
bool is_double_width_code_point(char32_t ch) noexcept;

inline std::string& utf8_append_code_point(std::string& str, char32_t code_point)
{
if (static_cast<uint32_t>(code_point) < 0x80)
Expand Down Expand Up @@ -52,21 +77,6 @@ namespace vcpkg::Unicode

char32_t utf16_surrogates_to_code_point(char32_t leading, char32_t trailing);

constexpr static char32_t end_of_file = 0xFFFF'FFFF;

enum class utf8_errc
{
NoError = 0,
InvalidCodeUnit = 1,
InvalidCodePoint = 2,
PairedSurrogates = 3,
UnexpectedContinue = 4,
UnexpectedStart = 5,
UnexpectedEof = 6,
};

const std::error_category& utf8_category() noexcept;

inline std::error_code make_error_code(utf8_errc err) noexcept
{
return std::error_code(static_cast<int>(err), utf8_category());
Expand All @@ -89,6 +99,7 @@ namespace vcpkg::Unicode
struct Utf8Decoder
{
Utf8Decoder() noexcept;
explicit Utf8Decoder(StringView sv) : Utf8Decoder(sv.begin(), sv.end()) { }
Utf8Decoder(const char* first, const char* last) noexcept;

struct sentinel
Expand Down
12 changes: 11 additions & 1 deletion include/vcpkg/base/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,17 @@ namespace vcpkg::Util
{
using std::begin;
using std::end;
return std::find_if(begin(cont), end(cont), pred);
// allow cont.begin() to not have the same type as cont.end()
auto it = begin(cont);
auto last = end(cont);
for (; it != last; ++it)
{
if (pred(*it))
{
break;
}
}
return it;
}

template<class Container, class Pred>
Expand Down
1 change: 1 addition & 0 deletions include/vcpkg/fwd/configuration.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ namespace vcpkg
{
struct Configuration;
struct RegistryConfig;
struct ManifestConfiguration;
}
2 changes: 1 addition & 1 deletion include/vcpkg/platform-expression.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,6 @@ namespace vcpkg::PlatformExpression
};

// platform expression parses a platform expression; the EBNF of such is defined in
// /docs/maintainers/manifest-files.md#supports
// https://github.com/microsoft/vcpkg/blob/master/docs/maintainers/manifest-files.md#supports
ExpectedS<Expr> parse_platform_expression(StringView expression, MultipleBinaryOperators multiple_binary_operators);
}
13 changes: 10 additions & 3 deletions include/vcpkg/sourceparagraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include <vcpkg/base/fwd/json.h>

#include <vcpkg/fwd/configuration.h>
#include <vcpkg/fwd/vcpkgcmdarguments.h>

#include <vcpkg/base/expected.h>
Expand Down Expand Up @@ -71,7 +72,12 @@ namespace vcpkg
std::vector<Dependency> dependencies;
std::vector<DependencyOverride> overrides;
std::vector<std::string> default_features;
std::string license; // SPDX license expression

// there are two distinct "empty" states here
// "user did not provide a license" -> nullopt
// "user provided license = null" -> {""}
Optional<std::string> license; // SPDX license expression

Optional<std::string> builtin_baseline;
Optional<Json::Object> vcpkg_configuration;
// Currently contacts is only a Json::Object but it will eventually be unified with maintainers
Expand Down Expand Up @@ -127,8 +133,7 @@ namespace vcpkg
Json::Object serialize_manifest(const SourceControlFile& scf);
Json::Object serialize_debug_manifest(const SourceControlFile& scf);

ExpectedS<struct ManifestConfiguration> parse_manifest_configuration(StringView origin,
const Json::Object& manifest);
ExpectedS<ManifestConfiguration> parse_manifest_configuration(StringView origin, const Json::Object& manifest);

/// <summary>
/// Named pair of a SourceControlFile and the location of this file
Expand All @@ -146,4 +151,6 @@ namespace vcpkg
{
return print_error_message({&error_info_list, 1});
}

std::string parse_spdx_license_expression(StringView sv, Parse::ParseMessages& messages);
}
23 changes: 23 additions & 0 deletions locales/messages.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"AwsFailedToDownload": "aws failed to download with exit code: {value}\n{output}",
"AwsRestoredPackages": "Restored {value} packages from AWS servers in {elapsed}s",
"AwsUploadedPackages": "Uploaded binaries to {value} AWS servers",
"EmptyLicenseExpression": "SPDX license expression was empty.",
"ErrorIndividualPackagesUnsupported": "Error: In manifest mode, `vcpkg install` does not support individual package arguments.\nTo install additional packages, edit vcpkg.json and then run `vcpkg install` without any package arguments.",
"ErrorInvalidClassicModeOption": "Error: The option {value} is not supported in classic mode and no manifest was found.",
"ErrorInvalidManifestModeOption": "Error: The option {value} is not supported in manifest mode.",
Expand All @@ -21,8 +22,30 @@
"ErrorRequirePackagesToInstall": "Error: No packages were listed for installation and no manifest was found.",
"ErrorVcvarsUnsupported": "Error: in triplet {triplet}: Use of Visual Studio's Developer Prompt is unsupported on non-Windows hosts.\nDefine 'VCPKG_CMAKE_SYSTEM_NAME' or 'VCPKG_CHAINLOAD_TOOLCHAIN_FILE' in the triplet file.",
"ForceSystemBinariesOnWeirdPlatforms": "Environment variable VCPKG_FORCE_SYSTEM_BINARIES must be set on arm, s390x, and ppc64le platforms.",
"FormattedParseError": "error: {value}",
"FormattedParseMessageExpression": " on expression: {value}",
"FormattedParseMessageLocation": "{path}:{row}:{column}: ",
"_FormattedParseMessageLocation.comment": "{LOCKED}",
"FormattedParseWarning": "warning: {value}",
"IllegalFeatures": "Error: List of features is not allowed in this context",
"IllegalPlatformSpec": "Error: Platform qualifier is not allowed in this context",
"LicenseExpressionContainsExtraPlus": "SPDX license expression contains an extra '+'. These are only allowed directly after a license identifier.",
"LicenseExpressionContainsInvalidCharacter": "SPDX license expression contains an invalid character (0x{value:02x} '{value}').",
"LicenseExpressionContainsUnicode": "SPDX license expression contains a unicode character (U+{value:04x} '{pretty_value}'), but these expressions are ASCII-only.",
"LicenseExpressionDocumentRefUnsupported": "The current implementation does not support DocumentRef- SPDX references.",
"LicenseExpressionExpectCompoundFoundParen": "Expected a compound or the end of the string, found a parenthesis.",
"LicenseExpressionExpectCompoundFoundWith": "Expected either AND or OR, found WITH (WITH is only allowed after license names, not parenthesized expressions).",
"LicenseExpressionExpectCompoundFoundWord": "Expected either AND or OR, found a license or exception name: '{value}'.",
"LicenseExpressionExpectCompoundOrWithFoundWord": "Expected either AND, OR, or WITH, found a license or exception name: '{value}'.",
"LicenseExpressionExpectExceptionFoundCompound": "Expected an exception name, found the compound {value}.",
"LicenseExpressionExpectExceptionFoundEof": "Expected an exception name, found the end of the string.",
"LicenseExpressionExpectExceptionFoundParen": "Expected an exception name, found a parenthesis.",
"LicenseExpressionExpectLicenseFoundCompound": "Expected a license name, found the compound {value}.",
"LicenseExpressionExpectLicenseFoundEof": "Expected a license name, found the end of the string.",
"LicenseExpressionExpectLicenseFoundParen": "Expected a license name, found a parenthesis.",
"LicenseExpressionImbalancedParens": "There was a close parenthesis without an opening parenthesis.",
"LicenseExpressionUnknownException": "Unknown license exception identifier '{value}'. Known values are listed at https://spdx.org/licenses/exceptions-index.html",
"LicenseExpressionUnknownLicense": "Unknown license identifier '{value}'. Known values are listed at https://spdx.org/licenses/",
"NoLocalizationForMessages": "No localization for the following messages:",
"ProcessorArchitectureMalformed": "Failed to parse %PROCESSOR_ARCHITECTURE% ({value}) as a valid CPU architecture.",
"ProcessorArchitectureMissing": "The required environment variable %PROCESSOR_ARCHITECTURE% is missing.",
Expand Down
40 changes: 36 additions & 4 deletions src/vcpkg-test/json.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -236,8 +236,8 @@ TEST_CASE ("JSON track newlines", "[json]")
REQUIRE(!res);
REQUIRE(res.error()->format() ==
R"(filename:2:1: error: Unexpected character; expected property name
on expression: ,
^
on expression: ,
^
)");
}

Expand All @@ -247,7 +247,39 @@ TEST_CASE ("JSON duplicated object keys", "[json]")
REQUIRE(!res);
REQUIRE(res.error()->format() ==
R"(filename:1:13: error: Duplicated key "name" in an object
on expression: {"name": 1, "name": 2}
^
on expression: {"name": 1, "name": 2}
^
)");
}

TEST_CASE ("JSON support unicode characters in errors", "[json]")
{
// unicode characters w/ bytes >1
auto res = Json::parse(R"json("Δx/Δt" "")json", "filename");
REQUIRE(!res);
CHECK(res.error()->format() ==
R"(filename:1:9: error: Unexpected character; expected EOF
on expression: "Δx/Δt" ""
^
)");

// full width unicode characters
// note that the A is full width
res = Json::parse(R"json("姐姐aA" "")json", "filename");
REQUIRE(!res);
CHECK(res.error()->format() ==
R"(filename:1:8: error: Unexpected character; expected EOF
on expression: "姐姐aA" ""
^
)");

// incorrect errors in the face of combining characters
// (this test should be fixed once the underlying bug is fixed)
res = Json::parse(R"json("é" "")json", "filename");
REQUIRE(!res);
CHECK(res.error()->format() ==
R"(filename:1:6: error: Unexpected character; expected EOF
on expression: "é" ""
^
)");
}
Loading

0 comments on commit 457b030

Please sign in to comment.