From 0de71cc6422fb1438383316bcc97a60efd6dae27 Mon Sep 17 00:00:00 2001 From: Paul Johnston Date: Mon, 18 Mar 2024 21:45:13 -0600 Subject: [PATCH 1/5] e2e/smoke: add testcase with pathologic output --- e2e/smoke/BUILD.bazel | 18 +++++++++++++++++- e2e/smoke/testdata/file empty with spaces.txt | 0 e2e/smoke/testdata/file-empty.txt | 0 e2e/smoke/testdata_mtree.spec.golden | 3 +++ 4 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 e2e/smoke/testdata/file empty with spaces.txt create mode 100644 e2e/smoke/testdata/file-empty.txt create mode 100644 e2e/smoke/testdata_mtree.spec.golden diff --git a/e2e/smoke/BUILD.bazel b/e2e/smoke/BUILD.bazel index a41fee5a2..de195863b 100644 --- a/e2e/smoke/BUILD.bazel +++ b/e2e/smoke/BUILD.bazel @@ -4,7 +4,7 @@ load("@aspect_bazel_lib//lib:copy_to_directory.bzl", "copy_to_directory") load("@aspect_bazel_lib//lib:diff_test.bzl", "diff_test") load("@aspect_bazel_lib//lib:expand_template.bzl", "expand_template") load("@aspect_bazel_lib//lib:jq.bzl", "jq") -load("@aspect_bazel_lib//lib:tar.bzl", "tar") +load("@aspect_bazel_lib//lib:tar.bzl", "mtree_spec", "tar") load("@aspect_bazel_lib//lib:yq.bzl", "yq") # Validate that JQ works and resolves its toolchain @@ -93,6 +93,22 @@ tar( mtree = [], ) +mtree_spec( + name = "testdata_mtree", + srcs = [":testdata_files"], +) + +filegroup( + name = "testdata_files", + srcs = glob(["testdata/**/*"]), +) + +diff_test( + name = "mtree_golden_test", + file1 = ":testdata_mtree", + file2 = "testdata_mtree.spec.golden", +) + bats_test( name = "bats", srcs = [ diff --git a/e2e/smoke/testdata/file empty with spaces.txt b/e2e/smoke/testdata/file empty with spaces.txt new file mode 100644 index 000000000..e69de29bb diff --git a/e2e/smoke/testdata/file-empty.txt b/e2e/smoke/testdata/file-empty.txt new file mode 100644 index 000000000..e69de29bb diff --git a/e2e/smoke/testdata_mtree.spec.golden b/e2e/smoke/testdata_mtree.spec.golden new file mode 100644 index 000000000..7d4143ace --- /dev/null +++ b/e2e/smoke/testdata_mtree.spec.golden @@ -0,0 +1,3 @@ +testdata uid=0 gid=0 time=1672560000 mode=0755 type=dir +testdata/file empty with spaces.txt uid=0 gid=0 time=1672560000 mode=0755 type=file content=testdata/file empty with spaces.txt +testdata/file-empty.txt uid=0 gid=0 time=1672560000 mode=0755 type=file content=testdata/file-empty.txt From ddb389e839b580b8680f3e5c1a1a50c6b73fad43 Mon Sep 17 00:00:00 2001 From: Paul Johnston Date: Tue, 19 Mar 2024 22:24:23 -0600 Subject: [PATCH 2/5] WIP: vis encode mtree entries --- e2e/smoke/BUILD.bazel | 6 +++ .../file empty with \360\237\230\215.txt" | 0 e2e/smoke/testdata_mtree.spec.golden | 3 +- lib/private/tar.bzl | 49 ++++++++++++++++++- 4 files changed, 55 insertions(+), 3 deletions(-) create mode 100644 "e2e/smoke/testdata/file empty with \360\237\230\215.txt" diff --git a/e2e/smoke/BUILD.bazel b/e2e/smoke/BUILD.bazel index de195863b..32efa5813 100644 --- a/e2e/smoke/BUILD.bazel +++ b/e2e/smoke/BUILD.bazel @@ -98,6 +98,12 @@ mtree_spec( srcs = [":testdata_files"], ) +tar( + name = "testdata_tar", + srcs = [":testdata_files"], + mtree = [":testdata_mtree"], +) + filegroup( name = "testdata_files", srcs = glob(["testdata/**/*"]), diff --git "a/e2e/smoke/testdata/file empty with \360\237\230\215.txt" "b/e2e/smoke/testdata/file empty with \360\237\230\215.txt" new file mode 100644 index 000000000..e69de29bb diff --git a/e2e/smoke/testdata_mtree.spec.golden b/e2e/smoke/testdata_mtree.spec.golden index 7d4143ace..428d9bf8e 100644 --- a/e2e/smoke/testdata_mtree.spec.golden +++ b/e2e/smoke/testdata_mtree.spec.golden @@ -1,3 +1,4 @@ testdata uid=0 gid=0 time=1672560000 mode=0755 type=dir -testdata/file empty with spaces.txt uid=0 gid=0 time=1672560000 mode=0755 type=file content=testdata/file empty with spaces.txt +testdata/file\sempty\swith\sspaces.txt uid=0 gid=0 time=1672560000 mode=0755 type=file content=testdata/file\sempty\swith\sspaces.txt +testdata/file\sempty\swith\s😍.txt uid=0 gid=0 time=1672560000 mode=0755 type=file content=testdata/file\sempty\swith\s😍.txt testdata/file-empty.txt uid=0 gid=0 time=1672560000 mode=0755 type=file content=testdata/file-empty.txt diff --git a/lib/private/tar.bzl b/lib/private/tar.bzl index 0bc8a2e35..db9183baf 100644 --- a/lib/private/tar.bzl +++ b/lib/private/tar.bzl @@ -162,7 +162,7 @@ def _tar_impl(ctx): def _mtree_line(file, type, content = None, uid = "0", gid = "0", time = "1672560000", mode = "0755"): spec = [ - file, + _mtree_vis_encode(file), "uid=" + uid, "gid=" + gid, "time=" + time, @@ -170,9 +170,54 @@ def _mtree_line(file, type, content = None, uid = "0", gid = "0", time = "167256 "type=" + type, ] if content: - spec.append("content=" + content) + spec.append("content=" + _mtree_vis_encode(content)) return " ".join(spec) +def _ord(ch): + fail("don't know how to get codepoint of unicode character") + return 0 + +encode_unicode_characters = False + +def _mtree_vis_encode(s): + # range over the character indices of the string in reverse, replacing + # special characters with escape sequences. Typical case there are no + # special characters and no modification of `s`, so this is an O(n) scan of + # the string. + for i in range(len(s) - 1, 0, -1): + ch = s[i] + if ch == " ": + s = s[:i] + "\\s" + s[i + 1:] + + elif ch == " ": + s = s[:i] + "\\s" + s[i + 1:] + + elif ch == "\n": + s = s[:i] + "\\n" + s[i + 1:] + + elif ch == "\r": + s = s[:i] + "\\r" + s[i + 1:] + + elif ch == "\b": + s = s[:i] + "\\b" + s[i + 1:] + + elif ch == "\a": + s = s[:i] + "\\a" + s[i + 1:] + + elif ch == "\v": + s = s[:i] + "\\v" + s[i + 1:] + + elif ch == "\t": + s = s[:i] + "\\t" + s[i + 1:] + + elif ch == "\f": + s = s[:i] + "\\f" + s[i + 1:] + + elif encode_unicode_characters and not ch.isalpha(): + s = s[:i] + ("\\x%x" % _ord(ch)) + s[i + 1:] + + return s + # This function exactly same as the one from "@aspect_bazel_lib//lib:paths.bzl" # except that it takes workspace_name directly instead of the ctx object. # Reason is the performance of Args.add_all closures where we use this function. From 716c14ef6b179cbdd8bf31393c53d83c8136758f Mon Sep 17 00:00:00 2001 From: Paul Johnston Date: Wed, 20 Mar 2024 21:36:27 -0600 Subject: [PATCH 3/5] checkpoint genrule --- e2e/smoke/BUILD.bazel | 24 +++++++++++++++++++++ lib/private/tar.bzl | 49 ++----------------------------------------- 2 files changed, 26 insertions(+), 47 deletions(-) diff --git a/e2e/smoke/BUILD.bazel b/e2e/smoke/BUILD.bazel index 32efa5813..bc825e7c0 100644 --- a/e2e/smoke/BUILD.bazel +++ b/e2e/smoke/BUILD.bazel @@ -98,6 +98,30 @@ mtree_spec( srcs = [":testdata_files"], ) +genrule( + name = "testdata_mtree.fixed", + srcs = [":testdata_mtree"], + outs = ["testdata_mtree.mtree"], + cmd = """ +while IFS= read -r line; do + echo "Processing line: $$line" + output="$$line" + if [[ $$line = *[![:ascii:]]* ]]; then + echo "Contains Non-ASCII: $$line" + filename="$$(sed 's| uid=0.*||' <<< "$$line")" + body="$$(sed 's|.*\\( uid=0.* content=\\).*|\\1|' <<< "$$line")" + repl="$$(vis -wo <<< "$$filename")" + echo "filename: <$$filename> => <$$repl> (body=<$$body>)" + pattern="$$(echo "$$filename" | sed -e 's/[.^$$[*+?]/\\&/g')" + newline="$$(sed "s|$$filename|foo|g" <<< "$$line")" + echo "replaced line: $$newline" + output="$$repl$$body$$repl" + fi + echo "$$output" >>$@ +done < $< + """, +) + tar( name = "testdata_tar", srcs = [":testdata_files"], diff --git a/lib/private/tar.bzl b/lib/private/tar.bzl index db9183baf..0bc8a2e35 100644 --- a/lib/private/tar.bzl +++ b/lib/private/tar.bzl @@ -162,7 +162,7 @@ def _tar_impl(ctx): def _mtree_line(file, type, content = None, uid = "0", gid = "0", time = "1672560000", mode = "0755"): spec = [ - _mtree_vis_encode(file), + file, "uid=" + uid, "gid=" + gid, "time=" + time, @@ -170,54 +170,9 @@ def _mtree_line(file, type, content = None, uid = "0", gid = "0", time = "167256 "type=" + type, ] if content: - spec.append("content=" + _mtree_vis_encode(content)) + spec.append("content=" + content) return " ".join(spec) -def _ord(ch): - fail("don't know how to get codepoint of unicode character") - return 0 - -encode_unicode_characters = False - -def _mtree_vis_encode(s): - # range over the character indices of the string in reverse, replacing - # special characters with escape sequences. Typical case there are no - # special characters and no modification of `s`, so this is an O(n) scan of - # the string. - for i in range(len(s) - 1, 0, -1): - ch = s[i] - if ch == " ": - s = s[:i] + "\\s" + s[i + 1:] - - elif ch == " ": - s = s[:i] + "\\s" + s[i + 1:] - - elif ch == "\n": - s = s[:i] + "\\n" + s[i + 1:] - - elif ch == "\r": - s = s[:i] + "\\r" + s[i + 1:] - - elif ch == "\b": - s = s[:i] + "\\b" + s[i + 1:] - - elif ch == "\a": - s = s[:i] + "\\a" + s[i + 1:] - - elif ch == "\v": - s = s[:i] + "\\v" + s[i + 1:] - - elif ch == "\t": - s = s[:i] + "\\t" + s[i + 1:] - - elif ch == "\f": - s = s[:i] + "\\f" + s[i + 1:] - - elif encode_unicode_characters and not ch.isalpha(): - s = s[:i] + ("\\x%x" % _ord(ch)) + s[i + 1:] - - return s - # This function exactly same as the one from "@aspect_bazel_lib//lib:paths.bzl" # except that it takes workspace_name directly instead of the ctx object. # Reason is the performance of Args.add_all closures where we use this function. From 249133606010fb82c4ae9511a8e9bebd9d179a9c Mon Sep 17 00:00:00 2001 From: Paul Johnston Date: Thu, 21 Mar 2024 00:44:23 -0600 Subject: [PATCH 4/5] Refactor to emit mtree.json, then format it --- e2e/smoke/BUILD.bazel | 24 ------------ e2e/smoke/testdata_mtree.spec.golden | 4 +- lib/private/tar.bzl | 56 ++++++++++++++++++++++------ lib/tar.bzl | 1 + 4 files changed, 47 insertions(+), 38 deletions(-) diff --git a/e2e/smoke/BUILD.bazel b/e2e/smoke/BUILD.bazel index bc825e7c0..32efa5813 100644 --- a/e2e/smoke/BUILD.bazel +++ b/e2e/smoke/BUILD.bazel @@ -98,30 +98,6 @@ mtree_spec( srcs = [":testdata_files"], ) -genrule( - name = "testdata_mtree.fixed", - srcs = [":testdata_mtree"], - outs = ["testdata_mtree.mtree"], - cmd = """ -while IFS= read -r line; do - echo "Processing line: $$line" - output="$$line" - if [[ $$line = *[![:ascii:]]* ]]; then - echo "Contains Non-ASCII: $$line" - filename="$$(sed 's| uid=0.*||' <<< "$$line")" - body="$$(sed 's|.*\\( uid=0.* content=\\).*|\\1|' <<< "$$line")" - repl="$$(vis -wo <<< "$$filename")" - echo "filename: <$$filename> => <$$repl> (body=<$$body>)" - pattern="$$(echo "$$filename" | sed -e 's/[.^$$[*+?]/\\&/g')" - newline="$$(sed "s|$$filename|foo|g" <<< "$$line")" - echo "replaced line: $$newline" - output="$$repl$$body$$repl" - fi - echo "$$output" >>$@ -done < $< - """, -) - tar( name = "testdata_tar", srcs = [":testdata_files"], diff --git a/e2e/smoke/testdata_mtree.spec.golden b/e2e/smoke/testdata_mtree.spec.golden index 428d9bf8e..2b80c99e4 100644 --- a/e2e/smoke/testdata_mtree.spec.golden +++ b/e2e/smoke/testdata_mtree.spec.golden @@ -1,4 +1,4 @@ testdata uid=0 gid=0 time=1672560000 mode=0755 type=dir -testdata/file\sempty\swith\sspaces.txt uid=0 gid=0 time=1672560000 mode=0755 type=file content=testdata/file\sempty\swith\sspaces.txt -testdata/file\sempty\swith\s😍.txt uid=0 gid=0 time=1672560000 mode=0755 type=file content=testdata/file\sempty\swith\s😍.txt +testdata/file\040empty\040with\040spaces.txt uid=0 gid=0 time=1672560000 mode=0755 type=file content=testdata/file\040empty\040with\040spaces.txt +testdata/file\040empty\040with\040\360\237\230\215.txt uid=0 gid=0 time=1672560000 mode=0755 type=file content=testdata/file\040empty\040with\040\360\237\230\215.txt testdata/file-empty.txt uid=0 gid=0 time=1672560000 mode=0755 type=file content=testdata/file-empty.txt diff --git a/lib/private/tar.bzl b/lib/private/tar.bzl index 0bc8a2e35..2dcf6ade9 100644 --- a/lib/private/tar.bzl +++ b/lib/private/tar.bzl @@ -161,17 +161,15 @@ def _tar_impl(ctx): return DefaultInfo(files = depset([out]), runfiles = ctx.runfiles([out])) def _mtree_line(file, type, content = None, uid = "0", gid = "0", time = "1672560000", mode = "0755"): - spec = [ - file, - "uid=" + uid, - "gid=" + gid, - "time=" + time, - "mode=" + mode, - "type=" + type, - ] - if content: - spec.append("content=" + content) - return " ".join(spec) + return json.encode(struct( + file = file, + uid = uid, + gid = gid, + time = time, + mode = mode, + type = type, + content = content, + )) # This function exactly same as the one from "@aspect_bazel_lib//lib:paths.bzl" # except that it takes workspace_name directly instead of the ctx object. @@ -198,6 +196,7 @@ def _expand(file, expander, transform = to_repository_relative_path): def _mtree_impl(ctx): out = ctx.outputs.out or ctx.actions.declare_file(ctx.attr.name + ".spec") + json_out = ctx.actions.declare_file(out.basename + ".json", sibling = out) content = ctx.actions.args() content.set_param_file_format("multiline") @@ -230,7 +229,40 @@ def _mtree_impl(ctx): allow_closure = True, ) - ctx.actions.write(out, content = content) + ctx.actions.write(json_out, content = content) + + jq_bin = ctx.toolchains["@aspect_bazel_lib//lib:jq_toolchain_type"].jqinfo.bin + + ctx.actions.run_shell( + tools = [jq_bin], + inputs = [json_out], + outputs = [out], + command = """ +while IFS= read -r jsonl; do + file=$({jq} -r '.file' <<< "$jsonl") + gid=$({jq} -r '.gid' <<< "$jsonl") + uid=$({jq} -r '.uid' <<< "$jsonl") + time=$({jq} -r '.time' <<< "$jsonl") + mode=$({jq} -r '.mode' <<< "$jsonl") + type=$({jq} -r '.type' <<< "$jsonl") + content=$({jq} -r '.content' <<< "$jsonl") + + file_encoded=$(echo -n "$file" | vis -wo) + mtree_line="$file_encoded uid=$uid gid=$gid time=$time mode=$mode type=$type" + if [ "$content" != "null" ]; then + content_encoded=$(echo -n "$content" | vis -wo) + mtree_line="$mtree_line content=$content_encoded" + fi + + echo "$mtree_line" >> {output} +done < {input} +""".format( + jq = jq_bin.path, + input = json_out.path, + output = out.path, + ), + mnemonic = "MtreeSpec", + ) return DefaultInfo(files = depset([out]), runfiles = ctx.runfiles([out])) diff --git a/lib/tar.bzl b/lib/tar.bzl index e8051a7ef..81dde4b9f 100644 --- a/lib/tar.bzl +++ b/lib/tar.bzl @@ -58,6 +58,7 @@ mtree_spec = rule( doc = "Create an mtree specification to map a directory hierarchy. See https://man.freebsd.org/cgi/man.cgi?mtree(8)", implementation = _tar_lib.mtree_implementation, attrs = _tar_lib.mtree_attrs, + toolchains = ["@aspect_bazel_lib//lib:jq_toolchain_type"], ) tar_rule = _tar From 83935b614404c24d84b85f8d0056fd6fb2fd191a Mon Sep 17 00:00:00 2001 From: Alex Eagle Date: Wed, 24 Apr 2024 09:55:39 -0700 Subject: [PATCH 5/5] refactor: move new test code to test folder --- e2e/smoke/BUILD.bazel | 24 +------------------ lib/tests/tar/BUILD.bazel | 23 ++++++++++++++++++ .../tar}/testdata/file empty with spaces.txt | 0 .../file empty with \360\237\230\215.txt" | 0 .../tests/tar}/testdata/file-empty.txt | 0 5 files changed, 24 insertions(+), 23 deletions(-) rename {e2e/smoke => lib/tests/tar}/testdata/file empty with spaces.txt (100%) rename "e2e/smoke/testdata/file empty with \360\237\230\215.txt" => "lib/tests/tar/testdata/file empty with \360\237\230\215.txt" (100%) rename {e2e/smoke => lib/tests/tar}/testdata/file-empty.txt (100%) diff --git a/e2e/smoke/BUILD.bazel b/e2e/smoke/BUILD.bazel index 32efa5813..a41fee5a2 100644 --- a/e2e/smoke/BUILD.bazel +++ b/e2e/smoke/BUILD.bazel @@ -4,7 +4,7 @@ load("@aspect_bazel_lib//lib:copy_to_directory.bzl", "copy_to_directory") load("@aspect_bazel_lib//lib:diff_test.bzl", "diff_test") load("@aspect_bazel_lib//lib:expand_template.bzl", "expand_template") load("@aspect_bazel_lib//lib:jq.bzl", "jq") -load("@aspect_bazel_lib//lib:tar.bzl", "mtree_spec", "tar") +load("@aspect_bazel_lib//lib:tar.bzl", "tar") load("@aspect_bazel_lib//lib:yq.bzl", "yq") # Validate that JQ works and resolves its toolchain @@ -93,28 +93,6 @@ tar( mtree = [], ) -mtree_spec( - name = "testdata_mtree", - srcs = [":testdata_files"], -) - -tar( - name = "testdata_tar", - srcs = [":testdata_files"], - mtree = [":testdata_mtree"], -) - -filegroup( - name = "testdata_files", - srcs = glob(["testdata/**/*"]), -) - -diff_test( - name = "mtree_golden_test", - file1 = ":testdata_mtree", - file2 = "testdata_mtree.spec.golden", -) - bats_test( name = "bats", srcs = [ diff --git a/lib/tests/tar/BUILD.bazel b/lib/tests/tar/BUILD.bazel index a6c2bb5fe..64c182c3a 100644 --- a/lib/tests/tar/BUILD.bazel +++ b/lib/tests/tar/BUILD.bazel @@ -342,3 +342,26 @@ assert_tar_listing( "drwxrwxrwt 0 0 0 0 Aug 3 2017 ./tmp/", ], ) + +# Case 12: Special characters in filenames require running 'vis' +mtree_spec( + name = "testdata_mtree", + srcs = [":testdata_files"], +) + +tar( + name = "testdata_tar", + srcs = [":testdata_files"], + mtree = [":testdata_mtree"], +) + +filegroup( + name = "testdata_files", + srcs = glob(["testdata/**/*"]), +) + +diff_test( + name = "mtree_golden_test", + file1 = ":testdata_mtree", + file2 = "testdata_mtree.spec.golden", +) diff --git a/e2e/smoke/testdata/file empty with spaces.txt b/lib/tests/tar/testdata/file empty with spaces.txt similarity index 100% rename from e2e/smoke/testdata/file empty with spaces.txt rename to lib/tests/tar/testdata/file empty with spaces.txt diff --git "a/e2e/smoke/testdata/file empty with \360\237\230\215.txt" "b/lib/tests/tar/testdata/file empty with \360\237\230\215.txt" similarity index 100% rename from "e2e/smoke/testdata/file empty with \360\237\230\215.txt" rename to "lib/tests/tar/testdata/file empty with \360\237\230\215.txt" diff --git a/e2e/smoke/testdata/file-empty.txt b/lib/tests/tar/testdata/file-empty.txt similarity index 100% rename from e2e/smoke/testdata/file-empty.txt rename to lib/tests/tar/testdata/file-empty.txt