diff --git a/BUILD.bazel b/BUILD.bazel index a4ea6c21..6aaa77cc 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -163,6 +163,8 @@ _RUNTIME_PKGS = [ "//markdown", "//markdown/private", "//markdown/tools", + "//markdown/tools/github_markdown_toc", + "//markdown/tools/github_markdown_toc/cmd/gh-md-toc", "//shlib/lib", "//shlib/rules", "//shlib/rules/private", diff --git a/MODULE.bazel b/MODULE.bazel index 0fe6c0a6..6c6c92d5 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -33,10 +33,10 @@ go_deps.from_file(go_mod = "//:go.mod") use_repo( go_deps, "com_github_creasty_defaults", - "com_github_ekalinin_github_markdown_toc_go", "com_github_stretchr_testify", "in_gopkg_alecthomas_kingpin_v2", "in_gopkg_yaml_v3", + "org_golang_x_net", ) # MARK: - Dev Dependencies diff --git a/README.md b/README.md index b8c97861..482ccbf0 100644 --- a/README.md +++ b/README.md @@ -3,48 +3,48 @@ [![Build](https://github.com/cgrindel/bazel-starlib/actions/workflows/ci.yml/badge.svg?event=schedule)](https://github.com/cgrindel/bazel-starlib/actions/workflows/ci.yml) Bazel Starlib is a collection of projects that contain rulesets and libraries that are useful for -the implementation of Bazel projects. - -| Project | Description | Documentation | -| ------- | ----------- | ------------- | -| bazeldoc | Generate Starlark documentation using [Bazel Stardoc](https://github.com/bazelbuild/stardoc). Formerly hosted as [bazel-doc](https://github.com/cgrindel/bazel-doc). | [API](/doc/bazeldoc/), [How-to](/bazeldoc/) | -| bzlformat | Format Bazel Starlark files using [Buildifier](https://github.com/bazelbuild/buildtools/tree/master/buildifier), test that the formatted files exist in the workspace directory, and copy formatted files to the workspace directory. Formerly hosted as [rules_bzlformat](https://github.com/cgrindel/rules_bzlformat). | [API](/doc/bzlformat/), [How-to](/bzlformat/), [Examples](/examples/bzlformat/) | -| bzllib | Collection of Starlark libraries. 
| [API](/doc/bzllib/), [How-to](/bzllib/) | -| bzlrelease | Automate and customize the generation of releases using GitHub Actions. | [API](/doc/bzlrelease/), [How-to](/bzlrelease/) | -| bzltidy | Collect Bazel actions that keep your source files up-to-date. | [API](/doc/bztidy/), [How-to](/bzltidy/) | -| markdown | Maintain markdown files. | [API](/doc/markdown/), [How-to](/markdown/), [Examples](/examples/markdown/) | -| shlib | Collection of libraries useful when implementing shell binaries, libraries, and tests. Formerly hosted as [bazel_shlib](https://github.com/cgrindel/bazel_shlib). | [API](/doc/shlib/), [How-to](/shlib/) | -| updatesrc | Copy files from the Bazel output directories to the workspace directory. Formerly hosted as [rules_updatesrc](https://github.com/cgrindel/rules_updatesrc) | [API](/doc/updatesrc/), [How-to](/updatesrc/), [Examples](/examples/updatesrc/) | - +the implementation of Bazel projects. + +| Project | Description | Documentation | +| ---------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------- | +| bazeldoc | Generate Starlark documentation using [Bazel Stardoc](https://github.com/bazelbuild/stardoc). Formerly hosted as [bazel-doc](https://github.com/cgrindel/bazel-doc). | [API](/doc/bazeldoc/), [How-to](/bazeldoc/) | +| bzlformat | Format Bazel Starlark files using [Buildifier](https://github.com/bazelbuild/buildtools/tree/master/buildifier), test that the formatted files exist in the workspace directory, and copy formatted files to the workspace directory. Formerly hosted as [rules_bzlformat](https://github.com/cgrindel/rules_bzlformat). 
| [API](/doc/bzlformat/), [How-to](/bzlformat/), [Examples](/examples/bzlformat/) | +| bzllib | Collection of Starlark libraries. | [API](/doc/bzllib/), [How-to](/bzllib/) | +| bzlrelease | Automate and customize the generation of releases using GitHub Actions. | [API](/doc/bzlrelease/), [How-to](/bzlrelease/) | +| bzltidy | Collect Bazel actions that keep your source files up-to-date. | [API](/doc/bztidy/), [How-to](/bzltidy/) | +| markdown | Maintain markdown files. | [API](/doc/markdown/), [How-to](/markdown/), [Examples](/examples/markdown/) | +| shlib | Collection of libraries useful when implementing shell binaries, libraries, and tests. Formerly hosted as [bazel_shlib](https://github.com/cgrindel/bazel_shlib). | [API](/doc/shlib/), [How-to](/shlib/) | +| updatesrc | Copy files from the Bazel output directories to the workspace directory. Formerly hosted as [rules_updatesrc](https://github.com/cgrindel/rules_updatesrc) | [API](/doc/updatesrc/), [How-to](/updatesrc/), [Examples](/examples/updatesrc/) | ## Table of Contents -* [Quickstart](#quickstart) - * [Workspace Configuration](#workspace-configuration) -* [Other Documentation](#other-documentation) - +- [Quickstart](#quickstart) + - [Workspace Configuration](#workspace-configuration) +- [Other Documentation](#other-documentation) + ## Quickstart -The following provides a quick introduction on how to load this repository into your workspace. For +The following provides a quick introduction on how to load this repository into your workspace. For more information on how to use the projects from this repository in your workspace, check out the how-to links above and review the [the generated documentation](/doc/). 
- ### `MODULE.bazel` Snippet + ```python bazel_dep(name = "cgrindel_bazel_starlib", version = "0.16.2") ``` - + ### Workspace Configuration + ```python load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") @@ -64,8 +64,23 @@ load("@bazel_skylib//:workspace.bzl", "bazel_skylib_workspace") bazel_skylib_workspace() ``` + +## Developer Documentation + +### To Add a Golang Dependency + +Execute the following to update the go module files, resolve the Golang dependencies and update the +Bazel build files. + +```sh +$ bazel run @io_bazel_rules_go//go -- get github.com/sweet/go_pkg +$ bazel run //:go_mod_tidy +$ bazel run //:gazelle_update_repos +$ bazel run //:update_build_files +``` + ## Other Documentation - [Release process for this repository](release/README.md) diff --git a/cmd/go_deps/README.md b/cmd/go_deps/README.md deleted file mode 100644 index ec3f5ced..00000000 --- a/cmd/go_deps/README.md +++ /dev/null @@ -1,34 +0,0 @@ -# Golang Dependencies - -The `bazel-starlib` repository uses Golang modules that contain exeutable packages (e.g., binaries). -To ensure that these binary targets are downloaded and built properly, a simple Golang program -exists to reference the required packages. [Gazelle](https://github.com/bazelbuild/bazel-gazelle) is -then used to identify the transitive dependencies and saves them to `go_deps.bzl`. - -NOTE: If the external Go module has dependencies that are referenced only in its `main` package, you -must update this repository to directly reference those direct dependencies. For instance, -`github.com/ekalinin/github-markdown-toc.go` references `gopkg.in/alecthomas/kingpin.v2` in its -`main` package. Hence, we added the version of `gopkg.in/alecthomas/kingpin.v2` that is referenced -in `github.com/ekalinin/github-markdown-toc.go` and added a usage to `cmd/go_deps`. 
- -For more details, please see the following: - -- https://github.com/bazelbuild/bazel-gazelle/issues/1585 -- [Slack thread](https://bazelbuild.slack.com/archives/CDBP88Z0D/p1689814617770239) - -## To Add a Golang Dependency - -Update the `main.go` in this directory to depend upon the desired Golang package. Be sure to use the -package in some way. Otherwise, `go mod tidy` will remove it. - -Execute the following to update the go module files, resolve the Golang dependencies and update the -Bazel build files. - -```sh -$ bazel run @io_bazel_rules_go//go -- github.com/sweet/go_pkg -# bazel run //:go_mod_tidy -$ bazel run //:gazelle_update_repos -$ bazel run //:update_build_files -``` - -Reference the Golang binary target. diff --git a/cmd/go_deps/main.go b/cmd/go_deps/main.go deleted file mode 100644 index e36f0fe5..00000000 --- a/cmd/go_deps/main.go +++ /dev/null @@ -1,24 +0,0 @@ -package main - -import ( - "fmt" - "strings" - - gmt "github.com/ekalinin/github-markdown-toc.go" - "gopkg.in/alecthomas/kingpin.v2" -) - -func main() { - kingpin.Parse() - fmt.Println("Hello, world.") - - ghtoc := generateToc() - fmt.Printf("TOC:\n%s", ghtoc) -} - -// Use the markdown package so that it is a dependency. 
-func generateToc() string { - doc := gmt.NewGHDoc("", false, 0, 0, true, "", 2, false) - toc := *doc.GrabToc() - return strings.Join(toc, "\n") -} diff --git a/go.mod b/go.mod index 77352a49..a1738322 100644 --- a/go.mod +++ b/go.mod @@ -4,13 +4,14 @@ go 1.19 require ( github.com/creasty/defaults v1.7.0 - github.com/ekalinin/github-markdown-toc.go v1.2.1 github.com/stretchr/testify v1.8.4 + golang.org/x/net v0.1.0 gopkg.in/alecthomas/kingpin.v2 v2.2.4 gopkg.in/yaml.v3 v3.0.1 ) require ( + github.com/alecthomas/assert v1.0.0 // indirect github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc // indirect github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf // indirect github.com/davecgh/go-spew v1.1.1 // indirect diff --git a/go.sum b/go.sum index 4c4a4be6..63cbf144 100644 --- a/go.sum +++ b/go.sum @@ -1,4 +1,5 @@ -github.com/alecthomas/assert v0.0.0-20170929043011-405dbfeb8e38 h1:smF2tmSOzy2Mm+0dGI2AIUHY+w0BUc+4tn40djz7+6U= +github.com/alecthomas/assert v1.0.0 h1:3XmGh/PSuLzDbK3W2gUbRXwgW5lqPkuqvRgeQ30FI5o= +github.com/alecthomas/assert v1.0.0/go.mod h1:va/d2JC+M7F6s+80kl/R3G7FUiW6JzUO+hPhLyJ36ZY= github.com/alecthomas/colour v0.1.0 h1:nOE9rJm6dsZ66RGWYSFrXw461ZIt9A6+nHgL7FRrDUk= github.com/alecthomas/repr v0.0.0-20210801044451-80ca428c5142 h1:8Uy0oSf5co/NZXje7U1z8Mpep++QJOldL2hs/sBQf48= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc h1:cAKDfWh5VpdgMhJosfJnn5/FoN2SRZ4p7fJNX58YPaU= @@ -9,14 +10,14 @@ github.com/creasty/defaults v1.7.0 h1:eNdqZvc5B509z18lD8yc212CAqJNvfT1Jq6L8WowdB github.com/creasty/defaults v1.7.0/go.mod h1:iGzKe6pbEHnpMPtfDXZEr0NVxWnPTjb1bbDy08fPzYM= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/ekalinin/github-markdown-toc.go v1.2.1 h1:6jRFt5qg61XfXZbP3SDaeTX+1OC1EgbHvRceYDmPAUE= -github.com/ekalinin/github-markdown-toc.go v1.2.1/go.mod 
h1:V5aiwoSLm1+er91D4l0AXn8vr4FX07Iu+zgDMFj3FeU= github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/sergi/go-diff v1.2.0 h1:XU+rvMAioB0UC3q1MFrIQy4Vo5/4VsRDQQXHsEya6xQ= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +golang.org/x/net v0.1.0 h1:hZ/3BUoy5aId7sCpA/Tc5lt8DkFgdVS2onTpJsZ/fl0= +golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= golang.org/x/sys v0.10.0 h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA= golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= gopkg.in/alecthomas/kingpin.v2 v2.2.4 h1:CC8tJ/xljioKrK6ii3IeWVXU4Tw7VB+LbjZBJaBxN50= diff --git a/go_deps.bzl b/go_deps.bzl index 5c020886..ea3586a8 100644 --- a/go_deps.bzl +++ b/go_deps.bzl @@ -10,8 +10,8 @@ def bazel_starlib_go_dependencies(): name = "com_github_alecthomas_assert", build_external = "external", importpath = "github.com/alecthomas/assert", - sum = "h1:smF2tmSOzy2Mm+0dGI2AIUHY+w0BUc+4tn40djz7+6U=", - version = "v0.0.0-20170929043011-405dbfeb8e38", + sum = "h1:3XmGh/PSuLzDbK3W2gUbRXwgW5lqPkuqvRgeQ30FI5o=", + version = "v1.0.0", ) go_repository( name = "com_github_alecthomas_colour", @@ -55,13 +55,6 @@ def bazel_starlib_go_dependencies(): sum = "h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=", version = "v1.1.1", ) - go_repository( - name = "com_github_ekalinin_github_markdown_toc_go", - build_external = "external", - importpath = "github.com/ekalinin/github-markdown-toc.go", - sum = "h1:6jRFt5qg61XfXZbP3SDaeTX+1OC1EgbHvRceYDmPAUE=", - version = "v1.2.1", - ) go_repository( name = "com_github_mattn_go_isatty", build_external = "external", @@ -118,6 +111,14 @@ def bazel_starlib_go_dependencies(): 
sum = "h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=", version = "v3.0.1", ) + go_repository( + name = "org_golang_x_net", + build_external = "external", + importpath = "golang.org/x/net", + sum = "h1:hZ/3BUoy5aId7sCpA/Tc5lt8DkFgdVS2onTpJsZ/fl0=", + version = "v0.1.0", + ) + go_repository( name = "org_golang_x_sys", build_external = "external", @@ -125,3 +126,17 @@ def bazel_starlib_go_dependencies(): sum = "h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA=", version = "v0.10.0", ) + go_repository( + name = "org_golang_x_term", + build_external = "external", + importpath = "golang.org/x/term", + sum = "h1:g6Z6vPFA9dYBAF7DWcH6sCcOntplXsDKcliusYijMlw=", + version = "v0.1.0", + ) + go_repository( + name = "org_golang_x_text", + build_external = "external", + importpath = "golang.org/x/text", + sum = "h1:BrVqGRd7+k1DiOgtnFvAkoQEWQvBc25ouMJM6429SFg=", + version = "v0.4.0", + ) diff --git a/markdown/tools/BUILD.bazel b/markdown/tools/BUILD.bazel index 41e20b7d..d92f5ab8 100644 --- a/markdown/tools/BUILD.bazel +++ b/markdown/tools/BUILD.bazel @@ -23,7 +23,7 @@ sh_binary( srcs = ["update_markdown_toc.sh"], data = [ ":update_markdown_doc", - "@com_github_ekalinin_github_markdown_toc_go//cmd/gh-md-toc", + "//markdown/tools/github_markdown_toc/cmd/gh-md-toc", ], visibility = ["//visibility:public"], deps = [ diff --git a/markdown/tools/github_markdown_toc/BUILD.bazel b/markdown/tools/github_markdown_toc/BUILD.bazel new file mode 100644 index 00000000..5d4d3fb2 --- /dev/null +++ b/markdown/tools/github_markdown_toc/BUILD.bazel @@ -0,0 +1,40 @@ +load("@cgrindel_bazel_starlib//bzlformat:defs.bzl", "bzlformat_pkg") +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +filegroup( + name = "all_files", + srcs = glob(["*"]), + visibility = ["//:__subpackages__"], +) + +go_library( + name = "github_markdown_toc", + srcs = [ + "ghdoc.go", + "headerfinder.go", + "internals.go", + ], + importpath = "github.com/cgrindel/bazel-starlib/markdown/tools/github_markdown_toc", + 
visibility = ["//visibility:public"], + deps = [ + "@org_golang_x_net//html", + "@org_golang_x_net//html/atom", + ], +) + +go_test( + name = "github_markdown_toc_test", + srcs = [ + "ghdoc_test.go", + "headerfinder_test.go", + "internal_test.go", + ], + embed = [":github_markdown_toc"], + deps = [ + "@com_github_stretchr_testify//assert", + "@org_golang_x_net//html", + "@org_golang_x_net//html/atom", + ], +) + +bzlformat_pkg(name = "bzlformat") diff --git a/markdown/tools/github_markdown_toc/LICENSE b/markdown/tools/github_markdown_toc/LICENSE new file mode 100644 index 00000000..66fe00f0 --- /dev/null +++ b/markdown/tools/github_markdown_toc/LICENSE @@ -0,0 +1,20 @@ +The MIT License (MIT) + +Copyright (c) 2015 Eugene Kalinin + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/markdown/tools/github_markdown_toc/Makefile b/markdown/tools/github_markdown_toc/Makefile new file mode 100644 index 00000000..b1ef7ec9 --- /dev/null +++ b/markdown/tools/github_markdown_toc/Makefile @@ -0,0 +1,40 @@ +EXEC=gh-md-toc +CMD_SRC=cmd/${EXEC}/main.go +BUILD_DIR=build +BUILD_OS="windows darwin linux" +BUILD_ARCH="amd64" + +clean: + @rm -f ${EXEC} + @rm -f ${BUILD_DIR}/* + @go clean + +lint: + @golint + @golangci-lint run + +# make run ARGS="--help" +run: + @go run ${CMD_SRC} $(ARGS) + +build: clean lint + go build -race -o ${EXEC} ${CMD_SRC} + +test: clean lint + @go test -cover -o ${EXEC} + +release: test buildall + @git tag `grep "version" main.go | grep -o -E '[0-9]\.[0-9]\.[0-9]{1,2}'` + @git push --tags origin master + +buildall: clean + @mkdir -p ${BUILD_DIR} + @for os in "${BUILD_OS}" ; do \ + for arch in "${BUILD_ARCH}" ; do \ + echo " * build $$os for $$arch"; \ + GOOS=$$os GOARCH=$$arch go build -o ${BUILD_DIR}/${EXEC} ${CMD_SRC}; \ + cd ${BUILD_DIR}; \ + tar czf ${EXEC}.$$os.$$arch.tgz ${EXEC}; \ + cd - ; \ + done done + @rm ${BUILD_DIR}/${EXEC} diff --git a/markdown/tools/github_markdown_toc/README.md b/markdown/tools/github_markdown_toc/README.md new file mode 100644 index 00000000..2510d414 --- /dev/null +++ b/markdown/tools/github_markdown_toc/README.md @@ -0,0 +1,14 @@ +# GitHub Markdown TOC + +This utility is based upon [the gh-md-toc utility created by +ekalinin](https://github.com/ekalinin/github-markdown-toc.go). The original implementation used +regular expressions to parse the HTML generated from GitHub's markdown rendering service. +Unfortunately, subtle changes in the output from this service can cause the regular expressions to +not work properly. I created [a pull request that replaced the regular expression logic with HTML +parsing using `golang.org/x/net/html`](https://github.com/ekalinin/github-markdown-toc.go/pull/38). +As of this writing, the pull request has not been merged. 
+ +After another outage due to the fragility of the regular expression logic on 2023-08-29, I opted to +fork the code with the HTML parsing logic and incorporate it into this repository. I preserved the +original license on the code. However, any changes to this utility may not be compatible with the +original code base. diff --git a/cmd/go_deps/BUILD.bazel b/markdown/tools/github_markdown_toc/cmd/gh-md-toc/BUILD.bazel similarity index 53% rename from cmd/go_deps/BUILD.bazel rename to markdown/tools/github_markdown_toc/cmd/gh-md-toc/BUILD.bazel index beafa847..1e24efa5 100644 --- a/cmd/go_deps/BUILD.bazel +++ b/markdown/tools/github_markdown_toc/cmd/gh-md-toc/BUILD.bazel @@ -1,20 +1,26 @@ load("@cgrindel_bazel_starlib//bzlformat:defs.bzl", "bzlformat_pkg") load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library") +filegroup( + name = "all_files", + srcs = glob(["*"]), + visibility = ["//:__subpackages__"], +) + go_library( - name = "go_deps_lib", + name = "gh-md-toc_lib", srcs = ["main.go"], - importpath = "github.com/cgrindel/bazel-starlib/cmd/go_deps", + importpath = "github.com/cgrindel/bazel-starlib/markdown/tools/github_markdown_toc/cmd/gh-md-toc", visibility = ["//visibility:private"], deps = [ - "@com_github_ekalinin_github_markdown_toc_go//:github-markdown-toc_go", + "//markdown/tools/github_markdown_toc", "@in_gopkg_alecthomas_kingpin_v2//:kingpin_v2", ], ) go_binary( - name = "go_deps", - embed = [":go_deps_lib"], + name = "gh-md-toc", + embed = [":gh-md-toc_lib"], visibility = ["//visibility:public"], ) diff --git a/markdown/tools/github_markdown_toc/cmd/gh-md-toc/main.go b/markdown/tools/github_markdown_toc/cmd/gh-md-toc/main.go new file mode 100644 index 00000000..2218c8d5 --- /dev/null +++ b/markdown/tools/github_markdown_toc/cmd/gh-md-toc/main.go @@ -0,0 +1,92 @@ +package main + +import ( + "fmt" + "io" + "os" + + "gopkg.in/alecthomas/kingpin.v2" + + ghtoc "github.com/cgrindel/bazel-starlib/markdown/tools/github_markdown_toc" +) + +var ( + 
pathsDesc = "Local path or URL of the document to grab TOC. Read MD from stdin if not entered." + paths = kingpin.Arg("path", pathsDesc).Strings() + serial = kingpin.Flag("serial", "Grab TOCs in the serial mode").Bool() + hideHeader = kingpin.Flag("hide-header", "Hide TOC header").Bool() + hideFooter = kingpin.Flag("hide-footer", "Hide TOC footer").Bool() + startDepth = kingpin.Flag("start-depth", "Start including from this level. Defaults to 0 (include all levels)").Default("0").Int() + depth = kingpin.Flag("depth", "How many levels of headings to include. Defaults to 0 (all)").Default("0").Int() + noEscape = kingpin.Flag("no-escape", "Do not escape chars in sections").Bool() + token = kingpin.Flag("token", "GitHub personal token").String() + indent = kingpin.Flag("indent", "Indent space of generated list").Default("2").Int() + debug = kingpin.Flag("debug", "Show debug info").Bool() +) + +// check if there was an error (and panic if it was) +func check(e error) { + if e != nil { + panic(e) + } +} + +// Entry point +func main() { + kingpin.Version(ghtoc.Version) + kingpin.Parse() + + if *token == "" { + *token = os.Getenv("GH_TOC_TOKEN") + } + + pathsCount := len(*paths) + + // read file paths | urls from args + absPathsInToc := pathsCount > 1 + ch := make(chan *ghtoc.GHToc, pathsCount) + + for _, p := range *paths { + ghdoc := ghtoc.NewGHDoc(p, absPathsInToc, *startDepth, *depth, !*noEscape, *token, *indent, *debug) + getFn := func(ch chan *ghtoc.GHToc, ghdoc *ghtoc.GHDoc) { ch <- ghdoc.GetToc() } + if *serial { + getFn(ch, ghdoc) + } else { + go getFn(ch, ghdoc) + } + } + + if !*hideHeader && pathsCount == 1 { + fmt.Println() + fmt.Println("Table of Contents") + fmt.Println("=================") + fmt.Println() + } + + for i := 1; i <= pathsCount; i++ { + toc := <-ch + // #14, check if there's really TOC? 
+ if toc != nil { + check(toc.Print(os.Stdout)) + } + } + + // read md from stdin + if pathsCount == 0 { + bytes, err := io.ReadAll(os.Stdin) + check(err) + + file, err := os.CreateTemp(os.TempDir(), "ghtoc") + check(err) + defer os.Remove(file.Name()) + + check(os.WriteFile(file.Name(), bytes, 0644)) + check(ghtoc.NewGHDoc(file.Name(), false, *startDepth, *depth, !*noEscape, *token, *indent, *debug). + GetToc(). + Print(os.Stdout)) + } + + if !*hideFooter { + fmt.Println("Created by [gh-md-toc](https://github.com/ekalinin/github-markdown-toc.go)") + } +} diff --git a/markdown/tools/github_markdown_toc/ghdoc.go b/markdown/tools/github_markdown_toc/ghdoc.go new file mode 100644 index 00000000..8179c666 --- /dev/null +++ b/markdown/tools/github_markdown_toc/ghdoc.go @@ -0,0 +1,206 @@ +package ghtoc + +import ( + "fmt" + "io" + "log" + "net/url" + "os" + "strconv" + "strings" +) + +// GHToc GitHub TOC +type GHToc []string + +// Print TOC to the console +func (toc *GHToc) Print(w io.Writer) error { + for _, tocItem := range *toc { + if _, err := fmt.Fprintln(w, tocItem); err != nil { + return err + } + } + if _, err := fmt.Fprintln(w); err != nil { + return err + } + return nil +} + +type httpGetter func(urlPath string) ([]byte, string, error) +type httpPoster func(urlPath, filePath, token string) (string, error) + +// GHDoc GitHub document +type GHDoc struct { + Path string + AbsPaths bool + StartDepth int + Depth int + Escape bool + GhToken string + Indent int + Debug bool + html string + logger *log.Logger + httpGetter httpGetter + httpPoster httpPoster +} + +// NewGHDoc create GHDoc +func NewGHDoc(Path string, AbsPaths bool, StartDepth int, Depth int, Escape bool, Token string, Indent int, Debug bool) *GHDoc { + return &GHDoc{ + Path: Path, + AbsPaths: AbsPaths, + StartDepth: StartDepth, + Depth: Depth, + Escape: Escape, + GhToken: Token, + Indent: Indent, + Debug: Debug, + html: "", + logger: log.New(os.Stderr, "", log.LstdFlags), + httpGetter: httpGet, + 
httpPoster: httpPost, + } +} + +func (doc *GHDoc) d(msg string) { + if doc.Debug { + doc.logger.Println(msg) + } +} + +// IsRemoteFile checks if path is for remote file or not +func (doc *GHDoc) IsRemoteFile() bool { + u, err := url.Parse(doc.Path) + if err != nil || u.Scheme == "" { + doc.d("IsRemoteFile: false") + return false + } + doc.d("IsRemoteFile: true") + return true +} + +func (doc *GHDoc) convertMd2Html(localPath string, token string) (string, error) { + ghURL := "https://api.github.com/markdown/raw" + return doc.httpPoster(ghURL, localPath, token) +} + +// Convert2HTML downloads remote file +func (doc *GHDoc) Convert2HTML() error { + doc.d("Convert2HTML: start.") + defer doc.d("Convert2HTML: done.") + + if doc.IsRemoteFile() { + htmlBody, ContentType, err := doc.httpGetter(doc.Path) + doc.d("Convert2HTML: remote file. content-type: " + ContentType) + if err != nil { + return err + } + + // if not a plain text - return the result (should be html) + if strings.Split(ContentType, ";")[0] != "text/plain" { + doc.html = string(htmlBody) + return nil + } + + // if remote file's content is a plain text + // we need to convert it to html + tmpfile, err := os.CreateTemp("", "ghtoc-remote-txt") + if err != nil { + return err + } + defer tmpfile.Close() + doc.Path = tmpfile.Name() + if err = os.WriteFile(tmpfile.Name(), htmlBody, 0644); err != nil { + return err + } + } + doc.d("Convert2HTML: local file: " + doc.Path) + if _, err := os.Stat(doc.Path); os.IsNotExist(err) { + return err + } + htmlBody, err := doc.convertMd2Html(doc.Path, doc.GhToken) + doc.d("Convert2HTML: converted to html, size: " + strconv.Itoa(len(htmlBody))) + if err != nil { + return err + } + if doc.Debug { + htmlFile := doc.Path + ".debug.html" + doc.d("Convert2HTML: write html file: " + htmlFile) + if err := os.WriteFile(htmlFile, []byte(htmlBody), 0644); err != nil { + return err + } + } + doc.html = htmlBody + return nil +} + +// GrabToc gets TOC from html +func (doc *GHDoc) GrabToc() 
*GHToc { + doc.d("GrabToc: start, html size: " + strconv.Itoa(len(doc.html))) + defer doc.d("GrabToc: done.") + + listIndentation := generateListIndentation(doc.Indent) + + minDepth := doc.StartDepth + var maxDepth int + if doc.Depth > 0 { + maxDepth = doc.Depth - 1 + } else { + maxDepth = int(MaxHxDepth) + } + + hdrs := findHeadersInString(doc.html) + + // Determine the min depth represented by the slice of headers. For example, if a document only + // has H2 tags and no H1 tags. We want the H2 TOC entries to not have an indent. + minHxDepth := MaxHxDepth + for _, hdr := range hdrs { + if hdr.Depth < minHxDepth { + minHxDepth = hdr.Depth + } + } + + // Populate the toc with entries + toc := GHToc{} + for _, hdr := range hdrs { + hDepth := int(hdr.Depth) + if hDepth >= minDepth && hDepth <= maxDepth { + indentDepth := int(hdr.Depth) - int(minHxDepth) - doc.StartDepth + indent := strings.Repeat(listIndentation(), indentDepth) + toc = append(toc, doc.tocEntry(indent, hdr)) + } + } + + return &toc +} + +func (doc *GHDoc) tocEntry(indent string, hdr Header) string { + return indent + "* " + + "[" + doc.tocName(hdr.Name) + "]" + + "(" + doc.tocLink(hdr.Href) + ")" +} + +func (doc *GHDoc) tocName(name string) string { + if doc.Escape { + return EscapeSpecChars(name) + } + return name +} + +func (doc *GHDoc) tocLink(href string) string { + link, _ := url.QueryUnescape(href) + if doc.AbsPaths { + link = doc.Path + link + } + return link +} + +// GetToc return GHToc for a document +func (doc *GHDoc) GetToc() *GHToc { + if err := doc.Convert2HTML(); err != nil { + log.Fatal(err) + return nil + } + return doc.GrabToc() +} diff --git a/markdown/tools/github_markdown_toc/ghdoc_test.go b/markdown/tools/github_markdown_toc/ghdoc_test.go new file mode 100644 index 00000000..19136958 --- /dev/null +++ b/markdown/tools/github_markdown_toc/ghdoc_test.go @@ -0,0 +1,546 @@ +package ghtoc + +import ( + "bytes" + "errors" + "log" + "os" + "testing" +) + +func TestIsUrl(t *testing.T) { + 
doc1 := &GHDoc{ + Path: "https://github.com/ekalinin/envirius/blob/master/README.md", + } + if !doc1.IsRemoteFile() { + t.Error("This is url: ", doc1.Path) + } + + doc2 := &GHDoc{ + Path: "./README.md", + } + if doc2.IsRemoteFile() { + t.Error("This is not url: ", doc2.Path) + } +} + +func TestGrabTocOneRow(t *testing.T) { + tocExpected := []string{ + "* [README in another language](#readme-in-another-language)", + } + doc := &GHDoc{ + html: ` +

README in another language

+ `, + AbsPaths: false, + Depth: 0, + Indent: 2, + } + toc := *doc.GrabToc() + if toc[0] != tocExpected[0] { + t.Error("Res :", toc, "\nExpected :", tocExpected) + } +} + +func TestGrabTocOneRowWithNewLines(t *testing.T) { + tocExpected := []string{ + "* [README in another language](#readme-in-another-language)", + } + doc := &GHDoc{ + html: ` +

+ + README in another language +

+ `, AbsPaths: false, + Depth: 0, + Escape: true, + Indent: 2, + } + toc := *doc.GrabToc() + if toc[0] != tocExpected[0] { + t.Error("Res :", toc, "\nExpected :", tocExpected) + } +} + +func TestGrabTocMultilineOriginGithub(t *testing.T) { + + tocExpected := []string{ + "* [How to add a plugin?](#how-to-add-a-plugin)", + " * [Mandatory elements](#mandatory-elements)", + " * [plug\\_list\\_versions](#plug_list_versions)", + } + doc := &GHDoc{ + html: ` +

How to add a plugin?

+ +

All plugins are in the directory +nv-plugins. +If you need to add support for a new language you should add it as plugin +inside this directory.

+ +

Mandatory elements

+ +

If you create a plugin which builds all stuff from source then In a simplest +case you need to implement 2 functions in the plugin's body:

+ +

plug_list_versions

+ +

This function should return list of available versions of the plugin. +For example:

+ `, AbsPaths: false, + Escape: true, + Depth: 0, + Indent: 2, + } + toc := *doc.GrabToc() + for i := 0; i <= len(tocExpected)-1; i++ { + if toc[i] != tocExpected[i] { + t.Error("Res :", toc[i], "\nExpected :", tocExpected[i]) + } + } +} + +func TestGrabTocBackquoted(t *testing.T) { + tocExpected := []string{ + "* [The command foo1](#the-command-foo1)", + " * [The command foo2 is better](#the-command-foo2-is-better)", + "* [The command bar1](#the-command-bar1)", + " * [The command bar2 is better](#the-command-bar2-is-better)", + } + + doc := &GHDoc{ + html: ` +

+The command foo1 +

+ +

Blabla...

+ +

+The command foo2 is better

+ +

Blabla...

+ +

+The command bar1 +

+ +

Blabla...

+ +

+The command bar2 is better

+ +

Blabla...

+ `, AbsPaths: false, + Depth: 0, + Indent: 2, + } + toc := *doc.GrabToc() + for i := 0; i <= len(tocExpected)-1; i++ { + if toc[i] != tocExpected[i] { + t.Error("Res :", toc[i], "\nExpected :", tocExpected[i]) + } + } +} + +func TestGrabTocDepth(t *testing.T) { + tocExpected := []string{ + "* [The command foo1](#the-command-foo1)", + "* [The command bar1](#the-command-bar1)", + } + + doc := &GHDoc{ + html: ` +

+The command foo1 +

+ +

Blabla...

+ +

+The command foo2 is better

+ +

Blabla...

+ +

+The command bar1 +

+ +

Blabla...

+ +

+The command bar2 is better

+ +

Blabla...

+ `, AbsPaths: false, + Escape: true, + Depth: 1, + Indent: 2, + } + toc := *doc.GrabToc() + for i := 0; i <= len(tocExpected)-1; i++ { + if toc[i] != tocExpected[i] { + t.Error("Res :", toc[i], "\nExpected :", tocExpected[i]) + } + } +} + +func TestGrabTocStartDepth(t *testing.T) { + tocExpected := []string{ + "* [The command foo2 is better](#the-command-foo2-is-better)", + " * [The command foo3 is even betterer](#the-command-foo3-is-even-betterer)", + "* [The command bar2 is better](#the-command-bar2-is-better)", + " * [The command bar3 is even betterer](#the-command-bar3-is-even-betterer)", + } + + doc := &GHDoc{ + html: ` +

+The command foo1 +

+ +

Blabla...

+ +

+The command foo2 is better

+ +

Blabla...

+ +

+The command foo3 is even betterer

+ +

Blabla...

+ +

+The command bar1 +

+ +

Blabla...

+ +

+The command bar2 is better

+ +

Blabla...

+ +

+The command bar3 is even betterer

+ +

Blabla...

+ `, AbsPaths: false, + Escape: true, + StartDepth: 1, + Indent: 2, + } + toc := *doc.GrabToc() + for i := 0; i <= len(tocExpected)-1; i++ { + if toc[i] != tocExpected[i] { + t.Error("Res :", toc[i], "\nExpected :", tocExpected[i]) + } + } +} + +func TestGrabTocWithAbspath(t *testing.T) { + link := "https://github.com/ekalinin/envirius/blob/master/README.md" + tocExpected := []string{ + "* [README in another language](" + link + "#readme-in-another-language)", + } + doc := &GHDoc{ + html: ` +

README in another language

+ `, AbsPaths: true, + Path: link, + Depth: 0, + Indent: 2, + } + toc := *doc.GrabToc() + if toc[0] != tocExpected[0] { + t.Error("Res :", toc, "\nExpected :", tocExpected) + } +} + +func TestEscapedChars(t *testing.T) { + tocExpected := []string{ + "* [mod\\_\\*](#mod_)", + } + + doc := &GHDoc{ + html: ` +

+ + mod_* +

`, + AbsPaths: false, + Escape: true, + Depth: 0, + Indent: 2, + } + toc := *doc.GrabToc() + + if toc[0] != tocExpected[0] { + t.Error("Res :", toc, "\nExpected :", tocExpected) + } +} + +func TestCustomSpaceIndentation(t *testing.T) { + tocExpected := []string{ + "* [Header Level1](#header-level1)", + " * [Header Level2](#header-level2)", + " * [Header Level3](#header-level3)", + } + + doc := &GHDoc{ + html: ` +

+Header Level1 +

+

+Header Level2 +

+

+Header Level3 +

+ `, + AbsPaths: false, + Depth: 0, + Indent: 4, + } + toc := *doc.GrabToc() + + for i := 0; i <= len(tocExpected)-1; i++ { + if toc[i] != tocExpected[i] { + t.Error("Res :", toc[i], "\nExpected :", tocExpected[i]) + } + } +} + +func TestMinHeaderNumber(t *testing.T) { + tocExpected := []string{ + "* [foo](#foo)", + " * [bar](#bar)", + } + + doc := &GHDoc{ + html: ` +

+ + foo +

+

+ + bar +

+ `, + AbsPaths: false, + Depth: 0, + Indent: 2, + } + toc := *doc.GrabToc() + + if toc[0] != tocExpected[0] { + t.Error("Res :", toc, "\nExpected :", tocExpected) + } +} + +func TestGHTocPrint(t *testing.T) { + toc := GHToc{"one", "two"} + want := "one\ntwo\n\n" + var got bytes.Buffer + toc.Print(&got) + + if got.String() != want { + t.Error("\nGot :", got.String(), "\nWant:", want) + } +} + +func TestNewGHDocWithDebug(t *testing.T) { + noMatterN := 1 + noMatterS := "test" + noMatterB := false + var got bytes.Buffer + + doc := NewGHDoc(noMatterS, noMatterB, noMatterN, noMatterN, + noMatterB, noMatterS, noMatterN, true) + doc.logger = log.New(&got, "", 0) + + want := "test" + doc.d(want) + if got.String() != want+"\n" { + t.Error("\nGot :", got.String(), "\nWant:", want) + } +} + +func TestGHDocConvert2HTML(t *testing.T) { + remotePath := "https://github.com/some/readme.md" + token := "some-gh-token" + doc := NewGHDoc(remotePath, true, 0, 0, + true, token, 4, false) + + // mock for getting remote raw README text + htmlResponse := []byte("raw md text") + doc.httpGetter = func(urlPath string) ([]byte, string, error) { + if urlPath != remotePath { + t.Error("Wrong urlPath. \nGot :", urlPath, "\nWant:", remotePath) + } + return htmlResponse, "text/plain;utf-8", nil + } + + // mock for converting md to txt + ghURL := "https://api.github.com/markdown/raw" + htmlBody := `

header>

some text` + doc.httpPoster = func(urlPath, filePath, token string) (string, error) { + if urlPath != ghURL { + if urlPath != remotePath { + t.Error("Wrong urlPath. \nGot :", urlPath, "\nWant:", ghURL) + } + } + return htmlBody, nil + } + if err := doc.Convert2HTML(); err != nil { + t.Error("Got error:", err) + } + if doc.html != htmlBody { + t.Error("Wrong html. \nGot :", doc.html, "\nWant:", htmlBody) + } +} + +func TestGHDocConvert2HTMLNonPlainText(t *testing.T) { + remotePath := "https://github.com/some/readme.md" + token := "some-gh-token" + doc := NewGHDoc(remotePath, true, 0, 0, + true, token, 4, false) + + // mock for getting remote raw README text + htmlResponse := []byte("raw md text") + doc.httpGetter = func(_ string) ([]byte, string, error) { + return htmlResponse, "text/html;utf-8", nil + } + // should not call converter to HTML + doc.httpPoster = func(urlPath, filePath, token string) (string, error) { + t.Error("Should not call httpPost (via convertMd2Html)") + return "", nil + } + if err := doc.Convert2HTML(); err != nil { + t.Error("Got error:", err) + } + if doc.html != string(htmlResponse) { + t.Error("Wrong html. \nGot :", doc.html, "\nWant:", string(htmlResponse)) + } +} + +func TestGHDocConvert2HTMLErrorConvert(t *testing.T) { + remotePath := "https://github.com/some/readme.md" + token := "some-gh-token" + errGet := errors.New("error from http get") + doc := NewGHDoc(remotePath, true, 0, 0, + true, token, 4, false) + + // mock for getting remote raw README text + doc.httpGetter = func(urlPath string) ([]byte, string, error) { + return nil, "", errGet + } + + err := doc.Convert2HTML() + if err == nil { + t.Error("Should get error from http get!") + } + + if !errors.Is(err, errGet) { + t.Error("Wrong error. 
\nGot :", err, "\nWant:", errGet) + } +} + +func TestGHDocConvert2HTMLLocalFileNotExists(t *testing.T) { + localPath := "/some/readme.md" + token := "some-gh-token" + doc := NewGHDoc(localPath, true, 0, 0, + true, token, 4, false) + + // should not be called + doc.httpGetter = func(_ string) ([]byte, string, error) { + t.Error("Should not call httpGet") + return nil, "", nil + } + + err := doc.Convert2HTML() + if err == nil { + t.Error("Should get error from file checking.") + } + + if !errors.Is(err, os.ErrNotExist) { + t.Error("Wrong error. \nGot :", err, "\nWant:", os.ErrNotExist) + } +} + +// Cover the changes of `ioutil.*` to `os.*` in Convert2HTML. +func TestGHDocConvert2HTML_issue35(t *testing.T) { + remotePath := "https://github.com/some/readme.md" + token := "some-gh-token" + + // enable debug + doc := NewGHDoc(remotePath, true, 0, 0, true, token, 4, true) + + // mock for getting remote raw README text + htmlResponse := []byte("raw md text") + doc.httpGetter = func(urlPath string) ([]byte, string, error) { + return htmlResponse, "text/plain;utf-8", nil + } + + // mock for converting md to txt + htmlBody := `

header>

some text` + doc.httpPoster = func(urlPath, filePath, token string) (string, error) { + return htmlBody, nil + } + + if err := doc.Convert2HTML(); err != nil { + t.Error("Got error:", err) + } + + if doc.html != htmlBody { + t.Error("Wrong html. \nGot :", doc.html, "\nWant:", htmlBody) + } +} + +func TestGrabToc_issue35(t *testing.T) { + // As of 2022-08-25, GitHub API returns the HTML in the below format. + doc := &GHDoc{ + html: ` +

One

+

Uno

+

Two

+

Dos

+

Three

+

Tres

`, + AbsPaths: false, + Depth: 0, + Indent: 2, + } + + tocExpected := []string{ + "* [One](#one)", + " * [Two](#two)", + " * [Three](#three)", + } + toc := *doc.GrabToc() + + // Require not empty + if len(toc) == 0 { + t.Fatal("returned ToC is empty. GrabToc could not parse the HTML") + } + + // Assert equal + for i, tocActual := range toc { + if tocExpected[i] != tocActual { + t.Error("Res :", tocActual, "\nExpected :", tocExpected) + } + } +} diff --git a/markdown/tools/github_markdown_toc/headerfinder.go b/markdown/tools/github_markdown_toc/headerfinder.go new file mode 100644 index 00000000..32c5bc86 --- /dev/null +++ b/markdown/tools/github_markdown_toc/headerfinder.go @@ -0,0 +1,122 @@ +package ghtoc + +import ( + "io" + "strings" + + "golang.org/x/net/html" + "golang.org/x/net/html/atom" +) + +// HxDepth represents the header depth with H1 being 0. +type HxDepth int + +// InvalidDepth designates that the data atom is not a valid Hx. +const InvalidDepth HxDepth = -1 + +// MaxHxDepth is the maximum HxDepth value. 
+// H6 is the last Hx tag (5 = 6 - 1) +const MaxHxDepth HxDepth = 5 + +// Header represents an HTML header +type Header struct { + Depth HxDepth + Href string + Name string +} + +func findHeadersInString(str string) []Header { + r := strings.NewReader(str) + return findHeaders(r) +} + +func findHeaders(r io.Reader) []Header { + hdrs := make([]Header, 0) + tokenizer := html.NewTokenizer(r) + for { + tt := tokenizer.Next() + switch tt { + case html.ErrorToken: + return hdrs + case html.StartTagToken: + t := tokenizer.Token() + if hdr, ok := createHeader(tokenizer, t); ok { + hdrs = append(hdrs, hdr) + } + } + } +} + +func getHxDepth(dataAtom atom.Atom) HxDepth { + hxAtoms := []atom.Atom{ + atom.H1, + atom.H2, + atom.H3, + atom.H4, + atom.H5, + atom.H6, + } + for depth, hxAtom := range hxAtoms { + if dataAtom == hxAtom { + return HxDepth(depth) + } + } + return InvalidDepth +} + +func createHeader(tokenizer *html.Tokenizer, token html.Token) (Header, bool) { + hxDepth := getHxDepth(token.DataAtom) + if hxDepth == InvalidDepth { + return Header{}, false + } + + var href string + var nameParts []string + // Start at 1 because we are inside the Hx tag + tokenDepth := 1 + afterAnchor := false + for { + tokenizer.Next() + t := tokenizer.Token() + switch t.Type { + case html.ErrorToken: + return Header{}, false + case html.StartTagToken: + tokenDepth++ + if t.DataAtom == atom.A { + if hrefAttr, ok := findAttribute(t.Attr, "", "href"); ok { + href = hrefAttr.Val + } else { + // Expected to find href attribute + return Header{}, false + } + } + case html.EndTagToken: + switch t.DataAtom { + case token.DataAtom: + // If we encountered the matching end tag for the Hx, then we are done + return Header{ + Depth: hxDepth, + Name: removeStuff(strings.Join(nameParts, " ")), + Href: href, + }, true + case atom.A: + afterAnchor = true + } + tokenDepth-- + case html.TextToken: + if afterAnchor { + nameParts = append(nameParts, removeStuff(t.Data)) + } + } + } +} + +func 
findAttribute(attrs []html.Attribute, namespace, key string) (html.Attribute, bool) { + for _, attr := range attrs { + if attr.Namespace == namespace && attr.Key == key { + return attr, true + } + } + return html.Attribute{}, false +} diff --git a/markdown/tools/github_markdown_toc/headerfinder_test.go b/markdown/tools/github_markdown_toc/headerfinder_test.go new file mode 100644 index 00000000..c8ada93c --- /dev/null +++ b/markdown/tools/github_markdown_toc/headerfinder_test.go @@ -0,0 +1,111 @@ +package ghtoc + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "golang.org/x/net/html" + "golang.org/x/net/html/atom" +) + +const singleH1 = ` +

Document Title

+` + +const singleH2 = ` +

+ + Interesting Section +

+` + +const multipleSections = ` +

Document Title

+Hi +

First Section

+Some Text +

First Subsection

+

Second Section

+

Second Subsection

+` + +func TestFindHeaders(t *testing.T) { + t.Run("single H1", func(t *testing.T) { + results := findHeadersInString(singleH1) + assert.Len(t, results, 1) + assert.Equal( + t, + Header{Depth: 0, Href: "#document-title", Name: "Document Title"}, + results[0], + ) + }) + t.Run("single H2", func(t *testing.T) { + results := findHeadersInString(singleH2) + assert.Len(t, results, 1) + assert.Equal( + t, + Header{Depth: 1, Href: "#interesting-section", Name: "Interesting Section"}, + results[0], + ) + }) + t.Run("multiple sections", func(t *testing.T) { + results := findHeadersInString(multipleSections) + assert.Len(t, results, 5) + assert.Equal( + t, + Header{Depth: 0, Href: "#document-title", Name: "Document Title"}, + results[0], + ) + assert.Equal( + t, + Header{Depth: 1, Href: "#first-section", Name: "First Section"}, + results[1], + ) + assert.Equal( + t, + Header{Depth: 2, Href: "#first-subsection", Name: "First Subsection"}, + results[2], + ) + assert.Equal( + t, + Header{Depth: 1, Href: "#second-section", Name: "Second Section"}, + results[3], + ) + assert.Equal( + t, + Header{Depth: 3, Href: "#second-subsection", Name: "Second Subsection"}, + results[4], + ) + }) +} + +func TestFindAttribute(t *testing.T) { + worldGreeting := html.Attribute{Namespace: "", Key: "greeting", Val: "Hello, World!"} + spaceGreeting := html.Attribute{Namespace: "outer-space", Key: "greeting", Val: "Hello, Space!"} + attrs := []html.Attribute{spaceGreeting, worldGreeting} + t.Run("attribute exists", func(t *testing.T) { + attr, ok := findAttribute(attrs, "", "greeting") + assert.True(t, ok) + assert.Equal(t, worldGreeting, attr) + + attr, ok = findAttribute(attrs, "outer-space", "greeting") + assert.True(t, ok) + assert.Equal(t, spaceGreeting, attr) + }) + t.Run("attribute does not exist", func(t *testing.T) { + _, ok := findAttribute(attrs, "", "doesnotexist") + assert.False(t, ok) + }) +} + +func TestGetHxDepth(t *testing.T) { + assert.Equal(t, HxDepth(0), getHxDepth(atom.H1)) + 
assert.Equal(t, HxDepth(1), getHxDepth(atom.H2)) + assert.Equal(t, HxDepth(2), getHxDepth(atom.H3)) + assert.Equal(t, HxDepth(3), getHxDepth(atom.H4)) + assert.Equal(t, HxDepth(4), getHxDepth(atom.H5)) + assert.Equal(t, HxDepth(5), getHxDepth(atom.H6)) + assert.Equal(t, InvalidDepth, getHxDepth(atom.A)) +} diff --git a/markdown/tools/github_markdown_toc/internal_test.go b/markdown/tools/github_markdown_toc/internal_test.go new file mode 100644 index 00000000..87503051 --- /dev/null +++ b/markdown/tools/github_markdown_toc/internal_test.go @@ -0,0 +1,124 @@ +package ghtoc + +import ( + "fmt" + "log" + "net/http" + "net/http/httptest" + "os" + "testing" +) + +func TestHttpGet(t *testing.T) { + expected := "dummy data" + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _, err := fmt.Fprint(w, expected) + if err != nil { + println(err) + } + })) + defer srv.Close() + + body, _, err := httpGet(srv.URL) + got := string(body) + + if err != nil { + t.Error("Should not be err", err) + } + if got != expected { + t.Error("\nGot :", got, "\nWant:", expected) + } +} + +func TestHttpGetForbidden(t *testing.T) { + txt := "please, do not try" + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusForbidden) + _, err := fmt.Fprint(w, txt) + if err != nil { + println(err) + } + })) + defer srv.Close() + + _, _, err := httpGet(srv.URL) + if err == nil { + t.Error("Should not not be nil") + } +} + +func createTmp(content string) (string, error) { + tmpFile, err := os.CreateTemp("", "example.*.txt") + if err != nil { + log.Fatal(err) + } + + if _, err := tmpFile.Write([]byte(content)); err != nil { + if err := tmpFile.Close(); err != nil { + return "", err + } + log.Fatal(err) + } + if err := tmpFile.Close(); err != nil { + log.Fatal(err) + } + + return tmpFile.Name(), nil +} + +func TestHttpPost(t *testing.T) { + token := "xxx-token-yyy" + srv := 
httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method != "POST" { + t.Error("Should be POST") + } + tokenPassed := r.Header.Get("Authorization") + tokenWanted := "token " + token + if tokenPassed != tokenWanted { + t.Error("Should pass token", tokenWanted, ", but passed: ", tokenPassed) + } + })) + defer srv.Close() + + fileName, err := createTmp("#some title") + if err != nil { + t.Error("Should not be err", err) + } + defer os.Remove(fileName) + + _, err = httpPost(srv.URL, fileName, token) + if err != nil { + t.Error("Should not be err", err) + } +} + +// Cover the changes of ioutil.ReadAll to io.ReadAll in doHTTPReq. +func Test_doHTTPReq_issue35(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + fmt.Fprintln(w, "Hello, client") + })) + defer srv.Close() + + dummyURL := srv.URL + + req, err := http.NewRequest("POST", dummyURL, nil) + if err != nil { + t.Fatal(err) + } + + resBody, resHeader, err := doHTTPReq(req) + + // Require no error + if err != nil { + t.Fatal("doHTTPReq should not be err:", err.Error()) + } + + // Assert response body + if string(resBody) != "Hello, client\n" { + t.Error("response body should be \"Hello, client\", but got:", string(resBody)) + } + // Assert response header + if resHeader != "text/plain; charset=utf-8" { + t.Error("response header should be \"Hello, client\", but got:", resHeader) + } +} diff --git a/markdown/tools/github_markdown_toc/internals.go b/markdown/tools/github_markdown_toc/internals.go new file mode 100644 index 00000000..fa15e0e5 --- /dev/null +++ b/markdown/tools/github_markdown_toc/internals.go @@ -0,0 +1,105 @@ +package ghtoc + +import ( + "bytes" + "errors" + "io" + "net/http" + "os" + "strings" +) + +const ( + // Version is a current app version + Version = "1.2.0" + userAgent = "github-markdown-toc.go v" + Version +) + +// doHTTPReq executes a particular http request +func doHTTPReq(req *http.Request) 
([]byte, string, error) { + req.Header.Set("User-Agent", userAgent) + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + return []byte{}, "", err + } + + defer resp.Body.Close() + body, err := io.ReadAll(resp.Body) + if err != nil { + return []byte{}, "", err + } + + if resp.StatusCode == http.StatusForbidden { + return []byte{}, resp.Header.Get("Content-type"), errors.New(string(body)) + } + + return body, resp.Header.Get("Content-type"), nil +} + +// Executes HTTP GET request +func httpGet(urlPath string) ([]byte, string, error) { + req, err := http.NewRequest("GET", urlPath, nil) + if err != nil { + return []byte{}, "", err + } + return doHTTPReq(req) +} + +// httpPost executes HTTP POST with file content +func httpPost(urlPath, filePath, token string) (string, error) { + file, err := os.Open(filePath) + if err != nil { + return "", err + } + defer file.Close() + + body := &bytes.Buffer{} + _, err = io.Copy(body, file) + if err != nil { + return "", err + } + + req, err := http.NewRequest("POST", urlPath, body) + if err != nil { + return "", err + } + + if token != "" { + req.Header.Add("Authorization", "token "+token) + } + req.Header.Set("Content-Type", "text/plain;charset=utf-8") + + resp, _, err := doHTTPReq(req) + return string(resp), err +} + +// removeStuff trims spaces, removes new lines and code tag from a string +func removeStuff(s string) string { + res := strings.Replace(s, "\n", "", -1) + res = strings.Replace(res, "", "", -1) + res = strings.Replace(res, "", "", -1) + res = strings.TrimSpace(res) + + return res +} + +// generate func of custom spaces indentation +func generateListIndentation(spaces int) func() string { + return func() string { + return strings.Repeat(" ", spaces) + } +} + +// Public + +// EscapeSpecChars Escapes special characters +func EscapeSpecChars(s string) string { + specChar := []string{"\\", "`", "*", "_", "{", "}", "#", "+", "-", ".", "!"} + res := s + + for _, c := range specChar { + res = 
strings.Replace(res, c, "\\"+c, -1) + } + return res +} diff --git a/markdown/tools/update_markdown_toc.sh b/markdown/tools/update_markdown_toc.sh index 990a0f12..5f7868e2 100755 --- a/markdown/tools/update_markdown_toc.sh +++ b/markdown/tools/update_markdown_toc.sh @@ -23,7 +23,7 @@ update_markdown_doc_sh_location=cgrindel_bazel_starlib/markdown/tools/update_mar update_markdown_doc_sh="$(rlocation "${update_markdown_doc_sh_location}")" || \ (echo >&2 "Failed to locate ${update_markdown_doc_sh_location}" && exit 1) -gh_md_toc_location=com_github_ekalinin_github_markdown_toc_go/cmd/gh-md-toc/gh-md-toc_/gh-md-toc +gh_md_toc_location=cgrindel_bazel_starlib/markdown/tools/github_markdown_toc/cmd/gh-md-toc/gh-md-toc_/gh-md-toc gh_md_toc="$(rlocation "${gh_md_toc_location}")" || \ (echo >&2 "Failed to locate ${gh_md_toc_location}" && exit 1)