From 195b75d1859fcd8d15e6e3152a210789c23df2f4 Mon Sep 17 00:00:00 2001
From: Julien Voisin
Date: Tue, 24 Dec 2024 05:16:02 +0000
Subject: [PATCH] refactor(rewriter): use custom title case converter implementation instead of `golang.org/x/text/cases.Title()`
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The implementation is equivalent to
`cases.Title(language.English).String(strings.ToLower(…))`, and this is the
only place in miniflux where "golang.org/x/text/cases" and
"golang.org/x/text/language" are (directly) used.

This reduces the binary size from 27015590 to 26686112 on my machine.

Kudos to https://gsa.zxilly.dev for making it straightforward to catch things
like this.
---
 go.mod                                       |  2 +-
 internal/reader/rewrite/rewrite_functions.go | 19 +++++++++++++++++++
 internal/reader/rewrite/rewriter.go          |  5 +----
 3 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/go.mod b/go.mod
index 50c78a06cba..38341a407d4 100644
--- a/go.mod
+++ b/go.mod
@@ -17,7 +17,6 @@ require (
 	golang.org/x/net v0.33.0
 	golang.org/x/oauth2 v0.24.0
 	golang.org/x/term v0.27.0
-	golang.org/x/text v0.21.0
 )
 
 require (
@@ -42,6 +41,7 @@ require (
 	github.com/tdewolff/parse/v2 v2.7.19 // indirect
 	github.com/x448/float16 v0.8.4 // indirect
 	golang.org/x/sys v0.28.0 // indirect
+	golang.org/x/text v0.21.0 // indirect
 	google.golang.org/protobuf v1.34.2 // indirect
 )
 
diff --git a/internal/reader/rewrite/rewrite_functions.go b/internal/reader/rewrite/rewrite_functions.go
index a696c22a2c7..1b48eb9b388 100644
--- a/internal/reader/rewrite/rewrite_functions.go
+++ b/internal/reader/rewrite/rewrite_functions.go
@@ -11,6 +11,7 @@ import (
 	"net/url"
 	"regexp"
 	"strings"
+	"unicode"
 
 	"miniflux.app/v2/internal/config"
 
@@ -26,6 +27,24 @@ var (
 	textLinkRegex = regexp.MustCompile(`(?mi)(\bhttps?:\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])`)
 )
 
+// titlelize returns a copy of the string s with all Unicode letters that begin words
+// mapped to their Unicode title case.
+func titlelize(s string) string {
+	// A closure is used here to remember the previous character
+	// so that we can check if there is a space preceding the current
+	// character.
+	previous := ' '
+	return strings.Map(
+		func(current rune) rune {
+			if unicode.IsSpace(previous) {
+				previous = current
+				return unicode.ToTitle(current)
+			}
+			previous = current
+			return current
+		}, strings.ToLower(s))
+}
+
 func addImageTitle(entryContent string) string {
 	doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
 	if err != nil {
diff --git a/internal/reader/rewrite/rewriter.go b/internal/reader/rewrite/rewriter.go
index 4f8ee9512c6..e2c26b6c6f9 100644
--- a/internal/reader/rewrite/rewriter.go
+++ b/internal/reader/rewrite/rewriter.go
@@ -11,9 +11,6 @@ import (
 
 	"miniflux.app/v2/internal/model"
 	"miniflux.app/v2/internal/urllib"
-
-	"golang.org/x/text/cases"
-	"golang.org/x/text/language"
 )
 
 type rule struct {
@@ -94,7 +91,7 @@ func (rule rule) applyRule(entryURL string, entry *model.Entry) {
 	case "remove_tables":
 		entry.Content = removeTables(entry.Content)
 	case "remove_clickbait":
-		entry.Title = cases.Title(language.English).String(strings.ToLower(entry.Title))
+		entry.Title = titlelize(entry.Title)
 	}
 }
 