From d2843b90e8e8c47b4b2f6a1b42ff9a6a7432e295 Mon Sep 17 00:00:00 2001
From: Alexey Kuznetsov
Date: Sat, 11 Mar 2017 12:56:38 +0300
Subject: [PATCH 1/2] fix encoding

---
 utils.go | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/utils.go b/utils.go
index 7e600def..e138bace 100644
--- a/utils.go
+++ b/utils.go
@@ -27,6 +27,7 @@ import (
 	"math/rand"
 	"net/http"
 	"net/url"
+	"regexp"
 	"strings"
 	"time"
 	"unicode/utf8"
@@ -397,6 +398,12 @@ func ParseFeed(c appengine.Context, contentType, origUrl, fetchUrl string, body
 		}
 		if enc != encoding.Nop {
 			cr = nilCharsetReader
+			preview := string(body[:128]) //
+			r := regexp.MustCompile(``)
+			rr := r.FindStringSubmatch(preview)
+			if len(rr) > 1 {
+				enc, _ = charset.Lookup(rr[1])
+			}
 			body, err = ioutil.ReadAll(transform.NewReader(bytes.NewReader(body), enc.NewDecoder()))
 			if err != nil {
 				return nil, nil, err

From 3da24d36946839c234c5ed66dcfc8aa4f9264bf5 Mon Sep 17 00:00:00 2001
From: Alexey Kuznetsov
Date: Tue, 14 Mar 2017 18:45:04 +0300
Subject: [PATCH 2/2] fix regexp

---
 utils.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils.go b/utils.go
index e138bace..0fdd17b4 100644
--- a/utils.go
+++ b/utils.go
@@ -399,7 +399,7 @@ func ParseFeed(c appengine.Context, contentType, origUrl, fetchUrl string, body
 		if enc != encoding.Nop {
 			cr = nilCharsetReader
 			preview := string(body[:128]) //
-			r := regexp.MustCompile(``)
+			r := regexp.MustCompile(`<\?xml.*encoding="(.*)".*\?>`)
 			rr := r.FindStringSubmatch(preview)
 			if len(rr) > 1 {
 				enc, _ = charset.Lookup(rr[1])