From 2456ad9ba7031867b6eecacb13a12ac02cf4915f Mon Sep 17 00:00:00 2001
From: Ha Tien Loi
Date: Mon, 9 Oct 2023 13:16:04 +0700
Subject: [PATCH] feat: add zingmp3 extractor (#1280)

* feat: add zingmp3

* adhoc: temp commit

* register the extractor

---------

Co-authored-by: Xinzhao Xu
---
 .github/workflows/stream_zingmp3.yml |  31 ++++
 README.md                            |   1 +
 app/register.go                      |   1 +
 extractors/zingmp3/zingmp3.go        | 264 +++++++++++++++++++++++++++++++++++++++++++
 extractors/zingmp3/zingmp3_test.go   |  44 +++++
 go.mod                               |   1 +
 go.sum                               |   2 +
 utils/utils.go                       |   2 +-
 8 files changed, 345 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/stream_zingmp3.yml
 create mode 100644 extractors/zingmp3/zingmp3.go
 create mode 100644 extractors/zingmp3/zingmp3_test.go

diff --git a/.github/workflows/stream_zingmp3.yml b/.github/workflows/stream_zingmp3.yml
new file mode 100644
index 000000000..e912e4cd2
--- /dev/null
+++ b/.github/workflows/stream_zingmp3.yml
@@ -0,0 +1,31 @@
+name: zingmp3
+
+on:
+  push:
+    paths:
+      - "extractors/zingmp3/*.go"
+      - ".github/workflows/stream_zingmp3.yml"
+  pull_request:
+    paths:
+      - "extractors/zingmp3/*.go"
+      - ".github/workflows/stream_zingmp3.yml"
+  schedule:
+    # run ci weekly
+    - cron: "0 0 * * 0"
+
+jobs:
+  test:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        go: ["1.21"]
+        os: [ubuntu-latest]
+    name: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-go@v2
+        with:
+          go-version: ${{ matrix.go }}
+
+      - name: Test
+        run: go test -timeout 5m -race -coverpkg=./... -coverprofile=coverage.txt github.com/iawia002/lux/extractors/zingmp3
diff --git a/README.md b/README.md
index 0203ef154..7e820f713 100644
--- a/README.md
+++ b/README.md
@@ -644,6 +644,7 @@ $ lux -j "https://www.bilibili.com/video/av20203945"
 | 知乎 | | ✓ | | | | | [![zhihu](https://github.com/iawia002/lux/actions/workflows/stream_zhihu.yml/badge.svg)](https://github.com/iawia002/lux/actions/workflows/stream_zhihu.yml/) |
 | Rumble | | ✓ | | | | | [![rumble](https://github.com/iawia002/lux/actions/workflows/stream_rumble.yml/badge.svg)](https://github.com/iawia002/lux/actions/workflows/stream_rumble.yml/) |
 | 小红书 | | ✓ | | | | | [![xiaohongshu](https://github.com/iawia002/lux/actions/workflows/stream_xiaohongshu.yml/badge.svg)](https://github.com/iawia002/lux/actions/workflows/stream_xiaohongshu.yml/) |
+| Zing MP3 | | ✓ | | ✓ | | | [![zingmp3](https://github.com/iawia002/lux/actions/workflows/stream_zingmp3.yml/badge.svg)](https://github.com/iawia002/lux/actions/workflows/stream_zingmp3.yml/) |
 
 ## Known issues
 
diff --git a/app/register.go b/app/register.go
index 07c72897c..e78df4805 100644
--- a/app/register.go
+++ b/app/register.go
@@ -43,4 +43,5 @@ import (
 	_ "github.com/iawia002/lux/extractors/youku"
 	_ "github.com/iawia002/lux/extractors/youtube"
 	_ "github.com/iawia002/lux/extractors/zhihu"
+	_ "github.com/iawia002/lux/extractors/zingmp3"
 )
diff --git a/extractors/zingmp3/zingmp3.go b/extractors/zingmp3/zingmp3.go
new file mode 100644
index 000000000..0ca86365b
--- /dev/null
+++ b/extractors/zingmp3/zingmp3.go
@@ -0,0 +1,264 @@
+package zingmp3
+
+import (
+	"crypto/hmac"
+	"crypto/sha256"
+	"crypto/sha512"
+	"encoding/hex"
+	"fmt"
+	"maps"
+	"net/http"
+	neturl "net/url"
+	"regexp"
+	"sort"
+
+	"github.com/buger/jsonparser"
+	"github.com/pkg/errors"
+
+	"github.com/iawia002/lux/extractors"
+	"github.com/iawia002/lux/request"
+	"github.com/iawia002/lux/utils"
+)
+
+func init() {
+	zingmp3Extractor := New()
+	extractors.Register("zingmp3", zingmp3Extractor)
+	extractors.Register("zing", zingmp3Extractor)
+}
+
+type extractor struct{}
+
+// New returns a zingmp3 extractor.
+func New() extractors.Extractor {
+	return &extractor{}
+}
+
+// params holds the query parameters of one API request.
+type params map[string]string
+
+// ApiSlugs maps a page type to the path of the API endpoint used for it.
+var ApiSlugs = map[string]string{
+	"bai-hat":        "/api/v2/page/get/song",
+	"embed":          "/api/v2/page/get/song",
+	"video-clip":     "/api/v2/page/get/video",
+	"lyric":          "/api/v2/lyric/get/lyric",
+	"song-streaming": "/api/v2/song/get/streaming",
+}
+
+// Domain is the base URL that every API path is appended to.
+const Domain = "https://zingmp3.vn"
+
+// urlPattern captures the page type (group 1) and the media id (group 2) of a
+// supported URL. It is compiled once rather than on every Extract call.
+var urlPattern = regexp.MustCompile(`https?://(?:mp3\.zing|zingmp3)\.vn/(?P<type>(?:bai-hat|video-clip|embed))/[^/?#]+/(?P<id>\w+)(?:\.html|\?)`)
+
+// Extract is the main function to extract the data.
+//
+// It returns a wrapped extractors.ErrURLParseFailed when url does not match
+// urlPattern; otherwise it returns a single Data entry holding the streams
+// found for the page.
+func (e *extractor) Extract(url string, option extractors.Options) ([]*extractors.Data, error) {
+	urlMatcher := urlPattern.FindStringSubmatch(url)
+	if len(urlMatcher) == 0 {
+		return nil, errors.WithStack(extractors.ErrURLParseFailed)
+	}
+	urlType := urlMatcher[1]
+	id := urlMatcher[2]
+	if err := updatingCookies(); err != nil {
+		return nil, errors.WithStack(err)
+	}
+	data, err := callApi(urlType, params{"id": id})
+	if err != nil {
+		return nil, err
+	}
+	title, _ := jsonparser.GetString(data, "title")
+	var contentType extractors.DataType
+	var source []byte
+	if urlType == "video-clip" {
+		// The "mp4" entry of the streaming object is replaced with the
+		// "source" object reported by the api.mp3.zing.vn video-info endpoint.
+		// NOTE(review): errors from this lookup are dropped; confirm whether a
+		// failed lookup should abort the extraction instead.
+		source, _, _, _ = jsonparser.Get(data, "streaming")
+		api := fmt.Sprintf(`http://api.mp3.zing.vn/api/mobile/video/getvideoinfo?requestdata={"id":"%s"}`, id)
+		res, _ := request.Get(api, api, nil)
+		newSource, _, _, _ := jsonparser.Get([]byte(res), "source")
+		source, _ = jsonparser.Set(source, newSource, "mp4")
+		contentType = extractors.DataTypeVideo
+	} else {
+		contentType = extractors.DataTypeAudio
+		source, err = callApi("song-streaming", params{"id": id})
+		if err != nil {
+			return nil, err
+		}
+	}
+	streams := make(map[string]*extractors.Stream)
+	if err := jsonparser.ObjectEach(source, func(k []byte, v []byte, dataType jsonparser.ValueType, offset int) error {
+		key := string(k)
+		value := string(v)
+		if value == "" || value == "VIP" {
+			return nil
+		}
+
+		// Handle for audio
+		if key != "mp4" && key != "hls" {
+			size, _ := request.Size(value, url)
+			urlData := &extractors.Part{
+				URL:  value,
+				Ext:  "mp3",
+				Size: size,
+			}
+			streams["default"] = &extractors.Stream{
+				Parts: []*extractors.Part{urlData},
+			}
+			return nil
+		}
+
+		// Handle for video
+		return jsonparser.ObjectEach(v, func(kSource []byte, vSource []byte, _ jsonparser.ValueType, _ int) error {
+			resolution := string(kSource)
+			videoUrl := string(vSource)
+			if resolution == "" {
+				return nil
+			}
+			if resolution == "hls" {
+				urls, _ := utils.M3u8URLs(videoUrl)
+				parts := make([]*extractors.Part, 0)
+				for _, u := range urls {
+					parts = append(parts, &extractors.Part{
+						URL: u,
+						Ext: "ts",
+					})
+				}
+				streams[resolution] = &extractors.Stream{
+					ID:      resolution,
+					Parts:   parts,
+					NeedMux: false,
+				}
+				return nil
+			}
+			size, _ := request.Size(videoUrl, url)
+			streams[fmt.Sprintf("mp4-%s", resolution)] = &extractors.Stream{
+				Parts: []*extractors.Part{{
+					URL:  videoUrl,
+					Ext:  "mp4",
+					Size: size,
+				}},
+			}
+			return nil
+		})
+	}); err != nil {
+		return nil, errors.WithStack(err)
+	}
+
+	return []*extractors.Data{
+		{
+			Site:    "Zing MP3 zingmp3.vn",
+			Title:   title,
+			Type:    contentType,
+			Streams: streams,
+			URL:     url,
+		},
+	}, nil
+}
+
+// callApi requests the endpoint registered for urlType with the parameters p
+// and returns the raw "data" member of the JSON response.
+func callApi(urlType string, p params) ([]byte, error) {
+	api := generateApi(urlType, p)
+	res, err := request.GetByte(api, api, nil)
+	if err != nil {
+		return nil, errors.WithStack(err)
+	}
+	data, _, _, err := jsonparser.Get(res, "data")
+	if err != nil {
+		return nil, errors.WithStack(err)
+	}
+	return data, nil
+}
+
+// updatingCookies requests the song endpoint with an empty id and installs the
+// cookies of the response as the cookie option used by later requests.
+//
+// The API does not always return cookies, so the request is retried up to five
+// times. When no attempt yields a cookie the function still returns nil and
+// the request options are left unchanged.
+func updatingCookies() error {
+	for i := 0; i < 5; i++ {
+		api := generateApi("bai-hat", params{"id": ""})
+		res, err := request.Request(http.MethodGet, api, nil, nil)
+		if err != nil {
+			return err
+		}
+		// NOTE(review): the serialized cookies are joined without a separator;
+		// confirm that request.Options.Cookie accepts this form.
+		cookies := ""
+		for _, value := range res.Cookies() {
+			cookies += value.String()
+		}
+		res.Body.Close() // nolint
+		if cookies != "" {
+			request.SetOptions(request.Options{
+				Cookie: cookies,
+			})
+			return nil
+		}
+	}
+	return nil
+}
+
+// generateApi builds the signed request URL for the endpoint of urlType.
+//
+// The signature covers p plus a fixed "ctime" parameter; "apiKey" and "sig" are
+// added to the query afterwards. p itself is not modified.
+func generateApi(urlType string, p params) string {
+	slugApi := ApiSlugs[urlType]
+
+	// Work on a copy so the caller's map stays untouched.
+	query := maps.Clone(p)
+	if query == nil {
+		query = params{}
+	}
+	query["ctime"] = "1"
+	sig := generateSig(slugApi, query)
+	query["apiKey"] = "X5BM3w8N7MKozC0B85o4KMlzLZKhV00y"
+	query["sig"] = sig
+
+	urlParams := neturl.Values{}
+	for key, value := range query {
+		urlParams.Add(key, value)
+	}
+	return fmt.Sprintf("%s%s?%s", Domain, slugApi, urlParams.Encode())
+}
+
+// generateSig computes the request signature for the API path slugApi.
+//
+// The parameters are joined as "key=value" pairs in ascending key order and
+// hashed with SHA-256; the API path followed by that hex digest is then signed
+// with HMAC-SHA512 and returned hex-encoded.
+func generateSig(slugApi string, p params) string {
+	// Map iteration order is unspecified, so the keys are walked in sorted
+	// order to get the same signature for the same parameters on every call.
+	str := ""
+	for _, key := range sortedKeys(p) {
+		str += fmt.Sprintf("%s=%s", key, p[key])
+	}
+	h := sha256.New()
+	h.Write([]byte(str))
+	sha256Value := hex.EncodeToString(h.Sum(nil))
+	var passwordBytes = []byte(fmt.Sprintf("%s%s", slugApi, sha256Value))
+	salt := []byte("acOrvUS15XRW2o9JksiK1KgQ6Vbds8ZW")
+	hmacHashed := hmac.New(sha512.New, salt)
+	hmacHashed.Write(passwordBytes)
+	return hex.EncodeToString(hmacHashed.Sum(nil))
+}
+
+// sortedKeys returns the keys of p in ascending order.
+func sortedKeys(p params) []string {
+	keys := make([]string, 0, len(p))
+	for k := range p {
+		keys = append(keys, k)
+	}
+	sort.Strings(keys)
+	return keys
+}
diff --git a/extractors/zingmp3/zingmp3_test.go b/extractors/zingmp3/zingmp3_test.go
new file mode 100644
index 000000000..39ca97c4d
--- /dev/null
+++ b/extractors/zingmp3/zingmp3_test.go
@@ -0,0 +1,44 @@
+package zingmp3
+
+import (
+	"testing"
+
+	"github.com/iawia002/lux/extractors"
+	"github.com/iawia002/lux/test"
+)
+
+func TestDownload(t *testing.T) {
+	tests := []struct {
+		name string
+		args test.Args
+	}{
+		{
+			name: "Host is mp3.zing.vn",
+			args: test.Args{
+				URL:   "https://mp3.zing.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html",
+				Title: "Xa Mãi Xa",
+			},
+		},
+		{
+			name: "Host is zingmp3.vn",
+			args: test.Args{
+				URL:   "https://zingmp3.vn/bai-hat/SOLO-JENNIE/ZW9FID6Z.html",
+				Title: "SOLO",
+			},
+		},
+		{
+			name: "Video clip",
+			args: test.Args{
+				URL:   "https://zingmp3.vn/video-clip/Suong-Hoa-Dua-Loi-K-ICM-RYO/ZO8ZF7C7.html",
+				Title: "Sương Hoa Đưa Lối",
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			data, err := New().Extract(tt.args.URL, extractors.Options{})
+			test.CheckError(t, err)
+			test.Check(t, tt.args, data[0])
+		})
+	}
+}
diff --git a/go.mod b/go.mod
index 1787dbb5c..61ca062dc 100644
--- a/go.mod
+++ b/go.mod
@@ -5,6 +5,7 @@ go 1.21
 require (
 	github.com/MercuryEngineering/CookieMonster v0.0.0-20180304172713-1584578b3403
 	github.com/PuerkitoBio/goquery v1.8.0
+	github.com/buger/jsonparser v1.1.1
 	github.com/cheggaaa/pb/v3 v3.0.8
 	github.com/dop251/goja v0.0.0-20230304130813-e2f543bf4b4c
 	github.com/fatih/color v1.13.0
diff --git a/go.sum b/go.sum
index 39042ee97..2f55bf070 100644
--- a/go.sum
+++ b/go.sum
@@ -11,6 +11,8 @@ github.com/bitly/go-simplejson v0.5.0 h1:6IH+V8/tVMab511d5bn4M7EwGXZf9Hj6i2xSwkN
 github.com/bitly/go-simplejson v0.5.0/go.mod h1:cXHtHw4XUPsvGaxgjIAn8PhEWG9NfngEKAMDJEczWVA=
 github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4YnC6+E63dPcxHo2sUxDIu8g3QgEJdRY=
 github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4=
+github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
+github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
 github.com/cheggaaa/pb/v3 v3.0.8 h1:bC8oemdChbke2FHIIGy9mn4DPJ2caZYQnfbRqwmdCoA=
 github.com/cheggaaa/pb/v3 v3.0.8/go.mod h1:UICbiLec/XO6Hw6k+BHEtHeQFzzBH4i2/qk/ow1EJTA=
 github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
diff --git a/utils/utils.go b/utils/utils.go
index 4c6091ba5..623ac7a33 100644
--- a/utils/utils.go
+++ b/utils/utils.go
@@ -62,7 +62,7 @@ func Domain(url string) string {
 	domainPattern := `([a-z0-9][-a-z0-9]{0,62})\.` +
 		`(com\.cn|com\.hk|` +
 		`cn|com|net|edu|gov|biz|org|info|pro|name|xxx|xyz|be|` +
-		`me|top|cc|tv|tt)`
+		`me|top|cc|tv|tt|vn)`
 	domain := MatchOneOf(url, domainPattern)
 	if domain != nil {
 		return domain[1]