Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add retry, skip-error, keywords options #23

Merged
merged 3 commits into from
Dec 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions cmd/flags/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ var (
UserAgent = client.DefaultUserAgent
Proxy = ""
ConfigRoot = ""
Keywords []string
Retry = 3
SkipError = true

// Common download flags.

Expand Down Expand Up @@ -72,13 +75,16 @@ func NewFetcher(category fetcher.Category, properties map[string]string) (fetche
Config: cc,
Category: category,
Formats: fs,
Keywords: Keywords,
Extract: Extract,
DownloadPath: DownloadPath,
InitialBookID: InitialBookID,
Rename: Rename,
Thread: Thread,
RateLimit: RateLimit,
Properties: properties,
Retry: Retry,
SkipError: SkipError,
})
}

Expand Down
1 change: 1 addition & 0 deletions cmd/k12.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ var k12Cmd = &cobra.Command{
Row("Download Path", flags.DownloadPath).
Row("Thread", flags.Thread).
Row("Thread Limit (req/min)", flags.RateLimit).
Row("Keywords", flags.Keywords).
Print()

flags.Website = k12Website
Expand Down
4 changes: 4 additions & 0 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,5 +46,9 @@ func init() {
persistentFlags.StringVarP(&flags.ConfigRoot, "config", "c", flags.ConfigRoot, "The config path for bookhunter")
persistentFlags.StringVar(&flags.Proxy, "proxy", flags.Proxy, "The request proxy")
persistentFlags.StringVarP(&flags.UserAgent, "user-agent", "a", flags.UserAgent, "The request user-agent")
persistentFlags.IntVarP(&flags.Retry, "retry", "r", flags.Retry, "The retry times for a failed download")
persistentFlags.BoolVarP(&flags.SkipError, "skip-error", "s", flags.SkipError,
"Continue to download the next book if the current book download failed")
persistentFlags.StringSliceVarP(&flags.Keywords, "keyword", "k", flags.Keywords, "The keywords for books")
persistentFlags.BoolVar(&log.EnableDebug, "verbose", false, "Print all the logs for debugging")
}
1 change: 1 addition & 0 deletions cmd/sobooks.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ var sobooksCmd = &cobra.Command{
Row("Initial ID", flags.InitialBookID).
Row("Rename File", flags.Rename).
Row("Thread", flags.Thread).
Row("Keywords", flags.Keywords).
Row("Thread Limit (req/min)", flags.RateLimit).
Print()

Expand Down
1 change: 1 addition & 0 deletions cmd/talebook.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ var talebookDownloadCmd = &cobra.Command{
Row("Initial ID", flags.InitialBookID).
Row("Rename File", flags.Rename).
Row("Thread", flags.Thread).
Row("Keywords", flags.Keywords).
Row("Thread Limit (req/min)", flags.RateLimit).
Print()

Expand Down
1 change: 1 addition & 0 deletions cmd/telegram.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ var telegramCmd = &cobra.Command{
Row("Initial ID", flags.InitialBookID).
Row("Rename File", flags.Rename).
Row("Thread", flags.Thread).
Row("Keywords", flags.Keywords).
Row("Thread Limit (req/min)", flags.RateLimit).
Print()

Expand Down
5 changes: 4 additions & 1 deletion internal/fetcher/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,16 @@ const (
type Config struct {
Category Category // The identity of the fetcher service.
Formats []file.Format // The formats that the user wants.
Keywords []string // The keywords that the user wants.
Extract bool // Extract the archives after download.
DownloadPath string // The path for storing the file.
InitialBookID int64 // The book id start to download.
Rename bool // Rename the file by using book ID.
Thread int // The number of download threads.
RateLimit int // Request per minute for a thread.
precessFile string // Define the download process.
Retry int // The retry times for a failed download.
SkipError bool // Continue to download the next book if the current book download failed.
processFile string // Define the download process.

// The extra configuration for a custom fetcher services.
Properties map[string]string
Expand Down
69 changes: 60 additions & 9 deletions internal/fetcher/fetcher.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
package fetcher

import (
"errors"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
"time"

"github.com/bookstairs/bookhunter/internal/driver"
"github.com/bookstairs/bookhunter/internal/file"
Expand Down Expand Up @@ -45,12 +50,17 @@ func (f *fetcher) Download() error {
}
log.Infof("Successfully query the download content counts: %d", size)

// Create download progress with ratelimit.
if f.precessFile == "" {
f.precessFile = defaultProgressFile
// Create download progress with rate limit.
if f.processFile == "" {
if len(f.Keywords) == 0 {
f.processFile = defaultProgressFile
} else {
// Avoid the download progress overloading.
f.processFile = strconv.FormatInt(time.Now().Unix(), 10) + defaultProgressFile
}
}
rate := f.RateLimit * f.Thread
f.progress, err = progress.NewProgress(f.InitialBookID, size, rate, filepath.Join(configPath, f.precessFile))
f.progress, err = progress.NewProgress(f.InitialBookID, size, rate, filepath.Join(configPath, f.processFile))
if err != nil {
return err
}
Expand Down Expand Up @@ -90,7 +100,7 @@ func (f *fetcher) Download() error {
}

// startDownload will start a download thread.
func (f *fetcher) startDownload() {
func (f *fetcher) startDownload() { //nolint:gocyclo
thread:
for {
bookID := f.progress.AcquireBookID()
Expand All @@ -100,7 +110,7 @@ thread:
break thread
}

// Start download the given book ID.
// Start downloading the given book ID.
// The error will be sent to the channel.

// Acquire the available file formats
Expand All @@ -117,12 +127,30 @@ thread:
log.Warnf("[%d/%d] No downloadable files found.", bookID, f.progress.Size())
}

// Filter the name, skip the progress if the name isn't the desired one.
if len(formats) != 0 && len(f.Keywords) != 0 {
formats = f.filterNames(formats)
if len(formats) == 0 {
log.Warnf("[%d/%d] The files found by the given keywords", bookID, f.progress.Size())
// No need to save the download progress.
continue
}
}

// Download the file by formats one by one.
for format, share := range formats {
err := f.downloadFile(bookID, format, share)
if err != nil && err != ErrFileNotExist {
f.errs <- err
break thread
for retry := 0; err != nil && !errors.Is(err, ErrFileNotExist) && retry < f.Retry; retry++ {
fmt.Printf("Download book id %d failed: %v, retry (%d/%d)\n", bookID, err, retry, f.Retry)
err = f.downloadFile(bookID, format, share)
}

if err != nil && !errors.Is(err, ErrFileNotExist) {
fmt.Printf("Download book id %d failed: %v\n", bookID, err)
if !f.SkipError {
f.errs <- err
break thread
}
}
}

Expand All @@ -137,6 +165,8 @@ thread:

// downloadFile in a thread.
func (f *fetcher) downloadFile(bookID int64, format file.Format, share driver.Share) error {
f.progress.TakeRateLimit()
log.Debugf("Start download book id %d, format %s, share %v.", bookID, format, share)
// Create the file writer.
writer, err := f.creator.NewWriter(bookID, f.progress.Size(), share.FileName, share.SubPath, format, share.Size)
if err != nil {
Expand All @@ -161,3 +191,24 @@ func (f *fetcher) filterFormats(formats map[file.Format]driver.Share) map[file.F
}
return fs
}

// filterNames returns a new map holding only the entries of formats whose
// share file name matches at least one of the fetcher's configured keywords.
// The input map is left untouched.
func (f *fetcher) filterNames(formats map[file.Format]driver.Share) map[file.Format]driver.Share {
	matched := map[file.Format]driver.Share{}
	for format, share := range formats {
		// Drop every share whose file name matches none of the keywords.
		if !matchKeywords(share.FileName, f.Keywords) {
			continue
		}
		matched[format] = share
	}
	return matched
}

// matchKeywords reports whether title contains at least one of the given
// keywords as a plain, case-sensitive substring. It returns false when
// keywords is empty.
func matchKeywords(title string, keywords []string) bool {
	found := false
	for i := 0; i < len(keywords) && !found; i++ {
		// Plain substring matching only; regular expressions are not supported.
		found = strings.Contains(title, keywords[i])
	}
	return found
}
6 changes: 5 additions & 1 deletion internal/fetcher/telegram.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"path/filepath"
"strconv"
"strings"
"time"

"github.com/gotd/td/tg"

Expand All @@ -30,7 +31,10 @@ func newTelegramService(config *Config) (service, error) {
appHash := config.Property("appHash")

// Change the process file name.
config.precessFile = strings.ReplaceAll(channelID, "/", "_") + ".db"
config.processFile = strings.ReplaceAll(channelID, "/", "_") + ".db"
if len(config.Keywords) == 0 {
config.processFile = strconv.FormatInt(time.Now().Unix(), 10) + config.processFile
}

tel, err := telegram.New(channelID, mobile, appID, appHash, sessionPath, config.Proxy)
if err != nil {
Expand Down
13 changes: 9 additions & 4 deletions internal/progress/progress.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ var (
)

type Progress interface {
// TakeRateLimit would wait until the rate limit is available.
TakeRateLimit()

// AcquireBookID would find the book id from the assign array.
AcquireBookID() int64

Expand All @@ -42,7 +45,7 @@ type bitProgress struct {
file *os.File // The Progress file path for download progress.
}

// NewProgress Create a storge for save the download progress.
// NewProgress creates a storage for saving the download progress.
func NewProgress(start, size int64, rate int, path string) (Progress, error) {
if start < 1 {
return nil, ErrStartBookID
Expand Down Expand Up @@ -127,14 +130,16 @@ func loadStorage(file *os.File) (*bitset.BitSet, error) {
return set, nil
}

// TakeRateLimit blocks the caller until the configured rate limit allows the
// next request. It simply delegates to the limiter held by the progress
// storage, so callers can throttle each download independently of book ID
// acquisition.
func (storage *bitProgress) TakeRateLimit() {
storage.limit.Take()
}

// AcquireBookID would find the book id from the assign array.
func (storage *bitProgress) AcquireBookID() int64 {
storage.lock.Lock()
defer storage.lock.Unlock()

// Block until the rate meets the given config.
storage.limit.Take()

for i := uint(0); i < storage.assigned.Len(); i++ {
if !storage.assigned.Test(i) {
storage.assigned.Set(i)
Expand Down