diff --git a/bitmagnet.io/guides/reprocess-reclassify.md b/bitmagnet.io/guides/reprocess-reclassify.md index b91db8d7..78938018 100644 --- a/bitmagnet.io/guides/reprocess-reclassify.md +++ b/bitmagnet.io/guides/reprocess-reclassify.md @@ -25,6 +25,7 @@ The `reprocess` command will re-queue torrents to allow the latest updates to be To reprocess all torrents in your index, simply run `bitmagnet reprocess`. If you've indexed a lot of torrents, this will take a while, so there are a few options available to control exactly what gets reprocessed: - `apisDisabled`: Disable API calls during classification. This makes the classifier run a _lot_ faster, but disables identification with external services such as TMDB (metadata already gathered from external APIs is not lost). +- `localSearchDisabled`: Disable the local search query on the content table for matching torrents to known content. The classifier normally attempts this local search before making any external API call, but it is an expensive query, so this flag allows it to be skipped. - `contentType`: Only reprocess torrents of a certain content type. For example, `bitmagnet reprocess --contentType movie` will only reprocess movies. Multiple content types can be comma separated, and `null` refers to torrents of unknown content type. - `orphans`: Only reprocess torrents that have no content record. - `classifyMode`: This controls how already matched torrents are handled. 
diff --git a/internal/app/cmd/processcmd/command.go b/internal/app/cmd/processcmd/command.go index 8da92736..bc3f95b6 100644 --- a/internal/app/cmd/processcmd/command.go +++ b/internal/app/cmd/processcmd/command.go @@ -39,6 +39,11 @@ func New(p Params) (Result, error) { Value: false, Usage: "disable API calls for the classifier workflow", }, + &cli.BoolFlag{ + Name: "localSearchDisabled", + Value: false, + Usage: "disable local search queries for the classifier workflow", + }, }, Action: func(ctx *cli.Context) error { pr, err := p.Processor.Get() @@ -53,6 +58,9 @@ func New(p Params) (Result, error) { if ctx.Bool("apisDisabled") { flags["apis_enabled"] = false } + if ctx.Bool("localSearchDisabled") { + flags["local_search_enabled"] = false + } var infoHashes []protocol.ID for _, infoHash := range ctx.StringSlice("infoHash") { id, err := protocol.ParseID(infoHash) diff --git a/internal/app/cmd/reprocesscmd/command.go b/internal/app/cmd/reprocesscmd/command.go index 02f92a43..dbdc0262 100644 --- a/internal/app/cmd/reprocesscmd/command.go +++ b/internal/app/cmd/reprocesscmd/command.go @@ -66,6 +66,11 @@ func New(p Params) (Result, error) { Value: false, Usage: "disable API calls for the classifier workflow", }, + &cli.BoolFlag{ + Name: "localSearchDisabled", + Value: false, + Usage: "disable local search queries for the classifier workflow", + }, }, Action: func(ctx *cli.Context) error { var classifyMode processor.ClassifyMode @@ -85,6 +90,9 @@ func New(p Params) (Result, error) { if ctx.Bool("apisDisabled") { flags["apis_enabled"] = false } + if ctx.Bool("localSearchDisabled") { + flags["local_search_enabled"] = false + } var contentTypes []model.NullContentType for _, contentType := range ctx.StringSlice("contentType") { if contentType == "null" { diff --git a/internal/classifier/classifier.core.yaml b/internal/classifier/classifier.core.yaml index 4d335dae..a6c4a09b 100644 --- a/internal/classifier/classifier.core.yaml +++ b/internal/classifier/classifier.core.yaml 
@@ -89,7 +89,10 @@ workflows: condition: "!result.hasAttachedContent && result.hasBaseTitle" if_action: find_match: - - attach_local_content_by_search + - if_else: + condition: "flags.local_search_enabled" + if_action: attach_local_content_by_search + else_action: unmatched - if_else: condition: "flags.apis_enabled && flags.tmdb_enabled" if_action: attach_tmdb_content_by_search @@ -226,11 +229,13 @@ keywords: - yvm - (#|10|11|12|13|14|15|16|17) ?y ?o flag_definitions: + local_search_enabled: bool apis_enabled: bool tmdb_enabled: bool delete_content_types: content_type_list delete_xxx: bool flags: + local_search_enabled: true apis_enabled: true tmdb_enabled: true delete_content_types: [] diff --git a/internal/processor/batch/queue/handler.go b/internal/processor/batch/queue/handler.go index 29dbb5c7..543c0539 100644 --- a/internal/processor/batch/queue/handler.go +++ b/internal/processor/batch/queue/handler.go @@ -72,7 +72,7 @@ func New(p Params) Result { priority := 10 // prioritise jobs where API calls are disabled as they will run faster: if msg.ApisDisabled() { - priority = 5 + priority = 4 } maxInfoHash := msg.InfoHashGreaterThan chunkSize := uint(0) diff --git a/internal/processor/hook_0_9_0/hook.go b/internal/processor/hook_0_9_0/hook.go index 23c11c7c..b8bd07a2 100644 --- a/internal/processor/hook_0_9_0/hook.go +++ b/internal/processor/hook_0_9_0/hook.go @@ -62,7 +62,8 @@ func execHook(d *dao.Query) error { job, jobErr := batch.NewQueueJob(batch.MessageParams{ UpdatedBefore: time.Now(), ClassifierFlags: classifier.Flags{ - "apis_enabled": false, + "apis_enabled": false, + "local_search_enabled": false, }, ChunkSize: 10_000, BatchSize: 100, diff --git a/internal/processor/queue/handler.go b/internal/processor/queue/handler.go index dfb545f4..c5073034 100644 --- a/internal/processor/queue/handler.go +++ b/internal/processor/queue/handler.go @@ -34,6 +34,13 @@ func New(p Params) Result { if err := json.Unmarshal([]byte(job.Payload), msg); err != nil { return 
err } + // The following is somewhat of a hack to alter the `local_search_enabled` flag for jobs queued by the upgrade hook between 0.9.0 and 0.9.3. + // It should be removed at a later date. + if job.Priority == 5 && msg.ClassifierFlags != nil { + if _, ok := msg.ClassifierFlags["local_search_enabled"]; !ok { + msg.ClassifierFlags["local_search_enabled"] = false + } + } return pr.Process(ctx, *msg) }, handler.JobTimeout(time.Second*60*10), handler.Concurrency(int(p.Config.Concurrency))), nil }),