Skip to content

Commit

Permalink
Add local_search_enabled flag to classifier (#291)
Browse files Browse the repository at this point in the history
* Add local_search_enabled flag

* Add comment

* Update documentation
  • Loading branch information
mgdigital authored Jul 4, 2024
1 parent b05e5d1 commit abdb714
Show file tree
Hide file tree
Showing 7 changed files with 33 additions and 3 deletions.
1 change: 1 addition & 0 deletions bitmagnet.io/guides/reprocess-reclassify.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ The `reprocess` command will re-queue torrents to allow the latest updates to be
To reprocess all torrents in your index, simply run `bitmagnet reprocess`. If you've indexed a lot of torrents, this will take a while, so there are a few options available to control exactly what gets reprocessed:

- `apisDisabled`: Disable API calls during classification. This makes the classifier run a _lot_ faster, but disables identification with external services such as TMDB (metadata already gathered from external APIs is not lost).
- `localSearchDisabled`: Disable the local search query on the content table that matches torrents to known content. The local search is normally attempted before any external API call, but it is an expensive query, so it can be useful to disable it with this flag.
- `contentType`: Only reprocess torrents of a certain content type. For example, `bitmagnet reprocess --contentType movie` will only reprocess movies. Multiple content types can be comma separated, and `null` refers to torrents of unknown content type.
- `orphans`: Only reprocess torrents that have no content record.
- `classifyMode`: This controls how already matched torrents are handled.
Expand Down
8 changes: 8 additions & 0 deletions internal/app/cmd/processcmd/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ func New(p Params) (Result, error) {
Value: false,
Usage: "disable API calls for the classifier workflow",
},
&cli.BoolFlag{
Name: "localSearchDisabled",
Value: false,
Usage: "disable local search queries for the classifier workflow",
},
},
Action: func(ctx *cli.Context) error {
pr, err := p.Processor.Get()
Expand All @@ -53,6 +58,9 @@ func New(p Params) (Result, error) {
if ctx.Bool("apisDisabled") {
flags["apis_enabled"] = false
}
if ctx.Bool("localSearchDisabled") {
flags["local_search_enabled"] = false
}
var infoHashes []protocol.ID
for _, infoHash := range ctx.StringSlice("infoHash") {
id, err := protocol.ParseID(infoHash)
Expand Down
8 changes: 8 additions & 0 deletions internal/app/cmd/reprocesscmd/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,11 @@ func New(p Params) (Result, error) {
Value: false,
Usage: "disable API calls for the classifier workflow",
},
&cli.BoolFlag{
Name: "localSearchDisabled",
Value: false,
Usage: "disable local search queries for the classifier workflow",
},
},
Action: func(ctx *cli.Context) error {
var classifyMode processor.ClassifyMode
Expand All @@ -85,6 +90,9 @@ func New(p Params) (Result, error) {
if ctx.Bool("apisDisabled") {
flags["apis_enabled"] = false
}
if ctx.Bool("localSearchDisabled") {
flags["local_search_enabled"] = false
}
var contentTypes []model.NullContentType
for _, contentType := range ctx.StringSlice("contentType") {
if contentType == "null" {
Expand Down
7 changes: 6 additions & 1 deletion internal/classifier/classifier.core.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,10 @@ workflows:
condition: "!result.hasAttachedContent && result.hasBaseTitle"
if_action:
find_match:
- attach_local_content_by_search
- if_else:
condition: "flags.local_search_enabled"
if_action: attach_local_content_by_search
else_action: unmatched
- if_else:
condition: "flags.apis_enabled && flags.tmdb_enabled"
if_action: attach_tmdb_content_by_search
Expand Down Expand Up @@ -226,11 +229,13 @@ keywords:
- yvm
- (#|10|11|12|13|14|15|16|17) ?y ?o
flag_definitions:
local_search_enabled: bool
apis_enabled: bool
tmdb_enabled: bool
delete_content_types: content_type_list
delete_xxx: bool
flags:
local_search_enabled: true
apis_enabled: true
tmdb_enabled: true
delete_content_types: []
Expand Down
2 changes: 1 addition & 1 deletion internal/processor/batch/queue/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ func New(p Params) Result {
priority := 10
// prioritise jobs where API calls are disabled as they will run faster:
if msg.ApisDisabled() {
priority = 5
priority = 4
}
maxInfoHash := msg.InfoHashGreaterThan
chunkSize := uint(0)
Expand Down
3 changes: 2 additions & 1 deletion internal/processor/hook_0_9_0/hook.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ func execHook(d *dao.Query) error {
job, jobErr := batch.NewQueueJob(batch.MessageParams{
UpdatedBefore: time.Now(),
ClassifierFlags: classifier.Flags{
"apis_enabled": false,
"apis_enabled": false,
"local_search_enabled": false,
},
ChunkSize: 10_000,
BatchSize: 100,
Expand Down
7 changes: 7 additions & 0 deletions internal/processor/queue/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,13 @@ func New(p Params) Result {
if err := json.Unmarshal([]byte(job.Payload), msg); err != nil {
return err
}
// The following is somewhat of a hack to alter the `local_search_enabled` flag for jobs queued by the upgrade hook between 0.9.0 and 0.9.3.
// It should be removed at a later date.
if job.Priority == 5 && msg.ClassifierFlags != nil {
if _, ok := msg.ClassifierFlags["local_search_enabled"]; !ok {
msg.ClassifierFlags["local_search_enabled"] = false
}
}
return pr.Process(ctx, *msg)
}, handler.JobTimeout(time.Second*60*10), handler.Concurrency(int(p.Config.Concurrency))), nil
}),
Expand Down

0 comments on commit abdb714

Please sign in to comment.