Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(proxy)!: added config for multiple rotating proxies and tls verification with CI integration #187

Merged
merged 8 commits into from
Feb 9, 2024
5 changes: 3 additions & 2 deletions src/config/structs.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,9 @@ type Ranking struct {
}

type Settings struct {
RequestedResultsPerPage int `koanf:"requestedresults"`
Shortcut string `koanf:"shortcut"`
RequestedResultsPerPage int `koanf:"requestedresults"`
Shortcut string `koanf:"shortcut"`
Proxies []string `koanf:"proxies"`
}

// ReaderTTL is the format in which the config is read from the config file
Expand Down
2 changes: 1 addition & 1 deletion src/engines/bing/bing.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi
var pagesCol *colly.Collector
var retError error

sedefaults.InitializeCollectors(&col, &pagesCol, &options, &timings)
sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings)

sedefaults.PagesColRequest(Info.Name, pagesCol, ctx)
sedefaults.PagesColError(Info.Name, pagesCol)
Expand Down
2 changes: 1 addition & 1 deletion src/engines/brave/brave.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi
var pagesCol *colly.Collector
var retError error

sedefaults.InitializeCollectors(&col, &pagesCol, &options, &timings)
sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings)

sedefaults.PagesColRequest(Info.Name, pagesCol, ctx)
sedefaults.PagesColError(Info.Name, pagesCol)
Expand Down
2 changes: 1 addition & 1 deletion src/engines/duckduckgo/duckduckgo.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi
var pagesCol *colly.Collector
var retError error

sedefaults.InitializeCollectors(&col, &pagesCol, &options, &timings)
sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings)

sedefaults.PagesColRequest(Info.Name, pagesCol, ctx)
sedefaults.PagesColError(Info.Name, pagesCol)
Expand Down
2 changes: 1 addition & 1 deletion src/engines/etools/etools.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi
var pagesCol *colly.Collector
var retError error

sedefaults.InitializeCollectors(&col, &pagesCol, &options, &timings)
sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings)

sedefaults.PagesColRequest(Info.Name, pagesCol, ctx)
sedefaults.PagesColError(Info.Name, pagesCol)
Expand Down
2 changes: 1 addition & 1 deletion src/engines/google/google.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi
var pagesCol *colly.Collector
var retError error

sedefaults.InitializeCollectors(&col, &pagesCol, &options, &timings)
sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings)

sedefaults.PagesColRequest(Info.Name, pagesCol, ctx)
sedefaults.PagesColError(Info.Name, pagesCol)
Expand Down
2 changes: 1 addition & 1 deletion src/engines/googlescholar/googlescholar.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi
var pagesCol *colly.Collector
var retError error

sedefaults.InitializeCollectors(&col, &pagesCol, &options, &timings)
sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings)

sedefaults.PagesColRequest(Info.Name, pagesCol, ctx)
sedefaults.PagesColError(Info.Name, pagesCol)
Expand Down
2 changes: 1 addition & 1 deletion src/engines/mojeek/mojeek.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi
var pagesCol *colly.Collector
var retError error

sedefaults.InitializeCollectors(&col, &pagesCol, &options, &timings)
sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings)

sedefaults.PagesColRequest(Info.Name, pagesCol, ctx)
sedefaults.PagesColError(Info.Name, pagesCol)
Expand Down
2 changes: 1 addition & 1 deletion src/engines/presearch/presearch.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi
var pagesCol *colly.Collector
var retError error

sedefaults.InitializeCollectors(&col, &pagesCol, &options, &timings)
sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings)

sedefaults.PagesColRequest(Info.Name, pagesCol, ctx)
sedefaults.PagesColError(Info.Name, pagesCol)
Expand Down
2 changes: 1 addition & 1 deletion src/engines/qwant/qwant.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi
var pagesCol *colly.Collector
var retError error

sedefaults.InitializeCollectors(&col, &pagesCol, &options, &timings)
sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings)

sedefaults.PagesColRequest(Info.Name, pagesCol, ctx)
sedefaults.PagesColError(Info.Name, pagesCol)
Expand Down
2 changes: 1 addition & 1 deletion src/engines/startpage/startpage.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi
var pagesCol *colly.Collector
var retError error

sedefaults.InitializeCollectors(&col, &pagesCol, &options, &timings)
sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings)

sedefaults.PagesColRequest(Info.Name, pagesCol, ctx)
sedefaults.PagesColError(Info.Name, pagesCol)
Expand Down
16 changes: 7 additions & 9 deletions src/engines/structs.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,13 @@ type DOMPaths struct {
}

type Options struct {
MaxPages int
VisitPages bool
Category category.Name
UserAgent string
Locale string //format: en_US
SafeSearch bool
Mobile bool

ProxyAddr string
MaxPages int
VisitPages bool
Category category.Name
UserAgent string
Locale string //format: en_US
SafeSearch bool
Mobile bool
JustFirstPage bool
}

Expand Down
2 changes: 1 addition & 1 deletion src/engines/swisscows/swisscows.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi
var pagesCol *colly.Collector
var retError error

sedefaults.InitializeCollectors(&col, &pagesCol, &options, &timings)
sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings)

sedefaults.PagesColRequest(Info.Name, pagesCol, ctx)
sedefaults.PagesColError(Info.Name, pagesCol)
Expand Down
2 changes: 1 addition & 1 deletion src/engines/yahoo/yahoo.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi
var pagesCol *colly.Collector
var retError error

sedefaults.InitializeCollectors(&col, &pagesCol, &options, &timings)
sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings)

sedefaults.PagesColRequest(Info.Name, pagesCol, ctx)
sedefaults.PagesColError(Info.Name, pagesCol)
Expand Down
2 changes: 1 addition & 1 deletion src/engines/yep/yep.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi
var pagesCol *colly.Collector
var retError error

sedefaults.InitializeCollectors(&col, &pagesCol, &options, &timings)
sedefaults.InitializeCollectors(&col, &pagesCol, &settings, &options, &timings)

sedefaults.PagesColRequest(Info.Name, pagesCol, ctx)
sedefaults.PagesColError(Info.Name, pagesCol)
Expand Down
21 changes: 20 additions & 1 deletion src/sedefaults/sedefaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"strconv"

"github.com/gocolly/colly/v2"
"github.com/gocolly/colly/v2/proxy"
"github.com/hearchco/hearchco/src/bucket"
"github.com/hearchco/hearchco/src/config"
"github.com/hearchco/hearchco/src/engines"
Expand Down Expand Up @@ -179,7 +180,7 @@ func Prepare(seName engines.Name, options *engines.Options, settings *config.Set
return nil
}

func InitializeCollectors(colPtr **colly.Collector, pagesColPtr **colly.Collector, options *engines.Options, timings *config.Timings) {
func InitializeCollectors(colPtr **colly.Collector, pagesColPtr **colly.Collector, settings *config.Settings, options *engines.Options, timings *config.Timings) {
*colPtr = colly.NewCollector(colly.MaxDepth(1), colly.UserAgent(options.UserAgent), colly.Async())
*pagesColPtr = colly.NewCollector(colly.MaxDepth(1), colly.UserAgent(options.UserAgent), colly.Async())

Expand All @@ -206,6 +207,24 @@ func InitializeCollectors(colPtr **colly.Collector, pagesColPtr **colly.Collecto
(*pagesColPtr).SetRequestTimeout(timings.PageTimeout)
}
}

if settings.Proxies != nil {
log.Debug().
Strs("proxies", settings.Proxies).
Msg("Using proxies")

// Rotate proxies
rp, err := proxy.RoundRobinProxySwitcher(settings.Proxies...)
if err != nil {
log.Fatal().
Err(err).
Strs("proxies", settings.Proxies).
Msg("sedefaults.InitializeCollectors(): failed creating proxy switcher")
}

(*colPtr).SetProxyFunc(rp)
(*pagesColPtr).SetProxyFunc(rp)
}
}

func DoGetRequest(urll string, anonurll string, colCtx *colly.Context, collector *colly.Collector, packageName engines.Name, retError *error) {
Expand Down
Loading