diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini index 8d395511687d6..5334747c7ca09 100644 --- a/custom/conf/app.example.ini +++ b/custom/conf/app.example.ini @@ -780,6 +780,9 @@ LEVEL = Info ;; for example: block anonymous AI crawlers from accessing repo code pages. ;; The "expensive" mode is experimental and subject to change. ;REQUIRE_SIGNIN_VIEW = false +;; Only used when REQUIRE_SIGNIN_VIEW is "overload": the number of in-flight anonymous requests to expensive pages above which further ones are answered with a 503 rate-limit page. +;; If it is not set, it defaults to 4 times the number of CPUs. +;OVERLOAD_INFLIGHT_ANONYMOUS_REQUESTS = ;; ;; Mail notification ;ENABLE_NOTIFY_MAIL = false diff --git a/modules/setting/service.go b/modules/setting/service.go index d9535efec6ee8..731818fd1d9a8 100644 --- a/modules/setting/service.go +++ b/modules/setting/service.go @@ -5,6 +5,7 @@ package setting import ( "regexp" + "runtime" "strings" "time" @@ -45,6 +46,8 @@ var Service = struct { ShowMilestonesDashboardPage bool RequireSignInViewStrict bool BlockAnonymousAccessExpensive bool + BlockAnonymousAccessOverload bool + OverloadInflightAnonymousRequests int EnableNotifyMail bool EnableBasicAuth bool EnablePasskeyAuth bool @@ -164,10 +167,12 @@ func loadServiceFrom(rootCfg ConfigProvider) { // boolean values are considered as "strict" var err error Service.RequireSignInViewStrict, err = sec.Key("REQUIRE_SIGNIN_VIEW").Bool() + Service.OverloadInflightAnonymousRequests = sec.Key("OVERLOAD_INFLIGHT_ANONYMOUS_REQUESTS").MustInt(4 * runtime.NumCPU()) if s := sec.Key("REQUIRE_SIGNIN_VIEW").String(); err != nil && s != "" { - // non-boolean value only supports "expensive" at the moment + // non-boolean value only supports "expensive" and "overload" at the moment Service.BlockAnonymousAccessExpensive = s == "expensive" - if !Service.BlockAnonymousAccessExpensive { + Service.BlockAnonymousAccessOverload = s == "overload" + if !Service.BlockAnonymousAccessExpensive && !Service.BlockAnonymousAccessOverload { log.Fatal("Invalid config option: REQUIRE_SIGNIN_VIEW = %s", s) } } diff --git a/routers/common/blockexpensive.go b/routers/common/blockexpensive.go index f52aa2b709286..765e483c638eb 100644 --- a/routers/common/blockexpensive.go +++ b/routers/common/blockexpensive.go @@ -6,27
+6,94 @@ package common import ( "net/http" "strings" + "sync/atomic" + "time" user_model "code.gitea.io/gitea/models/user" "code.gitea.io/gitea/modules/reqctx" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/templates" + "code.gitea.io/gitea/modules/util" "code.gitea.io/gitea/modules/web/middleware" + "code.gitea.io/gitea/services/context" "github.com/go-chi/chi/v5" + lru "github.com/hashicorp/golang-lru/v2" ) +const tplStatus503RateLimit templates.TplName = "status/503_ratelimit" + +type RateLimitToken struct { + RetryAfter time.Time +} + func BlockExpensive() func(next http.Handler) http.Handler { - if !setting.Service.BlockAnonymousAccessExpensive { + if !setting.Service.BlockAnonymousAccessExpensive && !setting.Service.BlockAnonymousAccessOverload { return nil } + + tokenCache, _ := lru.New[string, RateLimitToken](10000) + + deferAnonymousRateLimitAccess := func(w http.ResponseWriter, req *http.Request) bool { + // * For a crawler: if it sees a 503 error, it is expected to retry later (crawlers have their own queue), so it still has a chance to read all pages + // * For a real anonymous user: allocate a token, and let them wait for a while by browser JS (the request is queued by the browser) + + const tokenCookieName = "gitea_arlt" // gitea anonymous rate limit token + cookieToken, _ := req.Cookie(tokenCookieName) + if cookieToken != nil && cookieToken.Value != "" { + token, exist := tokenCache.Get(cookieToken.Value) + if exist { + if time.Now().After(token.RetryAfter) { + // the RetryAfter time has passed: consume the token and return false so that the caller lets the request through + tokenCache.Remove(cookieToken.Value) + return false + } + // RetryAfter has not been reached yet: either remove the old token and allocate a new one (what is done here), or keep using the old one + // TODO: in the future, we could do better to allow more accesses for the same token, or extend the expiration time if the access seems well-behaved + tokenCache.Remove(cookieToken.Value) + } + } + + // TODO: merge the code with RenderPanicErrorPage + tmplCtx := context.TemplateContext{} +
tmplCtx["Locale"] = middleware.Locale(w, req) + ctxData := middleware.GetContextData(req.Context()) + + tokenKey, _ := util.CryptoRandomString(32) + retryAfterDuration := 1 * time.Second + token := RateLimitToken{RetryAfter: time.Now().Add(retryAfterDuration)} + tokenCache.Add(tokenKey, token) + ctxData["RateLimitTokenKey"] = tokenKey + ctxData["RateLimitCookieName"] = tokenCookieName + ctxData["RateLimitRetryAfterMs"] = retryAfterDuration.Milliseconds() + 100 + _ = templates.HTMLRenderer().HTML(w, http.StatusServiceUnavailable, tplStatus503RateLimit, ctxData, tmplCtx) + return true + } + + inflightRequestNum := atomic.Int32{} return func(next http.Handler) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { ret := determineRequestPriority(reqctx.FromContext(req.Context())) if !ret.SignedIn { - if ret.Expensive || ret.LongPolling { - http.Redirect(w, req, setting.AppSubURL+"/user/login", http.StatusSeeOther) + if ret.LongPolling { + http.Error(w, "Long polling is not allowed for anonymous users", http.StatusForbidden) return } + if ret.Expensive { + inflightNum := inflightRequestNum.Add(1) + defer inflightRequestNum.Add(-1) + + if setting.Service.BlockAnonymousAccessExpensive { + // strictly block anonymous access to expensive pages by redirecting to the login page, to save CPU + http.Redirect(w, req, setting.AppSubURL+"/user/login", http.StatusSeeOther) + return + } else if int(inflightNum) > setting.Service.OverloadInflightAnonymousRequests { + // stay friendly to anonymous access (crawlers, real anonymous users) to expensive pages, but limit the number of in-flight requests + if deferAnonymousRateLimitAccess(w, req) { + return + } + } + } } next.ServeHTTP(w, req) }) @@ -44,6 +111,7 @@ func isRoutePathExpensive(routePattern string) bool { "/{username}/{reponame}/blame/", "/{username}/{reponame}/commit/", "/{username}/{reponame}/commits/", + "/{username}/{reponame}/compare/", "/{username}/{reponame}/graph", "/{username}/{reponame}/media/", "/{username}/{reponame}/raw/", 
diff --git a/templates/status/503_ratelimit.tmpl b/templates/status/503_ratelimit.tmpl new file mode 100644 index 0000000000000..0e7b1abff099c --- /dev/null +++ b/templates/status/503_ratelimit.tmpl @@ -0,0 +1,15 @@ +{{template "base/head" .}} +