From 1d798170dcc7a65afa085ce4d24452891b95978a Mon Sep 17 00:00:00 2001
From: Ray Kim
Date: Sat, 10 Aug 2024 02:42:07 +0100
Subject: [PATCH] add missing file

---
 src/optimization/optrules.jl | 86 ++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 src/optimization/optrules.jl

diff --git a/src/optimization/optrules.jl b/src/optimization/optrules.jl
new file mode 100644
index 00000000..f1c3fa2d
--- /dev/null
+++ b/src/optimization/optrules.jl
@@ -0,0 +1,86 @@

"""
    DoWG(repsilon = 1e-8)

[DoWG](https://arxiv.org/abs/2305.16284) optimizer. Its only parameter,
`repsilon`, is the initial guess of the Euclidean distance to the optimum.
The [DoG](https://arxiv.org/abs/2302.12022) paper recommends
`1e-4*(1 + norm(x0))`.

# Parameters
- `repsilon`: Initial guess of the Euclidean distance between the initial
  point and the optimum.
"""
Optimisers.@def struct DoWG <: Optimisers.AbstractRule
    repsilon = 1e-8
end

Optimisers.init(o::DoWG, x::AbstractArray{T}) where {T} = (copy(x), zero(T), T(o.repsilon))

function Optimisers.apply!(::DoWG, state, x::AbstractArray{T}, dx) where {T}
    x0, v, r = state

    # Distance estimate: largest distance travelled from the initial point x0.
    r = max(sqrt(sum(abs2, x - x0)), r)
    r2 = r * r
    # Distance-weighted accumulation of squared gradient norms.
    v = v + r2 * sum(abs2, dx)
    # Parameter-free step size.
    η = r2 / sqrt(v)
    dx′ = Optimisers.@lazy dx * η
    return (x0, v, r), dx′
end

"""
    DoG(repsilon = 1e-8)

[DoG](https://arxiv.org/abs/2302.12022) optimizer. Its only parameter,
`repsilon`, is the initial guess of the Euclidean distance to the optimum.
The [DoG](https://arxiv.org/abs/2302.12022) paper recommends
`1e-4*(1 + norm(x0))`.

# Parameters
- `repsilon`: Initial guess of the Euclidean distance between the initial
  point and the optimum.
"""
Optimisers.@def struct DoG <: Optimisers.AbstractRule
    repsilon = 1e-8
end

Optimisers.init(o::DoG, x::AbstractArray{T}) where {T} = (copy(x), zero(T), T(o.repsilon))

function Optimisers.apply!(::DoG, state, x::AbstractArray{T}, dx) where {T}
    x0, v, r = state

    # Distance estimate: largest distance travelled from the initial point x0.
    r = max(sqrt(sum(abs2, x - x0)), r)
    # Unweighted accumulation of squared gradient norms.
    v = v + sum(abs2, dx)
    # Parameter-free step size.
    η = r / sqrt(v)
    dx′ = Optimisers.@lazy dx * η
    return (x0, v, r), dx′
end

"""
    COCOB(alpha = 100)

[Continuous Coin Betting](https://arxiv.org/abs/1705.07795) (COCOB) optimizer.
Its only parameter, `alpha` (α), caps the maximum change per parameter and
should not require much tuning; the paper suggests α = 100 as a sensible
default.

# Parameters
- `alpha` (α): Scaling parameter.
"""
Optimisers.@def struct COCOB <: Optimisers.AbstractRule
    alpha = 100
end

function Optimisers.init(::COCOB, x::AbstractArray{T}) where {T}
    return (zero(x), zero(x), zero(x), zero(x), copy(x))
end

function Optimisers.apply!(o::COCOB, state, x::AbstractArray{T}, dx) where {T}
    α = T(o.alpha)
    # L: per-coordinate maximum gradient magnitude, G: accumulated absolute
    # gradients, R: accumulated "reward", θ: accumulated negative gradients,
    # x1: initial point that the bets are placed from.
    L, G, R, θ, x1 = state

    Optimisers.@.. L = max(L, abs(dx))
    Optimisers.@.. G = G + abs(dx)
    Optimisers.@.. R = max(R + (x - x1) * -dx, 0)
    Optimisers.@.. θ = θ + -dx
    # Optimisers.jl subtracts dx′, so x - dx′ lands on the coin-betting iterate
    # computed from the initial point x1.
    dx′ = Optimisers.@lazy -(x1 - x) - (θ / (L * max(G + L, α * L)) * (L + R))
    return (L, G, R, θ, x1), dx′
end
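
Below is a minimal usage sketch for the rules above, assuming they are loaded
in a module that depends on Optimisers.jl and that the names are in scope. The
`optimize` helper, the toy quadratic objective, and the iteration count are
illustrative assumptions, not part of the patch:

    using Optimisers
    using LinearAlgebra: norm

    # Hypothetical helper: run `maxiter` steps of a rule on the toy objective
    # f(x) = sum(abs2, x) / 2, whose gradient at x is simply x.
    function optimize(rule, x0; maxiter = 100)
        x     = copy(x0)
        state = Optimisers.setup(rule, x)
        for _ in 1:maxiter
            g = x                        # analytic gradient of the toy objective
            state, x = Optimisers.update!(state, x, g)
        end
        return x
    end

    x0 = randn(5)
    # Initial distance guess recommended by the DoG paper (see docstrings above).
    x = optimize(DoWG(1e-4 * (1 + norm(x0))), x0)
    norm(x) < norm(x0)                   # true: moved toward the optimum at zero

DoG is driven the same way; only the accumulator weighting inside apply! differs.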
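
COCOB needs no distance guess at all; under the same assumed helper, swapping
in the default-constructed rule is enough:

    x = optimize(COCOB(), x0)            # α = 100 default from the paper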