
src/arraymancer/nn/optimizers/optimizers


Types

Adam[TT] = object
  ## Internal state:
  ## - learnable weights
  ## - decays on first and second moment
  ## - current decay
  ## - exponential moving averages (mean estimation)
  ## - exponential moving averages squared (uncentered variance)
  ## - epsilon for numerical stability when dividing

Adaptive Moment Estimation.
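The fields above are easiest to read alongside the update rule they support. Below is a minimal, dependency-free Nim sketch of the Adam step on flat float sequences; AdamState, initAdamState and adamStep are illustrative names and not part of Arraymancer's API, and the hyper-parameter defaults simply mirror those of the optimizer proc documented below.

  # Illustrative sketch of the Adam update rule; not Arraymancer API.
  import std/math

  type AdamState = object
    lr, beta1, beta2, eps: float   # hyper-parameters
    beta1T, beta2T: float          # current (cumulative) decays
    m, v: seq[float]               # first and second moment estimates

  proc initAdamState(n: int; lr = 0.001; beta1 = 0.9; beta2 = 0.999;
                     eps = 1e-8): AdamState =
    AdamState(lr: lr, beta1: beta1, beta2: beta2, eps: eps,
              beta1T: 1.0, beta2T: 1.0,
              m: newSeq[float](n), v: newSeq[float](n))

  proc adamStep(s: var AdamState; w: var seq[float]; grad: seq[float]) =
    # Advance the cumulative decays, then update moments and weights.
    s.beta1T *= s.beta1
    s.beta2T *= s.beta2
    for i in 0 ..< w.len:
      s.m[i] = s.beta1 * s.m[i] + (1.0 - s.beta1) * grad[i]             # mean estimate
      s.v[i] = s.beta2 * s.v[i] + (1.0 - s.beta2) * grad[i] * grad[i]   # uncentered variance
      let mHat = s.m[i] / (1.0 - s.beta1T)   # bias-corrected first moment
      let vHat = s.v[i] / (1.0 - s.beta2T)   # bias-corrected second moment
      w[i] -= s.lr * mHat / (sqrt(vHat) + s.eps)   # division guarded by eps

  when isMainModule:
    var w = @[1.0, -2.0]
    var s = initAdamState(w.len)
    s.adamStep(w, @[0.5, -0.25])
    echo w   # each weight is nudged by roughly lr against its gradient
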
Optimizer[TT] = SGD[TT] or Adam[TT]
SGD[TT] = object
  params*: seq[Variable[TT]]   ## Learnable weights
  lr*: TT.T                    ## Learning rate

Stochastic gradient descent without momentum.
SGDMomentum[TT] = object
  params*: seq[Variable[TT]]   ## Learnable weights
  lr*: TT.T                    ## Learning rate
  momentum*: TT.T              ## Value of the momentum
  ## Internal state:
  ## - moments for momentum
  ## - learning rate decay
  ## - flag for Nesterov momentum

Stochastic gradient descent with momentum. Details on Nesterov momentum can be found in Sutskever et al., 2013.

Procs

proc newSGD[T](params: varargs[Variable[Tensor[T]]]; learning_rate: T): SGD[
    Tensor[T]] {....deprecated: "Use the optimizer macro instead".}
Deprecated: Use the optimizer macro instead
proc optimizer[M, T](model: M; OptimizerKind: typedesc[Adam];
                     learning_rate: T = T(0.001); beta1: T = T(0.9);
                     beta2: T = T(0.999); eps: T = T(1e-8)): Adam[Tensor[T]]
Create an Adam optimizer that will update the model's weights.
func optimizer[M, T](model: M; OptimizerKind: typedesc[SGD]; learning_rate: T): SGD[
    Tensor[T]]

Create an SGD optimizer that will update the model's weights.

Parameters:

  • model Model to optimize.
  • learning_rate Learning rate.

Returns:

  • An SGD optimizer with the given learning rate.
proc optimizer[M, T](model: M; OptimizerKind: typedesc[SGDMomentum];
                     learning_rate: T; momentum: T = T(0.0); decay: T = T(0.0);
                     nesterov = false): SGDMomentum[Tensor[T]]

Create an SGD optimizer with optional momentum that will update the model's weights.

Parameters:

  • model Model to optimize.
  • learning_rate Learning rate.
  • momentum Momentum.
  • decay How much the learning rate will decay each update.
  • nesterov Whether to use Nesterov momentum or not.

Returns:

  • An SGD optimizer with momentum, configured with the given parameters.
proc optimizerAdam[M, T](model: M; learning_rate: T; beta1: T = T(0.9);
                         beta2: T = T(0.999); eps: T = T(1e-8)): Adam[Tensor[T]] {.
    ...deprecated: "Use optimizer(model, SGDMomentum, learning_rate) instead.".}
Deprecated: Use optimizer(model, SGDMomentum, learning_rate) instead.
  Source Edit
func optimizerSGD[M, T](model: M; learning_rate: T): SGD[Tensor[T]] {.
    ...deprecated: "Use optimizer(model, SGD, learning_rate) instead.".}
Deprecated: Use optimizer(model, SGD, learning_rate) instead.
proc optimizerSGDMomentum[M, T](model: M; learning_rate: T; momentum = T(0.0);
                                decay = T(0.0); nesterov = false): SGDMomentum[
    Tensor[T]] {....deprecated: "Use optimizer(model, SGDMomentum, learning_rate) instead.".}
Deprecated: Use optimizer(model, SGDMomentum, learning_rate) instead.
proc update(self: SGD)

Performs an optimization update.

Parameters:

  • self An SGD optimizer to update.

This proc will update the weights in the model associated with the input optimizer according to the following rule:

w = w - lr * gradient
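For concreteness, here is a minimal, dependency-free Nim sketch of that rule applied to a flat weight sequence; sgdStep is an illustrative name and not part of Arraymancer's API.

  # Illustrative sketch of the plain SGD rule above; not Arraymancer API.
  proc sgdStep(w: var seq[float]; grad: seq[float]; lr: float) =
    # w = w - lr * gradient, element-wise
    for i in 0 ..< w.len:
      w[i] -= lr * grad[i]

  when isMainModule:
    var w = @[1.0, -2.0, 0.5]
    w.sgdStep(@[0.1, -0.4, 0.0], lr = 0.01)
    echo w   # updated weights: roughly @[0.999, -1.996, 0.5]
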
proc update(self: var Adam)
Performs an Adam optimization update on the weights in the model associated with the input optimizer.
proc update(self: var SGDMomentum)

Performs an optimization update.

Parameters:

  • self An SGDMomentum optimizer to update.

This proc will update the weights in the model associated with the input optimizer according to the following rule:

w = w - lr * gradient + m * moment

If nesterov is set to true, the following rule is applied instead:

w = w - lr * gradient + m * v
v = - lr * gradient + m * moment

where in both cases moment is the gradient change applied in the previous update step and m is the momentum.

If decay is greater than 0, the learning rate is reduced on each call according to: lr = lr / (1 + decay)
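The rules above are compact but easy to misread, so here is a minimal, dependency-free Nim sketch of both variants, including the optional learning-rate decay; MomentumState and momentumStep are illustrative names and not part of Arraymancer's API.

  # Illustrative sketch of the momentum and Nesterov rules above; not Arraymancer API.
  type MomentumState = object
    lr, momentum, decay: float
    nesterov: bool
    moments: seq[float]   # gradient change applied in the previous step

  proc momentumStep(s: var MomentumState; w: var seq[float]; grad: seq[float]) =
    if s.decay > 0.0:
      s.lr = s.lr / (1.0 + s.decay)          # per-call learning-rate decay
    for i in 0 ..< w.len:
      let v = -s.lr * grad[i] + s.momentum * s.moments[i]
      if s.nesterov:
        w[i] += -s.lr * grad[i] + s.momentum * v   # look-ahead (Nesterov) update
      else:
        w[i] += v                                  # classic momentum update
      s.moments[i] = v                             # remember the change for the next step

  when isMainModule:
    var w = @[1.0, -2.0]
    var s = MomentumState(lr: 0.1, momentum: 0.9, decay: 0.0,
                          nesterov: false, moments: newSeq[float](w.len))
    s.momentumStep(w, @[0.5, -0.25])
    echo w   # first step equals plain SGD since the stored moments start at zero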