import grain.autograd;
import numir;

// Host-storage check: run a single Adam update on a small MLP whose fc1
// weights are zeroed and whose fc1 gradient has exactly one non-zero entry,
// then compare the weights and both stored moments against hand-computed
// values.
{
    auto net = MLP!(float, HostStorage)(3);
    auto adam = Adam!(typeof(net))(net, 1e-3);
    static assert(isOptimizer!(typeof(adam)));

    // All-zero weights, gradient 0.2 at position [0][0] and 0 elsewhere.
    net.fc1.weight.data.zero_();
    net.fc1.weight.grad = [[0.2f, 0.0f, 0.0f], [0.0f, 0.0f, 0.0f]].variable.data;

    adam.update();

    auto weight = net.fc1.weight;

    // Expected moments for the non-zero entry. The 0.0 terms stand in for the
    // previous moment value, which this test takes to be zero.
    auto expectedM1 = (1.0 - adam.beta1) * (0.2 - 0.0) + 0.0;
    auto expectedM2 = (1.0 - adam.beta2) * (0.2 * 0.2 - 0.0) + 0.0;

    // The weight started at zero, so after the step it equals the update itself.
    auto expectedWeight = [
        [-adam.lr * expectedM1 / (expectedM2 + adam.eps) ^^ 0.5, 0.0, 0.0],
        [0.0, 0.0, 0.0]
    ].nparray;
    assert(approxEqual(weight.sliced, expectedWeight));

    // The optimizer keeps its moments keyed by parameter name; convert each to
    // the weight's type so it can be sliced and compared.
    auto storedM1 = adam.moment1[".fc1.weight"].to!(typeof(weight));
    auto expectedM1Matrix = [[expectedM1, 0.0, 0.0], [0.0, 0.0, 0.0]].nparray;
    assert(approxEqual(storedM1.sliced, expectedM1Matrix));

    auto storedM2 = adam.moment2[".fc1.weight"].to!(typeof(weight));
    auto expectedM2Matrix = [[expectedM2, 0.0, 0.0], [0.0, 0.0, 0.0]].nparray;
    assert(approxEqual(storedM2.sliced, expectedM2Matrix));
}

// Device-storage smoke test: only checks that update() runs; no values are
// asserted. The names `model` and `optim` are kept as-is because a `version`
// block does not introduce a new scope in D.
version (grain_cuda)
{
    auto model = MLP!(float, DeviceStorage)(3);
    auto optim = Adam!(typeof(model))(model, 0.1);
    optim.update();
}
https://arxiv.org/pdf/1412.6980v8.pdf