diff --git a/include/neural-graphics-primitives/adam_optimizer.h b/include/neural-graphics-primitives/adam_optimizer.h
index 2f09d6d69..45e6025a7 100644
--- a/include/neural-graphics-primitives/adam_optimizer.h
+++ b/include/neural-graphics-primitives/adam_optimizer.h
@@ -44,6 +44,10 @@ class AdamOptimizer {
 		m_state.variable -= actual_learning_rate * m_state.first_moment.cwiseQuotient(m_state.second_moment.cwiseSqrt() + T::Constant(m_hparams.epsilon));
 	}
 
+	uint32_t step() const {
+		return m_state.iter;
+	}
+
 	void set_learning_rate(float lr) {
 		m_hparams.learning_rate = lr;
 	}
@@ -62,7 +66,7 @@ class AdamOptimizer {
 
 private:
 	struct State {
-		int iter = 0;
+		uint32_t iter = 0;
 		T first_moment = T::Zero();
 		T second_moment = T::Zero();
 		T variable = T::Zero();
@@ -76,8 +80,6 @@ class AdamOptimizer {
 	} m_hparams;
 };
 
-
-
 class RotationAdamOptimizer {
 public:
 	RotationAdamOptimizer(float learning_rate, float epsilon = 1e-08f, float beta1 = 0.9f, float beta2 = 0.99f) {
@@ -112,6 +114,10 @@ class RotationAdamOptimizer {
 		m_state.variable = result.axis() * result.angle();
 	}
 
+	uint32_t step() const {
+		return m_state.iter;
+	}
+
 	void set_learning_rate(float lr) {
 		m_hparams.learning_rate = lr;
 	}
@@ -126,7 +132,7 @@ class RotationAdamOptimizer {
 
 private:
 	struct State {
-		int iter = 0;
+		uint32_t iter = 0;
 		Eigen::Vector3f first_moment = Eigen::Vector3f::Zero();
 		Eigen::Vector3f second_moment = Eigen::Vector3f::Zero();
 		Eigen::Vector3f variable = Eigen::Vector3f::Zero();
diff --git a/src/testbed_nerf.cu b/src/testbed_nerf.cu
index efd9014eb..560379661 100644
--- a/src/testbed_nerf.cu
+++ b/src/testbed_nerf.cu
@@ -2573,8 +2573,8 @@ void Testbed::train_nerf(uint32_t target_batch_size, uint32_t n_training_steps,
 			pos_gradient += m_nerf.training.cam_pos_offset[i].variable() * l2_reg;
 			rot_gradient += m_nerf.training.cam_rot_offset[i].variable() * l2_reg;
 
-			m_nerf.training.cam_pos_offset[i].set_learning_rate(std::max(1e-3f * std::pow(0.33f, (float)(m_training_step / 2048)), m_optimizer->learning_rate()/1000.0f));
-			m_nerf.training.cam_rot_offset[i].set_learning_rate(std::max(1e-3f * std::pow(0.33f, (float)(m_training_step / 2048)), m_optimizer->learning_rate()/1000.0f));
+			m_nerf.training.cam_pos_offset[i].set_learning_rate(std::max(1e-3f * std::pow(0.33f, (float)(m_nerf.training.cam_pos_offset[i].step() / 128)), m_optimizer->learning_rate()/1000.0f));
+			m_nerf.training.cam_rot_offset[i].set_learning_rate(std::max(1e-3f * std::pow(0.33f, (float)(m_nerf.training.cam_rot_offset[i].step() / 128)), m_optimizer->learning_rate()/1000.0f));
 
 			m_nerf.training.cam_pos_offset[i].step(pos_gradient);
 			m_nerf.training.cam_rot_offset[i].step(rot_gradient);
@@ -2598,7 +2598,7 @@ void Testbed::train_nerf(uint32_t target_batch_size, uint32_t n_training_steps,
 		Vector2f focal_length_gradient = m_nerf.training.cam_focal_length_gradient * per_camera_loss_scale;
 		float l2_reg = m_nerf.training.intrinsic_l2_reg;
 		focal_length_gradient += m_nerf.training.cam_focal_length_offset.variable() * l2_reg;
-		m_nerf.training.cam_focal_length_offset.set_learning_rate(std::max(1e-3f * std::pow(0.33f, (float)(m_training_step / 2048)),m_optimizer->learning_rate() / 1000.0f));
+		m_nerf.training.cam_focal_length_offset.set_learning_rate(std::max(1e-3f * std::pow(0.33f, (float)(m_nerf.training.cam_focal_length_offset.step() / 128)),m_optimizer->learning_rate() / 1000.0f));
 		m_nerf.training.cam_focal_length_offset.step(focal_length_gradient);
 		m_nerf.training.update_metadata();
 	}