maxent\_grpo.training.weighting.logic
=====================================

.. automodule:: maxent_grpo.training.weighting.logic

   .. rubric:: Functions

   .. autosummary::

      _ensure_tau_history
      _maybe_init_controller_state
      _resolve_target_entropy
      _split_ref_logprobs_per_token
      _sync_controller_state
      _to_float_list
      apply_meta_controller_update
      broadcast_controller_state
      build_uniform_weight_stats
      build_weighting_settings
      collect_weight_entropy
      compute_weight_stats
      controller_state_dict
      load_controller_state
      maybe_update_beta
      maybe_update_tau
      save_controller_state
      split_reference_logprobs
      split_reference_token_counts
      weight_matrix_from_q
      weight_vector_from_q