maxent\_grpo.training.pipeline
==============================

.. automodule:: maxent_grpo.training.pipeline

   
   .. rubric:: Functions

   .. autosummary::
   
      _behavior_logp_tensor_from_meta
      _coerce_token_logprob_value
      _collect_batch_stats
      _completion_diversity_metrics
      _deepspeed_zero_stage
      _dist_any_flag
      _extract_token_logprob_seq
      _maybe_apply_entropy_bonus
      _mean
      _progress_log_enabled
      _rank_tag
      _reference_stats_from_meta
      _require_artifact
      _resolve_weighting_value
      _token_logp_tensor_from_meta
      _tokenize_for_diversity
      _weighted_mean
      prepare_training_batch
   
   .. rubric:: Classes

   .. autosummary::
   
      PreparedBatch
      _BatchStats
      _TraceCounter
   
   .. rubric:: Exceptions

   .. autosummary::
   
      _SkipBatch