maxent\_grpo.training.trl\_trainer
==================================

.. automodule:: maxent_grpo.training.trl_trainer

   
   .. rubric:: Functions

   .. autosummary::
   
      _adapter_disabled_context
      _apply_eos_completion_mask
      _build_ema_alias_index
      _build_prompt_text
      _build_rich_rollout_rows
      _build_seed_worker
      _canonical_metric_key
      _clamp_log_delta
      _coerce_bool
      _coerce_non_negative_float
      _completion_diversity_metrics
      _empty_dataset_like
      _entropy_normalization_scale
      _find_token_prefix_len_for_text
      _flatten_prompt_major_tensor
      _gather_eval_benchmark_ids_for_prompts
      _is_main_process
      _legacy_metric_aliases
      _local_metric_tensor
      _mask_invalid_logit_columns
      _mean
      _metric_suffix_from_benchmark
      _metric_tensor_for_logging
      _nanmax_tensor
      _nanmin_tensor
      _normalize_group_mass_proxy
      _normalize_listwise_q_targets
      _normalize_text_for_prefix_match
      _numeric_or_none
      _pad_completion_rows
      _pad_logprob_rows
      _reshape_prompt_major_tensor
      _resolve_ema_source_param
      _resolve_prompt_group_sizes
      _resolve_token_id_upper_bound
      _resolve_tokenizer_vocab_limit
      _resolve_vocab_size_limit
      _selected_logps_and_entropy
      _shuffle_listwise_tensor_dict
      _split_listwise_tensor_dict
      _strip_ema_param_prefixes
      _strip_mode_prefix
      _supports_adapter_disabled_reference
      _token_prefix_search_order
      _tokenize_for_diversity
      _use_lightweight_greedy_eval
      _use_local_only_eval_diversity_metrics
      _use_local_only_lightweight_eval_metrics
      _use_sharded_prompt_major_greedy_eval
      _weighted_mean
      _write_rich_rollout_sidecar
      apply_chat_template
      build_custom_grpo_trainer
      gather
      is_conversational
      maybe_apply_chat_template
      wrap_trl_trainer