maxent\_grpo.training.trl\_trainer
==================================

.. automodule:: maxent_grpo.training.trl_trainer

   .. rubric:: Functions

   .. autosummary::

      _adapter_disabled_context
      _apply_eos_completion_mask
      _build_ema_alias_index
      _build_prompt_text
      _build_rich_rollout_rows
      _build_seed_worker
      _canonical_metric_key
      _clamp_log_delta
      _coerce_bool
      _coerce_non_negative_float
      _completion_diversity_metrics
      _empty_dataset_like
      _entropy_normalization_scale
      _find_token_prefix_len_for_text
      _flatten_prompt_major_tensor
      _gather_eval_benchmark_ids_for_prompts
      _is_main_process
      _legacy_metric_aliases
      _local_metric_tensor
      _mask_invalid_logit_columns
      _mean
      _metric_suffix_from_benchmark
      _metric_tensor_for_logging
      _nanmax_tensor
      _nanmin_tensor
      _normalize_group_mass_proxy
      _normalize_listwise_q_targets
      _normalize_text_for_prefix_match
      _numeric_or_none
      _pad_completion_rows
      _pad_logprob_rows
      _reshape_prompt_major_tensor
      _resolve_ema_source_param
      _resolve_prompt_group_sizes
      _resolve_token_id_upper_bound
      _resolve_tokenizer_vocab_limit
      _resolve_vocab_size_limit
      _selected_logps_and_entropy
      _shuffle_listwise_tensor_dict
      _split_listwise_tensor_dict
      _strip_ema_param_prefixes
      _strip_mode_prefix
      _supports_adapter_disabled_reference
      _token_prefix_search_order
      _tokenize_for_diversity
      _use_lightweight_greedy_eval
      _use_local_only_eval_diversity_metrics
      _use_local_only_lightweight_eval_metrics
      _use_sharded_prompt_major_greedy_eval
      _weighted_mean
      _write_rich_rollout_sidecar
      apply_chat_template
      build_custom_grpo_trainer
      gather
      is_conversational
      maybe_apply_chat_template
      wrap_trl_trainer