maxent\_grpo.rewards.basic
==========================

.. automodule:: maxent_grpo.rewards.basic

   .. rubric:: Functions

   .. autosummary::

      _bind_reward_kwargs
      _call_seed_paper_reward_oat_parity
      _canon_math
      _close_seed_paper_reward_pool
      _count_format_tags
      _extract_apps_cases
      _extract_boxed_answer
      _extract_content
      _extract_humaneval_test
      _extract_mbpp_tests
      _extract_prompt_text
      _extract_python_code
      _gold_math_candidates
      _iter_boxed_answers
      _load_seed_paper_reward_fn
      _normalize_text_lines
      _outputs_match
      _parse_answer_payload
      _parse_entry_point
      _prepare_seed_paper_import_paths
      _pure_accuracy_math_match_flags
      _run_script
      _score_apps_code
      _score_humaneval_code
      _score_mbpp_code
      _score_python_unit_tests_sample
      _seed_paper_answer_tag_reward_fn
      _seed_paper_boxed_reward_fn
      _seed_paper_repo_dir
      _seed_paper_reward_pool
      _seed_paper_reward_worker
      _seed_paper_site_packages_dir
      _tag_multiplier
      accuracy_reward
      binary_code_reward
      boxed_accuracy_reward_math
      format_reward
      get_code_format_reward
      get_cosine_scaled_reward
      get_missing_boxed_answer_penalty_reward
      get_repetition_penalty_reward
      get_reward_funcs
      len_reward
      pure_accuracy_math_correctness
      pure_accuracy_reward_math
      python_unit_test_reward
      reasoning_steps_reward
      seed_paper_answer_tag_accuracy_reward_math
      seed_paper_boxed_accuracy_reward_math
      tag_count_reward
      truncate_after_first_boxed_answer
      uses_pure_accuracy_math_reward

   .. rubric:: Classes

   .. autosummary::

      RewardConfig
      RewardFunction