maxent\_grpo.rewards.basic
==========================

.. automodule:: maxent_grpo.rewards.basic

   .. rubric:: Functions

   .. autosummary::

      _bind_reward_kwargs
      _call_seed_paper_reward_oat_parity
      _canon_math
      _close_seed_paper_reward_pool
      _count_format_tags
      _extract_apps_cases
      _extract_boxed_answer
      _extract_content
      _extract_humaneval_test
      _extract_mbpp_tests
      _extract_prompt_text
      _extract_python_code
      _gold_math_candidates
      _iter_boxed_answers
      _load_seed_paper_reward_fn
      _normalize_text_lines
      _outputs_match
      _parse_answer_payload
      _parse_entry_point
      _prepare_seed_paper_import_paths
      _pure_accuracy_math_match_flags
      _run_script
      _score_apps_code
      _score_humaneval_code
      _score_mbpp_code
      _score_python_unit_tests_sample
      _seed_paper_answer_tag_reward_fn
      _seed_paper_boxed_reward_fn
      _seed_paper_repo_dir
      _seed_paper_reward_pool
      _seed_paper_reward_worker
      _seed_paper_site_packages_dir
      _tag_multiplier
      accuracy_reward
      binary_code_reward
      boxed_accuracy_reward_math
      format_reward
      get_code_format_reward
      get_cosine_scaled_reward
      get_missing_boxed_answer_penalty_reward
      get_repetition_penalty_reward
      get_reward_funcs
      len_reward
      pure_accuracy_math_correctness
      pure_accuracy_reward_math
      python_unit_test_reward
      reasoning_steps_reward
      seed_paper_answer_tag_accuracy_reward_math
      seed_paper_boxed_accuracy_reward_math
      tag_count_reward
      truncate_after_first_boxed_answer
      uses_pure_accuracy_math_reward

   .. rubric:: Classes

   .. autosummary::

      RewardConfig
      RewardFunction