# FitScript/models.py (coffeine16/FitScript, main branch)

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""
Data models for the FitScript Fitness Prescription Environment.

The FitScript environment simulates a fitness prescription task where an AI agent
must generate safe, effective, and personalized workout and nutrition plans for
clients with varying health conditions and constraints.
"""

from typing import Any, Dict, List, Optional
from openenv.core.env_server.types import Action, Observation
from pydantic import Field


class FitscriptAction(Action):
    """
    A single agent turn in the FitScript environment.

    The entire fitness prescription is carried as one structured text
    message that responds to the client scenario shown in the observation.

    Fields:
        message: Required prescription text. It should cover:
                 - the weekly workout split (exercises, sets, reps, frequency)
                 - caloric and macronutrient targets
                 - safety considerations and contraindications
                 - a progression strategy
    """

    # `default=...` (Ellipsis) declares the field as required: no default value.
    message: str = Field(
        description=(
            "The agent's fitness prescription response. "
            "Should include workout plan, nutrition targets, "
            "safety considerations, and progression strategy."
        ),
        default=...,
    )


class FitscriptObservation(Observation):
    """
    What the FitScript environment reports back to the agent.

    After reset() it carries the client scenario to prescribe for; after
    step() it carries the grader's feedback on the submitted prescription.
    Every field has a default, so an observation can be built with only the
    values that are relevant at that point of the episode.

    Fields:
        echoed_message:        The agent's last prescription (echoed for context).
        task_id:               Active task (1=easy, 2=medium, 3=hard).
        task_description:      Human-readable description of the current task.
        client_scenario:       Full client profile the agent must prescribe for.
        feedback:              Grader feedback after each step (empty before first step).
        safety_score:          Safety sub-score 0.0–1.0 (weighted 40% of total reward).
        efficacy_score:        Efficacy sub-score 0.0–1.0 (weighted 35% of total reward).
        personalization_score: Personalization sub-score 0.0–1.0 (weighted 25%).
        completeness_score:    Completeness sub-score 0.0–1.0 (its weighting is not
                               stated in this module; confirm against the grader).
        checks_passed:         Specific grader checks that passed.
        checks_failed:         Specific grader checks that failed.
        step_number:           Current step number within the episode.
        max_steps:             Maximum steps allowed for this task.
    """

    # --- Echo of the agent's input (required by OpenEnv convention) ---
    echoed_message: str = Field(
        description="The agent's last prescription message, echoed back for context.",
        default="",
    )

    # --- Which task is running and who the client is ---
    task_id: int = Field(
        description=(
            "Active task ID: 1=Easy (basic prescription), "
            "2=Medium (injury+constraints), 3=Hard (multi-client)."
        ),
        default=1,
    )
    task_description: str = Field(
        description="Human-readable description of the current task objective.",
        default="",
    )
    client_scenario: str = Field(
        description="Full client profile the agent must generate a prescription for.",
        default="",
    )

    # --- Grader output: free-text feedback plus per-dimension sub-scores ---
    feedback: str = Field(
        description="Detailed grader feedback on the last prescription submitted.",
        default="",
    )
    safety_score: float = Field(
        description=(
            "Safety sub-score (0.0–1.0): Did the prescription avoid "
            "contraindicated exercises and dangerous practices?"
        ),
        default=0.0,
    )
    efficacy_score: float = Field(
        description=(
            "Efficacy sub-score (0.0–1.0): Will this plan actually "
            "achieve the client's stated goal?"
        ),
        default=0.0,
    )
    personalization_score: float = Field(
        description=(
            "Personalization sub-score (0.0–1.0): Is the plan tailored "
            "to the client vs. a generic template?"
        ),
        default=0.0,
    )
    completeness_score: float = Field(
        description=(
            "Completeness sub-score (0.0–1.0): Did the agent actually provide "
            "all required specifics (numbers, exercises, sets/reps)?"
        ),
        default=0.0,
    )
    # default_factory gives each observation its own fresh list.
    checks_passed: List[str] = Field(
        description="List of specific grader checks the prescription passed.",
        default_factory=list,
    )
    checks_failed: List[str] = Field(
        description="List of specific grader checks the prescription failed.",
        default_factory=list,
    )

    # --- Position within the episode ---
    step_number: int = Field(
        description="Current step number within the episode (0 = just reset).",
        default=0,
    )
    max_steps: int = Field(
        description="Maximum steps allowed for this task episode.",
        default=3,
    )