Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/benchmarks.md
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,8 @@ kaggle benchmarks tasks status <TASK> [options]

Prints the task's metadata (slug, creation status, creation time, URL) followed by a table of all runs. Each run row shows the model name, run state, start time, and end time. Any errored runs display their error messages below the table.

If task creation itself failed, the `Status:` line shows the failure *kind* — the cleaned creation-state enum, titlecased (e.g. `Kernel_Without_Run`, `No_Model_Specified`, `Validation_Failed`, `Errored`). When the server also provided a non-empty `creation_error_message`, an `Error:` line with that message is printed directly below the `Status:` line to explain what went wrong.

---

### `kaggle benchmarks tasks download`
Expand Down
22 changes: 19 additions & 3 deletions skills/references/benchmarks.md
Original file line number Diff line number Diff line change
Expand Up @@ -277,14 +277,14 @@ kaggle b t status my-task -m gemini-2.5-pro -m claude-sonnet-4
**Output format:**
```
Task: my-task
Status: COMPLETED
Status: Completed
Created: 2026-04-28 18:13:04
Task URL: https://www.kaggle.com/...

Model Status Started Ended
--------------------------------------------------------------------------
gemini-2.5-pro COMPLETED 2026-04-28 18:13:04 2026-04-28 18:14:00
claude-sonnet-4 ERRORED 2026-04-28 18:13:04 2026-04-28 18:13:04
gemini-2.5-pro Completed 2026-04-28 18:13:04 2026-04-28 18:14:00
claude-sonnet-4 Errored 2026-04-28 18:13:04 2026-04-28 18:13:04

Errors:
[claude-sonnet-4]
Expand All @@ -295,6 +295,22 @@ Errors:

If no runs exist: `No runs yet. Use 'kaggle b t run my-task' to start one.`

**Task creation failures:** When the task itself failed to be created
(e.g. `KERNEL_WITHOUT_RUN`, `NO_MODEL_SPECIFIED`, `VALIDATION_FAILED`,
`ERRORED`), the `Status:` line shows the failure *kind* (titlecased
enum) and an `Error:` line is appended below it with the server-provided
`creation_error_message` — for example:

```
Status: Kernel_Without_Run
Error: Notebook finished but produced no output. Did you forget to call .run() or .evaluate()?
```

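The `kaggle b t run` command reports failures in a similar way when refusing
to schedule runs against a non-completed task: the raised error includes
`status: <KIND>` — here the cleaned enum is left in upper case (e.g.
`status: ERRORED`), not titlecased — and, if the server provided a
`creation_error_message`, an `Error: <message>` line with the server's
explanation.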

### `kaggle benchmarks tasks download`

Downloads completed or errored run outputs.
Expand Down
8 changes: 5 additions & 3 deletions src/kaggle/api/kaggle_api_extended.py
Original file line number Diff line number Diff line change
Expand Up @@ -6995,7 +6995,7 @@ def _clean_enum_str(s: str) -> str:

@staticmethod
def _format_state(state) -> str:
    """Render an enum state in Titlecase.

    The state is first cleaned by ``KaggleApi._clean_enum_str`` and the
    resulting string is passed through ``str.title()``, so every
    underscore-separated word is capitalised
    (e.g. ``Completed``, ``Kernel_Without_Run``).

    Args:
        state: The enum state to render; forwarded unchanged to
            ``KaggleApi._clean_enum_str``.

    Returns:
        The cleaned state name in Titlecase.
    """
    return KaggleApi._clean_enum_str(state).title()

@staticmethod
Expand Down Expand Up @@ -7687,8 +7687,8 @@ def benchmarks_tasks_run_cli(self, task, model=None, wait=None, poll_interval=60
f"Task '{task}' is not ready to run (status: {self._clean_enum_str(state)}). "
f"Only completed tasks can be run."
)
if state == self._TASK_CREATION_ERRORED:
error_msg += f"\n Task Info: {task_info}"
if task_info.creation_error_message:
error_msg += f"\n Error: {task_info.creation_error_message}"
raise ValueError(error_msg)

if not models:
Expand Down Expand Up @@ -7799,6 +7799,8 @@ def benchmarks_tasks_status_cli(self, task, model=None):
version = task_info.slug.version_number or "unset"
print(f"Version: {version}")
print(f"Status: {self._format_state(task_info.creation_state)}")
if task_info.creation_error_message:
print(f"Error: {task_info.creation_error_message}")
print(f"Created: {self._format_time(task_info.create_time)}")
print(f"Public: {getattr(task_info, 'is_public', False)}")
url = getattr(task_info, "url", None)
Expand Down
73 changes: 70 additions & 3 deletions src/kaggle/test/test_benchmarks_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -566,11 +566,25 @@ def test_run_rejects_non_positive_poll_interval(self, api, interval):
with pytest.raises(ValueError, match="--poll-interval must be a positive integer"):
api.benchmarks_tasks_run_cli("my-task", ["gemini-pro"], poll_interval=interval)

def test_run_errored_task_surfaces_creation_error_message(self, api):
    """When task creation failed, run shows status (kind) and Error (server message) separately."""
    # Simulate a task whose creation failed and for which the server
    # supplied an explanatory message.
    task = _make_task(state=ERRORED)
    task.creation_error_message = "Notebook produced no run output"
    api._mock_benchmarks.get_benchmark_task.return_value = task

    with pytest.raises(ValueError) as exc_info:
        api.benchmarks_tasks_run_cli("my-task", ["gemini-pro"])

    msg = str(exc_info.value)
    # The failure kind and the server explanation are reported as two
    # distinct pieces of the error text.
    assert "status: ERRORED" in msg
    assert "Error: Notebook produced no run output" in msg

def test_run_errored_task_without_creation_error_message(self, api):
    """When creation_error_message is empty, no Error line is appended."""
    api._mock_benchmarks.get_benchmark_task.return_value = _make_task(state=ERRORED)

    # Capture the exception instead of matching on message text: the
    # assertions below check both what must and what must not appear.
    with pytest.raises(ValueError) as exc_info:
        api.benchmarks_tasks_run_cli("my-task", ["gemini-pro"])

    msg = str(exc_info.value)
    assert "status: ERRORED" in msg
    assert "Error:" not in msg

@pytest.mark.parametrize("status_code", [403, 404], ids=["forbidden", "not_found"])
def test_run_task_not_found(self, api, status_code):
Expand Down Expand Up @@ -1086,6 +1100,59 @@ def test_status_pagination(self, api, capsys):
assert "gemini-1" in output
assert "gemini-2" in output

def test_status_shows_creation_error_message(self, api, capsys):
    """The status header includes the task's creation_error_message as an ``Error:`` line."""
    failed_task = _make_task(state=ERRORED)
    failed_task.creation_error_message = "Kernel produced no run output"
    api._mock_benchmarks.get_benchmark_task.return_value = failed_task
    # No runs are configured for this task.
    _setup_runs_response(api, [])

    api.benchmarks_tasks_status_cli("my-task")

    captured = capsys.readouterr()
    assert "Error: Kernel produced no run output" in captured.out

def test_status_omits_error_when_empty(self, api, capsys):
    """The status output contains no ``Error:`` line for a task built with ``_make_task()`` defaults."""
    default_task = _make_task()
    api._mock_benchmarks.get_benchmark_task.return_value = default_task
    # No runs are configured for this task.
    _setup_runs_response(api, [])

    api.benchmarks_tasks_status_cli("my-task")

    captured = capsys.readouterr()
    assert "Error:" not in captured.out


class TestFormatState:
    """Checks that ``KaggleApi._format_state`` yields the Titlecased error *kind*.

    The formatter only renders the cleaned enum name. Human-readable
    explanations live in ``creation_error_message`` on the task object,
    and callers print them on a separate ``Error:`` line.
    """

    @pytest.mark.parametrize(
        ("state", "expected"),
        [
            (COMPLETED, "Completed"),
            (QUEUED, "Queued"),
            (RUNNING, "Running"),
            (ERRORED, "Errored"),
        ],
    )
    def test_known_creation_states(self, state, expected):
        """Each creation-state constant renders as its Titlecase name."""
        rendered = KaggleApi._format_state(state)
        assert rendered == expected

    @pytest.mark.parametrize(
        ("raw", "expected"),
        [
            ("KERNEL_WITHOUT_RUN", "Kernel_Without_Run"),
            ("NO_MODEL_SPECIFIED", "No_Model_Specified"),
            ("VALIDATION_FAILED", "Validation_Failed"),
            ("ERRORED", "Errored"),
            ("COMPLETED", "Completed"),
            ("SOMETHING_NEW", "Something_New"),
            ("PENDING", "Pending"),
        ],
    )
    def test_renders_cleaned_enum(self, raw, expected):
        """Plain upper-case strings are Titlecased word by word, underscores kept."""
        rendered = KaggleApi._format_state(raw)
        assert rendered == expected


# ============================================================
# Download
Expand Down
Loading