Commit 8f7030a

nit found only in TS

Author: Andrei Bratu (committed)
1 parent 87c317b · commit 8f7030a

2 files changed: +68 -4 lines changed

src/humanloop/evals/run.py

Lines changed: 15 additions & 4 deletions
@@ -72,6 +72,7 @@
 from humanloop.types import PromptKernelRequest as Prompt
 from humanloop.types import ToolKernelRequest as Tool
 from humanloop.types.agent_response import AgentResponse
+from humanloop.types.agent_kernel_request import AgentKernelRequest as Agent
 from humanloop.types.datapoint_response import DatapointResponse
 from humanloop.types.dataset_response import DatasetResponse
 from humanloop.types.evaluation_run_response import EvaluationRunResponse
@@ -402,7 +403,7 @@ def _get_subclient(client: "BaseHumanloop", file_config: FileEvalConfig) -> Huma


 def _safe_get_default_file_version(client: "BaseHumanloop", file_config: FileEvalConfig) -> EvaluatedFile:
-    """Get default version of a File from online workspace.
+    """Get default version of a File from remote workspace.

     Uses either the File path or id from the config.

@@ -421,13 +422,13 @@ def _safe_get_default_file_version(client: "BaseHumanloop", file_config: FileEva
             raise HumanloopRuntimeError(
                 f"File in Humanloop workspace at {path} is not of type {type}, but {hl_file.type}."
             )
-        # cast is safe, we can only fetch Files allowed by FileType
+        # cast is safe, we can only fetch Files that can be evaluated
         return typing.cast(EvaluatedFile, hl_file)
     elif file_id is not None:
         subclient = _get_subclient(client=client, file_config=file_config)
         return subclient.get(id=file_id)
     else:
-        raise HumanloopRuntimeError("You must provide a path or id in your `file` config.")
+        raise HumanloopRuntimeError("You must provide either the path or the id in your `file` config.")


 def _resolve_file(client: "BaseHumanloop", file_config: FileEvalConfig) -> tuple[EvaluatedFile, Optional[Callable]]:
@@ -447,7 +448,14 @@ def _resolve_file(client: "BaseHumanloop", file_config: FileEvalConfig) -> tuple
                 "You are trying to create a new version of the File by passing the `version` argument. "
                 "You must pass either the `file.path` or `file.id` argument and provider proper `file.version` for upserting the File."
             )
-        hl_file = _safe_get_default_file_version(client=client, file_config=file_config)
+        try:
+            hl_file = _safe_get_default_file_version(client=client, file_config=file_config)
+        except ApiError:
+            if not version or not path or file_id:
+                raise HumanloopRuntimeError(
+                    "File does not exist on Humanloop. Please provide a `file.path` and a version to create a new version.",
+                )
+            return _upsert_file(file_config=file_config, client=client), callable or None

     if (version_id or environment) and (callable or version):
         raise HumanloopRuntimeError(
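
The try/except added here changes what happens when the File named in the config does not exist in the workspace: instead of letting the ApiError propagate, _resolve_file now falls back to upserting the File, provided the config carries a `file.path` and a `file.version` and no `file.id`. A minimal caller-side sketch of that behaviour, assuming the File does not exist yet; the path, dataset, and evaluator values below are illustrative, not from this commit:

from humanloop import Humanloop

client = Humanloop(api_key="...")

# The lookup for "Test Dir/New Agent" fails inside _resolve_file, but because a
# path and a version are supplied (and no id), the Agent is upserted and the
# evaluation run proceeds against the freshly created version.
client.evaluations.run(
    name="example_run",
    file={
        "path": "Test Dir/New Agent",  # illustrative path
        "type": "agent",
        "version": {"model": "gpt-4o", "provider": "openai", "temperature": 0},
    },
    dataset={"path": "Test Dir/Eval Dataset"},  # illustrative
    evaluators=[{"path": "Test Dir/Output Not Null"}],  # illustrative
)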
@@ -683,6 +691,9 @@ def _upsert_file(client: "BaseHumanloop", file_config: FileEvalConfig) -> Evalua
     elif type_ == "tool":
         # Will throw error if version is invalid
         Tool.model_validate(version)
+    elif type_ == "agent":
+        # Will throw error if version is invalid
+        Agent.model_validate(version)
     else:
         raise NotImplementedError(f"Unsupported File type: {type_}")
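
The new elif branch mirrors the existing Prompt and Tool handling: AgentKernelRequest is a Pydantic model, so Agent.model_validate(version) rejects a malformed `file.version` before anything is sent to the API. A quick sketch of that validation in isolation, using the same version shape as the new integration test (any fields beyond that shape are not assumed):

from humanloop.types.agent_kernel_request import AgentKernelRequest as Agent

# Parses into an AgentKernelRequest, or raises pydantic.ValidationError if the
# dict does not match the kernel's schema.
agent_version = Agent.model_validate(
    {
        "model": "gpt-4o",
        "provider": "openai",
        "temperature": 0,
        "max_iterations": 5,
        "template": [
            {"role": "system", "content": "You are a helpful assistant."},
        ],
    }
)
print(type(agent_version).__name__)  # AgentKernelRequest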

tests/integration/test_evals.py

Lines changed: 53 additions & 0 deletions
@@ -347,3 +347,56 @@ def test_flow_eval_resolves_to_default_with_callable(
     finally:
         # Clean up test resources
         humanloop_test_client.flows.delete(id=flow_response.id)
+
+
+@pytest.mark.skip(reason="Skip until agents are in prod")
+def test_agent_eval_works_upserting(
+    humanloop_test_client: Humanloop,
+    eval_dataset: TestIdentifiers,
+    output_not_null_evaluator: TestIdentifiers,
+    sdk_test_dir: str,
+):
+    humanloop_test_client.evaluations.run(  # type: ignore [attr-defined]
+        name="test_eval_run",
+        file={
+            "path": f"{sdk_test_dir}/Test Agent",
+            "type": "agent",
+            "version": {
+                "model": "gpt-4o",
+                "template": [
+                    {
+                        "role": "system",
+                        "content": "You are a helpful assistant, offering very short answers.",
+                    },
+                    {
+                        "role": "user",
+                        "content": "{{question}}",
+                    },
+                ],
+                "provider": "openai",
+                "temperature": 0,
+                "max_iterations": 5,
+            },
+        },
+        dataset={
+            "path": eval_dataset.file_path,
+        },
+        evaluators=[
+            {
+                "path": output_not_null_evaluator.file_path,
+            }
+        ],
+    )
+    files_response = humanloop_test_client.files.list_files(page=1, size=100)
+    eval_agent = None
+    for file in files_response.records:
+        if file.path == f"{sdk_test_dir}/Test Agent":
+            eval_agent = file
+            break
+    assert eval_agent and eval_agent.type == "agent"
+    # THEN the evaluation finishes successfully
+    evaluations_response = humanloop_test_client.evaluations.list(file_id=eval_agent.id)
+    assert evaluations_response.items and len(evaluations_response.items) == 1
+    evaluation_id = evaluations_response.items[0].id
+    runs_response = humanloop_test_client.evaluations.list_runs_for_evaluation(id=evaluation_id)  # type: ignore [attr-defined, arg-type]
+    assert runs_response.runs[0].status == "completed"
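
The test is gated behind @pytest.mark.skip until agents ship to production; once the marker is dropped it can be targeted on its own, for example programmatically (assuming the integration suite's usual environment and credentials are in place):

import pytest

# Run only the new agent evaluation test from the integration suite.
pytest.main(["tests/integration/test_evals.py", "-k", "test_agent_eval_works_upserting"])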
