Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions py/src/braintrust/framework.py
Original file line number Diff line number Diff line change
Expand Up @@ -1276,7 +1276,8 @@ async def run_evaluator(
) -> EvalResultWithSummary[Input, Output]:
"""Wrapper on _run_evaluator_internal that times out execution after evaluator.timeout."""
results = await asyncio.wait_for(
_run_evaluator_internal(experiment, evaluator, position, filters, stream, state, enable_cache), evaluator.timeout
_run_evaluator_internal(experiment, evaluator, position, filters, stream, state, enable_cache),
evaluator.timeout,
)

if experiment:
Expand Down Expand Up @@ -1473,9 +1474,7 @@ def report_progress(event: TaskProgressEvent):
async def ensure_spans_flushed():
# Flush native Braintrust spans
if experiment:
await asyncio.get_event_loop().run_in_executor(
None, lambda: experiment.state.flush()
)
await asyncio.get_event_loop().run_in_executor(None, lambda: experiment.state.flush())
elif state:
await asyncio.get_event_loop().run_in_executor(None, lambda: state.flush())
else:
Expand Down Expand Up @@ -1671,6 +1670,11 @@ async def with_max_concurrency(coro):
for trial_index in range(evaluator.trial_count):
tasks.append(asyncio.create_task(with_max_concurrency(run_evaluator_task(datum, trial_index))))

if not tasks:
eprint(
f"{bcolors.WARNING}Warning: no data rows found for evaluator '{evaluator.eval_name}'. The experiment will be empty.{bcolors.ENDC}"
)

results = []
for task in std_tqdm(tasks, desc=f"{evaluator.eval_name} (tasks)", position=position, disable=position is None):
results.append(await task)
Expand Down
20 changes: 20 additions & 0 deletions py/src/braintrust/test_framework.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,7 @@ def task_with_hooks(input, hooks):
assert len(root_span) == 1
assert root_span[0].get("tags") == None


@pytest.mark.asyncio
async def test_eval_enable_cache():
state = BraintrustState()
Expand Down Expand Up @@ -564,3 +565,22 @@ async def test_eval_enable_cache():
)
state.span_cache.start.assert_called()
state.span_cache.stop.assert_called()


@pytest.mark.asyncio
async def test_run_evaluator_empty_dataset_warns(capsys):
    """run_evaluator should emit a stderr warning when the dataset has no rows."""
    # An evaluator whose data iterable is empty — no tasks will be scheduled.
    empty_evaluator = Evaluator(
        project_name="test-project",
        eval_name="test-evaluator",
        data=[],
        task=lambda input: input,
        scores=[],
        experiment_name=None,
        metadata=None,
    )

    await run_evaluator(experiment=None, evaluator=empty_evaluator, position=None, filters=[])

    # The empty-dataset warning is printed to stderr via eprint.
    err_output = capsys.readouterr().err
    assert "Warning" in err_output
    assert "empty" in err_output.lower()
Loading