Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
minimum_pre_commit_version: "2.9.0"
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
rev: v6.0.0
hooks:
- id: check-yaml
args: [--allow-multiple-documents]
Expand All @@ -15,20 +15,20 @@ repos:
hooks:
- id: prettier
- repo: https://github.com/asottile/reorder-python-imports
rev: v3.10.0
rev: v3.16.0
hooks:
- id: reorder-python-imports
args: [--py39-plus]
- repo: https://github.com/psf/black
rev: 23.3.0
- repo: https://github.com/psf/black-pre-commit-mirror
rev: 26.3.1
hooks:
- id: black
- repo: https://github.com/PyCQA/flake8
rev: 6.0.0
rev: 7.3.0
hooks:
- id: flake8
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.4.1
rev: v1.20.0
hooks:
- id: mypy
additional_dependencies: [numpy, httpx, pytest, structlog, types-PyYAML]
Expand Down
3 changes: 1 addition & 2 deletions ice/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@
except ImportError:

class Tfew(Agent):
def __init__(self, *args, **kwargs):
...
def __init__(self, *args, **kwargs): ...


MACHINE_AGENTS = {
Expand Down
2 changes: 1 addition & 1 deletion ice/agents/augmented.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ async def classify(
default: Optional[str] = None,
verbose: bool = False,
) -> tuple[dict[str, float], Optional[str]]:
(machine_probs, explanation) = await self.machine.classify(
machine_probs, explanation = await self.machine.classify(
prompt=prompt,
choices=choices,
default=default,
Expand Down
1 change: 1 addition & 0 deletions ice/cache.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Decorator for caching function results to disk
"""

import asyncio
import functools
import inspect
Expand Down
1 change: 0 additions & 1 deletion ice/datasets/qasper.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from ice.paper import split_sentences
from ice.recipes.meta.eval_paper_qa.types import PaperQaGoldStandard


TRAIN_PATH = "/code/datasets/qasper-train-v0.3.json"

VAL_PATH = "/code/datasets/qasper-dev-v0.3.json"
Expand Down
20 changes: 11 additions & 9 deletions ice/evaluation/evaluate_recipe_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,7 @@ def __str__(self) -> str:
correctness = (
"Correct"
if self.is_correct == True
else "Incorrect"
if self.is_correct == False
else "Not evaluated"
else "Incorrect" if self.is_correct == False else "Not evaluated"
)
return f"""{correctness}.
- Predicted: {self.predicted}
Expand Down Expand Up @@ -196,13 +194,17 @@ def evaluated_classifications(self) -> list[EvaluatedClassification]:

for i in range(0, max(len(recipe_classifications), len(gold_classifications))):
evaluated_classification = EvaluatedClassification(
predicted=recipe_classifications[i]
if i < len(recipe_classifications)
else None,
predicted=(
recipe_classifications[i]
if i < len(recipe_classifications)
else None
),
gold=gold_classifications[i] if i < len(gold_classifications) else None,
classification_eq=self.classification_eq[i]
if i < len(self.classification_eq)
else None,
classification_eq=(
self.classification_eq[i]
if i < len(self.classification_eq)
else None
),
)

evaluated_classifications.append(evaluated_classification)
Expand Down
18 changes: 9 additions & 9 deletions ice/evaluation/evaluation_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,9 +382,9 @@ def make_dashboard_row_df(self):
classification_summary.proportion_correct
)

row[
f"Classification {i+1} # evaluated"
] = classification_summary.num_evaluated
row[f"Classification {i+1} # evaluated"] = (
classification_summary.num_evaluated
)

df = pd.DataFrame([row])
df.to_csv(
Expand All @@ -408,9 +408,9 @@ def make_experiments_evaluation_df(self):
"ice_commit": latest_commit_hash(),
"document_id": result.document_id,
"split": result.gold_standard.split if result.gold_standard else None,
"experiment": result.gold_standard.experiment
if result.gold_standard
else None,
"experiment": (
result.gold_standard.experiment if result.gold_standard else None
),
"total_gs_quotes": len(
result.evaluated_excerpts.gold_standards_in_excerpts_results
),
Expand All @@ -420,9 +420,9 @@ def make_experiments_evaluation_df(self):
"excerpts": result.evaluated_excerpts.excerpts,
"gs_quotes": result.evaluated_excerpts.gold_standards_str(),
"answer": result.answer,
"gs_answer": result.gold_standard.answer
if result.gold_standard
else None,
"gs_answer": (
result.gold_standard.answer if result.gold_standard else None
),
"answer_rating": result.answer_rating,
"failure_modes": result.failure_modes,
}
Expand Down
16 changes: 10 additions & 6 deletions ice/evaluation/summarize_experiment_evals.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,17 @@ async def summarize_experiment_evals(results_file: str):
row.get("classification_1"),
row.get("classification_2"),
],
answer_rating=None
if pd.isna(row.get("answer_rating"))
else int(row.get("answer_rating")),
answer_rating=(
None
if pd.isna(row.get("answer_rating"))
else int(row.get("answer_rating"))
),
elicit_commit=row.get("elicit_commit"),
failure_modes=None
if pd.isna(row.get("failure_modes"))
else row.failure_modes.split(","),
failure_modes=(
None
if pd.isna(row.get("failure_modes"))
else row.failure_modes.split(",")
),
)
for _, row in recipe_df.iterrows()
]
Expand Down
6 changes: 3 additions & 3 deletions ice/formatter/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def _is_partial(**fields: Union[literal, _NotNeededSentinel]):


def all_values_needed(
examples: Sequence[Mapping[str, Union[literal_or_transform, _NotNeededSentinel]]]
examples: Sequence[Mapping[str, Union[literal_or_transform, _NotNeededSentinel]]],
) -> TypeGuard[Sequence[Mapping[str, literal_or_transform]]]:
return all(
(
Expand Down Expand Up @@ -107,7 +107,7 @@ def _unparse(parses: _StdLibFormatStringParses) -> str:


def _no_sentinels_remaining(
concrete_values: dict[str, Union[literal, _NotNeededSentinel]]
concrete_values: dict[str, Union[literal, _NotNeededSentinel]],
) -> TypeGuard[dict[str, literal]]:
return all(
(value is not _not_needed_sentinel for value in concrete_values.values())
Expand Down Expand Up @@ -146,7 +146,7 @@ def _format_truncate(


def _has_stop(
concrete_values: Mapping[str, Union[literal, _NotNeededSentinel]]
concrete_values: Mapping[str, Union[literal, _NotNeededSentinel]],
) -> bool:
return any(isinstance(value, StopSentinel) for value in concrete_values.values())

Expand Down
1 change: 0 additions & 1 deletion ice/formatter/transform/dependent.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

from ice.formatter.transform import _Transform


T_contra = TypeVar("T_contra", contravariant=True)


Expand Down
1 change: 0 additions & 1 deletion ice/formatter/transform/value.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

from ice.formatter.transform import _Transform


T_contra = TypeVar("T_contra", contravariant=True)


Expand Down
1 change: 0 additions & 1 deletion ice/json_value.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

from fvalues import F


JSONValue = Union[
str, int, float, bool, None, list["JSONValue"], dict[str, "JSONValue"]
]
Expand Down
7 changes: 3 additions & 4 deletions ice/metrics/gold_paragraphs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Make a dataframe that contains the paragraphs that contain the gold standard quotes.
"""

import asyncio
from pathlib import Path
from typing import Optional
Expand Down Expand Up @@ -72,15 +73,13 @@ def get_containing_paragraph(
# Explanations:
# - Quote is split across two paragraphs
# - Document paragraphs don't include quote
log.warning(
f"""Couldn't find gold standard paragraph for quote
log.warning(f"""Couldn't find gold standard paragraph for quote

> {quote}

in {document_id}. Best recall was {best_recall:.2f}. Best paragraph was:

> {best_recall_paragraph}"""
)
> {best_recall_paragraph}""")
return best_recall_paragraph


Expand Down
12 changes: 4 additions & 8 deletions ice/metrics/gold_standards.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,7 @@ def get_gold_standards(
question_short_name: Optional[str] = None,
experiment: Optional[str] = None,
model_type: None = None,
) -> list[GoldStandard[Any]]:
...
) -> list[GoldStandard[Any]]: ...


@overload
Expand All @@ -195,8 +194,7 @@ def get_gold_standards(
document_id: Optional[str] = None,
question_short_name: Optional[str] = None,
experiment: Optional[str] = None,
) -> list[GoldStandard[ParsedGoldStandardType]]:
...
) -> list[GoldStandard[ParsedGoldStandardType]]: ...


def get_gold_standards(
Expand Down Expand Up @@ -226,8 +224,7 @@ def get_gold_standard(
question_short_name: Optional[str] = None,
experiment: Optional[str] = None,
model_type: None = None,
) -> Optional[GoldStandard[Any]]:
...
) -> Optional[GoldStandard[Any]]: ...


@overload
Expand All @@ -237,8 +234,7 @@ def get_gold_standard(
document_id: Optional[str] = None,
question_short_name: Optional[str] = None,
experiment: Optional[str] = None,
) -> Optional[GoldStandard[ParsedGoldStandardType]]:
...
) -> Optional[GoldStandard[ParsedGoldStandardType]]: ...


def get_gold_standard(
Expand Down
6 changes: 3 additions & 3 deletions ice/paper.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,9 @@ def parse_txt(file: Path) -> list[dict]:
"number": section_title_number(current_section),
}
],
"sectionType": "abstract"
if current_section == "Abstract"
else "main",
"sectionType": (
"abstract" if current_section == "Abstract" else "main"
),
}
)
return body
Expand Down
1 change: 0 additions & 1 deletion ice/recipes/adherence_tfew_paragraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
from ice.utils import map_async
from ice.utils import max_by_value


gpt2_tokenizer: GPT2TokenizerFast = AutoTokenizer.from_pretrained("gpt2")


Expand Down
7 changes: 4 additions & 3 deletions ice/recipes/blinding_dynamic.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
- routledge-2006.pdf
- vittengl-2009.pdf
"""

import itertools
from typing import Any
from typing import Literal
Expand Down Expand Up @@ -344,9 +345,9 @@ async def run(self, paper: Paper):
results_by_intervention: dict[str, dict[Group, dict[str, Any]]] = {}
interventions = await self.interventions(paper)
for intervention in interventions:
results_by_intervention[
intervention
] = await self.blinding_for_intervention(paper, intervention)
results_by_intervention[intervention] = (
await self.blinding_for_intervention(paper, intervention)
)

recipe_results: list[RecipeResult] = []
for intervention in interventions:
Expand Down
1 change: 0 additions & 1 deletion ice/recipes/consort_flow/baseline_elicit_answer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from ice.apis.openai import openai_complete
from ice.recipes.program_search.nodes.answer.types import Demonstration


log = get_logger()


Expand Down
8 changes: 5 additions & 3 deletions ice/recipes/consort_flow/baselines.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,9 +342,11 @@ async def _all_options(
except TooLongRequestError:
selections = remove_lowest_perplexity(selections)
return PaperQaAnswer(
answer=["The question is not answered in the text."]
if do_return_list
else "The question is not answered in the text.",
answer=(
["The question is not answered in the text."]
if do_return_list
else "The question is not answered in the text."
),
support_candidates=texts,
support_labels=[False for text in texts],
support_scores=[t[1] for t in texts_with_perplexities],
Expand Down
8 changes: 5 additions & 3 deletions ice/recipes/consort_flow/golds.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,11 @@ def paper_to_allocation_gold_standards(
(
f"The {exp.name} experiment included {len(exp.arms or [])} arms: {', '.join((arm.name for arm in exp.arms or []))}. How many participants were initially allocated to the {arm.name} arm of the {exp.name} experiment?",
texts,
arm.allocated.quotes
if arm.allocated and isinstance(arm.allocated, SampleSize)
else [],
(
arm.allocated.quotes
if arm.allocated and isinstance(arm.allocated, SampleSize)
else []
),
)
for exp in gs.parsed_answer.experiments
for arm in (exp.arms or [])
Expand Down
1 change: 0 additions & 1 deletion ice/recipes/experiments_and_arms/prompts/can_name_arms.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from ice.recipes.experiments_and_arms.prompts.utils import start_last_example
from ice.recipes.experiments_and_arms.types import MultipartReasoningPrompt


CAN_WE_NAME_ARMS_EXAMPLES: list[
dict[str, Union[ValueTransform[Sequence[str]], str, int]]
] = [
Expand Down
1 change: 0 additions & 1 deletion ice/recipes/experiments_and_arms/prompts/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from structlog.stdlib import get_logger


log = get_logger()


Expand Down
1 change: 0 additions & 1 deletion ice/recipes/experiments_and_arms/prompts/consensus.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from structlog.stdlib import get_logger


log = get_logger()


Expand Down
Loading
Loading