From 2fd20fad1988118b5fd0c776fe2e68abd01595ed Mon Sep 17 00:00:00 2001 From: "Michael E. Karpeles" Date: Wed, 6 May 2026 18:32:22 -0600 Subject: [PATCH 1/4] feat(sources/itan): add ITAN Global Publishing adapter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First concrete DataProvider/DataProviderRecord implementation using the primitives from openlibrary-client#435. Sources the ITAN catalog JSONL from: https://github.com/ITANigp/itan-ebook-backend (feature/open-library branch) ITANRecord (DataProviderRecord subclass): - Fields map ITAN's pre-OL-formatted schema (they already use OL field names) - extra='allow' absorbs ebook_access and any future ITAN-specific keys - to_ol_import() applies three cleanup steps: - Strips leading/trailing whitespace from subjects (" Contemporary Fiction") - Filters malformed isbn_13 values using the schema pattern (ITAN uses "0" and "978" as placeholders — 35 and 11 occurrences respectively in 67 records) - Drops ebook_access (not in OL import schema; would be silently ignored by API) ITANProvider (JSONLProvider subclass): - Purely declarative: sets SOURCE_SLUG, TITLE, SOURCE_URL and RECORD_CLASS; traversal is inherited Tests (22): - Unit tests for all cleanup/skip logic with synthetic fixtures - Live end-to-end test fetching all 67 records from real ITAN URL - Every output record cross-validated against import.schema.json - pytest.skip if network unavailable (CI-safe) Closes https://github.com/internetarchive/openlibrary/issues/12091 Depends on internetarchive/openlibrary-client#435 --- sources/__init__.py | 0 sources/itan/__init__.py | 0 sources/itan/provider.py | 31 +++++ sources/itan/record.py | 100 ++++++++++++++++ sources/tests/__init__.py | 0 sources/tests/test_itan.py | 226 +++++++++++++++++++++++++++++++++++++ 6 files changed, 357 insertions(+) create mode 100644 sources/__init__.py create mode 100644 sources/itan/__init__.py create mode 100644 sources/itan/provider.py create mode 100644 
sources/itan/record.py create mode 100644 sources/tests/__init__.py create mode 100644 sources/tests/test_itan.py diff --git a/sources/__init__.py b/sources/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/sources/itan/__init__.py b/sources/itan/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/sources/itan/provider.py b/sources/itan/provider.py new file mode 100644 index 00000000..dd1d991d --- /dev/null +++ b/sources/itan/provider.py @@ -0,0 +1,31 @@ +""" +provider.py +~~~~~~~~~~~ + +DataProvider for ITAN Global Publishing. + +Streams the ITAN catalog JSONL file and yields ITANRecord instances. +Inherits all traversal logic from JSONLProvider — HTTP streaming, +bad-line skipping, and logging are handled upstream. + +Usage:: + + from sources.itan.provider import ITANProvider + + for record in ITANProvider().iter_ol_records(): + print(record.model_dump(exclude_none=True)) +""" + +from olclient.imports import JSONLProvider + +from sources.itan.record import ITANRecord + + +class ITANProvider(JSONLProvider): + SOURCE_SLUG = "itan_technologies" + TITLE = "ITAN Global Publishing" + SOURCE_URL = ( + "https://raw.githubusercontent.com/ITANigp/itan-ebook-backend" + "/refs/heads/feature/open-library/data/itan_catalog.jsonl" + ) + RECORD_CLASS = ITANRecord diff --git a/sources/itan/record.py b/sources/itan/record.py new file mode 100644 index 00000000..10ce7af2 --- /dev/null +++ b/sources/itan/record.py @@ -0,0 +1,100 @@ +""" +record.py +~~~~~~~~~ + +DataProviderRecord for ITAN Global Publishing. 
+ +The ITAN catalog is already structured close to the OL import format, so the +transformation is mostly cleanup: + + - Strip leading/trailing whitespace from subjects (several have " Subject") + - Filter invalid isbn_13 values — the catalog uses "0" as a placeholder + - Drop ebook_access, which is not in the OL import schema + +Source: https://github.com/ITANigp/itan-ebook-backend +Issue: https://github.com/internetarchive/openlibrary/issues/12091 +""" + +from __future__ import annotations + +import re +from typing import List, Optional + +from olclient.imports import DataProviderRecord, OLAuthor, OLImportRecord + +# OL import schema pattern for isbn_13 +_ISBN13_RE = re.compile(r'^([0-9][- ]*){13}$') + + +class ITANRecord(DataProviderRecord): + """One record from the ITAN catalog JSONL file. + + Field names intentionally match the OL import schema because ITAN pre-formats + their data that way. extra='allow' (inherited) absorbs ebook_access and any + other ITAN-specific keys without raising. 
+ """ + + title: str + authors: List[dict] + publishers: List[str] + publish_date: str + source_records: List[str] + identifiers: Optional[dict] = None + languages: Optional[List[str]] = None + subjects: Optional[List[str]] = None + subtitle: Optional[str] = None + number_of_pages: Optional[int] = None + notes: Optional[str] = None + isbn_13: Optional[List[str]] = None + isbn_10: Optional[List[str]] = None + contributions: Optional[List[str]] = None + + # ebook_access and any future ITAN-specific fields are absorbed by extra='allow' + + def to_ol_import(self) -> Optional[OLImportRecord]: + if not self.title or not self.authors: + return None + + authors = [ + OLAuthor(name=a["name"]) + for a in self.authors + if a.get("name", "").strip() + ] + if not authors: + return None + + subjects = ( + [s.strip() for s in self.subjects if s.strip()] + if self.subjects + else None + ) + + # Filter malformed ISBNs — ITAN uses "0" and "978" as placeholders + isbn_13 = ( + [v for v in self.isbn_13 if _ISBN13_RE.match(v)] + if self.isbn_13 + else None + ) or None + + isbn_10 = ( + [v for v in self.isbn_10 if v and v != "0"] + if self.isbn_10 + else None + ) or None + + return OLImportRecord( + title=self.title, + source_records=self.source_records, + authors=authors, + publishers=self.publishers, + publish_date=self.publish_date, + subtitle=self.subtitle, + number_of_pages=self.number_of_pages, + notes=self.notes, + languages=self.languages, + subjects=subjects or None, + isbn_13=isbn_13, + isbn_10=isbn_10, + identifiers=self.identifiers, + contributions=self.contributions, + ) diff --git a/sources/tests/__init__.py b/sources/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/sources/tests/test_itan.py b/sources/tests/test_itan.py new file mode 100644 index 00000000..0cc58bc1 --- /dev/null +++ b/sources/tests/test_itan.py @@ -0,0 +1,226 @@ +""" +Tests for sources/itan/record.py and sources/itan/provider.py + +Coverage: +- ITANRecord: field parsing, 
to_ol_import() transformation +- Cleanup logic: bad ISBNs filtered, subjects stripped, ebook_access dropped +- Skip logic: missing title, missing/empty authors +- ITANProvider: end-to-end over real ITAN data (live HTTP, 67 records) +- Cross-validation: every output record passes import.schema.json +""" + +from __future__ import annotations + +import json +import os +from nturl2path import pathname2url + +import jsonschema +import pytest + +from olclient.imports import OLImportRecord +from sources.itan.provider import ITANProvider +from sources.itan.record import ITANRecord + +# --------------------------------------------------------------------------- +# Schema validator (reuse olclient's copy of import.schema.json) +# --------------------------------------------------------------------------- + +_SCHEMA_PATH = os.path.abspath( + os.path.join( + os.path.dirname(__file__), + '..', '..', '..', 'openlibrary-client-imports', # local worktree + 'olclient', 'schemata', 'import.schema.json', + ) +) + +# Fall back to installed package location if worktree path doesn't exist +if not os.path.exists(_SCHEMA_PATH): + import olclient + _SCHEMA_PATH = os.path.join( + os.path.dirname(olclient.__file__), 'schemata', 'import.schema.json' + ) + +with open(_SCHEMA_PATH) as _f: + _SCHEMA = json.load(_f) + +_RESOLVER = jsonschema.RefResolver( + 'file:' + pathname2url(os.path.abspath(_SCHEMA_PATH)), _SCHEMA +) +_VALIDATOR = jsonschema.Draft4Validator(_SCHEMA, resolver=_RESOLVER) + + +def assert_valid_schema(record: OLImportRecord) -> None: + data = record.model_dump(exclude_none=True) + errors = list(_VALIDATOR.iter_errors(data)) + assert not errors, f"Schema errors: {errors}\nRecord: {data}" + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +FULL_RAW = { + "title": "Shadows Of The Continent", + "authors": [{"name": "Tolulope Taiwo"}], + "publishers": ["Itan 
Technologies"], + "publish_date": "2026", + "languages": ["eng"], + "subjects": ["African Literature & Fiction", " Contemporary Fiction", "romance"], + "source_records": ["itan_technologies:BOO1109"], + "identifiers": {"itan_technologies": ["BOO1109"]}, + "ebook_access": "borrowable", # NOT in OL schema — must be dropped + "subtitle": "A Pan-African Romance Suspense Novel", + "number_of_pages": 189, + "notes": "A gripping Pan-African romantic thriller.", + "isbn_13": ["0"], # placeholder — must be filtered + "contributions": ["Editor: Jane Doe"], +} + +MINIMAL_RAW = { + "title": "Minimal Book", + "authors": [{"name": "Author One"}], + "publishers": ["Pub"], + "publish_date": "2024", + "source_records": ["itan_technologies:BOO0001"], +} + + +# --------------------------------------------------------------------------- +# ITANRecord unit tests +# --------------------------------------------------------------------------- + +class TestITANRecord: + def test_parses_full_record(self): + rec = ITANRecord.model_validate(FULL_RAW) + assert rec.title == "Shadows Of The Continent" + assert rec.number_of_pages == 189 + + def test_absorbs_ebook_access_without_error(self): + rec = ITANRecord.model_validate(FULL_RAW) + # ebook_access is accepted via extra='allow' but not surfaced as a typed field + assert rec.model_extra.get("ebook_access") == "borrowable" + + def test_to_ol_import_returns_record(self): + rec = ITANRecord.model_validate(MINIMAL_RAW) + result = rec.to_ol_import() + assert isinstance(result, OLImportRecord) + assert result.title == "Minimal Book" + assert result.source_records == ["itan_technologies:BOO0001"] + + def test_strips_whitespace_from_subjects(self): + rec = ITANRecord.model_validate(FULL_RAW) + result = rec.to_ol_import() + assert " Contemporary Fiction" not in result.subjects + assert "Contemporary Fiction" in result.subjects + + def test_filters_placeholder_isbn(self): + rec = ITANRecord.model_validate(FULL_RAW) + result = rec.to_ol_import() + # 
isbn_13 was ["0"] — should be removed entirely + assert result.isbn_13 is None + + def test_keeps_real_isbn(self): + # Both "0" and "978" are ITAN placeholders; real 13-digit ISBNs survive + raw = {**MINIMAL_RAW, "isbn_13": ["9780441569595", "0", "978"]} + rec = ITANRecord.model_validate(raw) + result = rec.to_ol_import() + assert result.isbn_13 == ["9780441569595"] + + def test_drops_ebook_access_from_output(self): + rec = ITANRecord.model_validate(FULL_RAW) + result = rec.to_ol_import() + dumped = result.model_dump(exclude_none=True) + assert "ebook_access" not in dumped + + def test_skips_record_with_no_title(self): + raw = {**MINIMAL_RAW, "title": ""} + rec = ITANRecord.model_validate(raw) + assert rec.to_ol_import() is None + + def test_skips_record_with_empty_authors_list(self): + raw = {**MINIMAL_RAW, "authors": []} + rec = ITANRecord.model_validate(raw) + assert rec.to_ol_import() is None + + def test_skips_record_with_blank_author_names(self): + raw = {**MINIMAL_RAW, "authors": [{"name": " "}]} + rec = ITANRecord.model_validate(raw) + assert rec.to_ol_import() is None + + def test_preserves_identifiers(self): + rec = ITANRecord.model_validate(FULL_RAW) + result = rec.to_ol_import() + assert result.identifiers == {"itan_technologies": ["BOO1109"]} + + def test_output_passes_schema(self): + rec = ITANRecord.model_validate(FULL_RAW) + assert_valid_schema(rec.to_ol_import()) + + def test_minimal_output_passes_schema(self): + rec = ITANRecord.model_validate(MINIMAL_RAW) + assert_valid_schema(rec.to_ol_import()) + + def test_all_subjects_stripped(self): + raw = {**MINIMAL_RAW, "subjects": [" Sci-Fi ", " Horror", "Fantasy "]} + rec = ITANRecord.model_validate(raw) + result = rec.to_ol_import() + assert result.subjects == ["Sci-Fi", "Horror", "Fantasy"] + + def test_empty_subjects_list_becomes_none(self): + raw = {**MINIMAL_RAW, "subjects": [" ", ""]} + rec = ITANRecord.model_validate(raw) + result = rec.to_ol_import() + assert result.subjects is None + + +# 
--------------------------------------------------------------------------- +# ITANProvider — live end-to-end over real data +# --------------------------------------------------------------------------- + +@pytest.fixture(scope="module") +def live_records(): + """Fetch all ITAN records once for the module; skip if network unavailable.""" + try: + return list(ITANProvider().iter_ol_records()) + except Exception as exc: + pytest.skip(f"Could not reach ITAN source: {exc}") + + +class TestITANProviderLive: + def test_yields_expected_count(self, live_records): + # 67 records in the catalog; all should yield (none are missing required fields) + assert len(live_records) == 67 + + def test_all_records_are_ol_import_records(self, live_records): + assert all(isinstance(r, OLImportRecord) for r in live_records) + + def test_no_placeholder_isbns_in_output(self, live_records): + for r in live_records: + if r.isbn_13: + assert "0" not in r.isbn_13, f"Placeholder ISBN in {r.source_records}" + + def test_no_ebook_access_in_output(self, live_records): + for r in live_records: + dumped = r.model_dump(exclude_none=True) + assert "ebook_access" not in dumped + + def test_no_whitespace_leading_subjects(self, live_records): + for r in live_records: + if r.subjects: + for s in r.subjects: + assert s == s.strip(), f"Unstripped subject {s!r} in {r.source_records}" + + def test_all_records_have_source_records_prefix(self, live_records): + for r in live_records: + assert any( + sr.startswith("itan_technologies:") for sr in r.source_records + ), f"Unexpected source_records format: {r.source_records}" + + def test_all_records_pass_json_schema(self, live_records): + failures = [] + for r in live_records: + errors = list(_VALIDATOR.iter_errors(r.model_dump(exclude_none=True))) + if errors: + failures.append((r.source_records, errors)) + assert not failures, f"{len(failures)} records failed schema validation: {failures[:3]}" From 2c25f6c5d408cab7d81bb32221d8e0d9279d4f60 Mon Sep 17 00:00:00 2001 
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 7 May 2026 00:33:13 +0000 Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- sources/itan/record.py | 18 +++++------------- sources/tests/test_itan.py | 28 ++++++++++++++++++++-------- 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/sources/itan/record.py b/sources/itan/record.py index 10ce7af2..c2c9ad5e 100644 --- a/sources/itan/record.py +++ b/sources/itan/record.py @@ -23,7 +23,7 @@ from olclient.imports import DataProviderRecord, OLAuthor, OLImportRecord # OL import schema pattern for isbn_13 -_ISBN13_RE = re.compile(r'^([0-9][- ]*){13}$') +_ISBN13_RE = re.compile(r"^([0-9][- ]*){13}$") class ITANRecord(DataProviderRecord): @@ -56,30 +56,22 @@ def to_ol_import(self) -> Optional[OLImportRecord]: return None authors = [ - OLAuthor(name=a["name"]) - for a in self.authors - if a.get("name", "").strip() + OLAuthor(name=a["name"]) for a in self.authors if a.get("name", "").strip() ] if not authors: return None subjects = ( - [s.strip() for s in self.subjects if s.strip()] - if self.subjects - else None + [s.strip() for s in self.subjects if s.strip()] if self.subjects else None ) # Filter malformed ISBNs — ITAN uses "0" and "978" as placeholders isbn_13 = ( - [v for v in self.isbn_13 if _ISBN13_RE.match(v)] - if self.isbn_13 - else None + [v for v in self.isbn_13 if _ISBN13_RE.match(v)] if self.isbn_13 else None ) or None isbn_10 = ( - [v for v in self.isbn_10 if v and v != "0"] - if self.isbn_10 - else None + [v for v in self.isbn_10 if v and v != "0"] if self.isbn_10 else None ) or None return OLImportRecord( diff --git a/sources/tests/test_itan.py b/sources/tests/test_itan.py index 0cc58bc1..22569115 100644 --- a/sources/tests/test_itan.py +++ b/sources/tests/test_itan.py @@ -29,23 +29,29 @@ _SCHEMA_PATH = os.path.abspath( os.path.join( os.path.dirname(__file__), - '..', '..', '..', 
'openlibrary-client-imports', # local worktree - 'olclient', 'schemata', 'import.schema.json', + "..", + "..", + "..", + "openlibrary-client-imports", # local worktree + "olclient", + "schemata", + "import.schema.json", ) ) # Fall back to installed package location if worktree path doesn't exist if not os.path.exists(_SCHEMA_PATH): import olclient + _SCHEMA_PATH = os.path.join( - os.path.dirname(olclient.__file__), 'schemata', 'import.schema.json' + os.path.dirname(olclient.__file__), "schemata", "import.schema.json" ) with open(_SCHEMA_PATH) as _f: _SCHEMA = json.load(_f) _RESOLVER = jsonschema.RefResolver( - 'file:' + pathname2url(os.path.abspath(_SCHEMA_PATH)), _SCHEMA + "file:" + pathname2url(os.path.abspath(_SCHEMA_PATH)), _SCHEMA ) _VALIDATOR = jsonschema.Draft4Validator(_SCHEMA, resolver=_RESOLVER) @@ -69,11 +75,11 @@ def assert_valid_schema(record: OLImportRecord) -> None: "subjects": ["African Literature & Fiction", " Contemporary Fiction", "romance"], "source_records": ["itan_technologies:BOO1109"], "identifiers": {"itan_technologies": ["BOO1109"]}, - "ebook_access": "borrowable", # NOT in OL schema — must be dropped + "ebook_access": "borrowable", # NOT in OL schema — must be dropped "subtitle": "A Pan-African Romance Suspense Novel", "number_of_pages": 189, "notes": "A gripping Pan-African romantic thriller.", - "isbn_13": ["0"], # placeholder — must be filtered + "isbn_13": ["0"], # placeholder — must be filtered "contributions": ["Editor: Jane Doe"], } @@ -90,6 +96,7 @@ def assert_valid_schema(record: OLImportRecord) -> None: # ITANRecord unit tests # --------------------------------------------------------------------------- + class TestITANRecord: def test_parses_full_record(self): rec = ITANRecord.model_validate(FULL_RAW) @@ -178,6 +185,7 @@ def test_empty_subjects_list_becomes_none(self): # ITANProvider — live end-to-end over real data # --------------------------------------------------------------------------- + @pytest.fixture(scope="module") 
def live_records(): """Fetch all ITAN records once for the module; skip if network unavailable.""" @@ -209,7 +217,9 @@ def test_no_whitespace_leading_subjects(self, live_records): for r in live_records: if r.subjects: for s in r.subjects: - assert s == s.strip(), f"Unstripped subject {s!r} in {r.source_records}" + assert ( + s == s.strip() + ), f"Unstripped subject {s!r} in {r.source_records}" def test_all_records_have_source_records_prefix(self, live_records): for r in live_records: @@ -223,4 +233,6 @@ def test_all_records_pass_json_schema(self, live_records): errors = list(_VALIDATOR.iter_errors(r.model_dump(exclude_none=True))) if errors: failures.append((r.source_records, errors)) - assert not failures, f"{len(failures)} records failed schema validation: {failures[:3]}" + assert ( + not failures + ), f"{len(failures)} records failed schema validation: {failures[:3]}" From 0a368b725dc1bc534c36bbc83b0fcb361bb9ff1e Mon Sep 17 00:00:00 2001 From: "Michael E. Karpeles" Date: Wed, 17 Jun 2026 13:30:42 -0600 Subject: [PATCH 3/4] feat(itan): store bookstore slug as identifier for direct deep links Adds _SLUG_MAP (scraped from itan.app/bookstore?search=) and uses the full slug as the itan_technologies identifier value so OL can build a direct link via https://itan.app/bookstore/@@@. source_records keeps the stable BOO ID for deduplication. All 67 records resolved. --- sources/itan/record.py | 83 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 82 insertions(+), 1 deletion(-) diff --git a/sources/itan/record.py b/sources/itan/record.py index c2c9ad5e..64eaefc7 100644 --- a/sources/itan/record.py +++ b/sources/itan/record.py @@ -25,6 +25,79 @@ # OL import schema pattern for isbn_13 _ISBN13_RE = re.compile(r"^([0-9][- ]*){13}$") +# Scraped from https://itan.app/bookstore?search=. 
+# Stored as the itan_technologies identifier so OL builds a direct deep link via +# url template: https://itan.app/bookstore/@@@ +_SLUG_MAP = { + "BOO1017": "book-clouds-and-mercy-BOO1017", + "BOO1019": "afrocentric-science-fiction-fantasy-titan-race-edentu-oroso-boo1019", + "BOO1021": "africa-children-books-amanda-the-smart-safety-girl-elizabeth-uwalaka-boo1021", + "BOO1022": "african-children-books-the-king-s-daughter-mopelola-adeniyi-boo1022", + "BOO1023": "african-romance-travails-of-eve-s-daughters-mopelola-adeniyi-boo1023", + "BOO1024": "african-children-books-kubi-the-lion-prince-mopelola-adeniyi-boo1024", + "BOO1025": "african-children-books-the-lying-bird-mopelola-adeniyi-boo1025", + "BOO1026": "african-literature-fiction-dairy-of-a-whiz-kid-eny-awevia-boo1026", + "BOO1027": "african-literature-fiction-in-bed-with-her-guy-mopelola-adeniyi-boo1027", + "BOO1028": "african-literature-fiction-juliet-matthew-simpa-boo1028", + "BOO1029": "african-religious-fiction-soul-reapers-mopelola-adeniyi-boo1029", + "BOO1030": "african-religious-fiction-a-rough-diamond-mopelola-adeniyi-boo1030", + "BOO1031": "african-children-books-african-tales-for-modern-times-mopelola-adeniyi-boo1031", + "BOO1032": "african-children-books-talking-doll-mopelola-adeniyi-boo1032", + "BOO1033": "african-children-books-african-tales-for-modern-times-vol-2-mopelola-adeniyi-boo1033", + "BOO1034": "african-children-books-asoro-s-visit-to-the-dentist-mopelola-adeniyi-boo1034", + "BOO1035": "african-literature-fiction-we-belong-to-nobody-edentu-oroso-boo1035", + "BOO1037": "african-literature-fiction-revamping-me-mopelola-adeniyi-boo1037", + "BOO1038": "african-literature-fiction-enemies-within-me-anthony-uyaebo-boo1038", + "BOO1039": "african-romance-heart-webs-mopelola-adeniyi-boo1039", + "BOO1040": "african-literature-fiction-the-grief-gallery-john-chizoba-vincent-boo1040", + "BOO1041": "african-literature-fiction-how-we-chose-who-dies-igoche-john-igoche-boo1041", + "BOO1042": 
"african-science-fiction-fantasy-something-strange-elvis-chidiebube-boo1042", + "BOO1043": "african-science-fiction-fantasy-the-mask-of-oshun-ode-sylvia-boo1043", + "BOO1044": "african-literature-fiction-the-quiet-general-other-stories-matthew-simpa-boo1044", + "BOO1045": "african-mystery-thriller-and-suspense-nyanya14-amaechi-praise-boo1045", + "BOO1046": "african-religious-fiction-on-eagle-s-wings-emmanuel-olaoluwa-boo1046", + "BOO1047": "african-religious-fiction-the-wind-and-the-fire-emmanuel-olaoluwa-boo1047", + "BOO1048": "african-religious-fiction-the-throne-and-the-city-emmanuel-olaoluwa-boo1048", + "BOO1049": "african-mystery-thriller-and-suspense-the-first-whisper-chimdinma-anagor-boo1049", + "BOO1050": "african-mystery-thriller-and-suspense-the-fairy-s-magic-wand-joshua-okoromodeke-boo1050", + "BOO1051": "african-mystery-thriller-and-suspense-the-field-of-gold-joshua-okoromodeke-boo1051", + "BOO1053": "african-mystery-thriller-and-suspense-prototype-lyra-jennifer-okafor-boo1053", + "BOO1054": "african-literature-fiction-aminu-s-diary-usman-inuwa-boo1054", + "BOO1055": "african-religious-fiction-marked-mopelola-adeniyi-boo1055", + "BOO1056": "african-mystery-thriller-and-suspense-the-mambila-mirage-sa-idu-sulaiman-boo1056", + "BOO1057": "african-mystery-thriller-and-suspense-veins-of-deception-amina-sa-id-sulaiman-boo1057", + "BOO1058": "african-literature-fiction-my-mother-s-tears-chimbuikem-obiajunwa-boo1058", + "BOO1059": "african-literature-fiction-echoes-behind-the-wall-ahmad-abubakar-mustafa-boo1059", + "BOO1065": "african-children-books-it-is-in-you-mopelola-adeniyi-boo1065", + "BOO1066": "african-children-books-the-wise-princess-mopelola-adeniyi-boo1066", + "BOO1067": "african-children-books-financial-savvy-kids-mopelola-adeniyi-boo1067", + "BOO1068": "african-romance-better-than-chocolate-and-other-stories-buka-chiro-kafor-boo1068", + "BOO1073": "african-literature-fiction-emancipation-atamgbo-raymond-otogwung-boo1073", + "BOO1077": 
"african-romance-love-in-lagos-s-dirt-prince-atanda-boo1077", + "BOO1079": "african-children-books-the-activity-kindergarten-of-wonderful-stories-ahmad-abubakar-mustafa-boo1079", + "BOO1081": "african-science-fiction-fantasy-the-cube-that-birthed-gods-chukwuebuka-akadile-boo1081", + "BOO1085": "african-mystery-thriller-and-suspense-two-fronts-buka-chiro-kafor-boo1085", + "BOO1086": "african-literature-fiction-lifted-by-forex-aondaver-james-yange-boo1086", + "BOO1088": "african-literature-fiction-the-republic-of-wazimba-isaac-ogbadu-achimugu-boo1088", + "BOO1089": "african-literature-fiction-a-future-that-remembers-isaac-ogbadu-achimugu-boo1089", + "BOO1090": "african-science-fiction-fantasy-still-ours-lily-baby-girl-boo1090", + "BOO1091": "african-romance-the-road-to-uncertainty-obinna-godswill-chinegwu-boo1091", + "BOO1092": "african-mystery-thriller-and-suspense-the-killer-and-the-saint-obinna-godswill-chinegwu-boo1092", + "BOO1093": "african-romance-home-calling-obinna-godswill-chinegwu-boo1093", + "BOO1095": "african-religious-fiction-the-journey-beyond-life-death-eternity-james-yange-boo1095", + "BOO1097": "african-literature-fiction-the-iron-fist-obinna-godswill-chinegwu-boo1097", + "BOO1098": "african-literature-fiction-the-chief-who-walked-back-isaac-achimugu-boo1098", + "BOO1099": "african-romance-mrs-senator-carissa-chiagozie-boo1099", + "BOO1100": "african-literature-fiction-the-beauty-of-scars-oluwadamilola-loise-anjorin-boo1100", + "BOO1109": "african-literature-fiction-shadows-of-the-continent-urunna-ikemefuna-boo1109", + "BOO1110": "african-romance-sworn-strangers-urunna-ikemefuna-boo1110", + "BOO1111": "african-literature-fiction-born-different-yetunde-anyanwun-boo1111", + "BOO1112": "african-mystery-thriller-and-suspense-dancing-with-the-enemy-obinna-godswill-chinegwu-boo1112", + "BOO1113": "african-science-fiction-fantasy-kanran-earth-scavengers-prince-atanda-boo1113", + "BOO1116": 
"african-mystery-thriller-and-suspense-ember-s-and-halo-s-david-uchenna-ejiegbu-boo1116", + "BOO1117": "african-literature-fiction-the-tortoise-that-carried-iron-isaac-achimugu-boo1117", +} + class ITANRecord(DataProviderRecord): """One record from the ITAN catalog JSONL file. @@ -74,6 +147,14 @@ def to_ol_import(self) -> Optional[OLImportRecord]: [v for v in self.isbn_10 if v and v != "0"] if self.isbn_10 else None ) or None + # Use the bookstore slug as the identifier value so OL builds a direct deep link. + # source_records keeps the stable BOO ID for deduplication. + boo_id = next( + (sr.split(":", 1)[1] for sr in self.source_records if ":" in sr), None + ) + slug = _SLUG_MAP.get(boo_id) if boo_id else None + identifiers = {"itan_technologies": [slug or boo_id]} if (slug or boo_id) else self.identifiers + return OLImportRecord( title=self.title, source_records=self.source_records, @@ -87,6 +168,6 @@ def to_ol_import(self) -> Optional[OLImportRecord]: subjects=subjects or None, isbn_13=isbn_13, isbn_10=isbn_10, - identifiers=self.identifiers, + identifiers=identifiers, contributions=self.contributions, ) From 479a96db8bf838e93928396f21f25020e03808bd Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 17 Jun 2026 19:31:30 +0000 Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- sources/itan/record.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sources/itan/record.py b/sources/itan/record.py index 64eaefc7..ec87c526 100644 --- a/sources/itan/record.py +++ b/sources/itan/record.py @@ -153,7 +153,11 @@ def to_ol_import(self) -> Optional[OLImportRecord]: (sr.split(":", 1)[1] for sr in self.source_records if ":" in sr), None ) slug = _SLUG_MAP.get(boo_id) if boo_id else None - identifiers = {"itan_technologies": [slug or boo_id]} if (slug or boo_id) else self.identifiers + identifiers = ( + 
{"itan_technologies": [slug or boo_id]} + if (slug or boo_id) + else self.identifiers + ) return OLImportRecord( title=self.title,