From 5e76716e6e9765b9445f9721392f4118fbf83f7c Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Fri, 23 Jun 2023 23:48:09 +0200 Subject: [PATCH 1/8] Implementation EntityShape Fix #470 --- requirements.txt | 1 + setup.cfg | 1 + setup.py | 3 ++- test/test_entity_item.py | 7 +++++++ wikibaseintegrator/entities/baseentity.py | 21 +++++++++++++++++++++ 5 files changed, 32 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 4f33e295..19e4e628 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,4 @@ oauthlib~=3.2.2 requests~=2.31.0 requests-oauthlib~=1.3.1 ujson~=5.8.0 +entityshape~=0.0.1 diff --git a/setup.cfg b/setup.cfg index cb904dd2..7b813853 100644 --- a/setup.cfg +++ b/setup.cfg @@ -40,6 +40,7 @@ install_requires = requests>=2.27.1,<2.29.0 requests-oauthlib~=1.3.1 ujson>=5.4,<5.6 + entityshape~=0.0.1 python_requires = >=3.8, <3.13 [options.extras_require] diff --git a/setup.py b/setup.py index e2d48c03..b3bc19e3 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,8 @@ "oauthlib ~= 3.2.0", "requests >= 2.27.1,< 2.32.0", "requests-oauthlib ~= 1.3.1", - "ujson >= 5.4,< 5.9" + "ujson >= 5.4,< 5.9", + "entityshape ~= 0.0.1" ], extras_require={ "dev": [ diff --git a/test/test_entity_item.py b/test/test_entity_item.py index a83ed3aa..4efb5e1c 100644 --- a/test/test_entity_item.py +++ b/test/test_entity_item.py @@ -107,3 +107,10 @@ def test_new_lines(self): item.claims.add(MonolingualText(prop_nr=123, text="Multi\r\nline")) item.claims.add(MonolingualText(prop_nr=123, text="Multi\rline")) item.claims.add(MonolingualText(prop_nr=123, text="Multi\nline")) + + def test_entity_schema(self): + random_campsite = wbi.item.get('Q119156070') + assert random_campsite.validate_schema(entity_schema="E376") + assert random_campsite.validate_schema(entity_schema="376") + assert random_campsite.validate_schema(entity_schema=376) + assert not wbi.item.get('Q582').validate_schema(entity_schema="E376") diff --git 
a/wikibaseintegrator/entities/baseentity.py b/wikibaseintegrator/entities/baseentity.py index b032da62..2d978096 100644 --- a/wikibaseintegrator/entities/baseentity.py +++ b/wikibaseintegrator/entities/baseentity.py @@ -1,12 +1,16 @@ from __future__ import annotations import logging +import re from copy import copy from typing import TYPE_CHECKING, Any +from entityshape import EntityShape + from wikibaseintegrator import wbi_fastrun from wikibaseintegrator.datatypes import BaseDataType from wikibaseintegrator.models.claims import Claim, Claims +from wikibaseintegrator.wbi_config import config from wikibaseintegrator.wbi_enums import ActionIfExists from wikibaseintegrator.wbi_exceptions import MissingEntityException from wikibaseintegrator.wbi_helpers import delete_page, edit_entity, mediawiki_api_call_helper @@ -299,6 +303,23 @@ def get_entity_url(self, wikibase_url: str | None = None) -> str: raise ValueError('wikibase_url or entity ID is null.') + def validate_schema(self, entity_schema: str, language: str | None = None) -> bool: + if isinstance(entity_schema, str): + pattern = re.compile(r'^(?:[a-zA-Z]+:)?E?([0-9]+)$') + matches = pattern.match(entity_schema) + + if not matches: + raise ValueError(f"Invalid EntitySchema ID ({entity_schema}), format must be 'E[0-9]+'") + + entity_schema = f'E{matches.group(1)}' + elif isinstance(entity_schema, int): + entity_schema = f'E{entity_schema}' + else: + raise ValueError(f"Invalid EntitySchema ID ({entity_schema}), format must be 'E[0-9]+'") + + language = str(language or config['DEFAULT_LANGUAGE']) + return EntityShape(qid=self.id, eid=entity_schema, lang=language).get_result().is_valid + def __repr__(self): """A mixin implementing a simple __repr__.""" return "<{klass} @{id:x} {attrs}>".format( # pylint: disable=consider-using-f-string From cabd253c292a9ca41d37a35296a3928161669212 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 25 Jun 2023 14:28:37 +0200 Subject: [PATCH 2/8] 
Update entityshape --- requirements.txt | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 19e4e628..728b803c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,4 @@ oauthlib~=3.2.2 requests~=2.31.0 requests-oauthlib~=1.3.1 ujson~=5.8.0 -entityshape~=0.0.1 +entityshape~=0.0.2 diff --git a/setup.cfg b/setup.cfg index 7b813853..df78eb36 100644 --- a/setup.cfg +++ b/setup.cfg @@ -40,7 +40,7 @@ install_requires = requests>=2.27.1,<2.29.0 requests-oauthlib~=1.3.1 ujson>=5.4,<5.6 - entityshape~=0.0.1 + entityshape~=0.0.2 python_requires = >=3.8, <3.13 [options.extras_require] diff --git a/setup.py b/setup.py index b3bc19e3..e36f284a 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ "requests >= 2.27.1,< 2.32.0", "requests-oauthlib ~= 1.3.1", "ujson >= 5.4,< 5.9", - "entityshape ~= 0.0.1" + "entityshape ~= 0.0.2" ], extras_require={ "dev": [ From 3f68ae8bea98c4ee73572bf37ce9db9d4a3f970c Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 25 Jun 2023 14:31:54 +0200 Subject: [PATCH 3/8] Rename validate_schema to schema_validator --- test/test_entity_item.py | 8 ++++---- wikibaseintegrator/entities/baseentity.py | 20 ++++++++++---------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/test/test_entity_item.py b/test/test_entity_item.py index 4efb5e1c..92a49cbf 100644 --- a/test/test_entity_item.py +++ b/test/test_entity_item.py @@ -110,7 +110,7 @@ def test_new_lines(self): def test_entity_schema(self): random_campsite = wbi.item.get('Q119156070') - assert random_campsite.validate_schema(entity_schema="E376") - assert random_campsite.validate_schema(entity_schema="376") - assert random_campsite.validate_schema(entity_schema=376) - assert not wbi.item.get('Q582').validate_schema(entity_schema="E376") + assert random_campsite.schema_validator(entity_schema_id="E376").is_valid + assert 
random_campsite.schema_validator(entity_schema_id="376").is_valid + assert random_campsite.schema_validator(entity_schema_id=376).is_valid + assert not wbi.item.get('Q582').schema_validator(entity_schema_id="E376").is_valid diff --git a/wikibaseintegrator/entities/baseentity.py b/wikibaseintegrator/entities/baseentity.py index 2d978096..ee884196 100644 --- a/wikibaseintegrator/entities/baseentity.py +++ b/wikibaseintegrator/entities/baseentity.py @@ -5,7 +5,7 @@ from copy import copy from typing import TYPE_CHECKING, Any -from entityshape import EntityShape +from entityshape import EntityShape, Result from wikibaseintegrator import wbi_fastrun from wikibaseintegrator.datatypes import BaseDataType @@ -303,22 +303,22 @@ def get_entity_url(self, wikibase_url: str | None = None) -> str: raise ValueError('wikibase_url or entity ID is null.') - def validate_schema(self, entity_schema: str, language: str | None = None) -> bool: - if isinstance(entity_schema, str): + def schema_validator(self, entity_schema_id: str, language: str | None = None) -> Result: + if isinstance(entity_schema_id, str): pattern = re.compile(r'^(?:[a-zA-Z]+:)?E?([0-9]+)$') - matches = pattern.match(entity_schema) + matches = pattern.match(entity_schema_id) if not matches: - raise ValueError(f"Invalid EntitySchema ID ({entity_schema}), format must be 'E[0-9]+'") + raise ValueError(f"Invalid EntitySchema ID ({entity_schema_id}), format must be 'E[0-9]+'") - entity_schema = f'E{matches.group(1)}' - elif isinstance(entity_schema, int): - entity_schema = f'E{entity_schema}' + entity_schema_id = f'E{matches.group(1)}' + elif isinstance(entity_schema_id, int): + entity_schema_id = f'E{entity_schema_id}' else: - raise ValueError(f"Invalid EntitySchema ID ({entity_schema}), format must be 'E[0-9]+'") + raise ValueError(f"Invalid EntitySchema ID ({entity_schema_id}), format must be 'E[0-9]+'") language = str(language or config['DEFAULT_LANGUAGE']) - return EntityShape(qid=self.id, eid=entity_schema, 
lang=language).get_result().is_valid + return EntityShape(qid=self.id, eid=entity_schema_id, lang=language).get_result() def __repr__(self): """A mixin implementing a simple __repr__.""" From 25e430792355eba49698d1aedc1575cc2556f1d2 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 25 Jun 2023 17:34:55 +0200 Subject: [PATCH 4/8] Update entityshape to 0.1.0 --- requirements.txt | 2 +- setup.cfg | 2 +- setup.py | 2 +- wikibaseintegrator/entities/baseentity.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index 728b803c..5fe86c19 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,4 @@ oauthlib~=3.2.2 requests~=2.31.0 requests-oauthlib~=1.3.1 ujson~=5.8.0 -entityshape~=0.0.2 +entityshape~=0.1.0 diff --git a/setup.cfg b/setup.cfg index df78eb36..8a465ad6 100644 --- a/setup.cfg +++ b/setup.cfg @@ -40,7 +40,7 @@ install_requires = requests>=2.27.1,<2.29.0 requests-oauthlib~=1.3.1 ujson>=5.4,<5.6 - entityshape~=0.0.2 + entityshape~=0.1.0 python_requires = >=3.8, <3.13 [options.extras_require] diff --git a/setup.py b/setup.py index e36f284a..92beea53 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ "requests >= 2.27.1,< 2.32.0", "requests-oauthlib ~= 1.3.1", "ujson >= 5.4,< 5.9", - "entityshape ~= 0.0.2" + "entityshape ~= 0.1.0" ], extras_require={ "dev": [ diff --git a/wikibaseintegrator/entities/baseentity.py b/wikibaseintegrator/entities/baseentity.py index ee884196..8c0bf6d0 100644 --- a/wikibaseintegrator/entities/baseentity.py +++ b/wikibaseintegrator/entities/baseentity.py @@ -318,7 +318,7 @@ def schema_validator(self, entity_schema_id: str, language: str | None = None) - raise ValueError(f"Invalid EntitySchema ID ({entity_schema_id}), format must be 'E[0-9]+'") language = str(language or config['DEFAULT_LANGUAGE']) - return EntityShape(qid=self.id, eid=entity_schema_id, lang=language).get_result() + return EntityShape(qid=self.id, 
eid=entity_schema_id, lang=language).validate_and_get_result() def __repr__(self): """A mixin implementing a simple __repr__.""" From ce35e54c82bb543394e0a7aa8f0157df46099d92 Mon Sep 17 00:00:00 2001 From: Dennis Priskorn Date: Thu, 22 Jun 2023 21:08:03 +0200 Subject: [PATCH 5/8] feat: Add pyentityshape and a test where it is used to validate an item. --- test/test_entityshape.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 test/test_entityshape.py diff --git a/test/test_entityshape.py b/test/test_entityshape.py new file mode 100644 index 00000000..ca424efd --- /dev/null +++ b/test/test_entityshape.py @@ -0,0 +1,17 @@ +from unittest import TestCase + +from entityshape import EntityShape + +from wikibaseintegrator import WikibaseIntegrator +from wikibaseintegrator.wbi_config import config as wbi_config + +wbi_config['USER_AGENT'] = 'WikibaseIntegrator-pytest/1.0 (test_entity_item.py)' +wbi = WikibaseIntegrator() +class TestEntityShape(TestCase): + def test_validate_one_item(self): + item = wbi.item.get("Q1") + #item.validate(eid="E1", lang="en") + e = EntityShape(qid=item.id, eid="E1", lang="en") + result = e.get_result() + print(result) + assert result != {} From 8082496007e98dce1baa0ba70c16fa639ea9a5cb Mon Sep 17 00:00:00 2001 From: Dennis Priskorn Date: Fri, 23 Jun 2023 18:36:11 +0200 Subject: [PATCH 6/8] feat: Update to entityshape and add a new test where it is used to validate a group of items against a single entityschema. 
--- test/test_entityshape.py | 43 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/test/test_entityshape.py b/test/test_entityshape.py index ca424efd..d0d12dfa 100644 --- a/test/test_entityshape.py +++ b/test/test_entityshape.py @@ -4,14 +4,49 @@ from wikibaseintegrator import WikibaseIntegrator from wikibaseintegrator.wbi_config import config as wbi_config +from wikibaseintegrator.wbi_helpers import execute_sparql_query wbi_config['USER_AGENT'] = 'WikibaseIntegrator-pytest/1.0 (test_entity_item.py)' wbi = WikibaseIntegrator() class TestEntityShape(TestCase): def test_validate_one_item(self): - item = wbi.item.get("Q1") + item = wbi.item.get("Q96620548") #item.validate(eid="E1", lang="en") - e = EntityShape(qid=item.id, eid="E1", lang="en") + e = EntityShape(qid=item.id, eid="E376", lang="en") result = e.get_result() - print(result) - assert result != {} + assert result.is_valid is False + assert result.required_properties_that_are_missing == ["P137"] + + def test_validate_all_campsite_shelter_items(self): + # This query was build in a few seconds using https://query.wikidata.org/querybuilder/?uselang=en :) + results = execute_sparql_query(""" + SELECT DISTINCT ?item ?itemLabel WHERE { + SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE]". } + { + SELECT DISTINCT ?item WHERE { + ?item p:P31 ?statement0. + ?statement0 (ps:P31/(wdt:P279*)) wd:Q96620652. 
+ } + LIMIT 100 + } + } + """) + bindings = results["results"]["bindings"] + print(f"Found {len(bindings)} results") + count = 1 + for result in bindings: + qid = result["itemLabel"]["value"] + print(f"Working on: {qid}") + #print(result) + item = wbi.item.get(qid) + e = EntityShape(qid=item.id, eid="E376", lang="en") + result = e.get_result() + # Ignore the invalid shelters missing an operator P137 + if result.is_valid is False and result.required_properties_that_are_missing == {"P137"}: + print("Skipping campsite only missing and operator") + elif result.is_valid is True: + print("Skipping valid campsite - they are boring!") + else: + print(f"is_valid: {result.is_valid}, required_properties_that_are_missing:{result.required_properties_that_are_missing}, statements_with_property_that_is_not_allowed:{result.statements_with_property_that_is_not_allowed}, properties_with_too_many_statements:{result.properties_with_too_many_statements}, see {item.get_entity_url()}") + # assert result.is_valid is False + # assert result.required_properties_that_are_missing == ["P137"] From ff8ec0641ac8260c54fb9f4340dbd3a454bc1477 Mon Sep 17 00:00:00 2001 From: Dennis Priskorn Date: Tue, 27 Jun 2023 04:36:53 +0200 Subject: [PATCH 7/8] feat: Implement support for PyShex also. Add pydantic because it is just nice to have. Add tests. Add documentation. --- README.md | 30 ++++++++ requirements.txt | 3 + test/test_entity_item.py | 53 +++++++++++-- wikibaseintegrator/entities/baseentity.py | 90 ++++++++++++++++++++++- wikibaseintegrator/wbi_exceptions.py | 8 ++ 5 files changed, 177 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 6ff267db..f53ec3a5 100644 --- a/README.md +++ b/README.md @@ -341,6 +341,36 @@ media.claims.add(Item(prop_nr='P180', value='Q3146211')) media.write() ``` +## Entity validation +Two different validators for entities are available. + +### Entityshape +This validator is in a beta state. For simple entity schemas it has proven reliable. 
+ +See https://github.com/dpriskorn/entityshape#limitations for a list of limitations. + +```python +from wikibaseintegrator import WikibaseIntegrator + +wbi = WikibaseIntegrator() +item = wbi.item.get('Q1') +result = item.entityshape_schema_validator(entity_schema_id="E1") +print(result) +``` + +### PyShex +This validator is considered highly experimental (alpha state). +We have not yet been able to successfully validate an item or lexeme with this library. + +```python +from wikibaseintegrator import WikibaseIntegrator + +wbi = WikibaseIntegrator() +item = wbi.item.get('Q1') +result = item.pyshex_schema_validator(entity_schema_id="E1") +print(result) +``` + # More than Wikibase # WikibaseIntegrator natively supports some extensions: diff --git a/requirements.txt b/requirements.txt index 5fe86c19..ffcd737e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,6 @@ requests~=2.31.0 requests-oauthlib~=1.3.1 ujson~=5.8.0 entityshape~=0.1.0 +pyshex~=0.8.1 +rdflib~=6.3.2 +pydantic~=1.10.9 \ No newline at end of file diff --git a/test/test_entity_item.py b/test/test_entity_item.py index 92a49cbf..7ee17a0a 100644 --- a/test/test_entity_item.py +++ b/test/test_entity_item.py @@ -108,9 +108,52 @@ def test_new_lines(self): item.claims.add(MonolingualText(prop_nr=123, text="Multi\rline")) item.claims.add(MonolingualText(prop_nr=123, text="Multi\nline")) - def test_entity_schema(self): + def test_entityshape_entity_validation(self): random_campsite = wbi.item.get('Q119156070') - assert random_campsite.schema_validator(entity_schema_id="E376").is_valid - assert random_campsite.schema_validator(entity_schema_id="376").is_valid - assert random_campsite.schema_validator(entity_schema_id=376).is_valid - assert not wbi.item.get('Q582').schema_validator(entity_schema_id="E376").is_valid + assert random_campsite.entityshape_schema_validator(entity_schema_id="E376").is_valid + assert random_campsite.entityshape_schema_validator(entity_schema_id="376").is_valid + assert 
random_campsite.entityshape_schema_validator(entity_schema_id=376).is_valid + assert not wbi.item.get('Q582').entityshape_schema_validator(entity_schema_id="E376").is_valid + + def test_pyshex_entity_validation(self): + # TODO find a combination of shex and entity that is valid + # danish noun + result = wbi.lexeme.get('L41172').pyshex_schema_validator(entity_schema_id="E34") + assert result.valid is False + # This error makes no sense TODO report upstream to pyshex + assert result.reason == 'Import failure on https://www.wikidata.org/wiki/Special:EntitySchemaText/E68' + random_campsite = wbi.item.get('Q119156070') + result = random_campsite.pyshex_schema_validator(entity_schema_id="E376") + assert result.valid is False + assert result.reason == (' Testing wd:Q119156070 against shape campsite\n' + ' Datatype constraint (http://www.w3.org/2001/XMLSchema#string) does not ' + 'match URIRef ' + '\n' + ' Testing wd:Q119156070 against shape campsite\n' + ' Datatype constraint (http://www.w3.org/2001/XMLSchema#string) does not ' + 'match URIRef ' + '\n' + ' Testing wd:Q119156070 against shape campsite\n' + ' No matching triples found for predicate wdt:P31') + assert random_campsite.pyshex_schema_validator(entity_schema_id="376").valid is False + assert random_campsite.pyshex_schema_validator(entity_schema_id=376).valid is False + result2 = wbi.item.get('Q582').pyshex_schema_validator(entity_schema_id="E376") + assert not result2.valid + assert result2.reason == (' Testing wd:Q582 against shape campsite\n' + ' Triples:\n' + ' wd:Q582 wdt:P31 wd:Q1549591 .\n' + ' wd:Q582 wdt:P31 wd:Q484170 .\n' + ' 2 triples exceeds max {1,1}\n' + ' Testing wd:Q582 against shape campsite\n' + ' Node: wd:Q1549591 not in value set:\n' + '\t {"values": ["http://www.wikidata.org/entity/Q832778", "http:...\n' + ' Testing wd:Q582 against shape campsite\n' + ' Node: wd:Q484170 not in value set:\n' + '\t {"values": ["http://www.wikidata.org/entity/Q832778", "http:...\n' + ' Testing wd:Q582 against 
shape campsite\n' + ' Triples:\n' + ' wd:Q582 wdt:P31 wd:Q1549591 .\n' + ' wd:Q582 wdt:P31 wd:Q484170 .\n' + ' 2 triples exceeds max {1,1}\n' + ' Testing wd:Q582 against shape campsite\n' + ' No matching triples found for predicate wdt:P31') diff --git a/wikibaseintegrator/entities/baseentity.py b/wikibaseintegrator/entities/baseentity.py index 8c0bf6d0..f4636f85 100644 --- a/wikibaseintegrator/entities/baseentity.py +++ b/wikibaseintegrator/entities/baseentity.py @@ -5,14 +5,19 @@ from copy import copy from typing import TYPE_CHECKING, Any +import requests from entityshape import EntityShape, Result +from pydantic import BaseModel +from pyshex import ShExEvaluator +from pyshex.shex_evaluator import EvaluationResult +from rdflib import Graph from wikibaseintegrator import wbi_fastrun from wikibaseintegrator.datatypes import BaseDataType from wikibaseintegrator.models.claims import Claim, Claims from wikibaseintegrator.wbi_config import config from wikibaseintegrator.wbi_enums import ActionIfExists -from wikibaseintegrator.wbi_exceptions import MissingEntityException +from wikibaseintegrator.wbi_exceptions import MissingEntityException, TtlDownloadError, EntitySchemaDownloadError from wikibaseintegrator.wbi_helpers import delete_page, edit_entity, mediawiki_api_call_helper from wikibaseintegrator.wbi_login import _Login @@ -22,6 +27,17 @@ log = logging.getLogger(__name__) +class PyshexResult(BaseModel): + reason: str + valid: bool + + def __str__(self): + return ( + f"Valid: {self.valid}\n" + f"Reason: {self.reason}" + ) + + class BaseEntity: ETYPE = 'base-entity' subclasses: list[type[BaseEntity]] = [] @@ -303,7 +319,7 @@ def get_entity_url(self, wikibase_url: str | None = None) -> str: raise ValueError('wikibase_url or entity ID is null.') - def schema_validator(self, entity_schema_id: str, language: str | None = None) -> Result: + def _get_valid_entity_schema_id(self, entity_schema_id) -> str: if isinstance(entity_schema_id, str): pattern = 
re.compile(r'^(?:[a-zA-Z]+:)?E?([0-9]+)$') matches = pattern.match(entity_schema_id) @@ -316,10 +332,80 @@ def schema_validator(self, entity_schema_id: str, language: str | None = None) - entity_schema_id = f'E{entity_schema_id}' else: raise ValueError(f"Invalid EntitySchema ID ({entity_schema_id}), format must be 'E[0-9]+'") + return entity_schema_id + + def _get_ttl_data(self) -> str: + """Download the entity data in turtle format (ttl)""" + api_endpoint = 'https://www.wikidata.org/wiki/Special:EntityData/' + api_url = f'{api_endpoint}{self.id}.ttl' + # TODO fix timeout + response = requests.get(api_url, timeout=10) + if response.status_code == 200: + return response.text + else: + raise TtlDownloadError() + + def _get_schema_text(self, entity_schema_id) -> str: + """ + Downloads the schema from wikidata + + :param entity_schema_id: the entityschema id to be downloaded + """ + url: str = f"https://www.wikidata.org/wiki/EntitySchema:{entity_schema_id}?action=raw" + response = requests.get(url, timeout=10) + if response.status_code == 200: + json_text: dict = response.json() + return json_text["schemaText"] + else: + raise EntitySchemaDownloadError() + # TODO make an interface for the validator so the user + # does not have to think about how the internals of the validators work + # The users should get similar output no matter which validator they choose + def entityshape_schema_validator(self, entity_schema_id: str, language: str | None = None) -> Result: + entity_schema_id = self._get_valid_entity_schema_id(entity_schema_id=entity_schema_id) language = str(language or config['DEFAULT_LANGUAGE']) return EntityShape(qid=self.id, eid=entity_schema_id, lang=language).validate_and_get_result() + def pyshex_schema_validator(self, entity_schema_id: str) -> PyshexResult: + entity_schema_id = self._get_valid_entity_schema_id(entity_schema_id=entity_schema_id) + return self._check_shex_conformance(entity_schema_id=entity_schema_id) + + def _check_shex_conformance(self, 
entity_schema_id: str= "", data: str= "") -> PyshexResult: + """ + Static method which can be used to check for conformance of a Wikidata item to an EntitySchema any SPARQL query + + :param entity_schema_id: The URI prefixes required for an endpoint, default is the Wikidata specific prefixes + :param data: Turtle data to be validated (Optional) + :return: The results of the query are an instance of PyshexResult + """ + # load the string of ttl data into a rdf graph to please ShExEvaluator + rdfdata = Graph() + if not data: + # This downloads the ttl data + data = self._get_ttl_data() + # print(data) + # exit() + rdfdata.parse(data=data) + else: + rdfdata.parse(data=data) + for result in ShExEvaluator(rdf=rdfdata, schema=self._get_schema_text(entity_schema_id=entity_schema_id), focus=f"http://www.wikidata.org/entity/{self.id}").evaluate(): + result: EvaluationResult + # convert named tuple to pydantic class which is way nicer + # class EvaluationResult(NamedTuple): + # result: bool + # focus: Optional[URIRef] + # start: Optional[URIRef] + # reason: Optional[str] + # We return early because we expect only one result from ShExEvaluator + return PyshexResult( + valid=result[0], + # We ignore these for now as they seem overcomplicated + #focus=result[1], + #start=result[2], + reason=result[3], + ) + def __repr__(self): """A mixin implementing a simple __repr__.""" return "<{klass} @{id:x} {attrs}>".format( # pylint: disable=consider-using-f-string diff --git a/wikibaseintegrator/wbi_exceptions.py b/wikibaseintegrator/wbi_exceptions.py index 8440f8fc..ac312286 100644 --- a/wikibaseintegrator/wbi_exceptions.py +++ b/wikibaseintegrator/wbi_exceptions.py @@ -99,3 +99,11 @@ class MissingEntityException(Exception): class SearchError(Exception): pass + + +class TtlDownloadError(BaseException): + pass + + +class EntitySchemaDownloadError(BaseException): + pass From 7270888b696c8dbfce683122e0c7bc2a7b45c5f6 Mon Sep 17 00:00:00 2001 From: Myst 
<1592048+LeMyst@users.noreply.github.com> Date: Fri, 30 Jun 2023 00:18:58 +0200 Subject: [PATCH 8/8] Update baseentity.py --- wikibaseintegrator/entities/baseentity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wikibaseintegrator/entities/baseentity.py b/wikibaseintegrator/entities/baseentity.py index f4636f85..437a892b 100644 --- a/wikibaseintegrator/entities/baseentity.py +++ b/wikibaseintegrator/entities/baseentity.py @@ -17,7 +17,7 @@ from wikibaseintegrator.models.claims import Claim, Claims from wikibaseintegrator.wbi_config import config from wikibaseintegrator.wbi_enums import ActionIfExists -from wikibaseintegrator.wbi_exceptions import MissingEntityException, TtlDownloadError, EntitySchemaDownloadError +from wikibaseintegrator.wbi_exceptions import EntitySchemaDownloadError, MissingEntityException, TtlDownloadError from wikibaseintegrator.wbi_helpers import delete_page, edit_entity, mediawiki_api_call_helper from wikibaseintegrator.wbi_login import _Login