diff --git a/README.md b/README.md index 6f671f2b..d7821013 100644 --- a/README.md +++ b/README.md @@ -577,6 +577,11 @@ type instances for references and qualifiers. WikibaseIntegrator supports SDC (Structured Data on Commons) to update a media file hosted on Wikimedia Commons. +> [!IMPORTANT] +> To ease compatibility between Wikidata and Wikimedia Commons, the MediaInfo entity uses the "claims" field instead of the "statements" field from the API. +> When you read a MediaInfo entity, the "claims" field will be filled with the data from the "statements" field. +> When you write a MediaInfo entity, the "claims" field will be replaced by the "statements" field in the generated JSON. + ### Retrieve data ### ```python diff --git a/test/test_entity_mediainfo.py b/test/test_entity_mediainfo.py index add0e7f1..52bbe9c4 100644 --- a/test/test_entity_mediainfo.py +++ b/test/test_entity_mediainfo.py @@ -3,20 +3,43 @@ from wikibaseintegrator import WikibaseIntegrator from wikibaseintegrator.wbi_config import config as wbi_config -wbi_config['USER_AGENT'] = 'WikibaseIntegrator-pytest/1.0 (test_entity_mediainfo.py)' - wbi = WikibaseIntegrator() class TestEntityMediaInfo(unittest.TestCase): + def setUp(self): + self._user_agent_exists = 'USER_AGENT' in wbi_config + self._old_user_agent = wbi_config.get('USER_AGENT') + self._wikibase_url_exists = 'WIKIBASE_URL' in wbi_config + self._old_wikibase_url = wbi_config.get('WIKIBASE_URL') + self._mediawiki_api_url_exists = 'MEDIAWIKI_API_URL' in wbi_config + self._old_mediawiki_api_url = wbi_config.get('MEDIAWIKI_API_URL') + wbi_config['USER_AGENT'] = 'WikibaseIntegrator-pytest/1.0 (test_entity_mediainfo.py)' + wbi_config['WIKIBASE_URL'] = 'https://commons.wikimedia.org' + wbi_config['MEDIAWIKI_API_URL'] = 'https://commons.wikimedia.org/w/api.php' + + def tearDown(self): + if self._user_agent_exists: + wbi_config['USER_AGENT'] = self._old_user_agent + else: + wbi_config.pop('USER_AGENT', None) + if self._wikibase_url_exists: + wbi_config['WIKIBASE_URL'] = 
self._old_wikibase_url + else: + wbi_config.pop('WIKIBASE_URL', None) + if self._mediawiki_api_url_exists: + wbi_config['MEDIAWIKI_API_URL'] = self._old_mediawiki_api_url + else: + wbi_config.pop('MEDIAWIKI_API_URL', None) + def test_get(self): # Test with complete id - assert wbi.mediainfo.get('M75908279', mediawiki_api_url='https://commons.wikimedia.org/w/api.php').id == 'M75908279' + assert wbi.mediainfo.get('M75908279').id == 'M75908279' # Test with numeric id as string - assert wbi.mediainfo.get('75908279', mediawiki_api_url='https://commons.wikimedia.org/w/api.php').id == 'M75908279' + assert wbi.mediainfo.get('75908279').id == 'M75908279' # Test with numeric id as int - assert wbi.mediainfo.get(75908279, mediawiki_api_url='https://commons.wikimedia.org/w/api.php').id == 'M75908279' + assert wbi.mediainfo.get(75908279).id == 'M75908279' # Test with invalid id with self.assertRaises(ValueError): @@ -31,9 +54,19 @@ def test_get(self): wbi.mediainfo.get(-1) def test_get_json(self): - assert wbi.mediainfo.get('M75908279', mediawiki_api_url='https://commons.wikimedia.org/w/api.php').get_json() + assert wbi.mediainfo.get('M75908279').get_json() def test_entity_url(self): - assert wbi.mediainfo.new(id='M582').get_entity_url() == 'http://www.wikidata.org/entity/M582' - assert wbi.mediainfo.new(id='582').get_entity_url() == 'http://www.wikidata.org/entity/M582' - assert wbi.mediainfo.new(id=582).get_entity_url() == 'http://www.wikidata.org/entity/M582' + assert wbi.mediainfo.new(id='M75908279').get_entity_url() == 'https://commons.wikimedia.org/entity/M75908279' + assert wbi.mediainfo.new(id='75908279').get_entity_url() == 'https://commons.wikimedia.org/entity/M75908279' + assert wbi.mediainfo.new(id=75908279).get_entity_url() == 'https://commons.wikimedia.org/entity/M75908279' + + # Test if we can read the claims/statements of the entity + def test_entity_claims(self): + media = wbi.mediainfo.get('M75908279') + assert media.claims + + # Test if we can have the 
statements field in the json + def test_get_statements(self): + media = wbi.mediainfo.get('M75908279') + assert media.get_json()['statements'] diff --git a/wikibaseintegrator/datatypes/string.py b/wikibaseintegrator/datatypes/string.py index 58a75df1..527c36d2 100644 --- a/wikibaseintegrator/datatypes/string.py +++ b/wikibaseintegrator/datatypes/string.py @@ -24,7 +24,7 @@ def set_value(self, value: Optional[str] = None): assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})" if value and ('\n' in value or '\r' in value): - raise ValueError("String value must not contain new line character") + raise ValueError("String value must not contain newline character") if value: self.mainsnak.datavalue = { diff --git a/wikibaseintegrator/entities/mediainfo.py b/wikibaseintegrator/entities/mediainfo.py index 55b04bfa..5fe4bdec 100644 --- a/wikibaseintegrator/entities/mediainfo.py +++ b/wikibaseintegrator/entities/mediainfo.py @@ -121,20 +121,23 @@ def get_by_title(self, titles: list[str] | str, sites: str = 'commonswiki', **kw return MediaInfoEntity(api=self.api).from_json(json_data=json_data['entities'][list(json_data['entities'].keys())[0]]) def get_json(self) -> dict[str, str | dict]: + """Return the entity as a JSON-compatible dict, with 'claims' moved under the 'statements' key and every mainsnak 'datatype' removed.""" - return { + json_data = { 'labels': self.labels.get_json(), 'descriptions': self.descriptions.get_json(), **super().get_json() } - # if 'claims' in json_data: # MediaInfo change name of 'claims' to 'statements' - # json_data['statements'] = json_data.pop('claims') + if 'claims' in json_data: # MediaInfo names the 'claims' field 'statements' + json_data['statements'] = json_data.pop('claims') - # if 'statements' in json_data: - # for prop_nr in json_data['statements']: - # for statement in json_data['statements'][prop_nr]: - # if 'mainsnak' in statement and 'datatype' in statement['mainsnak']: - # del statement['mainsnak']['datatype'] 
+ if 'statements' in json_data and isinstance(json_data['statements'], dict): + for statements in json_data['statements'].values(): + for statement in statements: + if isinstance(statement, dict) and isinstance(statement.get('mainsnak'), dict): + statement['mainsnak'].pop('datatype', None) + + return json_data def from_json(self, json_data: dict[str, Any]) -> MediaInfoEntity: super().from_json(json_data=json_data) @@ -143,7 +146,7 @@ def from_json(self, json_data: dict[str, Any]) -> MediaInfoEntity: self.labels = Labels().from_json(json_data['labels']) if 'descriptions' in json_data: self.descriptions = Descriptions().from_json(json_data['descriptions']) - if 'aliases' in json_data: + if 'statements' in json_data: self.claims = Claims().from_json(json_data['statements']) return self