Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,11 @@ type instances for references and qualifiers.

WikibaseIntegrator supports SDC (Structured Data on Commons) to update a media file hosted on Wikimedia Commons.

> [!IMPORTANT]
> To ease compatibility between Wikidata and Wikimedia Commons, the MediaInfo entity uses the "claims" field instead of the "statements" field used by the API.
> When you read a MediaInfo entity, the "claims" field is filled with the data from the API's "statements" field.
> When you write a MediaInfo entity, the content of the "claims" field is sent to the API as the "statements" field.

### Retrieve data ###

```python
Expand Down
51 changes: 42 additions & 9 deletions test/test_entity_mediainfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,43 @@
from wikibaseintegrator import WikibaseIntegrator
from wikibaseintegrator.wbi_config import config as wbi_config

wbi_config['USER_AGENT'] = 'WikibaseIntegrator-pytest/1.0 (test_entity_mediainfo.py)'

wbi = WikibaseIntegrator()


class TestEntityMediaInfo(unittest.TestCase):

def setUp(self):
    """Snapshot three wbi_config entries, then override them with Wikimedia Commons values."""

    def snapshot(key):
        # Presence and value are recorded separately so that a key which was
        # absent can later be told apart from a key that was set to None.
        return key in wbi_config, wbi_config.get(key)

    self._user_agent_exists, self._old_user_agent = snapshot('USER_AGENT')
    self._wikibase_url_exists, self._old_wikibase_url = snapshot('WIKIBASE_URL')
    self._mediawiki_api_url_exists, self._old_mediawiki_api_url = snapshot('MEDIAWIKI_API_URL')

    # Overrides are applied only after every snapshot has been taken.
    overrides = (
        ('USER_AGENT', 'WikibaseIntegrator-pytest/1.0 (test_entity_mediainfo.py)'),
        ('WIKIBASE_URL', 'https://commons.wikimedia.org'),
        ('MEDIAWIKI_API_URL', 'https://commons.wikimedia.org/w/api.php'),
    )
    for key, value in overrides:
        wbi_config[key] = value

def tearDown(self):
    """Put the three wbi_config entries back into the state recorded on the instance."""
    recorded = (
        ('USER_AGENT', self._user_agent_exists, self._old_user_agent),
        ('WIKIBASE_URL', self._wikibase_url_exists, self._old_wikibase_url),
        ('MEDIAWIKI_API_URL', self._mediawiki_api_url_exists, self._old_mediawiki_api_url),
    )
    for key, existed, previous in recorded:
        if not existed:
            # The key was absent beforehand: remove it instead of writing a value back.
            wbi_config.pop(key, None)
            continue
        wbi_config[key] = previous

def test_get(self):
# Test with complete id
assert wbi.mediainfo.get('M75908279', mediawiki_api_url='https://commons.wikimedia.org/w/api.php').id == 'M75908279'
assert wbi.mediainfo.get('M75908279').id == 'M75908279'
# Test with numeric id as string
assert wbi.mediainfo.get('75908279', mediawiki_api_url='https://commons.wikimedia.org/w/api.php').id == 'M75908279'
assert wbi.mediainfo.get('75908279').id == 'M75908279'
# Test with numeric id as int
assert wbi.mediainfo.get(75908279, mediawiki_api_url='https://commons.wikimedia.org/w/api.php').id == 'M75908279'
assert wbi.mediainfo.get(75908279).id == 'M75908279'

# Test with invalid id
with self.assertRaises(ValueError):
Expand All @@ -31,9 +54,19 @@ def test_get(self):
wbi.mediainfo.get(-1)

def test_get_json(self):
assert wbi.mediainfo.get('M75908279', mediawiki_api_url='https://commons.wikimedia.org/w/api.php').get_json()
assert wbi.mediainfo.get('M75908279').get_json()

def test_entity_url(self):
assert wbi.mediainfo.new(id='M582').get_entity_url() == 'http://www.wikidata.org/entity/M582'
assert wbi.mediainfo.new(id='582').get_entity_url() == 'http://www.wikidata.org/entity/M582'
assert wbi.mediainfo.new(id=582).get_entity_url() == 'http://www.wikidata.org/entity/M582'
assert wbi.mediainfo.new(id='M75908279').get_entity_url() == 'https://commons.wikimedia.org/entity/M75908279'
assert wbi.mediainfo.new(id='75908279').get_entity_url() == 'https://commons.wikimedia.org/entity/M75908279'
assert wbi.mediainfo.new(id=75908279).get_entity_url() == 'https://commons.wikimedia.org/entity/M75908279'

# Test if we can read the claims/statements of the entity
def test_entity_claims(self):
    """Fetch M75908279 and check that its ``claims`` attribute is truthy."""
    fetched = wbi.mediainfo.get('M75908279')
    assert fetched.claims

# Test if we can have the statements field in the json
def test_get_statements(self):
    """Fetch M75908279 and check that its JSON has a truthy 'statements' entry."""
    payload = wbi.mediainfo.get('M75908279').get_json()
    assert payload['statements']
2 changes: 1 addition & 1 deletion wikibaseintegrator/datatypes/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def set_value(self, value: Optional[str] = None):
assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})"

if value and ('\n' in value or '\r' in value):
raise ValueError("String value must not contain new line character")
raise ValueError("String value must not contain newline character")

if value:
self.mainsnak.datavalue = {
Expand Down
21 changes: 12 additions & 9 deletions wikibaseintegrator/entities/mediainfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,20 +121,23 @@ def get_by_title(self, titles: list[str] | str, sites: str = 'commonswiki', **kw
return MediaInfoEntity(api=self.api).from_json(json_data=json_data['entities'][list(json_data['entities'].keys())[0]])

def get_json(self) -> dict[str, str | dict]:
return {
json_data = {
'labels': self.labels.get_json(),
'descriptions': self.descriptions.get_json(),
**super().get_json()
}

# if 'claims' in json_data: # MediaInfo change name of 'claims' to 'statements'
# json_data['statements'] = json_data.pop('claims')
if 'claims' in json_data: # MediaInfo change name of 'claims' to 'statements'
json_data['statements'] = json_data.pop('claims')

# if 'statements' in json_data:
# for prop_nr in json_data['statements']:
# for statement in json_data['statements'][prop_nr]:
# if 'mainsnak' in statement and 'datatype' in statement['mainsnak']:
# del statement['mainsnak']['datatype']
if isinstance(json_data, dict) and 'statements' in json_data and isinstance(json_data['statements'], dict):
for prop_nr, statements in json_data['statements'].items():
for statement in statements:
if isinstance(statement, dict) and 'mainsnak' in statement:
if isinstance(statement['mainsnak'], dict) and 'datatype' in statement['mainsnak']:
Comment on lines +133 to +137
Copy link

Copilot AI Aug 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] The nested type checking creates deep indentation and reduces readability. Consider extracting this logic into a separate method or using early returns to flatten the structure.

Copilot uses AI. Check for mistakes.
del statement['mainsnak']['datatype']

return json_data

def from_json(self, json_data: dict[str, Any]) -> MediaInfoEntity:
super().from_json(json_data=json_data)
Expand All @@ -143,7 +146,7 @@ def from_json(self, json_data: dict[str, Any]) -> MediaInfoEntity:
self.labels = Labels().from_json(json_data['labels'])
if 'descriptions' in json_data:
self.descriptions = Descriptions().from_json(json_data['descriptions'])
Copy link

Copilot AI Aug 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This condition should check for 'aliases' instead of 'statements'. The change appears to be incorrect as it's checking for statements but the comment and logic suggest it should handle aliases processing.

Copilot uses AI. Check for mistakes.
if 'aliases' in json_data:
if 'statements' in json_data:
self.claims = Claims().from_json(json_data['statements'])

return self
Expand Down
Loading