|
3 | 3 | """ |
4 | 4 | import json |
5 | 5 | import re |
6 | | -from typing import Tuple, Any |
7 | 6 |
|
8 | 7 | import requests |
9 | 8 | from requests import Response |
10 | 9 |
|
| 10 | +from src.compareproperties import CompareProperties |
| 11 | +from src.comparestatements import CompareStatements |
| 12 | + |
11 | 13 |
|
12 | 14 | class CompareJSONLD: |
13 | 15 | """ |
@@ -156,347 +158,3 @@ def _process_each_of(self) -> None: |
156 | 158 | :return: |
157 | 159 | """ |
158 | 160 | pass |
159 | | - |
160 | | - |
class CompareProperties:
    """
    Compares the properties present on an entity with the properties
    described by a shape, producing one result per requested property
    """

    def __init__(self, entity: str, entities: dict, props: list, names: dict, start_shape: dict) -> None:
        """
        :param str entity: The id of the entity to be assessed
        :param dict entities: Entity data, read as entities["entities"][entity]["claims"]
        :param list props: The property ids to report on
        :param dict names: Maps a property id to the name reported for it
        :param dict start_shape: The shape to assess against; None yields no results
        """
        self._entities: dict = entities
        self._names: dict = names
        self._entity: str = entity
        self._props: list = props
        self._start_shape: dict = start_shape

    def compare_properties(self) -> dict:
        """
        Builds a result for each property in the list supplied at construction

        Each result holds the property's "name" and "necessity" and, where
        applicable, a "response" describing how the entity's claims compare
        with the shape.

        :return: results keyed by property id; empty when the entity has no
                 claims available or there is no start shape
        """
        if "entities" not in self._entities:
            return {}
        if self._entity not in self._entities["entities"]:
            return {}
        if "claims" not in self._entities["entities"][self._entity]:
            return {}
        if self._start_shape is None:
            return {}

        claims: dict = self._entities["entities"][self._entity]["claims"]
        utilities: Utilities = Utilities()
        properties: dict = {}
        for prop in self._props:
            necessity: str = utilities.calculate_necessity(prop, self._start_shape)
            # A property without a known name is reported under its own id
            # rather than aborting the whole comparison with a KeyError
            child: dict = {"name": self._names.get(prop, prop),
                           "necessity": necessity}
            if prop in claims:
                response: str = self.check_claims_for_props(claims, prop)
            else:
                response = "missing"

            if necessity != "absent":
                if response != "":
                    child["response"] = response
            elif response != "present":
                # For properties the shape does not ask for, a plain
                # "present" is not reported
                child["response"] = response
            properties[prop] = child
        return properties

    def check_claims_for_props(self, claims: dict, prop: str) -> str:
        """
        Assesses the claims for one property against the start shape

        :param dict claims: The entity's claims keyed by property id
        :param str prop: The property id to be assessed
        :return: a cardinality problem if one was found, otherwise "correct"
                 when at least one statement is correct, otherwise "present"
        """
        if "expression" not in self._start_shape:
            return "present"
        if "expressions" not in self._start_shape["expression"]:
            return "present"

        cardinality: str = "correct"
        allowed: str = "present"
        for expression in self._start_shape["expression"]["expressions"]:
            if "predicate" not in expression or not expression["predicate"].endswith(prop):
                continue
            allowed_list: list = self._get_allowed_list(claims, prop, expression)
            result: str = self._process_cardinalities(expression, allowed_list, self._start_shape, prop)
            if result not in ["", "correct"]:
                cardinality = result
            if "correct" in allowed_list:
                allowed = "correct"
        return allowed if cardinality == "correct" else cardinality

    def _get_allowed_list(self, claims: dict, prop: str, expression: dict) -> list:
        """
        Assesses every statement of a property against one expression

        :param dict claims: The entity's claims keyed by property id
        :param str prop: The property id to be assessed
        :param dict expression: The expression from the shape to be assessed against
        :return: one result per statement, "" where nothing was determined
        """
        if prop not in claims:
            return []

        extras: list = self._start_shape["extra"] if "extra" in self._start_shape else []
        allowed_list: list = []
        for statement in claims[prop]:
            is_it_allowed: str = ""
            if statement["mainsnak"]["property"] == prop:
                is_it_allowed = self._process_triple_constraint(statement["mainsnak"],
                                                                expression,
                                                                "")
            # A property listed under "extra" may carry values other than
            # the ones the shape names, so "incorrect" is relaxed
            if is_it_allowed == "incorrect" and any(extra.endswith(prop) for extra in extras):
                is_it_allowed = "allowed"
            allowed_list.append(is_it_allowed)
        return allowed_list

    def _process_cardinalities(self, expression: dict, allowed_list: list, shape: dict, prop: str) -> str:
        """
        Checks the number of acceptable statements against the shape's bounds

        :param dict expression: The expression that triggered the check
        :param list allowed_list: The per-statement results for the property
        :param dict shape: The shape holding the expressions and any "extra" list
        :param str prop: The property id to be assessed
        :return: "" when the expression is not about the property, otherwise
                 the cardinality result
        """
        if "predicate" not in expression:
            return ""
        if not expression["predicate"].endswith(prop):
            return ""

        occurrences: int = allowed_list.count("correct") + allowed_list.count("present")
        cardinality: str = "correct"
        # When several expressions mention the property, the last one decides
        for candidate in shape["expression"]["expressions"]:
            if "predicate" in candidate and candidate["predicate"].endswith(prop):
                cardinality = self._get_cardinalities(occurrences, candidate)

        predicate: str = f'http://www.wikidata.org/prop/direct/{prop}'
        if "extra" in shape and predicate in shape["extra"] and cardinality == "too many statements":
            cardinality = "correct"
        return cardinality

    @staticmethod
    def _get_cardinalities(occurrences: int, expression: dict) -> str:
        """
        Compares a count of statements with an expression's "min" and "max"

        Both bounds default to 1 when absent and -1 means the bound is not
        enforced.

        :param int occurrences: The number of acceptable statements
        :param dict expression: The expression holding the bounds
        :return: "too many statements", "not enough correct statements" or "correct"
        """
        max_card: int = expression["max"] if "max" in expression else 1
        min_card: int = expression["min"] if "min" in expression else 1
        within_max: bool = max_card == -1 or max_card >= occurrences
        within_min: bool = min_card == -1 or min_card <= occurrences
        if within_min and not within_max:
            return "too many statements"
        if within_max and not within_min:
            return "not enough correct statements"
        # Also reached when both bounds fail at once, which needs min > max
        return "correct"

    @staticmethod
    def _process_triple_constraint(statement: dict, expression: dict, allowed: str) -> str:
        """
        Processes triple constraint expression types in the shape

        :param dict statement: The entity's statement to be assessed
        :param dict expression: The expression from the shape to be assessed against
        :param str allowed: Whether the statement is allowed by the expression or not currently
        :return: allowed
        """
        if "property" not in statement:
            return allowed
        if "predicate" not in expression:
            return allowed

        if expression["predicate"].endswith(statement["property"]):
            allowed = "present"
            try:
                if expression["valueExpr"]["type"] == "NodeConstraint":
                    allowed = Utilities.process_node_constraint(statement,
                                                                expression["valueExpr"],
                                                                allowed)
            except (KeyError, TypeError):
                # No usable value expression, so the statement stays as it was
                pass
        return allowed
311 | | - |
312 | | - |
class CompareStatements:
    """
    Compares each individual statement on an entity with a shape
    """

    def __init__(self, entities: dict, entity: str, start_shape: dict) -> None:
        """
        :param dict entities: Entity data, read as entities["entities"][entity]["claims"]
        :param str entity: The id of the entity to be assessed
        :param dict start_shape: The shape to assess against
        """
        self._entities: dict = entities
        self._entity: str = entity
        self.start_shape: dict = start_shape

    def compare_statements(self) -> dict:
        """
        Compares the statements with the shape

        Each result holds the statement's "property", its "necessity" unless
        that is "absent", and a "response".

        :return: statements keyed by statement id; empty when the entity or
                 its claims are not available
        """
        if "entities" not in self._entities:
            return {}
        if self._entity not in self._entities["entities"]:
            return {}
        if "claims" not in self._entities["entities"][self._entity]:
            return {}

        claims: dict = self._entities["entities"][self._entity]["claims"]
        utilities: Utilities = Utilities()
        statements: dict = {}
        for claim in claims:
            for statement in claims[claim]:
                child: dict = {"property": claim}
                necessity: str = utilities.calculate_necessity(statement["mainsnak"]["property"],
                                                               self.start_shape)
                if necessity != "absent":
                    child["necessity"] = necessity
                child, _ = self._process_shape(statement["mainsnak"], self.start_shape, child)
                statements[statement["id"]] = child
        return statements

    def _process_shape(self, statement: dict, shape: dict, child: dict) -> tuple:
        """
        Processes a full shape

        :param statement: The entity's statement to be assessed
        :param shape: The shape to be assessed against
        :param child: The current response from the assessment
        :return: child and allowed, as a two item tuple
        """
        expressions: list = []
        if "expression" in shape and "expressions" in shape["expression"]:
            expressions = shape["expression"]["expressions"]

        # Stays "not in schema" unless some expression mentions the property
        allowed: str = "not in schema"
        for expression in expressions:
            allowed = self.process_expressions(expression, shape, statement, allowed)
        if allowed != "":
            child["response"] = allowed
        return child, allowed

    def process_expressions(self, expression: dict, shape: dict, statement: dict, allowed: str) -> str:
        """
        Assesses a statement against one expression of the shape

        :param expression: The expression from the shape to be assessed against
        :param shape: The shape holding any "extra" list
        :param statement: The entity's statement to be assessed
        :param allowed: Whether the statement is allowed or not currently
        :return: allowed
        """
        if "type" not in expression:
            return allowed
        if "predicate" not in expression:
            return allowed
        if "property" not in statement:
            return allowed

        prop: str = statement["property"]
        if expression["type"] == "TripleConstraint" and expression["predicate"].endswith(prop):
            allowed = self._process_triple_constraint(statement,
                                                      expression,
                                                      allowed)
            # A property listed under "extra" may carry values other than
            # the ones the shape names, so "incorrect" is relaxed
            if allowed == "incorrect" and "extra" in shape:
                if any(extra.endswith(prop) for extra in shape["extra"]):
                    allowed = "allowed"
        return allowed

    @staticmethod
    def _process_triple_constraint(statement: dict, expression: dict, allowed: str) -> str:
        """
        Processes triple constraint expression types in the shape

        :param statement: The entity's statement to be assessed
        :param expression: The expression from the shape to be assessed against
        :param allowed: Whether the statement is allowed by the expression or not currently
        :return: allowed
        """
        if "property" not in statement:
            return allowed
        if "predicate" not in expression:
            return allowed

        if expression["predicate"].endswith(statement["property"]):
            allowed = "allowed"
            try:
                if expression["valueExpr"]["type"] == "NodeConstraint":
                    allowed = Utilities.process_node_constraint(statement,
                                                                expression["valueExpr"],
                                                                allowed)
            except (KeyError, TypeError):
                # No usable value expression, so the statement stays "allowed"
                pass
        return allowed
409 | | - |
410 | | - |
class Utilities:
    """
    Helpers shared by the comparison classes for reading a shape
    """

    def calculate_necessity(self, prop: str, shape: dict) -> str:
        """
        Check if a property is required, optional or absent from a shape

        :param str prop: the property to be checked
        :param dict shape: the shape to check against; None or a shape
                           without an "expression" gives "absent"
        :return: necessity
        """
        necessity: str = "absent"
        if not shape or "expression" not in shape:
            return necessity

        # The shape holds either a list of expressions or a single one
        if "expressions" in shape["expression"]:
            list_of_expressions: list = list(shape["expression"]["expressions"])
        else:
            list_of_expressions = [shape["expression"]]

        # When several expressions mention the property, the last one decides
        for expression in list_of_expressions:
            if "predicate" in expression and expression["predicate"].endswith(prop):
                necessity = self.required_or_absent(expression)
        return necessity

    @staticmethod
    def required_or_absent(expression: dict) -> str:
        """
        Derives the necessity of a property from an expression's bounds

        :param dict expression: The expression holding any "min" and "max"
        :return: "required" when "min" is above zero or neither bound is
                 given, "absent" when both bounds are zero, otherwise "optional"
        """
        has_min: bool = "min" in expression
        has_max: bool = "max" in expression
        necessity: str = "optional"
        if (has_min and expression["min"] > 0) or (not has_min and not has_max):
            necessity = "required"
        if has_min and has_max and expression["min"] == 0 and expression["max"] == 0:
            necessity = "absent"
        return necessity

    @staticmethod
    def process_cardinalities(expression: dict, claim: dict) -> str:
        """
        Processes cardinalities in expressions

        Both bounds default to 1 when absent and -1 means the bound is not
        enforced.

        :param dict expression: The expression holding any "min" and "max"
        :param dict claim: The item whose length is compared with the bounds
        :return: cardinality
        """
        occurrences: int = len(claim)
        max_card: int = expression["max"] if "max" in expression else 1
        min_card: int = expression["min"] if "min" in expression else 1
        within_max: bool = max_card == -1 or max_card >= occurrences
        within_min: bool = min_card == -1 or min_card <= occurrences
        if within_min and not within_max:
            return "too many statements"
        if within_max and not within_min:
            return "not enough correct statements"
        # Also reached when both bounds fail at once, which needs min > max
        return "correct"

    @staticmethod
    def process_node_constraint(statement: dict, expression: dict, allowed: str) -> str:
        """
        Processes node constraint expression types in the shape

        Only statements holding an entity id as their value are assessed;
        anything else leaves allowed untouched.

        :param dict statement: The entity's statement to be assessed
        :param dict expression: The expression from the shape to be assessed against
        :param str allowed: Whether the statement is allowed by the expression or not currently
        :return: allowed
        """
        if "snaktype" not in statement:
            return allowed
        if "datavalue" not in statement:
            return allowed
        if "type" not in statement["datavalue"]:
            return allowed

        if statement["snaktype"] == "value" and \
                statement["datavalue"]["type"] == "wikibase-entityid":
            obj = f'http://www.wikidata.org/entity/{statement["datavalue"]["value"]["id"]}'
            if "values" in expression:
                allowed = "correct" if obj in expression["values"] else "incorrect"
        return allowed
0 commit comments