From 4e92489e0751c1a70102eeee1962a11b4f3b2590 Mon Sep 17 00:00:00 2001 From: Miel Vander Sande Date: Wed, 22 Apr 2026 15:00:21 +0200 Subject: [PATCH 1/7] Add configurable hashAlgorithm --- tests/runtests.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/runtests.py b/tests/runtests.py index 052edb8e..2ec643c2 100644 --- a/tests/runtests.py +++ b/tests/runtests.py @@ -671,7 +671,6 @@ def create(test): options[k] = v options['documentLoader'] = create_document_loader(test) options['hashAlgorithm'] = test.data.get('hashAlgorithm') - if 'expandContext' in options: filename = os.path.join(test.dirname, options['expandContext']) options['expandContext'] = read_json(filename) From 1b52af79d89fbcbd7a03e9f8eadebca6c1dc41e1 Mon Sep 17 00:00:00 2001 From: Miel Vander Sande Date: Mon, 4 May 2026 10:09:10 +0200 Subject: [PATCH 2/7] Introduce rdflib and legacy mode. Move parse and serialize functions to rdflib --- lib/pyld/jsonld.py | 51 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 12 deletions(-) diff --git a/lib/pyld/jsonld.py b/lib/pyld/jsonld.py index d29a3e48..504993d4 100644 --- a/lib/pyld/jsonld.py +++ b/lib/pyld/jsonld.py @@ -26,17 +26,23 @@ from urllib.parse import urlparse import lxml.html +import rdflib from cachetools import LRUCache from frozendict import frozendict +from rdflib import RDF, XSD, BNode, Dataset, Literal, URIRef +from rdflib.graph import DATASET_DEFAULT_GRAPH_ID +from rdflib.parser import StringInputSource +from rdflib.plugins.parsers.nquads import NQuadsParser +from rdflib.plugins.serializers.nquads import _nq_row +from rdflib.term import Identifier from c14n.Canonicalize import canonicalize from pyld.__about__ import __copyright__, __license__, __version__ from pyld.canon import RDFC10, URDNA2015, URGNA2012, UnknownFormatError +from pyld.context_resolver import ContextResolver from pyld.identifier_issuer import IdentifierIssuer -from pyld.nquads import ParserError, parse_nquads, serialize_nquad, serialize_nquads - -from .context_resolver import ContextResolver -from .iri_resolver import resolve, unresolve +from pyld.iri_resolver import resolve, unresolve +from pyld.util import from_legacy_dataset, from_legacy_triple, to_legacy_dataset __all__ = [ '__copyright__', @@ -139,6 +145,10 @@ # Initial contexts, defined on first access INITIAL_CONTEXTS = {} +# In legacy mode, pyld returns RDF datasets as +# RDFJS-like dict objects, equivalent to PyLD < 4.0 +LEGACY_MODE = True + # Handler to call if a property was dropped during expansion OnPropertyDropped = Callable[[str | None], Any] @@ -1415,7 +1425,7 @@ def get_context_value(ctx, key, type_): return rval @staticmethod - def parse_nquads(input_): + def parse_nquads(input_: str) -> Dataset | dict: """ Parses RDF in the form of N-Quads. @@ -1424,15 +1434,28 @@ def parse_nquads(input_): :return: an RDF dataset. """ try: - result = parse_nquads(input_) - return result - except ParserError as cause: + dataset = Dataset() + # TODO: workaround to preserve bnodes for testing and preserve previous behaviour; + # this should be handled in the test harness instead + bnode_context = { + label: label + for label in re.findall(r'_:([A-Za-z0-9_][A-Za-z0-9_.-]*)', input_) + } + parser = NQuadsParser() + rdflib.NORMALIZE_LITERALS = False + parser.parse(StringInputSource(input_), dataset, bnode_context=bnode_context) + return to_legacy_dataset(dataset) if LEGACY_MODE else dataset + except SyntaxError as cause: + raise JsonLdError( + str(cause), 'jsonld.ParseError', {'line': cause.lineno} + ) from cause + except Exception as cause: raise JsonLdError( - str(cause), 'jsonld.ParseError', {'line': cause.line_number} + str(cause), 'jsonld.ParseError', {} ) from cause @staticmethod - def to_nquads(dataset): + def to_nquads(dataset: dict | Dataset): """ Converts an RDF dataset to N-Quads. @@ -1440,11 +1463,15 @@ def to_nquads(dataset): :return: the N-Quads string. """ - return serialize_nquads(dataset) + if not isinstance(dataset, Dataset): + dataset = from_legacy_dataset(dataset) + return dataset.serialize(format='nquads') @staticmethod def to_nquad(triple, graph_name=None): - return serialize_nquad(triple, graph_name) + if isinstance(triple, dict): + triple = from_legacy_triple(triple) + return _nq_row(triple, graph_name) @staticmethod def arrayify(value): From e678d9eba5b3c0854e383196e65808e631cd4a1d Mon Sep 17 00:00:00 2001 From: Miel Vander Sande Date: Mon, 4 May 2026 10:14:08 +0200 Subject: [PATCH 3/7] Convert main algorithms to using rdflib --- lib/pyld/jsonld.py | 347 +++++++++++++++++++++++---------------------- 1 file changed, 175 insertions(+), 172 deletions(-) diff --git a/lib/pyld/jsonld.py b/lib/pyld/jsonld.py index 504993d4..09dd4050 100644 --- a/lib/pyld/jsonld.py +++ b/lib/pyld/jsonld.py @@ -70,21 +70,6 @@ 'freeze', ] -# XSD constants -XSD_BOOLEAN = 'http://www.w3.org/2001/XMLSchema#boolean' -XSD_DOUBLE = 'http://www.w3.org/2001/XMLSchema#double' -XSD_INTEGER = 'http://www.w3.org/2001/XMLSchema#integer' -XSD_STRING = 'http://www.w3.org/2001/XMLSchema#string' - -# RDF constants -RDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' -RDF_LIST = RDF + 'List' -RDF_FIRST = RDF + 'first' -RDF_REST = RDF + 'rest' -RDF_NIL = RDF + 'nil' -RDF_TYPE = RDF + 'type' -RDF_LANGSTRING = RDF + 'langString' -RDF_JSON_LITERAL = RDF + 'JSON' # BCP47 REGEX_BCP47 = r'^[a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})*$' @@ -1004,7 +989,7 @@ def normalize(self, input_, options): {'format': cause.format}) from cause - def from_rdf(self, dataset, options): + def from_rdf(self, dataset: dict | Dataset, options): """ Converts an RDF dataset to JSON-LD. @@ -1051,6 +1036,10 @@ def from_rdf(self, dataset, options): parser = _rdf_parsers[options['format']] dataset = parser(dataset) + # Convert legacy datasets for backwards-compatibility + if not isinstance(dataset, Dataset): + dataset = from_legacy_dataset(dataset) + # convert from RDF return self._from_rdf(dataset, options) @@ -1098,25 +1087,27 @@ def to_rdf(self, input_, options): node_map = {'@default': {}} self._create_node_map(expanded, node_map, '@default', issuer) # output RDF dataset - dataset = {} + dataset = Dataset() for graph_name, graph in sorted(node_map.items()): # skip relative IRIs if graph_name == '@default' or _is_absolute_iri(graph_name): - dataset[graph_name] = self._graph_to_rdf(graph, issuer, options) - + g = self._rdflib_term_from_id(graph_name) if graph_name != '@default' else dataset.default_graph + for t in self._graph_to_rdf(graph, issuer, options): + s, p, o = t + dataset.add((s, p, o, g)) # convert to output format if 'format' in options: if ( options['format'] == 'application/n-quads' or options['format'] == 'application/nquads' ): - return self.to_nquads(dataset) + return dataset.serialize(format='nquads') raise JsonLdError( 'Unknown output format.', 'jsonld.UnknownFormat', {'format': options['format']}, ) - return dataset + return to_legacy_dataset(dataset) if LEGACY_MODE else dataset def process_context(self, active_ctx, local_ctx, options): """ @@ -2943,7 +2934,7 @@ def _frame(self, input_, frame, options): return framed - def _from_rdf(self, dataset, options): + def _from_rdf(self, dataset: Dataset, options): """ Converts an RDF dataset to JSON-LD. @@ -2956,68 +2947,71 @@ def _from_rdf(self, dataset, options): graph_map = {'@default': default_graph} referenced_once = {} - for name, graph in dataset.items(): + for s, p, o, graph_name in dataset.quads((None, None, None, None)): + name = ( + '@default' + if graph_name is None or graph_name == DATASET_DEFAULT_GRAPH_ID + else self._id_from_rdflib_term(graph_name) + ) graph_map.setdefault(name, {}) if name != '@default' and name not in default_graph: default_graph[name] = {'@id': name} node_map = graph_map[name] - for triple in graph: - # get subject, predicate, object - s = triple['subject']['value'] - p = triple['predicate']['value'] - o = triple['object'] + s = self._id_from_rdflib_term(s) + p = str(p) - node = node_map.setdefault(s, {'@id': s}) + node = node_map.setdefault(s, {'@id': s}) - object_is_id = o['type'] == 'IRI' or o['type'] == 'blank node' - if object_is_id and o['value'] not in node_map: - node_map[o['value']] = {'@id': o['value']} + object_is_id = not isinstance(o, Literal) + object_id = self._id_from_rdflib_term(o) if object_is_id else None + if object_is_id and object_id not in node_map: + node_map[object_id] = {'@id': object_id} - if ( - p == RDF_TYPE - and not options.get('useRdfType', False) - and object_is_id - ): - JsonLdProcessor.add_value( - node, '@type', o['value'], {'propertyIsArray': True} - ) - continue - - value = self._rdf_to_object( - o, options['useNativeTypes'], options['rdfDirection'] + if ( + p == str(RDF.type) + and not options.get('useRdfType', False) + and object_is_id + ): + JsonLdProcessor.add_value( + node, '@type', object_id, {'propertyIsArray': True} ) - JsonLdProcessor.add_value(node, p, value, {'propertyIsArray': True}) - - # object may be an RDF list/partial list node but we - # can't know easily until all triples are read - if object_is_id: - # track rdf:nil uniquely per graph - if o['value'] == RDF_NIL: - object = node_map[o['value']] - if 'usages' not in object: - object['usages'] = [] - object['usages'].append( - {'node': node, 'property': p, 'value': value} - ) - # object referenced more than once - elif o['value'] in referenced_once: - referenced_once[o['value']] = False - # track single reference - else: - referenced_once[o['value']] = { - 'node': node, - 'property': p, - 'value': value, - } + continue + + value = self._rdf_to_object( + o, options['useNativeTypes'], options['rdfDirection'] + ) + JsonLdProcessor.add_value(node, p, value, {'propertyIsArray': True}) + + # object may be an RDF list/partial list node but we + # can't know easily until all triples are read + if object_is_id: + # track rdf:nil uniquely per graph + if object_id == str(RDF.nil): + object = node_map[object_id] + if 'usages' not in object: + object['usages'] = [] + object['usages'].append( + {'node': node, 'property': p, 'value': value} + ) + # object referenced more than once + elif object_id in referenced_once: + referenced_once[object_id] = False + # track single reference + else: + referenced_once[object_id] = { + 'node': node, + 'property': p, + 'value': value, + } # convert linked lists to @list arrays for _name, graph_object in graph_map.items(): # no @lists to be converted, continue - if RDF_NIL not in graph_object: + if str(RDF.nil) not in graph_object: continue # iterate backwards through each RDF list - nil = graph_object[RDF_NIL] + nil = graph_object[str(RDF.nil)] if 'usages' not in nil: continue for usage in nil['usages']: @@ -3035,23 +3029,23 @@ def _from_rdf(self, dataset, options): # and, optionally, @type where the value is rdf:List. node_key_count = len(node.keys()) while ( - property == RDF_REST + property == str(RDF.rest) and _is_object(referenced_once.get(node['@id'])) - and _is_array(node[RDF_FIRST]) - and len(node[RDF_FIRST]) == 1 - and _is_array(node[RDF_REST]) - and len(node[RDF_REST]) == 1 + and _is_array(node[str(RDF.first)]) + and len(node[str(RDF.first)]) == 1 + and _is_array(node[str(RDF.rest)]) + and len(node[str(RDF.rest)]) == 1 and ( node_key_count == 3 or ( node_key_count == 4 and _is_array(node.get('@type')) and len(node['@type']) == 1 - and node['@type'][0] == RDF_LIST + and node['@type'][0] == str(RDF.List) ) ) ): - list_.append(node[RDF_FIRST][0]) + list_.append(node[str(RDF.first)][0]) list_nodes.append(node['@id']) # get next node, moving backwards through list @@ -3722,38 +3716,30 @@ def _graph_to_rdf(self, graph, issuer, options): :return: the array of RDF triples for the given graph. """ - triples = [] + triples = [] # TODO: use rdflib.Graph instead for id_, node in sorted(graph.items()): for property, items in sorted(node.items()): - if property == '@type': - property = RDF_TYPE - elif _is_keyword(property): + if property != '@type' and _is_keyword(property): continue for item in items: # skip relative IRI subjects and predicates - if not (_is_absolute_iri(id_) and _is_absolute_iri(property)): + if not _is_absolute_iri(id_) or ( + property != '@type' and not _is_absolute_iri(property) + ): continue # RDF subject - subject = {} - if id_.startswith('_:'): - subject['type'] = 'blank node' - else: - subject['type'] = 'IRI' - subject['value'] = id_ + subject = self._rdflib_term_from_id(id_) # RDF predicate - predicate = {} - if property.startswith('_:'): - # skip bnode predicates unless producing - # generalized RDF - if not options['produceGeneralizedRdf']: - continue - predicate['type'] = 'blank node' - else: - predicate['type'] = 'IRI' - predicate['value'] = property + + # skip bnode predicates unless producing + # generalized RDF + if property.startswith('_:') and not options['produceGeneralizedRdf']: + continue + + predicate = self._rdflib_term_from_id(property) if property != '@type' else RDF.type # convert list, value or node object to triple object = self._object_to_rdf( @@ -3761,16 +3747,10 @@ def _graph_to_rdf(self, graph, issuer, options): ) # skip None objects (they are relative IRIs) if object is not None: - triples.append( - { - 'subject': subject, - 'predicate': predicate, - 'object': object, - } - ) + triples.append((subject, predicate, object)) return triples - def _list_to_rdf(self, list_, issuer, triples, rdf_direction): + def _list_to_rdf(self, list_: list, issuer: IdentifierIssuer, triples, rdf_direction) -> BNode | URIRef: """ Converts a @list value into a linked list of blank node RDF triples (and RDF collection). @@ -3782,31 +3762,47 @@ def _list_to_rdf(self, list_, issuer, triples, rdf_direction): :return: the head of the list """ - first = {'type': 'IRI', 'value': RDF_FIRST} - rest = {'type': 'IRI', 'value': RDF_REST} - nil = {'type': 'IRI', 'value': RDF_NIL} last = list_.pop() if list_ else None # result is the head of the list - result = {'type': 'blank node', 'value': issuer.get_id()} if last else nil + result = BNode(issuer.get_id()) if last else RDF.nil subject = result for item in list_: object = self._object_to_rdf(item, issuer, triples, rdf_direction) - next = {'type': 'blank node', 'value': issuer.get_id()} - triples.append({'subject': subject, 'predicate': first, 'object': object}) - triples.append({'subject': subject, 'predicate': rest, 'object': next}) + next = BNode(issuer.get_id()) + triples.append((subject, RDF.first, object)) + triples.append((subject, RDF.rest, next)) subject = next # tail of list if last: object = self._object_to_rdf(last, issuer, triples, rdf_direction) - triples.append({'subject': subject, 'predicate': first, 'object': object}) - triples.append({'subject': subject, 'predicate': rest, 'object': nil}) + triples.append((subject, RDF.first, object)) + triples.append((subject, RDF.rest, RDF.nil)) return result + + def _rdflib_term_from_id(self, id_: str) -> BNode | URIRef: + """ + Converts a JSON-LD @id value to an RDFLib term. + :param id_: the JSON-LD @id value. + :return: the RDFLib term. + """ + if id_.startswith('_:'): + return BNode(id_[2:]) + return URIRef(id_) + + def _id_from_rdflib_term(self, term: Identifier) -> str: + """ + Converts an RDFLib term to a JSON-LD @id value. + :param term: the RDFLib term. + :return: the JSON-LD @id value. + """ + return '_:' + str(term) if isinstance(term, BNode) else str(term) + def _object_to_rdf(self, item, issuer, triples, rdf_direction): """ Converts a JSON-LD value object to an RDF literal or a JSON-LD string @@ -3820,78 +3816,86 @@ def _object_to_rdf(self, item, issuer, triples, rdf_direction): :return: the RDF literal or RDF resource. """ - object = {} if _is_value(item): - object['type'] = 'literal' value = item['@value'] datatype = item.get('@type') # convert to XSD datatypes as appropriate if datatype == '@json': - object['value'] = canonicalize(value).decode('UTF-8') - object['datatype'] = RDF_JSON_LITERAL + return Literal( + canonicalize(value).decode('UTF-8'), + datatype = RDF.JSON + ) elif _is_bool(value): - object['value'] = 'true' if value else 'false' - object['datatype'] = datatype or XSD_BOOLEAN + return Literal( + 'true' if value else 'false', + datatype=URIRef(datatype) if datatype else XSD.boolean + ) # if `value` is a float number, elif _is_double(value): - # use the canonical double representation - object['value'] = _canonicalize_double(value) - # add the double datatype if none is given - object['datatype'] = datatype or XSD_DOUBLE - return object - elif datatype == XSD_DOUBLE: + return Literal( + # use the canonical double representation + _canonicalize_double(value), + # add the double datatype if none is given + datatype=URIRef(datatype) if datatype else XSD.double, + normalize=False) + elif datatype == str(XSD.double): # since the previous branch did not activate, we know that `value` is not a float number. try: float_value = float(value) except (ValueError, TypeError): # if `value` is not convertible to float, we will return it as-is. - object['value'] = value - object['datatype'] = XSD_DOUBLE - return object + return Literal( + value, datatype=XSD.double, normalize=False + ) else: - # we have a float, and canonicalization may proceed. - object['value'] = _canonicalize_double(float_value) - object['datatype'] = XSD_DOUBLE - return object + return Literal( + # we have a float, and canonicalization may proceed. + _canonicalize_double(float_value), + datatype=XSD.double, normalize=False + ) elif _is_integer(value): - object['value'] = str(value) - object['datatype'] = datatype or XSD_INTEGER + return Literal( + str(value), + datatype = URIRef(datatype) if datatype else XSD.integer + ) elif rdf_direction == 'i18n-datatype' and '@direction' in item: - datatype = 'https://www.w3.org/ns/i18n#{}_{}'.format( - item.get('@language', ''), item['@direction'] + return Literal( + value, + datatype='https://www.w3.org/ns/i18n#{}_{}'.format( + item.get('@language', ''), item['@direction'] + ), + normalize=False, ) - object['value'] = value - object['datatype'] = datatype + elif '@language' in item: - object['value'] = value - object['datatype'] = datatype or RDF_LANGSTRING - object['language'] = item['@language'] - else: - object['value'] = value - object['datatype'] = datatype or XSD_STRING + return Literal( + value, + lang=item['@language'], + datatype=None, + normalize=False, + ) + return Literal( + value, + datatype=URIRef(datatype) if datatype else XSD.string, + normalize=False, + ) # convert list object to RDF elif _is_list(item): - list_ = self._list_to_rdf(item['@list'], issuer, triples, rdf_direction) - object['value'] = list_['value'] - object['type'] = list_['type'] + return self._list_to_rdf(item['@list'], issuer, triples, rdf_direction) + # convert string/node object to RDF else: id_ = item['@id'] if _is_object(item) else item - if id_.startswith('_:'): - object['type'] = 'blank node' - else: - object['type'] = 'IRI' - object['value'] = id_ # skip relative IRIs - if object['type'] == 'IRI' and not _is_absolute_iri(object['value']): + if not id_.startswith('_:') and not _is_absolute_iri(id_): return None - return object + return self._rdflib_term_from_id(id_) - def _rdf_to_object(self, o, use_native_types, rdf_direction): + def _rdf_to_object(self, o: Identifier, use_native_types, rdf_direction) -> dict: """ Converts an RDF triple object to a JSON-LD object. @@ -3902,22 +3906,21 @@ def _rdf_to_object(self, o, use_native_types, rdf_direction): :return: the JSON-LD object. """ # convert IRI/BlankNode object to JSON-LD - if o['type'] == 'IRI' or o['type'] == 'blank node': - return {'@id': o['value']} + if not isinstance(o, Literal): + id_ = self._id_from_rdflib_term(o) + return {'@id': id_} # convert literal object to JSON-LD - rval = {'@value': o['value']} + rval = {'@value': str(o)} # add language - if 'language' in o: - rval['@language'] = o['language'] + if o.language is not None: + rval['@language'] = o.language # add datatype else: - type_ = o['datatype'] - if not type_: - type_ = XSD_STRING + type_ = o.datatype if o.datatype else XSD.string - if type_ == RDF_JSON_LITERAL: + if type_ == RDF.JSON: type_ = '@json' try: rval['@value'] = json.loads(rval['@value']) @@ -3931,20 +3934,20 @@ def _rdf_to_object(self, o, use_native_types, rdf_direction): # use native types for certain xsd types if use_native_types: - if type_ == XSD_BOOLEAN: + if type_ == XSD.boolean: if rval['@value'] == 'true': rval['@value'] = True elif rval['@value'] == 'false': rval['@value'] = False elif _is_numeric(rval['@value']): - if type_ == XSD_INTEGER: + if type_ == XSD.integer: if rval['@value'].isdigit(): rval['@value'] = int(rval['@value']) - elif type_ == XSD_DOUBLE: + elif type_ == XSD.double: rval['@value'] = float(rval['@value']) # do not add native type - if type_ not in [XSD_BOOLEAN, XSD_INTEGER, XSD_DOUBLE, XSD_STRING]: - rval['@type'] = type_ + if type_ not in [XSD.boolean, XSD.integer, XSD.double, XSD.string]: + rval['@type'] = str(type_) elif rdf_direction == 'i18n-datatype' and type_.startswith( 'https://www.w3.org/ns/i18n#' ): @@ -3954,8 +3957,8 @@ def _rdf_to_object(self, o, use_native_types, rdf_direction): if not re.match(REGEX_BCP47, language): warnings.warn('@language must be valid BCP47', stacklevel=2) rval['@direction'] = direction - elif type_ != XSD_STRING: - rval['@type'] = type_ + elif type_ != XSD.string: + rval['@type'] = str(type_) return rval def _create_node_map( From 385077120d299ce8b6c51605434759013bfb84fd Mon Sep 17 00:00:00 2001 From: Miel Vander Sande Date: Mon, 4 May 2026 11:00:17 +0200 Subject: [PATCH 4/7] Remove legacy nquads parser. Also add minor typing and legacy conversion --- lib/pyld/jsonld.py | 10 +- lib/pyld/nquads.py | 244 --------------------------------------------- 2 files changed, 8 insertions(+), 246 deletions(-) delete mode 100644 lib/pyld/nquads.py diff --git a/lib/pyld/jsonld.py b/lib/pyld/jsonld.py index 09dd4050..ab3deb16 100644 --- a/lib/pyld/jsonld.py +++ b/lib/pyld/jsonld.py @@ -29,7 +29,7 @@ import rdflib from cachetools import LRUCache from frozendict import frozendict -from rdflib import RDF, XSD, BNode, Dataset, Literal, URIRef +from rdflib import RDF, XSD, BNode, Dataset, Literal, Node, URIRef from rdflib.graph import DATASET_DEFAULT_GRAPH_ID from rdflib.parser import StringInputSource from rdflib.plugins.parsers.nquads import NQuadsParser @@ -1459,7 +1459,13 @@ def to_nquads(dataset: dict | Dataset): return dataset.serialize(format='nquads') @staticmethod - def to_nquad(triple, graph_name=None): + def to_nquad(triple: dict | tuple[Node, Node, Node], graph_name=None): + """Converts an RDF triple to an N-Quad string. + :param triple: the RDF triple to convert, either as a dict with keys + 'subject', 'predicate', 'object' (legacy) or as a tuple of rdflib Nodes. + :param graph_name: the name of the graph, if any (default: None). + :return: the N-Quad string representation of the triple. + """ if isinstance(triple, dict): triple = from_legacy_triple(triple) return _nq_row(triple, graph_name) diff --git a/lib/pyld/nquads.py b/lib/pyld/nquads.py deleted file mode 100644 index 9e986d7d..00000000 --- a/lib/pyld/nquads.py +++ /dev/null @@ -1,244 +0,0 @@ -import re - -XSD_STRING = 'http://www.w3.org/2001/XMLSchema#string' -RDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' -RDF_LANGSTRING = RDF + 'langString' - - -def escape(value: str): - return ( - value.replace("\\", "\\\\") - .replace("\t", "\\t") - .replace("\n", "\\n") - .replace("\r", "\\r") - .replace('"', '\\"') - ) - - -def unescape(value: str): - return ( - value.replace('\\"', '"') - .replace("\\t", "\t") - .replace("\\n", "\n") - .replace("\\r", "\r") - .replace("\\\\", "\\") - ) - - -def parse_nquads(input_: str): - """ - Parses RDF in the form of N-Quads. - - :param input_: the N-Quads input to parse. - - :return: an RDF dataset. - """ - # define partial regexes - iri = '(?:<([^:]+:[^>]*)>)' - bnode = '(_:(?:[A-Za-z0-9_][A-Za-z0-9_.-]*))' - plain = '"([^"\\\\]*(?:\\\\.[^"\\\\]*)*)"' - datatype = '(?:\\^\\^' + iri + ')' - language = '(?:@([a-zA-Z]+(?:-[a-zA-Z0-9]+)*))' - literal = '(?:' + plain + '(?:' + datatype + '|' + language + ')?)' - ws = '[ \\t]+' - wso = '[ \\t]*' - empty = r'^' + wso + '$' - - # define quad part regexes - subject = '(?:' + iri + '|' + bnode + ')' + ws - property = iri + ws - object = '(?:' + iri + '|' + bnode + '|' + literal + ')' + wso - graph = '(?:\\.|(?:(?:' + iri + '|' + bnode + ')' + wso + '\\.))' - - # Note: Notice that the graph position does not include literals - # even though they are specified as a possible value in the - # N-Quads note (http://sw.deri.org/2008/07/n-quads/). This is - # intentional, as literals in that position are not supported by the - # RDF data model or the JSON-LD data model. - # See: https://github.com/digitalbazaar/pyld/pull/19 - - # full quad regex - quad = r'^' + wso + subject + property + object + graph + wso + '$' - - # build RDF dataset - dataset = {} - - # split N-Quad input into lines - lines = input_.splitlines(True) - line_number = 0 - for line in lines: - line_number += 1 - - # skip empty lines - if re.search(empty, line) is not None: - continue - - # parse quad - match = re.search(quad, line) - if match is None: - raise ParserError( - f'Error while parsing N-Quads invalid quad {line} at line {line_number}.', - line_number=line_number, - ) - match = match.groups() - - # create RDF triple - triple = {'subject': {}, 'predicate': {}, 'object': {}} - - # get subject - if match[0] is not None: - triple['subject'] = {'type': 'IRI', 'value': match[0]} - else: - triple['subject'] = {'type': 'blank node', 'value': match[1]} - - # get predicate - triple['predicate'] = {'type': 'IRI', 'value': match[2]} - - # get object - if match[3] is not None: - triple['object'] = {'type': 'IRI', 'value': match[3]} - elif match[4] is not None: - triple['object'] = {'type': 'blank node', 'value': match[4]} - else: - triple['object'] = {'type': 'literal'} - unescaped = unescape(match[5]) - if match[6] is not None: - triple['object']['datatype'] = match[6] - elif match[7] is not None: - triple['object']['datatype'] = RDF_LANGSTRING - triple['object']['language'] = match[7] - else: - triple['object']['datatype'] = XSD_STRING - triple['object']['value'] = unescaped - - # get graph name ('@default' is used for the default graph) - name = '@default' - if match[8] is not None: - name = match[8] - elif match[9] is not None: - name = match[9] - - # initialize graph in dataset - if name not in dataset: - dataset[name] = [triple] - # add triple if unique to its graph - else: - unique = True - triples = dataset[name] - for t in dataset[name]: - if _compare_rdf_triples(t, triple): - unique = False - break - if unique: - triples.append(triple) - - return dataset - - -def serialize_nquads(dataset): - """ - Converts an RDF dataset to N-Quads. - - :param dataset: the RDF dataset to convert. - - :return: the N-Quads string. - """ - quads = [] - for graph_name, triples in dataset.items(): - for triple in triples: - if graph_name == '@default': - graph_name = None - quads.append(serialize_nquad(triple, graph_name)) - quads.sort() - return ''.join(quads) - - -def serialize_nquad(triple, graph_name=None): - """ - Converts an RDF triple and graph name to an N-Quad string (a single - quad). - - :param triple: the RDF triple or quad to convert (a triple or quad - may be passed, if a triple is passed then `graph_name` should be - given to specify the name of the graph the triple is in, `None` - for the default graph). - :param graph_name: the name of the graph containing the triple, None - for the default graph. - - :return: the N-Quad string. - """ - s = triple['subject'] - p = triple['predicate'] - o = triple['object'] - g = triple.get('name', {'value': graph_name})['value'] - - quad = '' - - # subject is an IRI - if s['type'] == 'IRI': - quad += '<' + s['value'] + '>' - else: - quad += s['value'] - quad += ' ' - - # property is an IRI - if p['type'] == 'IRI': - quad += '<' + p['value'] + '>' - else: - quad += p['value'] - quad += ' ' - - # object is IRI, bnode, or literal - if o['type'] == 'IRI': - quad += '<' + o['value'] + '>' - elif o['type'] == 'blank node': - quad += o['value'] - else: - escaped = escape(o['value']) - quad += '"' + escaped + '"' - if o['datatype'] == RDF_LANGSTRING: - if o.get('language'): - quad += '@' + o['language'] - elif o['datatype'] != XSD_STRING: - quad += '^^<' + o['datatype'] + '>' - - # graph - if g is not None: - if not g.startswith('_:'): - quad += ' <' + g + '>' - else: - quad += ' ' + g - - quad += ' .\n' - return quad - - -def _compare_rdf_triples(t1, t2): - """ - Compares two RDF triples for equality. - - :param t1: the first triple. - :param t2: the second triple. - - :return: True if the triples are the same, False if not. - """ - for attr in ['subject', 'predicate', 'object']: - if ( - t1[attr]['type'] != t2[attr]['type'] - or t1[attr]['value'] != t2[attr]['value'] - ): - return False - - if t1['object'].get('language') != t2['object'].get('language'): - return False - return t1['object'].get('datatype') == t2['object'].get('datatype') - - -class ParserError(ValueError): - """ - Base class for parsing errors. - """ - - def __init__(self, message, line_number=None): - Exception.__init__(self, message) - self.line_number = line_number From d4bacf86ba61da079139be7cf6eb59371bbb3784 Mon Sep 17 00:00:00 2001 From: Miel Vander Sande Date: Mon, 4 May 2026 11:25:05 +0200 Subject: [PATCH 5/7] Remove whitespace and satisfy linter --- lib/pyld/jsonld.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/pyld/jsonld.py b/lib/pyld/jsonld.py index ab3deb16..fbcc3d51 100644 --- a/lib/pyld/jsonld.py +++ b/lib/pyld/jsonld.py @@ -3790,7 +3790,7 @@ def _list_to_rdf(self, list_: list, issuer: IdentifierIssuer, triples, rdf_direc return result - + def _rdflib_term_from_id(self, id_: str) -> BNode | URIRef: """ Converts a JSON-LD @id value to an RDFLib term. @@ -3842,7 +3842,7 @@ def _object_to_rdf(self, item, issuer, triples, rdf_direction): elif _is_double(value): return Literal( # use the canonical double representation - _canonicalize_double(value), + _canonicalize_double(value), # add the double datatype if none is given datatype=URIRef(datatype) if datatype else XSD.double, normalize=False) @@ -3858,7 +3858,7 @@ def _object_to_rdf(self, item, issuer, triples, rdf_direction): else: return Literal( # we have a float, and canonicalization may proceed. - _canonicalize_double(float_value), + _canonicalize_double(float_value), datatype=XSD.double, normalize=False ) elif _is_integer(value): @@ -3880,7 +3880,7 @@ def _object_to_rdf(self, item, issuer, triples, rdf_direction): value, lang=item['@language'], datatype=None, - normalize=False, + normalize=False, ) return Literal( value, From 031225bd820adcd93415272ee91c5e70f5dbc016 Mon Sep 17 00:00:00 2001 From: Miel Vander Sande Date: Mon, 4 May 2026 11:59:03 +0200 Subject: [PATCH 6/7] Fix incomplete typing --- lib/pyld/jsonld.py | 2 +- specifications/json-ld-api | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/pyld/jsonld.py b/lib/pyld/jsonld.py index fbcc3d51..4aa9503a 100644 --- a/lib/pyld/jsonld.py +++ b/lib/pyld/jsonld.py @@ -989,7 +989,7 @@ def normalize(self, input_, options): {'format': cause.format}) from cause - def from_rdf(self, dataset: dict | Dataset, options): + def from_rdf(self, dataset: dict | Dataset | str, options): """ Converts an RDF dataset to JSON-LD. diff --git a/specifications/json-ld-api b/specifications/json-ld-api index 04a4eb7d..590f78d1 160000 --- a/specifications/json-ld-api +++ b/specifications/json-ld-api @@ -1 +1 @@ -Subproject commit 04a4eb7dc7cbc313f3f5be7ad9a3b06e87741693 +Subproject commit 590f78d171c45898ddaf6f8bd660a0567f4c7e5c From bdebb559143470fa0faed276b5217e5690dc6934 Mon Sep 17 00:00:00 2001 From: Miel Vander Sande Date: Thu, 21 May 2026 10:30:27 +0200 Subject: [PATCH 7/7] Adjust tests nquad output --- tests/test_jsonld.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/tests/test_jsonld.py b/tests/test_jsonld.py index 85458e4c..4e6cc8f7 100644 --- a/tests/test_jsonld.py +++ b/tests/test_jsonld.py @@ -850,13 +850,15 @@ def test_conflicting_property_names(self): "dublinCore": {"title": "Chapter 1: Jonathan Harker's Journal"}, } - expected = """ _:b0 . - "test" . -_:b0 "Chapter 1: Jonathan Harker's Journal" . + expected = """ "test"^^ . + _:b0 . +_:b0 "Chapter 1: Jonathan Harker's Journal"^^ . + """ nquads = jsonld.to_rdf(input, options={'format': 'application/n-quads'}) - assert nquads == expected + # TODO: move this into a helper function for comparing nquads + assert sorted(nquads.splitlines()) == sorted(expected.splitlines()) def test_conflicting_property_names_in_nested_node(self): @@ -877,12 +879,14 @@ def test_conflicting_property_names_in_nested_node(self): "dublinCore": {"title": "Chapter 1: Jonathan Harker's Journal"}, } - expected = """ "test" . - "Chapter 1: Jonathan Harker's Journal" . + expected = """ "test"^^ . + "Chapter 1: Jonathan Harker's Journal"^^ . + """ nquads = jsonld.to_rdf(input, options={'format': 'application/n-quads'}) - assert nquads == expected + # TODO: move this into a helper function for comparing nquads + assert sorted(nquads.splitlines()) == sorted(expected.splitlines()) class TestCompact: