Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test for #494 (canonicalization sometimes collapses BNodes) #496

Merged
merged 4 commits into from
Aug 10, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions rdflib/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,9 @@ def stringify(x):
return unicode(x)
if isinstance(color, Node):
return stringify(color)
value = sum(map(self.hashfunc, ' '.join([stringify(x) for x in color])))
value = 0
for triple in color:
value += self.hashfunc(' '.join([stringify(x) for x in triple]))
val = u"%x" % value
self._hash_cache[color] = val
return val
Expand Down Expand Up @@ -290,7 +292,7 @@ def _initial_color(self):

def _individuate(self, color, individual):
new_color = list(color.color)
new_color.append((len(color.nodes)))
new_color.append((len(color.nodes),))

color.nodes.remove(individual)
c = Color([individual], self.hashfunc, tuple(new_color),
Expand Down Expand Up @@ -320,6 +322,7 @@ def _refine(self, coloring, sequence):
sequence = sequence[:si] + colors + sequence[si+1:]
except ValueError:
sequence = colors[1:] + sequence

return coloring

@_runtime("to_hash_runtime")
Expand Down Expand Up @@ -407,7 +410,6 @@ def _traces(self, coloring, stats=None, depth=[0]):
stats['prunings'] += 1
discrete = [x for x in best if self._discrete(x)]
if len(discrete) == 0:
very_best = None
best_score = None
best_depth = None
for coloring in best:
Expand All @@ -434,6 +436,7 @@ def canonical_triples(self, stats=None):
if stats is not None:
stats['initial_coloring_runtime'] = _total_seconds(datetime.now() - start_coloring)
stats['initial_color_count'] = len(coloring)

if not self._discrete(coloring):
depth = [0]
coloring = self._traces(coloring, stats=stats, depth=depth)
Expand Down
109 changes: 105 additions & 4 deletions test/test_canonicalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@ def negative_graph_match_test():
True
],
[ unicode('''@prefix : <http://example.org/ns#> .
:linear_two_step_symmetry_start :related [ :related [ :related :linear_two_step_symmatry_end]],
:linear_two_step_symmetry_start :related [ :related [ :related :linear_two_step_symmatry_end]],
[ :related [ :related :linear_two_step_symmatry_end]].'''),
unicode('''@prefix : <http://example.org/ns#> .
:linear_two_step_symmetry_start :related [ :related [ :related :linear_two_step_symmatry_end]],
:linear_two_step_symmetry_start :related [ :related [ :related :linear_two_step_symmatry_end]],
[ :related [ :related :linear_two_step_symmatry_end]].'''),
True
],
Expand Down Expand Up @@ -68,7 +68,7 @@ def negative_graph_match_test():
].'''),
False
],
# This test fails because the algorithm purposefully breaks the symmetry of symmetric
# This test fails because the algorithm purposefully breaks the symmetry of symmetric
[ unicode('''@prefix : <http://example.org/ns#> .
_:a :rel [
:rel [
Expand Down Expand Up @@ -144,8 +144,109 @@ def negative_graph_match_test():
def fn(rdf1, rdf2, identical):
digest1 = get_digest_value(rdf1,"text/turtle")
digest2 = get_digest_value(rdf2,"text/turtle")
print rdf1
print digest1
print rdf2
print digest2
assert (digest1 == digest2) == identical
for inputs in testInputs:
yield fn, inputs[0], inputs[1], inputs[2]
yield fn, inputs[0], inputs[1], inputs[2]

def test_issue494_collapsing_bnodes():
    """Regression test: canonicalization must not collapse distinct BNodes.

    See https://github.com/RDFLib/rdflib/issues/494. Builds five reified
    statements, canonicalizes the graph, and checks that the triple count,
    node count, statement count and per-node subject degrees are unchanged.
    """
    source = URIRef(u'source')
    target = URIRef(u'target')

    # One row per reified statement:
    # (statement bnode id, rdf:object, rdf:predicate, rdf:subject)
    statements = [
        ('Na1a8fbcf755f41c1b5728f326be50994',
         source, BNode('vcb3'), BNode('vcb2')),
        ('Na713b02f320d409c806ff0190db324f4',
         target, BNode('vcb0'), source),
        ('Ndb804ba690a64b3dbb9063c68d5e3550',
         BNode('vr0KcS4'), BNode('vrby3JV'), source),
        ('Ndfc47fb1cd2d4382bcb8d5eb7835a636',
         source, BNode('vcb5'), target),
        ('Nec6864ef180843838aa9805bac835c98',
         source, BNode('vcb4'), source),
    ]

    # Expand each row into its four triples, in the order
    # object, predicate, subject, type.
    triples = []
    for node_id, obj, pred, subj in statements:
        stmt = BNode(node_id)
        triples.extend([
            (stmt, RDF['object'], obj),
            (stmt, RDF['predicate'], pred),
            (stmt, RDF['subject'], subj),
            (stmt, RDF['type'], RDF['Statement']),
        ])

    g = Graph()
    g += triples

    def subject_degree(graph, node):
        # Number of triples in `graph` that have `node` as their subject.
        return len(list(graph.triples((node, None, None))))

    def report_degrees(graph):
        # Print size/degree diagnostics for `graph` and return the
        # subject degrees of all its nodes, sorted in descending order.
        print('graph length: %d, nodes: %d' % (
            len(graph), len(graph.all_nodes())))
        print('triple_bnode degrees:')
        for stmt_node in graph.subjects(RDF['type'], RDF['Statement']):
            print(subject_degree(graph, stmt_node))
        print('all node degrees:')
        degrees = [subject_degree(graph, node)
                   for node in graph.all_nodes()]
        degrees.sort(reverse=True)
        print(degrees)
        return degrees

    g_node_degs = report_degrees(g)

    cg = to_canonical_graph(g)
    cg_node_degs = report_degrees(cg)

    original_statements = list(g.subjects(RDF['type'], RDF['Statement']))
    canonical_statements = list(cg.subjects(RDF['type'], RDF['Statement']))

    assert len(g) == len(cg), \
        'canonicalization changed number of triples in graph'
    assert len(g.all_nodes()) == len(cg.all_nodes()), \
        'canonicalization changed number of nodes in graph'
    assert len(original_statements) == len(canonical_statements), \
        'canonicalization changed number of statements'
    assert g_node_degs == cg_node_degs, \
        'canonicalization changed node degrees'