From 4d8b5eba0923237c3c0f2736bcec11b4f848e903 Mon Sep 17 00:00:00 2001 From: Gunnar Aastrand Grimnes Date: Sun, 15 Jan 2012 15:34:31 +0100 Subject: [PATCH 1/3] added graph slice code + test --- rdflib/graph.py | 33 +++++++++ test/test_slice.py | 174 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 207 insertions(+) create mode 100644 test/test_slice.py diff --git a/rdflib/graph.py b/rdflib/graph.py index 5ac2a56c5..cbdc82835 100644 --- a/rdflib/graph.py +++ b/rdflib/graph.py @@ -367,6 +367,39 @@ def triples(self, (s, p, o)): for (s, p, o), cg in self.__store.triples((s, p, o), context=self): yield (s, p, o) + def __getitem__(self, item, subject=None): + + if isinstance(item, tuple) and len(item)==1: + item=item[0] + + if isinstance(item, slice): + + s,p,o=item.start,item.stop,item.step + if not isinstance(s,tuple): s=(s,) + if not isinstance(p,tuple): p=(p,) + if not isinstance(o,tuple): o=(o,) + + if subject: s=(subject,) + + for _s in s: + for _p in p: + for _o in o: + for t in self.triples((_s,_p,_o)): + yield t + + elif isinstance(item, Node): + + if subject: item=subject + for t in self.triples((item,None,None)): yield t + + elif isinstance(item, tuple): + # carry out the first one, recurse while constraining subject + for x in self.__getitem__(item[0],subject): + for y in self.__getitem__(item[1:], x[2]): + yield y + else: + raise TypeError("You can only index a graph by a single rdflib term, tuples or a slice of rdflib terms.") + def __len__(self): """Returns the number of triples in the graph diff --git a/test/test_slice.py b/test/test_slice.py new file mode 100644 index 000000000..0807cfcce --- /dev/null +++ b/test/test_slice.py @@ -0,0 +1,174 @@ + +from rdflib import Graph, URIRef +import unittest + +class GraphSlice(unittest.TestCase): + + def testSlice(self): + """ + Slicing in python supports: + Slicing a range, i.e element 2-5, with a step + slicing in more than one dimension with comma + + normal lists only let you do ranges or single 
items + + scipy lets you slice multidimensional arrays like this: + array[(2,5),10:20] returns the 10-20th column of the 2nd and 5th row + in python slice syntax + You can combine tuples and ranges, but not vice versa, i.e. + i.e + a[(0,1):2] is ok, although what is means is not defined for scipy + + a[(0:1),2] is NOT ok. + + In theory, a graph could be seen as a 3-dimensional array of booleans, + i.e. one dimension for subject, predicate, object, and bools whether + this triple is contained in the graph. + + So we could use slice dimensions for each triple element, however, this + leaves us with range-slices unused, since there is no concept or order + for rdflib nodes (or there is lexical order, but it's not very useful) + + Better is perhaps to pervert the slice object, + and use start, stop, step as subject, predicate, object + + This leaves us with several dimensions, i.e. several objects + And also with tuples used for start, stop, step... + + Functions that would be interesting would be: + * disjunction - matching either of the patterns given + * conjunction - matching all of the patterns given + * property-paths - going further in the graph + + Gut feeling tells me that conjunction is least useful, + i.e. neither of these strike me as very useful: + [(bob,bill):likes] - everything bob AND bill likes + [bob:(likes,hates)] - everything bob likes AND hates + [::(pizza,cheese)] - everything about pizza AND cheese + + but the disjunction case does seem useful: + [resource:(SKOS.prefLabel,RDFS.label)] - + give me either of the two label properties + [:RDF.type:(RDFS.Class,OWL.Class)] - + give me all RDFS classes or OWL classes + + I think having paths would very nice - i.e.: + g[resource:RDF.type,:RDFS.label] -> get me all labels of the types of this thing + + I have implemented disjunction and paths. + + One problem with using slices and :: notation for the s,p,o part is that + this does not generalize to ConjunctiveGraphs, as slices can only have 3 parts. 
+ However, maybe one does not want to mix and match contexts very often, so having + a simple __getitem__ which is the same as get_graph on ConjunctiveGraphs is probably enough: + cg[mycontext][:RDF.type,:RDFS.label] + This is not implemented atm. + + Below are some examples - that should make it much clearer + + all operations return generators over full triples - + although one could try to be clever, and match subject_predicates + and related functions, and only return tuples - depending on what was given + I think this would be too confusing + """ + + sl=lambda x,y: self.assertEquals(len(list(x)),y) + soe=lambda x,y: self.assertEquals(set([a[2] for a in x]),set(y)) # equals objects + g=self.graph + + # Single terms are all trivial: + + # single index slices by subject, i.e. return triples((x,None,None)) + # tell me everything about "tarek" + sl(g[self.tarek],2) + + # single slice slices by s,p,o, with : used to split + # tell me everything about "tarek" (same as above) + sl(g[self.tarek::],2) + + # give me every "likes" relationship + sl(g[:self.likes:],5) + + # give me every relationship to pizza + sl(g[::self.pizza],3) + + # give me everyone who likes pizza + sl(g[:self.likes:self.pizza],2) + + # does tarek like pizza? + sl(g[self.tarek:self.likes:self.pizza],1) + + # Much more intesting is using tuples: + + # tuples in slices + # give me everything bob OR tarek like + + # (alternative could be: + # give me everything both bob AND tarek like) + sl(g[(self.tarek, self.bob):self.likes],3) + + # everything hated or liked + sl(g[:(self.hates,self.likes)], 7) + + # hated or liked, pizza or cheese + sl(g[:(self.hates,self.likes):(self.pizza,self.cheese)], 6) + + + # give everything tarek OR bob, likes OR hates + + # two alternatives: + # give everything tarek AND bob, likes AND hates + # or pair-wise matching: + # give everything tarek likes OR bob hates + sl(g[(self.tarek, self.bob):(self.likes,self.hates)],5) + + + + # several slices, i.e. 
several patterns + # + # a nested path, ignore the subject of the second pattern + # "give me everything liked by something bob hates" + + ## (alternatives could be: + ## give me everything tarek likes AND hates + ## or + ## give me everything tarek likes OR hates ) + sl(g[self.bob:self.hates,:self.likes], 2) + soe(g[self.bob:self.hates,:self.likes], [self.pizza,self.cheese]) + + + + + def setUp(self): + self.graph = Graph() + + self.michel = URIRef(u'michel') + self.tarek = URIRef(u'tarek') + self.bob = URIRef(u'bob') + self.likes = URIRef(u'likes') + self.hates = URIRef(u'hates') + self.pizza = URIRef(u'pizza') + self.cheese = URIRef(u'cheese') + + self.addStuff() + + def addStuff(self): + tarek = self.tarek + michel = self.michel + bob = self.bob + likes = self.likes + hates = self.hates + pizza = self.pizza + cheese = self.cheese + + self.graph.add((tarek, likes, pizza)) + self.graph.add((tarek, likes, cheese)) + self.graph.add((michel, likes, pizza)) + self.graph.add((michel, likes, cheese)) + self.graph.add((bob, likes, cheese)) + self.graph.add((bob, hates, pizza)) + self.graph.add((bob, hates, michel)) # gasp! + + +if __name__ == '__main__': + unittest.main() From a02b2307abf5b48990bd33a7da868bd37cbdd26f Mon Sep 17 00:00:00 2001 From: gromgull Date: Mon, 16 Apr 2012 16:55:33 +0200 Subject: [PATCH 2/3] added slice to resources --- rdflib/resource.py | 55 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/rdflib/resource.py b/rdflib/resource.py index 6fb371b6a..2ca3c8a4e 100644 --- a/rdflib/resource.py +++ b/rdflib/resource.py @@ -290,7 +290,7 @@ """) -from rdflib.term import BNode, URIRef +from rdflib.term import Node, BNode, URIRef from rdflib.namespace import RDF __all__ = ['Resource'] @@ -402,6 +402,59 @@ def _cast(self, node): else: return node + def __getitem__(self, item, subject=None): + """ + Resources can be sliced like graphs, but the subject is fixed. 
+ + r[RDFS.label] returns triples for (self.identifier, RDFS.label, None) + r[RDFS.label, Literal("Bob")] for (self.identifier, RDFS.label, "Bob") + etc. + + For deeper path, the second level works as for graphs: + r[FOAF.knows, :FOAF.name] gives the name of all people this resource knows + """ + + if isinstance(item, tuple) and len(item)==1: + item=item[0] + + if isinstance(item, slice): + if not subject: # first item is fixed + if item.step: + raise TypeError("Resources fix the subject for slicing, and can only be sliced by predicate/object. ") + p,o=item.start,item.stop + s=(self.identifier,) + else: + s,p,o=item.start, item.stop, item.step + + if not isinstance(p,tuple): p=(p,) + if not isinstance(o,tuple): o=(o,) + + if subject: + s=(subject,) + + for _s in s: + for _p in p: + for _o in o: + for t in self.triples((_s,_p,_o)): + yield t + + elif isinstance(item, Node): + + if subject: + item=subject + for t in self.triples((item,None,None)): yield t + else: + for t in self.triples((self.identifier, item, None)): yield t + + elif isinstance(item, tuple): + # carry out the first one, recurse while constraining subject + for x in self.__getitem__(item[0],subject): + for y in self.__getitem__(item[1:], x[2]): + yield y + else: + raise TypeError("You can only index a graph by a single rdflib term, tuples or a slice of rdflib terms.") + + def _new(self, subject): return type(self)(self._graph, subject) From 7a6ad5c1cdd2894803620afcb06221a278917de3 Mon Sep 17 00:00:00 2001 From: Gunnar Aastrand Grimnes Date: Thu, 2 May 2013 10:42:28 +0200 Subject: [PATCH 3/3] simplified slice syntax for graph/resources. 
Fixes https://code.google.com/p/rdflib/issues/detail?id=202 :) --- examples/foafpaths.py | 6 +-- rdflib/graph.py | 62 ++++++++++++++++---------- rdflib/resource.py | 46 ++++--------------- test/test_slice.py | 100 ++---------------------------------------- 4 files changed, 56 insertions(+), 158 deletions(-) diff --git a/examples/foafpaths.py b/examples/foafpaths.py index 1dd958c58..39e27fc40 100644 --- a/examples/foafpaths.py +++ b/examples/foafpaths.py @@ -7,9 +7,9 @@ p1 / p2 => Path sequence p1 | p2 => Path alternative -p1 % '*' => chain of 0 or more p's -p1 % '+' => chain of 1 or more p's -p1 % '?' => 0 or 1 p +p1 * '*' => chain of 0 or more p's +p1 * '+' => chain of 1 or more p's +p1 * '?' => 0 or 1 p ~p1 => p1 is inverted order (s p1 o) <=> (o ~p1 s) -p1 => NOT p1, i.e. any property by p1 diff --git a/rdflib/graph.py b/rdflib/graph.py index cb8018c00..bf2a45b4e 100644 --- a/rdflib/graph.py +++ b/rdflib/graph.py @@ -365,38 +365,56 @@ def triples(self, (s, p, o)): for (s, p, o), cg in self.__store.triples((s, p, o), context=self): yield (s, p, o) - def __getitem__(self, item, subject=None): + @py3compat.format_doctest_out + def __getitem__(self, item): + """ + A graph can be "sliced" as a shortcut for the triples method + The python slice syntax is (ab)used for specifying triples. 
+ A generator over matching triples is returned - if isinstance(item, tuple) and len(item)==1: - item=item[0] + >>> import rdflib + >>> g = rdflib.Graph() + >>> g.add((rdflib.URIRef('urn:bob'), rdflib.RDFS.label, rdflib.Literal('Bob'))) - if isinstance(item, slice): + >>> list(g[rdflib.URIRef('urn:bob')]) # all triples about bob + [(rdflib.term.URIRef(%(u)s'urn:bob'), rdflib.term.URIRef(%(u)s'http://www.w3.org/2000/01/rdf-schema#label'), rdflib.term.Literal(%(u)s'Bob'))] + + >>> list(g[:rdflib.RDFS.label]) # all label triples + [(rdflib.term.URIRef(%(u)s'urn:bob'), rdflib.term.URIRef(%(u)s'http://www.w3.org/2000/01/rdf-schema#label'), rdflib.term.Literal(%(u)s'Bob'))] - s,p,o=item.start,item.stop,item.step - if not isinstance(s,tuple): s=(s,) - if not isinstance(p,tuple): p=(p,) - if not isinstance(o,tuple): o=(o,) + >>> list(g[::rdflib.Literal('Bob')]) # all triples with object 'Bob' + [(rdflib.term.URIRef(%(u)s'urn:bob'), rdflib.term.URIRef(%(u)s'http://www.w3.org/2000/01/rdf-schema#label'), rdflib.term.Literal(%(u)s'Bob'))] + + Combined with SPARQL paths, more complex queries can be + written concisely: + + Names of all of Bob's friends: + + g[bob : FOAF.knows/FOAF.name ] + + Some label for Bob: + + g[bob : DC.title|FOAF.name|RDFS.label] - if subject: s=(subject,) + All friends and friends of friends of Bob - for _s in s: - for _p in p: - for _o in o: - for t in self.triples((_s,_p,_o)): - yield t + g[bob : FOAF.knows * '+'] + + etc. 
+ + """ + + if isinstance(item, slice): + + s,p,o=item.start,item.stop,item.step + return self.triples((s,p,o)) elif isinstance(item, Node): - if subject: item=subject - for t in self.triples((item,None,None)): yield t + return self.triples((item,None,None)) - elif isinstance(item, tuple): - # carry out the first one, recurse while constraining subject - for x in self.__getitem__(item[0],subject): - for y in self.__getitem__(item[1:], x[2]): - yield y else: - raise TypeError("You can only index a graph by a single rdflib term, tuples or a slice of rdflib terms.") + raise TypeError("You can only index a graph by a single rdflib term or a slice of rdflib terms.") def __len__(self): """Returns the number of triples in the graph diff --git a/rdflib/resource.py b/rdflib/resource.py index 100be4459..d50a502b3 100644 --- a/rdflib/resource.py +++ b/rdflib/resource.py @@ -434,57 +434,29 @@ def _cast(self, node): else: return node - def __getitem__(self, item, subject=None): + def __getitem__(self, item): """ Resources can be sliced like graphs, but the subject is fixed. r[RDFS.label] returns triples for (self.identifier, RDFS.label, None) - r[RDFS.label, Literal("Bob")] for (self.identifier, RDFS.label, "Bob") + r[RDFS.label : Literal("Bob")] for (self.identifier, RDFS.label, "Bob") etc. - For deeper path, the second level works as for graphs: - r[FOAF.knows, :FOAF.name] gives the name of all people this resource knows """ - if isinstance(item, tuple) and len(item)==1: - item=item[0] - if isinstance(item, slice): - if not subject: # first item is fixed - if item.step: - raise TypeError("Resources fix the subject for slicing, and can only be sliced by predicate/object. 
") - p,o=item.start,item.stop - s=(self.identifier,) - else: - s,p,o=item.start, item.stop, item.step - - if not isinstance(p,tuple): p=(p,) - if not isinstance(o,tuple): o=(o,) - - if subject: - s=(subject,) - - for _s in s: - for _p in p: - for _o in o: - for t in self.triples((_s,_p,_o)): - yield t + if item.step: + raise TypeError("Resources fix the subject for slicing, and can only be sliced by predicate/object. ") + p,o=item.start,item.stop + s=(self.identifier,) + return self.triples((s,p,o)) elif isinstance(item, Node): - if subject: - item=subject - for t in self.triples((item,None,None)): yield t - else: - for t in self.triples((self.identifier, item, None)): yield t + return self.triples((self.identifier, item, None)) - elif isinstance(item, tuple): - # carry out the first one, recurse while constraining subject - for x in self.__getitem__(item[0],subject): - for y in self.__getitem__(item[1:], x[2]): - yield y else: - raise TypeError("You can only index a graph by a single rdflib term, tuples or a slice of rdflib terms.") + raise TypeError("You can only index a resource by a single rdflib term, a slice of rdflib terms.") def _new(self, subject): diff --git a/test/test_slice.py b/test/test_slice.py index 0807cfcce..429a73669 100644 --- a/test/test_slice.py +++ b/test/test_slice.py @@ -6,70 +6,10 @@ class GraphSlice(unittest.TestCase): def testSlice(self): """ - Slicing in python supports: - Slicing a range, i.e element 2-5, with a step - slicing in more than one dimension with comma - - normal lists only let you do ranges or single items - - scipy lets you slice multidimensional arrays like this: - array[(2,5),10:20] returns the 10-20th column of the 2nd and 5th row - in python slice syntax - You can combine tuples and ranges, but not vice versa, i.e. - i.e - a[(0,1):2] is ok, although what is means is not defined for scipy - - a[(0:1),2] is NOT ok. - - In theory, a graph could be seen as a 3-dimensional array of booleans, - i.e. 
one dimension for subject, predicate, object, and bools whether - this triple is contained in the graph. - - So we could use slice dimensions for each triple element, however, this - leaves us with range-slices unused, since there is no concept or order - for rdflib nodes (or there is lexical order, but it's not very useful) - - Better is perhaps to pervert the slice object, + We pervert the slice object, and use start, stop, step as subject, predicate, object - This leaves us with several dimensions, i.e. several objects - And also with tuples used for start, stop, step... - - Functions that would be interesting would be: - * disjunction - matching either of the patterns given - * conjunction - matching all of the patterns given - * property-paths - going further in the graph - - Gut feeling tells me that conjunction is least useful, - i.e. neither of these strike me as very useful: - [(bob,bill):likes] - everything bob AND bill likes - [bob:(likes,hates)] - everything bob likes AND hates - [::(pizza,cheese)] - everything about pizza AND cheese - - but the disjunction case does seem useful: - [resource:(SKOS.prefLabel,RDFS.label)] - - give me either of the two label properties - [:RDF.type:(RDFS.Class,OWL.Class)] - - give me all RDFS classes or OWL classes - - I think having paths would very nice - i.e.: - g[resource:RDF.type,:RDFS.label] -> get me all labels of the types of this thing - - I have implemented disjunction and paths. - - One problem with using slices and :: notation for the s,p,o part is that - this does not generalize to ConjunctiveGraphs, as slices can only have 3 parts. - However, maybe one does not want to mix and match contexts very often, so having - a simple __getitem__ which is the same as get_graph on ConjunctiveGraphs is probably enough: - cg[mycontext][:RDF.type,:RDFS.label] - This is not implemented atm. 
- - Below are some examples - that should make it much clearer - - all operations return generators over full triples - - although one could try to be clever, and match subject_predicates - and related functions, and only return tuples - depending on what was given - I think this would be too confusing + all operations return generators over full triples """ sl=lambda x,y: self.assertEquals(len(list(x)),y) @@ -98,43 +38,11 @@ def testSlice(self): # does tarek like pizza? sl(g[self.tarek:self.likes:self.pizza],1) - # Much more intesting is using tuples: - - # tuples in slices - # give me everything bob OR tarek like - - # (alternative could be: - # give me everything both bob AND tarek like) - sl(g[(self.tarek, self.bob):self.likes],3) + # More interesting is using paths # everything hated or liked - sl(g[:(self.hates,self.likes)], 7) - - # hated or liked, pizza or cheese - sl(g[:(self.hates,self.likes):(self.pizza,self.cheese)], 6) - - - # give everything tarek OR bob, likes OR hates - - # two alternatives: - # give everything tarek AND bob, likes AND hates - # or pair-wise matching: - # give everything tarek likes OR bob hates - sl(g[(self.tarek, self.bob):(self.likes,self.hates)],5) - - - - # several slices, i.e. several patterns - # - # a nested path, ignore the subject of the second pattern - # "give me everything liked by something bob hates" + sl(g[:self.hates|self.likes], 7) - ## (alternatives could be: - ## give me everything tarek likes AND hates - ## or - ## give me everything tarek likes OR hates ) - sl(g[self.bob:self.hates,:self.likes], 2) - soe(g[self.bob:self.hates,:self.likes], [self.pizza,self.cheese])