Skip to content

Commit

Permalink
Use uuid4() to generate BNode ids. Fixes issue #185
Browse files Browse the repository at this point in the history
  • Loading branch information
gjhiggins committed Apr 20, 2012
1 parent 41a71f0 commit 111e4ef
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 130 deletions.
58 changes: 42 additions & 16 deletions rdflib/term.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
This module defines the different types of terms. Terms are the kinds of
This module defines the different types of terms. Terms are the kinds of
objects that can appear in a quoted/asserted triple. This includes those
that are core to RDF:
Expand Down Expand Up @@ -39,8 +39,9 @@
_LOGGER = logging.getLogger(__name__)

import base64

import sys
import threading

from urlparse import urlparse, urljoin, urldefrag
from string import ascii_letters
from random import choice
Expand All @@ -54,6 +55,12 @@
except ImportError:
from md5 import md5

try:
from uuid import uuid4
except ImportError:
import os
import random

import py3compat
b = py3compat.b

Expand Down Expand Up @@ -176,23 +183,44 @@ def md5_term_hash(self):
return d.hexdigest()



def _letter():
while True:
yield choice(ascii_letters)

def _unique_id():
"""Create a (hopefully) unique prefix"""
# NOTE(review): this listing is a flattened diff, so the next two
# statements look like the removed implementation (eight random letters)
# and the final return like its replacement -- confirm against the
# repository before relying on either.
uid = "".join(islice(_letter(), 0, 8))
return uid
# Used to read: """Create a (hopefully) unique prefix"""
# Now retained merely to leave the internal API unchanged.
return "id-"

# Adapted from http://icodesnip.com/snippet/python/simple-universally-unique-id-uuid-or-guid
def bnode_uuid():
    """
    Generate a pseudo-UUID on behalf of Python 2.4, which has no ``uuid``.

    The current time in milliseconds, a random integer and the host's
    network address (or a random stand-in when the address cannot be
    resolved) are joined with spaces and hashed with MD5.

    Returns the 32-character hexadecimal MD5 digest as a string.
    """
    # Imported locally so the function is usable even when ``uuid`` could be
    # imported and the module-level fallback imports of ``os`` and ``random``
    # were therefore skipped.
    import os
    import random
    import socket
    import time
    try:
        preseed = os.urandom(16)
    except NotImplementedError:
        # No OS randomness source available; pid and clock still vary.
        preseed = ''
    # Seed a private generator rather than the shared module-level one, so
    # that creating an id never disturbs other users of ``random``.
    rng = random.Random('%s%s%s' % (preseed, os.getpid(), time.time()))
    del preseed
    # int() promotes to long by itself on Python 2, so neither long() nor an
    # ``L`` suffix is required and the code also parses on Python 3.
    scale = 10 ** 17
    t = int(time.time() * 1000.0)
    r = int(rng.random() * scale)
    try:
        a = socket.gethostbyname(socket.gethostname())
    except (socket.error, UnicodeError):
        # if we can't get a network address, just imagine one
        a = rng.random() * scale
    data = str(t) + ' ' + str(r) + ' ' + str(a)
    data = md5(data.encode('ascii')).hexdigest()
    return data

def _serial_number_generator():
# Endless generator of identifier values, one per iteration.
# NOTE(review): this listing is a flattened diff; the counter statements
# (i = 0 / yield i / i = i + 1) appear to be the removed implementation
# and the version check below its replacement -- confirm against the
# repository.
i = 0
while 1:
yield i
i = i + 1
# Before Python 2.5 the hand-rolled bnode_uuid() is used in place of uuid4().
if sys.version_info[:2] < (2, 5):
yield bnode_uuid()
else:
yield uuid4()

bNodeLock = threading.RLock()
# bNodeLock = threading.RLock()

class BNode(Identifier):
"""
Expand All @@ -211,9 +239,7 @@ def __new__(cls, value=None,
# so that BNode values do not
# collide with ones created with a different instance of this module
# at some other time.
bNodeLock.acquire()
node_id = _sn_gen.next()
bNodeLock.release()
value = "%s%s" % (_prefix, node_id)
else:
# TODO: check that value falls within acceptable bnode value range
Expand Down
122 changes: 8 additions & 114 deletions test/test_issue200.py
Original file line number Diff line number Diff line change
@@ -1,146 +1,40 @@
#!/usr/bin/env python

import os, sys, time, random
import os
import rdflib
import unittest
try:
from hashlib import md5
except ImportError:
from md5 import md5


import platform
if platform.system() == 'Java':
from nose import SkipTest
raise SkipTest('No os.pipe() in Jython, skipping')

# Adapted from http://icodesnip.com/snippet/python/simple-universally-unique-id-uuid-or-guid
def bnode_uuid():
    """
    Generates a uuid on behalf of Python 2.4

    This is a generator that yields exactly one value: the hexadecimal MD5
    digest of the current time in milliseconds, a random integer and the
    host's network address (or a random stand-in when the address cannot
    be resolved), joined by spaces.
    """
    import socket
    try:
        preseed = os.urandom(16)
    except NotImplementedError:
        # No OS randomness source available; pid and clock still vary.
        preseed = ''
    # Seed a private generator rather than the shared module-level one, so
    # that producing an id never disturbs other users of ``random``.
    rng = random.Random('%s%s%s' % (preseed, os.getpid(), time.time()))
    del preseed
    # int() promotes to long by itself on Python 2, so neither long() nor an
    # ``L`` suffix is required and the code also parses on Python 3.
    scale = 10 ** 17
    t = int(time.time() * 1000.0)
    r = int(rng.random() * scale)
    try:
        a = socket.gethostbyname(socket.gethostname())
    except (socket.error, UnicodeError):
        # if we can't get a network address, just imagine one
        a = rng.random() * scale
    data = str(t) + ' ' + str(r) + ' ' + str(a)
    data = md5(data.encode('ascii')).hexdigest()
    yield data

class TestRandomSeedInFork(unittest.TestCase):
# Fork-based checks of BNode id generation: each test opens a pipe, forks,
# has the child write a freshly created BNode to the pipe, and compares
# what the parent reads with a BNode created in the parent.
# NOTE(review): this listing is a flattened diff (old and new lines are
# interleaved and some lines appear twice); confirm against the repository
# before treating it as runnable code.
def test_same_bnodeid_sequence_in_fork(self):
"""Demonstrates that os.fork()ed child processes produce the same
sequence of BNode ids as the parent process.
"""
r, w = os.pipe() # these are file descriptors, not file objects
pid = os.fork()
if pid:
pb1 = rdflib.term.BNode()
os.close(w) # use os.close() to close a file descriptor
r = os.fdopen(r) # turn r into a file object
txt = r.read()
os.waitpid(pid, 0) # make sure the child process gets cleaned up
else:
os.close(r)
w = os.fdopen(w, 'w')
cb = rdflib.term.BNode()
w.write(cb)
w.close()
os._exit(0)
assert txt == str(pb1), "Test now obsolete, random seed working"

def test_random_not_reseeded_in_fork(self):
"""Demonstrates ineffectiveness of reseeding Python's random.
def test_bnode_id_differs_in_fork(self):
"""Checks that os.fork()ed child processes produce a
different sequence of BNode ids from the parent process.
"""
r, w = os.pipe() # these are file descriptors, not file objects
r, w = os.pipe() # these are file descriptors, not file objects
pid = os.fork()
if pid:
pb1 = rdflib.term.BNode()
os.close(w) # use os.close() to close a file descriptor
r = os.fdopen(r) # turn r into a file object
os.close(w) # use os.close() to close a file descriptor
r = os.fdopen(r) # turn r into a file object
txt = r.read()
os.waitpid(pid, 0) # make sure the child process gets cleaned up
os.waitpid(pid, 0) # make sure the child process gets cleaned up
else:
os.close(r)
try:
preseed = os.urandom(16)
except NotImplementedError:
preseed = ''
# Have doubts about this. random.seed will just hash the string
random.seed('%s%s%s' % (preseed, os.getpid(), time.time()))
del preseed
w = os.fdopen(w, 'w')
cb = rdflib.term.BNode()
w.write(cb)
w.close()
os._exit(0)
assert txt == str(pb1), "Reseeding worked, this test is obsolete"

def test_bnode_uuid_differs_in_fork(self):
"""
os.fork()ed child processes should produce a different sequence of
BNode ids from the sequence produced by the parent process.
"""
r, w = os.pipe() # these are file descriptors, not file objects
pid = os.fork()
if pid:
pb1 = rdflib.term.BNode(_sn_gen=bnode_uuid(), _prefix="urn:uuid:")
os.close(w) # use os.close() to close a file descriptor
r = os.fdopen(r) # turn r into a file object
txt = r.read()
os.waitpid(pid, 0) # make sure the child process gets cleaned up
else:
os.close(r)
w = os.fdopen(w, 'w')
cb = rdflib.term.BNode(_sn_gen=bnode_uuid(), _prefix="urn:uuid:")
w.write(cb)
w.close()
os._exit(0)
# NOTE(review): txt is what the child wrote to the pipe and pb1 is created
# in the parent branch, so the "Parent"/"child" labels in this message look
# swapped -- confirm and fix in the repository.
assert txt != str(pb1), "Parent process BNode id: " + \
"%s, child process BNode id: %s" % (
txt, str(pb1))

def test_uuid_differs_in_fork(self):
"""
os.fork()ed child processes using uuid4() should produce a different
sequence of BNode ids from the sequence produced by the parent process.
"""
if sys.version_info[:2] == (2, 4):
from nose import SkipTest
raise SkipTest('uuid4() not available prior to Python 2.5')
def bnode_uuid():
# Redefine 'cos Python 2.5 and above allows use of uuid
import uuid
yield uuid.uuid4()
r, w = os.pipe() # these are file descriptors, not file objects
pid = os.fork()
if pid:
pb1 = rdflib.term.BNode(_sn_gen=bnode_uuid(), _prefix="urn:uuid:")
os.close(w) # use os.close() to close a file descriptor
r = os.fdopen(r) # turn r into a file object
txt = r.read()
os.waitpid(pid, 0) # make sure the child process gets cleaned up
else:
os.close(r)
w = os.fdopen(w, 'w')
cb = rdflib.term.BNode(_sn_gen=bnode_uuid(), _prefix="urn:uuid:")
w.write(cb)
w.close()
os._exit(0)
# NOTE(review): txt is what the child wrote to the pipe and pb1 is created
# in the parent branch, so the "Parent"/"child" labels in this message look
# swapped -- confirm and fix in the repository.
assert txt != str(pb1), "Parent process BNode id: " + \
"%s, child process BNode id: %s" % (
txt, str(pb1))

if __name__ == "__main__":
unittest.main()

0 comments on commit 111e4ef

Please sign in to comment.