Added the \x00 (NUL) character to the PDF whitespace set. Slightly refactored the utils module.

oyv · Mar 24, 2014 · cfe2d03 · cfe2d03
1 parent c4bcffd
commit cfe2d03
Showing 1 changed file with 17 additions and 12 deletions.
diff --git a/PyPDF2/utils.py b/PyPDF2/utils.py
@@ -32,12 +32,22 @@
 __author_email__ = "biziqe@mathieu.fenniak.net"
 
 
+import sys
 # "Str" maintains compatibility with Python 2.x.
 # The next line is obfuscated like this so 2to3 won't change it.
 try: 
     import __builtin__ as builtins
 except ImportError:  # Py3
     import builtins
+
+
+if sys.version_info[0] < 3:  # Python 2: text type is unicode, byte type is str
+    string_type = unicode
+    bytes_type = str
+else:  # Python 3: text type is str, byte type is bytes
+    string_type = str
+    bytes_type = bytes
+
 Str = getattr(builtins, "basestring", str)
 
 
@@ -66,7 +76,7 @@ def readNonWhitespace(stream):
     Finds and reads the next non-whitespace character (ignores whitespace).
     """
     tok = b_(' ')
-    while tok == b_('\n') or tok == b_('\r') or tok == b_(' ') or tok == b_('\t'):
+    while tok in WHITESPACES:
         tok = stream.read(1)
     return tok
 
@@ -77,7 +87,7 @@ def skipOverWhitespace(stream):
     """
     tok = b_(' ')
     cnt = 0;
-    while tok == b_('\n') or tok == b_('\r') or tok == b_(' ') or tok == b_('\t'):
+    while tok in WHITESPACES:
         tok = stream.read(1)
         cnt+=1
     return (cnt > 1)
@@ -157,10 +167,6 @@ class PdfReadWarning(UserWarning):
 class PdfStreamError(PdfReadError):
     pass
 
-def hexStr(num):
-    return hex(num).replace('L', '')
-
-import sys
 
 def b_(s):
     if sys.version_info[0] < 3:
@@ -213,9 +219,8 @@ def hexencode(b):
         coder = codecs.getencoder('hex_codec')
         return coder(b)[0]
 
-if sys.version_info[0] < 3:
-    string_type = unicode
-    bytes_type = str
-else:
-    string_type = str
-    bytes_type = bytes
+def hexStr(num):
+    return hex(num).replace('L', '')  # drop any 'L' (the suffix Python 2 appends to hex() of a long)
+
+
+WHITESPACES = tuple(map(b_, [' ', '\n', '\r', '\t', '\x00']))  # tuple, not bare map(): a Py3 map iterator is exhausted by the first "in" test