Skip to content

Commit

Permalink
Enforcing explicit UTF-8 blob name. (googleapis#3354)
Browse files (browse the repository at this point in the history)
  • Loading branch information
dhermes authored May 2, 2017
1 parent 37107a5 commit 0164a4e
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 15 deletions.
32 changes: 27 additions & 5 deletions storage/google/cloud/storage/blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,11 @@ class Blob(_PropertyMixin):
"""A wrapper around Cloud Storage's concept of an ``Object``.
:type name: str
:param name: The name of the blob. This corresponds to the
unique path of the object in the bucket.
:param name: The name of the blob. This corresponds to the unique path of
the object in the bucket. If bytes, will be converted to a
unicode object. Blob / object names can contain any sequence
of valid unicode characters, of length 1-1024 bytes when
UTF-8 encoded.
:type bucket: :class:`google.cloud.storage.bucket.Bucket`
:param bucket: The bucket to which this blob belongs.
Expand Down Expand Up @@ -104,6 +107,7 @@ class Blob(_PropertyMixin):
"""

def __init__(self, name, bucket, chunk_size=None, encryption_key=None):
name = _bytes_to_unicode(name)
super(Blob, self).__init__(name=name)

self.chunk_size = chunk_size # Check that setter accepts value.
Expand Down Expand Up @@ -148,7 +152,7 @@ def path_helper(bucket_path, blob_name):
:rtype: str
:returns: The relative URL path for ``blob_name``.
"""
return bucket_path + '/o/' + quote(blob_name, safe='')
return bucket_path + '/o/' + _quote(blob_name)

@property
def acl(self):
Expand Down Expand Up @@ -190,7 +194,7 @@ def public_url(self):
return '{storage_base_url}/{bucket_name}/{quoted_name}'.format(
storage_base_url='https://storage.googleapis.com',
bucket_name=self.bucket.name,
quoted_name=quote(self.name, safe=''))
quoted_name=_quote(self.name))

def generate_signed_url(self, expiration, method='GET',
content_type=None,
Expand Down Expand Up @@ -261,7 +265,7 @@ def generate_signed_url(self, expiration, method='GET',
"""
resource = '/{bucket_name}/{quoted_name}'.format(
bucket_name=self.bucket.name,
quoted_name=quote(self.name, safe=''))
quoted_name=_quote(self.name))

if credentials is None:
client = self._require_client(client)
Expand Down Expand Up @@ -1362,3 +1366,21 @@ def _get_encryption_headers(key, source=False):
prefix + 'Key': _bytes_to_unicode(key),
prefix + 'Key-Sha256': _bytes_to_unicode(key_hash),
}


def _quote(value):
    """Percent-encode a value for use in a URL.

    Unicode input is UTF-8 encoded to bytes before being quoted. Python 3's
    ``urllib.parse.quote`` would do that encoding on its own, but Python 2's
    ``quote`` cannot handle non-ASCII unicode, so the conversion is always
    done explicitly here. No character is treated as safe, so ``/`` is
    escaped too.

    :type value: str or bytes
    :param value: The value to be URL-quoted.

    :rtype: str
    :returns: The encoded value (bytes in Python 2, unicode in Python 3).
    """
    return quote(_to_bytes(value, encoding='utf-8'), safe='')
5 changes: 3 additions & 2 deletions storage/tests/system.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ def test_copy_existing_file(self):


class TestUnicode(unittest.TestCase):

def test_fetch_object_and_check_content(self):
client = storage.Client()
bucket = client.bucket('storage-library-test-bucket')
Expand All @@ -256,8 +257,8 @@ def test_fetch_object_and_check_content(self):
# Normalization Form D: an ASCII e followed by U+0301 combining
# character; URL should end with Caf%C3%A9
test_data = {
u'Caf\u00e9'.encode('utf-8'): b'Normalization Form C',
u'Cafe\u0301'.encode('utf-8'): b'Normalization Form D',
u'Caf\u00e9': b'Normalization Form C',
u'Cafe\u0301': b'Normalization Form D',
}
for blob_name, file_contents in test_data.items():
blob = bucket.blob(blob_name)
Expand Down
59 changes: 51 additions & 8 deletions storage/tests/unit/test_blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ def _make_credentials():

class Test_Blob(unittest.TestCase):

def _make_one(self, *args, **kw):
@staticmethod
def _make_one(*args, **kw):
from google.cloud.storage.blob import Blob

properties = kw.pop('properties', None)
Expand All @@ -45,6 +46,13 @@ def test_ctor_wo_encryption_key(self):
self.assertIs(blob._acl.blob, blob)
self.assertEqual(blob._encryption_key, None)

def test_ctor_with_encoded_unicode(self):
    # A UTF-8 encoded bytes name must be exposed as unicode text.
    encoded_name = b'wet \xe2\x9b\xb5'
    expected_name = u'wet \N{sailboat}'
    blob = self._make_one(encoded_name, bucket=None)
    self.assertNotEqual(blob.name, encoded_name)
    self.assertEqual(blob.name, expected_name)

def test_ctor_w_encryption_key(self):
KEY = b'01234567890123456789012345678901' # 32 bytes
BLOB_NAME = 'blob-name'
Expand Down Expand Up @@ -91,21 +99,21 @@ def test_chunk_size_setter_bad_value(self):
def test_acl_property(self):
from google.cloud.storage.acl import ObjectACL

FAKE_BUCKET = _Bucket()
blob = self._make_one(None, bucket=FAKE_BUCKET)
fake_bucket = _Bucket()
blob = self._make_one(u'name', bucket=fake_bucket)
acl = blob.acl
self.assertIsInstance(acl, ObjectACL)
self.assertIs(acl, blob._acl)

def test_path_no_bucket(self):
FAKE_BUCKET = object()
NAME = 'blob-name'
blob = self._make_one(NAME, bucket=FAKE_BUCKET)
def test_path_bad_bucket(self):
fake_bucket = object()
name = u'blob-name'
blob = self._make_one(name, bucket=fake_bucket)
self.assertRaises(AttributeError, getattr, blob, 'path')

def test_path_no_name(self):
bucket = _Bucket()
blob = self._make_one(None, bucket=bucket)
blob = self._make_one(u'', bucket=bucket)
self.assertRaises(ValueError, getattr, blob, 'path')

def test_path_normal(self):
Expand All @@ -120,6 +128,12 @@ def test_path_w_slash_in_name(self):
blob = self._make_one(BLOB_NAME, bucket=bucket)
self.assertEqual(blob.path, '/b/name/o/parent%2Fchild')

def test_path_with_non_ascii(self):
    # U+00E9 must appear in the path as its percent-encoded UTF-8 bytes.
    blob = self._make_one(u'Caf\xe9', bucket=_Bucket())
    self.assertEqual(blob.path, '/b/name/o/Caf%C3%A9')

def test_public_url(self):
BLOB_NAME = 'blob-name'
bucket = _Bucket()
Expand All @@ -136,6 +150,13 @@ def test_public_url_w_slash_in_name(self):
blob.public_url,
'https://storage.googleapis.com/name/parent%2Fchild')

def test_public_url_with_non_ascii(self):
    # Both the space and the snowman are percent-encoded in the public URL.
    name = u'winter \N{snowman}'
    blob = self._make_one(name, bucket=_Bucket())
    self.assertEqual(
        blob.public_url,
        'https://storage.googleapis.com/name/winter%20%E2%98%83')

def _basic_generate_signed_url_helper(self, credentials=None):
BLOB_NAME = 'blob-name'
EXPIRATION = '2014-10-16T20:34:37.000Z'
Expand Down Expand Up @@ -2227,6 +2248,28 @@ def test_updated_unset(self):
self.assertIsNone(blob.updated)


class Test__quote(unittest.TestCase):
    """Unit tests for ``google.cloud.storage.blob._quote``."""

    @staticmethod
    def _call_fut(value):
        """Import the function under test lazily and call it."""
        from google.cloud.storage import blob

        return blob._quote(value)

    def test_bytes(self):
        # Raw bytes are quoted byte-for-byte.
        self.assertEqual(
            self._call_fut(b'\xDE\xAD\xBE\xEF'), '%DE%AD%BE%EF')

    def test_unicode(self):
        # A non-BMP character is quoted as its four UTF-8 bytes.
        self.assertEqual(self._call_fut(u'\U0001f681'), '%F0%9F%9A%81')

    def test_bad_type(self):
        # Values that are neither bytes nor text are rejected.
        self.assertRaises(TypeError, self._call_fut, None)


class _Responder(object):

def __init__(self, *responses):
Expand Down

0 comments on commit 0164a4e

Please sign in to comment.