Skip to content

Commit

Permalink
[Storage][Blob][Bug] Support parsing blob url with '/' in blob name (#12619)
Browse files Browse the repository at this point in the history

* [Storage][Blob][Bug]Support parsing blob url with '/' in blob name

* [Storage]support parsing emulator url

* fix pylint

* make format_shared_key_credential private

* fix test
  • Loading branch information
xiafu-msft authored Aug 12, 2020
1 parent 49c90a6 commit a1039c8
Show file tree
Hide file tree
Showing 13 changed files with 143 additions and 50 deletions.
20 changes: 14 additions & 6 deletions sdk/storage/azure-storage-blob/azure/storage/blob/_blob_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
try:
from urllib.parse import urlparse, quote, unquote
except ImportError:
from urlparse import urlparse # type: ignore
from urllib2 import quote, unquote # type: ignore
from urlparse import urlparse # type: ignore
from urllib2 import quote, unquote # type: ignore

import six
from azure.core.tracing.decorator import distributed_trace
Expand Down Expand Up @@ -180,7 +180,7 @@ def _format_url(self, hostname):
@classmethod
def from_blob_url(cls, blob_url, credential=None, snapshot=None, **kwargs):
# type: (str, Optional[Any], Optional[Union[str, Dict[str, Any]]], Any) -> BlobClient
"""Create BlobClient from a blob url.
"""Create BlobClient from a blob url. This doesn't support customized blob url with '/' in blob name.
:param str blob_url:
The full endpoint URL to the Blob, including SAS token and snapshot if used. This could be
Expand Down Expand Up @@ -209,10 +209,18 @@ def from_blob_url(cls, blob_url, credential=None, snapshot=None, **kwargs):
if not parsed_url.netloc:
raise ValueError("Invalid URL: {}".format(blob_url))

path_blob = parsed_url.path.lstrip('/').split('/')
account_path = ""
if len(path_blob) > 2:
account_path = "/" + "/".join(path_blob[:-2])
if ".core." in parsed_url.netloc:
# .core. is indicating non-customized url. Blob name with directory info can also be parsed.
path_blob = parsed_url.path.lstrip('/').split('/', 1)
elif "localhost" in parsed_url.netloc or "127.0.0.1" in parsed_url.netloc:
path_blob = parsed_url.path.lstrip('/').split('/', 2)
account_path += path_blob[0]
else:
# for customized url. blob name that has directory info cannot be parsed.
path_blob = parsed_url.path.lstrip('/').split('/')
if len(path_blob) > 2:
account_path = "/" + "/".join(path_blob[:-2])
account_url = "{}://{}{}?{}".format(
parsed_url.scheme,
parsed_url.netloc.rstrip('/'),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,17 @@ def __init__(
raise ValueError("Invalid service: {}".format(service))
service_name = service.split('-')[0]
account = parsed_url.netloc.split(".{}.core.".format(service_name))

self.account_name = account[0] if len(account) > 1 else None
secondary_hostname = None
if not self.account_name and parsed_url.netloc.startswith("localhost") \
or parsed_url.netloc.startswith("127.0.0.1"):
self.account_name = parsed_url.path.strip("/")

self.credential = format_shared_key_credential(account, credential)
self.credential = _format_shared_key_credential(self.account_name, credential)
if self.scheme.lower() != "https" and hasattr(self.credential, "get_token"):
raise ValueError("Token credential is only supported with HTTPS.")

secondary_hostname = None
if hasattr(self.credential, "account_name"):
self.account_name = self.credential.account_name
secondary_hostname = "{}-secondary.{}.{}".format(
Expand Down Expand Up @@ -326,11 +331,11 @@ def __exit__(self, *args): # pylint: disable=arguments-differ
pass


def format_shared_key_credential(account, credential):
def _format_shared_key_credential(account_name, credential):
if isinstance(credential, six.string_types):
if len(account) < 2:
if not account_name:
raise ValueError("Unable to determine account name for shared key credential.")
credential = {"account_name": account[0], "account_key": credential}
credential = {"account_name": account_name, "account_key": credential}
if isinstance(credential, dict):
if "account_name" not in credential:
raise ValueError("Shared key credential missing 'account_name")
Expand Down
26 changes: 26 additions & 0 deletions sdk/storage/azure-storage-blob/tests/test_blob_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,7 @@ def test_create_service_with_cstr_succeeds_if_sec_with_prim(self, resource_group
self.assertTrue(service.primary_endpoint.startswith('https://www.mydomain.com/'))
self.assertTrue(service.secondary_endpoint.startswith('https://www-sec.mydomain.com/'))


def test_create_service_with_custom_account_endpoint_path(self):
account_name = "blobstorage"
account_key = "blobkey"
Expand Down Expand Up @@ -438,6 +439,31 @@ def test_create_service_with_custom_account_endpoint_path(self):
self.assertEqual(service.primary_hostname, 'local-machine:11002/custom/account/path')
self.assertEqual(service.url, 'http://local-machine:11002/custom/account/path/foo/bar?snapshot=baz')

def test_create_blob_client_with_sub_directory_path_in_blob_name(self):
    """BlobClient.from_blob_url must keep '/'-separated directory segments
    inside the blob name instead of mis-parsing them as account path."""
    expected_container = "containername"
    expected_blob = "dir1/sub000/2010_Unit150_Ivan097_img0003.jpg"

    # Standard *.core.* endpoint: everything after the container is the blob name.
    cloud_url = (
        "https://testaccount.blob.core.windows.net/containername/"
        "dir1/sub000/2010_Unit150_Ivan097_img0003.jpg"
    )
    client = BlobClient.from_blob_url(cloud_url)
    self.assertEqual(expected_container, client.container_name)
    self.assertEqual(expected_blob, client.blob_name)

    # Emulator-style endpoint: the account name precedes the container in the path.
    emulator_url = (
        "http://127.0.0.1:1000/devstoreaccount1/containername/"
        "dir1/sub000/2010_Unit150_Ivan097_img0003.jpg"
    )
    client = BlobClient.from_blob_url(emulator_url)
    self.assertEqual(expected_container, client.container_name)
    self.assertEqual(expected_blob, client.blob_name)

def test_create_client_for_emulator(self):
    """Emulator URLs (http://127.0.0.1:1000/<account>) must yield the account
    name from the URL path, both via the constructor and from_container_url."""
    container_client = ContainerClient(
        account_url='http://127.0.0.1:1000/devstoreaccount1',
        container_name='newcontainer',
        credential='Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==')

    self.assertEqual(container_client.container_name, "newcontainer")
    self.assertEqual(container_client.account_name, "devstoreaccount1")

    # Fix: the client built by from_container_url was previously discarded, so
    # the assertions below re-checked the constructor-built client and never
    # exercised the from_container_url parsing path. Keep and assert on it.
    container_client = ContainerClient.from_container_url(
        'http://127.0.0.1:1000/devstoreaccount1/newcontainer')
    self.assertEqual(container_client.container_name, "newcontainer")
    self.assertEqual(container_client.account_name, "devstoreaccount1")


@GlobalStorageAccountPreparer()
def test_request_callback_signed_header(self, resource_group, location, storage_account, storage_account_key):
# Arrange
Expand Down
11 changes: 11 additions & 0 deletions sdk/storage/azure-storage-blob/tests/test_blob_client_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,17 @@ def test_create_service_with_custom_account_endpoint_path(self):
self.assertEqual(service.primary_hostname, 'local-machine:11002/custom/account/path')
self.assertEqual(service.url, 'http://local-machine:11002/custom/account/path/foo/bar?snapshot=baz')

def test_create_blob_client_with_sub_directory_path_in_blob_name(self):
    """Parsing a blob URL whose blob name contains '/' keeps the directory
    segments in blob_name for both cloud and emulator endpoints."""
    cases = [
        # (url, note) — *.core.* endpoint vs. emulator endpoint with account in path
        "https://testaccount.blob.core.windows.net/containername/dir1/sub000/2010_Unit150_Ivan097_img0003.jpg",
        "http://127.0.0.1:1000/devstoreaccount1/containername/dir1/sub000/2010_Unit150_Ivan097_img0003.jpg",
    ]
    for url in cases:
        client = BlobClient.from_blob_url(url)
        self.assertEqual(client.container_name, "containername")
        self.assertEqual(client.blob_name, "dir1/sub000/2010_Unit150_Ivan097_img0003.jpg")

@GlobalStorageAccountPreparer()
@AsyncStorageTestCase.await_prepared_test
async def test_request_callback_signed_header_async(self, resource_group, location, storage_account, storage_account_key):
Expand Down
12 changes: 6 additions & 6 deletions sdk/storage/azure-storage-blob/tests/test_largest_block_blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
BlobServiceClient,
BlobBlock
)
from azure.storage.blob._shared.base_client import format_shared_key_credential
from azure.storage.blob._shared.base_client import _format_shared_key_credential

from _shared.testcase import StorageTestCase, GlobalStorageAccountPreparer

Expand Down Expand Up @@ -97,7 +97,7 @@ def test_put_block_bytes_largest(self, resource_group, location, storage_account
@GlobalStorageAccountPreparer()
def test_put_block_bytes_largest_without_network(self, resource_group, location, storage_account, storage_account_key):
payload_dropping_policy = PayloadDroppingPolicy()
credential_policy = format_shared_key_credential([storage_account.name, "dummy"], storage_account_key)
credential_policy = _format_shared_key_credential(storage_account.name, storage_account_key)
self._setup(storage_account, storage_account_key, [payload_dropping_policy, credential_policy])
blob = self._create_blob()

Expand Down Expand Up @@ -157,7 +157,7 @@ def test_put_block_stream_largest(self, resource_group, location, storage_accoun
@GlobalStorageAccountPreparer()
def test_put_block_stream_largest_without_network(self, resource_group, location, storage_account, storage_account_key):
payload_dropping_policy = PayloadDroppingPolicy()
credential_policy = format_shared_key_credential([storage_account.name, "dummy"], storage_account_key)
credential_policy = _format_shared_key_credential(storage_account.name, storage_account_key)
self._setup(storage_account, storage_account_key, [payload_dropping_policy, credential_policy])
blob = self._create_blob()

Expand Down Expand Up @@ -211,7 +211,7 @@ def test_create_largest_blob_from_path(self, resource_group, location, storage_a
@GlobalStorageAccountPreparer()
def test_create_largest_blob_from_path_without_network(self, resource_group, location, storage_account, storage_account_key):
payload_dropping_policy = PayloadDroppingPolicy()
credential_policy = format_shared_key_credential([storage_account.name, "dummy"], storage_account_key)
credential_policy = _format_shared_key_credential(storage_account.name, storage_account_key)
self._setup(storage_account, storage_account_key, [payload_dropping_policy, credential_policy])
blob_name = self._get_blob_reference()
blob = self.bsc.get_blob_client(self.container_name, blob_name)
Expand All @@ -237,7 +237,7 @@ def test_create_largest_blob_from_path_without_network(self, resource_group, loc
@GlobalStorageAccountPreparer()
def test_create_largest_blob_from_stream_without_network(self, resource_group, location, storage_account, storage_account_key):
payload_dropping_policy = PayloadDroppingPolicy()
credential_policy = format_shared_key_credential([storage_account.name, "dummy"], storage_account_key)
credential_policy = _format_shared_key_credential(storage_account.name, storage_account_key)
self._setup(storage_account, storage_account_key, [payload_dropping_policy, credential_policy])
blob_name = self._get_blob_reference()
blob = self.bsc.get_blob_client(self.container_name, blob_name)
Expand All @@ -257,7 +257,7 @@ def test_create_largest_blob_from_stream_without_network(self, resource_group, l
@GlobalStorageAccountPreparer()
def test_create_largest_blob_from_stream_single_upload_without_network(self, resource_group, location, storage_account, storage_account_key):
payload_dropping_policy = PayloadDroppingPolicy()
credential_policy = format_shared_key_credential([storage_account.name, "dummy"], storage_account_key)
credential_policy = _format_shared_key_credential(storage_account.name, storage_account_key)
self._setup(storage_account, storage_account_key, [payload_dropping_policy, credential_policy],
max_single_put_size=LARGEST_SINGLE_UPLOAD_SIZE)
blob_name = self._get_blob_reference()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from azure.storage.blob import (
BlobBlock
)
from azure.storage.blob._shared.base_client import format_shared_key_credential
from azure.storage.blob._shared.base_client import _format_shared_key_credential
from azure.storage.blob._shared.constants import CONNECTION_TIMEOUT, READ_TIMEOUT

from _shared.asynctestcase import AsyncStorageTestCase
Expand Down Expand Up @@ -123,7 +123,7 @@ async def test_put_block_bytes_largest(self, resource_group, location, storage_a
@AsyncStorageTestCase.await_prepared_test
async def test_put_block_bytes_largest_without_network(self, resource_group, location, storage_account, storage_account_key):
payload_dropping_policy = PayloadDroppingPolicy()
credential_policy = format_shared_key_credential([storage_account.name, "dummy"], storage_account_key)
credential_policy = _format_shared_key_credential(storage_account.name, storage_account_key)
await self._setup(storage_account, storage_account_key, [payload_dropping_policy, credential_policy])
blob = await self._create_blob()

Expand Down Expand Up @@ -185,7 +185,7 @@ async def test_put_block_stream_largest(self, resource_group, location, storage_
@AsyncStorageTestCase.await_prepared_test
async def test_put_block_stream_largest_without_network(self, resource_group, location, storage_account, storage_account_key):
payload_dropping_policy = PayloadDroppingPolicy()
credential_policy = format_shared_key_credential([storage_account.name, "dummy"], storage_account_key)
credential_policy = _format_shared_key_credential(storage_account.name, storage_account_key)
await self._setup(storage_account, storage_account_key, [payload_dropping_policy, credential_policy])
blob = await self._create_blob()

Expand Down Expand Up @@ -241,7 +241,7 @@ async def test_create_largest_blob_from_path(self, resource_group, location, sto
@AsyncStorageTestCase.await_prepared_test
async def test_create_largest_blob_from_path_without_network(self, resource_group, location, storage_account, storage_account_key):
payload_dropping_policy = PayloadDroppingPolicy()
credential_policy = format_shared_key_credential([storage_account.name, "dummy"], storage_account_key)
credential_policy = _format_shared_key_credential(storage_account.name, storage_account_key)
await self._setup(storage_account, storage_account_key, [payload_dropping_policy, credential_policy])
blob_name = self._get_blob_reference()
blob = self.bsc.get_blob_client(self.container_name, blob_name)
Expand All @@ -268,7 +268,7 @@ async def test_create_largest_blob_from_path_without_network(self, resource_grou
@AsyncStorageTestCase.await_prepared_test
async def test_create_largest_blob_from_stream_without_network(self, resource_group, location, storage_account, storage_account_key):
payload_dropping_policy = PayloadDroppingPolicy()
credential_policy = format_shared_key_credential([storage_account.name, "dummy"], storage_account_key)
credential_policy = _format_shared_key_credential(storage_account.name, storage_account_key)
await self._setup(storage_account, storage_account_key, [payload_dropping_policy, credential_policy])
blob_name = self._get_blob_reference()
blob = self.bsc.get_blob_client(self.container_name, blob_name)
Expand All @@ -289,7 +289,7 @@ async def test_create_largest_blob_from_stream_without_network(self, resource_gr
@AsyncStorageTestCase.await_prepared_test
async def test_create_largest_blob_from_stream_single_upload_without_network(self, resource_group, location, storage_account, storage_account_key):
payload_dropping_policy = PayloadDroppingPolicy()
credential_policy = format_shared_key_credential([storage_account.name, "dummy"], storage_account_key)
credential_policy = _format_shared_key_credential(storage_account.name, storage_account_key)
await self._setup(storage_account, storage_account_key, [payload_dropping_policy, credential_policy],
max_single_put_size=LARGEST_SINGLE_UPLOAD_SIZE)
blob_name = self._get_blob_reference()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,17 @@ def __init__(
raise ValueError("Invalid service: {}".format(service))
service_name = service.split('-')[0]
account = parsed_url.netloc.split(".{}.core.".format(service_name))

self.account_name = account[0] if len(account) > 1 else None
secondary_hostname = None
if not self.account_name and parsed_url.netloc.startswith("localhost") \
or parsed_url.netloc.startswith("127.0.0.1"):
self.account_name = parsed_url.path.strip("/")

self.credential = format_shared_key_credential(account, credential)
self.credential = _format_shared_key_credential(self.account_name, credential)
if self.scheme.lower() != "https" and hasattr(self.credential, "get_token"):
raise ValueError("Token credential is only supported with HTTPS.")

secondary_hostname = None
if hasattr(self.credential, "account_name"):
self.account_name = self.credential.account_name
secondary_hostname = "{}-secondary.{}.{}".format(
Expand Down Expand Up @@ -320,11 +325,11 @@ def __exit__(self, *args): # pylint: disable=arguments-differ
pass


def format_shared_key_credential(account, credential):
def _format_shared_key_credential(account_name, credential):
if isinstance(credential, six.string_types):
if len(account) < 2:
if not account_name:
raise ValueError("Unable to determine account name for shared key credential.")
credential = {"account_name": account[0], "account_key": credential}
credential = {"account_name": account_name, "account_key": credential}
if isinstance(credential, dict):
if "account_name" not in credential:
raise ValueError("Shared key credential missing 'account_name")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from azure.core.pipeline.policies import HTTPPolicy

from azure.core.exceptions import ResourceExistsError
from azure.storage.blob._shared.base_client import format_shared_key_credential
from azure.storage.blob._shared.base_client import _format_shared_key_credential
from azure.storage.filedatalake import DataLakeServiceClient
from testcase import (
StorageTestCase,
Expand All @@ -34,7 +34,7 @@ def setUp(self):
super(LargeFileTest, self).setUp()
url = self._get_account_url()
self.payload_dropping_policy = PayloadDroppingPolicy()
credential_policy = format_shared_key_credential([self.settings.STORAGE_DATA_LAKE_ACCOUNT_NAME, "dummy"],
credential_policy = _format_shared_key_credential(self.settings.STORAGE_DATA_LAKE_ACCOUNT_NAME,
self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY)
self.dsc = DataLakeServiceClient(url,
credential=self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from azure.core.exceptions import ResourceExistsError
from azure.core.pipeline.policies import SansIOHTTPPolicy
from azure.storage.blob._shared.base_client import format_shared_key_credential
from azure.storage.blob._shared.base_client import _format_shared_key_credential
from azure.storage.filedatalake.aio import DataLakeServiceClient
from testcase import (
StorageTestCase,
Expand All @@ -36,7 +36,7 @@ def setUp(self):
super(LargeFileTest, self).setUp()
url = self._get_account_url()
self.payload_dropping_policy = PayloadDroppingPolicy()
credential_policy = format_shared_key_credential([self.settings.STORAGE_DATA_LAKE_ACCOUNT_NAME, "dummy"],
credential_policy = _format_shared_key_credential(self.settings.STORAGE_DATA_LAKE_ACCOUNT_NAME,
self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY)
self.dsc = DataLakeServiceClient(url,
credential=self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY,
Expand Down
Loading

0 comments on commit a1039c8

Please sign in to comment.