forked from databricks/databricks-sdk-java
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Properly escape multi-segment path parameters (databricks#252)
## Changes Ports databricks/databricks-sdk-go#869 to Java SDK. Currently, path parameters are directly interpolated into the request URL without escaping. This means that characters like `/`, `?` and `#` will not be percent-encoded and will affect the semantics of the URL, starting a new path segment, query parameters, or fragment, respectively. This means that it is impossible for users of the Files API to upload/download objects that contain `?` or `#` in their name. `/` is allowed in the path of the Files API, so it does not need to be escaped. The Files API is currently marked with `x-databricks-multi-segment`, indicating that it should be permitted to have `/` characters but other characters need to be percent-encoded. This PR implements this. ## Tests - [x] Unit test for multi-segment path escaping behavior. - [x] Updated integration test to use # and ? symbols in the file name.
- Loading branch information
1 parent
480e55e
commit 243392f
Showing
5 changed files
with
164 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
102 changes: 102 additions & 0 deletions
102
databricks-sdk-java/src/main/java/com/databricks/sdk/core/http/Encoding.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
package com.databricks.sdk.core.http; | ||
|
||
import java.nio.ByteBuffer; | ||
import java.nio.charset.Charset; | ||
import java.nio.charset.StandardCharsets; | ||
import java.util.BitSet; | ||
|
||
/** | ||
* Utility class for encoding strings for use in URLs. | ||
* | ||
* <p>Adapted from URLEncodingUtils.java from Apache's HttpClient library. | ||
*/ | ||
public class Encoding { | ||
|
||
/** | ||
* Unreserved characters, i.e. alphanumeric, plus: {@code _ - ! . ~ ' ( ) *} | ||
* | ||
* <p>This list is the same as the {@code unreserved} list in <a | ||
* href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a> | ||
*/ | ||
private static final BitSet UNRESERVED = new BitSet(256); | ||
|
||
/** | ||
* Characters which are safe to use in a path, excluding /, i.e. {@link #UNRESERVED} plus | ||
* punctuation plus @ | ||
*/ | ||
private static final BitSet PATHSAFE = new BitSet(256); | ||
|
||
/** Characters which are safe to use in a path, including /. */ | ||
private static final BitSet PATH_SPECIAL = new BitSet(256); | ||
|
||
static { | ||
// unreserved chars | ||
// alpha characters | ||
for (int i = 'a'; i <= 'z'; i++) { | ||
UNRESERVED.set(i); | ||
} | ||
for (int i = 'A'; i <= 'Z'; i++) { | ||
UNRESERVED.set(i); | ||
} | ||
// numeric characters | ||
for (int i = '0'; i <= '9'; i++) { | ||
UNRESERVED.set(i); | ||
} | ||
UNRESERVED.set('_'); // these are the characters of the "mark" list | ||
UNRESERVED.set('-'); | ||
UNRESERVED.set('.'); | ||
UNRESERVED.set('*'); | ||
UNRESERVED.set('!'); | ||
UNRESERVED.set('~'); | ||
UNRESERVED.set('\''); | ||
UNRESERVED.set('('); | ||
UNRESERVED.set(')'); | ||
|
||
// URL path safe | ||
PATHSAFE.or(UNRESERVED); | ||
PATHSAFE.set(';'); // param separator | ||
PATHSAFE.set(':'); // RFC 2396 | ||
PATHSAFE.set('@'); | ||
PATHSAFE.set('&'); | ||
PATHSAFE.set('='); | ||
PATHSAFE.set('+'); | ||
PATHSAFE.set('$'); | ||
PATHSAFE.set(','); | ||
|
||
PATH_SPECIAL.or(PATHSAFE); | ||
PATH_SPECIAL.set('/'); | ||
} | ||
|
||
private static final int RADIX = 16; | ||
|
||
private static String urlEncode( | ||
final String content, | ||
final Charset charset, | ||
final BitSet safechars, | ||
final boolean blankAsPlus) { | ||
if (content == null) { | ||
return null; | ||
} | ||
final StringBuilder buf = new StringBuilder(); | ||
final ByteBuffer bb = charset.encode(content); | ||
while (bb.hasRemaining()) { | ||
final int b = bb.get() & 0xff; | ||
if (safechars.get(b)) { | ||
buf.append((char) b); | ||
} else if (blankAsPlus && b == ' ') { | ||
buf.append('+'); | ||
} else { | ||
buf.append("%"); | ||
final char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, RADIX)); | ||
final char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, RADIX)); | ||
buf.append(hex1); | ||
buf.append(hex2); | ||
} | ||
} | ||
return buf.toString(); | ||
} | ||
|
||
public static String encodeMultiSegmentPathParameter(String param) { | ||
return urlEncode(param, StandardCharsets.UTF_8, PATH_SPECIAL, false); | ||
} | ||
} |
37 changes: 29 additions & 8 deletions
37
databricks-sdk-java/src/main/java/com/databricks/sdk/service/files/FilesImpl.java
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
13 changes: 13 additions & 0 deletions
13
databricks-sdk-java/src/test/java/com/databricks/sdk/core/http/EncodingTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
package com.databricks.sdk.core.http; | ||
|
||
import static org.junit.jupiter.api.Assertions.assertEquals; | ||
|
||
import org.junit.jupiter.api.Test; | ||
|
||
public class EncodingTest { | ||
@Test | ||
public void encodeMultiSegmentPathParameter() { | ||
assertEquals("/foo/bar", Encoding.encodeMultiSegmentPathParameter("/foo/bar")); | ||
assertEquals("a%3Fb%23c", Encoding.encodeMultiSegmentPathParameter("a?b#c")); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters