Skip to content

Commit

Permalink
Properly escape multi-segment path parameters (databricks#252)
Browse files Browse the repository at this point in the history
## Changes
Ports databricks/databricks-sdk-go#869 to Java
SDK.

Currently, path parameters are directly interpolated into the request
URL without escaping. This means that characters like `/`, `?` and `#`
will not be percent-encoded and will affect the semantics of the URL,
starting a new path segment, query parameters, or fragment,
respectively. This means that it is impossible for users of the Files
API to upload/download objects that contain `?` or `#` in their name.
`/` is allowed in the path of the Files API, so it does not need to be
escaped.

The Files API is currently marked with `x-databricks-multi-segment`,
indicating that it should be permitted to have `/` characters but other
characters need to be percent-encoded. This PR implements this.

## Tests
- [x] Unit test for multi-segment path escaping behavior.
- [x] Updated integration test to use # and ? symbols in the file name.
  • Loading branch information
mgyucht authored and vikrantpuppala committed Apr 23, 2024
1 parent 480e55e commit 243392f
Show file tree
Hide file tree
Showing 5 changed files with 164 additions and 12 deletions.
22 changes: 19 additions & 3 deletions .codegen/impl.java.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import java.util.HashMap;

import com.databricks.sdk.core.ApiClient;
import com.databricks.sdk.core.DatabricksException;
import com.databricks.sdk.core.http.Encoding;
import com.databricks.sdk.support.Generated;

{{range .Package.ImportedEntities}}
Expand All @@ -25,9 +26,7 @@ class {{.PascalName}}Impl implements {{.PascalName}}Service {
{{range .Methods}}
@Override
public {{if not .Response.IsEmpty -}}{{template "type" .Response}}{{else}}void{{end}} {{.CamelName}}{{if .IsNameReserved}}Content{{end}}({{if .Request}}{{template "type" .Request}} request{{end}}) {
String path = {{if .PathParts -}}
String.format("{{range .PathParts}}{{.Prefix}}{{if or .Field .IsAccountId}}%s{{end}}{{ end }}"{{ range .PathParts }}{{if .Field}}, request.get{{.Field.PascalName}}(){{ else if .IsAccountId }}, apiClient.configuredAccountID(){{end}}{{ end }})
{{- else}}"{{.Path}}"{{end}};
String path = {{ template "path" . }};
{{ template "headers" . -}}
{{ if .Response.IsEmpty -}}
{{ template "api-call" . }}
Expand All @@ -39,6 +38,23 @@ class {{.PascalName}}Impl implements {{.PascalName}}Service {
{{end}}
}

{{- define "path" -}}
{{- if .PathParts -}}
String.format("{{range .PathParts -}}
{{- .Prefix -}}
{{- if or .Field .IsAccountId -}}%s{{- end -}}
{{- end -}}"
{{- range .PathParts -}}
{{- if and .Field .Field.IsPathMultiSegment -}}, Encoding.encodeMultiSegmentPathParameter(request.get{{.Field.PascalName}}())
{{- else if .Field -}}, request.get{{.Field.PascalName}}()
{{- else if .IsAccountId -}}, apiClient.configuredAccountID()
{{- end -}}
{{- end -}})
{{- else -}}
"{{.Path}}"
{{- end -}}
{{- end -}}

{{ define "api-call" }}
apiClient.{{.Verb}}(path
{{- if .Request}}, {{ template "request-param" .}}{{end}}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
package com.databricks.sdk.core.http;

import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.BitSet;

/**
* Utility class for encoding strings for use in URLs.
*
* <p>Adapted from URLEncodingUtils.java from Apache's HttpClient library.
*/
public class Encoding {

/**
* Unreserved characters, i.e. alphanumeric, plus: {@code _ - ! . ~ ' ( ) *}
*
* <p>This list is the same as the {@code unreserved} list in <a
* href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>
*/
private static final BitSet UNRESERVED = new BitSet(256);

/**
* Characters which are safe to use in a path, excluding /, i.e. {@link #UNRESERVED} plus
* punctuation plus @
*/
private static final BitSet PATHSAFE = new BitSet(256);

/** Characters which are safe to use in a path, including /. */
private static final BitSet PATH_SPECIAL = new BitSet(256);

static {
// unreserved chars
// alpha characters
for (int i = 'a'; i <= 'z'; i++) {
UNRESERVED.set(i);
}
for (int i = 'A'; i <= 'Z'; i++) {
UNRESERVED.set(i);
}
// numeric characters
for (int i = '0'; i <= '9'; i++) {
UNRESERVED.set(i);
}
UNRESERVED.set('_'); // these are the characters of the "mark" list
UNRESERVED.set('-');
UNRESERVED.set('.');
UNRESERVED.set('*');
UNRESERVED.set('!');
UNRESERVED.set('~');
UNRESERVED.set('\'');
UNRESERVED.set('(');
UNRESERVED.set(')');

// URL path safe
PATHSAFE.or(UNRESERVED);
PATHSAFE.set(';'); // param separator
PATHSAFE.set(':'); // RFC 2396
PATHSAFE.set('@');
PATHSAFE.set('&');
PATHSAFE.set('=');
PATHSAFE.set('+');
PATHSAFE.set('$');
PATHSAFE.set(',');

PATH_SPECIAL.or(PATHSAFE);
PATH_SPECIAL.set('/');
}

private static final int RADIX = 16;

private static String urlEncode(
final String content,
final Charset charset,
final BitSet safechars,
final boolean blankAsPlus) {
if (content == null) {
return null;
}
final StringBuilder buf = new StringBuilder();
final ByteBuffer bb = charset.encode(content);
while (bb.hasRemaining()) {
final int b = bb.get() & 0xff;
if (safechars.get(b)) {
buf.append((char) b);
} else if (blankAsPlus && b == ' ') {
buf.append('+');
} else {
buf.append("%");
final char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, RADIX));
final char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, RADIX));
buf.append(hex1);
buf.append(hex2);
}
}
return buf.toString();
}

public static String encodeMultiSegmentPathParameter(String param) {
return urlEncode(param, StandardCharsets.UTF_8, PATH_SPECIAL, false);
}
}

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package com.databricks.sdk.core.http;

import static org.junit.jupiter.api.Assertions.assertEquals;

import org.junit.jupiter.api.Test;

public class EncodingTest {
@Test
public void encodeMultiSegmentPathParameter() {
assertEquals("/foo/bar", Encoding.encodeMultiSegmentPathParameter("/foo/bar"));
assertEquals("a%3Fb%23c", Encoding.encodeMultiSegmentPathParameter("a?b#c"));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ void uploadAndDownloadFile(WorkspaceClient workspace) throws IOException {
workspace,
(volumePath) -> {
// Generate a random file name and random contents of 10 KiB.
String fileName = NameUtils.uniqueName(volumePath + "/test");
String fileName = NameUtils.uniqueName(volumePath + "/test-with-?-and-#");
byte[] fileContents = new byte[1024 * 10];
for (int i = 0; i < fileContents.length; i++) {
fileContents[i] = (byte) (i & 0xFF);
Expand Down

0 comments on commit 243392f

Please sign in to comment.