Skip to content

Commit

Permalink
Optimize ASCII dir names reads
Browse files Browse the repository at this point in the history
  • Loading branch information
franz1981 committed Nov 16, 2024
1 parent e09b0e5 commit 91ef695
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileVisitResult;
import java.nio.file.FileVisitor;
import java.nio.file.Files;
Expand Down Expand Up @@ -116,6 +117,9 @@ public static SerializedApplication read(InputStream inputStream, Path appRoot)
Set<String> parentFirstPackages = new HashSet<>();
int numPaths = in.readUnsignedShort();
ClassLoadingResource[] allClassLoadingResources = new ClassLoadingResource[numPaths];
// tmp buffer for reading the directory names
// TODO the information of the biggest dirName could be saved somewhere in the serialized form
byte[] tmpReadNameBytes = null;
for (int pathCount = 0; pathCount < numPaths; pathCount++) {
String path = in.readUTF();
boolean hasManifest = in.readBoolean();
Expand All @@ -128,12 +132,28 @@ public static SerializedApplication read(InputStream inputStream, Path appRoot)
allClassLoadingResources[pathCount] = resource;
int numDirs = in.readUnsignedShort();
for (int i = 0; i < numDirs; ++i) {
String fullDirName = in.readUTF();
int encodedLength = in.readShort();
// negative length indicates that the string is UTF-8 encoded
final boolean isUTF8 = encodedLength < 0;
if (isUTF8) {
encodedLength = -encodedLength;
}
// enlarge the scratch buffer if needed
if (tmpReadNameBytes == null || tmpReadNameBytes.length < encodedLength) {
// let's be generous and double the size, to save further resizes
tmpReadNameBytes = new byte[encodedLength * 2];
}
int readBytes = in.read(tmpReadNameBytes, 0, encodedLength);
assert readBytes == encodedLength;
// TODO not sure if having separate call-sites is better or not: it means compiling 2 methods instead of one!
String fullDirName = isUTF8 ? new String(tmpReadNameBytes, 0, encodedLength, StandardCharsets.UTF_8)
// Spare String from having to verify whether the byte[] content really contains negatives!
: new String(tmpReadNameBytes, 0, 0, encodedLength);
var dirName = StringView.of(fullDirName);
// now try to be smart and save some memory by NOT having substrings over and over again
final int subDirs = in.readInt();
for (int j = 0; j < subDirs; j++) {
var subDirName = StringView.subOf(fullDirName, in.readInt(), in.readInt());
var subDirName = StringView.subOf(fullDirName, in.readInt(), in.readUnsignedShort());
resourceDirectoryTracker.addResourceDir(subDirName, resource);
}
resourceDirectoryTracker.addResourceDir(dirName, resource);
Expand All @@ -150,8 +170,8 @@ public static SerializedApplication read(InputStream inputStream, Path appRoot)
}
// this map is populated correctly because the JarResource entries are added to allClassLoadingResources
// in the same order as the classpath was written during the writing of the index
Map<String, ClassLoadingResource[]> directlyIndexedResourcesIndexMap = new HashMap<>();
int directlyIndexedSize = in.readUnsignedShort();
Map<String, ClassLoadingResource[]> directlyIndexedResourcesIndexMap = new HashMap<>(directlyIndexedSize);
for (int i = 0; i < directlyIndexedSize; i++) {
String resource = in.readUTF();
int indexesSize = in.readUnsignedShort();
Expand Down Expand Up @@ -254,7 +274,6 @@ private static List<String> writeJar(DataOutputStream out, Path jar) throws IOEx
out.writeShort(dirs.size());
for (String dirName : dirs) {
// push this a bit forward to help the read to get faster!
// TODO: we could check if it's an ASCII string too and further optimize it
// write the positions of each / in the string
var subDirs = new ArrayList<String>();
int subDirLength = dirName.indexOf('/');
Expand All @@ -265,12 +284,16 @@ private static List<String> writeJar(DataOutputStream out, Path jar) throws IOEx
subDirLength = dirName.indexOf('/', subDirLength + 1);
}
// write in the opposite order here, to hydrate StringView(s) in the right order
out.writeUTF(dirName);
// TODO these could be made cheaper
var dirNameUtf8 = dirName.getBytes(StandardCharsets.UTF_8);
// using a negative length to indicate that the string is UTF-8 encoded, which should be less frequent
int writtenLength = dirNameUtf8.length == dirName.length() ? dirName.length() : -dirNameUtf8.length;
out.writeShort(writtenLength);
out.write(dirNameUtf8);
// TODO these could be made cheaper; maybe a short?
out.writeInt(subDirs.size());
for (String subDir : subDirs) {
out.writeInt(subDir.hashCode());
out.writeInt(subDir.length());
out.writeShort(subDir.length());
}
}
List<String> result = new ArrayList<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,26 +65,24 @@ private static boolean regionMatches(String a, String b, int length) {
/**
 * Creates a {@code StringView} of the given {@code length} backed by {@code s}.
 *
 * <ul>
 * <li>{@code length == 0} returns the shared {@code EMPTY} view;</li>
 * <li>{@code length == s.length()} returns a plain {@code StringView} over the whole string
 * (the supplied {@code hashCode} is not used in that case);</li>
 * <li>otherwise a {@code SubStringView} is created from {@code s}, {@code hashCode} and {@code length}.</li>
 * </ul>
 *
 * @param s the full string backing the view
 * @param hashCode hash code handed to {@code SubStringView} as-is; presumably the hash of
 *        {@code s.substring(0, length)}, which is what {@code validateHashCodeView} compares
 *        against - TODO confirm
 * @param length number of characters of {@code s} covered by the view
 * @return the view described above
 * @throws IllegalArgumentException if {@code length} is negative or greater than {@code s.length()}
 */
public static StringView subOf(String s, int hashCode, int length) {
// we're not performing any specific check at runtime since this is a likely cold path
// and have to trust the data read from the serialized form
// NOTE(review): the length checks below do run unconditionally; only the hash code is taken on trust
assert validateView(s, hashCode, length);
// assert validateHashCodeView(s, hashCode, length);
if (length < 0) {
throw new IllegalArgumentException("Length must be positive or zero");
}
// zero-length views share a single instance
if (length == 0) {
return EMPTY;
}
// the view spans the whole string: no need for a sub-view carrying a precomputed hash code
if (length == s.length()) {
return new StringView(s);
}
if (length > s.length()) {
// NOTE(review): two consecutive throw statements - this looks like the old and the new line of a
// diff rendered together; only one of them can be meant to be live - confirm against the real file
throw new IllegalArgumentException("Length must be less than or equal to the full string length");
throw new IllegalArgumentException("Length must be less than or equal to the full string length: " + s
+ " hashCode = " + hashCode + " length = " + length);
}
// 0 < length < s.length(): build the sub-view with the caller-supplied hash code
return new SubStringView(s, hashCode, length);
}

private static boolean validateView(String s, int hashCode, int length) {
if (length < 0) {
throw new IllegalArgumentException("Length must be positive");
}
if (length > s.length()) {
throw new IllegalArgumentException("Length must be less than or equal to the full string length");
}
private static boolean validateHashCodeView(String s, int hashCode, int length) {
if (s.substring(0, length).hashCode() != hashCode) {
throw new IllegalArgumentException("Hash code does not match the substring hash code");
}
Expand Down

0 comments on commit 91ef695

Please sign in to comment.