Skip to content

Commit

Permalink
Add support for LRB traces to the simulator
Browse files Browse the repository at this point in the history
LRB offers a 17gb cdn trace (2.8B events) that it was analyzed against.

The archive support was improved so that a tar.gz can be supplied and
the entries will be concatenated.

When hit/miss penalty support was added, the Optimal policy degraded
by having to hold the entire event in-memory. This is now conditioned
so that for non-latency traces only the key (primitive long) is held.
This reduces GC pressure and improves performance on large traces.
  • Loading branch information
ben-manes committed Nov 22, 2020
1 parent b95222a commit 23cd333
Show file tree
Hide file tree
Showing 14 changed files with 249 additions and 75 deletions.
3 changes: 3 additions & 0 deletions checksum.xml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
<trusted-key id='86fdc7e2a11262cb' group='commons-codec' />
<trusted-key id='1861c322c56014b2' group='commons-collections' />
<trusted-key id='a41f13c999945293' group='commons-collections' />
<trusted-key id='86fdc7e2a11262cb' group='commons-io' />
<trusted-key id='9daadc1c9fcc82d0' group='commons-io' />
<trusted-key id='1241bc872c5e4ec0' group='commons-lang' />
<trusted-key id='1861c322c56014b2' group='commons-lang' />
Expand All @@ -110,6 +111,7 @@
<trusted-key id='6425559c47cc79c4' group='javax.ws.rs' />
<trusted-key id='0315bfb7970a144f' group='javax.xml.bind' />
<trusted-key id='4044edf1bb73efea' group='jaxen' />
<trusted-key id='c6fc46eb51cf569c' group='jaxen' />
<trusted-key id='72385ff0af338d52' group='joda-time' />
<trusted-key id='85911f425ec61b51' group='junit' />
<trusted-key id='efe8086f9e93774e' group='junit' />
Expand Down Expand Up @@ -137,6 +139,7 @@
<trusted-key id='bff2ee42c8282e76' group='org.apache.felix' />
<trusted-key id='de78987a9cd4d9d3' group='org.apache.htrace' />
<trusted-key id='7c25280eae63ebe5' group='org.apache.httpcomponents' />
<trusted-key id='11b1a21ad89afded' group='org.apache.jackrabbit' />
<trusted-key id='1de461528f1f1b2a' group='org.apache.jackrabbit' />
<trusted-key id='d108ff879aa63950' group='org.apache.jackrabbit' />
<trusted-key id='3595395eb3d8e1ba' group='org.apache.logging.log4j' />
Expand Down
4 changes: 2 additions & 2 deletions gradle/dependencies.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ ext {
flipTables: '1.1.0',
googleJavaFormat: '1.7',
guava: '30.0-jre',
jackrabbit: '1.34.0',
jackrabbit: '1.36',
jandex: '2.2.2.Final',
jamm: '0.3.3',
javaObjectLayout: '0.14',
Expand Down Expand Up @@ -97,7 +97,7 @@ ext {
semanticVersioning: '1.1.0',
shadow: '6.1.0',
sonarqube: '3.0',
spotbugs: '4.1.3',
spotbugs: '4.1.4',
spotbugsPlugin: '4.6.0',
stats: '0.2.2',
versions: '0.36.0',
Expand Down
2 changes: 1 addition & 1 deletion gradle/wrapper/gradle-wrapper.properties
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
distributionUrl=https\://services.gradle.org/distributions/gradle-6.7-bin.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-6.7.1-bin.zip
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,11 @@ public Policy makePolicy() {
}

private Stream<AccessEvent> readEventStream(BasicSettings settings) throws IOException {
if (settings.isSynthetic()) {
return Synthetic.generate(settings).events();
if (settings.trace().isSynthetic()) {
return Synthetic.generate(settings.trace()).events();
}
List<String> filePaths = settings.traceFiles().paths();
TraceFormat format = settings.traceFiles().format();
List<String> filePaths = settings.trace().traceFiles().paths();
TraceFormat format = settings.trace().traceFiles().format();
return format.readFiles(filePaths).events();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -80,22 +80,8 @@ public long maximumSize() {
return config().getLong("maximum-size");
}

public boolean isFiles() {
return config().getString("source").equals("files");
}

public boolean isSynthetic() {
return config().getString("source").equals("synthetic");
}

public TraceFilesSettings traceFiles() {
checkState(isFiles());
return new TraceFilesSettings();
}

public SyntheticSettings synthetic() {
checkState(isSynthetic());
return new SyntheticSettings();
public TraceSettings trace() {
return new TraceSettings();
}

/** Returns the config resolved at the simulator's path. */
Expand Down Expand Up @@ -185,6 +171,29 @@ public boolean enabled() {
}
}

public final class TraceSettings {
public long skip() {
return config().getLong("trace.skip");
}
public long limit() {
return config().getIsNull("trace.limit") ? Long.MAX_VALUE : config().getLong("trace.limit");
}
public boolean isFiles() {
return config().getString("trace.source").equals("files");
}
public boolean isSynthetic() {
return config().getString("trace.source").equals("synthetic");
}
public TraceFilesSettings traceFiles() {
checkState(isFiles());
return new TraceFilesSettings();
}
public SyntheticSettings synthetic() {
checkState(isSynthetic());
return new SyntheticSettings();
}
}

public final class TraceFilesSettings {
public List<String> paths() {
return config().getStringList("files.paths");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,9 @@ private void broadcast() {
return;
}

try (Stream<AccessEvent> events = traceReader.events()) {
long skip = settings.trace().skip();
long limit = settings.trace().limit();
try (Stream<AccessEvent> events = traceReader.events().skip(skip).limit(limit)) {
Iterators.partition(events.iterator(), batchSize)
.forEachRemaining(batch -> router.route(batch, self()));
router.route(FINISH, self());
Expand All @@ -128,11 +130,11 @@ private void broadcast() {

/** Returns a trace reader for the access events. */
private TraceReader makeTraceReader() {
if (settings.isSynthetic()) {
return Synthetic.generate(settings);
if (settings.trace().isSynthetic()) {
return Synthetic.generate(settings.trace());
}
List<String> filePaths = settings.traceFiles().paths();
TraceFormat format = settings.traceFiles().format();
List<String> filePaths = settings.trace().traceFiles().paths();
TraceFormat format = settings.trace().traceFiles().format();
return format.readFiles(filePaths);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import com.github.benmanes.caffeine.cache.simulator.BasicSettings.SyntheticSettings.HotspotSettings;
import com.github.benmanes.caffeine.cache.simulator.BasicSettings.SyntheticSettings.UniformSettings;
import com.github.benmanes.caffeine.cache.simulator.BasicSettings.TraceSettings;
import com.github.benmanes.caffeine.cache.simulator.parser.TraceReader.KeyOnlyTraceReader;

import site.ycsb.generator.CounterGenerator;
Expand All @@ -42,7 +43,7 @@ public final class Synthetic {
private Synthetic() {}

/** Returns a sequence of events based on the setting's distribution. */
public static KeyOnlyTraceReader generate(BasicSettings settings) {
public static KeyOnlyTraceReader generate(TraceSettings settings) {
int events = settings.synthetic().events();
switch (settings.synthetic().distribution().toLowerCase(US)) {
case "counter":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,27 @@
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.io.SequenceInputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Iterator;
import java.util.List;
import java.util.function.Function;

import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.ArchiveStreamFactory;
import org.apache.commons.compress.compressors.CompressorException;
import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.checkerframework.checker.nullness.qual.Nullable;
import org.tukaani.xz.XZInputStream;

import com.google.common.base.Throwables;
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterators;

/**
* A skeletal implementation that reads the trace files into a data stream.
*
Expand All @@ -46,28 +56,70 @@ protected AbstractTraceReader(String filePath) {
}

/** Returns the input stream of the trace data. */
protected InputStream readFile() {
@SuppressWarnings("PMD.CloseResource")
protected BufferedInputStream readFile() {
BufferedInputStream input = null;
try {
BufferedInputStream input = new BufferedInputStream(openFile(), BUFFER_SIZE);
input.mark(100);
try {
return new XZInputStream(input);
} catch (IOException e) {
input.reset();
}
try {
return new CompressorStreamFactory().createCompressorInputStream(input);
} catch (CompressorException e) {
input.reset();
input = new BufferedInputStream(openFile(), BUFFER_SIZE);
List<Function<InputStream, InputStream>> extractors = ImmutableList.of(
this::tryXZ, this::tryCompressed, this::tryArchived);
for (Function<InputStream, InputStream> extractor : extractors) {
input.mark(100);
InputStream next = extractor.apply(input);
if (next == null) {
input.reset();
} else {
input = new BufferedInputStream(next, BUFFER_SIZE);
}
}
return input;
} catch (Throwable t) {
try {
return new ArchiveStreamFactory().createArchiveInputStream(input);
} catch (ArchiveException e) {
input.reset();
if (input != null) {
input.close();
}
} catch (IOException e) {
t.addSuppressed(e);
}
return input;
Throwables.throwIfUnchecked(t);
throw new RuntimeException(t);
}
}

/** Returns a uncompressed stream if XZ encoded, else {@code null}. */
private @Nullable InputStream tryXZ(InputStream input) {
try {
return new XZInputStream(input);
} catch (IOException e) {
throw new UncheckedIOException(e);
return null;
}
}

/** Returns a uncompressed stream, else {@code null}. */
private @Nullable InputStream tryCompressed(InputStream input) {
try {
return new CompressorStreamFactory().createCompressorInputStream(input);
} catch (CompressorException e) {
return null;
}
}

/** Returns a unarchived stream, else {@code null}. */
private @Nullable InputStream tryArchived(InputStream input) {
try {
ArchiveInputStream archive = new ArchiveStreamFactory().createArchiveInputStream(input);
Iterator<InputStream> entries = new AbstractIterator<InputStream>() {
@Override protected InputStream computeNext() {
try {
return (archive.getNextEntry() == null) ? endOfData() : archive;
} catch (IOException e) {
return endOfData();
}
}
};
return new SequenceInputStream(Iterators.asEnumeration(entries));
} catch (ArchiveException e) {
return null;
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

import static java.util.Objects.requireNonNull;

import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.IOException;
Expand Down Expand Up @@ -46,7 +45,7 @@ protected BinaryTraceReader(String filePath) {
@Override
@SuppressWarnings("PMD.CloseResource")
public Stream<AccessEvent> events() {
DataInputStream input = new DataInputStream(new BufferedInputStream(readFile()));
DataInputStream input = new DataInputStream(readFile());
Stream<AccessEvent> stream = StreamSupport.stream(Spliterators.spliteratorUnknownSize(
new TraceIterator(input), Spliterator.ORDERED), /* parallel */ false);
return stream.onClose(() -> Closeables.closeQuietly(input));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import com.github.benmanes.caffeine.cache.simulator.parser.gradle.GradleTraceReader;
import com.github.benmanes.caffeine.cache.simulator.parser.kaggle.OutbrainTraceReader;
import com.github.benmanes.caffeine.cache.simulator.parser.lirs.LirsTraceReader;
import com.github.benmanes.caffeine.cache.simulator.parser.lrb.LrbTraceReader;
import com.github.benmanes.caffeine.cache.simulator.parser.scarab.ScarabTraceReader;
import com.github.benmanes.caffeine.cache.simulator.parser.snia.cambridge.CambridgeTraceReader;
import com.github.benmanes.caffeine.cache.simulator.parser.snia.parallel.K5cloudTraceReader;
Expand Down Expand Up @@ -66,6 +67,7 @@ public enum TraceFormat {
CORDA(CordaTraceReader::new),
GRADLE(GradleTraceReader::new),
LIRS(LirsTraceReader::new),
LRB(LrbTraceReader::new),
OUTBRAIN(OutbrainTraceReader::new),
SCARAB(ScarabTraceReader::new),
SNIA_CAMBRIDGE(CambridgeTraceReader::new),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*
* Copyright 2020 Ben Manes. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.github.benmanes.caffeine.cache.simulator.parser.lrb;

import static com.github.benmanes.caffeine.cache.simulator.policy.Policy.Characteristic.WEIGHTED;

import java.util.Set;
import java.util.stream.Stream;

import com.github.benmanes.caffeine.cache.simulator.parser.TextTraceReader;
import com.github.benmanes.caffeine.cache.simulator.policy.AccessEvent;
import com.github.benmanes.caffeine.cache.simulator.policy.Policy.Characteristic;
import com.google.common.collect.Sets;

/**
* A reader for the trace files provided by the authors of the LRB algorithm. See
* <a href="https://github.com/sunnyszy/lrb#trace">traces</a>.
*
* @author ben.manes@gmail.com (Ben Manes)
*/
public final class LrbTraceReader extends TextTraceReader {

public LrbTraceReader(String filePath) {
super(filePath);
}

@Override
public Set<Characteristic> characteristics() {
return Sets.immutableEnumSet(WEIGHTED);
}

@Override
public Stream<AccessEvent> events() {
return lines()
.map(line -> line.split(" "))
.map(array -> {
return AccessEvent.forKeyAndWeight(
Long.parseLong(array[1]), Integer.parseInt(array[2]));
});
}
}
Loading

2 comments on commit 23cd333

@ohadeytan
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't it the same format as adapt_size? (it comes from the same group)

@ben-manes
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ohadeytan oops, you’re right. There is a new column, extra_features, which I ignored. I can’t tell if they still hash the key+weight, though, as the caffeine runner does not. So maybe having both isn’t so bad...

Please sign in to comment.