Skip to content

Commit

Permalink
LUCENE-9651 Update benchmark module docs (#759)
Browse files Browse the repository at this point in the history
  • Loading branch information
madrob authored Mar 23, 2022
1 parent 5450d72 commit b3906e9
Show file tree
Hide file tree
Showing 11 changed files with 28 additions and 29 deletions.
9 changes: 4 additions & 5 deletions gradle/datasets/external-datasets.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ configure(project(":lucene:benchmark")) {
apply plugin: "de.undercouch.download"

ext {
dataDir = file("data")
dataDir = file("work")
}

task getEnWiki(type: Download) {
Expand Down Expand Up @@ -120,10 +120,9 @@ configure(project(":lucene:benchmark")) {
task getReuters(type: Download) {
ext {
name = "reuters21578"
// note: there is no HTTPS url and we don't care because this is merely test/perf data
src = "http://www.daviddlewis.com/resources/testcollections/reuters21578/${name}.tar.gz"
src = "https://kdd.ics.uci.edu/databases/${name}/${name}.tar.gz"
intermediate = file("${dataDir}/${name}.tar.gz")
dst = file("${dataDir}/${name}")
dst = file("${dataDir}/reuters-out")
}

outputs.dir ext.dst
Expand Down Expand Up @@ -171,4 +170,4 @@ configure(project(":lucene:benchmark")) {
logger.lifecycle("Downloading data set ${task.ext.name} from ${task.ext.src} to ${task.ext.dst}...")
}
}
}
}
1 change: 1 addition & 0 deletions gradle/validation/validate-source-patterns.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ allprojects {
configure(project(':lucene:benchmark')) {
project.tasks.withType(ValidateSourcePatternsTask) {
sourceFiles.exclude 'data/**'
sourceFiles.exclude 'work/**'

// Known .txt offenders.
sourceFiles.exclude '**/reuters.first20.lines.txt', '**/trecQRels.txt'
Expand Down
3 changes: 2 additions & 1 deletion lucene/benchmark/.gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
/data
/data
/work
4 changes: 2 additions & 2 deletions lucene/benchmark/conf/createLineFile.alg
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
# This alg will process the Reuters documents feed to produce a
# single file that contains all documents, one per line.
#
# To use this, first cd to benchmark and then run:
# To use this, run:
#
# ant run-task -Dtask.alg=conf/createLineFile.alg
# ./gradlew :lucene:benchmark:run -PtaskAlg=conf/createLineFile.alg
#
# Then, to index the documents in the line file, see
# indexLineFile.alg.
Expand Down
4 changes: 2 additions & 2 deletions lucene/benchmark/conf/extractWikipedia.alg
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
# This alg will process the Wikipedia documents feed to produce a
# single file that contains all documents, one per line.
#
# To use this, first cd to benchmark and then run:
# To use this, run:
#
# ant run-task -Dtask.alg=conf/extractWikipedia.alg
# ./gradlew :lucene:benchmark:run -PtaskAlg=conf/extractWikipedia.alg
#
# Then, to index the documents in the line file, see
# indexLineFile.alg.
Expand Down
5 changes: 2 additions & 3 deletions lucene/benchmark/conf/indexLineFile.alg
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,9 @@
# document to let you more accurately measure time spent analyzing and
# indexing your documents vs time spent creating the documents.
#
# To use this, you must first run the createLineFile.alg, then cd to
# benchmark and then run:
# To use this, you must first run the createLineFile.alg, then run:
#
# ant run-task -Dtask.alg=conf/indexLineFile.alg
# ./gradlew :lucene:benchmark:run -PtaskAlg=conf/indexLineFile.alg
#

analyzer=org.apache.lucene.analysis.core.SimpleAnalyzer
Expand Down
3 changes: 1 addition & 2 deletions lucene/benchmark/conf/micro-standard.alg
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ doc.tokenized=true
doc.term.vector=false
log.step=500

work.dir=data
docs.dir=reuters21578
docs.dir=reuters-out

#content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
Expand Down
4 changes: 2 additions & 2 deletions lucene/benchmark/conf/readContentSource.alg
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
# gather baselines for operations like indexing (if reading from the content
# source takes 'X' time, we cannot index faster).
#
# To use this, first cd to benchmark and then run:
# To use this, run:
#
# ant run-task -Dtask.alg=conf/readContentSource.alg
# ./gradlew :lucene:benchmark:run -PtaskAlg=conf/readContentSource.alg
#

# Where to get documents from:
Expand Down
4 changes: 2 additions & 2 deletions lucene/benchmark/conf/tokenize.alg
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
# This alg reads all tokens out of a document but does not index them.
# This is useful for benchmarking tokenizers.
#
# To use this, cd to benchmark and then run:
# To use this, run:
#
# ant run-task -Dtask.alg=conf/tokenize.alg
# ./gradlew :lucene:benchmark:run -PtaskAlg=conf/tokenize.alg
#

content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,9 @@
* <ul>
* <li>./gradlew -p lucene/benchmark getReuters run <br>
* - would run the <code>micro-standard.alg</code> "algorithm".
* <li>ant run-task -Dtask.alg=conf/compound-penalty.alg <br>
* <li>./gradlew -p lucene/benchmark getReuters run -PtaskAlg=conf/compound-penalty.alg <br>
* - would run the <code>compound-penalty.alg</code> "algorithm".
* <li>ant run-task -Dtask.alg=[full-path-to-your-alg-file] <br>
* <li>./gradlew -p lucene/benchmark getReuters run -PtaskAlg=[full-path-to-your-alg-file] <br>
* - would run <code>your perf test</code> "algorithm".
* <li>java org.apache.lucene.benchmark.byTask.programmatic.Sample <br>
* - would run a performance test programmatically - without using an alg file. This is less
Expand Down Expand Up @@ -131,8 +131,8 @@
* benchmark.ext.classpath property:
*
* <ul>
* <li>ant run-task -Dtask.alg=[full-path-to-your-alg-file] <span style="color:
* #FF0000">-Dbenchmark.ext.classpath=/mydir/classes </span> -Dtask.mem=512M
* <li>./gradlew -p lucene/benchmark run -PtaskAlg=[full-path-to-your-alg-file] <span
* style="color: #FF0000">-Dbenchmark.ext.classpath=/mydir/classes </span> -PmaxHeapSize=512M
* </ul>
*
* <p><u>External tasks</u>: When writing your own tasks under a package other than
Expand Down Expand Up @@ -494,7 +494,7 @@
* </pre>
*
* <p>The command line for running this sample: <br>
* <code>ant run-task -Dtask.alg=conf/sample.alg</code>
* <code>./gradlew -p lucene/benchmark getReuters run -PtaskAlg=conf/sample.alg</code>
*
* <p>The output report from running this test contains the following:
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,12 @@
* alternate views or to take in command line options. When reporting benchmarking runs you should
* state any alterations you have made.
*
* <p>To run the short version of the StandardBenchmarker, call "ant run-micro-standard". This
* should take a minute or so to complete and give you a preliminary idea of how your change affects
* the code.
* <p>To run the short version of the StandardBenchmarker, call "./gradlew -p lucene/benchmark run".
* This should take a minute or so to complete and give you a preliminary idea of how your change
* affects the code.
*
* <p>To run the long version of the StandardBenchmarker, call "ant run-standard". This takes
* considerably longer.
* <p>To run the long version of the StandardBenchmarker, call "./gradlew -p lucene/benchmark run
* -PtaskAlg=conf/standard.alg". This takes considerably longer, maybe 10 minutes.
*
* <p>The original code for these classes was donated by Andrzej Bialecki at
* http://issues.apache.org/jira/browse/LUCENE-675 and has been updated by Grant Ingersoll to make
Expand Down

0 comments on commit b3906e9

Please sign in to comment.