/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Locale;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;

// java -cp /home/rmuir/workspace/lucene/lucene/core/build/libs/lucene-core-10.0.0-SNAPSHOT.jar StringSetBenchmark.java /home/rmuir/Downloads/allCountries.txt /home/rmuir/Downloads/testIndex -1

/** Benchmark set queries on lines of Geonames. */
public class StringSetBenchmark {

  /**
   * Entry point: rebuilds the index from the Geonames file, then times each query group.
   *
   * <p>Expects exactly three arguments: the path to the tab-separated Geonames file, the path of
   * the index directory (its previous contents are removed before indexing), and the maximum
   * number of lines to index ({@code -1} indexes every line).
   *
   * @param args command-line arguments as described above
   * @throws Exception if reading the input, indexing, or searching fails
   */
  public static void main(String[] args) throws Exception {
    if (args.length != 3) {
      System.err.println("Usage: StringSetBenchmark /path/to/geonames.txt /path/to/index/dir doc_limit(or -1 means index all lines)");
      System.exit(2);
    }

    String geonamesDataPath = args[0];
    String indexPath = args[1];
    int docLimit = Integer.parseInt(args[2]);

    // start from a clean slate so results never mix with an older index
    IOUtils.rm(Paths.get(indexPath));
    try (FSDirectory dir = FSDirectory.open(Paths.get(indexPath))) {

      System.err.println("Now run indexing");
      IndexWriterConfig config = new IndexWriterConfig();
      // decode explicitly as UTF-8 rather than relying on the JVM's default charset
      try (IndexWriter iw = new IndexWriter(dir, config);
          LineNumberReader reader = new LineNumberReader(
              new InputStreamReader(Files.newInputStream(Paths.get(geonamesDataPath)), StandardCharsets.UTF_8))) {
        long t0 = System.nanoTime();
        indexDocs(iw, reader, docLimit);
        System.out.println(String.format(Locale.ROOT, "Indexing time: %d msec", (System.nanoTime() - t0) / 1_000_000));
      }

      System.err.println("Index files: " + Arrays.toString(dir.listAll()));

      try (DirectoryReader reader = DirectoryReader.open(dir)) {
        System.out.print("BIG_BIG: ");
        doBench(reader, BIG_BIG);
        System.out.print("MEDIUM_BIG: ");
        doBench(reader, MEDIUM_BIG);
        System.out.print("SMALL_BIG: ");
        doBench(reader, SMALL_BIG);
        System.out.println();
        System.out.print("BIG_MEDIUM: ");
        doBench(reader, BIG_MEDIUM);
        System.out.print("MEDIUM_MEDIUM: ");
        doBench(reader, MEDIUM_MEDIUM);
        System.out.print("SMALL_MEDIUM: ");
        doBench(reader, SMALL_MEDIUM);
      }
    }
    System.out.println("dummy=" + DUMMY);
  }

  /**
   * Warms up, then prints the minimum time of one pass over {@code queries} in count mode and in
   * top-10 search mode.
   *
   * @param reader reader over the benchmark index
   * @param queries query specs in {@code "nameTerm|code,code,..."} form
   * @throws Exception if a query fails
   */
  static void doBench(IndexReader reader, String[] queries) throws Exception {
    int iters = 300;
    // warmup
    for (int i = 0; i < iters; ++i) {
      getDocs(reader, queries, true);
      getDocs(reader, queries, false);
    }
    long countNS = minDurationNanos(reader, queries, true, iters);
    System.out.print(String.format(Locale.ROOT, "count=%.5f msec", countNS / 1_000_000.));
    long searchNS = minDurationNanos(reader, queries, false, iters);
    System.out.println(String.format(Locale.ROOT, " search=%.5f msec", searchNS / 1_000_000.));
  }

  /** Runs {@link #getDocs} {@code iters} times and returns the fastest run in nanoseconds. */
  private static long minDurationNanos(IndexReader reader, String[] queries, boolean doCount, int iters)
      throws IOException {
    // Take the min across multiple runs to decrease noise
    long minDurationNS = Long.MAX_VALUE;
    for (int i = 0; i < iters; ++i) {
      long t0 = System.nanoTime();
      getDocs(reader, queries, doCount);
      minDurationNS = Math.min(minDurationNS, System.nanoTime() - t0);
    }
    return minDurationNS;
  }

  /**
   * Indexes one document per input line: the second tab-separated field as tokenized text in
   * {@code name}, and the twelfth field (folded through {@link Integer#parseInt}) as both an
   * indexed string and a sorted doc value in {@code admin2}.
   *
   * <p>A line with fewer than two fields raises {@link ArrayIndexOutOfBoundsException}. A missing
   * or non-numeric twelfth field is indexed as {@code "0"}.
   *
   * @param iw writer receiving the documents
   * @param reader source of tab-separated lines
   * @param docLimit maximum number of lines to index; any negative value (conventionally
   *     {@code -1}) means no limit
   * @throws Exception if reading or indexing fails
   */
  static void indexDocs(IndexWriter iw, LineNumberReader reader, int docLimit) throws Exception {
    // a single Document and its Field instances are reused; only the values change per line
    Document doc = new Document();
    TextField name = new TextField("name", "", Field.Store.NO);
    doc.add(name);
    StringField admin2 = new StringField("admin2", "", Field.Store.NO);
    doc.add(admin2);
    SortedDocValuesField admin2dv = new SortedDocValuesField("admin2", new BytesRef());
    doc.add(admin2dv);

    String line;
    while ((line = reader.readLine()) != null) {
      // getLineNumber() is 1 after the first line, so "> docLimit" indexes exactly docLimit lines
      // (and none at all for a limit of 0)
      if (docLimit >= 0 && reader.getLineNumber() > docLimit) {
        break;
      }
      if (reader.getLineNumber() % 10000 == 0) {
        System.err.println("doc: " + reader.getLineNumber());
      }
      String[] values = line.split("\t");
      name.setStringValue(values[1]);
      // we use Integer.parseInt to "fold" some values such as 020 -> 20
      // at least it keeps it consistent with the NumSetBenchmark as far as how it is indexed.
      int val = 0;
      if (values.length > 11) {
        try {
          val = Integer.parseInt(values[11]);
        } catch (NumberFormatException ignored) {
          // deliberate best-effort: empty or non-numeric codes are indexed as 0
        }
      }
      String stringVal = Integer.toString(val);
      admin2.setStringValue(stringVal);
      admin2dv.setBytesValue(new BytesRef(stringVal));
      iw.addDocument(doc);
    }
  }

  /**
   * Running sum of all hit counts, printed at the end of {@link #main}; presumably a sink so the
   * query results are observably used.
   */
  static int DUMMY;

  // for histogram of the indexed column (values[11], i.e. the 12th field; cut fields are 1-based):
  // cut -f 12 allCountries.txt | sort | uniq -c | sort -nr | more
  // "smaller numbers are more common"

  static final String[] BIG_BIG = new String[] {
    "la|1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25",    // 28930 hits
    "de|1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25",    // 23465 hits
    "saint|1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25",   // 14870 hits
    "canyon|1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25",   // 10946 hits
  };

  static final String[] MEDIUM_BIG = new String[] {
    "hotel|1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25", // 4144 hits
    "del|1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25",   // 3818 hits
    "les|1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25",   // 2518 hits
    "plaza|1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25",   // 1827 hits
    "parc|1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25",   // 1434 hits
    "by|1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25",   // 995 hits
  };

  static final String[] SMALL_BIG = new String[] {
    "channel|1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25",   // 495 hits
    "centre|1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25",   // 403 hits
    "st|1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25",   // 353 hits
    "imperial|1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25",   // 123 hits
    "silent|1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25",   // 23 hits
    "sant|1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25",   // 4 hits
    "andorra|1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25",   // 3 hits
  };

  static final String[] BIG_MEDIUM = new String[] {
    "la|21,22,23",    // 2226 hits
    "de|21,22,23",    // 1555 hits
    "saint|21,22,23",   // 1571 hits
    "canyon|21,22,23",   // 1552 hits
  };

  static final String[] MEDIUM_MEDIUM = new String[] {
    "hotel|21,22,23", // 319 hits
    "del|21,22,23",   // 227 hits
    "les|21,22,23",   // 140 hits
    "plaza|21,22,23",   // 7 hits
    "parc|21,22,23",   // 64 hits
    "by|21,22,23",   // 337 hits
  };

  static final String[] SMALL_MEDIUM = new String[] {
    "channel|21,22,23",   // 24 hits
    "centre|21,22,23",   // 55 hits
    "st|21,22,23",   // 4 hits
    "imperial|21,22,23",   // 6 hits
    "silent|21,22,23",   // 1 hits
    "sant|21,22,23",   // 0 hits
    "andorra|21,22,23",   // 62 hits
  };

  /**
   * Runs every query spec once and adds its hit count to {@link #DUMMY}.
   *
   * <p>Each spec has the form {@code "nameTerm|code,code,..."}: the part before the pipe becomes a
   * required {@link TermQuery} on {@code name}, and the comma-separated codes become a required
   * {@link TermInSetQuery} on {@code admin2}.
   *
   * @param reader reader over the benchmark index
   * @param queries query specs to execute
   * @param doCount {@code true} to use {@link IndexSearcher#count}; {@code false} to run a top-10
   *     search and read its total hit count
   * @throws IOException if searching fails
   */
  static void getDocs(IndexReader reader, String[] queries, boolean doCount) throws IOException {
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setQueryCache(null); // benchmarking

    for (String textQuery : queries) {
      String[] parts = textQuery.split("\\|");
      String[] intParts = parts[1].split(",");
      BytesRef[] queryTerms = new BytesRef[intParts.length];
      for (int i = 0; i < intParts.length; i++) {
        queryTerms[i] = new BytesRef(intParts[i]);
      }
      BooleanQuery.Builder builder = new BooleanQuery.Builder();
      builder.add(new BooleanClause(new TermQuery(new Term("name", parts[0])), BooleanClause.Occur.MUST));
      builder.add(new BooleanClause(new TermInSetQuery("admin2", queryTerms), BooleanClause.Occur.MUST));
      BooleanQuery query = builder.build();
      int hits;
      if (doCount) {
        hits = searcher.count(query);
      } else {
        hits = (int) searcher.search(query, 10).totalHits.value;
      }
      DUMMY += hits;
    }
  }
}