diff --git a/.github/workflows/ci-build.yml b/.github/workflows/ci-build.yml index 928aa60ec..1136b69fa 100644 --- a/.github/workflows/ci-build.yml +++ b/.github/workflows/ci-build.yml @@ -38,6 +38,7 @@ jobs: with: repository: icatproject-contrib/icat-ansible path: icat-ansible + ref: icat-6.1 - name: Install Ansible run: pip install -r icat-ansible/requirements.txt diff --git a/pom.xml b/pom.xml index 99e93f4dd..79a4fd3a5 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.icatproject icat.server - 6.0.2-SNAPSHOT + 6.1.0-SNAPSHOT war ICAT Server A metadata catalogue to support Large Facility experimental data, @@ -113,13 +113,13 @@ org.icatproject icat.utils - 4.16.1 + 4.17.0-SNAPSHOT org.icatproject icat.client - 6.0.0 + 6.1.0-SNAPSHOT test @@ -231,7 +231,8 @@ ${javax.net.ssl.trustStore} - ${luceneUrl} + ${searchEngine} + ${searchUrls} false @@ -249,7 +250,8 @@ ${javax.net.ssl.trustStore} ${serverUrl} - ${luceneUrl} + ${searchEngine} + ${searchUrls} @@ -324,7 +326,8 @@ src/test/scripts/prepare_test.py ${containerHome} ${serverUrl} - ${luceneUrl} + ${searchEngine} + ${searchUrls} @@ -402,6 +405,3 @@ - - - diff --git a/src/main/config/run.properties.example b/src/main/config/run.properties.example index 06e732716..8b4fdc8b3 100644 --- a/src/main/config/run.properties.example +++ b/src/main/config/run.properties.example @@ -42,14 +42,24 @@ notification.Datafile = CU # Call logging setup log.list = SESSION WRITE READ INFO -# Lucene -lucene.url = https://localhost:8181 -lucene.populateBlockSize = 10000 -lucene.directory = ${HOME}/data/icat/lucene -lucene.backlogHandlerIntervalSeconds = 60 -lucene.enqueuedRequestIntervalSeconds = 5 -# The entities to index with Lucene. 
For example, remove 'Datafile' and 'DatafileParameter' if the number of datafiles exceeds lucene's limit of 2^32 entries in an index -!lucene.entitiesToIndex = Datafile Dataset Investigation InvestigationUser DatafileParameter DatasetParameter InvestigationParameter Sample +# Search Engine +# LUCENE, OPENSEARCH and ELASTICSEARCH engines are supported, however the latter two are considered experimental +search.engine = LUCENE +search.urls = https://localhost:8181 +search.populateBlockSize = 10000 +# Recommend setting search.searchBlockSize equal to maxIdsInQuery, so that all results can be authorised at once +# If search.searchBlockSize > maxIdsInQuery, then multiple auth checks may be needed for a single search +# The optimal value depends on how likely a user's auth request fails: larger values are more efficient when rejection is more likely +search.searchBlockSize = 1000 +search.directory = ${HOME}/data/icat/search +search.backlogHandlerIntervalSeconds = 60 +search.enqueuedRequestIntervalSeconds = 5 +search.aggregateFilesIntervalSeconds = 3600 +search.maxSearchTimeSeconds = 5 +# Configure this option to prevent certain entities being indexed +# For example, remove Datafile and DatafileParameter if these are not of interest +# Note that when commented out, the full set of all possible entities will be indexed - to disable all search functionality, instead comment out search.engine or search.urls +!search.entitiesToIndex = Datafile DatafileFormat DatafileParameter Dataset DatasetParameter DatasetType DatasetTechnique Facility Instrument InstrumentScientist Investigation InvestigationInstrument InvestigationParameter InvestigationType InvestigationUser ParameterType Sample SampleType SampleParameter User # List members of cluster !cluster = http://vm200.nubes.stfc.ac.uk:8080 https://smfisher:8181 diff --git a/src/main/java/org/icatproject/core/entity/Datafile.java b/src/main/java/org/icatproject/core/entity/Datafile.java index a9755ded1..1dcd56b73 100644 ---
a/src/main/java/org/icatproject/core/entity/Datafile.java +++ b/src/main/java/org/icatproject/core/entity/Datafile.java @@ -3,12 +3,15 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.Date; +import java.util.HashMap; import java.util.List; +import java.util.Map; import jakarta.json.stream.JsonGenerator; import jakarta.persistence.CascadeType; import jakarta.persistence.Column; import jakarta.persistence.Entity; +import jakarta.persistence.EntityManager; import jakarta.persistence.FetchType; import jakarta.persistence.Index; import jakarta.persistence.JoinColumn; @@ -20,7 +23,10 @@ import jakarta.persistence.UniqueConstraint; import jakarta.xml.bind.annotation.XmlRootElement; -import org.icatproject.core.manager.LuceneApi; +import org.icatproject.core.IcatException; +import org.icatproject.core.manager.EntityInfoHandler; +import org.icatproject.core.manager.EntityInfoHandler.Relationship; +import org.icatproject.core.manager.search.SearchApi; @Comment("A data file") @SuppressWarnings("serial") @@ -77,6 +83,8 @@ public class Datafile extends EntityBaseBean implements Serializable { @OneToMany(cascade = CascadeType.ALL, mappedBy = "sourceDatafile") private List sourceDatafiles = new ArrayList(); + private static final Map documentFields = new HashMap<>(); + /* Needed for JPA */ public Datafile() { } @@ -194,26 +202,120 @@ public void setSourceDatafiles(List sourceDatafiles) { } @Override - public void getDoc(JsonGenerator gen) { - StringBuilder sb = new StringBuilder(name); - if (description != null) { - sb.append(" " + description); - } - if (doi != null) { - sb.append(" " + doi); - } + public void getDoc(EntityManager manager, JsonGenerator gen) throws IcatException { + SearchApi.encodeString(gen, "name", name); + SearchApi.encodeNullableString(gen, "description", description); + SearchApi.encodeNullableString(gen, "location", location); + SearchApi.encodeNullableString(gen, "doi", doi); + SearchApi.encodeLong(gen, "fileSize", fileSize, 
0L); + SearchApi.encodeLong(gen, "fileCount", 1L); // Always 1, but makes sorting on fields consistent if (datafileFormat != null) { - sb.append(" " + datafileFormat.getName()); + if (datafileFormat.getName() == null) { + datafileFormat = manager.find(datafileFormat.getClass(), datafileFormat.id); + } + datafileFormat.getDoc(manager, gen); } - LuceneApi.encodeTextfield(gen, "text", sb.toString()); if (datafileModTime != null) { - LuceneApi.encodeStringField(gen, "date", datafileModTime); + SearchApi.encodeLong(gen, "date", datafileModTime); } else if (datafileCreateTime != null) { - LuceneApi.encodeStringField(gen, "date", datafileCreateTime); + SearchApi.encodeLong(gen, "date", datafileCreateTime); } else { - LuceneApi.encodeStringField(gen, "date", modTime); + SearchApi.encodeLong(gen, "date", modTime); + } + SearchApi.encodeLong(gen, "id", id); + + if (dataset != null) { + if (dataset.getName() == null || dataset.getInvestigation() == null) { + dataset = manager.find(dataset.getClass(), dataset.id); + } + SearchApi.encodeLong(gen, "dataset.id", dataset.id); + SearchApi.encodeString(gen, "dataset.name", dataset.getName()); + Sample sample = dataset.getSample(); + if (sample != null) { + if (sample.getName() == null) { + sample = manager.find(sample.getClass(), sample.id); + } + sample.getDoc(manager, gen); + } + Investigation investigation = dataset.getInvestigation(); + if (investigation != null) { + if (investigation.getName() == null || investigation.getVisitId() == null + || investigation.getTitle() == null || investigation.getCreateTime() == null) { + investigation = manager.find(investigation.getClass(), investigation.id); + } + SearchApi.encodeLong(gen, "investigation.id", investigation.id); + SearchApi.encodeString(gen, "investigation.name", investigation.getName()); + SearchApi.encodeString(gen, "visitId", investigation.getVisitId()); + if (investigation.getStartDate() != null) { + SearchApi.encodeLong(gen, "investigation.startDate", 
investigation.getStartDate()); + } else if (investigation.getCreateTime() != null) { + SearchApi.encodeLong(gen, "investigation.startDate", investigation.getCreateTime()); + } + } } - LuceneApi.encodeStoredId(gen, id); - LuceneApi.encodeStringField(gen, "dataset", dataset.id); } + + /** + * Gets the fields used in the search component for this entity, and the + * relationships that would restrict the content of those fields. + * + * @return Map of field names (as they appear on the search document) against + * the Relationships that need to be allowed for that field to be + * viewable. If there are no restrictive relationships, then the value + * will be null. + * @throws IcatException If the EntityInfoHandler cannot find one of the + * Relationships. + */ + public static Map getDocumentFields() throws IcatException { + if (documentFields.size() == 0) { + Relationship[] datafileFormatRelationships = { + EntityInfoHandler.getRelationshipsByName(Datafile.class).get("datafileFormat") }; + Relationship[] datasetRelationships = { + EntityInfoHandler.getRelationshipsByName(Datafile.class).get("dataset") }; + Relationship[] investigationRelationships = { + EntityInfoHandler.getRelationshipsByName(Datafile.class).get("dataset"), + EntityInfoHandler.getRelationshipsByName(Dataset.class).get("investigation") }; + Relationship[] investigationFacilityCyclesRelationships = { + EntityInfoHandler.getRelationshipsByName(Datafile.class).get("dataset"), + EntityInfoHandler.getRelationshipsByName(Dataset.class).get("investigation"), + EntityInfoHandler.getRelationshipsByName(Investigation.class).get("investigationFacilityCycles") }; + Relationship[] instrumentRelationships = { + EntityInfoHandler.getRelationshipsByName(Datafile.class).get("dataset"), + EntityInfoHandler.getRelationshipsByName(Dataset.class).get("investigation"), + EntityInfoHandler.getRelationshipsByName(Investigation.class).get("investigationInstruments"), + 
EntityInfoHandler.getRelationshipsByName(InvestigationInstrument.class).get("instrument") }; + Relationship[] sampleRelationships = { + EntityInfoHandler.getRelationshipsByName(Datafile.class).get("dataset"), + EntityInfoHandler.getRelationshipsByName(Dataset.class).get("sample"), + EntityInfoHandler.getRelationshipsByName(Sample.class).get("type") }; + Relationship[] sampleTypeRelationships = { + EntityInfoHandler.getRelationshipsByName(Datafile.class).get("dataset"), + EntityInfoHandler.getRelationshipsByName(Dataset.class).get("sample") }; + documentFields.put("name", null); + documentFields.put("description", null); + documentFields.put("location", null); + documentFields.put("doi", null); + documentFields.put("date", null); + documentFields.put("fileSize", null); + documentFields.put("fileCount", null); + documentFields.put("id", null); + documentFields.put("dataset.id", null); + documentFields.put("dataset.name", datasetRelationships); + documentFields.put("sample.id", datasetRelationships); + documentFields.put("sample.name", sampleRelationships); + documentFields.put("sample.investigation.id", sampleRelationships); + documentFields.put("sample.type.id", sampleRelationships); + documentFields.put("sample.type.name", sampleTypeRelationships); + documentFields.put("investigation.id", datasetRelationships); + documentFields.put("investigation.name", investigationRelationships); + documentFields.put("investigation.startDate", investigationRelationships); + documentFields.put("visitId", investigationRelationships); + documentFields.put("datafileFormat.id", null); + documentFields.put("datafileFormat.name", datafileFormatRelationships); + documentFields.put("InvestigationFacilityCycle facilityCycle.id", investigationFacilityCyclesRelationships); + documentFields.put("InvestigationInstrument instrument.id", instrumentRelationships); + } + return documentFields; + } + } diff --git a/src/main/java/org/icatproject/core/entity/DatafileFormat.java 
b/src/main/java/org/icatproject/core/entity/DatafileFormat.java index 2c38b891f..c1972e6d3 100644 --- a/src/main/java/org/icatproject/core/entity/DatafileFormat.java +++ b/src/main/java/org/icatproject/core/entity/DatafileFormat.java @@ -2,11 +2,16 @@ import java.io.Serializable; import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; import java.util.List; +import java.util.Set; +import jakarta.json.stream.JsonGenerator; import jakarta.persistence.CascadeType; import jakarta.persistence.Column; import jakarta.persistence.Entity; +import jakarta.persistence.EntityManager; import jakarta.persistence.FetchType; import jakarta.persistence.JoinColumn; import jakarta.persistence.ManyToOne; @@ -14,6 +19,9 @@ import jakarta.persistence.Table; import jakarta.persistence.UniqueConstraint; +import org.icatproject.core.IcatException; +import org.icatproject.core.manager.search.SearchApi; + @Comment("A data file format") @SuppressWarnings("serial") @Entity @@ -51,6 +59,8 @@ public void setFacility(Facility facility) { @Column(name = "VERSION", nullable = false) private String version; + public static Set docFields = new HashSet<>(Arrays.asList("datafileFormat.name", "datafileFormat.id")); + /* Needed for JPA */ public DatafileFormat() { } @@ -95,4 +105,10 @@ public void setVersion(String version) { this.version = version; } + @Override + public void getDoc(EntityManager manager, JsonGenerator gen) throws IcatException { + SearchApi.encodeString(gen, "datafileFormat.name", name); + SearchApi.encodeLong(gen, "datafileFormat.id", id); + } + } diff --git a/src/main/java/org/icatproject/core/entity/DatafileParameter.java b/src/main/java/org/icatproject/core/entity/DatafileParameter.java index 8b30ffa3e..2b8f679d3 100644 --- a/src/main/java/org/icatproject/core/entity/DatafileParameter.java +++ b/src/main/java/org/icatproject/core/entity/DatafileParameter.java @@ -13,8 +13,8 @@ import org.icatproject.core.IcatException; import 
org.icatproject.core.manager.EntityBeanManager.PersistMode; +import org.icatproject.core.manager.search.SearchApi; import org.icatproject.core.manager.GateKeeper; -import org.icatproject.core.manager.LuceneApi; @Comment("A parameter associated with a data file") @SuppressWarnings("serial") @@ -54,9 +54,9 @@ public void setDatafile(Datafile datafile) { } @Override - public void getDoc(JsonGenerator gen) { - super.getDoc(gen); - LuceneApi.encodeSortedDocValuesField(gen, "datafile", datafile.id); + public void getDoc(EntityManager manager, JsonGenerator gen) throws IcatException { + super.getDoc(manager, gen); + SearchApi.encodeLong(gen, "datafile.id", datafile.id); } } diff --git a/src/main/java/org/icatproject/core/entity/Dataset.java b/src/main/java/org/icatproject/core/entity/Dataset.java index 0f0dae57d..d50fe6fd3 100644 --- a/src/main/java/org/icatproject/core/entity/Dataset.java +++ b/src/main/java/org/icatproject/core/entity/Dataset.java @@ -3,12 +3,15 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.Date; +import java.util.HashMap; import java.util.List; +import java.util.Map; import jakarta.json.stream.JsonGenerator; import jakarta.persistence.CascadeType; import jakarta.persistence.Column; import jakarta.persistence.Entity; +import jakarta.persistence.EntityManager; import jakarta.persistence.FetchType; import jakarta.persistence.JoinColumn; import jakarta.persistence.ManyToOne; @@ -19,7 +22,10 @@ import jakarta.persistence.UniqueConstraint; import jakarta.xml.bind.annotation.XmlRootElement; -import org.icatproject.core.manager.LuceneApi; +import org.icatproject.core.IcatException; +import org.icatproject.core.manager.EntityInfoHandler; +import org.icatproject.core.manager.EntityInfoHandler.Relationship; +import org.icatproject.core.manager.search.SearchApi; @Comment("A collection of data files and part of an investigation") @SuppressWarnings("serial") @@ -93,6 +99,8 @@ public void setDataCollectionDatasets(List dataCollection 
@ManyToOne(fetch = FetchType.LAZY) private DatasetType type; + private static final Map documentFields = new HashMap<>(); + /* Needed for JPA */ public Dataset() { } @@ -226,43 +234,107 @@ public void setType(DatasetType type) { } @Override - public void getDoc(JsonGenerator gen) { - - StringBuilder sb = new StringBuilder(name + " " + type.getName() + " " + type.getName()); - if (description != null) { - sb.append(" " + description); + public void getDoc(EntityManager manager, JsonGenerator gen) throws IcatException { + SearchApi.encodeString(gen, "name", name); + SearchApi.encodeNullableString(gen, "description", description); + SearchApi.encodeNullableString(gen, "doi", doi); + if (startDate != null) { + SearchApi.encodeLong(gen, "startDate", startDate); + SearchApi.encodeLong(gen, "date", startDate); + } else { + SearchApi.encodeLong(gen, "startDate", createTime); + SearchApi.encodeLong(gen, "date", createTime); } - - if (doi != null) { - sb.append(" " + doi); + if (endDate != null) { + SearchApi.encodeLong(gen, "endDate", endDate); + } else { + SearchApi.encodeLong(gen, "endDate", modTime); + } + SearchApi.encodeLong(gen, "fileSize", fileSize, 0L); + SearchApi.encodeLong(gen, "fileCount", fileCount, 0L); + SearchApi.encodeLong(gen, "id", id); + if (investigation != null) { + if (investigation.getName() == null || investigation.getVisitId() == null + || investigation.getTitle() == null || investigation.getCreateTime() == null) { + investigation = manager.find(investigation.getClass(), investigation.id); + } + SearchApi.encodeLong(gen, "investigation.id", investigation.id); + SearchApi.encodeString(gen, "investigation.name", investigation.getName()); + SearchApi.encodeString(gen, "investigation.title", investigation.getTitle()); + SearchApi.encodeString(gen, "visitId", investigation.getVisitId()); + if (investigation.getStartDate() != null) { + SearchApi.encodeLong(gen, "investigation.startDate", investigation.getStartDate()); + } else if 
(investigation.getCreateTime() != null) { + SearchApi.encodeLong(gen, "investigation.startDate", investigation.getCreateTime()); + } } if (sample != null) { - sb.append(" " + sample.getName()); - if (sample.getType() != null) { - sb.append(" " + sample.getType().getName()); + if (sample.getName() == null) { + sample = manager.find(sample.getClass(), sample.id); } + sample.getDoc(manager, gen); } - LuceneApi.encodeTextfield(gen, "text", sb.toString()); - - if (startDate != null) { - LuceneApi.encodeStringField(gen, "startDate", startDate); - } else { - LuceneApi.encodeStringField(gen, "startDate", createTime); + if (type.getName() == null) { + type = manager.find(type.getClass(), type.id); } - - if (endDate != null) { - LuceneApi.encodeStringField(gen, "endDate", endDate); - } else { - LuceneApi.encodeStringField(gen, "endDate", modTime); + type.getDoc(manager, gen); + } + + /** + * Gets the fields used in the search component for this entity, and the + * relationships that would restrict the content of those fields. + * + * @return Map of field names (as they appear on the search document) against + * the Relationships that need to be allowed for that field to be + * viewable. If there are no restrictive relationships, then the value + * will be null. + * @throws IcatException If the EntityInfoHandler cannot find one of the + * Relationships. 
+ */ + public static Map getDocumentFields() throws IcatException { + if (documentFields.size() == 0) { + Relationship[] sampleRelationships = { + EntityInfoHandler.getRelationshipsByName(Dataset.class).get("sample") }; + Relationship[] sampleTypeRelationships = { + EntityInfoHandler.getRelationshipsByName(Dataset.class).get("sample"), + EntityInfoHandler.getRelationshipsByName(Sample.class).get("type") }; + Relationship[] typeRelationships = { EntityInfoHandler.getRelationshipsByName(Dataset.class).get("type") }; + Relationship[] investigationRelationships = { + EntityInfoHandler.getRelationshipsByName(Dataset.class).get("investigation") }; + Relationship[] investigationFacilityCyclesRelationships = { + EntityInfoHandler.getRelationshipsByName(Dataset.class).get("investigation"), + EntityInfoHandler.getRelationshipsByName(Investigation.class).get("investigationFacilityCycles") }; + Relationship[] instrumentRelationships = { + EntityInfoHandler.getRelationshipsByName(Dataset.class).get("investigation"), + EntityInfoHandler.getRelationshipsByName(Investigation.class).get("investigationInstruments"), + EntityInfoHandler.getRelationshipsByName(InvestigationInstrument.class).get("instrument") }; + documentFields.put("name", null); + documentFields.put("description", null); + documentFields.put("doi", null); + documentFields.put("startDate", null); + documentFields.put("endDate", null); + documentFields.put("date", null); + documentFields.put("fileSize", null); + documentFields.put("fileCount", null); + documentFields.put("id", null); + documentFields.put("investigation.id", null); + documentFields.put("investigation.title", investigationRelationships); + documentFields.put("investigation.name", investigationRelationships); + documentFields.put("investigation.startDate", investigationRelationships); + documentFields.put("visitId", investigationRelationships); + documentFields.put("sample.id", null); + documentFields.put("sample.name", sampleRelationships); + 
documentFields.put("sample.investigation.id", sampleRelationships); + documentFields.put("sample.type.id", sampleRelationships); + documentFields.put("sample.type.name", sampleTypeRelationships); + documentFields.put("type.id", null); + documentFields.put("type.name", typeRelationships); + documentFields.put("InvestigationFacilityCycle facilityCycle.id", investigationFacilityCyclesRelationships); + documentFields.put("InvestigationInstrument instrument.id", instrumentRelationships); } - LuceneApi.encodeStoredId(gen, id); - - LuceneApi.encodeSortedDocValuesField(gen, "id", id); - - LuceneApi.encodeStringField(gen, "investigation", investigation.id); - + return documentFields; } } diff --git a/src/main/java/org/icatproject/core/entity/DatasetParameter.java b/src/main/java/org/icatproject/core/entity/DatasetParameter.java index 69f9cc1b9..0d7be7397 100644 --- a/src/main/java/org/icatproject/core/entity/DatasetParameter.java +++ b/src/main/java/org/icatproject/core/entity/DatasetParameter.java @@ -13,8 +13,8 @@ import org.icatproject.core.IcatException; import org.icatproject.core.manager.EntityBeanManager.PersistMode; +import org.icatproject.core.manager.search.SearchApi; import org.icatproject.core.manager.GateKeeper; -import org.icatproject.core.manager.LuceneApi; @Comment("A parameter associated with a data set") @SuppressWarnings("serial") @@ -54,8 +54,8 @@ public void setDataset(Dataset dataset) { } @Override - public void getDoc(JsonGenerator gen) { - super.getDoc(gen); - LuceneApi.encodeSortedDocValuesField(gen, "dataset", dataset.id); + public void getDoc(EntityManager manager, JsonGenerator gen) throws IcatException { + super.getDoc(manager, gen); + SearchApi.encodeLong(gen, "dataset.id", dataset.id); } } \ No newline at end of file diff --git a/src/main/java/org/icatproject/core/entity/DatasetTechnique.java b/src/main/java/org/icatproject/core/entity/DatasetTechnique.java index 2c5e0c308..4203a787e 100644 --- 
a/src/main/java/org/icatproject/core/entity/DatasetTechnique.java +++ b/src/main/java/org/icatproject/core/entity/DatasetTechnique.java @@ -2,13 +2,18 @@ import java.io.Serializable; +import jakarta.json.stream.JsonGenerator; import jakarta.persistence.Entity; +import jakarta.persistence.EntityManager; import jakarta.persistence.FetchType; import jakarta.persistence.JoinColumn; import jakarta.persistence.ManyToOne; import jakarta.persistence.Table; import jakarta.persistence.UniqueConstraint; +import org.icatproject.core.IcatException; +import org.icatproject.core.manager.search.SearchApi; + @Comment("Represents a many-to-many relationship between a dataset and the experimental technique being used to create that Dataset") @SuppressWarnings("serial") @Entity @@ -38,4 +43,14 @@ public void setDataset(Dataset dataset) { public void setTechnique(Technique technique) { this.technique = technique; } + + @Override + public void getDoc(EntityManager manager, JsonGenerator gen) throws IcatException { + SearchApi.encodeLong(gen, "id", id); + SearchApi.encodeLong(gen, "dataset.id", dataset.id); + if (technique.getName() == null) { + technique = manager.find(technique.getClass(), technique.id); + } + technique.getDoc(manager, gen); + } } diff --git a/src/main/java/org/icatproject/core/entity/DatasetType.java b/src/main/java/org/icatproject/core/entity/DatasetType.java index 7e85c4d44..cb1166000 100644 --- a/src/main/java/org/icatproject/core/entity/DatasetType.java +++ b/src/main/java/org/icatproject/core/entity/DatasetType.java @@ -2,11 +2,16 @@ import java.io.Serializable; import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; import java.util.List; +import java.util.Set; +import jakarta.json.stream.JsonGenerator; import jakarta.persistence.CascadeType; import jakarta.persistence.Column; import jakarta.persistence.Entity; +import jakarta.persistence.EntityManager; import jakarta.persistence.FetchType; import jakarta.persistence.JoinColumn; import 
jakarta.persistence.ManyToOne; @@ -14,6 +19,9 @@ import jakarta.persistence.Table; import jakarta.persistence.UniqueConstraint; +import org.icatproject.core.IcatException; +import org.icatproject.core.manager.search.SearchApi; + @Comment("A type of data set") @SuppressWarnings("serial") @Entity @@ -35,6 +43,8 @@ public class DatasetType extends EntityBaseBean implements Serializable { @Column(name = "NAME", nullable = false) private String name; + public static Set docFields = new HashSet<>(Arrays.asList("type.name", "type.id")); + /* Needed for JPA */ public DatasetType() { } @@ -71,4 +81,10 @@ public void setName(String name) { this.name = name; } + @Override + public void getDoc(EntityManager manager, JsonGenerator gen) throws IcatException { + SearchApi.encodeString(gen, "type.name", name); + SearchApi.encodeLong(gen, "type.id", id); + } + } diff --git a/src/main/java/org/icatproject/core/entity/EntityBaseBean.java b/src/main/java/org/icatproject/core/entity/EntityBaseBean.java index e09986f3e..3b57a4487 100644 --- a/src/main/java/org/icatproject/core/entity/EntityBaseBean.java +++ b/src/main/java/org/icatproject/core/entity/EntityBaseBean.java @@ -30,15 +30,16 @@ import org.icatproject.core.manager.EntityBeanManager.PersistMode; import org.icatproject.core.manager.EntityInfoHandler; import org.icatproject.core.manager.EntityInfoHandler.Relationship; +import org.icatproject.core.manager.search.SearchManager; import org.icatproject.core.manager.GateKeeper; -import org.icatproject.core.manager.LuceneManager; +import org.icatproject.core.manager.HasEntityId; import org.icatproject.core.parser.IncludeClause.Step; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @SuppressWarnings("serial") @MappedSuperclass -public abstract class EntityBaseBean implements Serializable { +public abstract class EntityBaseBean implements HasEntityId, Serializable { private static final Logger logger = LoggerFactory.getLogger(EntityBaseBean.class); @@ -79,8 +80,8 @@ void 
addToClone(EntityBaseBean clone) { // This is only used by the older create and createMany calls and not by the // new Restful write call - public void addToLucene(LuceneManager lucene) throws IcatException { - lucene.addDocument(this); + public void addToSearch(EntityManager manager, SearchManager searchManager) throws IcatException { + searchManager.addDocument(manager, this); Class klass = this.getClass(); Set rs = EntityInfoHandler.getRelatedEntities(klass); Map getters = EntityInfoHandler.getGetters(klass); @@ -92,7 +93,7 @@ public void addToLucene(LuceneManager lucene) throws IcatException { List collection = (List) m.invoke(this); if (!collection.isEmpty()) { for (EntityBaseBean bean : collection) { - bean.addToLucene(lucene); + bean.addToSearch(manager, searchManager); } } } catch (Exception e) { @@ -432,8 +433,11 @@ public String toString() { return this.getClass().getSimpleName() + ":" + id; } - /* This should be overridden by classes wishing to index things in lucene */ - public void getDoc(JsonGenerator gen) { + /* + * This should be overridden by classes wishing to index things in a search + * engine + */ + public void getDoc(EntityManager manager, JsonGenerator gen) throws IcatException { } } diff --git a/src/main/java/org/icatproject/core/entity/Facility.java b/src/main/java/org/icatproject/core/entity/Facility.java index e5ebaa4bf..d2ac6ad1f 100644 --- a/src/main/java/org/icatproject/core/entity/Facility.java +++ b/src/main/java/org/icatproject/core/entity/Facility.java @@ -2,15 +2,23 @@ import java.io.Serializable; import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; import java.util.List; +import java.util.Set; +import jakarta.json.stream.JsonGenerator; import jakarta.persistence.CascadeType; import jakarta.persistence.Column; import jakarta.persistence.Entity; +import jakarta.persistence.EntityManager; import jakarta.persistence.OneToMany; import jakarta.persistence.Table; import jakarta.persistence.UniqueConstraint; 
+import org.icatproject.core.IcatException; +import org.icatproject.core.manager.search.SearchApi; + @Comment("An experimental facility") @SuppressWarnings("serial") @Entity @@ -67,6 +75,8 @@ public class Facility extends EntityBaseBean implements Serializable { @OneToMany(cascade = CascadeType.ALL, mappedBy = "facility") private List dataPublicationTypes = new ArrayList(); + public static Set docFields = new HashSet<>(Arrays.asList("facility.name", "facility.id")); + /* Needed for JPA */ public Facility() { } @@ -199,4 +209,10 @@ public void setDataPublicationTypes(List dataPublicationTyp this.dataPublicationTypes = dataPublicationTypes; } + @Override + public void getDoc(EntityManager manager, JsonGenerator gen) throws IcatException { + SearchApi.encodeString(gen, "facility.name", name); + SearchApi.encodeLong(gen, "facility.id", id); + } + } diff --git a/src/main/java/org/icatproject/core/entity/Instrument.java b/src/main/java/org/icatproject/core/entity/Instrument.java index 9f5ea40d9..f1a8a7a75 100644 --- a/src/main/java/org/icatproject/core/entity/Instrument.java +++ b/src/main/java/org/icatproject/core/entity/Instrument.java @@ -4,9 +4,11 @@ import java.util.ArrayList; import java.util.List; +import jakarta.json.stream.JsonGenerator; import jakarta.persistence.CascadeType; import jakarta.persistence.Column; import jakarta.persistence.Entity; +import jakarta.persistence.EntityManager; import jakarta.persistence.FetchType; import jakarta.persistence.JoinColumn; import jakarta.persistence.ManyToOne; @@ -14,6 +16,9 @@ import jakarta.persistence.Table; import jakarta.persistence.UniqueConstraint; +import org.icatproject.core.IcatException; +import org.icatproject.core.manager.search.SearchApi; + @Comment("Used by a user within an investigation") @SuppressWarnings("serial") @Entity @@ -149,4 +154,11 @@ public void setShifts(List shifts) { this.shifts = shifts; } + @Override + public void getDoc(EntityManager manager, JsonGenerator gen) throws IcatException { + 
SearchApi.encodeNullableString(gen, "instrument.fullName", fullName); + SearchApi.encodeString(gen, "instrument.name", name); + SearchApi.encodeLong(gen, "instrument.id", id); + } + } diff --git a/src/main/java/org/icatproject/core/entity/InstrumentScientist.java b/src/main/java/org/icatproject/core/entity/InstrumentScientist.java index c7e727164..09f7c8e4a 100644 --- a/src/main/java/org/icatproject/core/entity/InstrumentScientist.java +++ b/src/main/java/org/icatproject/core/entity/InstrumentScientist.java @@ -2,13 +2,18 @@ import java.io.Serializable; +import jakarta.json.stream.JsonGenerator; import jakarta.persistence.Entity; +import jakarta.persistence.EntityManager; import jakarta.persistence.FetchType; import jakarta.persistence.JoinColumn; import jakarta.persistence.ManyToOne; import jakarta.persistence.Table; import jakarta.persistence.UniqueConstraint; +import org.icatproject.core.IcatException; +import org.icatproject.core.manager.search.SearchApi; + @Comment("Relationship between an ICAT user as an instrument scientist and the instrument") @SuppressWarnings("serial") @Entity @@ -43,4 +48,14 @@ public void setInstrument(Instrument instrument) { public InstrumentScientist() { } + @Override + public void getDoc(EntityManager manager, JsonGenerator gen) throws IcatException { + if (user.getName() == null) { + user = manager.find(user.getClass(), user.id); + } + user.getDoc(manager, gen); + SearchApi.encodeLong(gen, "instrument.id", instrument.id); + SearchApi.encodeLong(gen, "id", id); + } + } diff --git a/src/main/java/org/icatproject/core/entity/Investigation.java b/src/main/java/org/icatproject/core/entity/Investigation.java index adcc63406..0d4753dc8 100644 --- a/src/main/java/org/icatproject/core/entity/Investigation.java +++ b/src/main/java/org/icatproject/core/entity/Investigation.java @@ -3,12 +3,15 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.Date; +import java.util.HashMap; import java.util.List; +import 
java.util.Map; import jakarta.json.stream.JsonGenerator; import jakarta.persistence.CascadeType; import jakarta.persistence.Column; import jakarta.persistence.Entity; +import jakarta.persistence.EntityManager; import jakarta.persistence.FetchType; import jakarta.persistence.JoinColumn; import jakarta.persistence.ManyToOne; @@ -18,7 +21,10 @@ import jakarta.persistence.TemporalType; import jakarta.persistence.UniqueConstraint; -import org.icatproject.core.manager.LuceneApi; +import org.icatproject.core.IcatException; +import org.icatproject.core.manager.EntityInfoHandler; +import org.icatproject.core.manager.EntityInfoHandler.Relationship; +import org.icatproject.core.manager.search.SearchApi; @Comment("An investigation or experiment") @SuppressWarnings("serial") @@ -108,6 +114,8 @@ public class Investigation extends EntityBaseBean implements Serializable { @Column(name = "VISIT_ID", nullable = false) private String visitId; + private static final Map documentFields = new HashMap<>(); + /* Needed for JPA */ public Investigation() { } @@ -313,33 +321,100 @@ public void setVisitId(String visitId) { } @Override - public void getDoc(JsonGenerator gen) { - StringBuilder sb = new StringBuilder(visitId + " " + name + " " + facility.getName() + " " + type.getName()); - if (summary != null) { - sb.append(" " + summary); - } - if (doi != null) { - sb.append(" " + doi); - } - if (title != null) { - sb.append(" " + title); - } - LuceneApi.encodeTextfield(gen, "text", sb.toString()); + public void getDoc(EntityManager manager, JsonGenerator gen) throws IcatException { + SearchApi.encodeString(gen, "name", name); + SearchApi.encodeString(gen, "visitId", visitId); + SearchApi.encodeString(gen, "title", title); + SearchApi.encodeNullableString(gen, "summary", summary); + SearchApi.encodeNullableString(gen, "doi", doi); if (startDate != null) { - LuceneApi.encodeStringField(gen, "startDate", startDate); + SearchApi.encodeLong(gen, "startDate", startDate); + SearchApi.encodeLong(gen, 
"date", startDate); } else { - LuceneApi.encodeStringField(gen, "startDate", createTime); + SearchApi.encodeLong(gen, "startDate", createTime); + SearchApi.encodeLong(gen, "date", createTime); } if (endDate != null) { - LuceneApi.encodeStringField(gen, "endDate", endDate); + SearchApi.encodeLong(gen, "endDate", endDate); } else { - LuceneApi.encodeStringField(gen, "endDate", modTime); + SearchApi.encodeLong(gen, "endDate", modTime); } + SearchApi.encodeLong(gen, "fileSize", fileSize, 0L); + SearchApi.encodeLong(gen, "fileCount", fileCount, 0L); - LuceneApi.encodeSortedDocValuesField(gen, "id", id); + SearchApi.encodeLong(gen, "id", id); - LuceneApi.encodeStoredId(gen, id); + if (facility.getName() == null) { + facility = manager.find(facility.getClass(), facility.id); + } + facility.getDoc(manager, gen); + + if (type.getName() == null) { + type = manager.find(type.getClass(), type.id); + } + type.getDoc(manager, gen); + } + + /** + * Gets the fields used in the search component for this entity, and the + * relationships that would restrict the content of those fields. + * + * @return Map of field names (as they appear on the search document) against + * the Relationships that need to be allowed for that field to be + * viewable. If there are no restrictive relationships, then the value + * will be null. + * @throws IcatException If the EntityInfoHandler cannot find one of the + * Relationships. 
+ */ + public static Map getDocumentFields() throws IcatException { + if (documentFields.size() == 0) { + Relationship[] typeRelationships = { EntityInfoHandler.getRelationshipsByName(Investigation.class).get("type") }; + Relationship[] facilityRelationships = { + EntityInfoHandler.getRelationshipsByName(Investigation.class).get("facility") }; + Relationship[] investigationFacilityCyclesRelationships = { + EntityInfoHandler.getRelationshipsByName(Investigation.class).get("investigationFacilityCycles") }; + Relationship[] instrumentRelationships = { + EntityInfoHandler.getRelationshipsByName(Investigation.class).get("investigationInstruments"), + EntityInfoHandler.getRelationshipsByName(InvestigationInstrument.class).get("instrument") }; + Relationship[] parameterRelationships = { + EntityInfoHandler.getRelationshipsByName(Investigation.class).get("parameters") }; + Relationship[] parameterTypeRelationships = { + EntityInfoHandler.getRelationshipsByName(Investigation.class).get("parameters"), + EntityInfoHandler.getRelationshipsByName(InvestigationParameter.class).get("type") }; + Relationship[] sampleRelationships = { + EntityInfoHandler.getRelationshipsByName(Investigation.class).get("samples") }; + Relationship[] sampleTypeRelationships = { + EntityInfoHandler.getRelationshipsByName(Investigation.class).get("samples"), + EntityInfoHandler.getRelationshipsByName(Sample.class).get("type") }; + documentFields.put("name", null); + documentFields.put("visitId", null); + documentFields.put("title", null); + documentFields.put("summary", null); + documentFields.put("doi", null); + documentFields.put("startDate", null); + documentFields.put("endDate", null); + documentFields.put("date", null); + documentFields.put("fileSize", null); + documentFields.put("fileCount", null); + documentFields.put("id", null); + documentFields.put("facility.name", facilityRelationships); + documentFields.put("facility.id", null); + documentFields.put("type.name", typeRelationships); + 
documentFields.put("type.id", null); + documentFields.put("InvestigationFacilityCycle facilityCycle.id", investigationFacilityCyclesRelationships); + documentFields.put("InvestigationInstrument instrument.fullName", instrumentRelationships); + documentFields.put("InvestigationInstrument instrument.id", instrumentRelationships); + documentFields.put("InvestigationInstrument instrument.name", instrumentRelationships); + documentFields.put("InvestigationParameter type.name", parameterTypeRelationships); + documentFields.put("InvestigationParameter stringValue", parameterRelationships); + documentFields.put("InvestigationParameter numericValue", parameterRelationships); + documentFields.put("InvestigationParameter dateTimeValue", parameterRelationships); + documentFields.put("Sample sample.id", sampleRelationships); + documentFields.put("Sample sample.name", sampleRelationships); + documentFields.put("Sample type.name", sampleTypeRelationships); + } + return documentFields; } } diff --git a/src/main/java/org/icatproject/core/entity/InvestigationFacilityCycle.java b/src/main/java/org/icatproject/core/entity/InvestigationFacilityCycle.java index f74cbbaad..e1b4ce02e 100644 --- a/src/main/java/org/icatproject/core/entity/InvestigationFacilityCycle.java +++ b/src/main/java/org/icatproject/core/entity/InvestigationFacilityCycle.java @@ -2,12 +2,17 @@ import java.io.Serializable; +import jakarta.json.stream.JsonGenerator; import jakarta.persistence.Entity; +import jakarta.persistence.EntityManager; import jakarta.persistence.JoinColumn; import jakarta.persistence.ManyToOne; import jakarta.persistence.Table; import jakarta.persistence.UniqueConstraint; +import org.icatproject.core.IcatException; +import org.icatproject.core.manager.search.SearchApi; + @Comment("Many to many relationship between investigation and facilityCycle. 
" + "Allows investigations to belong to multiple cycles at once.") @SuppressWarnings("serial") @@ -15,7 +20,6 @@ @Table(uniqueConstraints = { @UniqueConstraint(columnNames = { "FACILITYCYCLE_ID", "INVESTIGATION_ID" }) }) public class InvestigationFacilityCycle extends EntityBaseBean implements Serializable { - @JoinColumn(name = "FACILITYCYCLE_ID", nullable = false) @ManyToOne private FacilityCycle facilityCycle; @@ -44,4 +48,11 @@ public void setInvestigation(Investigation investigation) { this.investigation = investigation; } + @Override + public void getDoc(EntityManager manager, JsonGenerator gen) throws IcatException { + SearchApi.encodeLong(gen, "facilityCycle.id", facilityCycle.id); + SearchApi.encodeLong(gen, "investigation.id", investigation.id); + SearchApi.encodeLong(gen, "id", id); + } + } diff --git a/src/main/java/org/icatproject/core/entity/InvestigationInstrument.java b/src/main/java/org/icatproject/core/entity/InvestigationInstrument.java index 5eb4356fd..1c9bf310c 100644 --- a/src/main/java/org/icatproject/core/entity/InvestigationInstrument.java +++ b/src/main/java/org/icatproject/core/entity/InvestigationInstrument.java @@ -2,13 +2,18 @@ import java.io.Serializable; +import jakarta.json.stream.JsonGenerator; import jakarta.persistence.Entity; +import jakarta.persistence.EntityManager; import jakarta.persistence.FetchType; import jakarta.persistence.JoinColumn; import jakarta.persistence.ManyToOne; import jakarta.persistence.Table; import jakarta.persistence.UniqueConstraint; +import org.icatproject.core.IcatException; +import org.icatproject.core.manager.search.SearchApi; + @Comment("Represents a many-to-many relationship between an investigation and the instruments assigned") @SuppressWarnings("serial") @Entity @@ -39,4 +44,14 @@ public void setInvestigation(Investigation investigation) { this.investigation = investigation; } + @Override + public void getDoc(EntityManager manager, JsonGenerator gen) throws IcatException { + if 
(instrument.getName() == null) { + instrument = manager.find(instrument.getClass(), instrument.id); + } + instrument.getDoc(manager, gen); + SearchApi.encodeLong(gen, "investigation.id", investigation.id); + SearchApi.encodeLong(gen, "id", id); + } + } diff --git a/src/main/java/org/icatproject/core/entity/InvestigationParameter.java b/src/main/java/org/icatproject/core/entity/InvestigationParameter.java index f5d0c0a16..d7a4372c9 100644 --- a/src/main/java/org/icatproject/core/entity/InvestigationParameter.java +++ b/src/main/java/org/icatproject/core/entity/InvestigationParameter.java @@ -13,8 +13,8 @@ import org.icatproject.core.IcatException; import org.icatproject.core.manager.EntityBeanManager.PersistMode; +import org.icatproject.core.manager.search.SearchApi; import org.icatproject.core.manager.GateKeeper; -import org.icatproject.core.manager.LuceneApi; @Comment("A parameter associated with an investigation") @SuppressWarnings("serial") @@ -55,8 +55,8 @@ public void setInvestigation(Investigation investigation) { } @Override - public void getDoc(JsonGenerator gen) { - super.getDoc(gen); - LuceneApi.encodeSortedDocValuesField(gen, "investigation", investigation.id); + public void getDoc(EntityManager manager, JsonGenerator gen) throws IcatException { + super.getDoc(manager, gen); + SearchApi.encodeLong(gen, "investigation.id", investigation.id); } } \ No newline at end of file diff --git a/src/main/java/org/icatproject/core/entity/InvestigationType.java b/src/main/java/org/icatproject/core/entity/InvestigationType.java index 502449043..fd39dfe8b 100644 --- a/src/main/java/org/icatproject/core/entity/InvestigationType.java +++ b/src/main/java/org/icatproject/core/entity/InvestigationType.java @@ -2,11 +2,16 @@ import java.io.Serializable; import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; import java.util.List; +import java.util.Set; +import jakarta.json.stream.JsonGenerator; import jakarta.persistence.CascadeType; import 
jakarta.persistence.Column; import jakarta.persistence.Entity; +import jakarta.persistence.EntityManager; import jakarta.persistence.FetchType; import jakarta.persistence.JoinColumn; import jakarta.persistence.ManyToOne; @@ -14,6 +19,9 @@ import jakarta.persistence.Table; import jakarta.persistence.UniqueConstraint; +import org.icatproject.core.IcatException; +import org.icatproject.core.manager.search.SearchApi; + @Comment("A type of investigation") @SuppressWarnings("serial") @Entity @@ -51,6 +59,8 @@ public void setInvestigations(List investigations) { this.investigations = investigations; } + public static Set docFields = new HashSet<>(Arrays.asList("type.name", "type.id")); + /* Needed for JPA */ public InvestigationType() { } @@ -71,4 +81,10 @@ public void setDescription(String description) { this.description = description; } + @Override + public void getDoc(EntityManager manager, JsonGenerator gen) throws IcatException { + SearchApi.encodeString(gen, "type.name", name); + SearchApi.encodeLong(gen, "type.id", id); + } + } diff --git a/src/main/java/org/icatproject/core/entity/InvestigationUser.java b/src/main/java/org/icatproject/core/entity/InvestigationUser.java index faa432ab5..69c037dc0 100644 --- a/src/main/java/org/icatproject/core/entity/InvestigationUser.java +++ b/src/main/java/org/icatproject/core/entity/InvestigationUser.java @@ -5,12 +5,14 @@ import jakarta.json.stream.JsonGenerator; import jakarta.persistence.Column; import jakarta.persistence.Entity; +import jakarta.persistence.EntityManager; import jakarta.persistence.JoinColumn; import jakarta.persistence.ManyToOne; import jakarta.persistence.Table; import jakarta.persistence.UniqueConstraint; -import org.icatproject.core.manager.LuceneApi; +import org.icatproject.core.IcatException; +import org.icatproject.core.manager.search.SearchApi; @Comment("Many to many relationship between investigation and user. 
It is expected that this will show the association of " + "individual users with an investigation which might be derived from the proposal. It may also be used as the " @@ -38,12 +40,13 @@ public InvestigationUser() { } @Override - public void getDoc(JsonGenerator gen) { - if (user.getFullName() != null) { - LuceneApi.encodeTextfield(gen, "text", user.getFullName()); + public void getDoc(EntityManager manager, JsonGenerator gen) throws IcatException { + if (user.getName() == null) { + user = manager.find(user.getClass(), user.id); } - LuceneApi.encodeStringField(gen, "name", user.getName()); - LuceneApi.encodeSortedDocValuesField(gen, "investigation", investigation.id); + user.getDoc(manager, gen); + SearchApi.encodeLong(gen, "investigation.id", investigation.id); + SearchApi.encodeLong(gen, "id", id); } public String getRole() { diff --git a/src/main/java/org/icatproject/core/entity/Parameter.java b/src/main/java/org/icatproject/core/entity/Parameter.java index ef6c53070..af830c80c 100644 --- a/src/main/java/org/icatproject/core/entity/Parameter.java +++ b/src/main/java/org/icatproject/core/entity/Parameter.java @@ -16,8 +16,8 @@ import org.icatproject.core.IcatException; import org.icatproject.core.manager.EntityBeanManager.PersistMode; +import org.icatproject.core.manager.search.SearchApi; import org.icatproject.core.manager.GateKeeper; -import org.icatproject.core.manager.LuceneApi; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -162,16 +162,26 @@ public void postMergeFixup(EntityManager manager, GateKeeper gateKeeper) throws } @Override - public void getDoc(JsonGenerator gen) { - LuceneApi.encodeStringField(gen, "name", type.getName()); - LuceneApi.encodeStringField(gen, "units", type.getUnits()); + public void getDoc(EntityManager manager, JsonGenerator gen) throws IcatException { if (stringValue != null) { - LuceneApi.encodeStringField(gen, "stringValue", stringValue); + SearchApi.encodeString(gen, "stringValue", stringValue); } else if 
(numericValue != null) { - LuceneApi.encodeDoubleField(gen, "numericValue", numericValue); + SearchApi.encodeDouble(gen, "numericValue", numericValue); } else if (dateTimeValue != null) { - LuceneApi.encodeStringField(gen, "dateTimeValue", dateTimeValue); + SearchApi.encodeLong(gen, "dateTimeValue", dateTimeValue); } + if (rangeTop != null) { + SearchApi.encodeDouble(gen, "rangeTop", rangeTop); + } + if (rangeBottom != null) { + SearchApi.encodeDouble(gen, "rangeBottom", rangeBottom); + } + + if (type.getName() == null || type.getUnits() == null) { + type = manager.find(type.getClass(), type.id); + } + type.getDoc(manager, gen); + SearchApi.encodeLong(gen, "id", id); } } diff --git a/src/main/java/org/icatproject/core/entity/ParameterType.java b/src/main/java/org/icatproject/core/entity/ParameterType.java index cbd5f36d8..df24adfb2 100644 --- a/src/main/java/org/icatproject/core/entity/ParameterType.java +++ b/src/main/java/org/icatproject/core/entity/ParameterType.java @@ -2,11 +2,16 @@ import java.io.Serializable; import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; import java.util.List; +import java.util.Set; +import jakarta.json.stream.JsonGenerator; import jakarta.persistence.CascadeType; import jakarta.persistence.Column; import jakarta.persistence.Entity; +import jakarta.persistence.EntityManager; import jakarta.persistence.FetchType; import jakarta.persistence.JoinColumn; import jakarta.persistence.ManyToOne; @@ -14,6 +19,9 @@ import jakarta.persistence.Table; import jakarta.persistence.UniqueConstraint; +import org.icatproject.core.IcatException; +import org.icatproject.core.manager.search.SearchApi; + @Comment("A parameter type with unique name and units") @SuppressWarnings("serial") @Entity @@ -91,6 +99,9 @@ public class ParameterType extends EntityBaseBean implements Serializable { @Comment("If ordinary users are allowed to create their own parameter types this indicates that this one has been approved") private boolean 
verified; + public static Set docFields = new HashSet<>( + Arrays.asList("type.name", "type.units", "type.unitsSI", "numericValueSI", "type.id")); + /* Needed for JPA */ public ParameterType() { } @@ -271,4 +282,11 @@ public void setVerified(boolean verified) { this.verified = verified; } + @Override + public void getDoc(EntityManager manager, JsonGenerator gen) throws IcatException { + SearchApi.encodeString(gen, "type.name", name); + SearchApi.encodeString(gen, "type.units", units); + SearchApi.encodeLong(gen, "type.id", id); + } + } diff --git a/src/main/java/org/icatproject/core/entity/Sample.java b/src/main/java/org/icatproject/core/entity/Sample.java index 06d792249..54ef699c9 100644 --- a/src/main/java/org/icatproject/core/entity/Sample.java +++ b/src/main/java/org/icatproject/core/entity/Sample.java @@ -2,12 +2,16 @@ import java.io.Serializable; import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; import java.util.List; +import java.util.Set; import jakarta.json.stream.JsonGenerator; import jakarta.persistence.CascadeType; import jakarta.persistence.Column; import jakarta.persistence.Entity; +import jakarta.persistence.EntityManager; import jakarta.persistence.FetchType; import jakarta.persistence.JoinColumn; import jakarta.persistence.ManyToOne; @@ -15,7 +19,8 @@ import jakarta.persistence.Table; import jakarta.persistence.UniqueConstraint; -import org.icatproject.core.manager.LuceneApi; +import org.icatproject.core.IcatException; +import org.icatproject.core.manager.search.SearchApi; @Comment("A sample to be used in an investigation") @SuppressWarnings("serial") @@ -43,6 +48,9 @@ public class Sample extends EntityBaseBean implements Serializable { @ManyToOne(fetch = FetchType.LAZY) private SampleType type; + public static Set docFields = new HashSet<>( + Arrays.asList("sample.name", "sample.id", "sample.investigation.id")); + /* Needed for JPA */ public Sample() { } @@ -96,12 +104,21 @@ public void setType(SampleType type) { } 
@Override - public void getDoc(JsonGenerator gen) { - StringBuilder sb = new StringBuilder(name); + public void getDoc(EntityManager manager, JsonGenerator gen) throws IcatException { + SearchApi.encodeString(gen, "sample.name", name); + SearchApi.encodeLong(gen, "sample.id", id); + if (investigation != null) { + // Investigation is not nullable, but it is possible to pass Samples without their Investigation + // relationship populated when creating Datasets, where this field is not needed anyway - so guard against + // null pointers + SearchApi.encodeLong(gen, "sample.investigation.id", investigation.id); + } if (type != null) { - sb.append(" " + type.getName()); + if (type.getName() == null) { + type = manager.find(type.getClass(), type.id); + } + type.getDoc(manager, gen); } - LuceneApi.encodeTextfield(gen, "text", sb.toString()); - LuceneApi.encodeSortedDocValuesField(gen, "investigation", investigation.id); } + } diff --git a/src/main/java/org/icatproject/core/entity/SampleParameter.java b/src/main/java/org/icatproject/core/entity/SampleParameter.java index 376123ac1..f524ce739 100644 --- a/src/main/java/org/icatproject/core/entity/SampleParameter.java +++ b/src/main/java/org/icatproject/core/entity/SampleParameter.java @@ -2,6 +2,7 @@ import java.io.Serializable; +import jakarta.json.stream.JsonGenerator; import jakarta.persistence.Entity; import jakarta.persistence.EntityManager; import jakarta.persistence.JoinColumn; @@ -12,6 +13,7 @@ import org.icatproject.core.IcatException; import org.icatproject.core.manager.EntityBeanManager.PersistMode; +import org.icatproject.core.manager.search.SearchApi; import org.icatproject.core.manager.GateKeeper; @Comment("A parameter associated with a sample") @@ -51,4 +53,10 @@ public void setSample(Sample sample) { this.sample = sample; } + @Override + public void getDoc(EntityManager manager, JsonGenerator gen) throws IcatException { + super.getDoc(manager, gen); + SearchApi.encodeLong(gen, "sample.id", sample.id); + } + } 
\ No newline at end of file diff --git a/src/main/java/org/icatproject/core/entity/SampleType.java b/src/main/java/org/icatproject/core/entity/SampleType.java index dabab31fc..5dedd1da2 100644 --- a/src/main/java/org/icatproject/core/entity/SampleType.java +++ b/src/main/java/org/icatproject/core/entity/SampleType.java @@ -2,11 +2,16 @@ import java.io.Serializable; import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; import java.util.List; +import java.util.Set; +import jakarta.json.stream.JsonGenerator; import jakarta.persistence.CascadeType; import jakarta.persistence.Column; import jakarta.persistence.Entity; +import jakarta.persistence.EntityManager; import jakarta.persistence.FetchType; import jakarta.persistence.JoinColumn; import jakarta.persistence.ManyToOne; @@ -14,6 +19,9 @@ import jakarta.persistence.Table; import jakarta.persistence.UniqueConstraint; +import org.icatproject.core.IcatException; +import org.icatproject.core.manager.search.SearchApi; + @Comment("A sample to be used in an investigation") @SuppressWarnings("serial") @Entity @@ -40,6 +48,8 @@ public class SampleType extends EntityBaseBean implements Serializable { @OneToMany(cascade = CascadeType.ALL, mappedBy = "type") private List samples = new ArrayList<>(); + public static Set docFields = new HashSet<>(Arrays.asList("sample.type.name", "sample.type.id")); + /* Needed for JPA */ public SampleType() { } @@ -84,4 +94,10 @@ public void setSamples(List samples) { this.samples = samples; } + @Override + public void getDoc(EntityManager manager, JsonGenerator gen) throws IcatException { + SearchApi.encodeString(gen, "sample.type.name", name); + SearchApi.encodeLong(gen, "sample.type.id", id); + } + } diff --git a/src/main/java/org/icatproject/core/entity/Technique.java b/src/main/java/org/icatproject/core/entity/Technique.java index 3444b1afe..c3fd2e401 100644 --- a/src/main/java/org/icatproject/core/entity/Technique.java +++ 
b/src/main/java/org/icatproject/core/entity/Technique.java @@ -2,15 +2,23 @@ import java.io.Serializable; import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; import java.util.List; +import java.util.Set; +import jakarta.json.stream.JsonGenerator; import jakarta.persistence.CascadeType; import jakarta.persistence.Column; import jakarta.persistence.Entity; +import jakarta.persistence.EntityManager; import jakarta.persistence.OneToMany; import jakarta.persistence.Table; import jakarta.persistence.UniqueConstraint; +import org.icatproject.core.IcatException; +import org.icatproject.core.manager.search.SearchApi; + @Comment("Represents an experimental technique") @SuppressWarnings("serial") @Entity @@ -30,6 +38,9 @@ public class Technique extends EntityBaseBean implements Serializable { @OneToMany(cascade = CascadeType.ALL, mappedBy = "technique") private List datasetTechniques = new ArrayList(); + public static Set docFields = new HashSet<>( + Arrays.asList("technique.id", "technique.name", "technique.description", "technique.pid")); + public String getName() { return name; } @@ -61,4 +72,12 @@ public void setDescription(String description) { public void setDatasetTechniques(List datasetTechniques) { this.datasetTechniques = datasetTechniques; } + + @Override + public void getDoc(EntityManager manager, JsonGenerator gen) throws IcatException { + SearchApi.encodeLong(gen, "technique.id", id); + SearchApi.encodeString(gen, "technique.name", name); + SearchApi.encodeNullableString(gen, "technique.description", description); + SearchApi.encodeNullableString(gen, "technique.pid", pid); + } } diff --git a/src/main/java/org/icatproject/core/entity/User.java b/src/main/java/org/icatproject/core/entity/User.java index 521b29df3..a239e02a1 100644 --- a/src/main/java/org/icatproject/core/entity/User.java +++ b/src/main/java/org/icatproject/core/entity/User.java @@ -2,15 +2,23 @@ import java.io.Serializable; import java.util.ArrayList; +import 
java.util.Arrays; +import java.util.HashSet; import java.util.List; +import java.util.Set; +import jakarta.json.stream.JsonGenerator; import jakarta.persistence.CascadeType; import jakarta.persistence.Column; import jakarta.persistence.Entity; +import jakarta.persistence.EntityManager; import jakarta.persistence.OneToMany; import jakarta.persistence.Table; import jakarta.persistence.UniqueConstraint; +import org.icatproject.core.IcatException; +import org.icatproject.core.manager.search.SearchApi; + @Comment("A user of the facility") @SuppressWarnings("serial") @Entity @@ -54,6 +62,8 @@ public class User extends EntityBaseBean implements Serializable { @OneToMany(cascade = CascadeType.ALL, mappedBy = "user") private List dataPublicationUsers = new ArrayList(); + public static Set docFields = new HashSet<>(Arrays.asList("user.name", "user.fullName", "user.id")); + public User() { } @@ -158,4 +168,11 @@ public String toString() { return "User[name=" + name + "]"; } + @Override + public void getDoc(EntityManager manager, JsonGenerator gen) throws IcatException { + SearchApi.encodeNullableString(gen, "user.fullName", fullName); + SearchApi.encodeString(gen, "user.name", name); + SearchApi.encodeLong(gen, "user.id", id); + } + } diff --git a/src/main/java/org/icatproject/core/manager/EntityBeanManager.java b/src/main/java/org/icatproject/core/manager/EntityBeanManager.java index 185fba507..2e0c283bb 100644 --- a/src/main/java/org/icatproject/core/manager/EntityBeanManager.java +++ b/src/main/java/org/icatproject/core/manager/EntityBeanManager.java @@ -63,11 +63,17 @@ import org.icatproject.core.entity.Dataset; import org.icatproject.core.entity.EntityBaseBean; import org.icatproject.core.entity.Investigation; +import org.icatproject.core.entity.InvestigationInstrument; import org.icatproject.core.entity.ParameterValueType; +import org.icatproject.core.entity.Sample; import org.icatproject.core.entity.Session; import 
org.icatproject.core.manager.EntityInfoHandler.Relationship; import org.icatproject.core.manager.PropertyHandler.CallType; import org.icatproject.core.manager.PropertyHandler.Operation; +import org.icatproject.core.manager.search.FacetDimension; +import org.icatproject.core.manager.search.ScoredEntityBaseBean; +import org.icatproject.core.manager.search.SearchManager; +import org.icatproject.core.manager.search.SearchResult; import org.icatproject.core.oldparser.OldGetQuery; import org.icatproject.core.oldparser.OldInput; import org.icatproject.core.oldparser.OldLexerException; @@ -132,7 +138,7 @@ public enum PersistMode { Transmitter transmitter; @EJB - LuceneManager lucene; + SearchManager searchManager; private boolean log; @@ -142,9 +148,11 @@ public enum PersistMode { private Map notificationRequests; - private boolean luceneActive; + private boolean searchActive; + private long searchMaxSearchTimeMillis; private int maxEntities; + private int searchSearchBlockSize; private long exportCacheSize; private Set rootUserNames; @@ -249,8 +257,8 @@ public CreateResponse create(String userId, EntityBaseBean bean, EntityManager m long beanId = bean.getId(); - if (luceneActive) { - bean.addToLucene(lucene); + if (searchActive) { + bean.addToSearch(manager, searchManager); } userTransaction.commit(); if (logRequests.contains(CallType.WRITE)) { @@ -380,9 +388,9 @@ public List createMany(String userId, List beans transmitter.processMessage("createMany", ip, baos.toString(), startMillis); } - if (luceneActive) { + if (searchActive) { for (EntityBaseBean bean : beans) { - bean.addToLucene(lucene); + bean.addToSearch(manager, searchManager); } } @@ -499,9 +507,9 @@ public void delete(String userId, List beans, EntityManager mana userTransaction.commit(); - if (luceneActive) { + if (searchActive) { for (EntityBaseBean bean : allBeansToDelete) { - lucene.deleteDocument(bean); + searchManager.deleteDocument(bean); } } @@ -778,30 +786,64 @@ private void exportTable(String 
beanName, Set ids, OutputStream output, } } - private void filterReadAccess(List results, List allResults, + /** + * Performs authorisation for READ access on the newResults. Instead of + * returning the entries which can be READ, they are added to the end of + * acceptedResults, ensuring it doesn't exceed maxCount or maxEntities. + * + * @param acceptedResults List containing already authorised entities. Entries + * in newResults that pass authorisation will be added to + * acceptedResults. + * @param newResults List containing new results to check READ access to. + * Entries in newResults that pass authorisation will be + * added to acceptedResults. + * @param maxCount The maximum size of acceptedResults. Once reached, no + * more entries from newResults will be added. + * @param userId The user attempting to read the newResults. + * @param manager The EntityManager to use. + * @param klass The Class of the EntityBaseBean that is being + * filtered. + * @throws IcatException If more entities than the configuration option + * maxEntities would be added to acceptedResults, then an + * IcatException is thrown instead. 
+ */ + private ScoredEntityBaseBean filterReadAccess(List acceptedResults, List newResults, int maxCount, String userId, EntityManager manager, Class klass) throws IcatException { - logger.debug("Got " + allResults.size() + " results from Lucene"); - for (ScoredEntityBaseBean sr : allResults) { - long entityId = sr.getEntityBaseBeanId(); - EntityBaseBean beanManaged = manager.find(klass, entityId); - if (beanManaged != null) { - try { - gateKeeper.performAuthorisation(userId, beanManaged, AccessType.READ, manager); - results.add(new ScoredEntityBaseBean(entityId, sr.getScore())); - if (results.size() > maxEntities) { + logger.debug("Got " + newResults.size() + " results from search engine"); + Set allowedIds = gateKeeper.getReadableIds(userId, newResults, klass.getSimpleName(), manager); + if (allowedIds == null) { + // A null result means there are no restrictions on the readable ids, so add as + // many newResults as we need to reach maxCount + int needed = maxCount - acceptedResults.size(); + if (newResults.size() > needed) { + acceptedResults.addAll(newResults.subList(0, needed)); + return newResults.get(needed - 1); + } else { + acceptedResults.addAll(newResults); + } + if (acceptedResults.size() > maxEntities) { + throw new IcatException(IcatExceptionType.VALIDATION, + "attempt to return more than " + maxEntities + " entities"); + } + } else { + // Otherwise, add results in order until we reach maxCount + for (ScoredEntityBaseBean newResult : newResults) { + if (allowedIds.contains(newResult.getId())) { + acceptedResults.add(newResult); + if (acceptedResults.size() > maxEntities) { throw new IcatException(IcatExceptionType.VALIDATION, "attempt to return more than " + maxEntities + " entities"); } - if (results.size() == maxCount) { - break; + if (acceptedResults.size() == maxCount) { + logger.debug("maxCount {} reached", maxCount); + return newResult; } - } catch (IcatException e) { - // Nothing to do } } } + return null; } private EntityBaseBean 
find(EntityBaseBean bean, EntityManager manager) throws IcatException { @@ -1149,8 +1191,10 @@ void init() { logRequests = propertyHandler.getLogSet(); log = !logRequests.isEmpty(); notificationRequests = propertyHandler.getNotificationRequests(); - luceneActive = lucene.isActive(); + searchActive = searchManager.isActive(); + searchMaxSearchTimeMillis = propertyHandler.getSearchMaxSearchTimeMillis(); maxEntities = propertyHandler.getMaxEntities(); + searchSearchBlockSize = propertyHandler.getSearchSearchBlockSize(); exportCacheSize = propertyHandler.getImportCacheSize(); rootUserNames = propertyHandler.getRootUserNames(); key = propertyHandler.getKey(); @@ -1375,160 +1419,358 @@ public EntityBaseBean lookup(EntityBaseBean bean, EntityManager manager) throws return results.get(0); } - public void luceneClear() throws IcatException { - if (luceneActive) { - lucene.clear(); + public void searchClear() throws IcatException { + if (searchActive) { + searchManager.clear(); } } - public void luceneCommit() throws IcatException { - if (luceneActive) { - lucene.commit(); + public void searchCommit() throws IcatException { + if (searchActive) { + searchManager.commit(); } } - public List luceneDatafiles(String userName, String user, String text, Date lower, Date upper, - List parms, int maxCount, EntityManager manager, String ip) throws IcatException { - long startMillis = log ? System.currentTimeMillis() : 0; + /** + * Performs a search on a single entity, and authorises the results before + * returning. Does not support sorting or searchAfter. + * + * @param userName User performing the search, used for authorisation. + * @param jo JsonObject containing the details of the query to be used. + * @param maxCount The maximum number of results to collect before returning. If + * a batch from the search engine has more than this many + * authorised results, then the excess results will be + * discarded. + * @param manager EntityManager for finding entities from their Id. 
+ * @param ip Used for logging only. + * @param klass Class of the entity to search. + * @return SearchResult for the query. + * @throws IcatException + */ + public List freeTextSearch(String userName, JsonObject jo, int maxCount, + EntityManager manager, String ip, Class klass) throws IcatException { + long startMillis = System.currentTimeMillis(); List results = new ArrayList<>(); - if (luceneActive) { - LuceneSearchResult last = null; - Long uid = null; - List allResults = Collections.emptyList(); - /* - * As results may be rejected and maxCount may be 1 ensure that we - * don't make a huge number of calls to Lucene - */ - int blockSize = Math.max(1000, maxCount); + if (searchActive) { + searchDocuments(userName, jo, null, maxCount, maxCount, null, manager, klass, + startMillis, results, Arrays.asList("id")); + } + logSearch(userName, ip, startMillis, results, "freeTextSearch"); + return results; + } - do { - if (last == null) { - last = lucene.datafiles(user, text, lower, upper, parms, blockSize); - uid = last.getUid(); - } else { - last = lucene.datafilesAfter(uid, blockSize); + /** + * Performs a search on a single entity, and authorises the results before + * returning. + * + * @param userName User performing the search, used for authorisation. + * @param jo JsonObject containing the details of the query to be used. + * @param searchAfter JsonValue representation of the final result from a + * previous search. + * @param minCount The minimum number of results to collect before returning. + * If a batch from the search engine has at least this many + * authorised results, no further batches will be requested. + * @param maxCount The maximum number of results to collect before returning. + * If a batch from the search engine has more than this many + * authorised results, then the excess results will be + * discarded. + * @param sort String of Json representing sort criteria. + * @param manager EntityManager for finding entities from their Id. 
+ * @param ip Used for logging only. + * @param klass Class of the entity to search. + * @return SearchResult for the query. + * @throws IcatException + */ + public SearchResult freeTextSearchDocs(String userName, JsonObject jo, JsonValue searchAfter, int minCount, + int maxCount, String sort, EntityManager manager, String ip, Class klass) + throws IcatException { + long startMillis = System.currentTimeMillis(); + JsonValue lastSearchAfter = null; + List results = new ArrayList<>(); + List dimensions = new ArrayList<>(); + if (searchActive) { + List fields = SearchManager.getPublicSearchFields(gateKeeper, klass.getSimpleName()); + lastSearchAfter = searchDocuments(userName, jo, searchAfter, maxCount, minCount, sort, manager, klass, + startMillis, results, fields); + + if (jo.containsKey("facets")) { + List jsonFacets = jo.getJsonArray("facets").getValuesAs(JsonObject.class); + for (JsonObject jsonFacet : jsonFacets) { + String target = jsonFacet.getString("target"); + JsonObject facetQuery = buildFacetQuery(klass, target, results, jsonFacet); + if (facetQuery != null) { + dimensions.addAll(searchManager.facetSearch(target, facetQuery, results.size(), 10)); + } } - allResults = last.getResults(); - filterReadAccess(results, allResults, maxCount, userName, manager, Datafile.class); - } while (results.size() != maxCount && allResults.size() == blockSize); - /* failing lucene retrieval calls clean up before throwing */ - lucene.freeSearcher(uid); + } } + logSearch(userName, ip, startMillis, results, "freeTextSearchDocs"); + return new SearchResult(lastSearchAfter, results, dimensions); + } + /** + * Performs logging dependent on the value of logRequests. 
+ * + * @param userName User performing the search + * @param ip Used for logging only + * @param startMillis The start time of the search in milliseconds + * @param results List of authorised search results + * @param operation Name of the calling function + */ + private void logSearch(String userName, String ip, long startMillis, List results, + String operation) { if (logRequests.contains("R")) { ByteArrayOutputStream baos = new ByteArrayOutputStream(); try (JsonGenerator gen = Json.createGenerator(baos).writeStartObject()) { gen.write("userName", userName); if (results.size() > 0) { - gen.write("entityId", results.get(0).getEntityBaseBeanId()); + gen.write("entityId", results.get(0).getId()); } gen.writeEnd(); } - transmitter.processMessage("luceneDatafiles", ip, baos.toString(), startMillis); + transmitter.processMessage(operation, ip, baos.toString(), startMillis); } logger.debug("Returning {} results", results.size()); - return results; } - public List luceneDatasets(String userName, String user, String text, Date lower, Date upper, - List parms, int maxCount, EntityManager manager, String ip) throws IcatException { - long startMillis = log ? System.currentTimeMillis() : 0; - List results = new ArrayList<>(); - if (luceneActive) { - LuceneSearchResult last = null; - Long uid = null; - List allResults = Collections.emptyList(); - /* - * As results may be rejected and maxCount may be 1 ensure that we - * don't make a huge number of calls to Lucene - */ - int blockSize = Math.max(1000, maxCount); + /** + * Performs batches of searches, the results of which are authorised. Results + * are collected until they run out, minCount is reached, or too much time + * elapses. + * + * @param userName User performing the search, used for authorisation. + * @param jo JsonObject containing the details of the query to be used. + * @param searchAfter JsonValue representation of the final result from a + * previous search. 
+ * @param minCount The minimum number of results to collect before returning. + * If a batch from the search engine has at least this many + * authorised results, no further batches will be requested. + * @param maxCount The maximum number of results to collect before returning. + * If a batch from the search engine has more than this many + * authorised results, then the excess results will be + * discarded. + * @param sort String of Json representing sort criteria. + * @param manager EntityManager for finding entities from their Id. + * @param klass Class of the entity to search. + * @param startMillis The start time of the search in milliseconds + * @param results List of results from the search. Authorised results will + * be appended to this List. + * @param fields Fields to include in the returned Documents. + * @return JsonValue representing the last result of the search, formatted to + * allow future searches to "search after" this result. May be null. + * @throws IcatException If the search exceeds the maximum allowed time. 
+ */ + private JsonValue searchDocuments(String userName, JsonObject jo, JsonValue searchAfter, int maxCount, int minCount, + String sort, EntityManager manager, Class klass, long startMillis, + List results, List fields) throws IcatException { + JsonValue lastSearchAfter; + do { + SearchResult lastSearchResult = searchManager.freeTextSearch(jo, searchAfter, searchSearchBlockSize, sort, fields); + List allResults = lastSearchResult.getResults(); + ScoredEntityBaseBean lastBean = filterReadAccess(results, allResults, maxCount, userName, manager, + klass); + if (lastBean == null) { + // Haven't stopped early, so use the Lucene provided searchAfter document + lastSearchAfter = lastSearchResult.getSearchAfter(); + if (lastSearchAfter == null) { + return null; // If searchAfter is null, we ran out of results so stop here + } + searchAfter = lastSearchAfter; + } else { + // Have stopped early by reaching the limit, so build a searchAfter document + return searchManager.buildSearchAfter(lastBean, sort); + } + if (System.currentTimeMillis() - startMillis > searchMaxSearchTimeMillis) { + String msg = "Search cancelled for exceeding " + searchMaxSearchTimeMillis / 1000 + " seconds"; + throw new IcatException(IcatExceptionType.INTERNAL, msg); + } + } while (results.size() < minCount); + return lastSearchAfter; + } - do { - if (last == null) { - last = lucene.datasets(user, text, lower, upper, parms, blockSize); - uid = last.getUid(); - } else { - last = lucene.datasetsAfter(uid, blockSize); - } - allResults = last.getResults(); - filterReadAccess(results, allResults, maxCount, userName, manager, Dataset.class); - } while (results.size() != maxCount && allResults.size() == blockSize); - /* failing lucene retrieval calls clean up before throwing */ - lucene.freeSearcher(uid); - } - if (logRequests.contains("R")) { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try (JsonGenerator gen = Json.createGenerator(baos).writeStartObject()) { - gen.write("userName", 
userName); - if (results.size() > 0) { - gen.write("entityId", results.get(0).getEntityBaseBeanId()); + /** + * Perform faceting on entities of klass using the criteria contained in jo. + * + * @param jo JsonObject containing "facets" key with a value of a JsonArray + * of JsonObjects. + * @param klass Class of the entity to facet. + * @return SearchResult with only the dimensions set. + * @throws IcatException + */ + public SearchResult facetDocs(JsonObject jo, Class klass) throws IcatException { + List dimensions = new ArrayList<>(); + if (searchActive && jo.containsKey("facets")) { + List jsonFacets = jo.getJsonArray("facets").getValuesAs(JsonObject.class); + for (JsonObject jsonFacet : jsonFacets) { + String target = jsonFacet.getString("target"); + JsonObject filterObject = jo.getJsonObject("filter"); + JsonObject facetQuery = buildFacetQuery(klass, target, filterObject, jsonFacet); + if (facetQuery != null) { + dimensions.addAll(searchManager.facetSearch(target, facetQuery, 1000, 10)); } - gen.writeEnd(); } - transmitter.processMessage("luceneDatasets", ip, baos.toString(), startMillis); } - logger.debug("Returning {} results", results.size()); - return results; + return new SearchResult(dimensions); } - public List luceneGetPopulating() { - if (luceneActive) { - return lucene.getPopulating(); + /** + * Formats Json for requesting faceting. Performs the logic needed to ensure + * that we do not facet on a field that should not be visible. + * + * @param klass Class of the entity to facet. + * @param target The entity which directly posses the dimensions of + * interest. Note this may be different than the klass, for + * example if klass is Investigation then target might be + * InvestigationParameter. + * @param filterObject JsonObject to be used as the query. + * @param jsonFacet JsonObject containing the dimensions to facet. 
+ * @return JsonObject with the format + * {"query": `filterObject`, "dimensions": [...]} + * @throws IcatException + */ + private JsonObject buildFacetQuery(Class klass, String target, JsonObject filterObject, + JsonObject jsonFacet) throws IcatException { + if (target.equals(klass.getSimpleName())) { + return SearchManager.buildFacetQuery(filterObject, jsonFacet); } else { - return Collections.emptyList(); + Relationship relationship; + if (target.equals("SampleParameter")) { + Relationship sampleRelationship; + if (klass.getSimpleName().equals("Investigation")) { + sampleRelationship = EntityInfoHandler.getRelationshipsByName(klass).get("samples"); + } else { + if (klass.getSimpleName().equals("Datafile")) { + Relationship datasetRelationship = EntityInfoHandler.getRelationshipsByName(klass).get("dataset"); + if (!gateKeeper.allowed(datasetRelationship)) { + return null; + } + } + sampleRelationship = EntityInfoHandler.getRelationshipsByName(Dataset.class).get("sample"); + } + Relationship parameterRelationship = EntityInfoHandler.getRelationshipsByName(Sample.class).get("parameters"); + if (!gateKeeper.allowed(sampleRelationship) || !gateKeeper.allowed(parameterRelationship)) { + return null; + } + return SearchManager.buildFacetQuery(filterObject, jsonFacet); + } else if (target.contains("Parameter")) { + relationship = EntityInfoHandler.getRelationshipsByName(klass).get("parameters"); + } else if (target.contains("DatasetTechnique")) { + relationship = EntityInfoHandler.getRelationshipsByName(klass).get("datasetTechniques"); + } else { + relationship = EntityInfoHandler.getRelationshipsByName(klass).get(target.toLowerCase() + "s"); + } + + if (gateKeeper.allowed(relationship)) { + return SearchManager.buildFacetQuery(filterObject, jsonFacet); + } else { + logger.debug("Cannot collect facets for {} as Relationship with parent {} is not allowed", + target, klass.getSimpleName()); + return null; + } } } - public List luceneInvestigations(String userName, String 
user, String text, Date lower, - Date upper, List parms, List samples, String userFullName, int maxCount, - EntityManager manager, String ip) throws IcatException { - long startMillis = log ? System.currentTimeMillis() : 0; - List results = new ArrayList<>(); - if (luceneActive) { - LuceneSearchResult last = null; - Long uid = null; - List allResults = Collections.emptyList(); - /* - * As results may be rejected and maxCount may be 1 ensure that we - * don't make a huge number of calls to Lucene - */ - int blockSize = Math.max(1000, maxCount); - - do { - if (last == null) { - last = lucene.investigations(user, text, lower, upper, parms, samples, userFullName, blockSize); - uid = last.getUid(); + /** + * Formats Json for requesting faceting. Performs the logic needed to ensure + * that we do not facet on a field that should not be visible. + * + * @param klass Class of the entity to facet. + * @param target The entity which directly posses the dimensions of interest. + * Note this may be different than the klass, for example if + * klass is Investigation then target might be + * InvestigationParameter. + * @param results List of results from a previous search, containing entity + * ids. + * @param jsonFacet JsonObject containing the dimensions to facet. 
+ * @return {"query": {`idField`: [...]}, "dimensions": [...]} + * @throws IcatException + */ + private JsonObject buildFacetQuery(Class klass, String target, + List results, JsonObject jsonFacet) throws IcatException { + String parentName = klass.getSimpleName(); + if (target.equals(parentName)) { + return SearchManager.buildFacetQuery(results, "id", jsonFacet); + } else { + Relationship relationship; + if (target.equals("SampleParameter")) { + Relationship sampleRelationship; + if (parentName.equals("Investigation")) { + sampleRelationship = EntityInfoHandler.getRelationshipsByName(klass).get("samples"); } else { - last = lucene.investigationsAfter(uid, blockSize); + if (parentName.equals("Datafile")) { + Relationship datasetRelationship = EntityInfoHandler.getRelationshipsByName(klass).get("dataset"); + if (!gateKeeper.allowed(datasetRelationship)) { + logger.debug("Cannot collect facets for {} as Relationship with parent {} is not allowed", target, + parentName); + return null; + } + } + sampleRelationship = EntityInfoHandler.getRelationshipsByName(Dataset.class).get("sample"); + } + Relationship parameterRelationship = EntityInfoHandler.getRelationshipsByName(Sample.class).get("parameters"); + if (!gateKeeper.allowed(sampleRelationship) || !gateKeeper.allowed(parameterRelationship)) { + logger.debug("Cannot collect facets for {} as Relationship with parent {} is not allowed", target, + parentName); + return null; + } + return SearchManager.buildFacetQuery(results, "sample.id", "sample.id", jsonFacet); + } else if (target.equals("InvestigationInstrument")) { + List relationships = new ArrayList<>(); + String resultIdField = "id"; + if (parentName.equals("Datafile")) { + resultIdField = "investigation.id"; + relationships.add(EntityInfoHandler.getRelationshipsByName(Datafile.class).get("dataset")); + relationships.add(EntityInfoHandler.getRelationshipsByName(Dataset.class).get("investigation")); + } else if (parentName.equals("Dataset")) { + resultIdField = 
"investigation.id"; + relationships.add(EntityInfoHandler.getRelationshipsByName(Dataset.class).get("investigation")); + } + relationships.add(EntityInfoHandler.getRelationshipsByName(Investigation.class).get("investigationInstruments")); + relationships.add(EntityInfoHandler.getRelationshipsByName(InvestigationInstrument.class).get("instrument")); + for (Relationship r : relationships) { + if (!gateKeeper.allowed(r)) { + logger.debug("Cannot collect facets for {} as Relationship with parent {} is not allowed", target, + parentName); + return null; + } } - allResults = last.getResults(); - filterReadAccess(results, allResults, maxCount, userName, manager, Investigation.class); - } while (results.size() != maxCount && allResults.size() == blockSize); - /* failing lucene retrieval calls clean up before throwing */ - lucene.freeSearcher(uid); - } - if (logRequests.contains("R")) { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try (JsonGenerator gen = Json.createGenerator(baos).writeStartObject()) { - gen.write("userName", userName); - if (results.size() > 0) { - gen.write("entityId", results.get(0).getEntityBaseBeanId()); + return SearchManager.buildFacetQuery(results, resultIdField, "investigation.id", jsonFacet); + } else if (target.contains("Parameter")) { + relationship = EntityInfoHandler.getRelationshipsByName(klass).get("parameters"); + } else { + relationship = EntityInfoHandler.getRelationshipsByName(klass).get(target.toLowerCase() + "s"); + } + + if (gateKeeper.allowed(relationship)) { + if (target.equals("Sample") && parentName.equals("Investigation")) { + // As samples can be one to many on Investigations or one to one on Datasets, they do not follow + // usual naming conventions in the document mapping + return SearchManager.buildFacetQuery(results, "sample.investigation.id", jsonFacet); } - gen.writeEnd(); + return SearchManager.buildFacetQuery(results, parentName.toLowerCase() + ".id", jsonFacet); + } else { + logger.debug("Cannot 
collect facets for {} as Relationship with parent {} is not allowed", + target, parentName); + return null; } - transmitter.processMessage("luceneInvestigations", ip, baos.toString(), startMillis); } - logger.debug("Returning {} results", results.size()); - return results; } - public void lucenePopulate(String entityName, long minid, EntityManager manager) throws IcatException { - if (luceneActive) { + public List searchGetPopulating() { + if (searchActive) { + return searchManager.getPopulating(); + } else { + return Collections.emptyList(); + } + } + + public void searchPopulate(String entityName, Long minId, Long maxId, boolean delete, EntityManager manager) + throws IcatException { + if (searchActive) { // Throws IcatException if entityName is not an ICAT entity EntityInfoHandler.getClass(entityName); - lucene.populate(entityName, minid); + searchManager.populate(entityName, minId, maxId, delete); } } @@ -1897,8 +2139,8 @@ public NotificationMessage update(String userId, EntityBaseBean bean, EntityMana } transmitter.processMessage("update", ip, baos.toString(), startMillis); } - if (luceneActive) { - lucene.updateDocument(beanManaged); + if (searchActive) { + searchManager.updateDocument(manager, beanManaged); } return notification; } catch (IcatException e) { @@ -1970,7 +2212,7 @@ public List write(String userId, String json, EntityManager manager, UserT userTransaction.commit(); /* - * Nothing should be able to go wrong now so log, update lucene + * Nothing should be able to go wrong now so log, update * and send notification messages */ if (logRequests.contains(CallType.WRITE)) { @@ -1996,12 +2238,12 @@ public List write(String userId, String json, EntityManager manager, UserT } } - if (luceneActive) { + if (searchActive) { for (EntityBaseBean eb : creates) { - lucene.addDocument(eb); + searchManager.addDocument(manager, eb); } for (EntityBaseBean eb : updates) { - lucene.updateDocument(eb); + searchManager.updateDocument(manager, eb); } } @@ -2334,7 +2576,7 
@@ public long cloneEntity(String userId, String beanName, long id, String keys, En } /* - * Nothing should be able to go wrong now so log, update lucene and send + * Nothing should be able to go wrong now so log, update and send * notification messages */ if (logRequests.contains(CallType.WRITE)) { @@ -2349,9 +2591,9 @@ public long cloneEntity(String userId, String beanName, long id, String keys, En transmitter.processMessage("write", ip, baos.toString(), startMillis); } - if (luceneActive) { + if (searchActive) { for (EntityBaseBean c : clonedTo.values()) { - lucene.addDocument(c); + searchManager.addDocument(manager, c); } } diff --git a/src/main/java/org/icatproject/core/manager/EntityInfoHandler.java b/src/main/java/org/icatproject/core/manager/EntityInfoHandler.java index aecf1af1d..9b151857a 100644 --- a/src/main/java/org/icatproject/core/manager/EntityInfoHandler.java +++ b/src/main/java/org/icatproject/core/manager/EntityInfoHandler.java @@ -23,6 +23,7 @@ import jakarta.json.stream.JsonGenerator; import jakarta.persistence.CascadeType; import jakarta.persistence.Column; +import jakarta.persistence.EntityManager; import jakarta.persistence.GeneratedValue; import jakarta.persistence.Id; import jakarta.persistence.JoinColumn; @@ -116,7 +117,7 @@ private static class PrivateEntityInfo { final Map gettersFromName; final Map relationshipsByName; final Set relInKey; - final boolean hasLuceneDoc; + final boolean hasSearchDoc; public PrivateEntityInfo(Set rels, List notNullableFields, Map getters, Map gettersFromName, Map stringFields, Map setters, @@ -124,7 +125,7 @@ public PrivateEntityInfo(Set rels, List notNullableFields, Map fieldComments, Set ones, Set attributes, Constructor constructor, Map fieldByName, String exportHeader, String exportNull, List fields, String exportHeaderAll, - Map relationshipsByName, Set relInKey, boolean hasLuceneDoc) { + Map relationshipsByName, Set relInKey, boolean hasSearchDoc) { // Use copyOf to create unmodifiable collections 
this.relatedEntities = Set.copyOf(rels); @@ -147,7 +148,7 @@ public PrivateEntityInfo(Set rels, List notNullableFields, this.exportHeaderAll = exportHeaderAll; this.relationshipsByName = Map.copyOf(relationshipsByName); this.relInKey = Set.copyOf(relInKey); - this.hasLuceneDoc = hasLuceneDoc; + this.hasSearchDoc = hasSearchDoc; } } @@ -573,17 +574,17 @@ private static PrivateEntityInfo buildEi(Class objectC } } - boolean hasLuceneDoc = true; + boolean hasSearchDoc = true; try { - objectClass.getDeclaredMethod("getDoc", JsonGenerator.class); + objectClass.getDeclaredMethod("getDoc", EntityManager.class, JsonGenerator.class); } catch (NoSuchMethodException e) { - hasLuceneDoc = false; + hasSearchDoc = false; } return new PrivateEntityInfo(rels, notNullableFields, getters, gettersFromName, stringFields, setters, updaters, constraintFields, commentString, comments, ones, attributes, constructor, fieldsByName, exportHeader.toString(), exportNull.toString(), fields, exportHeaderAll.toString(), relationshipsByName, - relInKey, hasLuceneDoc); + relInKey, hasSearchDoc); } private static PrivateEntityInfo getPrivateEntityInfo(Class objectClass) { @@ -771,8 +772,8 @@ public static Map getStringFields(Class objectClass) { - return getPrivateEntityInfo(objectClass).hasLuceneDoc; + public static boolean hasSearchDoc(Class objectClass) { + return getPrivateEntityInfo(objectClass).hasSearchDoc; } private static int setRelHeader(int n, Field field, StringBuilder exportHeader, StringBuilder exportNull, boolean con) { diff --git a/src/main/java/org/icatproject/core/manager/GateKeeper.java b/src/main/java/org/icatproject/core/manager/GateKeeper.java index 9b512c087..1d0251d5b 100644 --- a/src/main/java/org/icatproject/core/manager/GateKeeper.java +++ b/src/main/java/org/icatproject/core/manager/GateKeeper.java @@ -92,6 +92,8 @@ public int compare(String o1, String o2) { private boolean publicTablesStale; + private boolean publicSearchFieldsStale; + private Map cluster; private String 
basePath = "/icat"; @@ -169,32 +171,117 @@ public Set getPublicTables() { return publicTables; } - public List getReadable(String userId, List beans, EntityManager manager) { - - if (beans.size() == 0) { - return beans; - } - - EntityBaseBean object = beans.get(0); + public Boolean getPublicSearchFieldsStale() { + return publicSearchFieldsStale; + } - Class objectClass = object.getClass(); - String simpleName = objectClass.getSimpleName(); + /** + * Gets READ restrictions that apply to entities of type simpleName, that are + * relevant for the given userId. If userId belongs to a root user, or one of + * the restrictions is itself null, then null is returned. This corresponds to a + * case where the user can READ any entity of type simpleName. + * + * @param userId The user making the READ request. + * @param simpleName The name of the requested entity type. + * @param manager The EntityManager to use. + * @return Returns a list of restrictions that apply to the requested entity + * type. If there are no restrictions, then returns null. + */ + private List getRestrictions(String userId, String simpleName, EntityManager manager) { if (rootUserNames.contains(userId)) { logger.info("\"Root\" user " + userId + " is allowed READ to " + simpleName); - return beans; + return null; } List restrictions = gateKeeperHelper.getRules(Rule.INCLUDE_QUERY, userId, simpleName); logger.debug("Got " + restrictions.size() + " authz queries for READ by " + userId + " to a " - + objectClass.getSimpleName()); + + simpleName); for (String restriction : restrictions) { logger.debug("Query: " + restriction); if (restriction == null) { logger.info("Null restriction => READ permitted to " + simpleName); - return beans; + return null; + } + } + + return restrictions; + } + + /** + * Returns a sub list of the passed entities that the user has READ access to. + * Note that this method accepts and returns instances of EntityBaseBean, unlike + * getReadableIds. 
+ * + * @param userId The user making the READ request. + * @param beans The entities the user wants to READ. + * @param manager The EntityManager to use. + * @return A list of entities the user has read access to + */ + public List getReadable(String userId, List beans, EntityManager manager) { + + if (beans.size() == 0) { + return beans; + } + EntityBaseBean object = beans.get(0); + Class objectClass = object.getClass(); + String simpleName = objectClass.getSimpleName(); + + List restrictions = getRestrictions(userId, simpleName, manager); + if (restrictions == null) { + return beans; + } + + Set readableIds = getReadableIds(userId, beans, restrictions, manager); + + List results = new ArrayList<>(); + for (EntityBaseBean bean : beans) { + if (readableIds.contains(bean.getId())) { + results.add(bean); } } + return results; + } + + /** + * Returns a set of ids that indicate entities of type simpleName that the user + * has READ access to. If all of the entities can be READ (restrictions are + * null) then null is returned. Note that while this accepts anything that + * HasEntityId, the ids are returned as a Set unlike getReadable. + * + * @param userId The user making the READ request. + * @param entities The entities to check. + * @param simpleName The name of the requested entity type. + * @param manager The EntityManager to use. + * @return Set of the ids that the user has read access to. If there are no + * restrictions, then returns null. + */ + public Set getReadableIds(String userId, List entities, String simpleName, + EntityManager manager) { + + if (entities.size() == 0) { + return null; + } + + List restrictions = getRestrictions(userId, simpleName, manager); + if (restrictions == null) { + return null; + } + + return getReadableIds(userId, entities, restrictions, manager); + } + + /** + * Returns a set of ids that indicate entities that the user has READ access to. + * + * @param userId The user making the READ request. 
+ * @param entities The entities to check. + * @param restrictions The restrictions applying to the entities. + * @param manager The EntityManager to use. + * @return Set of the ids that the user has read access to. + */ + private Set getReadableIds(String userId, List entities, List restrictions, + EntityManager manager) { /* * IDs are processed in batches to avoid Oracle error: ORA-01795: @@ -205,13 +292,13 @@ public List getReadable(String userId, List bean StringBuilder sb = null; int i = 0; - for (EntityBaseBean bean : beans) { + for (HasEntityId entity : entities) { if (i == 0) { sb = new StringBuilder(); - sb.append(bean.getId()); + sb.append(entity.getId()); i = 1; } else { - sb.append("," + bean.getId()); + sb.append("," + entity.getId()); i++; } if (i == maxIdsInQuery) { @@ -224,27 +311,21 @@ public List getReadable(String userId, List bean idLists.add(sb.toString()); } - logger.debug("Check readability of " + beans.size() + " beans has been divided into " + idLists.size() + logger.debug("Check readability of " + entities.size() + " beans has been divided into " + idLists.size() + " queries."); - Set ids = new HashSet<>(); + Set readableIds = new HashSet<>(); for (String idList : idLists) { for (String qString : restrictions) { TypedQuery q = manager.createQuery(qString.replace(":pkids", idList), Long.class); if (qString.contains(":user")) { q.setParameter("user", userId); } - ids.addAll(q.getResultList()); + readableIds.addAll(q.getResultList()); } } - List results = new ArrayList<>(); - for (EntityBaseBean bean : beans) { - if (ids.contains(bean.getId())) { - results.add(bean); - } - } - return results; + return readableIds; } public Set getRootUserNames() { @@ -285,7 +366,7 @@ public boolean isAccessAllowed(String user, EntityBaseBean object, AccessType ac if (access == AccessType.CREATE) { qName = Rule.CREATE_QUERY; } else if (access == AccessType.READ) { - if (publicTables.contains(simpleName)) { + if (getPublicTables().contains(simpleName)) { // 
TODO see other comment on publicTables vs getPublicTables logger.info("All are allowed " + access + " to " + simpleName); return true; } @@ -337,10 +418,16 @@ public boolean isAccessAllowed(String user, EntityBaseBean object, AccessType ac public void markPublicStepsStale() { publicStepsStale = true; + publicSearchFieldsStale = true; } public void markPublicTablesStale() { publicTablesStale = true; + publicSearchFieldsStale = true; + } + + public void markPublicSearchFieldsFresh() { + publicSearchFieldsStale = false; } /** diff --git a/src/main/java/org/icatproject/core/manager/HasEntityId.java b/src/main/java/org/icatproject/core/manager/HasEntityId.java new file mode 100644 index 000000000..8ad36eb85 --- /dev/null +++ b/src/main/java/org/icatproject/core/manager/HasEntityId.java @@ -0,0 +1,8 @@ +package org.icatproject.core.manager; + +/** + * Interface for objects representing entities that hold the entity id. + */ +public interface HasEntityId { + public Long getId(); +} diff --git a/src/main/java/org/icatproject/core/manager/LuceneApi.java b/src/main/java/org/icatproject/core/manager/LuceneApi.java deleted file mode 100644 index 6d8ca753c..000000000 --- a/src/main/java/org/icatproject/core/manager/LuceneApi.java +++ /dev/null @@ -1,486 +0,0 @@ -package org.icatproject.core.manager; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; -import java.text.SimpleDateFormat; -import java.util.Date; -import java.util.List; -import java.util.TimeZone; - -import jakarta.json.Json; -import jakarta.json.stream.JsonGenerator; -import jakarta.json.stream.JsonParser; -import jakarta.json.stream.JsonParser.Event; -import jakarta.ws.rs.core.MediaType; - -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpDelete; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.client.methods.HttpPost; -import 
org.apache.http.client.utils.URIBuilder; -import org.apache.http.entity.StringEntity; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.HttpClients; -import org.icatproject.core.IcatException; -import org.icatproject.core.IcatException.IcatExceptionType; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class LuceneApi { - private enum ParserState { - None, Results - } - - static String basePath = "/icat.lucene"; - final static Logger logger = LoggerFactory.getLogger(LuceneApi.class); - - public static void encodeSortedDocValuesField(JsonGenerator gen, String name, Long value) { - gen.writeStartObject().write("type", "SortedDocValuesField").write("name", name).write("value", value) - .writeEnd(); - } - - public static void encodeStoredId(JsonGenerator gen, Long id) { - gen.writeStartObject().write("type", "StringField").write("name", "id").write("value", Long.toString(id)) - .write("store", true).writeEnd(); - } - - private static SimpleDateFormat df; - - static { - df = new SimpleDateFormat("yyyyMMddHHmm"); - TimeZone tz = TimeZone.getTimeZone("GMT"); - df.setTimeZone(tz); - } - - public static void encodeStringField(JsonGenerator gen, String name, Date value) { - String timeString; - synchronized (df) { - timeString = df.format(value); - } - gen.writeStartObject().write("type", "StringField").write("name", name).write("value", timeString).writeEnd(); - } - - public static void encodeDoubleField(JsonGenerator gen, String name, Double value) { - gen.writeStartObject().write("type", "DoubleField").write("name", name).write("value", value) - .write("store", true).writeEnd(); - } - - public static void encodeStringField(JsonGenerator gen, String name, Long value) { - gen.writeStartObject().write("type", "StringField").write("name", name).write("value", Long.toString(value)) - .writeEnd(); - } - - public static void encodeStringField(JsonGenerator gen, String name, String value) { - 
gen.writeStartObject().write("type", "StringField").write("name", name).write("value", value).writeEnd(); - - } - - public static void encodeTextfield(JsonGenerator gen, String name, String value) { - if (value != null) { - gen.writeStartObject().write("type", "TextField").write("name", name).write("value", value).writeEnd(); - } - } - - URI server; - - public LuceneApi(URI server) { - this.server = server; - } - - public void clear() throws IcatException { - try (CloseableHttpClient httpclient = HttpClients.createDefault()) { - URI uri = new URIBuilder(server).setPath(basePath + "/clear").build(); - HttpPost httpPost = new HttpPost(uri); - try (CloseableHttpResponse response = httpclient.execute(httpPost)) { - Rest.checkStatus(response, IcatExceptionType.INTERNAL); - } - } catch (IOException | URISyntaxException e) { - throw new IcatException(IcatExceptionType.INTERNAL, e.getClass() + " " + e.getMessage()); - } - - } - - public void commit() throws IcatException { - try (CloseableHttpClient httpclient = HttpClients.createDefault()) { - URI uri = new URIBuilder(server).setPath(basePath + "/commit").build(); - logger.trace("Making call {}", uri); - HttpPost httpPost = new HttpPost(uri); - try (CloseableHttpResponse response = httpclient.execute(httpPost)) { - Rest.checkStatus(response, IcatExceptionType.INTERNAL); - } - } catch (URISyntaxException | IOException e) { - throw new IcatException(IcatExceptionType.INTERNAL, e.getClass() + " " + e.getMessage()); - } - } - - public LuceneSearchResult datafiles(long uid, int maxResults) throws IcatException { - try (CloseableHttpClient httpclient = HttpClients.createDefault()) { - URI uri = new URIBuilder(server).setPath(basePath + "/datafiles/" + uid) - .setParameter("maxResults", Integer.toString(maxResults)).build(); - return getLsr(uri, httpclient); - - } catch (IOException | URISyntaxException e) { - throw new IcatException(IcatExceptionType.INTERNAL, e.getClass() + " " + e.getMessage()); - } - } - - public 
LuceneSearchResult datafiles(String user, String text, Date lower, Date upper, List parms, - int maxResults) throws IcatException { - - try (CloseableHttpClient httpclient = HttpClients.createDefault()) { - URI uri = new URIBuilder(server).setPath(basePath + "/datafiles") - .setParameter("maxResults", Integer.toString(maxResults)).build(); - logger.trace("Making call {}", uri); - - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try (JsonGenerator gen = Json.createGenerator(baos)) { - gen.writeStartObject(); - if (user != null) { - gen.write("user", user); - } - if (text != null) { - gen.write("text", text); - } - if (lower != null) { - gen.write("lower", enc(lower)); - } - if (upper != null) { - gen.write("upper", enc(upper)); - } - if (parms != null && !parms.isEmpty()) { - gen.writeStartArray("params"); - for (ParameterPOJO parm : parms) { - gen.writeStartObject(); - if (parm.name != null) { - gen.write("name", parm.name); - } - if (parm.units != null) { - gen.write("units", parm.units); - } - if (parm.stringValue != null) { - gen.write("stringValue", parm.stringValue); - } - if (parm.lowerDateValue != null) { - gen.write("lowerDateValue", enc(parm.lowerDateValue)); - } - if (parm.upperDateValue != null) { - gen.write("upperDateValue", enc(parm.upperDateValue)); - } - if (parm.lowerNumericValue != null) { - gen.write("lowerNumericValue", parm.lowerNumericValue); - } - if (parm.upperNumericValue != null) { - gen.write("upperNumericValue", parm.upperNumericValue); - } - gen.writeEnd(); // object - } - gen.writeEnd(); // array - } - gen.writeEnd(); // object - } - return getLsr(uri, httpclient, baos); - } catch (IOException | URISyntaxException e) { - throw new IcatException(IcatExceptionType.INTERNAL, e.getClass() + " " + e.getMessage()); - } - }; - - private String enc(Date dateValue) { - synchronized (df) { - return df.format(dateValue); - } - } - - public LuceneSearchResult datasets(Long uid, int maxResults) throws IcatException { - try 
(CloseableHttpClient httpclient = HttpClients.createDefault()) { - URI uri = new URIBuilder(server).setPath(basePath + "/datasets/" + uid) - .setParameter("maxResults", Integer.toString(maxResults)).build(); - return getLsr(uri, httpclient); - - } catch (IOException | URISyntaxException e) { - throw new IcatException(IcatExceptionType.INTERNAL, e.getClass() + " " + e.getMessage()); - } - } - - public LuceneSearchResult datasets(String user, String text, Date lower, Date upper, List parms, - int maxResults) throws IcatException { - try (CloseableHttpClient httpclient = HttpClients.createDefault()) { - URI uri = new URIBuilder(server).setPath(basePath + "/datasets") - .setParameter("maxResults", Integer.toString(maxResults)).build(); - logger.trace("Making call {}", uri); - - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try (JsonGenerator gen = Json.createGenerator(baos)) { - gen.writeStartObject(); - if (user != null) { - gen.write("user", user); - } - if (text != null) { - gen.write("text", text); - } - if (lower != null) { - gen.write("lower", enc(lower)); - } - if (upper != null) { - gen.write("upper", enc(upper)); - } - if (parms != null && !parms.isEmpty()) { - gen.writeStartArray("params"); - for (ParameterPOJO parm : parms) { - gen.writeStartObject(); - if (parm.name != null) { - gen.write("name", parm.name); - } - if (parm.units != null) { - gen.write("units", parm.units); - } - if (parm.stringValue != null) { - gen.write("stringValue", parm.stringValue); - } - if (parm.lowerDateValue != null) { - gen.write("lowerDateValue", enc(parm.lowerDateValue)); - } - if (parm.upperDateValue != null) { - gen.write("upperDateValue", enc(parm.upperDateValue)); - } - if (parm.lowerNumericValue != null) { - gen.write("lowerNumericValue", parm.lowerNumericValue); - } - if (parm.upperNumericValue != null) { - gen.write("upperNumericValue", parm.upperNumericValue); - } - gen.writeEnd(); // object - } - gen.writeEnd(); // array - } - gen.writeEnd(); // object - 
} - return getLsr(uri, httpclient, baos); - } catch (IOException | URISyntaxException e) { - throw new IcatException(IcatExceptionType.INTERNAL, e.getClass() + " " + e.getMessage()); - } - } - - public void freeSearcher(Long uid) throws IcatException { - try { - URI uri = new URIBuilder(server).setPath(basePath + "/freeSearcher/" + uid).build(); - logger.trace("Making call {}", uri); - try (CloseableHttpClient httpclient = HttpClients.createDefault()) { - HttpDelete httpDelete = new HttpDelete(uri); - try (CloseableHttpResponse response = httpclient.execute(httpDelete)) { - Rest.checkStatus(response, IcatExceptionType.INTERNAL); - } - } - } catch (URISyntaxException | IOException e) { - throw new IcatException(IcatExceptionType.INTERNAL, e.getClass() + " " + e.getMessage()); - } - } - - private LuceneSearchResult getLsr(URI uri, CloseableHttpClient httpclient) throws IcatException { - HttpGet httpGet = new HttpGet(uri); - LuceneSearchResult lsr = new LuceneSearchResult(); - List results = lsr.getResults(); - ParserState state = ParserState.None; - try (CloseableHttpResponse response = httpclient.execute(httpGet)) { - Rest.checkStatus(response, IcatExceptionType.INTERNAL); - try (JsonParser p = Json.createParser(response.getEntity().getContent())) { - String key = null; - while (p.hasNext()) { - Event e = p.next(); - if (e.equals(Event.KEY_NAME)) { - key = p.getString(); - } else if (state == ParserState.Results) { - if (e == (Event.START_ARRAY)) { - p.next(); - Long id = p.getLong(); - p.next(); - results.add(new ScoredEntityBaseBean(id, p.getBigDecimal().floatValue())); - p.next(); // skip the } - } - } else { // Not in results yet - if (e == Event.START_ARRAY && key.equals("results")) { - state = ParserState.Results; - } - } - } - } - } catch (IOException e) { - throw new IcatException(IcatExceptionType.INTERNAL, e.getClass() + " " + e.getMessage()); - } - return lsr; - } - - private LuceneSearchResult getLsr(URI uri, CloseableHttpClient httpclient, 
ByteArrayOutputStream baos) - throws IcatException { - logger.debug(baos.toString()); - try { - StringEntity input = new StringEntity(baos.toString()); - input.setContentType(MediaType.APPLICATION_JSON); - HttpPost httpPost = new HttpPost(uri); - httpPost.setEntity(input); - - LuceneSearchResult lsr = new LuceneSearchResult(); - List results = lsr.getResults(); - ParserState state = ParserState.None; - try (CloseableHttpResponse response = httpclient.execute(httpPost)) { - Rest.checkStatus(response, IcatExceptionType.INTERNAL); - try (JsonParser p = Json.createParser(response.getEntity().getContent())) { - String key = null; - while (p.hasNext()) { - Event e = p.next(); - if (e.equals(Event.KEY_NAME)) { - key = p.getString(); - } else if (state == ParserState.Results) { - if (e == (Event.START_ARRAY)) { - p.next(); - Long id = p.getLong(); - p.next(); - results.add(new ScoredEntityBaseBean(id, p.getBigDecimal().floatValue())); - p.next(); // skip the } - } - } else { // Not in results yet - if (e == (Event.VALUE_NUMBER) && key.equals("uid")) { - lsr.setUid(p.getLong()); - } else if (e == Event.START_ARRAY && key.equals("results")) { - state = ParserState.Results; - } - - } - - } - } - } - return lsr; - } catch (IOException e) { - throw new IcatException(IcatExceptionType.INTERNAL, e.getClass() + " " + e.getMessage()); - } - } - - public LuceneSearchResult investigations(Long uid, int maxResults) throws IcatException { - try (CloseableHttpClient httpclient = HttpClients.createDefault()) { - URI uri = new URIBuilder(server).setPath(basePath + "/investigations/" + uid) - .setParameter("maxResults", Integer.toString(maxResults)).build(); - return getLsr(uri, httpclient); - - } catch (IOException | URISyntaxException e) { - throw new IcatException(IcatExceptionType.INTERNAL, e.getClass() + " " + e.getMessage()); - } - } - - public LuceneSearchResult investigations(String user, String text, Date lower, Date upper, - List parms, List samples, String userFullName, int 
maxResults) throws IcatException { - try (CloseableHttpClient httpclient = HttpClients.createDefault()) { - URI uri = new URIBuilder(server).setPath(basePath + "/investigations") - .setParameter("maxResults", Integer.toString(maxResults)).build(); - logger.trace("Making call {}", uri); - - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try (JsonGenerator gen = Json.createGenerator(baos)) { - gen.writeStartObject(); - if (user != null) { - gen.write("user", user); - } - if (text != null) { - gen.write("text", text); - } - if (lower != null) { - gen.write("lower", enc(lower)); - } - if (upper != null) { - gen.write("upper", enc(upper)); - } - if (parms != null && !parms.isEmpty()) { - gen.writeStartArray("params"); - for (ParameterPOJO parm : parms) { - gen.writeStartObject(); - if (parm.name != null) { - gen.write("name", parm.name); - } - if (parm.units != null) { - gen.write("units", parm.units); - } - if (parm.stringValue != null) { - gen.write("stringValue", parm.stringValue); - } - if (parm.lowerDateValue != null) { - gen.write("lowerDateValue", enc(parm.lowerDateValue)); - } - if (parm.upperDateValue != null) { - gen.write("upperDateValue", enc(parm.upperDateValue)); - } - if (parm.lowerNumericValue != null) { - gen.write("lowerNumericValue", parm.lowerNumericValue); - } - if (parm.upperNumericValue != null) { - gen.write("upperNumericValue", parm.upperNumericValue); - } - gen.writeEnd(); // object - } - gen.writeEnd(); // array - } - if (samples != null && !samples.isEmpty()) { - gen.writeStartArray("samples"); - for (String sample : samples) { - gen.write(sample); - } - gen.writeEnd(); // array - } - if (userFullName != null) { - gen.write("userFullName", userFullName); - } - gen.writeEnd(); // object - } - return getLsr(uri, httpclient, baos); - } catch (IOException | URISyntaxException e) { - throw new IcatException(IcatExceptionType.INTERNAL, e.getClass() + " " + e.getMessage()); - } - } - - public void lock(String entityName) throws 
IcatException { - try { - URI uri = new URIBuilder(server).setPath(basePath + "/lock/" + entityName).build(); - logger.trace("Making call {}", uri); - try (CloseableHttpClient httpclient = HttpClients.createDefault()) { - HttpPost httpPost = new HttpPost(uri); - try (CloseableHttpResponse response = httpclient.execute(httpPost)) { - Rest.checkStatus(response, IcatExceptionType.INTERNAL); - } - } - } catch (URISyntaxException | IOException e) { - throw new IcatException(IcatExceptionType.INTERNAL, e.getClass() + " " + e.getMessage()); - } - } - - public void unlock(String entityName) throws IcatException { - try { - URI uri = new URIBuilder(server).setPath(basePath + "/unlock/" + entityName).build(); - logger.trace("Making call {}", uri); - try (CloseableHttpClient httpclient = HttpClients.createDefault()) { - HttpPost httpPost = new HttpPost(uri); - try (CloseableHttpResponse response = httpclient.execute(httpPost)) { - Rest.checkStatus(response, IcatExceptionType.INTERNAL); - } - } - } catch (URISyntaxException | IOException e) { - throw new IcatException(IcatExceptionType.INTERNAL, e.getClass() + " " + e.getMessage()); - } - } - - public void modify(String json) throws IcatException { - try (CloseableHttpClient httpclient = HttpClients.createDefault()) { - URI uri = new URIBuilder(server).setPath(basePath + "/modify").build(); - HttpPost httpPost = new HttpPost(uri); - StringEntity input = new StringEntity(json); - input.setContentType(MediaType.APPLICATION_JSON); - httpPost.setEntity(input); - - try (CloseableHttpResponse response = httpclient.execute(httpPost)) { - Rest.checkStatus(response, IcatExceptionType.INTERNAL); - } - } catch (IOException | URISyntaxException e) { - throw new IcatException(IcatExceptionType.INTERNAL, e.getClass() + " " + e.getMessage()); - } - } - -} diff --git a/src/main/java/org/icatproject/core/manager/LuceneManager.java b/src/main/java/org/icatproject/core/manager/LuceneManager.java deleted file mode 100644 index 
f52e39bd4..000000000 --- a/src/main/java/org/icatproject/core/manager/LuceneManager.java +++ /dev/null @@ -1,551 +0,0 @@ -package org.icatproject.core.manager; - -import java.io.BufferedReader; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.io.PipedInputStream; -import java.io.PipedOutputStream; -import java.net.URI; -import java.net.URL; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.Date; -import java.util.Set; -import java.util.List; -import java.util.Map.Entry; -import java.util.Set; -import java.util.SortedSet; -import java.util.Timer; -import java.util.TimerTask; -import java.util.concurrent.Callable; -import java.util.concurrent.CompletionService; -import java.util.concurrent.ConcurrentSkipListMap; -import java.util.concurrent.ConcurrentSkipListSet; -import java.util.concurrent.ExecutorCompletionService; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; - -import jakarta.annotation.PostConstruct; -import jakarta.annotation.PreDestroy; -import jakarta.ejb.EJB; -import jakarta.ejb.Singleton; -import jakarta.ejb.Startup; -import jakarta.json.Json; -import jakarta.json.stream.JsonGenerator; -import jakarta.persistence.EntityManager; -import jakarta.persistence.EntityManagerFactory; -import jakarta.persistence.PersistenceUnit; - -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpPost; -import org.apache.http.client.utils.URIBuilder; -import org.apache.http.entity.InputStreamEntity; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.HttpClients; -import org.icatproject.core.IcatException; -import org.icatproject.core.IcatException.IcatExceptionType; -import org.icatproject.core.entity.EntityBaseBean; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; 
-import org.slf4j.Marker; -import org.slf4j.MarkerFactory; - -@Startup -@Singleton -public class LuceneManager { - - public class EnqueuedLuceneRequestHandler extends TimerTask { - - @Override - public void run() { - - synchronized (queueFileLock) { - if (queueFile.length() != 0) { - logger.debug("Will attempt to process {}", queueFile); - StringBuilder sb = new StringBuilder("["); - try (BufferedReader reader = new BufferedReader(new FileReader(queueFile))) { - String line; - while ((line = reader.readLine()) != null) { - if (sb.length() != 1) { - sb.append(','); - } - sb.append(line); - } - } catch (IOException e) { - logger.error("Problems reading from {} : {}", queueFile, e.getMessage()); - return; - } - sb.append(']'); - - try { - luceneApi.modify(sb.toString()); - } catch (IcatException e) { - // Record failures in a flat file to be examined - // periodically - synchronized (backlogHandlerFileLock) { - try { - FileWriter output = new FileWriter(backlogHandlerFile, true); - output.write(sb.toString() + "\n"); - output.close(); - } catch (IOException e2) { - logger.error("Problems writing to {} : {}", backlogHandlerFile, e2.getMessage()); - } - } - } finally { - queueFile.delete(); - } - } - } - } - } - - public class IndexSome implements Callable { - - private List ids; - private EntityManager manager; - private Class klass; - private String entityName; - private long start; - - @SuppressWarnings("unchecked") - public IndexSome(String entityName, List ids, EntityManagerFactory entityManagerFactory, long start) - throws IcatException { - try { - logger.debug("About to index {} {} records", ids.size(), entityName); - this.entityName = entityName; - klass = EntityInfoHandler.getClass(entityName); - this.ids = ids; - manager = entityManagerFactory.createEntityManager(); - this.start = start; - } catch (Exception e) { - logger.error("About to throw internal exception because of", e); - throw new IcatException(IcatExceptionType.INTERNAL, e.getMessage()); - } - } - - 
@Override - public Long call() throws Exception { - if (EntityInfoHandler.hasLuceneDoc(klass)) { - - URI uri = new URIBuilder(luceneApi.server).setPath(LuceneApi.basePath + "/addNow/" + entityName) - .build(); - try (CloseableHttpClient httpclient = HttpClients.createDefault()) { - HttpPost httpPost = new HttpPost(uri); - PipedOutputStream beanDocs = new PipedOutputStream(); - httpPost.setEntity(new InputStreamEntity(new PipedInputStream(beanDocs))); - getBeanDocExecutor.submit(() -> { - try (JsonGenerator gen = Json.createGenerator(beanDocs)) { - gen.writeStartArray(); - for (Long id : ids) { - EntityBaseBean bean = (EntityBaseBean) manager.find(klass, id); - if (bean != null) { - gen.writeStartArray(); - bean.getDoc(gen); - gen.writeEnd(); - } - } - gen.writeEnd(); - return null; - } catch (Exception e) { - logger.error("About to throw internal exception because of", e); - throw new IcatException(IcatExceptionType.INTERNAL, e.getMessage()); - } finally { - manager.close(); - } - }); - - try (CloseableHttpResponse response = httpclient.execute(httpPost)) { - Rest.checkStatus(response, IcatExceptionType.INTERNAL); - } - } - } - return start; - } - } - - private class PendingLuceneRequestHandler extends TimerTask { - - @Override - public void run() { - synchronized (backlogHandlerFileLock) { - if (backlogHandlerFile.length() != 0) { - logger.debug("Will attempt to process {}", backlogHandlerFile); - try (BufferedReader reader = new BufferedReader(new FileReader(backlogHandlerFile))) { - String line; - while ((line = reader.readLine()) != null) { - luceneApi.modify(line); - } - backlogHandlerFile.delete(); - logger.info("Pending lucene records now all inserted"); - } catch (IOException e) { - logger.error("Problems reading from {} : {}", backlogHandlerFile, e.getMessage()); - } catch (IcatException e) { - logger.error("Failed to put previously failed entries into lucene " + e.getMessage()); - } catch (Throwable e) { - logger.error("Something unexpected happened " + 
e.getClass() + " " + e.getMessage()); - } - logger.debug("finish processing"); - } - } - } - } - - private enum PopState { - STOPPING, STOPPED - } - - public class PopulateThread extends Thread { - - private EntityManager manager; - private EntityManagerFactory entityManagerFactory; - - public PopulateThread(EntityManagerFactory entityManagerFactory) { - this.entityManagerFactory = entityManagerFactory; - manager = entityManagerFactory.createEntityManager(); - logger.info("Start new populate thread"); - } - - @Override - public void run() { - - try { - while (!populateMap.isEmpty()) { - - populatingClassEntry = populateMap.firstEntry(); - - if (populatingClassEntry != null) { - luceneApi.lock(populatingClassEntry.getKey()); - - Long start = populatingClassEntry.getValue(); - - logger.info("Lucene Populating " + populatingClassEntry); - - CompletionService threads = new ExecutorCompletionService<>(populateExecutor); - SortedSet tasks = new ConcurrentSkipListSet<>(); - - while (true) { - - if (popState == PopState.STOPPING) { - logger.info("PopulateThread stopping as flag was set"); - break; - } - /* Get next block of ids */ - List ids = manager - .createQuery("SELECT e.id from " + populatingClassEntry.getKey() - + " e WHERE e.id > " + start + " ORDER BY e.id", Long.class) - .setMaxResults(populateBlockSize).getResultList(); - if (ids.size() == 0) { - break; - } - - Future fut; - /* Remove any completed ones */ - while ((fut = threads.poll()) != null) { - Long s = fut.get(); - if (s.equals(tasks.first())) { - populateMap.put(populatingClassEntry.getKey(), s); - } - tasks.remove(s); - } - - /* If full then wait */ - if (tasks.size() == maxThreads) { - fut = threads.take(); - Long s = fut.get(); - if (s.equals(tasks.first())) { - populateMap.put(populatingClassEntry.getKey(), s); - } - tasks.remove(s); - } - - logger.debug("About to submit " + ids.size() + " " + populatingClassEntry + " documents"); - threads.submit( - new IndexSome(populatingClassEntry.getKey(), ids, 
entityManagerFactory, start)); - tasks.add(start); - start = ids.get(ids.size() - 1); - - manager.clear(); - } - - /* Wait for the last few to finish */ - Future fut; - while (tasks.size() > 0) { - fut = threads.take(); - Long s = fut.get(); - if (s.equals(tasks.first())) { - populateMap.put(populatingClassEntry.getKey(), s); - } - tasks.remove(s); - } - - /* - * Unlock and commit the changes - */ - luceneApi.unlock(populatingClassEntry.getKey()); - populateMap.remove(populatingClassEntry.getKey()); - } - } - } catch (Throwable t) { - logger.error("Problem encountered in", t); - } finally { - manager.close(); - popState = PopState.STOPPED; - } - } - } - - final static Logger logger = LoggerFactory.getLogger(LuceneManager.class); - - final static Marker fatal = MarkerFactory.getMarker("FATAL"); - - /** - * The Set of classes for which population is requested - */ - private ConcurrentSkipListMap populateMap = new ConcurrentSkipListMap<>(); - /** The thread which does the population */ - private PopulateThread populateThread; - - private Entry populatingClassEntry; - - @PersistenceUnit(unitName = "icat") - private EntityManagerFactory entityManagerFactory; - - private int populateBlockSize; - - private ExecutorService getBeanDocExecutor; - - @EJB - PropertyHandler propertyHandler; - private PopState popState = PopState.STOPPED; - - private ExecutorService populateExecutor; - - private int maxThreads; - - private LuceneApi luceneApi; - - private boolean active; - - private Long backlogHandlerFileLock = 0L; - - private Long queueFileLock = 0L; - - private Timer timer; - - private Set entitiesToIndex; - - private File backlogHandlerFile; - - private File queueFile; - - public void addDocument(EntityBaseBean bean) throws IcatException { - String entityName = bean.getClass().getSimpleName(); - if (EntityInfoHandler.hasLuceneDoc(bean.getClass()) && entitiesToIndex.contains(entityName)) { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try (JsonGenerator gen = 
Json.createGenerator(baos)) { - gen.writeStartArray(); - bean.getDoc(gen); - gen.writeEnd(); - } - enqueue(entityName, baos.toString(), null); - } - } - - public void enqueue(String entityName, String json, Long id) throws IcatException { - - StringBuilder sb = new StringBuilder(); - sb.append("[\"").append(entityName).append('"'); - if (id != null) { - sb.append(',').append(id); - } else { - sb.append(",null"); - } - if (json != null) { - sb.append(',').append(json); - } else { - sb.append(",null"); - } - sb.append(']'); - - synchronized (queueFileLock) { - try { - FileWriter output = new FileWriter(queueFile, true); - output.write(sb.toString() + "\n"); - output.close(); - } catch (IOException e) { - String msg = "Problems writing to " + queueFile + " " + e.getMessage(); - logger.error(msg); - throw new IcatException(IcatExceptionType.INTERNAL, msg); - } - } - - } - - public void clear() throws IcatException { - logger.info("Lucene clear called"); - popState = PopState.STOPPING; - while (populateThread != null && populateThread.getState() != Thread.State.TERMINATED) { - try { - Thread.sleep(1000); - } catch (InterruptedException e) { - // Do nothing - } - } - logger.debug("Lucene population terminated"); - } - - public void commit() throws IcatException { - pushPendingCalls(); - luceneApi.commit(); - } - - public LuceneSearchResult datafiles(String user, String text, Date lower, Date upper, List parms, - int blockSize) throws IcatException { - return luceneApi.datafiles(user, text, lower, upper, parms, blockSize); - } - - public LuceneSearchResult datafilesAfter(long uid, int blockSize) throws IcatException { - return luceneApi.datafiles(uid, blockSize); - } - - public LuceneSearchResult datasets(String user, String text, Date lower, Date upper, List parms, - int blockSize) throws IcatException { - return luceneApi.datasets(user, text, lower, upper, parms, blockSize); - } - - public LuceneSearchResult datasetsAfter(Long uid, int blockSize) throws IcatException { 
- return luceneApi.datasets(uid, blockSize); - } - - public void deleteDocument(EntityBaseBean bean) throws IcatException { - if (EntityInfoHandler.hasLuceneDoc(bean.getClass())) { - String entityName = bean.getClass().getSimpleName(); - Long id = bean.getId(); - enqueue(entityName, null, id); - } - } - - private void pushPendingCalls() { - timer.schedule(new EnqueuedLuceneRequestHandler(), 0L); - while (queueFile.length() != 0) { - try { - Thread.sleep(1000); - } catch (InterruptedException e) { - // Ignore - } - } - } - - @PreDestroy - private void exit() { - logger.info("Closing down LuceneManager"); - if (active) { - try { - populateExecutor.shutdown(); - getBeanDocExecutor.shutdown(); - pushPendingCalls(); - timer.cancel(); - timer = null; - logger.info("Closed down LuceneManager"); - } catch (Exception e) { - logger.error(fatal, "Problem closing down LuceneManager", e); - } - } - } - - public void freeSearcher(Long uid) throws IcatException { - luceneApi.freeSearcher(uid); - } - - public List getPopulating() { - List result = new ArrayList<>(); - for (Entry e : populateMap.entrySet()) { - result.add(e.getKey() + " " + e.getValue()); - } - return result; - } - - @PostConstruct - private void init() { - logger.info("Initialising LuceneManager"); - URL url = propertyHandler.getLuceneUrl(); - active = url != null; - if (active) { - try { - luceneApi = new LuceneApi(new URI(propertyHandler.getLuceneUrl().toString())); - populateBlockSize = propertyHandler.getLucenePopulateBlockSize(); - Path luceneDirectory = propertyHandler.getLuceneDirectory(); - backlogHandlerFile = luceneDirectory.resolve("backLog").toFile(); - queueFile = luceneDirectory.resolve("queue").toFile(); - maxThreads = Runtime.getRuntime().availableProcessors(); - populateExecutor = Executors.newWorkStealingPool(maxThreads); - getBeanDocExecutor = Executors.newCachedThreadPool(); - timer = new Timer(); - timer.schedule(new PendingLuceneRequestHandler(), 0L, - 
propertyHandler.getLuceneBacklogHandlerIntervalMillis()); - timer.schedule(new EnqueuedLuceneRequestHandler(), 0L, - propertyHandler.getLuceneEnqueuedRequestIntervalMillis()); - entitiesToIndex = propertyHandler.getEntitiesToIndex(); - logger.info("Initialised LuceneManager at {}", url); - } catch (Exception e) { - logger.error(fatal, "Problem setting up LuceneManager", e); - throw new IllegalStateException("Problem setting up LuceneManager"); - } - } else { - logger.info("LuceneManager is inactive"); - } - } - - public LuceneSearchResult investigations(String user, String text, Date lower, Date upper, - List parms, List samples, String userFullName, int blockSize) throws IcatException { - return luceneApi.investigations(user, text, lower, upper, parms, samples, userFullName, blockSize); - } - - public LuceneSearchResult investigationsAfter(Long uid, int blockSize) throws IcatException { - return luceneApi.investigations(uid, blockSize); - } - - public boolean isActive() { - return active; - } - - public void populate(String entityName, long minid) throws IcatException { - if (popState == PopState.STOPPING) { - while (populateThread != null && populateThread.getState() != Thread.State.TERMINATED) { - try { - Thread.sleep(1000); - } catch (InterruptedException e) { - // Do nothing - } - } - } - if (populateMap.put(entityName, minid) == null) { - logger.debug("Lucene population of {} requested", entityName); - } else { - throw new IcatException(IcatExceptionType.OBJECT_ALREADY_EXISTS, - "population of " + entityName + " already requested"); - } - if (populateThread == null || populateThread.getState() == Thread.State.TERMINATED) { - populateThread = new PopulateThread(entityManagerFactory); - populateThread.start(); - } - } - - public void updateDocument(EntityBaseBean bean) throws IcatException { - String entityName = bean.getClass().getSimpleName(); - if (EntityInfoHandler.hasLuceneDoc(bean.getClass()) && entitiesToIndex.contains(entityName)) { - 
ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try (JsonGenerator gen = Json.createGenerator(baos)) { - gen.writeStartArray(); - bean.getDoc(gen); - gen.writeEnd(); - } - enqueue(entityName, baos.toString(), bean.getId()); - } - } - -} diff --git a/src/main/java/org/icatproject/core/manager/LuceneSearchResult.java b/src/main/java/org/icatproject/core/manager/LuceneSearchResult.java deleted file mode 100644 index b2ab1b2d6..000000000 --- a/src/main/java/org/icatproject/core/manager/LuceneSearchResult.java +++ /dev/null @@ -1,23 +0,0 @@ -package org.icatproject.core.manager; - -import java.util.ArrayList; -import java.util.List; - -public class LuceneSearchResult { - - private Long uid; - private List results = new ArrayList<>(); - - public List getResults() { - return results; - } - - public void setUid(Long uid) { - this.uid = uid; - } - - public Long getUid() { - return uid; - } - -} diff --git a/src/main/java/org/icatproject/core/manager/PropertyHandler.java b/src/main/java/org/icatproject/core/manager/PropertyHandler.java index 9bafe275c..601841321 100644 --- a/src/main/java/org/icatproject/core/manager/PropertyHandler.java +++ b/src/main/java/org/icatproject/core/manager/PropertyHandler.java @@ -225,6 +225,10 @@ public enum CallType { READ, WRITE, SESSION, INFO } + public enum SearchEngine { + LUCENE, ELASTICSEARCH, OPENSEARCH + } + public class ExtendedAuthenticator { private Authenticator authenticator; @@ -273,16 +277,15 @@ public Set getRootUserNames() { return rootUserNames; } - /** - * Configure which entities will be indexed by lucene on ingest + * Configure which entities will be indexed on ingest */ private Set entitiesToIndex = new HashSet(); - + public Set getEntitiesToIndex() { return entitiesToIndex; } - + public int getLifetimeMinutes() { return lifetimeMinutes; } @@ -300,12 +303,17 @@ public int getLifetimeMinutes() { private ContainerType containerType; private String jmsTopicConnectionFactory; private String digestKey; - private 
URL luceneUrl; - private int lucenePopulateBlockSize; - private Path luceneDirectory; - private long luceneBacklogHandlerIntervalMillis; + private SearchEngine searchEngine; + private List searchUrls = new ArrayList<>(); + private int searchPopulateBlockSize; + private int searchSearchBlockSize; + private Path searchDirectory; + private long searchBacklogHandlerIntervalMillis; + private long searchAggregateFilesIntervalMillis; + private long searchMaxSearchTimeMillis; + private String unitAliasOptions; private Map cluster = new HashMap<>(); - private long luceneEnqueuedRequestIntervalMillis; + private long searchEnqueuedRequestIntervalMillis; @PostConstruct private void init() { @@ -379,24 +387,28 @@ private void init() { formattedProps.add("rootUserNames " + names); /* entitiesToIndex */ - key = "lucene.entitiesToIndex"; + key = "search.entitiesToIndex"; if (props.has(key)) { String indexableEntities = props.getString(key); for (String indexableEntity : indexableEntities.split("\\s+")) { entitiesToIndex.add(indexableEntity); } - logger.info("lucene.entitiesToIndex: {}", entitiesToIndex.toString()); + logger.info("search.entitiesToIndex: {}", entitiesToIndex.toString()); } else { - /* If the property is not specified, we default to all the entities which + /* + * If the property is not specified, we default to all the entities which * currently override the EntityBaseBean.getDoc() method. This should * result in no change to behaviour if the property is not specified. */ - entitiesToIndex.addAll(Arrays.asList("Datafile", "Dataset", "Investigation", "InvestigationUser", - "DatafileParameter", "DatasetParameter", "InvestigationParameter", "Sample")); - logger.info("lucene.entitiesToIndex not set. 
Defaulting to: {}", entitiesToIndex.toString()); + entitiesToIndex.addAll(Arrays.asList("Datafile", "DatafileFormat", "DatafileParameter", + "Dataset", "DatasetParameter", "DatasetType", "DatasetTechnique", "Facility", "Instrument", + "InstrumentScientist", "Investigation", "InvestigationInstrument", "InvestigationParameter", + "InvestigationType", "InvestigationUser", "ParameterType", "Sample", "SampleType", + "SampleParameter", "User")); + logger.info("search.entitiesToIndex not set. Defaulting to: {}", entitiesToIndex.toString()); } - formattedProps.add("lucene.entitiesToIndex " + entitiesToIndex.toString()); - + formattedProps.add("search.entitiesToIndex " + entitiesToIndex.toString()); + /* notification.list */ key = "notification.list"; if (props.has(key)) { @@ -454,31 +466,68 @@ private void init() { logger.info("'log.list' entry not present so no JMS call logging will be performed"); } - /* Lucene Host */ - if (props.has("lucene.url")) { - luceneUrl = props.getURL("lucene.url"); - formattedProps.add("lucene.url" + " " + luceneUrl); + /* Search Host */ + if (props.has("search.engine")) { + try { + searchEngine = SearchEngine.valueOf(props.getString("search.engine").toUpperCase()); + } catch (IllegalArgumentException e) { + String msg = "Value " + props.getString("search.engine") + " of search.engine must be chosen from " + + Arrays.asList(SearchEngine.values()); + throw new IllegalStateException(msg); + } + + for (String urlString : props.getString("search.urls").split("\\s+")) { + try { + searchUrls.add(new URL(urlString)); + } catch (MalformedURLException e) { + abend("Url in search.urls " + urlString + " is not a valid URL"); + } + } + + // In principle, clustered engines like OPENSEARCH or ELASTICSEARCH should + // support multiple urls for the nodes in the cluster, however this is not yet + // implemented + if (searchUrls.size() != 1) { + String msg = "Exactly one value for search.urls must be provided when using " + searchEngine; + throw new 
IllegalStateException(msg); + } + formattedProps.add("search.urls" + " " + searchUrls.toString()); + logger.info("Using {} as search engine with url(s) {}", searchEngine, searchUrls); - lucenePopulateBlockSize = props.getPositiveInt("lucene.populateBlockSize"); - formattedProps.add("lucene.populateBlockSize" + " " + lucenePopulateBlockSize); + searchPopulateBlockSize = props.getPositiveInt("search.populateBlockSize"); + formattedProps.add("search.populateBlockSize" + " " + searchPopulateBlockSize); - luceneDirectory = props.getPath("lucene.directory"); - if (!luceneDirectory.toFile().isDirectory()) { - String msg = luceneDirectory + " is not a directory"; + searchSearchBlockSize = props.getPositiveInt("search.searchBlockSize"); + formattedProps.add("search.searchBlockSize" + " " + searchSearchBlockSize); + + searchDirectory = props.getPath("search.directory"); + if (!searchDirectory.toFile().isDirectory()) { + String msg = searchDirectory + " is not a directory"; logger.error(fatal, msg); throw new IllegalStateException(msg); } - formattedProps.add("lucene.directory" + " " + luceneDirectory); + formattedProps.add("search.directory" + " " + searchDirectory); + + searchBacklogHandlerIntervalMillis = props.getPositiveLong("search.backlogHandlerIntervalSeconds"); + formattedProps.add("search.backlogHandlerIntervalSeconds" + " " + searchBacklogHandlerIntervalMillis); + searchBacklogHandlerIntervalMillis *= 1000; - luceneBacklogHandlerIntervalMillis = props.getPositiveLong("lucene.backlogHandlerIntervalSeconds"); - formattedProps.add("lucene.backlogHandlerIntervalSeconds" + " " + luceneBacklogHandlerIntervalMillis); - luceneBacklogHandlerIntervalMillis *= 1000; + searchEnqueuedRequestIntervalMillis = props.getPositiveLong("search.enqueuedRequestIntervalSeconds"); + formattedProps.add("search.enqueuedRequestIntervalSeconds" + " " + searchEnqueuedRequestIntervalMillis); + searchEnqueuedRequestIntervalMillis *= 1000; - luceneEnqueuedRequestIntervalMillis = 
props.getPositiveLong("lucene.enqueuedRequestIntervalSeconds"); - formattedProps.add("lucene.enqueuedRequestIntervalSeconds" + " " + luceneEnqueuedRequestIntervalMillis); - luceneEnqueuedRequestIntervalMillis *= 1000; + searchAggregateFilesIntervalMillis = props.getNonNegativeLong("search.aggregateFilesIntervalSeconds"); + searchAggregateFilesIntervalMillis *= 1000; + + searchMaxSearchTimeMillis = props.getPositiveLong("search.maxSearchTimeSeconds"); + formattedProps.add("search.maxSearchTimeSeconds" + " " + searchMaxSearchTimeMillis); + searchMaxSearchTimeMillis *= 1000; + } else { + logger.info("'search.engine' entry not present so no free text search available"); } + unitAliasOptions = props.getString("units", ""); + /* * maxEntities, importCacheSize, exportCacheSize, maxIdsInQuery, key */ @@ -603,24 +652,44 @@ public String getKey() { return digestKey; } - public URL getLuceneUrl() { - return luceneUrl; + public SearchEngine getSearchEngine() { + return searchEngine; + } + + public List getSearchUrls() { + return searchUrls; + } + + public int getSearchPopulateBlockSize() { + return searchPopulateBlockSize; + } + + public int getSearchSearchBlockSize() { + return searchSearchBlockSize; + } + + public long getSearchBacklogHandlerIntervalMillis() { + return searchBacklogHandlerIntervalMillis; + } + + public long getSearchEnqueuedRequestIntervalMillis() { + return searchEnqueuedRequestIntervalMillis; } - public int getLucenePopulateBlockSize() { - return lucenePopulateBlockSize; + public long getSearchAggregateFilesIntervalMillis() { + return searchAggregateFilesIntervalMillis; } - public long getLuceneBacklogHandlerIntervalMillis() { - return luceneBacklogHandlerIntervalMillis; + public long getSearchMaxSearchTimeMillis() { + return searchMaxSearchTimeMillis; } - public long getLuceneEnqueuedRequestIntervalMillis() { - return luceneEnqueuedRequestIntervalMillis; + public Path getSearchDirectory() { + return searchDirectory; } - public Path getLuceneDirectory() { 
- return luceneDirectory; + public String getUnitAliasOptions() { + return unitAliasOptions; } } diff --git a/src/main/java/org/icatproject/core/manager/Rest.java b/src/main/java/org/icatproject/core/manager/Rest.java index dbf757327..e741eb7d6 100644 --- a/src/main/java/org/icatproject/core/manager/Rest.java +++ b/src/main/java/org/icatproject/core/manager/Rest.java @@ -18,7 +18,7 @@ public class Rest { - static void checkStatus(HttpResponse response, IcatExceptionType et) throws IcatException { + public static void checkStatus(HttpResponse response, IcatExceptionType et) throws IcatException { StatusLine status = response.getStatusLine(); if (status == null) { throw new IcatException(IcatExceptionType.INTERNAL, "Status line in response is empty"); diff --git a/src/main/java/org/icatproject/core/manager/ScoredEntityBaseBean.java b/src/main/java/org/icatproject/core/manager/ScoredEntityBaseBean.java deleted file mode 100644 index 34c58306d..000000000 --- a/src/main/java/org/icatproject/core/manager/ScoredEntityBaseBean.java +++ /dev/null @@ -1,21 +0,0 @@ -package org.icatproject.core.manager; - -public class ScoredEntityBaseBean { - - private long entityBaseBeanId; - private float score; - - public ScoredEntityBaseBean(long id, float score) { - this.entityBaseBeanId = id; - this.score = score; - } - - public long getEntityBaseBeanId() { - return entityBaseBeanId; - } - - public float getScore() { - return score; - } - -} diff --git a/src/main/java/org/icatproject/core/manager/search/FacetDimension.java b/src/main/java/org/icatproject/core/manager/search/FacetDimension.java new file mode 100644 index 000000000..d5308ba69 --- /dev/null +++ b/src/main/java/org/icatproject/core/manager/search/FacetDimension.java @@ -0,0 +1,42 @@ +package org.icatproject.core.manager.search; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * Holds information for a single faceted dimension, or field. 
+ * Each dimension will have a list of FacetLabels, and to prevent ambiguity is + * associated with the target entity that was faceted. For example, both a + * Dataset and a DatasetParameter might have the "type.name" dimension. + */ +public class FacetDimension { + + private String target; + private String dimension; + private List facets = new ArrayList<>(); + + public FacetDimension(String target, String dimension) { + this.target = target; + this.dimension = dimension; + } + + public FacetDimension(String target, String dimension, FacetLabel... labels) { + this.target = target; + this.dimension = dimension; + Collections.addAll(facets, labels); + } + + public List getFacets() { + return facets; + } + + public String getDimension() { + return dimension; + } + + public String getTarget() { + return target; + } + +} diff --git a/src/main/java/org/icatproject/core/manager/search/FacetLabel.java b/src/main/java/org/icatproject/core/manager/search/FacetLabel.java new file mode 100644 index 000000000..6a159239f --- /dev/null +++ b/src/main/java/org/icatproject/core/manager/search/FacetLabel.java @@ -0,0 +1,58 @@ +package org.icatproject.core.manager.search; + +import jakarta.json.JsonNumber; +import jakarta.json.JsonObject; + +/** + * Holds information for a single label value pair. + * The value is the number of times the label is present in a particular facet + * dimension. 
+ */ +public class FacetLabel { + + private String label; + private long value; + private JsonNumber from; + private JsonNumber to; + + public FacetLabel(String label, long value) { + this.label = label; + this.value = value; + } + + public FacetLabel(JsonObject jsonObject) { + this(jsonObject.getString("key"), jsonObject); + } + + public FacetLabel(String label, JsonObject jsonObject) { + this.label = label; + value = jsonObject.getJsonNumber("doc_count").longValueExact(); + if (jsonObject.containsKey("from")) { + from = jsonObject.getJsonNumber("from"); + } + if (jsonObject.containsKey("to")) { + to = jsonObject.getJsonNumber("to"); + } + } + + public String getLabel() { + return label; + } + + public long getValue() { + return value; + } + + public JsonNumber getFrom() { + return from; + } + + public JsonNumber getTo() { + return to; + } + + public String toString() { + return label + ": " + value; + } + +} diff --git a/src/main/java/org/icatproject/core/manager/search/LuceneApi.java b/src/main/java/org/icatproject/core/manager/search/LuceneApi.java new file mode 100644 index 000000000..45b7177a2 --- /dev/null +++ b/src/main/java/org/icatproject/core/manager/search/LuceneApi.java @@ -0,0 +1,262 @@ +package org.icatproject.core.manager.search; + +import java.io.IOException; +import java.io.PipedInputStream; +import java.io.PipedOutputStream; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutorService; + +import jakarta.json.Json; +import jakarta.json.JsonArrayBuilder; +import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; +import jakarta.json.JsonValue; +import jakarta.json.stream.JsonGenerator; +import jakarta.persistence.EntityManager; + +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.client.utils.URIBuilder; 
+import org.apache.http.entity.InputStreamEntity; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.icatproject.core.IcatException; +import org.icatproject.core.IcatException.IcatExceptionType; +import org.icatproject.core.entity.EntityBaseBean; +import org.icatproject.core.manager.Rest; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class LuceneApi extends SearchApi { + + public String basePath = "/icat.lucene"; + private static final Logger logger = LoggerFactory.getLogger(LuceneApi.class); + + /** + * Gets the target index from query and checks its validity. + * + * @param query JsonObject containing the criteria to search on. + * @return The lowercase target index. + * @throws IcatException If "target" was not a key in query, or if the value was + * not a supported index. + */ + private static String getTargetPath(JsonObject query) throws IcatException { + if (!query.containsKey("target")) { + throw new IcatException(IcatExceptionType.BAD_PARAMETER, + "'target' must be present in query for LuceneApi, but it was " + query); + } + String path = query.getString("target").toLowerCase(); + if (!indices.contains(path)) { + throw new IcatException(IcatExceptionType.BAD_PARAMETER, + "'target' must be one of " + indices + ", but it was " + path); + } + return path; + } + + @Override + public JsonObject buildSearchAfter(ScoredEntityBaseBean lastBean, String sort) throws IcatException { + // As icat.lucene always requires the Lucene id, shardIndex and score + // irrespective of the sort, override the default implementation + JsonObjectBuilder builder = Json.createObjectBuilder(); + builder.add("doc", lastBean.getEngineDocId()); + builder.add("shardIndex", lastBean.getShardIndex()); + float score = lastBean.getScore(); + if (!Float.isNaN(score)) { + builder.add("score", score); + } + JsonArrayBuilder arrayBuilder; + if (sort == null || sort.equals("") || sort.equals("{}")) { + 
arrayBuilder = Json.createArrayBuilder().add(score); + } else { + arrayBuilder = searchAfterArrayBuilder(lastBean, sort); + } + builder.add("fields", arrayBuilder.add(lastBean.getId())); + return builder.build(); + } + + public LuceneApi(URI server) { + super(server); + } + + @Override + public void addNow(String entityName, List ids, EntityManager manager, + Class klass, ExecutorService getBeanDocExecutor) + throws IcatException, IOException, URISyntaxException { + URI uri = new URIBuilder(server).setPath(basePath + "/addNow/" + entityName).build(); + + try (CloseableHttpClient httpclient = HttpClients.createDefault()) { + HttpPost httpPost = new HttpPost(uri); + PipedOutputStream beanDocs = new PipedOutputStream(); + httpPost.setEntity(new InputStreamEntity(new PipedInputStream(beanDocs))); + getBeanDocExecutor.submit(() -> { + try (JsonGenerator gen = Json.createGenerator(beanDocs)) { + gen.writeStartArray(); + for (Long id : ids) { + EntityBaseBean bean = (EntityBaseBean) manager.find(klass, id); + if (bean != null) { + gen.writeStartObject(); + bean.getDoc(manager, gen); + gen.writeEnd(); + } + } + gen.writeEnd(); + return null; + } catch (Exception e) { + logger.error("About to throw internal exception for ids {} because of", ids, e); + throw new IcatException(IcatExceptionType.INTERNAL, e.getMessage()); + } finally { + manager.close(); + } + }); + + try (CloseableHttpResponse response = httpclient.execute(httpPost)) { + Rest.checkStatus(response, IcatExceptionType.INTERNAL); + } + } + } + + @Override + public void clear() throws IcatException { + post(basePath + "/clear"); + } + + @Override + public void commit() throws IcatException { + post(basePath + "/commit"); + } + + @Override + public List facetSearch(String target, JsonObject facetQuery, Integer maxResults, Integer maxLabels) + throws IcatException { + String path = basePath + "/" + target + "/facet"; + + Map parameterMap = new HashMap<>(); + parameterMap.put("maxResults", maxResults.toString()); + 
parameterMap.put("maxLabels", maxLabels.toString()); + + JsonObject postResponse = postResponse(path, facetQuery.toString(), parameterMap); + + List results = new ArrayList<>(); + JsonObject aggregations = postResponse.getJsonObject("aggregations"); + for (String dimension : aggregations.keySet()) { + parseFacetsResponse(results, target, dimension, aggregations); + } + return results; + } + + @Override + public SearchResult getResults(JsonObject query, JsonValue searchAfter, Integer blockSize, String sort, + List fields) throws IcatException { + String indexPath = getTargetPath(query); + + Map parameterMap = new HashMap<>(); + parameterMap.put("maxResults", blockSize.toString()); + if (searchAfter != null) { + parameterMap.put("search_after", searchAfter.toString()); + } + if (sort != null) { + parameterMap.put("sort", sort); + } + + JsonObjectBuilder objectBuilder = Json.createObjectBuilder(); + objectBuilder.add("query", query); + if (fields != null && fields.size() > 0) { + JsonArrayBuilder arrayBuilder = Json.createArrayBuilder(); + fields.forEach((field) -> arrayBuilder.add(field)); + objectBuilder.add("fields", arrayBuilder.build()); + } + String queryString = objectBuilder.build().toString(); + + JsonObject postResponse = postResponse(basePath + "/" + indexPath, queryString, parameterMap); + SearchResult lsr = new SearchResult(); + List results = lsr.getResults(); + List resultsArray = postResponse.getJsonArray("results").getValuesAs(JsonObject.class); + for (JsonObject resultObject : resultsArray) { + int luceneDocId = resultObject.getInt("_id"); + int shardIndex = resultObject.getInt("_shardIndex"); + float score = Float.NaN; + if (resultObject.containsKey("_score")) { + score = resultObject.getJsonNumber("_score").bigDecimalValue().floatValue(); + } + JsonObject source = resultObject.getJsonObject("_source"); + ScoredEntityBaseBean result = new ScoredEntityBaseBean(luceneDocId, shardIndex, score, source); + results.add(result); + logger.trace("Result id 
{} with score {}", result.getId(), score); + } + if (postResponse.containsKey("search_after")) { + lsr.setSearchAfter(postResponse.getJsonObject("search_after")); + } + + return lsr; + } + + /** + * Locks the index for entityName, optionally removing all existing documents. While + * locked, document modifications will fail (excluding addNow as a result of a + * populate thread). + * + * A check is also performed against the minId and maxId used for population. + * This ensures that no data is duplicated in the index. + * + * @param entityName Index to lock. + * @param minId The exclusive minimum ICAT id being populated for. If + * Documents already exist with an id greater than this, the + * lock will fail. If null, treated as if it were + * Long.MIN_VALUE + * @param maxId The inclusive maximum ICAT id being populated for. If + * Documents already exist with an id less than or equal to + * this, the lock will fail. If null, treated as if it were + * Long.MAX_VALUE + * @param delete If true, all existing documents of entityName are deleted. + * @throws IcatException + */ + @Override + public void lock(String entityName, Long minId, Long maxId, Boolean delete) throws IcatException { + String path = basePath + "/lock/" + entityName; + try (CloseableHttpClient httpclient = HttpClients.createDefault()) { + URIBuilder builder = new URIBuilder(server).setPath(path); + if (minId != null) { + builder.addParameter("minId", minId.toString()); + } + if (maxId != null) { + builder.addParameter("maxId", maxId.toString()); + } + if (delete != null) { + builder.addParameter("delete", delete.toString()); + } + URI uri = builder.build(); + logger.debug("Making call {}", uri); + HttpPost httpPost = new HttpPost(uri); + try (CloseableHttpResponse response = httpclient.execute(httpPost)) { + int code = response.getStatusLine().getStatusCode(); + Rest.checkStatus(response, code == 400 ? 
IcatExceptionType.BAD_PARAMETER : IcatExceptionType.INTERNAL); + } + } catch (URISyntaxException | IOException e) { + throw new IcatException(IcatExceptionType.INTERNAL, e.getClass() + " " + e.getMessage()); + } + } + + /** + * Unlocks the index for entityName, committing all pending documents. While + * locked, document modifications will fail (excluding addNow as a result of a + * populate thread). + * + * @param entityName Index to unlock. + * @throws IcatException + */ + @Override + public void unlock(String entityName) throws IcatException { + post(basePath + "/unlock/" + entityName); + } + + @Override + public void modify(String json) throws IcatException { + post(basePath + "/modify", json); + } + +} diff --git a/src/main/java/org/icatproject/core/manager/search/OpensearchApi.java b/src/main/java/org/icatproject/core/manager/search/OpensearchApi.java new file mode 100644 index 000000000..fe75ca383 --- /dev/null +++ b/src/main/java/org/icatproject/core/manager/search/OpensearchApi.java @@ -0,0 +1,1273 @@ +package org.icatproject.core.manager.search; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.StringReader; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.concurrent.ExecutorService; +import java.util.Set; + +import jakarta.json.Json; +import jakarta.json.JsonArray; +import jakarta.json.JsonArrayBuilder; +import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; +import jakarta.json.JsonReader; +import jakarta.json.JsonValue; +import jakarta.json.stream.JsonGenerator; +import jakarta.persistence.EntityManager; + +import org.apache.http.client.ClientProtocolException; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpGet; +import 
org.apache.http.client.methods.HttpHead; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.client.methods.HttpPut; +import org.apache.http.client.utils.URIBuilder; +import org.apache.http.entity.ContentType; +import org.apache.http.entity.StringEntity; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.icatproject.core.IcatException; +import org.icatproject.core.IcatException.IcatExceptionType; +import org.icatproject.core.entity.DatafileFormat; +import org.icatproject.core.entity.DatasetType; +import org.icatproject.core.entity.EntityBaseBean; +import org.icatproject.core.entity.Facility; +import org.icatproject.core.entity.InvestigationType; +import org.icatproject.core.entity.ParameterType; +import org.icatproject.core.entity.Sample; +import org.icatproject.core.entity.SampleType; +import org.icatproject.core.entity.Technique; +import org.icatproject.core.entity.User; +import org.icatproject.core.manager.Rest; +import org.icatproject.utils.IcatUnits; +import org.icatproject.utils.IcatUnits.Value; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The interface to Opensearch/Elasticsearch clusters is currently considered to + * be experimental. For the more widely used and extensively tested Lucene based + * engine, see {@link LuceneApi}. 
+ */ +public class OpensearchApi extends SearchApi { + + private static enum ModificationType { + CREATE, UPDATE, DELETE + }; + + private static enum RelationType { + CHILD, NESTED_CHILD, NESTED_GRANDCHILD + }; + + private static class ParentRelation { + public RelationType relationType; + public String parentName; + public String joinField; + public Set fields; + + public ParentRelation(RelationType relationType, String parentName, String joinField, Set fields) { + this.relationType = relationType; + this.parentName = parentName; + this.joinField = joinField; + this.fields = fields; + } + } + + private boolean aggregateFiles = false; + public IcatUnits icatUnits; + protected static final Logger logger = LoggerFactory.getLogger(OpensearchApi.class); + private static JsonObject indexSettings = Json.createObjectBuilder().add("analysis", Json.createObjectBuilder() + .add("analyzer", Json.createObjectBuilder() + .add("default", Json.createObjectBuilder() + .add("tokenizer", "classic").add("filter", Json.createArrayBuilder() + .add("possessive_english").add("lowercase").add("porter_stem"))) + .add("default_search", Json.createObjectBuilder() + .add("tokenizer", "classic").add("filter", Json.createArrayBuilder() + .add("possessive_english").add("lowercase").add("porter_stem").add("synonym")))) + .add("filter", Json.createObjectBuilder() + .add("synonym", Json.createObjectBuilder() + .add("type", "synonym").add("synonyms_path", "synonym.txt")) + .add("possessive_english", Json.createObjectBuilder() + .add("type", "stemmer").add("language", "possessive_english")))) + .build(); + private static Map> relations = new HashMap<>(); + private static Map> defaultFieldsMap = new HashMap<>(); + private static Map> defaultFacetsMap = new HashMap<>(); + protected static final Set indices = new HashSet<>( + Arrays.asList("datafile", "dataset", "investigation", "instrumentscientist")); + + static { + // Non-nested children have a one to one relationship with an indexed entity and + // 
so do not form an array, and update specific fields by query + relations.put("datafileformat", Arrays.asList( + new ParentRelation(RelationType.CHILD, "datafile", "datafileFormat", DatafileFormat.docFields))); + relations.put("datasettype", Arrays.asList( + new ParentRelation(RelationType.CHILD, "dataset", "type", DatasetType.docFields))); + relations.put("investigationtype", Arrays.asList( + new ParentRelation(RelationType.CHILD, "investigation", "type", InvestigationType.docFields))); + relations.put("facility", Arrays.asList( + new ParentRelation(RelationType.CHILD, "investigation", "facility", Facility.docFields))); + relations.put("investigation", Arrays.asList( + new ParentRelation(RelationType.CHILD, "dataset", "investigation", + new HashSet<>(Arrays.asList("investigation.name", "investigation.id", "investigation.startDate", + "investigation.title"))), + new ParentRelation(RelationType.CHILD, "datafile", "investigation", + new HashSet<>(Arrays.asList("investigation.name", "investigation.id"))))); + relations.put("dataset", Arrays.asList( + new ParentRelation(RelationType.CHILD, "datafile", "dataset", + new HashSet<>(Arrays.asList("dataset.name", "dataset.id", "sample.id"))))); + relations.put("user", Arrays.asList( + new ParentRelation(RelationType.CHILD, "instrumentscientist", "user", User.docFields), + new ParentRelation(RelationType.NESTED_GRANDCHILD, "investigation", "investigationuser", + User.docFields), + new ParentRelation(RelationType.NESTED_GRANDCHILD, "dataset", "investigationuser", User.docFields), + new ParentRelation(RelationType.NESTED_GRANDCHILD, "datafile", "investigationuser", User.docFields))); + relations.put("sample", Arrays.asList( + new ParentRelation(RelationType.CHILD, "dataset", "sample", Sample.docFields), + new ParentRelation(RelationType.CHILD, "datafile", "sample", Sample.docFields), + new ParentRelation(RelationType.NESTED_CHILD, "investigation", "investigation", null))); + relations.put("sampletype", Arrays.asList( + new 
ParentRelation(RelationType.CHILD, "dataset", "sample.type", SampleType.docFields), + new ParentRelation(RelationType.CHILD, "datafile", "sample.type", SampleType.docFields), + new ParentRelation(RelationType.NESTED_GRANDCHILD, "investigation", "sample", SampleType.docFields))); + + // Nested children are indexed as an array of objects on their parent entity, + // and know their parent's id (N.B. InvestigationUsers are also mapped to + // Datasets and Datafiles, but using the investigation.id field) + relations.put("datafileparameter", Arrays.asList( + new ParentRelation(RelationType.NESTED_CHILD, "datafile", "datafile", null))); + relations.put("datasetparameter", Arrays.asList( + new ParentRelation(RelationType.NESTED_CHILD, "dataset", "dataset", null))); + relations.put("datasettechnique", Arrays.asList( + new ParentRelation(RelationType.NESTED_CHILD, "dataset", "dataset", null))); + relations.put("investigationparameter", Arrays.asList( + new ParentRelation(RelationType.NESTED_CHILD, "investigation", "investigation", null))); + relations.put("sampleparameter", Arrays.asList( + new ParentRelation(RelationType.NESTED_CHILD, "investigation", "sample", null), // Must be first + new ParentRelation(RelationType.NESTED_CHILD, "dataset", "sample", null), + new ParentRelation(RelationType.NESTED_CHILD, "datafile", "sample", null))); + relations.put("investigationuser", Arrays.asList( + new ParentRelation(RelationType.NESTED_CHILD, "investigation", "investigation", null), + new ParentRelation(RelationType.NESTED_CHILD, "dataset", "investigation", null), + new ParentRelation(RelationType.NESTED_CHILD, "datafile", "investigation", null))); + relations.put("investigationinstrument", Arrays.asList( + new ParentRelation(RelationType.NESTED_CHILD, "investigation", "investigation", null), + new ParentRelation(RelationType.NESTED_CHILD, "dataset", "investigation", null), + new ParentRelation(RelationType.NESTED_CHILD, "datafile", "investigation", null))); + 
relations.put("investigationfacilitycycle", Arrays.asList( + new ParentRelation(RelationType.NESTED_CHILD, "investigation", "investigation", null), + new ParentRelation(RelationType.NESTED_CHILD, "dataset", "investigation", null), + new ParentRelation(RelationType.NESTED_CHILD, "datafile", "investigation", null))); + + // Grandchildren are entities that are related to one of the nested + // children, but do not have a direct reference to one of the indexed entities, + // and so must be updated by query - they also only affect a subset of the + // nested fields, rather than an entire nested object + relations.put("parametertype", Arrays.asList( + new ParentRelation(RelationType.NESTED_GRANDCHILD, "investigation", "investigationparameter", + ParameterType.docFields), + new ParentRelation(RelationType.NESTED_GRANDCHILD, "investigation", "sampleparameter", + ParameterType.docFields), + new ParentRelation(RelationType.NESTED_GRANDCHILD, "dataset", "datasetparameter", + ParameterType.docFields), + new ParentRelation(RelationType.NESTED_GRANDCHILD, "dataset", "sampleparameter", + ParameterType.docFields), + new ParentRelation(RelationType.NESTED_GRANDCHILD, "datafile", "datafileparameter", + ParameterType.docFields), + new ParentRelation(RelationType.NESTED_GRANDCHILD, "datafile", "sampleparameter", + ParameterType.docFields))); + relations.put("technique", Arrays.asList( + new ParentRelation(RelationType.NESTED_GRANDCHILD, "dataset", "datasettechnique", + Technique.docFields))); + relations.put("instrument", Arrays.asList( + new ParentRelation(RelationType.NESTED_GRANDCHILD, "investigation", "investigationinstrument", + User.docFields), + new ParentRelation(RelationType.NESTED_GRANDCHILD, "dataset", "investigationinstrument", + User.docFields), + new ParentRelation(RelationType.NESTED_GRANDCHILD, "datafile", "investigationinstrument", + User.docFields))); + + defaultFieldsMap.put("_all", new ArrayList<>()); + defaultFieldsMap.put("datafile", + Arrays.asList("name", 
"description", "doi", "location", "datafileFormat.name", "sample.name")); + defaultFieldsMap.put("dataset", + Arrays.asList("name", "description", "doi", "sample.name", "sample.type.name", "type.name")); + defaultFieldsMap.put("investigation", + Arrays.asList("name", "visitId", "title", "summary", "doi", "facility.name")); + + defaultFacetsMap.put("datafile", Arrays.asList("datafileFormat.name")); + defaultFacetsMap.put("dataset", Arrays.asList("type.name")); + defaultFacetsMap.put("investigation", Arrays.asList("type.name")); + } + + public OpensearchApi(URI server) throws IcatException { + super(server); + icatUnits = new IcatUnits(); + initMappings(); + initScripts(); + } + + public OpensearchApi(URI server, String unitAliasOptions, boolean aggregateFiles) throws IcatException { + super(server); + icatUnits = new IcatUnits(unitAliasOptions); + this.aggregateFiles = aggregateFiles; + initMappings(); + initScripts(); + } + + /** + * Builds a JsonObject representation of the mapping of fields to their type. + * The default behaviour is for a field to be treated as text with a string + * field automatically generated with the suffix ".keyword". Therefore only + * nested and long fields need to be explicitly accounted for. + * + * @param index Index to build the mapping for. + * @return JsonObject of the document mapping. 
+ */ + private static JsonObject buildMappings(String index) { + JsonObject typeLong = Json.createObjectBuilder().add("type", "long").build(); + JsonObjectBuilder propertiesBuilder = Json.createObjectBuilder().add("id", typeLong); + switch (index) { + case "investigation": + propertiesBuilder + .add("type.id", typeLong) + .add("facility.id", typeLong) + .add("fileSize", typeLong) + .add("fileCount", typeLong) + .add("sample", buildNestedMapping("investigation.id", "type.id")) + .add("sampleparameter", buildNestedMapping("sample.id", "type.id")) + .add("investigationparameter", buildNestedMapping("investigation.id", "type.id")) + .add("investigationuser", buildNestedMapping("investigation.id", "user.id")) + .add("investigationinstrument", buildNestedMapping("investigation.id", "instrument.id")) + .add("investigationfacilitycycle", buildNestedMapping("investigation.id", "facilityCycle.id")); + break; + + case "dataset": + propertiesBuilder + .add("investigation.id", typeLong) + .add("type.id", typeLong) + .add("sample.id", typeLong) + .add("sample.investigaion.id", typeLong) + .add("sample.type.id", typeLong) + .add("fileSize", typeLong) + .add("fileCount", typeLong) + .add("datasetparameter", buildNestedMapping("dataset.id", "type.id")) + .add("datasettechnique", buildNestedMapping("dataset.id", "technique.id")) + .add("investigationuser", buildNestedMapping("investigation.id", "user.id")) + .add("investigationinstrument", buildNestedMapping("investigation.id", "instrument.id")) + .add("investigationfacilitycycle", buildNestedMapping("investigation.id", "facilityCycle.id")) + .add("sampleparameter", buildNestedMapping("sample.id", "type.id")); + break; + + case "datafile": + propertiesBuilder + .add("investigation.id", typeLong) + .add("datafileFormat.id", typeLong) + .add("sample.investigaion.id", typeLong) + .add("sample.type.id", typeLong) + .add("fileSize", typeLong) + .add("fileCount", typeLong) + .add("datafileparameter", buildNestedMapping("datafile.id", 
"type.id")) + .add("investigationuser", buildNestedMapping("investigation.id", "user.id")) + .add("investigationinstrument", buildNestedMapping("investigation.id", "instrument.id")) + .add("investigationfacilitycycle", buildNestedMapping("investigation.id", "facilityCycle.id")) + .add("sampleparameter", buildNestedMapping("sample.id", "type.id")); + break; + + case "instrumentscientist": + propertiesBuilder + .add("instrument.id", typeLong) + .add("user.id", typeLong); + break; + + } + return Json.createObjectBuilder().add("properties", propertiesBuilder).build(); + } + + /** + * Builds a JsonObject representation of the fields on a nested object. + * + * @param idFields Id fields on the nested object which require the long type + * mapping. + * @return JsonObjectBuilder for the nested object. + */ + private static JsonObjectBuilder buildNestedMapping(String... idFields) { + JsonObjectBuilder propertiesBuilder = propertiesBuilder(idFields); + return buildNestedMapping(propertiesBuilder); + } + + private static JsonObjectBuilder buildNestedMapping(JsonObjectBuilder propertiesBuilder) { + return Json.createObjectBuilder().add("type", "nested").add("properties", propertiesBuilder); + } + + private static JsonObjectBuilder propertiesBuilder(String... 
idFields) { + JsonObject typeLong = Json.createObjectBuilder().add("type", "long").build(); + JsonObjectBuilder propertiesBuilder = Json.createObjectBuilder().add("id", typeLong); + for (String idField : idFields) { + propertiesBuilder.add(idField, typeLong); + } + return propertiesBuilder; + } + + @Override + public void addNow(String entityName, List ids, EntityManager manager, + Class klass, ExecutorService getBeanDocExecutor) + throws IcatException, IOException, URISyntaxException { + // getBeanDocExecutor is not used for this implementation, but is + // required for the @Override + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (JsonGenerator gen = Json.createGenerator(baos)) { + gen.writeStartArray(); + for (long id : ids) { + EntityBaseBean bean = (EntityBaseBean) manager.find(klass, id); + if (bean != null) { + gen.writeStartObject().writeStartObject("create"); + gen.write("_index", entityName).write("_id", bean.getId()); + gen.writeStartObject("doc"); + bean.getDoc(manager, gen); + gen.writeEnd().writeEnd().writeEnd(); + } + } + gen.writeEnd(); + } + modify(baos.toString()); + } + + @Override + public void clear() throws IcatException { + commit(); + String body = OpensearchQuery.matchAllQuery.toString(); + post("/_all/_delete_by_query", body); + } + + @Override + public void commit() throws IcatException { + post("/_refresh"); + } + + @Override + public List facetSearch(String target, JsonObject facetQuery, Integer maxResults, + Integer maxLabels) throws IcatException { + List results = new ArrayList<>(); + String dimensionPrefix = null; + String index = target.toLowerCase(); + if (!indices.contains(index) && relations.containsKey(index)) { + // If we're attempting to facet a nested entity, use the parent index + dimensionPrefix = index; + index = relations.get(index).get(0).parentName; + } + + JsonObject queryObject = facetQuery.getJsonObject("query"); + List defaultFields = defaultFieldsMap.get(index); + OpensearchQuery opensearchQuery 
= new OpensearchQuery(this); + opensearchQuery.parseQuery(queryObject, index, dimensionPrefix, defaultFields); + if (facetQuery.containsKey("dimensions")) { + JsonArray dimensions = facetQuery.getJsonArray("dimensions"); + opensearchQuery.parseFacets(dimensions, maxLabels, dimensionPrefix); + } else { + List dimensions = defaultFacetsMap.get(index); + opensearchQuery.parseFacets(dimensions, maxLabels, dimensionPrefix); + } + String body = opensearchQuery.body(); + + Map parameterMap = new HashMap<>(); + parameterMap.put("size", maxResults.toString()); + + JsonObject postResponse = postResponse("/" + index + "/_search", body, parameterMap); + + JsonObject aggregations = postResponse.getJsonObject("aggregations"); + if (dimensionPrefix != null) { + aggregations = aggregations.getJsonObject(dimensionPrefix); + } + for (String dimension : aggregations.keySet()) { + parseFacetsResponse(results, target, dimension, aggregations); + } + return results; + } + + @Override + public SearchResult getResults(JsonObject query, JsonValue searchAfter, Integer blockSize, String sort, + List requestedFields) throws IcatException { + String index = query.containsKey("target") ? 
query.getString("target").toLowerCase() : "_all"; + List defaultFields = defaultFieldsMap.get(index); + + OpensearchQuery opensearchQuery = new OpensearchQuery(this); + opensearchQuery.parseQuery(query, index, null, defaultFields); + opensearchQuery.parseSort(sort); + opensearchQuery.parseSearchAfter(searchAfter); + String body = opensearchQuery.body(); + + Map parameterMap = new HashMap<>(); + Map> joinedFields = new HashMap<>(); + buildParameterMap(blockSize, requestedFields, parameterMap, joinedFields); + + JsonObject postResponse = postResponse("/" + index + "/_search", body, parameterMap); + + SearchResult result = new SearchResult(); + List entities = result.getResults(); + JsonArray hits = postResponse.getJsonObject("hits").getJsonArray("hits"); + for (JsonObject hit : hits.getValuesAs(JsonObject.class)) { + float score = Float.NaN; + if (!hit.isNull("_score")) { + score = hit.getJsonNumber("_score").bigDecimalValue().floatValue(); + } + Integer id = new Integer(hit.getString("_id")); + JsonObject source = hit.getJsonObject("_source"); + // If there are fields requested from another index, join them to the source + for (String joinedEntityName : joinedFields.keySet()) { + String joinedIndex = joinedEntityName.toLowerCase(); + Set requestedJoinedFields = joinedFields.get(joinedEntityName); + Map joinedParameterMap = new HashMap<>(); + String fld; + String parentId; + if (joinedIndex.contains("investigation")) { + // Special case to allow datafiles and datasets join via their investigation.id + // field + fld = "investigation.id"; + if (index.equals("investigation")) { + parentId = source.getString("id"); + } else { + parentId = source.getString("investigation.id"); + } + } else { + fld = joinedIndex + ".id"; + parentId = source.getString("id"); + } + // Search for joined entities matching the id + JsonObject termQuery = OpensearchQuery.buildTermQuery(fld, parentId); + String joinedBody = Json.createObjectBuilder().add("query", termQuery).build().toString(); + 
buildParameterMap(blockSize, requestedJoinedFields, joinedParameterMap, null); + JsonObject joinedResponse = postResponse("/" + joinedIndex + "/_search", joinedBody, + joinedParameterMap); + // Parse the joined source and integrate it into the main source Json + JsonArray joinedHits = joinedResponse.getJsonObject("hits").getJsonArray("hits"); + JsonObjectBuilder sourceBuilder = Json.createObjectBuilder(); + source.entrySet().forEach(entry -> sourceBuilder.add(entry.getKey(), entry.getValue())); + JsonArrayBuilder joinedSourceBuilder = Json.createArrayBuilder(); + for (JsonValue joinedHit : joinedHits) { + JsonObject joinedHitObject = (JsonObject) joinedHit; + joinedSourceBuilder.add(joinedHitObject.getJsonObject("_source")); + } + source = sourceBuilder.add(joinedIndex, joinedSourceBuilder).build(); + } + entities.add(new ScoredEntityBaseBean(id, -1, score, source)); + } + + // If we're returning as many results as were asked for, setSearchAfter so + // subsequent searches can continue from the last result + if (hits.size() == blockSize) { + JsonObject lastHit = hits.getJsonObject(blockSize - 1); + if (lastHit.containsKey("sort")) { + result.setSearchAfter(lastHit.getJsonArray("sort")); + } else { + ScoredEntityBaseBean lastEntity = entities.get(blockSize - 1); + long id = lastEntity.getId(); + float score = lastEntity.getScore(); + result.setSearchAfter(Json.createArrayBuilder().add(score).add(id).build()); + } + } + + return result; + } + + /** + * Parses fields from requestedFields and set them in Map for the url + * parameters. + * + * @param blockSize The maximum number of results to return from a single + * search. + * @param requestedFields Fields that should be returned as part of the source + * @param parameterMap Map of key value pairs to be included in the url. + * @param joinedFields Map of indices to fields which should be returned that + * are NOT part of the main index/entity being searched. + * @throws IcatException if the field cannot be parsed. 
+ */ + private void buildParameterMap(Integer blockSize, Iterable requestedFields, + Map parameterMap, Map> joinedFields) throws IcatException { + StringBuilder sb = new StringBuilder(); + for (String field : requestedFields) { + String[] splitString = field.split(" "); + if (splitString.length == 1) { + sb.append(splitString[0] + ","); + } else if (splitString.length == 2) { + if (joinedFields != null && indices.contains(splitString[0].toLowerCase())) { + if (joinedFields.containsKey(splitString[0])) { + joinedFields.get(splitString[0]).add(splitString[1]); + } else { + joinedFields.putIfAbsent(splitString[0], new HashSet<>(Arrays.asList(splitString[1]))); + } + } else { + sb.append(splitString[0].toLowerCase() + ","); + } + } else { + throw new IcatException(IcatExceptionType.BAD_PARAMETER, + "Could not parse field: " + field); + } + } + parameterMap.put("_source", sb.toString()); + parameterMap.put("size", blockSize.toString()); + } + + /** + * Create mappings for indices that do not already have them. 
+ * + * @throws IcatException + */ + public void initMappings() throws IcatException { + for (String index : indices) { + if (!indexExists(index)) { + try (CloseableHttpClient httpclient = HttpClients.createDefault()) { + URI uri = new URIBuilder(server).setPath("/" + index).build(); + HttpPut httpPut = new HttpPut(uri); + JsonObjectBuilder bodyBuilder = Json.createObjectBuilder(); + bodyBuilder.add("settings", indexSettings).add("mappings", buildMappings(index)); + String body = bodyBuilder.build().toString(); + logger.debug("Making call {} with body {}", uri, body); + httpPut.setEntity(new StringEntity(body, ContentType.APPLICATION_JSON)); + try (CloseableHttpResponse response = httpclient.execute(httpPut)) { + Rest.checkStatus(response, IcatExceptionType.INTERNAL); + } + } catch (URISyntaxException | IOException e) { + throw new IcatException(IcatExceptionType.INTERNAL, e.getClass() + " " + e.getMessage()); + } + } + } + } + + /** + * @param index Name of an index (entity) to check the existence of + * @return Whether index exists on the cluster or not + * @throws IcatException + */ + private boolean indexExists(String index) throws IcatException { + try (CloseableHttpClient httpclient = HttpClients.createDefault()) { + URI uri = new URIBuilder(server).setPath("/" + index).build(); + logger.debug("Making call {}", uri); + HttpHead httpHead = new HttpHead(uri); + try (CloseableHttpResponse response = httpclient.execute(httpHead)) { + int statusCode = response.getStatusLine().getStatusCode(); + if (statusCode == 404) { + // If the index isn't present, we should get 404 + logger.debug("{} index does not exist", index); + return false; + } else { + // checkStatus will throw unless the code is 200 (index exists) + Rest.checkStatus(response, IcatExceptionType.INTERNAL); + logger.debug("{} index already exists", index); + return true; + } + } + } catch (URISyntaxException | IOException e) { + throw new IcatException(IcatExceptionType.INTERNAL, e.getClass() + " " + 
e.getMessage()); + } + } + + /** + * Create scripts for indices that do not already have them. + * + * @throws IcatException + */ + public void initScripts() throws IcatException { + for (Entry> entry : relations.entrySet()) { + String key = entry.getKey(); + ParentRelation relation = entry.getValue().get(0); + // Special cases + switch (key) { + case "parametertype": + // ParameterType can apply to 4 different nested objects + post("/_scripts/update_parametertype", + OpensearchScriptBuilder.buildParameterTypesScript(ParameterType.docFields, true)); + post("/_scripts/delete_parametertype", + OpensearchScriptBuilder.buildParameterTypesScript(ParameterType.docFields, false)); + continue; + + case "sample": + // Sample is a child of Datafile and Dataset... + post("/_scripts/update_sample", OpensearchScriptBuilder.buildChildScript(Sample.docFields, true)); + post("/_scripts/delete_sample", OpensearchScriptBuilder.buildChildScript(Sample.docFields, false)); + // ...but a nested child of Investigations + post("/_scripts/update_nestedsample", OpensearchScriptBuilder.buildNestedChildScript(key, true)); + post("/_scripts/delete_nestedsample", OpensearchScriptBuilder.buildNestedChildScript(key, false)); + String createScript = OpensearchScriptBuilder.buildCreateNestedChildScript(key); + post("/_scripts/create_" + key, createScript); + continue; + + case "sampletype": + // SampleType is a child of Datafile and Dataset... 
+ post("/_scripts/update_sampletype", + OpensearchScriptBuilder.buildChildScript(SampleType.docFields, true)); + post("/_scripts/delete_sampletype", + OpensearchScriptBuilder.buildChildScript(SampleType.docFields, false)); + // ...but a nested grandchild of Investigations + post("/_scripts/update_nestedsampletype", + OpensearchScriptBuilder.buildGrandchildScript("sample", SampleType.docFields, true)); + post("/_scripts/delete_nestedsampletype", + OpensearchScriptBuilder.buildGrandchildScript("sample", SampleType.docFields, false)); + continue; + + } + String updateScript = ""; + String deleteScript = ""; + // Each type of relation needs a different script to update + switch (relation.relationType) { + case CHILD: + updateScript = OpensearchScriptBuilder.buildChildScript(relation.fields, true); + deleteScript = OpensearchScriptBuilder.buildChildScript(relation.fields, false); + break; + case NESTED_CHILD: + updateScript = OpensearchScriptBuilder.buildNestedChildScript(key, true); + deleteScript = OpensearchScriptBuilder.buildNestedChildScript(key, false); + String createScript = OpensearchScriptBuilder.buildCreateNestedChildScript(key); + post("/_scripts/create_" + key, createScript); + break; + case NESTED_GRANDCHILD: + updateScript = OpensearchScriptBuilder.buildGrandchildScript(relation.joinField, + relation.fields, true); + deleteScript = OpensearchScriptBuilder.buildGrandchildScript(relation.joinField, + relation.fields, false); + break; + } + post("/_scripts/update_" + key, updateScript); + post("/_scripts/delete_" + key, deleteScript); + } + post("/_scripts/fileSize", OpensearchScriptBuilder.buildFileSizeScript()); + } + + public void modify(String json) throws IcatException { + try (CloseableHttpClient httpclient = HttpClients.createDefault()) { + OpensearchBulk bulk = new OpensearchBulk(); + JsonReader jsonReader = Json.createReader(new StringReader(json)); + JsonArray outerArray = jsonReader.readArray(); + for (JsonObject operation : 
outerArray.getValuesAs(JsonObject.class)) { + parseModification(httpclient, bulk, operation); + } + + postModify("/_bulk", bulk.bulkBody()); + + if (bulk.updatesMap.size() > 0) { + for (String path : bulk.updatesMap.keySet()) { + for (String body : bulk.updatesMap.get(path)) { + postModify(path, body); + } + } + } + + if (bulk.investigationIds.size() > 0) { + // Ensure bulk changes are committed before checking for InvestigationUsers + commit(); + for (String investigationId : bulk.investigationIds) { + String path = "/investigation/_source/" + investigationId; + URI uriGet = new URIBuilder(server).setPath(path).build(); + HttpGet httpGet = new HttpGet(uriGet); + try (CloseableHttpResponse responseGet = httpclient.execute(httpGet)) { + if (responseGet.getStatusLine().getStatusCode() == 200) { + extractFromInvestigation(httpclient, investigationId, responseGet); + } + } + } + } + + buildFileSizeUpdates("investigation", bulk.investigationAggregations, bulk.fileAggregationBuilder); + buildFileSizeUpdates("dataset", bulk.datasetAggregations, bulk.fileAggregationBuilder); + postModify("/_bulk", bulk.fileAggregationBody()); + + postModify("/_bulk", bulk.deletedBody()); + } catch (IOException | URISyntaxException e) { + throw new IcatException(IcatExceptionType.INTERNAL, e.getClass() + " " + e.getMessage()); + } + } + + /** + * Parses a modification from operation, and adds it to bulk. 
+ * + * @param httpclient The client being used to send HTTP + * @param bulk OpensearchBulk object recording the requests for updates + * @param operation JsonObject representing the operation to be performed as + * part of the bulk modification + * @throws IcatException + * @throws URISyntaxException + * @throws ClientProtocolException + * @throws IOException + */ + private void parseModification(CloseableHttpClient httpclient, OpensearchBulk bulk, JsonObject operation) + throws IcatException, URISyntaxException, ClientProtocolException, IOException { + Set operationKeys = operation.keySet(); + if (operationKeys.size() != 1) { + throw new IcatException(IcatExceptionType.BAD_PARAMETER, + "Operation should only have one key, but it had " + operationKeys); + } + String operationKey = operationKeys.toArray(new String[1])[0]; + ModificationType modificationType = ModificationType.valueOf(operationKey.toUpperCase()); + JsonObject innerOperation = operation.getJsonObject(modificationType.toString().toLowerCase()); + String index = innerOperation.getString("_index").toLowerCase(); + long id = innerOperation.getJsonNumber("_id").longValueExact(); + JsonObject document = innerOperation.containsKey("doc") ? innerOperation.getJsonObject("doc") : null; + logger.trace("{} {} with id {}", operationKey, index, id); + + if (relations.containsKey(index)) { + // Related entities (with or without an index) will have one or more other + // indices that need to be updated with their information + for (ParentRelation relation : relations.get(index)) { + modifyNestedEntity(bulk, id, index, document, modificationType, relation); + } + } + if (indices.contains(index)) { + // Also modify any main, indexable entities + modifyEntity(httpclient, bulk, id, index, document, modificationType); + } + } + + /** + * Commits to ensure index is up to date, then sends a POST request for + * modification. This may be bulk, a single update, update by query etc. 
+ * + * @param path Path on the search engine to POST to + * @param body String of Json to send as the request body + * @throws IcatException + */ + private void postModify(String path, String body) throws IcatException { + if (body.length() > 0) { + commit(); + post(path, body); + } + } + + /** + * Builds commands for updating the fileSizes of the entities keyed in + * aggregations. + * + * @param entity Name of the entity/index to be updated. + * @param aggregations Map of aggregated fileSize changes with the + * entity ids as keys. + * @param fileSizeStringBuilder StringBuilder for constructing the bulk updates. + */ + private void buildFileSizeUpdates(String entity, Map aggregations, + StringBuilder fileSizeStringBuilder) { + if (aggregations.size() > 0) { + for (String id : aggregations.keySet()) { + JsonObject targetObject = Json.createObjectBuilder().add("_id", Long.valueOf(id)).add("_index", entity) + .build(); + JsonObject update = Json.createObjectBuilder().add("update", targetObject).build(); + long deltaFileSize = aggregations.get(id)[0]; + long deltaFileCount = aggregations.get(id)[1]; + JsonObjectBuilder paramsBuilder = Json.createObjectBuilder(); + JsonObjectBuilder scriptBuilder = Json.createObjectBuilder(); + paramsBuilder.add("deltaFileSize", deltaFileSize).add("deltaFileCount", deltaFileCount); + scriptBuilder.add("id", "fileSize").add("params", paramsBuilder); + JsonObjectBuilder bodyBuilder = Json.createObjectBuilder(); + String body = bodyBuilder.add("script", scriptBuilder).build().toString(); + fileSizeStringBuilder.append(update.toString()).append("\n").append(body).append("\n"); + } + } + } + + /** + * Gets the source of a Datafile and returns it. + * + * @param httpclient The client being used to send HTTP + * @param id ICAT entity id of the Datafile. + * @return The Datafile source. 
+ * @throws IOException + * @throws URISyntaxException + * @throws ClientProtocolException + */ + private JsonObject extractSource(CloseableHttpClient httpclient, long id) + throws IOException, URISyntaxException, ClientProtocolException { + URI uriGet = new URIBuilder(server).setPath("/datafile/_source/" + id) + .build(); + HttpGet httpGet = new HttpGet(uriGet); + try (CloseableHttpResponse responseGet = httpclient.execute(httpGet)) { + if (responseGet.getStatusLine().getStatusCode() == 200) { + return Json.createReader(responseGet.getEntity().getContent()).readObject(); + } + } + return null; + } + + /** + * For cases when Datasets and Datafiles are created after an Investigation, + * some nested fields such as InvestigationUser and InvestigationInstrument may + * have already been indexed on the Investigation but not the Dataset/file as + * the latter did not yet exist. + * + * This method retrieves these arrays from the Investigation index ensuring that + * all information is available on all indices at the time of creation. + * + * @param httpclient The client being used to send HTTP + * @param investigationId Id of an investigation which may contain relevant + * information. + * @param responseGet The response from a GET request using the + * investigationId, which may or may not contain relevant + * information in the returned _source Json. 
+ * @throws IOException + * @throws URISyntaxException + * @throws IcatException + * @throws ClientProtocolException + */ + private void extractFromInvestigation(CloseableHttpClient httpclient, String investigationId, + CloseableHttpResponse responseGet) + throws IOException, URISyntaxException, IcatException, ClientProtocolException { + JsonObject responseObject = Json.createReader(responseGet.getEntity().getContent()).readObject(); + if (responseObject.containsKey("investigationuser")) { + extractEntity(httpclient, investigationId, responseObject, "investigationuser", false); + } + if (responseObject.containsKey("investigationinstrument")) { + extractEntity(httpclient, investigationId, responseObject, "investigationinstrument", false); + } + if (responseObject.containsKey("investigationfacilitycycle")) { + extractEntity(httpclient, investigationId, responseObject, "investigationfacilitycycle", false); + } + if (responseObject.containsKey("sample")) { + extractEntity(httpclient, investigationId, responseObject, "sample", true); + } + } + + /** + * For cases when Datasets and Datafiles are created after an Investigation, + * some nested fields such as InvestigationUser and InvestigationInstrument may + * have already been indexed on the Investigation but not the Dataset/file as + * the latter did not yet exist. + * + * This method extracts a single entity and uses it to update the + * dataset/datafile indices. + * + * @param httpclient The client being used to send HTTP + * @param investigationId Id of an investigation which may contain relevant + * information. 
+ * @param responseObject JsonObject to extract the entity from + * @param entityName Name of the entity being extracted + * @param addFields Whether to add individual fields (true) or the entire + * entity as one "doc" (false) + * @throws URISyntaxException + * @throws IcatException + * @throws IOException + * @throws ClientProtocolException + */ + private void extractEntity(CloseableHttpClient httpclient, String investigationId, JsonObject responseObject, + String entityName, boolean addFields) + throws URISyntaxException, IcatException, IOException, ClientProtocolException { + JsonArray jsonArray = responseObject.getJsonArray(entityName); + for (String index : new String[] { "datafile", "dataset" }) { + URI uri = new URIBuilder(server).setPath("/" + index + "/_update_by_query").build(); + HttpPost httpPost = new HttpPost(uri); + if (addFields) { + for (JsonObject document : jsonArray.getValuesAs(JsonObject.class)) { + String documentId = document.getString("id"); + JsonObject queryObject = OpensearchQuery.buildTermQuery(entityName + ".id", documentId); + JsonObjectBuilder paramsBuilder = Json.createObjectBuilder(); + JsonObjectBuilder scriptBuilder = Json.createObjectBuilder(); + for (String field : document.keySet()) { + paramsBuilder.add(entityName + "." 
+ field, document.get(field)); + } + scriptBuilder.add("id", "update_" + entityName).add("params", paramsBuilder); + + updateWithExtractedEntity(httpclient, uri, httpPost, queryObject, scriptBuilder); + } + } else { + JsonObject queryObject = OpensearchQuery.buildTermQuery("investigation.id", investigationId); + JsonObjectBuilder paramsBuilder = Json.createObjectBuilder().add("doc", jsonArray); + JsonObjectBuilder scriptBuilder = Json.createObjectBuilder(); + scriptBuilder.add("id", "create_" + entityName).add("params", paramsBuilder); + + updateWithExtractedEntity(httpclient, uri, httpPost, queryObject, scriptBuilder); + } + } + } + + /** + * For cases when Datasets and Datafiles are created after an Investigation, + * some nested fields such as InvestigationUser and InvestigationInstrument may + * have already been indexed on the Investigation but not the Dataset/file as + * the latter did not yet exist. + * + * This updates an index with the result of the extraction. + * + * @param httpclient The client being used to send HTTP + * @param uri URI for the relevant _update_by_query path + * @param httpPost HttpPost to be sent + * @param queryObject JsonObject determining which entities should be updated + * @param scriptBuilder JsonObjectBuilder for the script used to perform the + * update + * @throws IcatException + * @throws IOException + * @throws ClientProtocolException + */ + private void updateWithExtractedEntity(CloseableHttpClient httpclient, URI uri, HttpPost httpPost, + JsonObject queryObject, JsonObjectBuilder scriptBuilder) + throws IcatException, IOException, ClientProtocolException { + JsonObjectBuilder bodyBuilder = Json.createObjectBuilder(); + String body = bodyBuilder.add("query", queryObject).add("script", scriptBuilder).build().toString(); + httpPost.setEntity(new StringEntity(body, ContentType.APPLICATION_JSON)); + logger.trace("Making call {} with body {}", uri, body); + try (CloseableHttpResponse response = httpclient.execute(httpPost)) { + 
Rest.checkStatus(response, IcatExceptionType.INTERNAL); + commit(); + } + } + + /** + * Performs more complex update of an entity nested to a parent, for example + * parameters. + * + * @param bulk OpensearchBulk object recording the requests for + * updates by query + * @param id Id of the entity. + * @param index Index of the entity. + * @param document JsonObject containing the key value pairs of the + * document fields. + * @param modificationType The type of operation to be performed. + * @param relation The relation between the nested entity and its + * parent. + * @throws URISyntaxException + * @throws IcatException + */ + private void modifyNestedEntity(OpensearchBulk bulk, long id, String index, JsonObject document, + ModificationType modificationType, ParentRelation relation) throws URISyntaxException, IcatException { + + switch (modificationType) { + case CREATE: + if (relation.parentName.equals(relation.joinField)) { + // If the target parent is the same as the joining field, we're appending the + // nested child to a list of objects which can be sent as a bulk update request + // since we have the parent id + document = convertDocumentUnits(document); + if (index.equals("sample")) { + // In order to make searching for sample information seamless between + // Investigations and Datasets/files, need to ensure that when nesting fields + // like "sample.name" under a "sample" object, we do not end up with + // "sample.sample.name" + JsonObjectBuilder documentBuilder = Json.createObjectBuilder(); + for (Entry entry : document.entrySet()) { + documentBuilder.add(entry.getKey().replace("sample.", ""), entry.getValue()); + } + createNestedEntity(bulk, id, index, documentBuilder.build(), relation); + } else { + createNestedEntity(bulk, id, index, document, relation); + } + } else if (index.equals("sampletype")) { + // Otherwise, in most cases we don't need to update, as User and ParameterType + // cannot be null on their parent InvestigationUser or 
InvestigationParameter + // when that parent is created so the information is captured. However, since + // SampleType can be null upon creation of a Sample, need to account for the + // creation of a SampleType at a later date. + updateNestedEntityByQuery(bulk, id, index, document, relation, true); + } else if (index.equals("sampleparameter")) { + // SampleParameter requires specific logic, as the join is performed using the + // Sample id rather than the SampleParameter id or the parent id. + if (document.containsKey("sample.id")) { + long sampleId = document.getJsonNumber("sample.id").longValueExact(); + updateNestedEntityByQuery(bulk, sampleId, index, document, relation, true); + } + } + break; + case UPDATE: + updateNestedEntityByQuery(bulk, id, index, document, relation, true); + break; + case DELETE: + updateNestedEntityByQuery(bulk, id, index, document, relation, false); + break; + } + } + + /** + * Create a new nested entity in an array on its parent. + * + * @param bulk OpensearchBulk object recording the requests for single + * updates + * @param id Id of the entity. + * @param index Index of the entity. + * @param document JsonObject containing the key value pairs of the document + * fields. + * @param relation The relation between the nested entity and its parent. + * @throws IcatException If parentId is missing from document. 
+ * @throws URISyntaxException + */ + private void createNestedEntity(OpensearchBulk bulk, long id, String index, JsonObject document, + ParentRelation relation) throws IcatException, URISyntaxException { + + if (!document.containsKey(relation.joinField + ".id")) { + throw new IcatException(IcatExceptionType.BAD_PARAMETER, + relation.joinField + ".id not found in " + document); + } + + String parentId = document.getString(relation.joinField + ".id"); + String path = "/" + relation.parentName + "/_update/" + parentId; + + // For nested 0:* relationships, wrap single documents in an array + JsonArray docArray = Json.createArrayBuilder().add(document).build(); + JsonObjectBuilder paramsBuilder = Json.createObjectBuilder().add("id", id).add("doc", docArray); + String scriptId; + if (index.equals("sample") || index.equals("sampletype") && relation.parentName.equals("investigation")) { + scriptId = "update_nested" + index; + } else { + scriptId = "update_" + index; + } + + JsonObjectBuilder scriptBuilder = Json.createObjectBuilder().add("id", scriptId).add("params", paramsBuilder); + JsonObjectBuilder upsertBuilder = Json.createObjectBuilder().add(index, docArray); + JsonObjectBuilder payloadBuilder = Json.createObjectBuilder() + .add("upsert", upsertBuilder).add("script", scriptBuilder); + bulk.addUpdate(path, payloadBuilder.build().toString()); + } + + /** + * For existing nested objects, painless scripting must be used to update or + * delete them. + * + * @param bulk OpensearchBulk object recording the requests for updates by + * query + * @param id Id of the entity. + * @param index Index of the entity. + * @param document JsonObject containing the key value pairs of the + * document fields. + * @param relation The relation between the nested entity and its parent. + * @param update Whether to update, or if false delete nested entity + * with the specified id. 
+ * @throws URISyntaxException + */ + private void updateNestedEntityByQuery(OpensearchBulk bulk, long id, String index, JsonObject document, + ParentRelation relation, boolean update) throws URISyntaxException { + + String path = "/" + relation.parentName + "/_update_by_query"; + + // Determine the Id of the painless script to use + String scriptId = update ? "update_" : "delete_"; + if (index.equals("sample") || index.equals("sampletype") && relation.parentName.equals("investigation")) { + scriptId += "nested" + index; + } else { + scriptId += index; + } + + // All updates/deletes require the entityId + JsonObjectBuilder paramsBuilder = Json.createObjectBuilder().add("id", id); + if (update) { + if (relation.fields == null) { + // Update affects all of the nested fields, so can add the entire document + document = convertDocumentUnits(document); + paramsBuilder.add("doc", Json.createArrayBuilder().add(document)); + } else { + // Need to update individual nested fields + convertScriptUnits(paramsBuilder, document, relation.fields); + } + } + JsonObjectBuilder scriptBuilder = Json.createObjectBuilder().add("id", scriptId).add("params", paramsBuilder); + String idField = relation.joinField.equals(relation.parentName) ? 
"id" : relation.joinField + ".id"; + // sample.id is a nested field on investigations, so need a nested query to + // successfully add sampleparameter + JsonObject queryObject = OpensearchQuery.buildTermQuery(idField, id); + if (relation.relationType.equals(RelationType.NESTED_GRANDCHILD) + || index.equals("sampleparameter") && relation.parentName.equals("investigation")) { + queryObject = OpensearchQuery.buildNestedQuery(relation.joinField, queryObject); + } + JsonObject bodyJson = Json.createObjectBuilder().add("query", queryObject).add("script", scriptBuilder).build(); + bulk.addUpdate(path, bodyJson.toString()); + } + + /** + * Gets "type.units" from the existing document, and adds "type.unitsSI" and the + * SI numeric value to the rebuilder if possible. + * + * @param document JsonObject of the original document. + * @param rebuilder JsonObjectBuilder being used to create a new document + * with converted units. + * @param valueString Field name of the numeric value. + * @param numericalValue Value to possibly be converted. + */ + private void convertUnits(JsonObject document, JsonObjectBuilder rebuilder, String valueString, + double numericalValue) { + String unitString = document.getString("type.units"); + Value value = icatUnits.convertValueToSiUnits(numericalValue, unitString); + if (value != null) { + rebuilder.add("type.unitsSI", value.units); + rebuilder.add(valueString + "SI", value.numericalValue); + } + } + + /** + * If appropriate, rebuilds document with conversion into SI units. + * + * @param document JsonObject containing the document field/values. + * @return Either the original JsonDocument, or a copy with SI units and values + * set. + */ + private JsonObject convertDocumentUnits(JsonObject document) { + if (!document.containsKey("type.units")) { + return document; + } + // Need to rebuild the document... 
+ JsonObjectBuilder rebuilder = Json.createObjectBuilder(); + for (String key : document.keySet()) { + rebuilder.add(key, document.get(key)); + } + if (document.containsKey("numericValue")) { + double numericValue = document.getJsonNumber("numericValue").doubleValue(); + convertUnits(document, rebuilder, "numericValueSI", numericValue); + } + if (document.containsKey("rangeBottom")) { + double rangeBottom = document.getJsonNumber("rangeBottom").doubleValue(); + convertUnits(document, rebuilder, "rangeBottomSI", rangeBottom); + } + if (document.containsKey("rangeTop")) { + double rangeTop = document.getJsonNumber("rangeTop").doubleValue(); + convertUnits(document, rebuilder, "rangeTopSI", rangeTop); + } + document = rebuilder.build(); + return document; + } + + /** + * Builds the parameters for a painless script, converting into SI units if + * appropriate. + * + * @param paramsBuilder JsonObjectBuilder for the painless script parameters. + * @param document JsonObject containing the field/values. + * @param fields List of fields to be included in the parameters. + */ + private void convertScriptUnits(JsonObjectBuilder paramsBuilder, JsonObject document, + Set fields) { + for (String field : fields) { + if (document.containsKey(field)) { + if (field.equals("type.unitsSI")) { + convertUnits(document, paramsBuilder, "conversionFactor", 1.); + } else if (field.equals("numericValueSI")) { + continue; + } else { + paramsBuilder.add(field, document.get(field)); + } + } + } + } + + /** + * Adds modification command to bulk. If relevant, also adds to the list of + * investigationIds which may contain relevant information (e.g. nested + * InvestigationUsers). + * + * @param httpclient The client being used to send HTTP + * @param bulk OpensearchBulk object recording the requests for + * updates and aggregations + * @param id Id of the entity. + * @param index Index of the entity. + * @param document JsonObject containing the key value pairs of + * the + * document fields. 
+ * @param modificationType The type of operation to be performed. + * @throws URISyntaxException + * @throws IOException + * @throws ClientProtocolException + */ + private void modifyEntity(CloseableHttpClient httpclient, OpensearchBulk bulk, long id, String index, + JsonObject document, ModificationType modificationType) + throws ClientProtocolException, IOException, URISyntaxException { + + JsonObject targetObject = Json.createObjectBuilder().add("_id", id).add("_index", index).build(); + JsonObject update = Json.createObjectBuilder().add("update", targetObject).build(); + JsonObject docAsUpsert; + switch (modificationType) { + case CREATE: + docAsUpsert = Json.createObjectBuilder().add("doc", document).add("doc_as_upsert", true).build(); + bulk.bulkBuilder.append(update.toString()).append("\n").append(docAsUpsert.toString()).append("\n"); + if (document.containsKey("investigation.id")) { + // In principle a Dataset/Datafile could be created after InvestigationUser + // entities are attached to an Investigation, so need to check for those + bulk.investigationIds.add(document.getString("investigation.id")); + } + break; + case UPDATE: + docAsUpsert = Json.createObjectBuilder().add("doc", document).add("doc_as_upsert", true).build(); + bulk.bulkBuilder.append(update.toString()).append("\n").append(docAsUpsert.toString()).append("\n"); + break; + case DELETE: + bulk.deletionBuilder.append(Json.createObjectBuilder().add("delete", targetObject).build().toString()) + .append("\n"); + break; + } + if (aggregateFiles && index.equals("datafile") && document.containsKey("fileSize")) { + aggregateFiles(modificationType, bulk, index, document, httpclient, id); + } + } + + /** + * Aggregates any change to file size to relevant parent entities. 
+ * + * @param modificationType The type of operation to be performed + * @param bulk OpensearchBulk object recording the requests for + * updates and aggregations + * @param index Index of the entity + * @param document Document containing the parent entity ids + * @param httpclient CloseableHttpClient to use + * @param id Datafile id + * @throws ClientProtocolException + * @throws IOException + * @throws URISyntaxException + */ + private void aggregateFiles(ModificationType modificationType, OpensearchBulk bulk, String index, + JsonObject document, CloseableHttpClient httpclient, long id) + throws ClientProtocolException, IOException, URISyntaxException { + long deltaFileSize = 0; + long deltaFileCount = 0; + switch (modificationType) { + case CREATE: + deltaFileSize = document.getJsonNumber("fileSize").longValueExact(); + deltaFileCount = 1; + break; + case UPDATE: + deltaFileSize = document.getJsonNumber("fileSize").longValueExact() - extractFileSize(httpclient, id); + break; + case DELETE: + deltaFileSize = -extractFileSize(httpclient, id); + deltaFileCount = -1; + break; + } + incrementEntity(bulk.investigationAggregations, document, deltaFileSize, deltaFileCount, "investigation.id"); + incrementEntity(bulk.datasetAggregations, document, deltaFileSize, deltaFileCount, "dataset.id"); + } + + /** + * Increments the changes to a parent entity by the values of deltaFileSize and + * deltaFileCount. + * + * @param aggregations Map of aggregated fileSize changes with the parent ids + * as keys. 
+ * @param document Document containing the parent entity id + * @param deltaFileSize Change in file size + * @param deltaFileCount Change in file count + * @param idField The field of the id of parent entity to be incremented + */ + private void incrementEntity(Map aggregations, JsonObject document, long deltaFileSize, + long deltaFileCount, String idField) { + if (document.containsKey(idField)) { + String id = document.getString(idField); + long[] runningFileSize = aggregations.getOrDefault(id, new long[] { 0, 0 }); + long[] newValue = new long[] { runningFileSize[0] + deltaFileSize, runningFileSize[1] + deltaFileCount }; + aggregations.put(id, newValue); + } + } + + /** + * @param httpclient CloseableHttpClient to use + * @param id Datafile id + * @return Size of the Datafile in bytes + * @throws IOException + * @throws URISyntaxException + * @throws ClientProtocolException + */ + private long extractFileSize(CloseableHttpClient httpclient, long id) + throws IOException, URISyntaxException, ClientProtocolException { + JsonObject source = extractSource(httpclient, id); + if (source != null && source.containsKey("fileSize")) { + return source.getJsonNumber("fileSize").longValueExact(); + } + return 0; + } +} diff --git a/src/main/java/org/icatproject/core/manager/search/OpensearchBulk.java b/src/main/java/org/icatproject/core/manager/search/OpensearchBulk.java new file mode 100644 index 000000000..26cbaac47 --- /dev/null +++ b/src/main/java/org/icatproject/core/manager/search/OpensearchBulk.java @@ -0,0 +1,57 @@ +package org.icatproject.core.manager.search; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +/** + * Holds information for the various types of request that need to be made as + * part of a bulk modification. 
+ */ +public class OpensearchBulk { + + public Map> updatesMap = new HashMap<>(); + public Set investigationIds = new HashSet<>(); + public Map investigationAggregations = new HashMap<>(); + public Map datasetAggregations = new HashMap<>(); + public StringBuilder bulkBuilder = new StringBuilder(); + public StringBuilder deletionBuilder = new StringBuilder(); + public StringBuilder fileAggregationBuilder = new StringBuilder(); + + /** + * Adds a path and body for a single update to updatesMap, if not already + * present. + * + * @param path Path of request + * @param body Body of request + */ + public void addUpdate(String path, String body) { + Set bodies = updatesMap.getOrDefault(path, new HashSet<>()); + bodies.add(body); + updatesMap.put(path, bodies); + } + + /** + * @return String of updates that should be performed as a bulk request + */ + public String bulkBody() { + return bulkBuilder.toString(); + } + + /** + * @return String of deletes that should be performed as a bulk request + */ + public String deletedBody() { + return deletionBuilder.toString(); + } + + /** + * @return String of file aggregations that should be performed as a bulk + * request + */ + public String fileAggregationBody() { + return fileAggregationBuilder.toString(); + } + +} \ No newline at end of file diff --git a/src/main/java/org/icatproject/core/manager/search/OpensearchQuery.java b/src/main/java/org/icatproject/core/manager/search/OpensearchQuery.java new file mode 100644 index 000000000..1324ac04f --- /dev/null +++ b/src/main/java/org/icatproject/core/manager/search/OpensearchQuery.java @@ -0,0 +1,816 @@ +package org.icatproject.core.manager.search; + +import java.io.StringReader; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import jakarta.json.Json; +import jakarta.json.JsonArray; +import jakarta.json.JsonArrayBuilder; +import jakarta.json.JsonNumber; +import jakarta.json.JsonObject; +import 
jakarta.json.JsonObjectBuilder; +import jakarta.json.JsonString; +import jakarta.json.JsonValue; +import jakarta.json.JsonValue.ValueType; + +import org.icatproject.core.IcatException; +import org.icatproject.core.IcatException.IcatExceptionType; +import org.icatproject.utils.IcatUnits.Value; + +/** + * Utilities for building queries in Json understood by Opensearch. + */ +public class OpensearchQuery { + + private static JsonObject matchAll = build("match_all", Json.createObjectBuilder()); + public static JsonObject matchAllQuery = build("query", matchAll); + + private JsonObjectBuilder builder = Json.createObjectBuilder(); + private OpensearchApi opensearchApi; + + public OpensearchQuery(OpensearchApi opensearchApi) { + this.opensearchApi = opensearchApi; + } + + /** + * @param filter Any number of pre-built queries to filter by. + * @param should Any number of pre-built queries. + * @return {"bool": {"filter": [...filter], "should": [...should]}} + */ + public static JsonObject buildBoolQuery(List filter, List should) { + JsonObjectBuilder boolBuilder = Json.createObjectBuilder(); + addToBoolArray("should", should, boolBuilder); + addToBoolArray("filter", filter, boolBuilder); + return build("bool", boolBuilder); + } + + /** + * @param occur String of an occurrence keyword ("filter", "should", "must" + * etc.) + * @param queries List of JsonObjects representing the queries to occur. + * @param boolBuilder Builder of the main boolean query. + */ + private static void addToBoolArray(String occur, List queries, JsonObjectBuilder boolBuilder) { + if (queries != null && queries.size() > 0) { + JsonArrayBuilder filterBuilder = Json.createArrayBuilder(); + for (JsonObject queryObject : queries) { + filterBuilder.add(queryObject); + } + boolBuilder.add(occur, filterBuilder); + } + } + + /** + * @param field Field containing the match. + * @param value Value to match. 
+ * @return {"match": {"`field`.keyword": {"query": `value`, "operator": "and"}}} + */ + public static JsonObject buildMatchQuery(String field, String value) { + JsonObjectBuilder fieldBuilder = Json.createObjectBuilder().add("query", value).add("operator", "and"); + JsonObject matchBuilder = build(field + ".keyword", fieldBuilder); + return build("match", matchBuilder); + } + + /** + * @param path Path to nested Object. + * @param queryObjects Any number of pre-built queries. + * @return {"nested": {"path": `path`, "query": {"bool": {"filter": [...queryObjects]}}}} + */ + public static JsonObject buildNestedQuery(String path, JsonObject... queryObjects) { + JsonObject builtQueries; + if (queryObjects.length == 0) { + builtQueries = matchAllQuery; + } else if (queryObjects.length == 1) { + builtQueries = queryObjects[0]; + } else { + JsonArrayBuilder filterBuilder = Json.createArrayBuilder(); + for (JsonObject queryObject : queryObjects) { + filterBuilder.add(queryObject); + } + JsonObject boolObject = build("filter", filterBuilder.build()); + builtQueries = build("bool", boolObject); + } + JsonObjectBuilder nestedBuilder = Json.createObjectBuilder().add("path", path).add("query", builtQueries); + return build("nested", nestedBuilder); + } + + /** + * @param value String value to query for. + * @param fields List of fields to check for value. + * @return {"query_string": {"query": `value`, "fields": [...fields]}} + */ + public static JsonObject buildStringQuery(String value, String... fields) { + JsonObjectBuilder queryStringBuilder = Json.createObjectBuilder().add("query", value); + if (fields.length > 0) { + JsonArrayBuilder fieldsBuilder = Json.createArrayBuilder(); + for (String field : fields) { + fieldsBuilder.add(field); + } + queryStringBuilder.add("fields", fieldsBuilder); + } + return build("query_string", queryStringBuilder); + } + + /** + * @param field Field containing the term. + * @param value Term to match. 
+ * @return {"term": {`field`: `value`}} + */ + public static JsonObject buildTermQuery(String field, String value) { + return build("term", Json.createObjectBuilder().add(field, value)); + } + + /** + * @param field Field containing the number. + * @param value Number to match. + * @return {"term": {`field`: `value`}} + */ + public static JsonObject buildTermQuery(String field, JsonNumber value) { + return build("term", build(field, value)); + } + + /** + * @param field Field containing the double value. + * @param value Double to match. + * @return {"term": {`field`: `value`}} + */ + public static JsonObject buildTermQuery(String field, double value) { + return build("term", Json.createObjectBuilder().add(field, value)); + } + + /** + * @param field Field containing one of the terms. + * @param values JsonArray of possible terms. + * @return {"terms": {`field`: `values`}} + */ + public static JsonObject buildTermsQuery(String field, JsonArray values) { + return build("terms", build(field, values)); + } + + /** + * @param field Field to apply the range to. + * @param lowerValue Lowest allowed value in the range. + * @param upperValue Highest allowed value in the range. + * @return {"range": {`field`: {"gte": `lowerValue`, "lte": `upperValue`}}} + */ + public static JsonObject buildDoubleRangeQuery(String field, Double lowerValue, Double upperValue) { + JsonObjectBuilder fieldBuilder = Json.createObjectBuilder(); + if (lowerValue != null) + fieldBuilder.add("gte", lowerValue); + if (upperValue != null) + fieldBuilder.add("lte", upperValue); + return buildRange(field, fieldBuilder); + } + + /** + * @param field Field to apply the range to. + * @param lowerValue Lowest allowed value in the range. + * @param upperValue Highest allowed value in the range. 
+ * @return {"range": {`field`: {"gte": `lowerValue`, "lte": `upperValue`}}} + */ + public static JsonObject buildLongRangeQuery(String field, Long lowerValue, Long upperValue) { + JsonObjectBuilder fieldBuilder = Json.createObjectBuilder(); + if (lowerValue != null) + fieldBuilder.add("gte", lowerValue); + if (upperValue != null) + fieldBuilder.add("lte", upperValue); + return buildRange(field, fieldBuilder); + } + + /** + * @param field Field to apply the range to. + * @param lowerValue Lowest allowed value in the range. + * @param upperValue Highest allowed value in the range. + * @return {"range": {`field`: {"gte": `lowerValue`, "lte": `upperValue`}}} + */ + public static JsonObject buildRangeQuery(String field, JsonNumber lowerValue, JsonNumber upperValue) { + JsonObjectBuilder fieldBuilder = Json.createObjectBuilder(); + if (lowerValue != null) + fieldBuilder.add("gte", lowerValue); + if (upperValue != null) + fieldBuilder.add("lte", upperValue); + return buildRange(field, fieldBuilder); + } + + /** + * @param field Field to apply the range to + * @param fieldBuilder JsonObjectBuilder for the field + * @return {"range": {`field`: `fieldBuilder`}} + */ + private static JsonObject buildRange(String field, JsonObjectBuilder fieldBuilder) { + JsonObject rangeObject = build(field, fieldBuilder); + return build("range", rangeObject); + } + + /** + * @param field Field to facet. + * @param ranges JsonArray of ranges to allocate documents to. + * @return {"range": {"field": `field`, "keyed": true, "ranges": `ranges`}} + */ + public static JsonObject buildRangeFacet(String field, JsonArray ranges) { + JsonObjectBuilder rangeBuilder = Json.createObjectBuilder(); + rangeBuilder.add("field", field).add("keyed", true).add("ranges", ranges); + return build("range", rangeBuilder); + } + + /** + * @param field Field to facet. + * @param maxLabels Maximum number of labels per dimension. 
+ * @return {"terms": {"field": `field`, "size": `maxLabels`}} + */ + public static JsonObject buildStringFacet(String field, int maxLabels) { + JsonObjectBuilder termsBuilder = Json.createObjectBuilder(); + termsBuilder.add("field", field).add("size", maxLabels); + return build("terms", termsBuilder); + } + + /** + * @param key Arbitrary key + * @param builder Arbitrary JsonObjectBuilder + * @return {`key`: `builder`}} + */ + private static JsonObject build(String key, JsonObjectBuilder builder) { + return Json.createObjectBuilder().add(key, builder).build(); + } + + /** + * @param key Arbitrary key + * @param value Arbitrary JsonValue + * @return {`key`: `value`}} + */ + private static JsonObject build(String key, JsonValue value) { + return Json.createObjectBuilder().add(key, value).build(); + } + + /** + * Extracts and parses a date value from jsonObject. If the value is a NUMBER + * (ms since epoch), then it is taken as is. If it is a STRING, then it is + * expected in the yyyyMMddHHmm format. + * + * @param jsonObject JsonObject to extract the date from. + * @param key Key of the date field to extract. + * @param offset In the event of the date being a string, we do not have + * second or ms precision. To ensure ranges are successful, + * it may be necessary to add 59999 ms to the parsed value + * as an offset. + * @param defaultValue The value to return if key is not present in jsonObject. + * @return Time since epoch in ms. 
+ * @throws IcatException + */ + private static long parseDate(JsonObject jsonObject, String key, int offset, long defaultValue) + throws IcatException { + if (jsonObject.containsKey(key)) { + ValueType valueType = jsonObject.get(key).getValueType(); + switch (valueType) { + case STRING: + String dateString = jsonObject.getString(key); + try { + return SearchApi.decodeTime(dateString) + offset; + } catch (Exception e) { + throw new IcatException(IcatExceptionType.BAD_PARAMETER, + "Could not parse date " + dateString + " using expected format yyyyMMddHHmm"); + } + case NUMBER: + return jsonObject.getJsonNumber(key).longValueExact(); + default: + throw new IcatException(IcatExceptionType.BAD_PARAMETER, + "Dates should be represented by a NUMBER or STRING JsonValue, but got " + valueType); + } + } + return defaultValue; + } + + /** + * Parses incoming Json encoding the requested facets and uses bodyBuilder to + * construct Json that can be understood by Opensearch. + * + * @param dimensions JsonArray of JsonObjects representing dimensions to be + * faceted. + * @param maxLabels The maximum number of labels to collect for each + * dimension. + * @param dimensionPrefix Optional prefix to apply to the dimension names. This + * is needed to distinguish between potentially ambiguous + * dimensions, such as "(investigation.)type.name" and + * "(investigationparameter.)type.name". + */ + public void parseFacets(JsonArray dimensions, int maxLabels, String dimensionPrefix) { + JsonObjectBuilder aggsBuilder = Json.createObjectBuilder(); + for (JsonObject dimensionObject : dimensions.getValuesAs(JsonObject.class)) { + String dimensionString = dimensionObject.getString("dimension"); + String field = dimensionPrefix == null ? dimensionString : dimensionPrefix + "." 
+ dimensionString; + if (dimensionObject.containsKey("ranges")) { + JsonArray ranges = dimensionObject.getJsonArray("ranges"); + aggsBuilder.add(dimensionString, buildRangeFacet(field, ranges)); + } else { + aggsBuilder.add(dimensionString, + buildStringFacet(field + ".keyword", maxLabels)); + } + } + buildFacetRequestJson(dimensionPrefix, aggsBuilder); + } + + /** + * Uses bodyBuilder to construct Json for faceting string fields. + * + * @param dimensions List of dimensions to perform string based faceting + * on. + * @param maxLabels The maximum number of labels to collect for each + * dimension. + * @param dimensionPrefix Optional prefix to apply to the dimension names. This + * is needed to distinguish between potentially ambiguous + * dimensions, such as "(investigation.)type.name" and + * "(investigationparameter.)type.name". + */ + public void parseFacets(List dimensions, int maxLabels, String dimensionPrefix) { + JsonObjectBuilder aggsBuilder = Json.createObjectBuilder(); + for (String dimensionString : dimensions) { + String field = dimensionPrefix == null ? dimensionString : dimensionPrefix + "." + dimensionString; + aggsBuilder.add(dimensionString, buildStringFacet(field + ".keyword", maxLabels)); + } + buildFacetRequestJson(dimensionPrefix, aggsBuilder); + } + + /** + * Finalises the construction of faceting Json by handling the possibility of + * faceting a nested object. + * + * @param dimensionPrefix Optional prefix to apply to the dimension names. This + * is needed to distinguish between potentially ambiguous + * dimensions, such as "(investigation.)type.name" and + * "(investigationparameter.)type.name". + * @param aggsBuilder JsonObjectBuilder that has the faceting details. 
+ */ + private void buildFacetRequestJson(String dimensionPrefix, JsonObjectBuilder aggsBuilder) { + if (dimensionPrefix == null) { + builder.add("aggs", aggsBuilder); + } else { + builder.add("aggs", Json.createObjectBuilder() + .add(dimensionPrefix, Json.createObjectBuilder() + .add("nested", Json.createObjectBuilder().add("path", dimensionPrefix)) + .add("aggs", aggsBuilder))); + } + } + + /** + * Parses a filter object applied to a single field. Note that in the case that + * this field is actually a nested object, more complex logic will be applied to + * ensure that only object matching all nested filters are returned. + * + * @param filterBuilder Builder for the array of queries to filter by. + * @param field Field to apply the filter to. In the case of nested + * queries, this should only be the name of the top level + * field. For example "investigationparameter". + * @param value JsonValue representing the filter query. This can be a + * STRING for simple terms, or an OBJECT containing nested + * "value", "exact" or "range" filters. + * @throws IcatException + */ + private void parseFilter(JsonArrayBuilder filterBuilder, String field, JsonValue value) throws IcatException { + ValueType valueType = value.getValueType(); + switch (valueType) { + case STRING: + filterBuilder.add(buildTermQuery(field + ".keyword", ((JsonString) value).getString())); + return; + case OBJECT: + JsonObject valueObject = (JsonObject) value; + if (valueObject.containsKey("filter")) { + List queryObjectsList = new ArrayList<>(); + for (JsonObject nestedFilter : valueObject.getJsonArray("filter").getValuesAs(JsonObject.class)) { + String nestedField = nestedFilter.getString("field"); + if (nestedFilter.containsKey("value")) { + // String based term query + String stringValue = nestedFilter.getString("value"); + queryObjectsList.add(buildTermQuery(field + "." 
+ nestedField + ".keyword", stringValue)); + } else if (nestedFilter.containsKey("exact")) { + parseExactFilter(field, queryObjectsList, nestedFilter, nestedField); + } else { + parseRangeFilter(field, queryObjectsList, nestedFilter, nestedField); + } + } + JsonObject[] queryObjects = queryObjectsList.toArray(new JsonObject[0]); + filterBuilder.add(buildNestedQuery(field, queryObjects)); + } else { + throw new IcatException(IcatExceptionType.BAD_PARAMETER, + "expected an ARRAY with the key 'filter', but received " + valueObject); + } + return; + + default: + throw new IcatException(IcatExceptionType.BAD_PARAMETER, + "filter values should be STRING, OBJECT or and ARRAY of the former, but were " + valueType); + } + + } + + /** + * Parses a range based filter for a single field. + * + * @param field Field to apply the filter to. In the case of nested + * queries, this should only be the name of the top + * level + * field. For example "investigationparameter" + * @param queryObjectsList List of JsonObjects to add the filter to + * @param nestedFilter The nested JsonObject which contains the details of + * the filter + * @param nestedField The nested field on which to actually apply the + * filter + */ + private void parseRangeFilter(String field, List queryObjectsList, JsonObject nestedFilter, + String nestedField) { + JsonNumber from = nestedFilter.getJsonNumber("from"); + JsonNumber to = nestedFilter.getJsonNumber("to"); + String units = nestedFilter.getString("units", null); + if (units != null) { + Value fromValue = opensearchApi.icatUnits.convertValueToSiUnits(from.doubleValue(), units); + Value toValue = opensearchApi.icatUnits.convertValueToSiUnits(to.doubleValue(), units); + if (fromValue != null && toValue != null) { + // If we were able to parse the units, apply query to the SI value + String fieldSI = field + "." 
+ nestedField + "SI"; + queryObjectsList.add(buildDoubleRangeQuery(fieldSI, fromValue.numericalValue, toValue.numericalValue)); + } else { + // If units could not be parsed, make them part of the query on the raw data + queryObjectsList.add(buildRangeQuery(field + "." + nestedField, from, to)); + queryObjectsList.add(buildTermQuery(field + ".type.units.keyword", units)); + } + } else { + // If units were not provided, just apply to the raw data + queryObjectsList.add(buildRangeQuery(field + "." + nestedField, from, to)); + } + } + + /** + * Parses an exact filter for a single field. + * + * @param field Field to apply the filter to. In the case of nested + * queries, this should only be the name of the top + * level + * field. For example "investigationparameter" + * @param queryObjectsList List of JsonObjects to add the filter to + * @param nestedFilter The nested JsonObject which contains the details of + * the filter + * @param nestedField The nested field on which to actually apply the + * filter + */ + private void parseExactFilter(String field, List queryObjectsList, JsonObject nestedFilter, + String nestedField) { + JsonNumber exact = nestedFilter.getJsonNumber("exact"); + String units = nestedFilter.getString("units", null); + if (units != null) { + Value exactValue = opensearchApi.icatUnits.convertValueToSiUnits(exact.doubleValue(), units); + if (exactValue != null) { + // If we were able to parse the units, apply query to the SI value + JsonObject bottomQuery = buildDoubleRangeQuery(field + ".rangeBottomSI", null, exactValue.numericalValue); + JsonObject topQuery = buildDoubleRangeQuery(field + ".rangeTopSI", exactValue.numericalValue, null); + JsonObject inRangeQuery = buildBoolQuery(Arrays.asList(bottomQuery, topQuery), null); + JsonObject exactQuery = buildTermQuery(field + "." 
+ nestedField + "SI", exactValue.numericalValue); + queryObjectsList.add(buildBoolQuery(null, Arrays.asList(inRangeQuery, exactQuery))); + } else { + // If units could not be parsed, make them part of the query on the raw data + JsonObject bottomQuery = buildRangeQuery(field + ".rangeBottom", null, exact); + JsonObject topQuery = buildRangeQuery(field + ".rangeTop", exact, null); + JsonObject inRangeQuery = buildBoolQuery(Arrays.asList(bottomQuery, topQuery), null); + JsonObject exactQuery = buildTermQuery(field + "." + nestedField, exact); + queryObjectsList.add(buildBoolQuery(null, Arrays.asList(inRangeQuery, exactQuery))); + queryObjectsList.add(buildTermQuery(field + ".type.units.keyword", units)); + } + } else { + // If units were not provided, just apply to the raw data + JsonObject bottomQuery = buildRangeQuery(field + ".rangeBottom", null, exact); + JsonObject topQuery = buildRangeQuery(field + ".rangeTop", exact, null); + JsonObject inRangeQuery = buildBoolQuery(Arrays.asList(bottomQuery, topQuery), null); + JsonObject exactQuery = buildTermQuery(field + "." + nestedField, exact); + queryObjectsList.add(buildBoolQuery(null, Arrays.asList(inRangeQuery, exactQuery))); + } + } + + /** + * Parses the search query from the incoming queryRequest into Json that the + * search cluster can understand. + * + * @param queryRequest The Json object containing the information on the + * requested query, NOT formatted for the search cluster. + * @param index The index to search. + * @param dimensionPrefix Used to build nested queries for arbitrary fields. + * @param defaultFields Default fields to apply parsed string queries to. + * @throws IcatException If the query cannot be parsed. 
+ */ + public void parseQuery(JsonObject queryRequest, String index, String dimensionPrefix, List defaultFields) + throws IcatException { + // In general, we use a boolean query to compound queries on individual fields + JsonObjectBuilder queryBuilder = Json.createObjectBuilder(); + JsonObjectBuilder boolBuilder = Json.createObjectBuilder(); + + // Non-scored elements are added to the "filter" + JsonArrayBuilder filterBuilder = Json.createArrayBuilder(); + + long lowerTime = Long.MIN_VALUE; + long upperTime = Long.MAX_VALUE; + for (String queryKey : queryRequest.keySet()) { + switch (queryKey) { + case "target": + case "facets": + break; // Avoid using the target index, or facet request as a term in the search + case "lower": + lowerTime = parseDate(queryRequest, "lower", 0, Long.MIN_VALUE); + break; + case "upper": + upperTime = parseDate(queryRequest, "upper", 59999, Long.MAX_VALUE); + break; + case "filter": + parseQueryFilter(queryRequest, index, filterBuilder); + break; + case "text": + parseQueryText(queryRequest, index, defaultFields, boolBuilder); + break; + case "user": + parseQueryUser(queryRequest, filterBuilder); + break; + case "userFullName": + parseQueryUserFullName(queryRequest, filterBuilder); + break; + case "samples": + parseQuerySamples(queryRequest, filterBuilder); + break; + case "parameters": + parseQueryParameters(queryRequest, index, filterBuilder); + break; + default: + parseQueryDefault(queryRequest, dimensionPrefix, filterBuilder, queryKey); + } + } + + if (lowerTime != Long.MIN_VALUE || upperTime != Long.MAX_VALUE) { + if (index.equals("datafile")) { + // datafile has only one date field + filterBuilder.add(buildLongRangeQuery("date", lowerTime, upperTime)); + } else { + filterBuilder.add(buildLongRangeQuery("startDate", lowerTime, upperTime)); + filterBuilder.add(buildLongRangeQuery("endDate", lowerTime, upperTime)); + } + } + + JsonArray filterArray = filterBuilder.build(); + if (filterArray.size() > 0) { + boolBuilder.add("filter", 
filterArray); + } + builder.add("query", queryBuilder.add("bool", boolBuilder)); + } + + /** + * Parses a generic field name from the queryRequest, and adds them to + * filterBuilder. + * + * @param queryRequest JsonObject with the requested query + * @param dimensionPrefix Used to build nested queries for arbitrary fields + * @param filterBuilder JsonArrayBuilder for adding criteria to filter on + * @param queryKey The key from the queryRequest to be treated as a + * Document field + * @throws IcatException + */ + private void parseQueryDefault(JsonObject queryRequest, String dimensionPrefix, JsonArrayBuilder filterBuilder, + String queryKey) throws IcatException { + // If the term doesn't require special logic, handle according to type + JsonObject defaultTermQuery; + String field = queryKey; + if (dimensionPrefix != null) { + field = dimensionPrefix + "." + field; + } + ValueType valueType = queryRequest.get(queryKey).getValueType(); + switch (valueType) { + case STRING: + defaultTermQuery = buildTermQuery(field + ".keyword", queryRequest.getString(queryKey)); + break; + case NUMBER: + defaultTermQuery = buildTermQuery(field, queryRequest.getJsonNumber(queryKey)); + break; + case ARRAY: + // Only support array of String as list of ICAT ids is currently only use case + defaultTermQuery = buildTermsQuery(field, queryRequest.getJsonArray(queryKey)); + break; + default: + throw new IcatException(IcatExceptionType.BAD_PARAMETER, + "Query values should be ARRAY, STRING or NUMBER, but had value of type " + valueType); + } + if (dimensionPrefix != null) { + // e.g. "sample.id" should use a nested query as sample is nested on other + // entities + filterBuilder.add(buildNestedQuery(dimensionPrefix, defaultTermQuery)); + } else { + // Otherwise, we can associate the query directly with the searched entity + filterBuilder.add(defaultTermQuery); + } + } + + /** + * Parses parameters from the queryRequest, and adds them to filterBuilder. 
+ * + * @param queryRequest JsonObject with the requested query + * @param index The index to search + * @param filterBuilder JsonArrayBuilder for adding criteria to filter on + * @throws IcatException + */ + private void parseQueryParameters(JsonObject queryRequest, String index, JsonArrayBuilder filterBuilder) + throws IcatException { + for (JsonObject parameterObject : queryRequest.getJsonArray("parameters").getValuesAs(JsonObject.class)) { + String path = index + "parameter"; + List parameterQueries = new ArrayList<>(); + if (parameterObject.containsKey("name")) { + String name = parameterObject.getString("name"); + parameterQueries.add(buildMatchQuery(path + ".type.name", name)); + } + if (parameterObject.containsKey("units")) { + String units = parameterObject.getString("units"); + parameterQueries.add(buildMatchQuery(path + ".type.units", units)); + } + if (parameterObject.containsKey("stringValue")) { + String stringValue = parameterObject.getString("stringValue"); + parameterQueries.add(buildMatchQuery(path + ".stringValue", stringValue)); + } else if (parameterObject.containsKey("lowerDateValue") && parameterObject.containsKey("upperDateValue")) { + long lower = parseDate(parameterObject, "lowerDateValue", 0, Long.MIN_VALUE); + long upper = parseDate(parameterObject, "upperDateValue", 59999, Long.MAX_VALUE); + parameterQueries.add(buildLongRangeQuery(path + ".dateTimeValue", lower, upper)); + } else if (parameterObject.containsKey("lowerNumericValue") + && parameterObject.containsKey("upperNumericValue")) { + JsonNumber lower = parameterObject.getJsonNumber("lowerNumericValue"); + JsonNumber upper = parameterObject.getJsonNumber("upperNumericValue"); + parameterQueries.add(buildRangeQuery(path + ".numericValue", lower, upper)); + } + filterBuilder.add(buildNestedQuery(path, parameterQueries.toArray(new JsonObject[0]))); + } + } + + /** + * Parses samples from the queryRequest, and adds them to filterBuilder. 
+ * + * @param queryRequest JsonObject with the requested query + * @param filterBuilder JsonArrayBuilder for adding criteria to filter on + */ + private void parseQuerySamples(JsonObject queryRequest, JsonArrayBuilder filterBuilder) { + JsonArray samples = queryRequest.getJsonArray("samples"); + for (int i = 0; i < samples.size(); i++) { + String sample = samples.getString(i); + JsonObject stringQuery = buildStringQuery(sample, "sample.name", + "sample.type.name"); + filterBuilder.add(buildNestedQuery("sample", stringQuery)); + } + } + + /** + * Parses the userFullName from the queryRequest, and adds it to filterBuilder. + * This uses joins to InvestigationUser and performs a non-exact string match. + * + * @param queryRequest JsonObject with the requested query + * @param filterBuilder JsonArrayBuilder for adding criteria to filter on + * @throws IcatException + */ + private void parseQueryUserFullName(JsonObject queryRequest, JsonArrayBuilder filterBuilder) { + String fullName = queryRequest.getString("userFullName"); + JsonObject fullNameQuery = buildStringQuery(fullName, "investigationuser.user.fullName"); + filterBuilder.add(buildNestedQuery("investigationuser", fullNameQuery)); + } + + /** + * Parses the user from the queryRequest, and adds it to filterBuilder. This + * uses joins to both InvestigationUser and InstrumentScientist entities to + * mimic common ICAT rules that only allow users to see their "own" data by + * using an exact term match. 
+ * + * @param queryRequest JsonObject with the requested query + * @param filterBuilder JsonArrayBuilder for adding criteria to filter on + * @throws IcatException + */ + private void parseQueryUser(JsonObject queryRequest, JsonArrayBuilder filterBuilder) throws IcatException { + String user = queryRequest.getString("user"); + // Because InstrumentScientist is on a separate index, we need to explicitly + // perform a search here + JsonObject termQuery = buildTermQuery("user.name.keyword", user); + String body = Json.createObjectBuilder().add("query", termQuery).build().toString(); + Map parameterMap = new HashMap<>(); + parameterMap.put("_source", "instrument.id"); + JsonObject postResponse = opensearchApi.postResponse("/instrumentscientist/_search", body, parameterMap); + JsonArray hits = postResponse.getJsonObject("hits").getJsonArray("hits"); + JsonArrayBuilder instrumentIdsBuilder = Json.createArrayBuilder(); + for (JsonObject hit : hits.getValuesAs(JsonObject.class)) { + String instrumentId = hit.getJsonObject("_source").getString("instrument.id"); + instrumentIdsBuilder.add(instrumentId); + } + JsonObject instrumentQuery = buildTermsQuery("investigationinstrument.instrument.id", + instrumentIdsBuilder.build()); + JsonObject nestedInstrumentQuery = buildNestedQuery("investigationinstrument", instrumentQuery); + // InvestigationUser should be a nested field on the main Document + JsonObject investigationUserQuery = buildMatchQuery("investigationuser.user.name", user); + JsonObject nestedUserQuery = buildNestedQuery("investigationuser", investigationUserQuery); + // At least one of being an InstrumentScientist or an InvestigationUser is + // necessary + JsonArrayBuilder array = Json.createArrayBuilder().add(nestedInstrumentQuery).add(nestedUserQuery); + filterBuilder.add(Json.createObjectBuilder().add("bool", Json.createObjectBuilder().add("should", array))); + } + + /** + * Parses text for a single field from the queryRequest, and adds it to + * boolBuilder. 
+	 *
+	 * @param queryRequest  JsonObject with the requested query
+	 * @param index         Index (entity) to apply the query to
+	 * @param defaultFields If text does not contain specific field targeting, then
+	 *                      matches will be attempted against the defaultFields
+	 * @param boolBuilder   JsonObjectBuilder for adding criteria to
+	 */
+	private void parseQueryText(JsonObject queryRequest, String index, List<String> defaultFields,
+			JsonObjectBuilder boolBuilder) {
+		// The free text is the only element we perform scoring on, so "must" occur
+		JsonArrayBuilder arrayBuilder = Json.createArrayBuilder();
+		String text = queryRequest.getString("text");
+		arrayBuilder.add(buildStringQuery(text, defaultFields.toArray(new String[0])));
+		if (index.equals("investigation")) {
+			JsonObject stringQuery = buildStringQuery(text, "sample.name", "sample.type.name");
+			arrayBuilder.add(buildNestedQuery("sample", stringQuery));
+			JsonObjectBuilder textBoolBuilder = Json.createObjectBuilder().add("should", arrayBuilder);
+			JsonObjectBuilder textMustBuilder = Json.createObjectBuilder().add("bool", textBoolBuilder);
+			boolBuilder.add("must", Json.createArrayBuilder().add(textMustBuilder));
+		} else {
+			boolBuilder.add("must", arrayBuilder);
+		}
+	}
+
+	/**
+	 * Parses a filter for a single field from the queryRequest, and adds it to
+	 * filterBuilder.
+ * + * @param queryRequest JsonObject with the requested query + * @param index Index (entity) to apply the query to + * @param filterBuilder JsonArrayBuilder for adding criteria to filter on + * @throws IcatException + */ + private void parseQueryFilter(JsonObject queryRequest, String index, JsonArrayBuilder filterBuilder) + throws IcatException { + JsonObject filterObject = queryRequest.getJsonObject("filter"); + for (String fld : filterObject.keySet()) { + JsonValue value = filterObject.get(fld); + String field = fld.replace(index + ".", ""); + if (value.getValueType().equals(ValueType.ARRAY)) { + JsonArrayBuilder arrayBuilder = Json.createArrayBuilder(); + for (JsonValue arrayValue : ((JsonArray) value).getValuesAs(JsonString.class)) { + parseFilter(arrayBuilder, field, arrayValue); + } + // If the key was just a nested entity (no ".") then we should FILTER all of our + // queries on that entity. + String occur = fld.contains(".") ? "should" : "filter"; + filterBuilder.add(Json.createObjectBuilder().add("bool", + Json.createObjectBuilder().add(occur, arrayBuilder))); + } else { + parseFilter(filterBuilder, field, value); + } + } + } + + /** + * Parse sort criteria and add it to the request body. + * + * @param sort String of JsonObject containing the sort criteria. 
+ */ + public void parseSort(String sort) { + if (sort == null || sort.equals("")) { + builder.add("sort", Json.createArrayBuilder() + .add(Json.createObjectBuilder().add("_score", "desc")) + .add(Json.createObjectBuilder().add("id", "asc")).build()); + } else { + JsonObject sortObject = Json.createReader(new StringReader(sort)).readObject(); + JsonArrayBuilder sortArrayBuilder = Json.createArrayBuilder(); + for (String key : sortObject.keySet()) { + if (key.toLowerCase().contains("date") || key.startsWith("file")) { + // Dates and fileSize/fileCount are numeric, so can be used as is + sortArrayBuilder.add(Json.createObjectBuilder().add(key, sortObject.getString(key))); + } else { + // Text fields should use the .keyword field for sorting + sortArrayBuilder.add(Json.createObjectBuilder().add(key + ".keyword", sortObject.getString(key))); + } + } + builder.add("sort", sortArrayBuilder.add(Json.createObjectBuilder().add("id", "asc")).build()); + } + } + + /** + * Add searchAfter to the request body. + * + * @param searchAfter Possibly null JsonValue representing the last document of + * a previous search. 
+ */ + public void parseSearchAfter(JsonValue searchAfter) { + if (searchAfter != null) { + builder.add("search_after", searchAfter); + } + } + + /** + * @return The parsed query, as a String with Json formatting + */ + public String body() { + return builder.build().toString(); + } + +} diff --git a/src/main/java/org/icatproject/core/manager/search/OpensearchScriptBuilder.java b/src/main/java/org/icatproject/core/manager/search/OpensearchScriptBuilder.java new file mode 100644 index 000000000..62e4832df --- /dev/null +++ b/src/main/java/org/icatproject/core/manager/search/OpensearchScriptBuilder.java @@ -0,0 +1,206 @@ +package org.icatproject.core.manager.search; + +import java.util.Set; + +import jakarta.json.Json; +import jakarta.json.JsonObjectBuilder; + +public class OpensearchScriptBuilder { + + /** + * Builds Json for creating a new script with the provided painless source code. + * + * @param source Painless source code as a String. + * @return Json for creating a new script. + */ + private static String buildScript(String source) { + JsonObjectBuilder builder = Json.createObjectBuilder().add("lang", "painless").add("source", source); + return Json.createObjectBuilder().add("script", builder).build().toString(); + } + + /** + * In order to access a specific nested child entity, access `childIndex` in + * later parts of the painless script. + * + * @param childName The name of the nested child entity. + * @param declareChildId Should be true for only the first time a child is found + * during a script so that the variable can be reused. + * @return Painless code for determining the id of a given child within a nested + * array. + */ + private static String findNestedChild(String childName, boolean declareChildId) { + String source; + if (declareChildId) { + source = "int childIndex = -1; int i = 0;"; + } else { + source = "childIndex = -1; i = 0;"; + } + return source + " if (ctx._source." 
+ childName + " != null) " + + "{while (childIndex == -1 && i < ctx._source." + childName + ".size()) " + + "{if (ctx._source." + childName + ".get(i).id == params.id) {childIndex = i;} i++;}}"; + } + + /** + * @param childName The name of the nested child entity. + * @return Painless code for removing a given child within a nested array based + * on its id. + */ + private static String removeNestedChild(String childName) { + return findNestedChild(childName, true) + " if (childIndex != -1) {ctx._source." + childName + + ".remove(childIndex);}"; + } + + /** + * @param field The field belonging to the child entity to be modified. + * @param ctxSource The context source where the field can be found. + * @param update If true the script will replace the field, else the + * value will be deleted. + * @return Painless code for updating one field within ctxSource. + */ + private static String updateField(String field, String ctxSource, boolean update) { + if (update) { + if (field.equals("numericValueSI")) { + return "if (" + ctxSource + ".numericValue != null && params.containsKey('conversionFactor')) {" + + ctxSource + ".numericValueSI = params.conversionFactor * " + ctxSource + + ".numericValue;} else {" + ctxSource + ".remove('numericValueSI');}"; + } else { + return ctxSource + "['" + field + "']" + " = params['" + field + "']; "; + } + } else { + return ctxSource + ".remove('" + field + "'); "; + } + } + + /** + * Builds a script which updates specific fields on a parent entity that are set + * by (at most) a single non-nested child. + * + * @param docFields The fields belonging to the child entity to be modified. + * @param update If true the script will replace the docFields, else the + * value will be deleted. + * @return The painless script as a String. 
+ */ + public static String buildChildScript(Set docFields, boolean update) { + String source = ""; + for (String field : docFields) { + source += updateField(field, "ctx._source", update); + } + return buildScript(source); + } + + /** + * Builds a script which sets the array of nested child entities to a new array. + * Note that this will overwrite any existing nested Objects. It should not be + * used to add a new entry to an existing array, but is more efficient in cases + * where we know the array will not yet be set. + * + * @param childName The name of the nested child entity. + * @return The painless script as a String. + */ + public static String buildCreateNestedChildScript(String childName) { + String source = "ctx._source." + childName + " = params.doc"; + return buildScript(source); + } + + /** + * Builds a script which updates or removes a single specific nested entity + * based on ICAT entity Id. + * + * @param childName The name of the nested child entity. + * @param update If true the script will replace a nested entity, else the + * nested entity will be removed from the array. + * @return The painless script as a String. + */ + public static String buildNestedChildScript(String childName, boolean update) { + String source = removeNestedChild(childName); + if (update) { + source += " if (ctx._source." + childName + " != null) {ctx._source." + childName + + ".addAll(params.doc);} else {ctx._source." + childName + " = params.doc;}"; + } + return buildScript(source); + } + + /** + * Builds a script which updates specific fields on a nested child entity that + * are set by a single grandchild. + * + * @param childName The name of the nested child entity. + * @param docFields The fields belonging to the grandchild entity to be + * modified. + * @param update If true the script will replace a nested entity, else the + * nested entity will be removed from the array. + * @return The painless script as a String. 
+ */ + public static String buildGrandchildScript(String childName, Set docFields, boolean update) { + String source = findNestedChild(childName, true); + String ctxSource = "ctx._source." + childName + ".get(childIndex)"; + if (docFields != null) { + source += "if (childIndex != -1) { "; + for (String field : docFields) { + source += updateField(field, ctxSource, update); + } + source += " } "; + } + return buildScript(source); + } + + /** + * Builds a script which increments fileSize by deltaFileSize. If + * fileSize is null then deltaFileSize is taken as its new value. + * + * @return The painless script as a String. + */ + public static String buildFileSizeScript() { + String source = "if (ctx._source.fileSize != null) "; + source += "{ctx._source.fileSize += params.deltaFileSize;} else {ctx._source.fileSize = params.deltaFileSize;}"; + source += "if (ctx._source.fileCount != null) "; + source += "{ctx._source.fileCount += params.deltaFileCount;} else {ctx._source.fileCount = params.deltaFileCount;}"; + return buildScript(source); + } + + /** + * Modifies ParameterTypes with logic to ensure the update is applied to all + * possible Parameters (Investigation, Dataset, Datafile, Sample). + * + * @param fields The fields belonging to the ParameterType to be + * modified. + * @param update If true the script will replace a nested entity, else the + * nested entity will be removed from the array. 
+ * @return + */ + public static String buildParameterTypesScript(Set docFields, boolean update) { + String source = buildParameterTypeScript(docFields, update, "investigationparameter", true); + source += buildParameterTypeScript(docFields, update, "datasetparameter", false); + source += buildParameterTypeScript(docFields, update, "datafileparameter", false); + source += buildParameterTypeScript(docFields, update, "sampleparameter", false); + return buildScript(source); + } + + /** + * Modifies a single type of Parameter (Investigation, Dataset, Datafile, + * Sample) with changes to a ParameterType. + * + * @param update If true the script will replace a nested entity, else + * the nested entity will be removed from the array + * @param nestedChildName Name of the Parameter entity to modify + * @param declareChildId Whether the childId needs to be declared. This should + * only be true for the first parameter in the script. + * @param fields The fields belonging to the ParameterType to be + * modified + * + * @return The script to modify the Parameter as a String + */ + private static String buildParameterTypeScript(Set docFields, boolean update, String nestedChildName, + boolean declareChildId) { + String ctxSource = "ctx._source." 
+ nestedChildName + ".get(childIndex)"; + String source = findNestedChild(nestedChildName, declareChildId); + if (docFields != null) { + source += "if (childIndex != -1) { "; + for (String field : docFields) { + source += updateField(field, ctxSource, update); + } + source += " } "; + } + return source; + } +} diff --git a/src/main/java/org/icatproject/core/manager/ParameterPOJO.java b/src/main/java/org/icatproject/core/manager/search/ParameterPOJO.java similarity index 75% rename from src/main/java/org/icatproject/core/manager/ParameterPOJO.java rename to src/main/java/org/icatproject/core/manager/search/ParameterPOJO.java index 52271a7fc..c5d948dd8 100644 --- a/src/main/java/org/icatproject/core/manager/ParameterPOJO.java +++ b/src/main/java/org/icatproject/core/manager/search/ParameterPOJO.java @@ -1,4 +1,4 @@ -package org.icatproject.core.manager; +package org.icatproject.core.manager.search; import java.io.Serializable; import java.util.Date; @@ -6,13 +6,13 @@ @SuppressWarnings("serial") public class ParameterPOJO implements Serializable { - String name; - String units; - String stringValue; - Date lowerDateValue; - Date upperDateValue; - Double lowerNumericValue; - Double upperNumericValue; + public String name; + public String units; + public String stringValue; + public Date lowerDateValue; + public Date upperDateValue; + public Double lowerNumericValue; + public Double upperNumericValue; public ParameterPOJO(String name, String units, String stringValue) { this.name = name; @@ -43,7 +43,7 @@ public String toString() { if (stringValue != null) { sb.append(" stringValue:" + stringValue); } else if (lowerDateValue != null) { - sb.append(" lowerDateValue:" + lowerDateValue + " upperDateValue:" + upperDateValue); + sb.append(" lowerDateValue:" + lowerDateValue.getTime() + " upperDateValue:" + upperDateValue.getTime()); } else if (lowerNumericValue != null) { sb.append(", lowerNumericValue:" + lowerNumericValue + " upperNumericValue:" + upperNumericValue); } 
diff --git a/src/main/java/org/icatproject/core/manager/search/ScoredEntityBaseBean.java b/src/main/java/org/icatproject/core/manager/search/ScoredEntityBaseBean.java
new file mode 100644
index 000000000..2f79c2de4
--- /dev/null
+++ b/src/main/java/org/icatproject/core/manager/search/ScoredEntityBaseBean.java
@@ -0,0 +1,72 @@
+package org.icatproject.core.manager.search;
+
+import jakarta.json.JsonObject;
+
+import org.icatproject.core.IcatException;
+import org.icatproject.core.IcatException.IcatExceptionType;
+import org.icatproject.core.manager.HasEntityId;
+
+public class ScoredEntityBaseBean implements HasEntityId {
+
+	private long id;
+	private int shardIndex;
+	private int engineDocId;
+	private float score;
+	private JsonObject source;
+
+	/**
+	 * Represents a single entity returned from a search, and relevant search engine
+	 * information.
+	 * 
+	 * @param engineDocId The id of the search engine Document that represents this
+	 *                    entity. This should not be confused with the
+	 *                    entityBaseBeanId. This is needed in order to enable
+	 *                    subsequent searches to "search after" Documents which have
+	 *                    already been returned once.
+	 * @param shardIndex  The index of the shard that the entity was found on. This
+	 *                    is only relevant when merging results with the icat.lucene
+	 *                    component.
+	 * @param score       A float generated by the engine to indicate the relevance
+	 *                    of the returned Document to the search term(s). Higher
+	 *                    scores are more relevant. May be NaN if the results were
+	 *                    not sorted by relevance.
+	 * @param source      JsonObject containing the requested fields of the Document
+	 *                    as key-value pairs. At the very least, this should contain
+	 *                    the ICAT "id" of the entity.
+	 * @throws IcatException If "id" and the corresponding entityBaseBeanId are not
+	 *                       a key-value pair in the source JsonObject.
+ */ + public ScoredEntityBaseBean(int engineDocId, int shardIndex, float score, JsonObject source) throws IcatException { + if (!source.containsKey("id")) { + throw new IcatException(IcatExceptionType.BAD_PARAMETER, + "Document source must have 'id' and the entityBaseBeanId as a key-value pair, but it was " + + source); + } + this.engineDocId = engineDocId; + this.shardIndex = shardIndex; + this.score = score; + this.source = source; + this.id = source.getJsonNumber("id").longValueExact(); + } + + public Long getId() { + return id; + } + + public int getEngineDocId() { + return engineDocId; + } + + public int getShardIndex() { + return shardIndex; + } + + public float getScore() { + return score; + } + + public JsonObject getSource() { + return source; + } + +} diff --git a/src/main/java/org/icatproject/core/manager/search/SearchApi.java b/src/main/java/org/icatproject/core/manager/search/SearchApi.java new file mode 100644 index 000000000..cecb986d6 --- /dev/null +++ b/src/main/java/org/icatproject/core/manager/search/SearchApi.java @@ -0,0 +1,513 @@ +package org.icatproject.core.manager.search; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.StringReader; +import java.net.URI; +import java.net.URISyntaxException; +import java.text.SimpleDateFormat; +import java.util.Arrays; +import java.util.Date; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TimeZone; +import java.util.Map.Entry; +import java.util.concurrent.ExecutorService; + +import jakarta.json.Json; +import jakarta.json.JsonArray; +import jakarta.json.JsonArrayBuilder; +import jakarta.json.JsonObject; +import jakarta.json.JsonReader; +import jakarta.json.JsonValue; +import jakarta.json.JsonValue.ValueType; +import jakarta.json.stream.JsonGenerator; +import jakarta.persistence.EntityManager; + +import org.apache.http.HttpEntity; +import org.apache.http.client.methods.CloseableHttpResponse; +import 
org.apache.http.client.methods.HttpPost; +import org.apache.http.client.utils.URIBuilder; +import org.apache.http.entity.ContentType; +import org.apache.http.entity.StringEntity; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.icatproject.core.IcatException; +import org.icatproject.core.IcatException.IcatExceptionType; +import org.icatproject.core.entity.EntityBaseBean; +import org.icatproject.core.manager.Rest; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public abstract class SearchApi { + + protected static final Logger logger = LoggerFactory.getLogger(SearchApi.class); + protected static SimpleDateFormat df; + protected static final Set indices = new HashSet<>(Arrays.asList("datafile", "dataset", "investigation")); + + protected URI server; + + static { + df = new SimpleDateFormat("yyyyMMddHHmm"); + TimeZone tz = TimeZone.getTimeZone("GMT"); + df.setTimeZone(tz); + } + + public SearchApi(URI server) { + this.server = server; + } + + /** + * Converts String into number of ms since epoch. + * + * @param value String representing a Date in the format "yyyyMMddHHmm". + * @return Number of ms since epoch, or null if value was null + * @throws java.text.ParseException + */ + protected static Long decodeTime(String value) throws java.text.ParseException { + if (value == null) { + return null; + } else { + synchronized (df) { + return df.parse(value).getTime(); + } + } + } + + /** + * Encodes the deletion of the provided entity as Json. + * + * @param bean Entity to be deleted from the search engine index. 
+ * @return String of Json in the format + * {"delete": {"_index": `entityName`, "_id": `id`}} + */ + public static String encodeDeletion(EntityBaseBean bean) { + String entityName = bean.getClass().getSimpleName(); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (JsonGenerator gen = Json.createGenerator(baos)) { + gen.writeStartObject().writeStartObject("delete"); + gen.write("_index", entityName).write("_id", bean.getId()); + gen.writeEnd().writeEnd(); + } + return baos.toString(); + } + + /** + * Writes a key value pair to the JsonGenerator being used to encode an entity. + * + * @param gen JsonGenerator being used to encode. + * @param name Name of the field. + * @param value Double value to encode as a double. + */ + public static void encodeDouble(JsonGenerator gen, String name, Double value) { + gen.write(name, value); + } + + /** + * Writes a key value pair to the JsonGenerator being used to encode an entity. + * + * @param gen JsonGenerator being used to encode. + * @param name Name of the field. + * @param value Date value to encode as a long. + */ + public static void encodeLong(JsonGenerator gen, String name, Date value) { + gen.write(name, value.getTime()); + } + + /** + * Writes a key value pair to the JsonGenerator being used to encode an entity. + * + * @param gen JsonGenerator being used to encode. + * @param name Name of the field. + * @param value Long value to encode as a long. + */ + public static void encodeLong(JsonGenerator gen, String name, Long value, long defaultValue) { + if (value == null){ + gen.write(name, defaultValue); + } else { + gen.write(name, value); + } + } + + /** + * Writes a key value pair to the JsonGenerator being used to encode an entity. + * + * @param gen JsonGenerator being used to encode. + * @param name Name of the field. + * @param value Long value to encode as a long. 
+ * @throws IcatException + */ + public static void encodeLong(JsonGenerator gen, String name, Long value) throws IcatException { + if (value == null){ + throw new IcatException(IcatExceptionType.BAD_PARAMETER, "Attempting to set " + name + " to null"); + } + gen.write(name, value); + } + + /** + * Encodes the creation or updating of the provided entity as Json. + * + * @param operation The operation to encode. Should either be "create" or + * "update". + * @param bean Entity to perform the operation on. + * @return String of Json in the format + * {`operation`: {"_index": `entityName`, "_id": `id`, "doc": {...}}} + * @throws IcatException + */ + public static String encodeOperation(EntityManager manager, String operation, EntityBaseBean bean) throws IcatException { + Long icatId = bean.getId(); + if (icatId == null) { + throw new IcatException(IcatExceptionType.BAD_PARAMETER, bean + " had null id"); + } + String entityName = bean.getClass().getSimpleName(); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (JsonGenerator gen = Json.createGenerator(baos)) { + gen.writeStartObject().writeStartObject(operation); + gen.write("_index", entityName).write("_id", icatId); + gen.writeStartObject("doc"); + bean.getDoc(manager, gen); + gen.writeEnd().writeEnd().writeEnd(); + } + return baos.toString(); + } + + /** + * Writes a key value pair to the JsonGenerator being used to encode an entity. + * + * @param gen JsonGenerator being used to encode. + * @param name Name of the field. + * @param value String value to encode as a string. + */ + public static void encodeString(JsonGenerator gen, String name, String value) { + if (value != null) { + gen.write(name, value); + } else { + logger.warn("Cannot encode field {} as it was unexpectedly null", name); + } + } + + /** + * Writes a key value pair to the JsonGenerator being used to encode an entity, + * provided that value is not null. + * + * @param gen JsonGenerator being used to encode. 
+ * @param name Name of the field. + * @param value String value to encode as a string. + */ + public static void encodeNullableString(JsonGenerator gen, String name, String value) { + if (value != null) { + gen.write(name, value); + } + } + + /** + * Builds a Json representation of the final search result based on the sort + * criteria used. This allows future searches to efficiently "search after" this + * result. + * + * @param lastBean The last ScoredEntityBaseBean of the current search results. + * @param sort String representing a JsonObject of sort criteria. + * @return JsonValue representing the lastBean to allow future searches to + * search after it. + * @throws IcatException If the score of the lastBean is NaN, or one of the sort + * fields is not present in the source of the lastBean. + */ + public JsonValue buildSearchAfter(ScoredEntityBaseBean lastBean, String sort) throws IcatException { + JsonArrayBuilder arrayBuilder; + if (sort != null && !sort.equals("") || sort.equals("{}")) { + arrayBuilder = searchAfterArrayBuilder(lastBean, sort); + } else { + arrayBuilder = Json.createArrayBuilder(); + if (Float.isNaN(lastBean.getScore())) { + throw new IcatException(IcatExceptionType.INTERNAL, + "Cannot build searchAfter document from source as score was NaN."); + } + arrayBuilder.add(lastBean.getScore()); + } + arrayBuilder.add(lastBean.getId()); + return arrayBuilder.build(); + } + + /** + * Builds a Json representation of the sorted fields of the final search result. + * This allows future searches to efficiently "search after" this result. + * + * @param lastBean The last ScoredEntityBaseBean of the current search results. + * @param sort String representing a JsonObject of sort criteria. + * @return JsonArray representing the sorted fields to allow future searches to + * search after it. + * @throws IcatException If one of the sort fields is not present in the source + * of the lastBean. 
+ */ + protected static JsonArrayBuilder searchAfterArrayBuilder(ScoredEntityBaseBean lastBean, String sort) + throws IcatException { + try (JsonReader reader = Json.createReader(new StringReader(sort))) { + JsonObject object = reader.readObject(); + JsonArrayBuilder arrayBuilder = Json.createArrayBuilder(); + for (String key : object.keySet()) { + if (!lastBean.getSource().containsKey(key)) { + throw new IcatException(IcatExceptionType.INTERNAL, + "Cannot build searchAfter document from source as sorted field " + key + " missing."); + } + JsonValue value = lastBean.getSource().get(key); + arrayBuilder.add(value); + } + return arrayBuilder; + } + } + + /** + * Parses the JsonObject response from the search engine into a FacetDimension, + * and adds it to results. + * + * @param results List of FacetDimensions to add the results from this + * dimension to. + * @param target The entity being targeted. + * @param dimension The dimension (field) being faceted. + * @param aggregations JsonObject containing the response from the search + * engine. 
+ * @throws IcatException + */ + protected static void parseFacetsResponse(List results, String target, String dimension, + JsonObject aggregations) throws IcatException { + if (dimension.equals("doc_count")) { + // For nested aggregations, there is a doc_count entry at the same level as the + // dimension objects, but we're not interested in this + return; + } + FacetDimension facetDimension = new FacetDimension(target, dimension); + List facets = facetDimension.getFacets(); + + JsonObject aggregation = aggregations.getJsonObject(dimension); + JsonValue bucketsValue = aggregation.get("buckets"); + ValueType valueType = bucketsValue.getValueType(); + switch (valueType) { + case ARRAY: + List buckets = ((JsonArray) bucketsValue).getValuesAs(JsonObject.class); + if (buckets.size() == 0) { + return; + } + for (JsonObject bucket : buckets) { + FacetLabel facetLabel = new FacetLabel(bucket); + facets.add(facetLabel); + } + break; + case OBJECT: + JsonObject bucketsObject = (JsonObject) bucketsValue; + Set keySet = bucketsObject.keySet(); + if (keySet.size() == 0) { + return; + } + for (String key : keySet) { + JsonObject bucket = bucketsObject.getJsonObject(key); + FacetLabel facetLabel = new FacetLabel(key, bucket); + facets.add(facetLabel); + } + break; + default: + String msg = "Expected 'buckets' to have ARRAY or OBJECT type, but it was " + valueType; + throw new IcatException(IcatExceptionType.INTERNAL, msg); + } + results.add(facetDimension); + } + + /** + * Adds documents to the index identified by entityName immediately. + * Practically, this should be used for populating documents from existing + * database records as opposed to adding documents as they are created. + * + * @param entityName The entity to create documents for. + * @param ids List of ids corresponding to the documents to add. + * @param manager EntityManager for finding the beans from their id. + * @param klass Class of the entity to create documents for. 
+ * @param getBeanDocExecutor + * @throws IcatException + * @throws IOException + * @throws URISyntaxException + */ + public abstract void addNow(String entityName, List ids, EntityManager manager, + Class klass, ExecutorService getBeanDocExecutor) + throws IcatException, IOException, URISyntaxException; + + /** + * This is only for testing purposes. Other calls to the service will not + * work properly while this operation is in progress. + * + * Deletes all documents across all indices. + * + * @throws IcatException + */ + public abstract void clear() throws IcatException; + + /** + * Commits any pending documents to their respective index. + * + * @throws IcatException + */ + public abstract void commit() throws IcatException; + + /** + * Perform faceting on an entity/index. The query associated with the request + * should determine which Documents to consider, and optionally the dimensions + * to facet. If no dimensions are provided, "sparse" faceting is performed + * across relevant string fields (but no Range faceting occurs). + * + * @param target Name of the entity/index to facet on. + * @param facetQuery JsonObject containing the criteria to facet on. + * @param maxResults The maximum number of results to include in the returned + * Json. + * @param maxLabels The maximum number of labels to return for each dimension + * of the facets. + * @return List of FacetDimensions that were collected for the query. + * @throws IcatException + */ + public abstract List facetSearch(String target, JsonObject facetQuery, Integer maxResults, + Integer maxLabels) throws IcatException; + + /** + * Gets SearchResult for query without sort or searchAfter (pagination). + * + * @param query JsonObject containing the criteria to search on. + * @param maxResults Maximum number of results to retrieve from the engine. + * @return SearchResult for the query. 
+ * @throws IcatException + */ + public SearchResult getResults(JsonObject query, int maxResults) throws IcatException { + return getResults(query, null, maxResults, null, Arrays.asList("id")); + } + + /** + * Gets SearchResult for query without searchAfter (pagination). + * + * @param query JsonObject containing the criteria to search on. + * @param maxResults Maximum number of results to retrieve from the engine. + * @param sort String of Json representing the sort criteria. + * @return SearchResult for the query. + * @throws IcatException + */ + public SearchResult getResults(JsonObject query, int maxResults, String sort) throws IcatException { + return getResults(query, null, maxResults, sort, Arrays.asList("id")); + } + + /** + * Gets SearchResult for query. + * + * @param query JsonObject containing the criteria to search on. + * @param searchAfter JsonValue representing the last result of a previous + * search in order to skip results that have already been + * returned. + * @param blockSize Maximum number of results to retrieve from the engine. + * @param sort String of Json representing the sort criteria. + * @param requestedFields List of fields to return in the document source. + * @return SearchResult for the query. + * @throws IcatException + */ + public abstract SearchResult getResults(JsonObject query, JsonValue searchAfter, Integer blockSize, String sort, + List requestedFields) throws IcatException; + + /** + * Not implemented. + * + * @param entityName + * @param minId + * @param maxId + * @param delete + * @throws IcatException + */ + public void lock(String entityName, Long minId, Long maxId, Boolean delete) throws IcatException { + logger.info("Manually locking index not supported, no request sent"); + } + + /** + * Not implemented. 
+ * + * @param entityName + * @throws IcatException + */ + public void unlock(String entityName) throws IcatException { + logger.info("Manually unlocking index not supported, no request sent"); + } + + /** + * Perform one or more document modification operations. + * + * @param json String of a JsonArray containing individual create/update/delete + * operations as JsonObjects. + * @throws IcatException + */ + public abstract void modify(String json) throws IcatException; + + /** + * POST to path without a body or response handling. + * + * @param path Path on the search engine to POST to. + * @throws IcatException + */ + protected void post(String path) throws IcatException { + postResponse(path, null, null); + } + + /** + * POST to path with a body but without response handling. + * + * @param path Path on the search engine to POST to. + * @param body String of Json to send as the request body. + * @throws IcatException + */ + protected void post(String path, String body) throws IcatException { + postResponse(path, body, null); + } + + /** + * POST to path with a body and response handling. + * + * @param path Path on the search engine to POST to. + * @param body String of Json to send as the request body. + * @return JsonObject returned by the search engine. + * @throws IcatException + */ + protected JsonObject postResponse(String path, String body) throws IcatException { + return postResponse(path, body, null); + } + + /** + * POST to path with a body and response handling. + * + * @param path Path on the search engine to POST to. + * @param body String of Json to send as the request body. + * @param parameterMap Map of parameters to encode in the URI. + * @return JsonObject returned by the search engine. 
+ * @throws IcatException + */ + protected JsonObject postResponse(String path, String body, Map parameterMap) throws IcatException { + try (CloseableHttpClient httpclient = HttpClients.createDefault()) { + URIBuilder builder = new URIBuilder(server).setPath(path); + if (parameterMap != null) { + for (Entry entry : parameterMap.entrySet()) { + builder.addParameter(entry.getKey(), entry.getValue()); + } + } + URI uri = builder.build(); + HttpPost httpPost = new HttpPost(uri); + if (body != null) { + httpPost.setEntity(new StringEntity(body, ContentType.APPLICATION_JSON)); + } + logger.trace("Making call {} with body {}", uri, body); + try (CloseableHttpResponse response = httpclient.execute(httpPost)) { + int code = response.getStatusLine().getStatusCode(); + Rest.checkStatus(response, code == 400 ? IcatExceptionType.BAD_PARAMETER : IcatExceptionType.INTERNAL); + HttpEntity entity = response.getEntity(); + if (entity != null) { + JsonReader jsonReader = Json.createReader(entity.getContent()); + return jsonReader.readObject(); + } + return null; + } + } catch (URISyntaxException | IOException e) { + throw new IcatException(IcatExceptionType.INTERNAL, e.getClass() + " " + e.getMessage()); + } + } + +} diff --git a/src/main/java/org/icatproject/core/manager/search/SearchManager.java b/src/main/java/org/icatproject/core/manager/search/SearchManager.java new file mode 100644 index 000000000..d242298e0 --- /dev/null +++ b/src/main/java/org/icatproject/core/manager/search/SearchManager.java @@ -0,0 +1,811 @@ +package org.icatproject.core.manager.search; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.net.URI; +import java.net.URL; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Set; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.SortedSet; 
+import java.util.Timer; +import java.util.TimerTask; +import java.util.concurrent.Callable; +import java.util.concurrent.CompletionService; +import java.util.concurrent.ConcurrentSkipListMap; +import java.util.concurrent.ConcurrentSkipListSet; +import java.util.concurrent.ExecutorCompletionService; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; + +import jakarta.annotation.PostConstruct; +import jakarta.annotation.PreDestroy; +import jakarta.ejb.EJB; +import jakarta.ejb.Singleton; +import jakarta.ejb.Startup; +import jakarta.json.Json; +import jakarta.json.JsonArrayBuilder; +import jakarta.json.JsonNumber; +import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; +import jakarta.json.JsonValue; +import jakarta.json.JsonValue.ValueType; +import jakarta.persistence.EntityManager; +import jakarta.persistence.EntityManagerFactory; +import jakarta.persistence.PersistenceUnit; + +import org.icatproject.core.IcatException; +import org.icatproject.core.IcatException.IcatExceptionType; +import org.icatproject.core.entity.Datafile; +import org.icatproject.core.entity.Dataset; +import org.icatproject.core.entity.EntityBaseBean; +import org.icatproject.core.entity.Investigation; +import org.icatproject.core.manager.EntityInfoHandler; +import org.icatproject.core.manager.GateKeeper; +import org.icatproject.core.manager.PropertyHandler; +import org.icatproject.core.manager.EntityInfoHandler.Relationship; +import org.icatproject.core.manager.PropertyHandler.SearchEngine; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.slf4j.Marker; +import org.slf4j.MarkerFactory; + +@Startup +@Singleton +public class SearchManager { + + public class EnqueuedSearchRequestHandler extends TimerTask { + + @Override + public void run() { + + synchronized (queueFileLock) { + if (queueFile.length() != 0) { + logger.debug("Will attempt to process {}", queueFile); + StringBuilder sb = new 
StringBuilder("["); + try (BufferedReader reader = new BufferedReader(new FileReader(queueFile))) { + String line; + while ((line = reader.readLine()) != null) { + if (sb.length() != 1) { + sb.append(','); + } + sb.append(line); + } + } catch (IOException e) { + logger.error("Problems reading from {} : {}", queueFile, e.getMessage()); + return; + } + sb.append(']'); + + try { + searchApi.modify(sb.toString()); + logger.info("Enqueued search documents now all indexed"); + } catch (Exception e) { + // Catch all exceptions so the Timer doesn't end unexpectedly + // Record failures in a flat file to be examined periodically + logger.error("Search engine failed to modify documents with error {} : {}", e.getClass(), + e.getMessage()); + try { + synchronizedWrite(sb.toString(), backlogHandlerFileLock, backlogHandlerFile); + } catch (IcatException e2) { + // Already logged the error + } + } finally { + queueFile.delete(); + logger.debug("finish processing, queue File removed"); + } + } + } + } + } + + public class IndexSome implements Callable { + + private List ids; + private EntityManager manager; + private Class klass; + private String entityName; + private long start; + + @SuppressWarnings("unchecked") + public IndexSome(String entityName, List ids, EntityManagerFactory entityManagerFactory, long start) + throws IcatException { + try { + logger.debug("About to index {} {} records after id {}", ids.size(), entityName, start); + this.entityName = entityName; + klass = EntityInfoHandler.getClass(entityName); + this.ids = ids; + manager = entityManagerFactory.createEntityManager(); + this.start = start; + } catch (Exception e) { + logger.error("About to throw internal exception because of", e); + throw new IcatException(IcatExceptionType.INTERNAL, e.getMessage()); + } + } + + @Override + public Long call() throws Exception { + if (EntityInfoHandler.hasSearchDoc(klass)) { + searchApi.addNow(entityName, ids, manager, klass, getBeanDocExecutor); + } + return start; + } + } + 
+ private class PendingSearchRequestHandler extends TimerTask { + + @Override + public void run() { + synchronized (backlogHandlerFileLock) { + if (backlogHandlerFile.length() != 0) { + logger.debug("Will attempt to process {}", backlogHandlerFile); + try (BufferedReader reader = new BufferedReader(new FileReader(backlogHandlerFile))) { + String line; + while ((line = reader.readLine()) != null) { + searchApi.modify(line); + } + backlogHandlerFile.delete(); + logger.info("Pending search records now all inserted"); + } catch (IOException e) { + logger.error("Problems reading from {} : {}", backlogHandlerFile, e.getMessage()); + } catch (IcatException e) { + logger.error("Failed to put previously failed entries into search engine " + e.getMessage()); + } catch (Throwable e) { + logger.error("Something unexpected happened " + e.getClass() + " " + e.getMessage()); + } + logger.debug("finish processing"); + } + } + } + } + + /** + * Handles the the aggregation of the fileSize and fileCount fields for Dataset + * and Investigation entities. + */ + private class AggregateFilesHandler extends TimerTask { + + private EntityManager entityManager; + + public AggregateFilesHandler(EntityManager entityManager) { + this.entityManager = entityManager; + } + + @Override + public void run() { + aggregate(datasetAggregationFileLock, datasetAggregationFile, Dataset.class); + aggregate(investigationAggregationFileLock, investigationAggregationFile, Investigation.class); + } + + /** + * Performs aggregation by reading the unique id values from file and querying + * the DB for the full entity (including fileSize and fileCount fields). This is + * then submitted as an update to the search engine. 
+ * + * @param fileLock Lock for the file + * @param file File to read the ids of entities from + * @param klass Class of the entity to be aggregated + */ + private void aggregate(Long fileLock, File file, Class klass) { + String entityName = klass.getSimpleName(); + synchronized (fileLock) { + if (file.length() != 0) { + logger.debug("Will attempt to process {}", file); + try (BufferedReader reader = new BufferedReader(new FileReader(file))) { + String line; + Set ids = new HashSet<>(); + while ((line = reader.readLine()) != null) { + if (ids.add(line)) { // True if id not yet encountered + String query = "SELECT e FROM " + entityName + " e WHERE e.id = " + line; + try { + EntityBaseBean entity = entityManager.createQuery(query, klass).getSingleResult(); + updateDocument(entityManager, entity); + } catch (Exception e) { + logger.error("{} with id {} not found, continue", entityName, line); + } + } + } + file.delete(); + logger.info(entityName + " aggregations performed"); + } catch (IOException e) { + logger.error("Problems reading from {} : {}", file, e.getMessage()); + } catch (Throwable e) { + logger.error("Something unexpected happened " + e.getClass() + " " + e.getMessage()); + } + logger.debug("finish processing"); + } + } + } + } + + private enum PopState { + STOPPING, STOPPED + } + + /** + * Holds relevant values for a Populate thread. 
+ */ + private class PopulateBucket { + private Long minId; + private Long maxId; + private boolean delete; + + public PopulateBucket(Long minId, Long maxId, boolean delete) { + this.minId = minId; + this.maxId = maxId; + this.delete = delete; + } + } + + public class PopulateThread extends Thread { + + private EntityManager manager; + private EntityManagerFactory entityManagerFactory; + + public PopulateThread(EntityManagerFactory entityManagerFactory) { + this.entityManagerFactory = entityManagerFactory; + manager = entityManagerFactory.createEntityManager(); + logger.info("Start new populate thread"); + } + + @Override + public void run() { + + try { + while (!populateMap.isEmpty()) { + + populatingClassEntry = populateMap.firstEntry(); + + if (populatingClassEntry != null) { + PopulateBucket bucket = populatingClassEntry.getValue(); + Long start = bucket.minId != null && bucket.minId > 0 ? bucket.minId : 0; + searchApi.lock(populatingClassEntry.getKey(), bucket.minId, bucket.maxId, bucket.delete); + + logger.info("Search engine populating " + populatingClassEntry); + + CompletionService threads = new ExecutorCompletionService<>(populateExecutor); + SortedSet tasks = new ConcurrentSkipListSet<>(); + + while (true) { + + if (popState == PopState.STOPPING) { + logger.info("PopulateThread stopping as flag was set"); + break; + } + /* Get next block of ids */ + String query = "SELECT e.id from " + populatingClassEntry.getKey() + " e"; + if (bucket.maxId != null) { + // Add 1 from lower limit to get a half interval + query += " WHERE e.id BETWEEN " + (start + 1) + " AND " + (bucket.maxId); + } else { + query += " WHERE e.id > " + start; + } + query += " ORDER BY e.id"; + List ids = manager + .createQuery(query, Long.class) + .setMaxResults(populateBlockSize).getResultList(); + if (ids.size() == 0) { + break; + } + + Future fut; + /* Remove any completed ones */ + while ((fut = threads.poll()) != null) { + Long s = fut.get(); + if (s.equals(tasks.first())) { + 
PopulateBucket populateBucket = new PopulateBucket(s, bucket.maxId, bucket.delete); + populateMap.put(populatingClassEntry.getKey(), populateBucket); + } + tasks.remove(s); + } + + /* If full then wait */ + if (tasks.size() == maxThreads) { + fut = threads.take(); + Long s = fut.get(); + if (s.equals(tasks.first())) { + PopulateBucket populateBucket = new PopulateBucket(s, bucket.maxId, bucket.delete); + populateMap.put(populatingClassEntry.getKey(), populateBucket); + } + tasks.remove(s); + } + + logger.debug("About to submit {} {} documents from id {} onwards", ids.size(), + populatingClassEntry.getKey(), start); + threads.submit( + new IndexSome(populatingClassEntry.getKey(), ids, entityManagerFactory, start)); + tasks.add(start); + start = ids.get(ids.size() - 1); + + manager.clear(); + } + + /* Wait for the last few to finish */ + Future fut; + while (tasks.size() > 0) { + fut = threads.take(); + Long s = fut.get(); + if (s.equals(tasks.first())) { + PopulateBucket populateBucket = new PopulateBucket(s, bucket.maxId, bucket.delete); + populateMap.put(populatingClassEntry.getKey(), populateBucket); + } + tasks.remove(s); + } + + /* + * Unlock and commit the changes + */ + searchApi.unlock(populatingClassEntry.getKey()); + populateMap.remove(populatingClassEntry.getKey()); + } + } + } catch (Throwable t) { + logger.error("Problem encountered in", t); + populateMap.remove(populatingClassEntry.getKey()); + } finally { + manager.close(); + popState = PopState.STOPPED; + } + } + } + + final static Logger logger = LoggerFactory.getLogger(SearchManager.class); + + final static Marker fatal = MarkerFactory.getMarker("FATAL"); + + /** + * The Set of classes for which population is requested + */ + private ConcurrentSkipListMap populateMap = new ConcurrentSkipListMap<>(); + /** The thread which does the population */ + private PopulateThread populateThread; + + private Entry populatingClassEntry; + + @PersistenceUnit(unitName = "icat") + private EntityManagerFactory 
entityManagerFactory; + + private int populateBlockSize; + + private ExecutorService getBeanDocExecutor; + + @EJB + PropertyHandler propertyHandler; + private PopState popState = PopState.STOPPED; + + private ExecutorService populateExecutor; + + private int maxThreads; + + private SearchApi searchApi; + + private boolean active; + + private long aggregateFilesIntervalMillis; + + private Long backlogHandlerFileLock = 0L; + + private Long queueFileLock = 0L; + + private Long datasetAggregationFileLock = 0L; + + private Long investigationAggregationFileLock = 0L; + + private Timer timer; + + private Set entitiesToIndex; + + private File backlogHandlerFile; + + private File queueFile; + + private File datasetAggregationFile; + + private File investigationAggregationFile; + + private SearchEngine searchEngine; + + private List urls; + + private static final Map> publicSearchFields = new HashMap<>(); + + /** + * Gets (and if necessary, builds) the fields which should be returned as part + * of the document source from a search. + * + * @param gateKeeper GateKeeper instance. + * @param simpleName Name of the entity to get public fields for. + * @return List of fields which can be shown in search results provided the main + * entity is authorised. 
+ * @throws IcatException + */ + public static List getPublicSearchFields(GateKeeper gateKeeper, String simpleName) throws IcatException { + if (gateKeeper.getPublicSearchFieldsStale() || publicSearchFields.size() == 0) { + logger.info("Building public search fields from public tables and steps"); + publicSearchFields.put("Datafile", buildPublicSearchFields(gateKeeper, Datafile.getDocumentFields())); + publicSearchFields.put("Dataset", buildPublicSearchFields(gateKeeper, Dataset.getDocumentFields())); + publicSearchFields.put("Investigation", + buildPublicSearchFields(gateKeeper, Investigation.getDocumentFields())); + gateKeeper.markPublicSearchFieldsFresh(); + } + List requestedFields = publicSearchFields.get(simpleName); + logger.debug("{} has public fields {}", simpleName, requestedFields); + return requestedFields; + } + + public void addDocument(EntityManager manager, EntityBaseBean bean) throws IcatException { + Class klass = bean.getClass(); + if (EntityInfoHandler.hasSearchDoc(klass) && entitiesToIndex.contains(klass.getSimpleName())) { + enqueue(SearchApi.encodeOperation(manager, "create", bean)); + enqueueAggregation(bean); + } + } + + private void enqueue(String json) throws IcatException { + synchronizedWrite(json, queueFileLock, queueFile); + } + + /** + * @param line String to write to file, followed by \n. 
+ * @param fileLock Lock for the file + * @param file File to write to + * @throws IcatException + */ + private void synchronizedWrite(String line, Long fileLock, File file) throws IcatException { + synchronized (fileLock) { + try { + logger.trace("Writing {} to {}", line, file.getAbsolutePath()); + FileWriter output = new FileWriter(file, true); + output.write(line + "\n"); + output.close(); + } catch (IOException e) { + String msg = "Problems writing to " + queueFile + " " + e.getMessage(); + logger.error(msg); + throw new IcatException(IcatExceptionType.INTERNAL, msg); + } + } + } + + /** + * If bean is a Datafile and an aggregation interval is set, then the Datafile's + * Dataset and Investigation ids are written to file to be aggregated at a later + * date. + * + * @param bean Entity to consider for aggregation. + * @throws IcatException + */ + private void enqueueAggregation(EntityBaseBean bean) throws IcatException { + if (bean.getClass().getSimpleName().equals("Datafile") && aggregateFilesIntervalMillis > 0) { + Dataset dataset = ((Datafile) bean).getDataset(); + if (dataset != null) { + synchronizedWrite(dataset.getId().toString(), datasetAggregationFileLock, datasetAggregationFile); + Investigation investigation = dataset.getInvestigation(); + if (investigation != null) { + synchronizedWrite(investigation.getId().toString(), investigationAggregationFileLock, + investigationAggregationFile); + } + } + } + } + + public void clear() throws IcatException { + logger.info("Search engine clear called"); + popState = PopState.STOPPING; + while (populateThread != null && populateThread.getState() != Thread.State.TERMINATED) { + try { + Thread.sleep(1000); + } catch (InterruptedException e) { + // Do nothing + } + } + logger.debug("Search engine population terminated"); + } + + public void commit() throws IcatException { + pushPendingCalls(); + searchApi.commit(); + } + + public void deleteDocument(EntityBaseBean bean) throws IcatException { + if 
(EntityInfoHandler.hasSearchDoc(bean.getClass())) { + enqueue(SearchApi.encodeDeletion(bean)); + enqueueAggregation(bean); + } + } + + /** + * Builds a JsonObject for performing faceting against results from a previous + * search. + * + * @param results List of results from a previous search, containing entity + * ids. + * @param queryIdField The field to perform id querying against. + * @param facetJson JsonObject containing the dimensions to facet. + * @return {"query": {`idField`: [...]}, "dimensions": [...]} + */ + public static JsonObject buildFacetQuery(List results, String queryIdField, + JsonObject facetJson) { + JsonArrayBuilder arrayBuilder = Json.createArrayBuilder(); + results.forEach(r -> arrayBuilder.add(r.getId())); + JsonObject terms = Json.createObjectBuilder().add(queryIdField, arrayBuilder.build()).build(); + return buildFacetQuery(terms, facetJson); + } + + /** + * Builds a JsonObject for performing faceting against results from a previous + * search. + * + * @param results List of results from a previous search, containing + * entity ids. + * @param resultIdField The id(s) to extract from the results. + * @param queryIdField The id field to target with the query. + * @param facetJson JsonObject containing the dimensions to facet. 
+ * @return {"query": {`idField`: [...]}, "dimensions": [...]} + */ + public static JsonObject buildFacetQuery(List results, String resultIdField, + String queryIdField, JsonObject facetJson) { + JsonArrayBuilder arrayBuilder = Json.createArrayBuilder(); + results.forEach(r -> { + JsonObject source = r.getSource(); + if (source.containsKey(resultIdField)) { + ValueType valueType = source.get(resultIdField).getValueType(); + if (valueType.equals(ValueType.NUMBER)) { + arrayBuilder.add(source.getJsonNumber(resultIdField)); + } else if (valueType.equals(ValueType.ARRAY)) { + source.getJsonArray(resultIdField).getValuesAs(JsonNumber.class).forEach(id -> { + arrayBuilder.add(id); + }); + } + } + }); + JsonObject terms = Json.createObjectBuilder().add(queryIdField, arrayBuilder.build()).build(); + return buildFacetQuery(terms, facetJson); + } + + /** + * Builds a JsonObject for performing faceting against results from a previous + * search. + * + * @param filterObject JsonObject to be used as a query. + * @param facetJson JsonObject containing the dimensions to facet. + * @return {"query": `filterObject`, "dimensions": [...]} + */ + public static JsonObject buildFacetQuery(JsonObject filterObject, JsonObject facetJson) { + JsonObjectBuilder objectBuilder = Json.createObjectBuilder().add("query", filterObject); + if (facetJson.containsKey("dimensions")) { + objectBuilder.add("dimensions", facetJson.getJsonArray("dimensions")); + } + return objectBuilder.build(); + } + + /** + * Checks if the underlying Relationship is allowed for a field on an entity. + * + * @param gateKeeper GateKeeper instance. + * @param map Map of fields to the Relationship that must be allowed in + * order to return the fields with search results for a + * particular entity. 
+ * @return List of fields (keys) from map that have an allowed relationship + */ + private static List buildPublicSearchFields(GateKeeper gateKeeper, Map map) { + List fields = new ArrayList<>(); + for (Entry entry : map.entrySet()) { + boolean includeField = true; + if (entry.getValue() != null) { + for (Relationship relationship : entry.getValue()) { + if (!gateKeeper.allowed(relationship)) { + includeField = false; + logger.debug("Access to {} blocked by disallowed relationship between {} and {}", + entry.getKey(), + relationship.getOriginBean().getSimpleName(), + relationship.getDestinationBean().getSimpleName()); + break; + } + } + } + if (includeField) { + fields.add(entry.getKey()); + } + } + return fields; + } + + /** + * Builds a Json representation of the final search result based on the sort + * criteria used. This allows future searches to efficiently "search after" this + * result. + * + * @param lastBean The last ScoredEntityBaseBean of the current search results. + * @param sort String representing a JsonObject of sort criteria. + * @return JsonValue representing the lastBean to allow future searches to + * search after it. + * @throws IcatException If the score of the lastBean is NaN, or one of the sort + * fields is not present in the source of the lastBean. 
+ */ + public JsonValue buildSearchAfter(ScoredEntityBaseBean lastBean, String sort) throws IcatException { + return searchApi.buildSearchAfter(lastBean, sort); + } + + private void pushPendingCalls() { + timer.schedule(new EnqueuedSearchRequestHandler(), 0L); + while (queueFile.length() != 0) { + try { + Thread.sleep(1000); + } catch (InterruptedException e) { + // Ignore + } + } + } + + @PreDestroy + private void exit() { + logger.info("Closing down SearchManager"); + if (active) { + try { + populateExecutor.shutdown(); + getBeanDocExecutor.shutdown(); + pushPendingCalls(); + timer.cancel(); + timer = null; + logger.info("Closed down SearchManager"); + } catch (Exception e) { + logger.error(fatal, "Problem closing down SearchManager", e); + } + } + } + + /** + * Perform faceting on an entity/index. The query associated with the request + * should determine which Documents to consider, and optionally the dimensions + * to facet. If no dimensions are provided, "sparse" faceting is performed + * across relevant string fields (but no Range faceting occurs). + * + * @param target Name of the entity/index to facet on. + * @param facetQuery JsonObject containing the criteria to facet on. + * @param maxResults The maximum number of results to include in the returned + * Json. + * @param maxLabels The maximum number of labels to return for each dimension + * of the facets. + * @return List of FacetDimensions that were collected for the query. + * @throws IcatException + */ + public List facetSearch(String target, JsonObject facetQuery, int maxResults, int maxLabels) + throws IcatException { + return searchApi.facetSearch(target, facetQuery, maxResults, maxLabels); + } + + public List getPopulating() { + List result = new ArrayList<>(); + for (Entry e : populateMap.entrySet()) { + result.add(e.getKey() + " " + e.getValue()); + } + return result; + } + + /** + * Gets SearchResult for query without searchAfter (pagination). 
+ * + * @param query JsonObject containing the criteria to search on. + * @param maxResults Maximum number of results to retrieve from the engine. + * @param sort String of Json representing the sort criteria. + * @return SearchResult for the query. + * @throws IcatException + */ + public SearchResult freeTextSearch(JsonObject query, int maxResults, String sort) throws IcatException { + return searchApi.getResults(query, maxResults, sort); + } + + /** + * Gets SearchResult for query. + * + * @param query JsonObject containing the criteria to search on. + * @param searchAfter JsonValue representing the last result of a previous + * search in order to skip results that have already been + * returned. + * @param blockSize Maximum number of results to retrieve from the engine. + * @param sort String of Json representing the sort criteria. + * @param requestedFields List of fields to return in the document source. + * @return SearchResult for the query. + * @throws IcatException + */ + public SearchResult freeTextSearch(JsonObject query, JsonValue searchAfter, int blockSize, String sort, + List requestedFields) throws IcatException { + return searchApi.getResults(query, searchAfter, blockSize, sort, requestedFields); + } + + @PostConstruct + private void init() { + searchEngine = propertyHandler.getSearchEngine(); + logger.info("Initialising SearchManager for engine {}", searchEngine); + urls = propertyHandler.getSearchUrls(); + active = urls != null && urls.size() > 0; + if (active) { + try { + URI uri = propertyHandler.getSearchUrls().get(0).toURI(); + if (searchEngine == SearchEngine.LUCENE) { + searchApi = new LuceneApi(uri); + } else if (searchEngine == SearchEngine.ELASTICSEARCH || searchEngine == SearchEngine.OPENSEARCH) { + String unitAliasOptions = propertyHandler.getUnitAliasOptions(); + // If interval is not set then aggregate in real time + long aggregateFilesInterval = propertyHandler.getSearchAggregateFilesIntervalMillis(); + boolean aggregateFiles = 
aggregateFilesInterval == 0; + searchApi = new OpensearchApi(uri, unitAliasOptions, aggregateFiles); + } else { + throw new IcatException(IcatExceptionType.BAD_PARAMETER, + "Search engine {} not supported, must be one of " + SearchEngine.values()); + } + + populateBlockSize = propertyHandler.getSearchPopulateBlockSize(); + Path searchDirectory = propertyHandler.getSearchDirectory(); + backlogHandlerFile = searchDirectory.resolve("backLog").toFile(); + queueFile = searchDirectory.resolve("queue").toFile(); + datasetAggregationFile = searchDirectory.resolve("datasetAggregation").toFile(); + investigationAggregationFile = searchDirectory.resolve("investigationAggregation").toFile(); + maxThreads = Runtime.getRuntime().availableProcessors(); + populateExecutor = Executors.newWorkStealingPool(maxThreads); + getBeanDocExecutor = Executors.newCachedThreadPool(); + timer = new Timer(); + timer.schedule(new PendingSearchRequestHandler(), 0L, + propertyHandler.getSearchBacklogHandlerIntervalMillis()); + timer.schedule(new EnqueuedSearchRequestHandler(), 0L, + propertyHandler.getSearchEnqueuedRequestIntervalMillis()); + aggregateFilesIntervalMillis = propertyHandler.getSearchAggregateFilesIntervalMillis(); + if (aggregateFilesIntervalMillis > 0) { + EntityManager entityManager = entityManagerFactory.createEntityManager(); + timer.schedule(new AggregateFilesHandler(entityManager), 0L, aggregateFilesIntervalMillis); + } + entitiesToIndex = propertyHandler.getEntitiesToIndex(); + logger.info("Initialised SearchManager at {}", urls); + } catch (Exception e) { + logger.error(fatal, "Problem setting up SearchManager", e); + throw new IllegalStateException("Problem setting up SearchManager"); + } + } else { + logger.info("SearchManager is inactive"); + } + } + + public boolean isActive() { + return active; + } + + public void populate(String entityName, Long minId, Long maxId, boolean delete) throws IcatException { + if (popState == PopState.STOPPING) { + while (populateThread != 
null && populateThread.getState() != Thread.State.TERMINATED) { + try { + Thread.sleep(1000); + } catch (InterruptedException e) { + // Do nothing + } + } + } + if (populateMap.put(entityName, new PopulateBucket(minId, maxId, delete)) == null) { + logger.debug("Search engine population of {} requested", entityName); + } else { + throw new IcatException(IcatExceptionType.OBJECT_ALREADY_EXISTS, + "population of " + entityName + " already requested"); + } + if (populateThread == null || populateThread.getState() == Thread.State.TERMINATED) { + populateThread = new PopulateThread(entityManagerFactory); + populateThread.start(); + } + } + + public void updateDocument(EntityManager manager, EntityBaseBean bean) throws IcatException { + Class klass = bean.getClass(); + if (EntityInfoHandler.hasSearchDoc(klass) && entitiesToIndex.contains(klass.getSimpleName())) { + enqueue(SearchApi.encodeOperation(manager, "update", bean)); + enqueueAggregation(bean); + } + } + +} diff --git a/src/main/java/org/icatproject/core/manager/search/SearchResult.java b/src/main/java/org/icatproject/core/manager/search/SearchResult.java new file mode 100644 index 000000000..dbb5693ca --- /dev/null +++ b/src/main/java/org/icatproject/core/manager/search/SearchResult.java @@ -0,0 +1,52 @@ +package org.icatproject.core.manager.search; + +import java.util.ArrayList; +import java.util.List; + +import jakarta.json.JsonValue; + +/** + * Represents the results from a single search performed against the engine. + * Stores a list of ScoredEntityBaseBean, FacetDimension, and a JsonValue + * representing the last document returned if appropriate. 
+ */ +public class SearchResult { + + private JsonValue searchAfter; + private List results = new ArrayList<>(); + private List dimensions; + + public SearchResult() { + } + + public SearchResult(List dimensions) { + this.dimensions = dimensions; + } + + public SearchResult(JsonValue searchAfter, List results, List dimensions) { + this.searchAfter = searchAfter; + this.results = results; + this.dimensions = dimensions; + } + + public List getDimensions() { + return dimensions; + } + + public void setDimensions(List dimensions) { + this.dimensions = dimensions; + } + + public List getResults() { + return results; + } + + public JsonValue getSearchAfter() { + return searchAfter; + } + + public void setSearchAfter(JsonValue searchAfter) { + this.searchAfter = searchAfter; + } + +} diff --git a/src/main/java/org/icatproject/exposed/ICAT.java b/src/main/java/org/icatproject/exposed/ICAT.java index 3605d8057..d5d8fa6a1 100644 --- a/src/main/java/org/icatproject/exposed/ICAT.java +++ b/src/main/java/org/icatproject/exposed/ICAT.java @@ -363,9 +363,9 @@ private void reportIcatException(IcatException e) throws IcatException { if (e.getType() == IcatExceptionType.INTERNAL) { ByteArrayOutputStream baos = new ByteArrayOutputStream(); e.printStackTrace(new PrintStream(baos)); - logger.debug("Internal exception " + baos.toString()); + logger.error("Internal exception " + baos.toString()); } else { - logger.debug("IcatException " + e.getType() + " " + e.getMessage() + logger.error("IcatException " + e.getType() + " " + e.getMessage() + (e.getOffset() >= 0 ? 
" at offset " + e.getOffset() : "")); } } diff --git a/src/main/java/org/icatproject/exposed/ICATRest.java b/src/main/java/org/icatproject/exposed/ICATRest.java index 4c551b8f2..3194ad612 100644 --- a/src/main/java/org/icatproject/exposed/ICATRest.java +++ b/src/main/java/org/icatproject/exposed/ICATRest.java @@ -12,7 +12,6 @@ import java.math.BigInteger; import java.nio.charset.StandardCharsets; import java.text.DateFormat; -import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; @@ -21,7 +20,6 @@ import java.util.Map; import java.util.Map.Entry; import java.util.Set; -import java.util.TimeZone; import jakarta.annotation.PostConstruct; import jakarta.annotation.Resource; @@ -69,17 +67,22 @@ import org.icatproject.core.Constants; import org.icatproject.core.IcatException; import org.icatproject.core.IcatException.IcatExceptionType; +import org.icatproject.core.entity.Datafile; +import org.icatproject.core.entity.Dataset; import org.icatproject.core.entity.EntityBaseBean; +import org.icatproject.core.entity.Investigation; import org.icatproject.core.entity.ParameterValueType; import org.icatproject.core.entity.StudyStatus; import org.icatproject.core.manager.EntityBeanManager; import org.icatproject.core.manager.EntityInfoHandler; import org.icatproject.core.manager.GateKeeper; -import org.icatproject.core.manager.ParameterPOJO; import org.icatproject.core.manager.Porter; import org.icatproject.core.manager.PropertyHandler; import org.icatproject.core.manager.PropertyHandler.ExtendedAuthenticator; -import org.icatproject.core.manager.ScoredEntityBaseBean; +import org.icatproject.core.manager.search.FacetDimension; +import org.icatproject.core.manager.search.FacetLabel; +import org.icatproject.core.manager.search.ScoredEntityBaseBean; +import org.icatproject.core.manager.search.SearchResult; import org.icatproject.utils.ContainerGetter.ContainerType; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; @@ -93,24 +96,6 @@ public class ICATRest { private final static DateFormat df8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX"); - private static SimpleDateFormat df; - - static { - df = new SimpleDateFormat("yyyyMMddHHmm"); - TimeZone tz = TimeZone.getTimeZone("GMT"); - df.setTimeZone(tz); - } - - private static Date dec(String value) throws java.text.ParseException { - if (value == null) { - return null; - } else { - synchronized (df) { - return df.parse(value); - } - } - } - private Map authPlugins; @EJB @@ -928,9 +913,14 @@ public void logout(@Context HttpServletRequest request, @PathParam("sessionId") } /** - * perform a lucene search + * Perform a free text search against a dedicated (non-DB) search engine + * component for entity ids. + * + * @title Free text id search. * - * @title lucene search + * @deprecated in favour of {@link #searchDocuments}, which offers more + * functionality and returns full documents rather than just ICAT + * ids. * * @param sessionId * a sessionId of a user which takes the form @@ -1021,7 +1011,7 @@ public void logout(@Context HttpServletRequest request, @PathParam("sessionId") * @param maxCount * maximum number of entities to return * - * @return set of entities encoded as json + * @return set of entity ids and relevance scores encoded as json * * @throws IcatException * when something is wrong @@ -1029,8 +1019,10 @@ public void logout(@Context HttpServletRequest request, @PathParam("sessionId") @GET @Path("lucene/data") @Produces(MediaType.APPLICATION_JSON) + @Deprecated public String lucene(@Context HttpServletRequest request, @QueryParam("sessionId") String sessionId, - @QueryParam("query") String query, @QueryParam("maxCount") int maxCount) throws IcatException { + @QueryParam("query") String query, @QueryParam("maxCount") int maxCount) + throws IcatException { if (query == null) { throw new IcatException(IcatExceptionType.BAD_PARAMETER, "query is not set"); } @@ -1039,69 +1031,50 @@ public 
String lucene(@Context HttpServletRequest request, @QueryParam("sessionId try (JsonReader jr = Json.createReader(new ByteArrayInputStream(query.getBytes()))) { JsonObject jo = jr.readObject(); String target = jo.getString("target", null); - String user = jo.getString("user", null); - String text = jo.getString("text", null); - String lower = jo.getString("lower", null); - String upper = jo.getString("upper", null); - List parms = new ArrayList<>(); if (jo.containsKey("parameters")) { for (JsonValue val : jo.getJsonArray("parameters")) { - JsonObject parm = (JsonObject) val; - String name = parm.getString("name", null); + JsonObject parameter = (JsonObject) val; + String name = parameter.getString("name", null); if (name == null) { throw new IcatException(IcatExceptionType.BAD_PARAMETER, "name not set in one of parameters"); } - String units = parm.getString("units", null); + String units = parameter.getString("units", null); if (units == null) { throw new IcatException(IcatExceptionType.BAD_PARAMETER, "units not set in parameter '" + name + "'"); } - if (parm.containsKey("stringValue")) { - parms.add(new ParameterPOJO(name, units, parm.getString("stringValue"))); - } else if (parm.containsKey("lowerDateValue") && parm.containsKey("upperDateValue")) { - synchronized (df) { - parms.add(new ParameterPOJO(name, units, df.parse(parm.getString("lowerDateValue")), - df.parse(parm.getString("upperDateValue")))); - } - } else if (parm.containsKey("lowerNumericValue") && parm.containsKey("upperNumericValue")) { - parms.add(new ParameterPOJO(name, units, parm.getJsonNumber("lowerNumericValue").doubleValue(), - parm.getJsonNumber("upperNumericValue").doubleValue())); - } else { - throw new IcatException(IcatExceptionType.BAD_PARAMETER, parm.toString()); + // If we don't have either a string, pair of dates, or pair of numbers, throw + if (!(parameter.containsKey("stringValue") + || (parameter.containsKey("lowerDateValue") + && parameter.containsKey("upperDateValue")) + || 
(parameter.containsKey("lowerNumericValue") + && parameter.containsKey("upperNumericValue")))) { + throw new IcatException(IcatExceptionType.BAD_PARAMETER, parameter.toString()); } } } List objects; + Class klass; if (target.equals("Investigation")) { - List samples = new ArrayList<>(); - if (jo.containsKey("samples")) { - for (JsonValue val : jo.getJsonArray("samples")) { - JsonString samp = (JsonString) val; - samples.add(samp.getString()); - } - } - String userFullName = jo.getString("userFullName", null); - objects = beanManager.luceneInvestigations(userName, user, text, dec(lower), dec(upper), parms, samples, - userFullName, maxCount, manager, request.getRemoteAddr()); - + klass = Investigation.class; } else if (target.equals("Dataset")) { - objects = beanManager.luceneDatasets(userName, user, text, dec(lower), dec(upper), parms, maxCount, - manager, request.getRemoteAddr()); - + klass = Dataset.class; } else if (target.equals("Datafile")) { - objects = beanManager.luceneDatafiles(userName, user, text, dec(lower), dec(upper), parms, maxCount, - manager, request.getRemoteAddr()); - + klass = Datafile.class; } else { throw new IcatException(IcatExceptionType.BAD_PARAMETER, "target:" + target + " is not expected"); } + logger.debug("Free text search with query: {}", jo.toString()); + objects = beanManager.freeTextSearch(userName, jo, maxCount, manager, request.getRemoteAddr(), klass); JsonGenerator gen = Json.createGenerator(baos); gen.writeStartArray(); for (ScoredEntityBaseBean sb : objects) { gen.writeStartObject(); - gen.write("id", sb.getEntityBaseBeanId()); - gen.write("score", sb.getScore()); + gen.write("id", sb.getId()); + if (!Float.isNaN(sb.getScore())) { + gen.write("score", sb.getScore()); + } gen.writeEnd(); } gen.writeEnd(); @@ -1109,8 +1082,321 @@ public String lucene(@Context HttpServletRequest request, @QueryParam("sessionId return baos.toString(); } catch (JsonException e) { throw new IcatException(IcatExceptionType.BAD_PARAMETER, 
"JsonException " + e.getMessage()); - } catch (ParseException e) { - throw new IcatException(IcatExceptionType.BAD_PARAMETER, "ParserException " + e.getMessage()); + } + } + + /** + * Perform a free text search against a dedicated (non-DB) search engine + * component for entire Documents. + * + * @title Free text Document search. + * + * @param sessionId + * a sessionId of a user which takes the form + * 0d9a3706-80d4-4d29-9ff3-4d65d4308a24 + * @param query + * json encoded query object. One of the fields is "target" which + * must be "Investigation", "Dataset" or "Datafile". The other + * fields are all optional: + *
+ *
user
+ *
name of user as in the User table which may include a + * prefix
+ *
text
+ *
some text occurring somewhere in the entity. This is + * understood by the lucene parser but avoid trying to use fields.
+ *
lower
+ *
earliest date to search for in the form + * 201509030842 i.e. yyyyMMddHHmm using UTC as + * timezone. In the case of an investigation or data set search + * the date is compared with the start date and in the case of a + * data file the date field is used.
+ *
upper
+ *
latest date to search for in the form + * 201509030842 i.e. yyyyMMddHHmm using UTC as + * timezone. In the case of an investigation or data set search + * the date is compared with the end date and in the case of a + * data file the date field is used.
+ *
parameters
+ *
this holds a list of json parameter objects all of which + * must match. Parameters have the following fields, all of which + * are optional: + *
+ *
name
+ *
A wildcard search for a parameter with this name. + * Supported wildcards are *, which matches any + * character sequence (including the empty one), and + * ?, which matches any single character. + * \ is the escape character. Note this query can be + * slow, as it needs to iterate over many terms. In order to + * prevent extremely slow queries, a name should not start with + * the wildcard *
+ *
units
+ *
A wildcard search for a parameter with these units. + * Supported wildcards are *, which matches any + * character sequence (including the empty one), and + * ?, which matches any single character. + * \ is the escape character. Note this query can be + * slow, as it needs to iterate over many terms. In order to + * prevent extremely slow queries, units should not start with + * the wildcard *
+ *
stringValue
+ *
A wildcard search for a parameter stringValue. Supported + * wildcards are *, which matches any character + * sequence (including the empty one), and ?, which + * matches any single character. \ is the escape + * character. Note this query can be slow, as it needs to iterate + * over many terms. In order to prevent extremely slow queries, + * requested stringValues should not start with the wildcard + * *
+ *
lowerDateValue and upperDateValue
+ *
earliest and latest date to search for in the form + 201509030842 i.e. yyyyMMddHHmm using UTC as + timezone. This should be used to search on parameters having a + dateValue. If only one bound is set the restriction has no + effect.
+ *
lowerNumericValue and upperNumericValue
+ *
This should be used to search on parameters having a + numericValue. If only one bound is set the restriction has no + effect.
+ *
+ *
+ *
samples
+ *
A json array of strings each of which must match text + * found in a sample. This is understood by the lucene parser but avoid trying to use fields. This is + * only respected in the case of an investigation search.
+ *
userFullName
+ *
Full name of user in the User table which may contain + * titles etc. Matching is done by the lucene parser but avoid trying to use fields. This is + * only respected in the case of an investigation search.
+ *
+ * @param searchAfter String representing the last returned document of a + * previous search, so that new results will be from after + * this document. The representation should be a JSON array, + * but the nature of the values will depend on the sort + * applied. + * + * @param sort json encoded sort object. Each key should be a field on + * the targeted Document, with a value of "asc" or "desc" to + * specify the order of the results. Multiple pairs can be + * provided, in which case each subsequent sort is used as a + * tiebreaker for the previous one. If no sort is specified, + * then results will be returned in order of relevance to the + * search query, with their search engine id as a tiebreaker. + * + * @param minCount minimum number of entities to return + * + * @param maxCount maximum number of entities to return + * + * @param restrict Whether to perform a quicker search which restricts the + * results based on an InvestigationUser or + * InstrumentScientist being able to read their "own" data. + * + * @return Set of entity ids, relevance scores and Document source encoded as + * json. 
+ * + * @throws IcatException + * when something is wrong + */ + @GET + @Path("search/documents") + @Produces(MediaType.APPLICATION_JSON) + public String searchDocuments(@Context HttpServletRequest request, @QueryParam("sessionId") String sessionId, + @QueryParam("query") String query, @QueryParam("search_after") String searchAfter, + @QueryParam("minCount") int minCount, @QueryParam("maxCount") int maxCount, @QueryParam("sort") String sort, + @QueryParam("restrict") boolean restrict) throws IcatException { + if (query == null) { + throw new IcatException(IcatExceptionType.BAD_PARAMETER, "query is not set"); + } + if (minCount == 0) { + minCount = 10; + } + if (maxCount == 0) { + maxCount = 100; + } + String userName = beanManager.getUserName(sessionId, manager); + JsonValue searchAfterValue = null; + if (searchAfter != null && searchAfter.length() > 0) { + try (JsonReader jr = Json.createReader(new StringReader(searchAfter))) { + searchAfterValue = jr.read(); + } + } + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (JsonReader jr = Json.createReader(new StringReader(query))) { + JsonObject jo = jr.readObject(); + if (restrict && !jo.containsKey("user")) { + JsonObjectBuilder builder = Json.createObjectBuilder(); + for (Entry entry : jo.entrySet()) { + builder.add(entry.getKey(), entry.getValue()); + } + jo = builder.add("user", userName).build(); + } + String target = jo.getString("target", null); + if (jo.containsKey("parameters")) { + for (JsonValue val : jo.getJsonArray("parameters")) { + JsonObject parameter = (JsonObject) val; + String name = parameter.getString("name", null); + if (name == null) { + throw new IcatException(IcatExceptionType.BAD_PARAMETER, "name not set in one of parameters"); + } + String units = parameter.getString("units", null); + if (units == null) { + throw new IcatException(IcatExceptionType.BAD_PARAMETER, + "units not set in parameter '" + name + "'"); + } + // If we don't have either a string, pair of dates, or pair 
of numbers, throw + if (!(parameter.containsKey("stringValue") + || (parameter.containsKey("lowerDateValue") + && parameter.containsKey("upperDateValue")) + || (parameter.containsKey("lowerNumericValue") + && parameter.containsKey("upperNumericValue")))) { + throw new IcatException(IcatExceptionType.BAD_PARAMETER, + "value not set in parameter '" + name + "'"); + } + } + } + SearchResult result; + Class klass; + + if (target.equals("Investigation")) { + klass = Investigation.class; + } else if (target.equals("Dataset")) { + klass = Dataset.class; + } else if (target.equals("Datafile")) { + klass = Datafile.class; + } else { + throw new IcatException(IcatExceptionType.BAD_PARAMETER, "target:" + target + " is not expected"); + } + + result = beanManager.freeTextSearchDocs(userName, jo, searchAfterValue, minCount, maxCount, sort, + manager, request.getRemoteAddr(), klass); + + JsonGenerator gen = Json.createGenerator(baos); + gen.writeStartObject(); + JsonValue newSearchAfter = result.getSearchAfter(); + if (newSearchAfter != null) { + gen.write("search_after", newSearchAfter); + } + + List dimensions = result.getDimensions(); + if (dimensions != null && dimensions.size() > 0) { + gen.writeStartObject("dimensions"); + for (FacetDimension dimension : dimensions) { + gen.writeStartObject(dimension.getTarget() + "." 
+ dimension.getDimension()); + for (FacetLabel label : dimension.getFacets()) { + gen.write(label.getLabel(), label.getValue()); + } + gen.writeEnd(); + } + gen.writeEnd(); + } + + gen.writeStartArray("results"); + for (ScoredEntityBaseBean sb : result.getResults()) { + gen.writeStartObject(); + gen.write("id", sb.getId()); + if (!Float.isNaN(sb.getScore())) { + gen.write("score", sb.getScore()); + } + gen.write("source", sb.getSource()); + gen.writeEnd(); + } + gen.writeEnd().writeEnd().close(); + return baos.toString(); + } catch (JsonException e) { + throw new IcatException(IcatExceptionType.BAD_PARAMETER, "JsonException " + e.getMessage()); + } + } + + /** + * Performs subsequent faceting for a particular query containing a list of ids. + * + * @title Document faceting. + * + * @param sessionId a sessionId of a user which takes the form + * 0d9a3706-80d4-4d29-9ff3-4d65d4308a24 + * @param query Json of the format + * { + * "target": `target`, + * "facets": [ + * { + * "target": `facetTarget`, + * "dimensions": [ + * {"dimension": `dimension`, "ranges": [{"key": `key`, "from": `from`, "to": `to`}, ...]}, + * ... + * ] + * }, + * ... 
+ * ], + * "filter": {`termField`: `value`, `termsField`: [...], ...} + * } + * @return Facet labels and counts for the provided query + * @throws IcatException If something goes wrong + */ + @GET + @Path("facet/documents") + @Produces(MediaType.APPLICATION_JSON) + public String facetDocuments(@Context HttpServletRequest request, @QueryParam("sessionId") String sessionId, + @QueryParam("query") String query) throws IcatException { + if (query == null) { + throw new IcatException(IcatExceptionType.BAD_PARAMETER, "query is not set"); + } + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (JsonReader jr = Json.createReader(new StringReader(query))) { + JsonObject jo = jr.readObject(); + + String target = jo.getString("target", null); + + SearchResult result; + Class klass; + + if (target.equals("Investigation")) { + klass = Investigation.class; + } else if (target.equals("Dataset")) { + klass = Dataset.class; + } else if (target.equals("Datafile")) { + klass = Datafile.class; + } else { + throw new IcatException(IcatExceptionType.BAD_PARAMETER, "target:" + target + " is not expected"); + } + + result = beanManager.facetDocs(jo, klass); + + JsonGenerator gen = Json.createGenerator(baos); + gen.writeStartObject(); + List dimensions = result.getDimensions(); + if (dimensions != null && dimensions.size() > 0) { + gen.writeStartObject("dimensions"); + for (FacetDimension dimension : dimensions) { + gen.writeStartObject(dimension.getTarget() + "." 
+ dimension.getDimension()); + for (FacetLabel label : dimension.getFacets()) { + logger.debug("From and to: ", label.getFrom(), label.getTo()); + if (label.getFrom() != null && label.getTo() != null) { + gen.writeStartObject(label.getLabel()); + gen.write("from", label.getFrom()); + gen.write("to", label.getTo()); + gen.write("count", label.getValue()); + gen.writeEnd(); + } else { + gen.write(label.getLabel(), label.getValue()); + } + } + gen.writeEnd(); + } + gen.writeEnd(); + } + + gen.writeEnd().close(); + return baos.toString(); + } catch (JsonException e) { + throw new IcatException(IcatExceptionType.BAD_PARAMETER, "JsonException " + e.getMessage()); } } @@ -1149,9 +1435,9 @@ public void gatekeeperMarkPublicStepsStale(@Context HttpServletRequest request) } /** - * Stop population of the lucene database if it is running. + * Stop population of the search engine if it is running. * - * @title Lucene Clear + * @title Search engine clear * * @param sessionId * a sessionId of a user listed in rootUserNames @@ -1161,15 +1447,15 @@ public void gatekeeperMarkPublicStepsStale(@Context HttpServletRequest request) */ @DELETE @Path("lucene/db") - public void luceneClear(@QueryParam("sessionId") String sessionId) throws IcatException { + public void searchClear(@QueryParam("sessionId") String sessionId) throws IcatException { checkRoot(sessionId); - beanManager.luceneClear(); + beanManager.searchClear(); } /** - * Forces a commit of the lucene database + * Forces a commit of the search engine * - * @title Lucene Commit + * @title Search engine commit * * @param sessionId * a sessionId of a user listed in rootUserNames @@ -1179,15 +1465,15 @@ public void luceneClear(@QueryParam("sessionId") String sessionId) throws IcatEx */ @POST @Path("lucene/db") - public void luceneCommit(@FormParam("sessionId") String sessionId) throws IcatException { + public void searchCommit(@FormParam("sessionId") String sessionId) throws IcatException { checkRoot(sessionId); - 
beanManager.luceneCommit(); + beanManager.searchCommit(); } /** - * Return a list of class names for which population is going on + * Return a list of class names for which search engine population is ongoing * - * @title lucene GetPopulating + * @title Search engine get populating * * @param sessionId * a sessionId of a user listed in rootUserNames @@ -1199,12 +1485,12 @@ public void luceneCommit(@FormParam("sessionId") String sessionId) throws IcatEx @GET @Path("lucene/db") @Produces(MediaType.APPLICATION_JSON) - public String luceneGetPopulating(@QueryParam("sessionId") String sessionId) throws IcatException { + public String searchGetPopulating(@QueryParam("sessionId") String sessionId) throws IcatException { checkRoot(sessionId); ByteArrayOutputStream baos = new ByteArrayOutputStream(); JsonGenerator gen = Json.createGenerator(baos); gen.writeStartArray(); - for (String name : beanManager.luceneGetPopulating()) { + for (String name : beanManager.searchGetPopulating()) { gen.write(name); } gen.writeEnd().close(); @@ -1238,6 +1524,10 @@ public void waitMillis(@FormParam("sessionId") String sessionId, @FormParam("ms" /** * Clear and repopulate lucene documents for the specified entityName * + * @deprecated in favour of {@link #searchPopulate}, which allows an upper limit + * on population to be set and makes deletion of existing documents + * optional. + * * @title Lucene Populate * * @param sessionId @@ -1252,10 +1542,37 @@ public void waitMillis(@FormParam("sessionId") String sessionId, @FormParam("ms" */ @POST @Path("lucene/db/{entityName}/{minid}") + @Deprecated public void lucenePopulate(@FormParam("sessionId") String sessionId, @PathParam("entityName") String entityName, @PathParam("minid") long minid) throws IcatException { checkRoot(sessionId); - beanManager.lucenePopulate(entityName, minid, manager); + beanManager.searchPopulate(entityName, minid, null, true, manager); + } + + /** + * Populates search engine documents for the specified entityName. 
+ * + * Optionally, this will also delete all existing documents of entityName. This + * should only be used when repopulating from scratch is needed. + * + * @param sessionId a sessionId of a user listed in rootUserNames + * @param entityName the name of the entity + * @param minId Process entities with id values greater than (NOT equal to) + * this value + * @param maxId Process entities up to and including with id up to and + * including this value + * @param delete If true, then all existing documents of this type will be + * deleted before adding new ones. + * @throws IcatException when something is wrong + */ + @POST + @Consumes(MediaType.APPLICATION_FORM_URLENCODED) + @Path("lucene/db/{entityName}") + public void searchPopulate(@FormParam("sessionId") String sessionId, @PathParam("entityName") String entityName, + @FormParam("minId") Long minId, @FormParam("maxId") Long maxId, @FormParam("delete") boolean delete) + throws IcatException { + checkRoot(sessionId); + beanManager.searchPopulate(entityName, minId, maxId, delete, manager); } /** diff --git a/src/main/resources/logback.xml b/src/main/resources/logback.xml index 79947e122..a6ebbf16a 100644 --- a/src/main/resources/logback.xml +++ b/src/main/resources/logback.xml @@ -23,7 +23,7 @@ - + diff --git a/src/main/resources/run.properties b/src/main/resources/run.properties index 006bf2c99..2b59216f9 100644 --- a/src/main/resources/run.properties +++ b/src/main/resources/run.properties @@ -16,14 +16,24 @@ notification.Datafile = CU log.list = SESSION WRITE READ INFO -lucene.url = https://localhost.localdomain:8181 -lucene.populateBlockSize = 10000 -lucene.directory = ${HOME}/data/lucene -lucene.backlogHandlerIntervalSeconds = 60 -lucene.enqueuedRequestIntervalSeconds = 3 +# LUCENE, OPENSEARCH and ELASTICSEARCH engines are supported, however the latter two are considered experimental +search.engine = lucene +search.urls = https://localhost.localdomain:8181 +search.populateBlockSize = 10000 +# Recommend setting 
search.searchBlockSize equal to maxIdsInQuery, so that all results can be authorised at once +# If search.searchBlockSize > maxIdsInQuery, then multiple auth checks may be needed for a single search +# The optimal value depends on how likely a user's auth request fails: larger values are more efficient when rejection is more likely +search.searchBlockSize = 1000 +search.directory = ${HOME}/data/search +search.backlogHandlerIntervalSeconds = 60 +search.enqueuedRequestIntervalSeconds = 3 +search.aggregateFilesIntervalSeconds = 3600 +search.maxSearchTimeSeconds = 5 # Configure this option to prevent certain entities being indexed -# For example, remove Datafile and DatafileParameter -!lucene.entitiesToIndex = Datafile Dataset Investigation InvestigationUser DatafileParameter DatasetParameter InvestigationParameter Sample +# For example, remove Datafile and DatafileParameter if these are not of interest +# Note then when commented out, the full set of all possible entities will be indexed - to disable all search functionality, instead comment out search.engine or search.urls +!search.entitiesToIndex = Datafile DatafileFormat DatafileParameter Dataset DatasetParameter DatasetType DatasetTechnique Facility Instrument InstrumentScientist Investigation InvestigationInstrument InvestigationParameter InvestigationType InvestigationUser ParameterType Sample SampleType SampleParameter User +units = \u2103: celsius degC, K: kelvin !cluster = https://smfisher:8181 diff --git a/src/main/scripts/icatadmin b/src/main/scripts/icatadmin index 9e8b3685d..21f2c690d 100755 --- a/src/main/scripts/icatadmin +++ b/src/main/scripts/icatadmin @@ -159,37 +159,68 @@ def getPopulating(args): def populate(args): parser.set_usage(usagebase + "populate []") parser.set_description("Populate lucene (for that entry name)") + parser.add_option( + "-e", + "--entity-name", + action="append", + dest="entityName", + help="Name of entity to populate.", + ) + parser.add_option( + "--min-id", + dest="minId", 
+ help="Minimum (exclusive) ICAT entity id to populate", + type="int", + ) + parser.add_option( + "--max-id", + dest="maxId", + help="Maximum (inclusive) ICAT entity id to populate", + type="int", + ) + parser.add_option( + "-d", + "--delete", + dest="delete", + action="store_true", + help="Whether to delete all existing documents for this index", + ) options, args = parser.parse_args(args) - - if len(args) == 0: - try: - sessionId = getService() - parameters = {"sessionId": sessionId} - for entity in "Datafile", "DatafileParameter", "Dataset", "DatasetParameter", "Investigation", "InvestigationParameter", "InvestigationUser", "Sample": - print(entity) - _process("lucene/db/" + entity + "/-1", parameters, "POST") - except Exception as e: - fatal(e) - return - - if len(args) == 1: - try: - sessionId = getService() - parameters = {"sessionId": sessionId} - entity = args[0] - _process("lucene/db/" + entity + "/-1", parameters, "POST") - except Exception as e: - fatal(e) - return - - if len(args) > 2: - fatal("Must have zero arguments after the operation 'populate' or one - the name of the entity or two with the name of the entity and minid") - + entities = options.entityName or [] + entities += args + if not entities: + # This does not need to include "nested" entities such as ParameterType, as this + # will be included in the READ operation on the DB implicitly + entities = [ + "Datafile", + "Dataset", + "Investigation", + "DatafileParameter", + "DatasetParameter", + "DatasetTechnique", + "InstrumentScientist", + "InvestigationFacilityCycle", + "InvestigationInstrument", + "InvestigationParameter", + "InvestigationUser", + "Sample", + "SampleParameter", + ] + try: sessionId = getService() parameters = {"sessionId": sessionId} - entity = args[0] - _process("lucene/db/" + entity + "/" + args[1], parameters, "POST") + if options.minId: + parameters["minId"] = options.minId + if options.maxId: + parameters["maxId"] = options.maxId + if options.delete: + 
parameters["delete"] = True + else: + parameters["delete"] = False + + for entity in entities: + _process("lucene/db/" + entity, parameters, "POST") except Exception as e: fatal(e) diff --git a/src/site/xhtml/installation.xhtml.vm b/src/site/xhtml/installation.xhtml.vm index 70912e5f9..b701e5d13 100644 --- a/src/site/xhtml/installation.xhtml.vm +++ b/src/site/xhtml/installation.xhtml.vm @@ -27,7 +27,7 @@ installation instructions installed on the server
  • Deployed ICAT authenticators.
  • -
  • A deployed icat.lucene server it you plan to use free-text search.
  • +
  • A deployed icat.lucene server of at least version 3.0.0 or Open/Elasticsearch cluster if you plan to use free-text search.
  • Python 3.6+ and the suds-community package installed on the server.
  • @@ -86,11 +86,41 @@

    Schema upgrade

    -

    Lucene database

    +

    Lucene indices

    - Any existing lucene database should be removed. The location of + Any existing lucene indices should be removed. The location of this would have been specified in the previous icat.properties file. - Ensure that the directory specified there is empty. + Ensure that the directory specified there is empty. Indices generated by + icat.lucene versions before 3 are no longer compatible. +

    +

    + An additional consequence of these changes to icat.lucene is that the Rules and + PublicSteps set in ICAT directly affect what metadata is returned from searches + against the search component. While the normal authorization process is applied + to the Investigation, Dataset and Datafiles that are returned as results, it is + also possible to include metadata from related entities with each result. For + example, the Instrument(s) used in an Investigation. In order to provide results + in a reasonable amount of time, the full authorization process cannot be + followed for these related entities. Instead, a related field will only be + returned if: +

      +
    • + The entity in question is a "public table" - that is, there is a Rule + providing READ access to all users for that entity. +
    • +
    • + There are one or more PublicSteps from the + Investigation/Dataset/Datafile entity to the entity of interest. +
    • +
    + It is entirely reasonable to decide that a PublicStep or public table Rule is + not appropriate for the entity in question, however be aware that this will + limit the metadata returned with the search results. +

    +

    + The same principle applies to the post-search faceting enabled in this release. + Only fields that are allowed to all users via one of the above methods will be + returned, to avoid exposing any unauthorized metadata.

    Database schema

    @@ -262,29 +292,61 @@ log via JMS calls. The types are specified by a space separated list of values taken from READ, WRITE, SESSION, INFO. -

    lucene.url
    -
    This is optional. It is the machine url of the icat.lucene - server if needed. It is needed for TopCAT to work.
    - -
    lucene.populateBlockSize
    -
    This is ignored if lucene.url is not set. The number of - entries to batch off to the lucene server when using lucenePopulate.
    - -
    lucene.directory
    -
    This is ignored if lucene.url is not set. Path of a directory - holding files for requests that are queued to go the icat.lucene - server.
    - -
    lucene.backlogHandlerIntervalSeconds
    -
    This is ignored if lucene.url is not set. How often to check - the backlog file.
    - -
    lucene.enqueuedRequestIntervalSecond
    -
    This is ignored if lucene.url is not set. How often to - transmit lucene requests to the icat.lucene server.
    - -
    lucene.entitiesToIndex = Datafile Dataset Investigation InvestigationUser DatafileParameter DatasetParameter InvestigationParameter Sample
    -
    The entities to index with Lucene. For example, remove 'Datafile' and 'DatafileParameter' if the number of datafiles exceeds lucene's limit of 2^32 entries in an index
    +
    search.engine
    +
    This is optional. Specifies the engine used for free-text searches. + Value should be one of LUCENE, OPENSEARCH and ELASTICSEARCH.
    + +
    search.urls
    +
    This is optional. It is the machine url of the search engine + server if needed.
    + +
    search.populateBlockSize
    +
    This is ignored if search.engine and search.urls are not set. The number of + entries to batch off to the search engine when populating the index.
    + +
    search.searchBlockSize
    +
    This is ignored if search.engine and search.urls are not set. Recommend + setting search.searchBlockSize equal to maxIdsInQuery, so that all results + can be authorised at once. If search.searchBlockSize > maxIdsInQuery, then + multiple auth checks may be needed for a single search. The optimal value + depends on how likely a user's auth request fails: larger values are more + efficient when rejection is more likely.
    + +
    search.directory
    +
    This is ignored if search.engine and search.urls are not set. Path of a + directory holding files for requests that are queued to go to the search engine. +
    + +
    search.backlogHandlerIntervalSeconds
    +
    This is ignored if search.engine and search.urls are not set. How often to + check the backlog file.
    + +
    search.enqueuedRequestIntervalSeconds
    +
    This is ignored if search.engine and search.urls are not set. How often to + transmit requests to the search engine.
    + +
    search.aggregateFilesIntervalSeconds
    +
    This is ignored if search.engine and search.urls are not set. How often to + update file size and counts for Datasets and Investigations containing + recently modified Datafiles. If 0, then rather than being performed on timer + will update the parent documents in real time. Note that this can have a + significant performance impact.
    + +
    search.maxSearchTimeSeconds
    +
    This is ignored if search.engine and search.urls are not set. How long to + wait before cancelling a long-running search. This can prevent badly formed + queries from blocking other searches from completing.
    + +
    search.entitiesToIndex = Datafile DatafileFormat DatafileParameter Dataset DatasetParameter DatasetType DatasetTechnique Facility Instrument InstrumentScientist Investigation InvestigationInstrument InvestigationParameter InvestigationType InvestigationUser ParameterType Sample SampleType SampleParameter User
    +
    The entities to index with the search engine.
    + +
    search.units
    +
    This is optional. Recognised unit names/symbols. Each symbol recognised by + indriya's SimpleUnitFormat should be followed by a colon, and then a comma + separated list of units measuring the same property. If the unit is simply + an alias (e.g. "K: kelvin") this is sufficient. If a conversion is required, + it should be followed by this factor (e.g. "J: eV 1.602176634e-19"). + Different units can be separated by a semi-colon.
    jms.topicConnectionFactory
    This is optional and may be used to override the default @@ -400,18 +462,20 @@
    -
    populate [<entity name>]
    +
    populate [--min-id 0 --max-id 1 --delete] [<entity names>...]
    re-populates lucene for the specified entity name. This is useful if the database has been modified directly rather than by - using the ICAT API. This call is asynchronous and simply places the - request in a set of entity types to be populated. When the request is - processed all lucene entries of the specified entity type are first - cleared then the corresponding icat entries are scanned to - re-populate lucene. To find what it is doing please use the - "populating" operation described below. It may also be run without an - entity name in which case it will process all entities. The new - lucene index will not be seen until it is completely rebuilt. While - the index is being rebuilt ICAT can be used as normal as any lucene + using the ICAT API, or to backpopulate from the database after a breaking + change to the search engine. This call is asynchronous and simply places the + request in a set of entity types to be populated. By default runs over all + relevant entities, or names can be provided as arguments. Also has the + options "min-id" to specify a non-inclusive lower limit, and "max-id" for an + inclusive upper limit on the operation. If documents are found in this + range, then the operation will not proceed, unless "delete" is also + specified - in which case all existing documents are cleared first. + To find what it is doing please use the "populating" operation described + below. The new lucene index will not be seen until it is completely rebuilt. + While the index is being rebuilt ICAT can be used as normal as any lucene updates are stored to be applied later.
    populating
    diff --git a/src/site/xhtml/release-notes.xhtml b/src/site/xhtml/release-notes.xhtml index 7a6629c04..613c2d649 100644 --- a/src/site/xhtml/release-notes.xhtml +++ b/src/site/xhtml/release-notes.xhtml @@ -6,6 +6,18 @@

    ICAT Server Release Notes

    +

    6.1.0

    +

    Add support for Open/Elasticsearch engine backends for free-text searches. Adds new REST endpoints for free-text searches, and deprecates old functionality. Significant changes to the functionality and performance of searches:

    +
      +
    • Ability to search on over 2 billion documents
    • +
    • Enable sorting on specific entity fields
    • +
    • "Infinitely" search the data by using the searchAfter parameter
    • +
    • Faceted searches
    • +
    • Replace single "text" field with specific fields that reflect the ICAT schema to allow field targeting
    • +
    • Support for unit conversion on numeric Parameters
    • +
    • Support for synonym injection
    • +
    +

    6.0.1

    Ensures that authorization rules are read in a separate transaction.

    diff --git a/src/test/java/org/icatproject/core/manager/TestEntityInfo.java b/src/test/java/org/icatproject/core/manager/TestEntityInfo.java index 782d228f5..c44fd686f 100644 --- a/src/test/java/org/icatproject/core/manager/TestEntityInfo.java +++ b/src/test/java/org/icatproject/core/manager/TestEntityInfo.java @@ -43,16 +43,19 @@ public void testBadname() throws Exception { } @Test - public void testHasLuceneDoc() throws Exception { - Set docdbeans = new HashSet<>(Arrays.asList("Investigation", "Dataset", "Datafile", - "InvestigationParameter", "DatasetParameter", "DatafileParameter", "InvestigationUser", "Sample")); + public void testHasSearchDoc() throws Exception { + Set docdbeans = new HashSet<>(Arrays.asList("Datafile", "DatafileFormat", "DatafileParameter", + "Dataset", "DatasetParameter", "DatasetTechnique", "DatasetType", "Facility", "Instrument", + "InstrumentScientist", "Investigation", "InvestigationFacilityCycle", "InvestigationInstrument", + "InvestigationParameter", "InvestigationType", "InvestigationUser", "ParameterType", "Sample", + "SampleType", "SampleParameter", "Technique", "User")); for (String beanName : EntityInfoHandler.getEntityNamesList()) { @SuppressWarnings("unchecked") Class bean = EntityInfoHandler.getClass(beanName); if (docdbeans.contains(beanName)) { - assertTrue(EntityInfoHandler.hasLuceneDoc(bean)); + assertTrue(EntityInfoHandler.hasSearchDoc(bean)); } else { - assertFalse(EntityInfoHandler.hasLuceneDoc(bean)); + assertFalse(EntityInfoHandler.hasSearchDoc(bean)); } } } @@ -175,11 +178,13 @@ public void testFields() throws Exception { + "startDate,studyInvestigations,summary,title,type,visitId", Investigation.class); testField("complete,dataCollectionDatasets,datafiles,datasetInstruments,datasetTechniques,description," - + "doi,endDate,fileCount,fileSize,investigation,location,name,parameters,sample,startDate,type", Dataset.class); - 
testField("dataCollectionDatafiles,dataCollectionDatasets,dataCollectionInvestigations,dataPublications,doi,jobsAsInput,jobsAsOutput,parameters", + + "doi,endDate,fileCount,fileSize,investigation,location,name,parameters,sample,startDate,type", + Dataset.class); + testField( + "dataCollectionDatafiles,dataCollectionDatasets,dataCollectionInvestigations,dataPublications,doi,jobsAsInput,jobsAsOutput,parameters", DataCollection.class); testField("application,arguments,inputDataCollection,outputDataCollection", Job.class); - testField( "description,endDate,name,pid,startDate,status,studyInvestigations,user",Study.class); + testField("description,endDate,name,pid,startDate,status,studyInvestigations,user", Study.class); testField("dataset,dateTimeValue,error,numericValue,rangeBottom,rangeTop,stringValue,type", DatasetParameter.class); testField( @@ -362,7 +367,8 @@ public void stringFields() throws Exception { testSF(Dataset.class, "name 255", "description 255", "location 255", "doi 255"); testSF(Keyword.class, "name 255"); testSF(InvestigationUser.class, "role 255"); - testSF(User.class, "name 255", "fullName 255", "givenName 255", "familyName 255", "affiliation 255", "email 255", "orcidId 255"); + testSF(User.class, "name 255", "fullName 255", "givenName 255", "familyName 255", "affiliation 255", + "email 255", "orcidId 255"); testSF(ParameterType.class, "pid 255", "description 255", "unitsFullName 255", "units 255", "name 255"); testSF(Job.class, "arguments 255"); testSF(Study.class, "name 255", "description 4000", "pid 255"); diff --git a/src/test/java/org/icatproject/core/manager/TestLucene.java b/src/test/java/org/icatproject/core/manager/TestLucene.java deleted file mode 100644 index 2d48db6cc..000000000 --- a/src/test/java/org/icatproject/core/manager/TestLucene.java +++ /dev/null @@ -1,611 +0,0 @@ -package org.icatproject.core.manager; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static 
org.junit.Assert.fail; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Date; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Queue; -import java.util.Set; -import java.util.concurrent.ConcurrentLinkedQueue; - -import jakarta.json.Json; -import jakarta.json.stream.JsonGenerator; -import jakarta.ws.rs.core.MediaType; - -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpPost; -import org.apache.http.client.utils.URIBuilder; -import org.apache.http.entity.StringEntity; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.HttpClients; -import org.icatproject.core.IcatException; -import org.icatproject.core.IcatException.IcatExceptionType; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TestLucene { - - static LuceneApi luceneApi; - private static URI uribase; - final static Logger logger = LoggerFactory.getLogger(TestLucene.class); - - @BeforeClass - public static void beforeClass() throws Exception { - String urlString = System.getProperty("luceneUrl"); - logger.info("Using lucene service at {}", urlString); - uribase = new URI(urlString); - luceneApi = new LuceneApi(uribase); - } - - String letters = "abcdefghijklmnopqrstuvwxyz"; - - long now = new Date().getTime(); - - int NUMINV = 10; - - int NUMUSERS = 5; - - int NUMDS = 30; - - int NUMDF = 100; - - int NUMSAMP = 15; - - private class QueueItem { - - private String entityName; - private Long id; - private String json; - - public QueueItem(String entityName, Long id, String json) { - this.entityName = entityName; - this.id = id; - this.json = json; - } - - } - - @Test - public void modify() throws IcatException { - 
Queue queue = new ConcurrentLinkedQueue<>(); - - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try (JsonGenerator gen = Json.createGenerator(baos)) { - gen.writeStartArray(); - LuceneApi.encodeTextfield(gen, "text", "Elephants and Aardvarks"); - LuceneApi.encodeStringField(gen, "startDate", new Date()); - LuceneApi.encodeStringField(gen, "endDate", new Date()); - LuceneApi.encodeStoredId(gen, 42L); - LuceneApi.encodeStringField(gen, "dataset", 2001L); - gen.writeEnd(); - } - - String json = baos.toString(); - - queue.add(new QueueItem("Datafile", null, json)); - - queue.add(new QueueItem("Datafile", 42L, json)); - - queue.add(new QueueItem("Datafile", 42L, null)); - queue.add(new QueueItem("Datafile", 42L, null)); - - Iterator qiter = queue.iterator(); - if (qiter.hasNext()) { - StringBuilder sb = new StringBuilder("["); - - while (qiter.hasNext()) { - QueueItem item = qiter.next(); - if (sb.length() != 1) { - sb.append(','); - } - sb.append("[\"").append(item.entityName).append('"'); - if (item.id != null) { - sb.append(',').append(item.id); - } else { - sb.append(",null"); - } - if (item.json != null) { - sb.append(',').append(item.json); - } else { - sb.append(",null"); - } - sb.append(']'); - qiter.remove(); - } - sb.append(']'); - logger.debug("XXX " + sb.toString()); - - luceneApi.modify(sb.toString()); - } - - } - - private void addDocuments(String entityName, String json) throws IcatException { - try (CloseableHttpClient httpclient = HttpClients.createDefault()) { - URI uri = new URIBuilder(uribase).setPath(LuceneApi.basePath + "/addNow/" + entityName).build(); - HttpPost httpPost = new HttpPost(uri); - StringEntity input = new StringEntity(json); - input.setContentType(MediaType.APPLICATION_JSON); - httpPost.setEntity(input); - - try (CloseableHttpResponse response = httpclient.execute(httpPost)) { - Rest.checkStatus(response, IcatExceptionType.INTERNAL); - } - } catch (IOException | URISyntaxException e) { - throw new 
IcatException(IcatExceptionType.INTERNAL, e.getClass() + " " + e.getMessage()); - } - } - - @Before - public void before() throws Exception { - luceneApi.clear(); - } - - private void checkLsr(LuceneSearchResult lsr, Long... n) { - Set wanted = new HashSet<>(Arrays.asList(n)); - Set got = new HashSet<>(); - - for (ScoredEntityBaseBean q : lsr.getResults()) { - got.add(q.getEntityBaseBeanId()); - } - - Set missing = new HashSet<>(wanted); - missing.removeAll(got); - if (!missing.isEmpty()) { - for (Long l : missing) { - logger.error("Entry missing: {}", l); - } - fail("Missing entries"); - } - - missing = new HashSet<>(got); - missing.removeAll(wanted); - if (!missing.isEmpty()) { - for (Long l : missing) { - logger.error("Extra entry: {}", l); - } - fail("Extra entries"); - } - - } - - @Test - public void datafiles() throws Exception { - populate(); - - LuceneSearchResult lsr = luceneApi.datafiles(null, null, null, null, null, 5); - Long uid = lsr.getUid(); - - checkLsr(lsr, 0L, 1L, 2L, 3L, 4L); - System.out.println(uid); - lsr = luceneApi.datafiles(uid, 200); - assertTrue(lsr.getUid() == null); - assertEquals(95, lsr.getResults().size()); - luceneApi.freeSearcher(uid); - - lsr = luceneApi.datafiles("e4", null, null, null, null, 100); - checkLsr(lsr, 1L, 6L, 11L, 16L, 21L, 26L, 31L, 36L, 41L, 46L, 51L, 56L, 61L, 66L, 71L, 76L, 81L, 86L, 91L, 96L); - luceneApi.freeSearcher(lsr.getUid()); - - lsr = luceneApi.datafiles("e4", "dfbbb", null, null, null, 100); - checkLsr(lsr, 1L); - luceneApi.freeSearcher(lsr.getUid()); - - lsr = luceneApi.datafiles(null, "dfbbb", null, null, null, 100); - checkLsr(lsr, 1L, 27L, 53L, 79L); - luceneApi.freeSearcher(lsr.getUid()); - - lsr = luceneApi.datafiles(null, null, new Date(now + 60000 * 3), new Date(now + 60000 * 6), null, 100); - checkLsr(lsr, 3L, 4L, 5L, 6L); - luceneApi.freeSearcher(lsr.getUid()); - - lsr = luceneApi.datafiles("b1", "dsddd", new Date(now + 60000 * 3), new Date(now + 60000 * 6), null, 100); - checkLsr(lsr); - 
luceneApi.freeSearcher(lsr.getUid()); - - List pojos = new ArrayList<>(); - pojos.add(new ParameterPOJO(null, null, "v25")); - lsr = luceneApi.datafiles(null, null, new Date(now + 60000 * 3), new Date(now + 60000 * 6), pojos, 100); - checkLsr(lsr, 5L); - luceneApi.freeSearcher(lsr.getUid()); - - pojos = new ArrayList<>(); - pojos.add(new ParameterPOJO(null, null, "v25")); - lsr = luceneApi.datafiles(null, null, null, null, pojos, 100); - checkLsr(lsr, 5L); - luceneApi.freeSearcher(lsr.getUid()); - - pojos = new ArrayList<>(); - pojos.add(new ParameterPOJO(null, "u sss", null)); - lsr = luceneApi.datafiles(null, null, null, null, pojos, 100); - checkLsr(lsr, 13L, 65L); - luceneApi.freeSearcher(lsr.getUid()); - } - - @Test - public void datasets() throws Exception { - populate(); - LuceneSearchResult lsr = luceneApi.datasets(null, null, null, null, null, 5); - - Long uid = lsr.getUid(); - checkLsr(lsr, 0L, 1L, 2L, 3L, 4L); - System.out.println(uid); - lsr = luceneApi.datasets(uid, 100); - assertTrue(lsr.getUid() == null); - checkLsr(lsr, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, - 25L, 26L, 27L, 28L, 29L); - luceneApi.freeSearcher(uid); - - lsr = luceneApi.datasets("e4", null, null, null, null, 100); - checkLsr(lsr, 1L, 6L, 11L, 16L, 21L, 26L); - luceneApi.freeSearcher(lsr.getUid()); - - lsr = luceneApi.datasets("e4", "dsbbb", null, null, null, 100); - checkLsr(lsr, 1L); - luceneApi.freeSearcher(lsr.getUid()); - - lsr = luceneApi.datasets(null, "dsbbb", null, null, null, 100); - checkLsr(lsr, 1L, 27L); - luceneApi.freeSearcher(lsr.getUid()); - - lsr = luceneApi.datasets(null, null, new Date(now + 60000 * 3), new Date(now + 60000 * 6), null, 100); - checkLsr(lsr, 3L, 4L, 5L); - luceneApi.freeSearcher(lsr.getUid()); - - lsr = luceneApi.datasets("b1", "dsddd", new Date(now + 60000 * 3), new Date(now + 60000 * 6), null, 100); - checkLsr(lsr, 3L); - luceneApi.freeSearcher(lsr.getUid()); - - List pojos = new 
ArrayList<>(); - pojos.add(new ParameterPOJO(null, null, "v16")); - lsr = luceneApi.datasets(null, null, null, null, pojos, 100); - checkLsr(lsr, 4L); - luceneApi.freeSearcher(lsr.getUid()); - - pojos = new ArrayList<>(); - pojos.add(new ParameterPOJO(null, null, "v16")); - lsr = luceneApi.datasets(null, null, new Date(now + 60000 * 3), new Date(now + 60000 * 6), pojos, 100); - checkLsr(lsr, 4L); - luceneApi.freeSearcher(lsr.getUid()); - - pojos = new ArrayList<>(); - pojos.add(new ParameterPOJO(null, null, "v16")); - lsr = luceneApi.datasets("b1", "dsddd", new Date(now + 60000 * 3), new Date(now + 60000 * 6), pojos, 100); - checkLsr(lsr); - luceneApi.freeSearcher(lsr.getUid()); - - } - - private void fillParms(JsonGenerator gen, int i, String rel) { - int j = i % 26; - int k = (i + 5) % 26; - String name = "nm " + letters.substring(j, j + 1) + letters.substring(j, j + 1) + letters.substring(j, j + 1); - String units = "u " + letters.substring(k, k + 1) + letters.substring(k, k + 1) + letters.substring(k, k + 1); - - gen.writeStartArray(); - LuceneApi.encodeStringField(gen, "name", "S" + name); - LuceneApi.encodeStringField(gen, "units", units); - LuceneApi.encodeStringField(gen, "stringValue", "v" + i * i); - LuceneApi.encodeSortedDocValuesField(gen, rel, new Long(i)); - gen.writeEnd(); - System.out.println(rel + " " + i + " '" + "S" + name + "' '" + units + "' 'v" + i * i + "'"); - - gen.writeStartArray(); - LuceneApi.encodeStringField(gen, "name", "N" + name); - LuceneApi.encodeStringField(gen, "units", units); - LuceneApi.encodeDoubleField(gen, "numericValue", new Double(j * j)); - LuceneApi.encodeSortedDocValuesField(gen, rel, new Long(i)); - gen.writeEnd(); - System.out.println(rel + " " + i + " '" + "N" + name + "' '" + units + "' " + new Double(j * j)); - - gen.writeStartArray(); - LuceneApi.encodeStringField(gen, "name", "D" + name); - LuceneApi.encodeStringField(gen, "units", units); - LuceneApi.encodeStringField(gen, "dateTimeValue", new Date(now + 60000 
* k * k)); - LuceneApi.encodeSortedDocValuesField(gen, rel, new Long(i)); - gen.writeEnd(); - System.out.println( - rel + " " + i + " '" + "D" + name + "' '" + units + "' '" + new Date(now + 60000 * k * k) + "'"); - - } - - @Test - public void investigations() throws Exception { - populate(); - - /* Blocked results */ - LuceneSearchResult lsr = luceneApi.investigations(null, null, null, null, null, null, null, 5); - Long uid = lsr.getUid(); - checkLsr(lsr, 0L, 1L, 2L, 3L, 4L); - System.out.println(uid); - lsr = luceneApi.investigations(uid, 6); - assertTrue(lsr.getUid() == null); - checkLsr(lsr, 5L, 6L, 7L, 8L, 9L); - luceneApi.freeSearcher(uid); - - lsr = luceneApi.investigations(null, null, null, null, null, null, "b", 100); - checkLsr(lsr, 1L, 3L, 5L, 7L, 9L); - luceneApi.freeSearcher(lsr.getUid()); - - lsr = luceneApi.investigations(null, null, null, null, null, null, "FN", 100); - checkLsr(lsr, 1L, 3L, 4L, 5L, 6L, 7L, 9L); - luceneApi.freeSearcher(lsr.getUid()); - - lsr = luceneApi.investigations(null, null, null, null, null, null, "FN AND \"b b\"", 100); - checkLsr(lsr, 1L, 3L, 5L, 7L, 9L); - luceneApi.freeSearcher(lsr.getUid()); - - lsr = luceneApi.investigations("b1", null, null, null, null, null, "b", 100); - checkLsr(lsr, 1L, 3L, 5L, 7L, 9L); - luceneApi.freeSearcher(lsr.getUid()); - - lsr = luceneApi.investigations("c1", null, null, null, null, null, "b", 100); - checkLsr(lsr); - luceneApi.freeSearcher(lsr.getUid()); - - lsr = luceneApi.investigations("b1", null, null, null, null, null, "b", 100); - checkLsr(lsr, 1L, 3L, 5L, 7L, 9L); - luceneApi.freeSearcher(lsr.getUid()); - - lsr = luceneApi.investigations(null, "l v", null, null, null, null, null, 100); - checkLsr(lsr, 4L); - luceneApi.freeSearcher(lsr.getUid()); - - lsr = luceneApi.investigations("b1", "d", null, null, null, null, "b", 100); - checkLsr(lsr, 3L); - luceneApi.freeSearcher(lsr.getUid()); - - lsr = luceneApi.investigations("b1", "d", new Date(now + 60000 * 3), new Date(now + 60000 * 6), 
null, null, "b", - 100); - checkLsr(lsr, 3L); - luceneApi.freeSearcher(lsr.getUid()); - - lsr = luceneApi.investigations(null, null, new Date(now + 60000 * 3), new Date(now + 60000 * 6), null, null, - null, 100); - checkLsr(lsr, 3L, 4L, 5L); - luceneApi.freeSearcher(lsr.getUid()); - - List pojos = new ArrayList<>(); - pojos.add(new ParameterPOJO(null, null, "v9")); - lsr = luceneApi.investigations(null, null, null, null, pojos, null, null, 100); - checkLsr(lsr, 3L); - luceneApi.freeSearcher(lsr.getUid()); - - pojos = new ArrayList<>(); - pojos.add(new ParameterPOJO(null, null, "v9")); - pojos.add(new ParameterPOJO(null, null, 7, 10)); - pojos.add(new ParameterPOJO(null, null, new Date(now + 60000 * 63), new Date(now + 60000 * 65))); - lsr = luceneApi.investigations(null, null, null, null, pojos, null, null, 100); - checkLsr(lsr, 3L); - luceneApi.freeSearcher(lsr.getUid()); - - pojos = new ArrayList<>(); - pojos.add(new ParameterPOJO(null, null, "v9")); - lsr = luceneApi.investigations("b1", "d", new Date(now + 60000 * 3), new Date(now + 60000 * 6), pojos, null, - "b", 100); - checkLsr(lsr, 3L); - luceneApi.freeSearcher(lsr.getUid()); - - pojos = new ArrayList<>(); - pojos.add(new ParameterPOJO(null, null, "v9")); - pojos.add(new ParameterPOJO(null, null, "v81")); - lsr = luceneApi.investigations(null, null, null, null, pojos, null, null, 100); - checkLsr(lsr); - luceneApi.freeSearcher(lsr.getUid()); - - pojos = new ArrayList<>(); - pojos.add(new ParameterPOJO("Snm ddd", "u iii", "v9")); - lsr = luceneApi.investigations(null, null, null, null, pojos, null, null, 100); - checkLsr(lsr, 3L); - luceneApi.freeSearcher(lsr.getUid()); - - List samples = Arrays.asList("ddd", "nnn"); - lsr = luceneApi.investigations(null, null, null, null, null, samples, null, 100); - checkLsr(lsr, 3L); - luceneApi.freeSearcher(lsr.getUid()); - - samples = Arrays.asList("ddd", "mmm"); - lsr = luceneApi.investigations(null, null, null, null, null, samples, null, 100); - checkLsr(lsr); - 
luceneApi.freeSearcher(lsr.getUid()); - - pojos = new ArrayList<>(); - pojos.add(new ParameterPOJO("Snm ddd", "u iii", "v9")); - samples = Arrays.asList("ddd", "nnn"); - lsr = luceneApi.investigations("b1", "d", new Date(now + 60000 * 3), new Date(now + 60000 * 6), pojos, samples, - "b", 100); - checkLsr(lsr, 3L); - luceneApi.freeSearcher(lsr.getUid()); - } - - @Test - public void locking() throws IcatException { - - try { - luceneApi.unlock("Dataset"); - fail(); - } catch (IcatException e) { - assertEquals("Lucene is not currently locked for Dataset", e.getMessage()); - } - luceneApi.lock("Dataset"); - try { - luceneApi.lock("Dataset"); - fail(); - } catch (IcatException e) { - assertEquals("Lucene already locked for Dataset", e.getMessage()); - } - luceneApi.unlock("Dataset"); - try { - luceneApi.unlock("Dataset"); - fail(); - } catch (IcatException e) { - assertEquals("Lucene is not currently locked for Dataset", e.getMessage()); - } - } - - /** - * Populate UserGroup, Investigation, InvestigationParameter, - * InvestigationUser, Dataset,DatasetParameter,Datafile, DatafileParameter - * and Sample - */ - private void populate() throws IcatException { - - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try (JsonGenerator gen = Json.createGenerator(baos)) { - gen.writeStartArray(); - for (int i = 0; i < NUMINV; i++) { - for (int j = 0; j < NUMUSERS; j++) { - if (i % (j + 1) == 1) { - String fn = "FN " + letters.substring(j, j + 1) + " " + letters.substring(j, j + 1); - String name = letters.substring(j, j + 1) + j; - gen.writeStartArray(); - - LuceneApi.encodeTextfield(gen, "text", fn); - - LuceneApi.encodeStringField(gen, "name", name); - LuceneApi.encodeSortedDocValuesField(gen, "investigation", new Long(i)); - - gen.writeEnd(); - System.out.println("'" + fn + "' " + name + " " + i); - } - } - } - gen.writeEnd(); - } - addDocuments("InvestigationUser", baos.toString()); - - baos = new ByteArrayOutputStream(); - try (JsonGenerator gen = 
Json.createGenerator(baos)) { - gen.writeStartArray(); - for (int i = 0; i < NUMINV; i++) { - int j = i % 26; - int k = (i + 7) % 26; - int l = (i + 17) % 26; - String word = letters.substring(j, j + 1) + " " + letters.substring(k, k + 1) + " " - + letters.substring(l, l + 1); - gen.writeStartArray(); - LuceneApi.encodeTextfield(gen, "text", word); - LuceneApi.encodeStringField(gen, "startDate", new Date(now + i * 60000)); - LuceneApi.encodeStringField(gen, "endDate", new Date(now + (i + 1) * 60000)); - LuceneApi.encodeStoredId(gen, new Long(i)); - LuceneApi.encodeSortedDocValuesField(gen, "id", new Long(i)); - gen.writeEnd(); - System.out.println("INVESTIGATION '" + word + "' " + new Date(now + i * 60000) + " " + i); - } - gen.writeEnd(); - } - addDocuments("Investigation", baos.toString()); - - baos = new ByteArrayOutputStream(); - try (JsonGenerator gen = Json.createGenerator(baos)) { - gen.writeStartArray(); - for (int i = 0; i < NUMINV; i++) { - if (i % 2 == 1) { - fillParms(gen, i, "investigation"); - } - } - gen.writeEnd(); - } - addDocuments("InvestigationParameter", baos.toString()); - - baos = new ByteArrayOutputStream(); - try (JsonGenerator gen = Json.createGenerator(baos)) { - gen.writeStartArray(); - for (int i = 0; i < NUMDS; i++) { - int j = i % 26; - String word = "DS" + letters.substring(j, j + 1) + letters.substring(j, j + 1) - + letters.substring(j, j + 1); - gen.writeStartArray(); - LuceneApi.encodeTextfield(gen, "text", word); - LuceneApi.encodeStringField(gen, "startDate", new Date(now + i * 60000)); - LuceneApi.encodeStringField(gen, "endDate", new Date(now + (i + 1) * 60000)); - LuceneApi.encodeStoredId(gen, new Long(i)); - LuceneApi.encodeSortedDocValuesField(gen, "id", new Long(i)); - LuceneApi.encodeStringField(gen, "investigation", new Long(i % NUMINV)); - gen.writeEnd(); - System.out.println("DATASET '" + word + "' " + new Date(now + i * 60000) + " " + i + " " + i % NUMINV); - } - gen.writeEnd(); - } - addDocuments("Dataset", 
baos.toString()); - - baos = new ByteArrayOutputStream(); - try (JsonGenerator gen = Json.createGenerator(baos)) { - gen.writeStartArray(); - for (int i = 0; i < NUMDS; i++) { - if (i % 3 == 1) { - fillParms(gen, i, "dataset"); - } - } - gen.writeEnd(); - } - addDocuments("DatasetParameter", baos.toString()); - - baos = new ByteArrayOutputStream(); - try (JsonGenerator gen = Json.createGenerator(baos)) { - gen.writeStartArray(); - for (int i = 0; i < NUMDF; i++) { - int j = i % 26; - String word = "DF" + letters.substring(j, j + 1) + letters.substring(j, j + 1) - + letters.substring(j, j + 1); - gen.writeStartArray(); - LuceneApi.encodeTextfield(gen, "text", word); - LuceneApi.encodeStringField(gen, "date", new Date(now + i * 60000)); - LuceneApi.encodeStoredId(gen, new Long(i)); - LuceneApi.encodeStringField(gen, "dataset", new Long(i % NUMDS)); - gen.writeEnd(); - System.out.println("DATAFILE '" + word + "' " + new Date(now + i * 60000) + " " + i + " " + i % NUMDS); - - } - gen.writeEnd(); - } - addDocuments("Datafile", baos.toString()); - - baos = new ByteArrayOutputStream(); - try (JsonGenerator gen = Json.createGenerator(baos)) { - gen.writeStartArray(); - for (int i = 0; i < NUMDF; i++) { - if (i % 4 == 1) { - fillParms(gen, i, "datafile"); - } - } - gen.writeEnd(); - } - addDocuments("DatafileParameter", baos.toString()); - - baos = new ByteArrayOutputStream(); - try (JsonGenerator gen = Json.createGenerator(baos)) { - gen.writeStartArray(); - for (int i = 0; i < NUMSAMP; i++) { - int j = i % 26; - String word = "SType " + letters.substring(j, j + 1) + letters.substring(j, j + 1) - + letters.substring(j, j + 1); - gen.writeStartArray(); - LuceneApi.encodeTextfield(gen, "text", word); - LuceneApi.encodeSortedDocValuesField(gen, "investigation", new Long(i % NUMINV)); - gen.writeEnd(); - System.out.println("SAMPLE '" + word + "' " + i % NUMINV); - } - gen.writeEnd(); - - } - addDocuments("Sample", baos.toString()); - - luceneApi.commit(); - - } - -} diff --git 
a/src/test/java/org/icatproject/core/manager/TestSearchApi.java b/src/test/java/org/icatproject/core/manager/TestSearchApi.java new file mode 100644 index 000000000..f5cd83054 --- /dev/null +++ b/src/test/java/org/icatproject/core/manager/TestSearchApi.java @@ -0,0 +1,1562 @@ +package org.icatproject.core.manager; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.fail; + +import java.lang.reflect.Array; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Date; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import jakarta.json.Json; +import jakarta.json.JsonArray; +import jakarta.json.JsonArrayBuilder; +import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; +import jakarta.json.JsonValue; +import jakarta.persistence.EntityManager; +import jakarta.persistence.PersistenceUnit; + +import org.icatproject.core.IcatException; +import org.icatproject.core.IcatException.IcatExceptionType; +import org.icatproject.core.entity.Datafile; +import org.icatproject.core.entity.DatafileFormat; +import org.icatproject.core.entity.DatafileParameter; +import org.icatproject.core.entity.Dataset; +import org.icatproject.core.entity.DatasetParameter; +import org.icatproject.core.entity.DatasetTechnique; +import org.icatproject.core.entity.DatasetType; +import org.icatproject.core.entity.EntityBaseBean; +import org.icatproject.core.entity.Facility; +import org.icatproject.core.entity.FacilityCycle; +import org.icatproject.core.entity.Instrument; +import org.icatproject.core.entity.InstrumentScientist; +import org.icatproject.core.entity.Investigation; +import org.icatproject.core.entity.InvestigationFacilityCycle; +import org.icatproject.core.entity.InvestigationInstrument; +import org.icatproject.core.entity.InvestigationParameter; +import 
org.icatproject.core.entity.InvestigationType; +import org.icatproject.core.entity.InvestigationUser; +import org.icatproject.core.entity.Parameter; +import org.icatproject.core.entity.ParameterType; +import org.icatproject.core.entity.Sample; +import org.icatproject.core.entity.SampleParameter; +import org.icatproject.core.entity.SampleType; +import org.icatproject.core.entity.Technique; +import org.icatproject.core.entity.User; +import org.icatproject.core.manager.search.FacetDimension; +import org.icatproject.core.manager.search.FacetLabel; +import org.icatproject.core.manager.search.LuceneApi; +import org.icatproject.core.manager.search.OpensearchApi; +import org.icatproject.core.manager.search.ParameterPOJO; +import org.icatproject.core.manager.search.ScoredEntityBaseBean; +import org.icatproject.core.manager.search.SearchApi; +import org.icatproject.core.manager.search.SearchResult; +import org.junit.Before; +import org.junit.Ignore; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@RunWith(Parameterized.class) +public class TestSearchApi { + + private class Filter { + private String fld; + private String value; + private JsonArray array; + + public Filter(String fld, String... values) { + this.fld = fld; + if (values.length == 1) { + this.value = values[0]; + } + JsonArrayBuilder arrayBuilder = Json.createArrayBuilder(); + for (String value : values) { + arrayBuilder.add(value); + } + array = arrayBuilder.build(); + } + + public Filter(String fld, JsonObject... 
values) { + this.fld = fld; + JsonArrayBuilder arrayBuilder = Json.createArrayBuilder(); + for (JsonObject value : values) { + arrayBuilder.add(value); + } + array = arrayBuilder.build(); + } + } + + @PersistenceUnit(unitName = "icat") + private EntityManager manager; + + private static final String SEARCH_AFTER_NOT_NULL = "Expected searchAfter to be set, but it was null"; + private static final List datafileFields = Arrays.asList("id", "name", "location", "datafileFormat.name", + "date", "dataset.id", "dataset.name", "investigation.id", "investigation.name", + "InvestigationInstrument instrument.id", "InvestigationFacilityCycle facilityCycle.id"); + private static final List datasetFields = Arrays.asList("id", "name", "startDate", "endDate", + "investigation.id", "investigation.name", "investigation.title", "investigation.startDate", + "InvestigationInstrument instrument.id", "InvestigationFacilityCycle facilityCycle.id"); + private static final List investigationFields = Arrays.asList("id", "name", "title", "startDate", "endDate", + "InvestigationInstrument instrument.id", "InvestigationInstrument instrument.name", + "InvestigationInstrument instrument.fullName", "InvestigationFacilityCycle facilityCycle.id"); + + private static Facility facility = new Facility(); + private static FacilityCycle facilityCycle = new FacilityCycle(); + private static InvestigationType investigationType = new InvestigationType(); + static { + facility.setName("facility"); + facility.setId(0L); + facilityCycle.setFacility(facility); + facilityCycle.setId(0L); + investigationType.setName("type"); + investigationType.setId(0L); + } + + final static Logger logger = LoggerFactory.getLogger(TestSearchApi.class); + + @Parameterized.Parameters + public static Iterable data() throws URISyntaxException, IcatException { + String searchEngine = System.getProperty("searchEngine"); + String searchUrls = System.getProperty("searchUrls"); + URI searchUri = new URI(searchUrls); + logger.info("Using 
{} service at {}", searchEngine, searchUrls); + switch (searchEngine) { + case "LUCENE": + return Arrays.asList(new LuceneApi(searchUri)); + case "OPENSEARCH": + case "ELASTICSEARCH": + return Arrays.asList(new OpensearchApi(searchUri, "\u2103: celsius", false)); + default: + String msg = "Search engine must be one of LUCENE, OPENSEARCH or ELASTICSEARCH but was " + searchEngine; + throw new IcatException(IcatExceptionType.BAD_PARAMETER, msg); + } + } + + @Parameterized.Parameter + public SearchApi searchApi; + + String letters = "abcdefghijklmnopqrstuvwxyz"; + Date date = new Date(); + long now = date.getTime(); + int NUMINV = 10; + int NUMUSERS = 5; + int NUMDS = 30; + int NUMDF = 100; + int NUMSAMP = 15; + + /** + * Utility function for building a Query from individual arguments + */ + public static JsonObject buildQuery(String target, String user, String text, Date lower, Date upper, + List parameters, String userFullName, Filter... filters) { + JsonObjectBuilder builder = Json.createObjectBuilder(); + if (target != null) { + builder.add("target", target); + } + if (user != null) { + builder.add("user", user); + } + if (text != null) { + builder.add("text", text); + } + if (lower != null) { + builder.add("lower", lower.getTime()); + } + if (upper != null) { + builder.add("upper", upper.getTime()); + } + if (parameters != null && !parameters.isEmpty()) { + JsonArrayBuilder parametersBuilder = Json.createArrayBuilder(); + for (ParameterPOJO parameter : parameters) { + JsonObjectBuilder parameterBuilder = Json.createObjectBuilder(); + if (parameter.name != null) { + parameterBuilder.add("name", parameter.name); + } + if (parameter.units != null) { + parameterBuilder.add("units", parameter.units); + } + if (parameter.stringValue != null) { + parameterBuilder.add("stringValue", parameter.stringValue); + } + if (parameter.lowerDateValue != null) { + parameterBuilder.add("lowerDateValue", parameter.lowerDateValue.getTime()); + } + if (parameter.upperDateValue != null) 
{ + parameterBuilder.add("upperDateValue", parameter.upperDateValue.getTime()); + } + if (parameter.lowerNumericValue != null) { + parameterBuilder.add("lowerNumericValue", parameter.lowerNumericValue); + } + if (parameter.upperNumericValue != null) { + parameterBuilder.add("upperNumericValue", parameter.upperNumericValue); + } + parametersBuilder.add(parameterBuilder); + } + builder.add("parameters", parametersBuilder); + } + if (userFullName != null) { + builder.add("userFullName", userFullName); + } + if (filters.length > 0) { + JsonObjectBuilder filterBuilder = Json.createObjectBuilder(); + for (Filter filter : filters) { + if (filter.value != null) { + filterBuilder.add(filter.fld, filter.value); + } else { + filterBuilder.add(filter.fld, filter.array); + } + } + builder.add("filter", filterBuilder); + } + return builder.build(); + } + + private static JsonObject buildFacetIdQuery(String idField, long... idValues) { + JsonArrayBuilder arrayBuilder = Json.createArrayBuilder(); + for (long id : idValues) { + arrayBuilder.add(id); + } + return Json.createObjectBuilder().add(idField, arrayBuilder).build(); + } + + private static JsonObject buildFacetRangeObject(String key, double from, double to) { + return Json.createObjectBuilder().add("from", from).add("to", to).add("key", key).build(); + } + + private static JsonObject buildFacetRangeObject(String key, long from, long to) { + return Json.createObjectBuilder().add("from", from).add("to", to).add("key", key).build(); + } + + private static JsonObject buildFacetRangeRequest(JsonObject queryObject, String dimension, + JsonObject... 
rangeObjects) { + JsonArrayBuilder rangesBuilder = Json.createArrayBuilder(); + for (JsonObject rangeObject : rangeObjects) { + rangesBuilder.add(rangeObject); + } + JsonObjectBuilder rangedDimensionBuilder = Json.createObjectBuilder().add("dimension", dimension).add("ranges", + rangesBuilder); + JsonArrayBuilder rangedDimensionsBuilder = Json.createArrayBuilder().add(rangedDimensionBuilder); + return Json.createObjectBuilder().add("query", queryObject).add("dimensions", rangedDimensionsBuilder).build(); + } + + private static JsonObject buildFacetStringRequest(String idField, long idValue, String dimension) { + JsonObject idQuery = buildFacetIdQuery(idField, idValue); + JsonObjectBuilder stringDimensionBuilder = Json.createObjectBuilder().add("dimension", dimension); + JsonArrayBuilder stringDimensionsBuilder = Json.createArrayBuilder().add(stringDimensionBuilder); + return Json.createObjectBuilder().add("query", idQuery).add("dimensions", stringDimensionsBuilder).build(); + } + + private JsonObject buildFacetSparseRequest(JsonObject facetIdQuery) { + return Json.createObjectBuilder().add("query", facetIdQuery).build(); + } + + private void checkDatafile(ScoredEntityBaseBean datafile) { + JsonObject source = datafile.getSource(); + assertNotNull(source); + Set expectedKeys = new HashSet<>(Arrays.asList("id", "name", "location", "date", "dataset.id", + "dataset.name", "investigation.id", "investigation.name", "investigationinstrument", + "investigationfacilitycycle")); + assertEquals(expectedKeys, source.keySet()); + assertEquals(0, source.getJsonNumber("id").longValueExact()); + assertEquals("DFaaa", source.getString("name")); + assertEquals("/dir/DFaaa", source.getString("location")); + assertNotNull(source.getJsonNumber("date")); + assertEquals(0, source.getJsonNumber("dataset.id").longValueExact()); + assertEquals("DSaaa", source.getString("dataset.name")); + assertEquals(0, source.getJsonNumber("investigation.id").longValueExact()); + assertEquals("a h r", 
source.getString("investigation.name")); + JsonArray instruments = source.getJsonArray("investigationinstrument"); + assertEquals(1, instruments.size()); + assertEquals(0, instruments.getJsonObject(0).getJsonNumber("instrument.id").longValueExact()); + JsonArray facilityCycles = source.getJsonArray("investigationfacilitycycle"); + assertEquals(1, facilityCycles.size()); + assertEquals(0, facilityCycles.getJsonObject(0).getJsonNumber("facilityCycle.id").longValueExact()); + } + + private void checkDataset(ScoredEntityBaseBean dataset) { + JsonObject source = dataset.getSource(); + assertNotNull(source); + Set expectedKeys = new HashSet<>(Arrays.asList("id", "name", "startDate", "endDate", "investigation.id", + "investigation.name", "investigation.title", "investigation.startDate", "investigationinstrument", + "investigationfacilitycycle")); + assertEquals(expectedKeys, source.keySet()); + assertEquals(0, source.getJsonNumber("id").longValueExact()); + assertEquals("DSaaa", source.getString("name")); + assertNotNull(source.getJsonNumber("startDate")); + assertNotNull(source.getJsonNumber("endDate")); + assertEquals(0, source.getJsonNumber("investigation.id").longValueExact()); + assertEquals("a h r", source.getString("investigation.name")); + assertEquals("title", source.getString("investigation.title")); + assertNotNull(source.getJsonNumber("investigation.startDate")); + JsonArray instruments = source.getJsonArray("investigationinstrument"); + assertEquals(1, instruments.size()); + assertEquals(0, instruments.getJsonObject(0).getJsonNumber("instrument.id").longValueExact()); + JsonArray facilityCycles = source.getJsonArray("investigationfacilitycycle"); + assertEquals(1, facilityCycles.size()); + assertEquals(0, facilityCycles.getJsonObject(0).getJsonNumber("facilityCycle.id").longValueExact()); + } + + private void checkFacets(List facetDimensions, FacetDimension... 
dimensions) { + assertEquals(dimensions.length, facetDimensions.size()); + for (int i = 0; i < dimensions.length; i++) { + FacetDimension expectedFacet = dimensions[i]; + FacetDimension actualFacet = facetDimensions.get(i); + assertEquals(expectedFacet.getDimension(), actualFacet.getDimension()); + List expectedLabels = expectedFacet.getFacets(); + List actualLabels = actualFacet.getFacets(); + String message = "Expected " + expectedLabels.toString() + " but got " + actualLabels.toString(); + assertEquals(message, expectedLabels.size(), actualLabels.size()); + for (int j = 0; j < expectedLabels.size(); j++) { + FacetLabel expectedLabel = expectedLabels.get(j); + FacetLabel actualLabel = actualLabels.get(j); + String label = expectedLabel.getLabel(); + long expectedValue = expectedLabel.getValue(); + long actualValue = actualLabel.getValue(); + assertEquals(label, actualLabel.getLabel()); + message = "Label <" + label + ">: "; + assertEquals(message, expectedValue, actualValue); + } + } + } + + private void checkInvestigation(ScoredEntityBaseBean investigation) { + JsonObject source = investigation.getSource(); + assertNotNull(source); + Set expectedKeys = new HashSet<>(Arrays.asList( + "id", "name", "title", "startDate", "endDate", "investigationinstrument", + "investigationfacilitycycle")); + assertEquals(expectedKeys, source.keySet()); + assertEquals(0, source.getJsonNumber("id").longValueExact()); + assertEquals("a h r", source.getString("name")); + assertNotNull(source.getJsonNumber("startDate")); + assertNotNull(source.getJsonNumber("endDate")); + JsonArray instruments = source.getJsonArray("investigationinstrument"); + assertEquals(1, instruments.size()); + assertEquals(0, instruments.getJsonObject(0).getJsonNumber("instrument.id").longValueExact()); + assertEquals("bl0", instruments.getJsonObject(0).getString("instrument.name")); + assertEquals("Beamline 0", instruments.getJsonObject(0).getString("instrument.fullName")); + JsonArray facilityCycles = 
source.getJsonArray("investigationfacilitycycle"); + assertEquals(1, facilityCycles.size()); + assertEquals(0, facilityCycles.getJsonObject(0).getJsonNumber("facilityCycle.id").longValueExact()); + } + + private void checkResults(SearchResult lsr, Long... n) { + Set wanted = new HashSet<>(Arrays.asList(n)); + Set got = new HashSet<>(); + + for (ScoredEntityBaseBean q : lsr.getResults()) { + got.add(q.getId()); + } + + Set missing = new HashSet<>(wanted); + missing.removeAll(got); + if (!missing.isEmpty()) { + for (Long l : missing) { + logger.error("Entry missing: {}", l); + } + fail("Missing entries"); + } + + missing = new HashSet<>(got); + missing.removeAll(wanted); + if (!missing.isEmpty()) { + for (Long l : missing) { + logger.error("Extra entry: {}", l); + } + fail("Extra entries"); + } + + } + + private void checkOrder(SearchResult lsr, Long... n) { + List results = lsr.getResults(); + if (n.length != results.size()) { + checkResults(lsr, n); + } + for (int i = 0; i < n.length; i++) { + long resultId = results.get(i).getId(); + long expectedId = (long) Array.get(n, i); + if (resultId != expectedId) { + fail("Expected id " + expectedId + " in position " + i + " but got " + resultId); + } + } + } + + private Datafile datafile(long id, String name, String location, Date date, Dataset dataset) { + Datafile datafile = new Datafile(); + datafile.setId(id); + datafile.setName(name); + datafile.setLocation(location); + datafile.setDatafileModTime(date); + datafile.setDataset(dataset); + return datafile; + } + + private DatafileFormat datafileFormat(long id, String name) { + DatafileFormat datafileFormat = new DatafileFormat(); + datafileFormat.setId(id); + datafileFormat.setName(name); + return datafileFormat; + } + + private Dataset dataset(long id, String name, Date startDate, Date endDate, Investigation investigation) { + DatasetType type = new DatasetType(); + type.setName("type"); + type.setId(0L); + Dataset dataset = new Dataset(); + dataset.setId(id); + 
dataset.setName(name); + dataset.setCreateTime(startDate); + dataset.setModTime(endDate); + dataset.setType(type); + dataset.setInvestigation(investigation); + return dataset; + } + + private Investigation investigation(long id, String name, Date startDate, Date endDate) { + Investigation investigation = new Investigation(); + investigation.setId(id); + investigation.setName(name); + investigation.setVisitId("visitId"); + investigation.setTitle("title"); + investigation.setCreateTime(startDate); + investigation.setModTime(endDate); + investigation.setFacility(facility); + investigation.setType(investigationType); + return investigation; + } + + private InvestigationUser investigationUser(long id, long userId, String name, String fullName, + Investigation investigation) { + User user = new User(); + user.setName(name); + user.setFullName(fullName); + user.setId(userId); + InvestigationUser investigationUser = new InvestigationUser(); + investigationUser.setId(id); + investigationUser.setInvestigation(investigation); + investigationUser.setUser(user); + return investigationUser; + } + + private Parameter parameter(long id, Date value, ParameterType parameterType, EntityBaseBean parent) { + Parameter parameter = parameter(id, parameterType, parent); + parameter.setDateTimeValue(value); + return parameter; + } + + private Parameter parameter(long id, String value, ParameterType parameterType, EntityBaseBean parent) { + Parameter parameter = parameter(id, parameterType, parent); + parameter.setStringValue(value); + return parameter; + } + + private Parameter parameter(long id, double value, ParameterType parameterType, EntityBaseBean parent) { + Parameter parameter = parameter(id, parameterType, parent); + parameter.setNumericValue(value); + return parameter; + } + + private Parameter parameter(long id, String value, double rangeBottom, double rangeTop, ParameterType parameterType, + EntityBaseBean parent) { + Parameter parameter = parameter(id, parameterType, parent); 
+ parameter.setStringValue(value); + parameter.setRangeBottom(rangeBottom); + parameter.setRangeTop(rangeTop); + return parameter; + } + + private Parameter parameter(long id, ParameterType parameterType, EntityBaseBean parent) { + Parameter parameter; + if (parent instanceof Datafile) { + parameter = new DatafileParameter(); + ((DatafileParameter) parameter).setDatafile((Datafile) parent); + } else if (parent instanceof Dataset) { + parameter = new DatasetParameter(); + ((DatasetParameter) parameter).setDataset((Dataset) parent); + } else if (parent instanceof Investigation) { + parameter = new InvestigationParameter(); + ((InvestigationParameter) parameter).setInvestigation((Investigation) parent); + } else if (parent instanceof Sample) { + parameter = new SampleParameter(); + ((SampleParameter) parameter).setSample((Sample) parent); + } else { + fail(parent.getClass().getSimpleName() + " is not valid"); + return null; + } + parameter.setType(parameterType); + parameter.setId(id); + return parameter; + } + + private ParameterType parameterType(long id, String name, String units) { + ParameterType parameterType = new ParameterType(); + parameterType.setId(id); + parameterType.setName(name); + parameterType.setUnits(units); + return parameterType; + } + + private Sample sample(long id, String name, Investigation investigation) { + SampleType sampleType = new SampleType(); + sampleType.setId(0L); + sampleType.setName("test"); + Sample sample = new Sample(); + sample.setId(id); + sample.setName(name); + sample.setInvestigation(investigation); + sample.setType(sampleType); + return sample; + } + + private void modify(String... 
operations) throws IcatException { + StringBuilder sb = new StringBuilder("["); + for (String operation : operations) { + if (sb.length() != 1) { + sb.append(','); + } + sb.append(operation); + } + sb.append(']'); + searchApi.modify(sb.toString()); + searchApi.commit(); + } + + private void populateParameters(List queue, int i, EntityBaseBean parent) throws IcatException { + int j = i % 26; + int k = (i + 5) % 26; + String name = "nm " + letters.substring(j, j + 1) + letters.substring(j, j + 1) + letters.substring(j, j + 1); + String units = "u " + letters.substring(k, k + 1) + letters.substring(k, k + 1) + letters.substring(k, k + 1); + ParameterType dateParameterType = parameterType(0, "D" + name, units); + ParameterType numericParameterType = parameterType(0, "N" + name, units); + ParameterType stringParameterType = parameterType(0, "S" + name, units); + Parameter dateParameter = parameter(3 * i, new Date(now + 60000 * k * k), dateParameterType, parent); + Parameter numericParameter = parameter(3 * i + 1, new Double(j * j), numericParameterType, parent); + Parameter stringParameter = parameter(3 * i + 2, "v" + i * i, stringParameterType, parent); + queue.add(SearchApi.encodeOperation(manager, "create", dateParameter)); + queue.add(SearchApi.encodeOperation(manager, "create", numericParameter)); + queue.add(SearchApi.encodeOperation(manager, "create", stringParameter)); + } + + /** + * Populate UserGroup, Investigation, InvestigationParameter, + * InvestigationUser, Dataset,DatasetParameter,Datafile, DatafileParameter + * and Sample + */ + private void populate() throws IcatException { + List queue = new ArrayList<>(); + long investigationUserId = 0; + + Instrument instrumentZero = populateInstrument(queue, 0); + Instrument instrumentOne = populateInstrument(queue, 1); + Technique techniqueZero = populateTechnique(queue, 0); + Technique techniqueOne = populateTechnique(queue, 1); + + for (int investigationId = 0; investigationId < NUMINV; investigationId++) { + 
String word = word(investigationId % 26, (investigationId + 7) % 26, (investigationId + 17) % 26); + Date startDate = new Date(now + investigationId * 60000); + Date endDate = new Date(now + (investigationId + 1) * 60000); + Investigation investigation = investigation(investigationId, word, startDate, endDate); + queue.add(SearchApi.encodeOperation(manager, "create", investigation)); + + InvestigationFacilityCycle investigationFacilityCycle = new InvestigationFacilityCycle(); + investigationFacilityCycle.setId(new Long(investigationId)); + investigationFacilityCycle.setFacilityCycle(facilityCycle); + investigationFacilityCycle.setInvestigation(investigation); + queue.add(SearchApi.encodeOperation(manager, "create", investigationFacilityCycle)); + + InvestigationInstrument investigationInstrument = new InvestigationInstrument(); + investigationInstrument.setId(new Long(investigationId)); + if (investigationId % 2 == 0) { + investigationInstrument.setInstrument(instrumentZero); + } else { + investigationInstrument.setInstrument(instrumentOne); + } + investigationInstrument.setInvestigation(investigation); + queue.add(SearchApi.encodeOperation(manager, "create", investigationInstrument)); + + for (int userId = 0; userId < NUMUSERS; userId++) { + if (investigationId % (userId + 1) == 1) { + String fullName = "FN " + letters.substring(userId, userId + 1) + " " + + letters.substring(userId, userId + 1); + String name = letters.substring(userId, userId + 1) + userId; + InvestigationUser investigationUser = investigationUser(investigationUserId, userId, name, fullName, + investigation); + queue.add(SearchApi.encodeOperation(manager, "create", investigationUser)); + investigationUserId++; + } + } + + if (investigationId % 2 == 1) { + populateParameters(queue, investigationId, investigation); + } + + for (int sampleBatch = 0; sampleBatch * NUMINV < NUMSAMP; sampleBatch++) { + int sampleId = sampleBatch * NUMINV + investigationId; + if (sampleId >= NUMSAMP) { + break; + } + } 
+ + for (int datasetBatch = 0; datasetBatch * NUMINV < NUMDS; datasetBatch++) { + int datasetId = datasetBatch * NUMINV + investigationId; + if (datasetId >= NUMDS) { + break; + } + startDate = new Date(now + datasetId * 60000); + endDate = new Date(now + (datasetId + 1) * 60000); + word = word("DS", datasetId % 26); + Dataset dataset = dataset(datasetId, word, startDate, endDate, investigation); + + if (datasetId % 2 == 0) { + populateDatasetTechnique(queue, techniqueZero, dataset); + } else { + populateDatasetTechnique(queue, techniqueOne, dataset); + } + + if (datasetId < NUMSAMP) { + word = word("SType ", datasetId); + Sample sample = sample(datasetId, word, investigation); + queue.add(SearchApi.encodeOperation(manager, "create", sample)); + dataset.setSample(sample); + } + + queue.add(SearchApi.encodeOperation(manager, "create", dataset)); + + if (datasetId % 3 == 1) { + populateParameters(queue, datasetId, dataset); + } + + for (int datafileBatch = 0; datafileBatch * NUMDS < NUMDF; datafileBatch++) { + int datafileId = datafileBatch * NUMDS + datasetId; + if (datafileId >= NUMDF) { + break; + } + word = word("DF", datafileId % 26); + Datafile datafile = datafile(datafileId, word, "/dir/" + word, new Date(now + datafileId * 60000), + dataset); + queue.add(SearchApi.encodeOperation(manager, "create", datafile)); + + if (datafileId % 4 == 1) { + populateParameters(queue, datafileId, datafile); + } + } + } + } + + modify(queue.toArray(new String[0])); + } + + /** + * Queues creation of an Instrument and a corresponding instrument scientist. + * + * @param queue Queue to add create operations to. + * @param instrumentId ICAT entity Id to use for the instrument/instrument + * scientist. + * @return The Instrument entity created. 
+ * @throws IcatException + */ + private Instrument populateInstrument(List queue, long instrumentId) throws IcatException { + Instrument instrument = new Instrument(); + instrument.setId(instrumentId); + instrument.setName("bl" + instrumentId); + instrument.setFullName("Beamline " + instrumentId); + queue.add(SearchApi.encodeOperation(manager, "create", instrument)); + User user = new User(); + user.setId(Long.valueOf(NUMUSERS + instrumentId)); + user.setName("scientist_" + instrumentId); + InstrumentScientist instrumentScientist = new InstrumentScientist(); + instrumentScientist.setId(instrumentId); + instrumentScientist.setInstrument(instrument); + instrumentScientist.setUser(user); + queue.add(SearchApi.encodeOperation(manager, "create", instrumentScientist)); + return instrument; + } + + /** + * Queues creation of a Technique. + * + * @param queue Queue to add create operations to. + * @param techniqueId ICAT entity Id to use for the Technique. + * @return The Technique entity created. + * @throws IcatException + */ + private Technique populateTechnique(List queue, long techniqueId) throws IcatException { + Technique technique = new Technique(); + technique.setId(techniqueId); + technique.setName("technique" + techniqueId); + technique.setDescription("Technique number " + techniqueId); + technique.setPid(Long.toString(techniqueId)); + queue.add(SearchApi.encodeOperation(manager, "create", technique)); + return technique; + } + + /** + * Queues creation of a DatasetTechnique. + * + * @param queue Queue to add create operations to. + * @param technique Technique to associate with the created DatasetTechnique. 
+ * @throws IcatException + */ + private void populateDatasetTechnique(List queue, Technique technique, Dataset dataset) + throws IcatException { + DatasetTechnique datasetTechnique = new DatasetTechnique(); + datasetTechnique.setId(technique.getId() * 100 + dataset.getId()); + datasetTechnique.setTechnique(technique); + datasetTechnique.setDataset(dataset); + queue.add(SearchApi.encodeOperation(manager, "create", datasetTechnique)); + } + + private String word(int j, int k, int l) { + String jString = letters.substring(j, j + 1); + String kString = letters.substring(k, k + 1); + String lString = letters.substring(l, l + 1); + return jString + " " + kString + " " + lString; + } + + private String word(String prefix, int j) { + String jString = letters.substring(j, j + 1); + return prefix + jString + jString + jString; + } + + @Before + public void before() throws Exception { + searchApi.clear(); + } + + @Test + public void datafiles() throws Exception { + populate(); + JsonObjectBuilder sortBuilder = Json.createObjectBuilder(); + String sort; + + // Test size and searchAfter + JsonObject query = buildQuery("Datafile", null, null, null, null, null, null); + SearchResult lsr = searchApi.getResults(query, null, 5, null, datafileFields); + JsonValue searchAfter = lsr.getSearchAfter(); + checkResults(lsr, 0L, 1L, 2L, 3L, 4L); + checkDatafile(lsr.getResults().get(0)); + assertNotNull(SEARCH_AFTER_NOT_NULL, searchAfter); + + lsr = searchApi.getResults(query, searchAfter, 200, null, datafileFields); + assertNull(lsr.getSearchAfter()); + assertEquals(95, lsr.getResults().size()); + + // Test searchAfter preserves the sorting of original search (asc) + sort = sortBuilder.add("date", "asc").build().toString(); + lsr = searchApi.getResults(query, null, 5, sort, datafileFields); + checkOrder(lsr, 0L, 1L, 2L, 3L, 4L); + searchAfter = lsr.getSearchAfter(); + assertNotNull(SEARCH_AFTER_NOT_NULL, searchAfter); + lsr = searchApi.getResults(query, searchAfter, 5, sort, 
datafileFields); + checkOrder(lsr, 5L, 6L, 7L, 8L, 9L); + searchAfter = lsr.getSearchAfter(); + assertNotNull(SEARCH_AFTER_NOT_NULL, searchAfter); + + // Test searchAfter preserves the sorting of original search (desc) + sort = sortBuilder.add("date", "desc").build().toString(); + lsr = searchApi.getResults(query, null, 5, sort, datafileFields); + checkOrder(lsr, 99L, 98L, 97L, 96L, 95L); + searchAfter = lsr.getSearchAfter(); + assertNotNull(SEARCH_AFTER_NOT_NULL, searchAfter); + lsr = searchApi.getResults(query, searchAfter, 5, sort, datafileFields); + checkOrder(lsr, 94L, 93L, 92L, 91L, 90L); + searchAfter = lsr.getSearchAfter(); + assertNotNull(SEARCH_AFTER_NOT_NULL, searchAfter); + + // Test tie breaks on fields with identical values (asc) + sort = sortBuilder.add("name", "asc").build().toString(); + lsr = searchApi.getResults(query, null, 5, sort, datafileFields); + checkOrder(lsr, 0L, 26L, 52L, 78L, 1L); + searchAfter = lsr.getSearchAfter(); + assertNotNull(SEARCH_AFTER_NOT_NULL, searchAfter); + sort = sortBuilder.add("name", "asc").add("date", "desc").build().toString(); + lsr = searchApi.getResults(query, null, 5, sort, datafileFields); + checkOrder(lsr, 78L, 52L, 26L, 0L, 79L); + searchAfter = lsr.getSearchAfter(); + assertNotNull(SEARCH_AFTER_NOT_NULL, searchAfter); + + // Test tie breaks on fields with identical values (desc) + sort = sortBuilder.add("name", "desc").build().toString(); + lsr = searchApi.getResults(query, null, 5, sort, datafileFields); + checkOrder(lsr, 25L, 51L, 77L, 24L, 50L); + searchAfter = lsr.getSearchAfter(); + assertNotNull(SEARCH_AFTER_NOT_NULL, searchAfter); + sort = sortBuilder.add("name", "desc").add("date", "desc").build().toString(); + lsr = searchApi.getResults(query, null, 5, sort, datafileFields); + checkOrder(lsr, 77L, 51L, 25L, 76L, 50L); + searchAfter = lsr.getSearchAfter(); + assertNotNull(SEARCH_AFTER_NOT_NULL, searchAfter); + + query = buildQuery("Datafile", "e4", null, null, null, null, null); + lsr = 
searchApi.getResults(query, 100, null); + checkResults(lsr, 1L, 6L, 11L, 16L, 21L, 26L, 31L, 36L, 41L, 46L, 51L, 56L, 61L, 66L, 71L, 76L, 81L, 86L, 91L, + 96L); + + // Test instrumentScientists only see their data + query = buildQuery("Datafile", "scientist_0", null, null, null, null, null); + lsr = searchApi.getResults(query, 5, null); + checkResults(lsr, 0L, 2L, 4L, 6L, 8L); + + query = buildQuery("Datafile", "e4", "dfbbb", null, null, null, null); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr, 1L); + + query = buildQuery("Datafile", null, "dfbbb", null, null, null, null); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr, 1L, 27L, 53L, 79L); + + query = buildQuery("Datafile", null, null, new Date(now + 60000 * 3), + new Date(now + 60000 * 6), null, null); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr, 3L, 4L, 5L, 6L); + + query = buildQuery("Datafile", "b1", "dsddd", new Date(now + 60000 * 3), + new Date(now + 60000 * 6), null, null); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr); + + // Target visitId + query = buildQuery("Datafile", null, "visitId:visitId", null, null, null, null); + lsr = searchApi.getResults(query, 5, null); + checkResults(lsr, 0L, 1L, 2L, 3L, 4L); + query = buildQuery("Datafile", null, "visitId:qwerty", null, null, null, null); + lsr = searchApi.getResults(query, 5, null); + checkResults(lsr); + + // Target sample.name + query = buildQuery("Datafile", null, "sample.name:ddd", null, null, null, null); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr, 3L, 33L, 63L, 93L); + + // Multiple samples associated with investigation 3 + query = buildQuery("Datafile", null, "ddd nnn", null, null, null, null); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr, 3L, 13L, 33L, 43L, 63L, 73L, 93L); + + // By default, sample ddd OR sample mmm gives two + query = buildQuery("Datafile", null, "ddd mmm", null, null, null, null); + lsr = 
searchApi.getResults(query, 100, null); + checkResults(lsr, 3L, 12L, 33L, 42L, 63L, 72L, 93L); + + // AND logic should not return any results + query = buildQuery("Datafile", null, "+ddd +mmm", null, null, null, null); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr); + + List pojos = new ArrayList<>(); + pojos.add(new ParameterPOJO(null, null, "v25")); + query = buildQuery("Datafile", null, null, new Date(now + 60000 * 3), + new Date(now + 60000 * 6), pojos, null); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr, 5L); + + pojos = new ArrayList<>(); + pojos.add(new ParameterPOJO(null, null, "v25")); + query = buildQuery("Datafile", null, null, null, null, pojos, null); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr, 5L); + + pojos = new ArrayList<>(); + pojos.add(new ParameterPOJO(null, "u sss", null)); + query = buildQuery("Datafile", null, null, null, null, pojos, null); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr, 13L, 65L); + } + + @Test + public void datasets() throws Exception { + populate(); + JsonObjectBuilder sortBuilder = Json.createObjectBuilder(); + String sort; + + JsonObject query = buildQuery("Dataset", null, null, null, null, null, null); + SearchResult lsr = searchApi.getResults(query, null, 5, null, datasetFields); + checkResults(lsr, 0L, 1L, 2L, 3L, 4L); + checkDataset(lsr.getResults().get(0)); + JsonValue searchAfter = lsr.getSearchAfter(); + assertNotNull(SEARCH_AFTER_NOT_NULL, searchAfter); + lsr = searchApi.getResults(query, searchAfter, 100, null, datasetFields); + assertNull(lsr.getSearchAfter()); + checkResults(lsr, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, + 25L, 26L, 27L, 28L, 29L); + + // Test searchAfter preserves the sorting of original search (asc) + sort = sortBuilder.add("date", "asc").build().toString(); + lsr = searchApi.getResults(query, 5, sort); + checkOrder(lsr, 0L, 1L, 2L, 3L, 4L); + 
searchAfter = lsr.getSearchAfter(); + assertNotNull(SEARCH_AFTER_NOT_NULL, searchAfter); + lsr = searchApi.getResults(query, searchAfter, 5, sort, datasetFields); + checkOrder(lsr, 5L, 6L, 7L, 8L, 9L); + searchAfter = lsr.getSearchAfter(); + assertNotNull(SEARCH_AFTER_NOT_NULL, searchAfter); + + // Test searchAfter preserves the sorting of original search (desc) + sort = sortBuilder.add("date", "desc").build().toString(); + lsr = searchApi.getResults(query, 5, sort); + checkOrder(lsr, 29L, 28L, 27L, 26L, 25L); + searchAfter = lsr.getSearchAfter(); + assertNotNull(SEARCH_AFTER_NOT_NULL, searchAfter); + lsr = searchApi.getResults(query, searchAfter, 5, sort, datasetFields); + checkOrder(lsr, 24L, 23L, 22L, 21L, 20L); + searchAfter = lsr.getSearchAfter(); + assertNotNull(SEARCH_AFTER_NOT_NULL, searchAfter); + + // Test tie breaks on fields with identical values (asc) + sort = sortBuilder.add("name", "asc").build().toString(); + lsr = searchApi.getResults(query, null, 5, sort, datasetFields); + checkOrder(lsr, 0L, 26L, 1L, 27L, 2L); + searchAfter = lsr.getSearchAfter(); + assertNotNull(SEARCH_AFTER_NOT_NULL, searchAfter); + sort = sortBuilder.add("name", "asc").add("date", "desc").build().toString(); + lsr = searchApi.getResults(query, 5, sort); + checkOrder(lsr, 26L, 0L, 27L, 1L, 28L); + searchAfter = lsr.getSearchAfter(); + assertNotNull(SEARCH_AFTER_NOT_NULL, searchAfter); + + lsr = searchApi.getResults(buildQuery("Dataset", "e4", null, null, null, null, null), 100, + null); + checkResults(lsr, 1L, 6L, 11L, 16L, 21L, 26L); + + // Test instrumentScientists only see their data + query = buildQuery("Dataset", "scientist_0", null, null, null, null, null); + lsr = searchApi.getResults(query, 5, null); + checkResults(lsr, 0L, 2L, 4L, 6L, 8L); + + // Test filter + query = buildQuery("Dataset", null, null, null, null, null, null, new Filter("dataset.type.name", "type")); + lsr = searchApi.getResults(query, 5, null); + checkResults(lsr, 0L, 1L, 2L, 3L, 4L); + query = 
buildQuery("Dataset", null, null, null, null, null, null, + new Filter("dataset.type.name", "type", "typo")); + lsr = searchApi.getResults(query, 5, null); + checkResults(lsr, 0L, 1L, 2L, 3L, 4L); + query = buildQuery("Dataset", null, null, null, null, null, null, new Filter("dataset.type.name", "typo")); + lsr = searchApi.getResults(query, 5, null); + checkResults(lsr); + + lsr = searchApi.getResults(buildQuery("Dataset", "e4", "dsbbb", null, null, null, null), 100, + null); + checkResults(lsr, 1L); + + lsr = searchApi.getResults(buildQuery("Dataset", null, "dsbbb", null, null, null, null), 100, + null); + checkResults(lsr, 1L, 27L); + + lsr = searchApi.getResults(buildQuery("Dataset", null, null, new Date(now + 60000 * 3), + new Date(now + 60000 * 6), null, null), 100, null); + checkResults(lsr, 3L, 4L, 5L); + + lsr = searchApi.getResults(buildQuery("Dataset", "b1", "dsddd", new Date(now + 60000 * 3), + new Date(now + 60000 * 6), null, null), 100, null); + checkResults(lsr, 3L); + + // Target visitId + query = buildQuery("Dataset", null, "visitId:visitId", null, null, null, null); + lsr = searchApi.getResults(query, 5, null); + checkResults(lsr, 0L, 1L, 2L, 3L, 4L); + query = buildQuery("Dataset", null, "visitId:qwerty", null, null, null, null); + lsr = searchApi.getResults(query, 5, null); + checkResults(lsr); + + // Target sample.name + query = buildQuery("Dataset", null, "sample.name:ddd", null, null, null, null); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr, 3L); + + // Multiple samples associated with investigation 3 + query = buildQuery("Dataset", null, "ddd nnn", null, null, null, null); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr, 3L, 13L); + + // By default, sample ddd OR sample mmm gives two + query = buildQuery("Dataset", null, "ddd mmm", null, null, null, null); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr, 3L, 12L); + + // AND logic should not return any results + query = 
buildQuery("Dataset", null, "+ddd +mmm", null, null, null, null); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr); + + List pojos = new ArrayList<>(); + pojos.add(new ParameterPOJO(null, null, "v16")); + lsr = searchApi.getResults(buildQuery("Dataset", null, null, null, null, pojos, null), 100, + null); + checkResults(lsr, 4L); + + pojos = new ArrayList<>(); + pojos.add(new ParameterPOJO(null, null, "v16")); + lsr = searchApi.getResults(buildQuery("Dataset", null, null, new Date(now + 60000 * 3), + new Date(now + 60000 * 6), pojos, null), 100, null); + checkResults(lsr, 4L); + + pojos = new ArrayList<>(); + pojos.add(new ParameterPOJO(null, null, "v16")); + lsr = searchApi.getResults(buildQuery("Dataset", "b1", "dsddd", new Date(now + 60000 * 3), + new Date(now + 60000 * 6), pojos, null), 100, null); + checkResults(lsr); + + // Test DatasetTechnique Facets + JsonObject stringFacetRequestZero = buildFacetStringRequest("dataset.id", 0, "technique.name"); + JsonObject stringFacetRequestOne = buildFacetStringRequest("dataset.id", 1, "technique.name"); + FacetDimension facetZero = new FacetDimension("", "technique.name", new FacetLabel("technique0", 1L)); + FacetDimension facetOne = new FacetDimension("", "technique.name", new FacetLabel("technique1", 1L)); + checkFacets(searchApi.facetSearch("DatasetTechnique", stringFacetRequestZero, 5, 5), facetZero); + checkFacets(searchApi.facetSearch("DatasetTechnique", stringFacetRequestOne, 5, 5), facetOne); + + // Test instrument.name Facets + JsonObject instrumentFacetRequestZero = buildFacetStringRequest("investigation.id", 0, "instrument.name"); + JsonObject instrumentFacetRequestOne = buildFacetStringRequest("investigation.id", 1, "instrument.name"); + FacetDimension instrumentFacetZero = new FacetDimension("", "instrument.name", new FacetLabel("bl0", 1L)); + FacetDimension instrumentFacetOne = new FacetDimension("", "instrument.name", new FacetLabel("bl1", 1L)); + 
checkFacets(searchApi.facetSearch("InvestigationInstrument", instrumentFacetRequestZero, 5, 5), instrumentFacetZero); + checkFacets(searchApi.facetSearch("InvestigationInstrument", instrumentFacetRequestOne, 5, 5), instrumentFacetOne); + } + + @Test + public void investigations() throws Exception { + populate(); + JsonObjectBuilder sortBuilder = Json.createObjectBuilder(); + String sort; + + /* Blocked results */ + JsonObject query = buildQuery("Investigation", null, null, null, null, null, null); + SearchResult lsr = searchApi.getResults(query, null, 5, null, investigationFields); + checkResults(lsr, 0L, 1L, 2L, 3L, 4L); + checkInvestigation(lsr.getResults().get(0)); + JsonValue searchAfter = lsr.getSearchAfter(); + assertNotNull(SEARCH_AFTER_NOT_NULL, searchAfter); + lsr = searchApi.getResults(query, searchAfter, 6, null, investigationFields); + checkResults(lsr, 5L, 6L, 7L, 8L, 9L); + searchAfter = lsr.getSearchAfter(); + assertNull(searchAfter); + + // Test searchAfter preserves the sorting of original search (asc) + sort = sortBuilder.add("date", "asc").build().toString(); + lsr = searchApi.getResults(query, 5, sort); + checkOrder(lsr, 0L, 1L, 2L, 3L, 4L); + searchAfter = lsr.getSearchAfter(); + assertNotNull(SEARCH_AFTER_NOT_NULL, searchAfter); + lsr = searchApi.getResults(query, searchAfter, 5, sort, investigationFields); + checkOrder(lsr, 5L, 6L, 7L, 8L, 9L); + searchAfter = lsr.getSearchAfter(); + assertNotNull(SEARCH_AFTER_NOT_NULL, searchAfter); + + // Test searchAfter preserves the sorting of original search (desc) + sort = sortBuilder.add("date", "desc").build().toString(); + lsr = searchApi.getResults(query, 5, sort); + checkOrder(lsr, 9L, 8L, 7L, 6L, 5L); + searchAfter = lsr.getSearchAfter(); + assertNotNull(SEARCH_AFTER_NOT_NULL, searchAfter); + lsr = searchApi.getResults(query, searchAfter, 5, sort, investigationFields); + checkOrder(lsr, 4L, 3L, 2L, 1L, 0L); + searchAfter = lsr.getSearchAfter(); + assertNotNull(SEARCH_AFTER_NOT_NULL, searchAfter); 
+ + // Test instrumentScientists only see their data + query = buildQuery("Investigation", "scientist_0", null, null, null, null, null); + lsr = searchApi.getResults(query, 5, null); + checkResults(lsr, 0L, 2L, 4L, 6L, 8L); + + // Test filter + query = buildQuery("Investigation", null, null, null, null, null, null, + new Filter("investigation.type.name", "type")); + lsr = searchApi.getResults(query, 5, null); + checkResults(lsr, 0L, 1L, 2L, 3L, 4L); + query = buildQuery("Investigation", null, null, null, null, null, null, + new Filter("investigation.type.name", "type", "typo")); + lsr = searchApi.getResults(query, 5, null); + checkResults(lsr, 0L, 1L, 2L, 3L, 4L); + query = buildQuery("Investigation", null, null, null, null, null, null, + new Filter("investigation.type.name", "typo")); + lsr = searchApi.getResults(query, 5, null); + checkResults(lsr); + + query = buildQuery("Investigation", null, null, null, null, null, "b"); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr, 1L, 3L, 5L, 7L, 9L); + + query = buildQuery("Investigation", null, null, null, null, null, "FN"); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr, 1L, 3L, 4L, 5L, 6L, 7L, 9L); + + query = buildQuery("Investigation", null, null, null, null, null, "FN AND \"b b\""); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr, 1L, 3L, 5L, 7L, 9L); + + query = buildQuery("Investigation", "b1", null, null, null, null, "b"); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr, 1L, 3L, 5L, 7L, 9L); + + query = buildQuery("Investigation", "c1", null, null, null, null, "b"); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr); + + query = buildQuery("Investigation", null, "l v", null, null, null, null); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr, 4L); + + query = buildQuery("Investigation", "b1", "d", null, null, null, "b"); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr, 3L); + + query 
= buildQuery("Investigation", "b1", "d", new Date(now + 60000 * 3), new Date(now + 60000 * 6), + null, "b"); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr, 3L); + + query = buildQuery("Investigation", null, null, new Date(now + 60000 * 3), new Date(now + 60000 * 6), + null, null); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr, 3L, 4L, 5L); + + List pojos = new ArrayList<>(); + pojos.add(new ParameterPOJO(null, null, "v9")); + query = buildQuery("Investigation", null, null, null, null, pojos, null); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr, 3L); + + pojos = new ArrayList<>(); + pojos.add(new ParameterPOJO(null, null, "v9")); + pojos.add(new ParameterPOJO(null, null, 7, 10)); + pojos.add(new ParameterPOJO(null, null, new Date(now + 60000 * 63), new Date(now + 60000 * 65))); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr, 3L); + + pojos = new ArrayList<>(); + pojos.add(new ParameterPOJO(null, null, "v9")); + query = buildQuery("Investigation", "b1", "d", new Date(now + 60000 * 3), new Date(now + 60000 * 6), + pojos, "b"); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr, 3L); + + pojos = new ArrayList<>(); + pojos.add(new ParameterPOJO(null, null, "v9")); + pojos.add(new ParameterPOJO(null, null, "v81")); + query = buildQuery("Investigation", null, null, null, null, pojos, null); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr); + + pojos = new ArrayList<>(); + pojos.add(new ParameterPOJO("Snm ddd", "u iii", "v9")); + query = buildQuery("Investigation", null, null, null, null, pojos, null); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr, 3L); + + // Target visitId + query = buildQuery("Investigation", null, "visitId:visitId", null, null, null, null); + lsr = searchApi.getResults(query, 5, null); + checkResults(lsr, 0L, 1L, 2L, 3L, 4L); + query = buildQuery("Investigation", null, "visitId:qwerty", null, null, null, null); + 
lsr = searchApi.getResults(query, 5, null); + checkResults(lsr); + + // Target sample.name + query = buildQuery("Investigation", null, "sample.name:ddd", null, null, null, null); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr, 3L); + + // Multiple samples associated with investigation 3 + query = buildQuery("Investigation", null, "ddd nnn", null, null, null, null); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr, 3L); + + // By default, sample ddd OR sample mmm gives two investigations + query = buildQuery("Investigation", null, "ddd mmm", null, null, null, null); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr, 2L, 3L); + + // AND logic should not return any results + query = buildQuery("Investigation", null, "+ddd +mmm", null, null, null, null); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr); + + // Fields on Investigation and Sample + query = buildQuery("Investigation", null, "visitId ddd", null, null, null, null); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L); + // ID 3 should be most relevant since it matches both terms + lsr = searchApi.getResults(query, 1, null); + checkResults(lsr, 3L); + // Specifying fields should not alter behaviour + query = buildQuery("Investigation", null, "visitId:visitId sample.name:ddd", null, null, null, null); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L); + // Individual MUST should work when applied to either an Investigation or Sample + query = buildQuery("Investigation", null, "+visitId:visitId", null, null, null, null); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L); + query = buildQuery("Investigation", null, "+sample.name:ddd", null, null, null, null); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr, 3L); + // This query is expected to fail, as 
we apply both terms to Investigation and + // Sample (since we have no fields) and neither possesses both terms. + query = buildQuery("Investigation", null, "+visitId +ddd", null, null, null, null); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr); + + pojos = new ArrayList<>(); + pojos.add(new ParameterPOJO("Snm ddd", "u iii", "v9")); + query = buildQuery("Investigation", "b1", "d ddd nnnn", new Date(now + 60000 * 3), new Date(now + 60000 * 6), + pojos, "b"); + lsr = searchApi.getResults(query, 100, null); + checkResults(lsr, 3L); + + // Sample filtering + query = buildQuery("Investigation", null, null, null, null, null, null, new Filter("sample.sample.type.name", "test")); + lsr = searchApi.getResults(query, 5, null); + checkResults(lsr, 0L, 1L, 2L, 3L, 4L); + + query = buildQuery("Investigation", null, null, null, null, null, null, new Filter("sample.sample.type.name", "fail")); + lsr = searchApi.getResults(query, 5, null); + checkResults(lsr); + + } + + @Test + public void locking() throws IcatException { + // Only LuceneApi needs manually locking + if (searchApi instanceof LuceneApi) { + logger.info("Performing locking tests for {}", searchApi.getClass().getSimpleName()); + try { + searchApi.unlock("Dataset"); + fail(); + } catch (IcatException e) { + assertEquals("Lucene is not currently locked for Dataset", e.getMessage()); + } + searchApi.lock("Dataset", 0L, 1L, true); + try { + searchApi.lock("Dataset", 0L, 1L, true); + fail(); + } catch (IcatException e) { + assertEquals("Lucene already locked for Dataset", e.getMessage()); + } + searchApi.unlock("Dataset"); + try { + searchApi.unlock("Dataset"); + fail(); + } catch (IcatException e) { + assertEquals("Lucene is not currently locked for Dataset", e.getMessage()); + } + } else { + logger.info("Locking tests not relevant for {}", searchApi.getClass().getSimpleName()); + } + } + + @Ignore // Aggregating in real time is really slow, so don't test + @Test + public void fileSizeAggregation() 
throws IcatException { + // Build entities + Investigation investigation = investigation(0, "name", date, date); + Dataset dataset = dataset(0, "name", date, date, investigation); + Datafile datafile = datafile(0, "name", "/dir", new Date(0), dataset); + datafile.setFileSize(123L); + + // Build queries + JsonObject datafileQuery = buildQuery("Datafile", null, "*", null, null, null, null); + JsonObject datasetQuery = buildQuery("Dataset", null, "*", null, null, null, null); + JsonObject investigationQuery = buildQuery("Investigation", null, "*", null, null, null, null); + List fields = Arrays.asList("id", "fileSize", "fileCount"); + + // Create + String createInvestigation = SearchApi.encodeOperation(manager, "create", investigation); + String createDataset = SearchApi.encodeOperation(manager, "create", dataset); + String createDatafile = SearchApi.encodeOperation(manager, "create", datafile); + modify(createInvestigation, createDataset, createDatafile); + checkFileSize(datafileQuery, fields, 123, 1); + checkFileSize(datasetQuery, fields, 123, 1); + checkFileSize(investigationQuery, fields, 123, 1); + + // Update + datafile.setFileSize(456L); + modify(SearchApi.encodeOperation(manager, "update", datafile)); + checkFileSize(datafileQuery, fields, 456, 1); + checkFileSize(datasetQuery, fields, 456, 1); + checkFileSize(investigationQuery, fields, 456, 1); + + // Delete + modify(SearchApi.encodeOperation(manager, "delete", datafile)); + checkFileSize(datasetQuery, fields, 0, 0); + checkFileSize(investigationQuery, fields, 0, 0); + } + + private void checkFileSize(JsonObject query, List fields, long expectedFileSize, long expectedFileCount) + throws IcatException { + SearchResult results = searchApi.getResults(query, null, 5, null, fields); + checkResults(results, 0L); + JsonObject source = results.getResults().get(0).getSource(); + long fileSize = source.getJsonNumber("fileSize").longValueExact(); + long fileCount = source.getJsonNumber("fileCount").longValueExact(); + 
assertEquals(expectedFileSize, fileSize); + assertEquals(expectedFileCount, fileCount); + } + + @Test + public void modifyDatafile() throws IcatException { + // Build entities + DatafileFormat pdfFormat = datafileFormat(0, "pdf"); + DatafileFormat pngFormat = datafileFormat(0, "png"); + Investigation investigation = investigation(0, "name", date, date); + Dataset dataset = dataset(0, "name", date, date, investigation); + Datafile elephantDatafile = datafile(42, "Elephants and Aardvarks", "/dir", new Date(0), dataset); + Datafile rhinoDatafile = datafile(42, "Rhinos and Aardvarks", "/dir", new Date(3), dataset); + rhinoDatafile.setDatafileFormat(pdfFormat); + + // Build queries + JsonObject elephantQuery = buildQuery("Datafile", null, "elephant", null, null, null, null); + JsonObject rhinoQuery = buildQuery("Datafile", null, "rhino", null, null, null, null); + JsonObject pdfQuery = buildQuery("Datafile", null, "datafileFormat.name:pdf", null, null, null, null); + JsonObject pngQuery = buildQuery("Datafile", null, "datafileFormat.name:png", null, null, null, null); + JsonObject lowRange = buildFacetRangeObject("low", 0L, 2L); + JsonObject highRange = buildFacetRangeObject("high", 2L, 4L); + JsonObject facetIdQuery = buildFacetIdQuery("id", 42); + JsonObject rangeFacetRequest = buildFacetRangeRequest(facetIdQuery, "date", lowRange, highRange); + JsonObject stringFacetRequest = buildFacetStringRequest("id", 42, "datafileFormat.name"); + JsonObject sparseFacetRequest = buildFacetSparseRequest(facetIdQuery); + FacetDimension lowFacet = new FacetDimension("", "date", new FacetLabel("low", 1L), new FacetLabel("high", 0L)); + FacetDimension highFacet = new FacetDimension("", "date", new FacetLabel("low", 0L), + new FacetLabel("high", 1L)); + FacetDimension pdfFacet = new FacetDimension("", "datafileFormat.name", new FacetLabel("pdf", 1L)); + FacetDimension pngFacet = new FacetDimension("", "datafileFormat.name", new FacetLabel("png", 1L)); + + // Original + 
modify(SearchApi.encodeOperation(manager, "create", elephantDatafile)); + checkResults(searchApi.getResults(elephantQuery, null, 5, null, datafileFields), 42L); + checkResults(searchApi.getResults(rhinoQuery, null, 5, null, datafileFields)); + checkResults(searchApi.getResults(pdfQuery, null, 5, null, datafileFields)); + checkResults(searchApi.getResults(pngQuery, null, 5, null, datafileFields)); + checkFacets(searchApi.facetSearch("Datafile", stringFacetRequest, 5, 5)); + checkFacets(searchApi.facetSearch("Datafile", sparseFacetRequest, 5, 5)); + checkFacets(searchApi.facetSearch("Datafile", rangeFacetRequest, 5, 5), lowFacet); + + // Change name and add a format + modify(SearchApi.encodeOperation(manager, "update", rhinoDatafile)); + checkResults(searchApi.getResults(elephantQuery, null, 5, null, datafileFields)); + checkResults(searchApi.getResults(rhinoQuery, null, 5, null, datafileFields), 42L); + checkResults(searchApi.getResults(pdfQuery, null, 5, null, datafileFields), 42L); + checkResults(searchApi.getResults(pngQuery, null, 5, null, datafileFields)); + checkFacets(searchApi.facetSearch("Datafile", stringFacetRequest, 5, 5), pdfFacet); + checkFacets(searchApi.facetSearch("Datafile", sparseFacetRequest, 5, 5), pdfFacet); + checkFacets(searchApi.facetSearch("Datafile", rangeFacetRequest, 5, 5), highFacet); + + // Change just the format + modify(SearchApi.encodeOperation(manager, "update", pngFormat)); + checkResults(searchApi.getResults(elephantQuery, null, 5, null, datafileFields)); + checkResults(searchApi.getResults(rhinoQuery, null, 5, null, datafileFields), 42L); + checkResults(searchApi.getResults(pdfQuery, null, 5, null, datafileFields)); + checkResults(searchApi.getResults(pngQuery, null, 5, null, datafileFields), 42L); + checkFacets(searchApi.facetSearch("Datafile", stringFacetRequest, 5, 5), pngFacet); + checkFacets(searchApi.facetSearch("Datafile", sparseFacetRequest, 5, 5), pngFacet); + checkFacets(searchApi.facetSearch("Datafile", 
rangeFacetRequest, 5, 5), highFacet); + + // Remove the format + modify(SearchApi.encodeOperation(manager, "delete", pngFormat)); + checkResults(searchApi.getResults(elephantQuery, null, 5, null, datafileFields)); + checkResults(searchApi.getResults(rhinoQuery, null, 5, null, datafileFields), 42L); + checkResults(searchApi.getResults(pdfQuery, null, 5, null, datafileFields)); + checkResults(searchApi.getResults(pngQuery, null, 5, null, datafileFields)); + checkFacets(searchApi.facetSearch("Datafile", stringFacetRequest, 5, 5)); + checkFacets(searchApi.facetSearch("Datafile", sparseFacetRequest, 5, 5)); + checkFacets(searchApi.facetSearch("Datafile", rangeFacetRequest, 5, 5), highFacet); + + // Remove the file + modify(SearchApi.encodeDeletion(elephantDatafile), SearchApi.encodeDeletion(rhinoDatafile)); + checkResults(searchApi.getResults(elephantQuery, 5)); + checkResults(searchApi.getResults(rhinoQuery, 5)); + checkResults(searchApi.getResults(pdfQuery, 5)); + checkResults(searchApi.getResults(pngQuery, 5)); + + // Multiple commands at once + modify(SearchApi.encodeOperation(manager, "create", elephantDatafile), + SearchApi.encodeOperation(manager, "update", rhinoDatafile), + SearchApi.encodeDeletion(elephantDatafile), + SearchApi.encodeDeletion(rhinoDatafile)); + checkResults(searchApi.getResults(elephantQuery, 5)); + checkResults(searchApi.getResults(rhinoQuery, 5)); + checkResults(searchApi.getResults(pdfQuery, 5)); + checkResults(searchApi.getResults(pngQuery, 5)); + } + + @Test + public void unitConversion() throws IcatException { + // Build queries for raw and SI values + JsonObjectBuilder objectBuilder = Json.createObjectBuilder(); + String lowKey = "272.5_273.5"; + String midKey = "272999.5_273000.5"; + String highKey = "273272.5_273273.5"; + JsonObject lowRange = buildFacetRangeObject(lowKey, 272.5, 273.5); + JsonObject midRange = buildFacetRangeObject(midKey, 272999.5, 273000.5); + JsonObject highRange = buildFacetRangeObject(highKey, 273272.5, 
273273.5); + JsonObject mKQuery = objectBuilder.add("type.units", "mK").build(); + JsonObject celsiusQuery = objectBuilder.add("type.units", "celsius").build(); + JsonObject wrongQuery = objectBuilder.add("type.units", "wrong").build(); + JsonObject kelvinQuery = objectBuilder.add("type.unitsSI", "Kelvin").build(); + JsonObject mKFacetQuery = buildFacetRangeRequest(mKQuery, "numericValue", lowRange, midRange, highRange); + JsonObject celsiusFacetQuery = buildFacetRangeRequest(celsiusQuery, "numericValue", lowRange, midRange, + highRange); + JsonObject wrongFacetQuery = buildFacetRangeRequest(wrongQuery, "numericValue", lowRange, midRange, highRange); + JsonObject systemFacetQuery = buildFacetRangeRequest(kelvinQuery, "numericValueSI", lowRange, midRange, + highRange); + + // Build expected values + FacetDimension rawExpectedFacet = new FacetDimension("", "numericValue", + new FacetLabel(lowKey, 0L), new FacetLabel(midKey, 1L), new FacetLabel(highKey, 0L)); + FacetDimension lowExpectedFacet = new FacetDimension("", "numericValueSI", + new FacetLabel(lowKey, 1L), new FacetLabel(midKey, 0L), new FacetLabel(highKey, 0L)); + FacetDimension highExpectedFacet = new FacetDimension("", "numericValueSI", + new FacetLabel(lowKey, 0L), new FacetLabel(midKey, 0L), new FacetLabel(highKey, 1L)); + FacetDimension noneExpectedFacet = new FacetDimension("", "numericValueSI", + new FacetLabel(lowKey, 0L), new FacetLabel(midKey, 0L), new FacetLabel(highKey, 0L)); + + // Build entities + Investigation investigation = investigation(0L, "name", date, date); + ParameterType parameterType = parameterType(0, "parameter", "mK"); + Parameter parameter = parameter(0, 273000, parameterType, investigation); + + // Create with units of mK + modify(SearchApi.encodeOperation(manager, "create", investigation), SearchApi.encodeOperation(manager, "create", parameter)); + // Assert the raw value is still 273000 (mK) + checkFacets(searchApi.facetSearch("InvestigationParameter", mKFacetQuery, 5, 5), 
rawExpectedFacet); + // Assert the SI value is 273 (K) + checkFacets(searchApi.facetSearch("InvestigationParameter", systemFacetQuery, 5, 5), lowExpectedFacet); + + // Change units only to "celsius" + parameterType.setUnits("celsius"); + modify(SearchApi.encodeOperation(manager, "update", parameter)); + // Assert the raw value is still 273000 (deg C) + checkFacets(searchApi.facetSearch("InvestigationParameter", celsiusFacetQuery, 5, 5), rawExpectedFacet); + // Assert the SI value is 273273.15 (K) + checkFacets(searchApi.facetSearch("InvestigationParameter", systemFacetQuery, 5, 5), highExpectedFacet); + + // Change units to something wrong + parameterType.setUnits("wrong"); + modify(SearchApi.encodeOperation(manager, "update", parameterType)); + // Assert the raw value is still 273000 (wrong) + checkFacets(searchApi.facetSearch("InvestigationParameter", wrongFacetQuery, 5, 5), rawExpectedFacet); + // Assert that the SI value has not been set due to conversion failing + checkFacets(searchApi.facetSearch("InvestigationParameter", systemFacetQuery, 5, 5), noneExpectedFacet); + } + + @Test + public void exactFilter() throws IcatException { + // Build entities + Investigation numericInvestigation = investigation(0, "numeric", date, date); + Investigation rangeInvestigation = investigation(1, "range", date, date); + ParameterType numericParameterType = parameterType(0, "numericParameter", "K"); + ParameterType rangeParameterType = parameterType(1, "rangeParameter", "K"); + Parameter numericParameter = parameter(0, 273, numericParameterType, numericInvestigation); + Parameter rangeParameter = parameter(1, "270 - 275", 270, 275, rangeParameterType, rangeInvestigation); + + JsonObjectBuilder filterBuilder = Json.createObjectBuilder(); + JsonArrayBuilder arrayBuilder = Json.createArrayBuilder(); + JsonObject value = Json.createObjectBuilder().add("field", "numericValue").add("exact", 273).build(); + JsonObject numericName = Json.createObjectBuilder().add("field", 
"type.name").add("value", "numericParameter") + .build(); + arrayBuilder.add(numericName).add(value); + filterBuilder.add("key", "key").add("label", "label").add("filter", arrayBuilder); + JsonObject numericFilter = filterBuilder.build(); + + filterBuilder = Json.createObjectBuilder(); + arrayBuilder = Json.createArrayBuilder(); + JsonObject rangeName = Json.createObjectBuilder().add("field", "type.name").add("value", "rangeParameter") + .build(); + arrayBuilder.add(rangeName).add(value); + filterBuilder.add("key", "key").add("label", "label").add("filter", arrayBuilder); + JsonObject rangeFilter = filterBuilder.build(); + + // Create + modify(SearchApi.encodeOperation(manager, "create", numericInvestigation), + SearchApi.encodeOperation(manager, "create", rangeInvestigation), + SearchApi.encodeOperation(manager, "create", numericParameter), + SearchApi.encodeOperation(manager, "create", rangeParameter)); + + JsonObject query = buildQuery("Investigation", null, null, null, null, null, null, + new Filter("investigationparameter", numericFilter)); + SearchResult lsr = searchApi.getResults(query, 5, null); + checkResults(lsr, 0L); + + query = buildQuery("Investigation", null, null, null, null, null, null, + new Filter("investigationparameter", rangeFilter)); + lsr = searchApi.getResults(query, 5, null); + checkResults(lsr, 1L); + } + + @Test + public void sampleParameters() throws IcatException { + // Build entities + Investigation investigation = investigation(0, "investigation", date, date); + Dataset dataset = dataset(1, "dataset", date, date, investigation); + Datafile datafile = datafile(2, "datafile", "datafile.txt", date, dataset); + Sample sample = sample(3, "sample", investigation); + ParameterType parameterType = parameterType(4, "parameter", "K"); + SampleParameter parameter = (SampleParameter) parameter(5, "stringValue", parameterType, sample); + dataset.setSample(sample); + + // Queries and expected responses + JsonObjectBuilder sampleQuery = 
Json.createObjectBuilder().add("sample.id", Json.createArrayBuilder().add(3)); + JsonObjectBuilder dimension = Json.createObjectBuilder().add("dimension", "type.name"); + JsonArrayBuilder dimensions = Json.createArrayBuilder().add(dimension); + JsonObject sampleParameterFacetQuery = Json.createObjectBuilder().add("query", sampleQuery).add("dimensions", dimensions).build(); + + JsonObjectBuilder sampleInvestigationQuery = Json.createObjectBuilder().add("sample.investigation.id", Json.createArrayBuilder().add(0)); + JsonObjectBuilder sampleTypeDimension = Json.createObjectBuilder().add("dimension", "sample.type.name"); + JsonArrayBuilder sampleTypeDimensions = Json.createArrayBuilder().add(sampleTypeDimension); + JsonObject sampleTypeFacetQuery = Json.createObjectBuilder().add("query", sampleInvestigationQuery).add("dimensions", sampleTypeDimensions).build(); + + JsonObject facetIdQuery = buildFacetIdQuery("id", 1, 2); + JsonObject sparseRequest = buildFacetSparseRequest(facetIdQuery); + + JsonObjectBuilder filterBuilder = Json.createObjectBuilder(); + JsonArrayBuilder arrayBuilder = Json.createArrayBuilder(); + JsonObject value = Json.createObjectBuilder().add("field", "stringValue").add("value", "stringValue").build(); + JsonObject numericName = Json.createObjectBuilder().add("field", "type.name").add("value", "parameter").build(); + arrayBuilder.add(numericName).add(value); + filterBuilder.add("key", "key").add("label", "label").add("filter", arrayBuilder); + JsonObject filter = filterBuilder.build(); + + FacetDimension sampleParemeterFacet = new FacetDimension("", "type.name", new FacetLabel("parameter", 1L)); + FacetDimension sampleTypeFacet = new FacetDimension("", "sample.type.name", new FacetLabel("test", 1L)); + FacetDimension datasetTypeFacet = new FacetDimension("", "type.name", new FacetLabel("type", 1L)); + JsonObject investigationQuery = buildQuery("Investigation", null, null, null, null, null, null, + new Filter("sampleparameter", filter)); + 
JsonObject datasetQuery = buildQuery("Dataset", null, null, null, null, null, null, + new Filter("sampleparameter", filter)); + JsonObject datafileQuery = buildQuery("Datafile", null, null, null, null, null, null, + new Filter("sampleparameter", filter)); + + // Create + modify(SearchApi.encodeOperation(manager, "create", investigation), + SearchApi.encodeOperation(manager, "create", dataset), + SearchApi.encodeOperation(manager, "create", datafile), + SearchApi.encodeOperation(manager, "create", sample), + SearchApi.encodeOperation(manager, "create", parameterType), + SearchApi.encodeOperation(manager, "create", parameter)); + + // Test + checkFacets(searchApi.facetSearch("SampleParameter", sampleParameterFacetQuery, 5, 5), sampleParemeterFacet); + checkFacets(searchApi.facetSearch("Sample", sampleTypeFacetQuery, 5, 5), sampleTypeFacet); + checkFacets(searchApi.facetSearch("Dataset", sparseRequest, 5, 5), datasetTypeFacet, sampleTypeFacet); + checkFacets(searchApi.facetSearch("Datafile", sparseRequest, 5, 5), sampleTypeFacet); + + SearchResult lsr = searchApi.getResults(investigationQuery, null, 5, null, investigationFields); + checkResults(lsr, 0L); + lsr = searchApi.getResults(datasetQuery, null, 5, null, datasetFields); + checkResults(lsr, 1L); + lsr = searchApi.getResults(datafileQuery, null, 5, null, datafileFields); + checkResults(lsr, 2L); + } + +} diff --git a/src/test/java/org/icatproject/integration/TestRS.java b/src/test/java/org/icatproject/integration/TestRS.java index 53e4875c2..ba4dd30a4 100644 --- a/src/test/java/org/icatproject/integration/TestRS.java +++ b/src/test/java/org/icatproject/integration/TestRS.java @@ -2,6 +2,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -19,7 +20,10 @@ import java.io.ByteArrayInputStream; import 
java.io.ByteArrayOutputStream; import java.io.InputStream; +import java.io.StringReader; +import java.net.MalformedURLException; import java.net.URI; +import java.net.URISyntaxException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -37,16 +41,21 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.Map.Entry; import java.util.regex.Pattern; import jakarta.json.Json; import jakarta.json.JsonArray; +import jakarta.json.JsonArrayBuilder; import jakarta.json.JsonNumber; import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; import jakarta.json.JsonValue; import jakarta.json.stream.JsonGenerator; -import org.icatproject.core.manager.LuceneApi; +import org.icatproject.core.manager.search.LuceneApi; +import org.icatproject.core.manager.search.OpensearchApi; +import org.icatproject.core.manager.search.SearchApi; import org.icatproject.icat.client.ICAT; import org.icatproject.icat.client.IcatException; import org.icatproject.icat.client.IcatException.IcatExceptionType; @@ -56,6 +65,7 @@ import org.icatproject.icat.client.Session.DuplicateAction; import org.icatproject.EntityBaseBean; import org.icatproject.Facility; +import org.icatproject.PublicStep; /** * These tests are for those aspects that cannot be tested by the core tests. 
In @@ -63,9 +73,38 @@ */ public class TestRS { + private static final String NO_DIMENSIONS = "Did not expect responseObject to contain 'dimensions', but it did"; + private static final DateFormat dft = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ"); private static WSession wSession; private static long end; private static long start; + private static SearchApi searchApi; + + /** + * Utility function for manually clearing the search engine indices based on the + * System properties + * + * @throws URISyntaxException + * @throws MalformedURLException + * @throws org.icatproject.core.IcatException + */ + private static void clearSearch() + throws URISyntaxException, MalformedURLException, org.icatproject.core.IcatException { + if (searchApi == null) { + String searchEngine = System.getProperty("searchEngine"); + String urlString = System.getProperty("searchUrls"); + URI uribase = new URI(urlString); + if (searchEngine.equals("LUCENE")) { + searchApi = new LuceneApi(uribase); + } else if (searchEngine.equals("OPENSEARCH") || searchEngine.equals("ELASTICSEARCH")) { + searchApi = new OpensearchApi(uribase); + } else { + throw new RuntimeException( + "searchEngine must be one of LUCENE, OPENSEARCH, ELASTICSEARCH, but it was " + searchEngine); + } + } + searchApi.clear(); + } @BeforeClass public static void beforeClass() throws Exception { @@ -88,14 +127,26 @@ public void clearSession() throws Exception { wSession.clearAuthz(); } - @Ignore("Test fails because of bug in eclipselink") - @Test - public void testDistinctBehaviour() throws Exception { + private Session rootSession() throws URISyntaxException, IcatException { ICAT icat = new ICAT(System.getProperty("serverUrl")); Map credentials = new HashMap<>(); credentials.put("username", "root"); credentials.put("password", "password"); - Session session = icat.login("db", credentials); + return icat.login("db", credentials); + } + + private Session piOneSession() throws URISyntaxException, IcatException { + ICAT icat = new 
ICAT(System.getProperty("serverUrl")); + Map credentials = new HashMap<>(); + credentials.put("username", "piOne"); + credentials.put("password", "piOne"); + return icat.login("db", credentials); + } + + @Ignore("Test fails because of bug in eclipselink") + @Test + public void testDistinctBehaviour() throws Exception { + Session session = rootSession(); Path path = Paths.get(this.getClass().getResource("/icat.port").toURI()); session.importMetaData(path, DuplicateAction.CHECK, Attributes.USER); @@ -115,97 +166,244 @@ public void TestJsoniseBean() throws Exception { DateFormat dft = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX"); Session session = createAndPopulate(); - /* Expected: <[{"User":{"id":8148,"createId":"db/notroot","createTime":"2019-03-11T14:14:47.000Z","modId":"db/notroot","modTime":"2019-03-11T14:14:47.000Z","affiliation":"Unseen University","familyName":"Worblehat","fullName":"Dr. Horace Worblehat","givenName":"Horace","instrumentScientists":[],"investigationUsers":[],"name":"db/lib","studies":[],"userGroups":[]}}]> */ + /* + * Expected: <[{"User":{"id":8148,"createId":"db/notroot","createTime": + * "2019-03-11T14:14:47.000Z","modId":"db/notroot","modTime": + * "2019-03-11T14:14:47.000Z","affiliation":"Unseen University","familyName": + * "Worblehat","fullName":"Dr. 
Horace Worblehat","givenName":"Horace", + * "instrumentScientists":[],"investigationUsers":[],"name":"db/lib","studies":[ + * ],"userGroups":[]}}]> + */ JsonArray user_response = search(session, "SELECT u from User u WHERE u.name = 'db/lib'", 1); collector.checkThat(user_response.getJsonObject(0).containsKey("User"), is(true)); JsonObject user = user_response.getJsonObject(0).getJsonObject("User"); - collector.checkThat(user.getJsonNumber("id").isIntegral(), is(true)); // Check Integer conversion - collector.checkThat(user.getString("createId"), is("db/notroot")); // Check String conversion - - /* Expected: <[{"Facility":{"id":2852,"createId":"db/notroot","createTime":"2019-03-11T15:58:33.000Z","modId":"db/notroot","modTime":"2019-03-11T15:58:33.000Z","applications":[],"datafileFormats":[],"datasetTypes":[],"daysUntilRelease":90,"facilityCycles":[],"instruments":[],"investigationTypes":[],"investigations":[],"name":"Test port facility","parameterTypes":[],"sampleTypes":[]}}]> */ + collector.checkThat(user.getJsonNumber("id").isIntegral(), is(true)); // Check Integer conversion + collector.checkThat(user.getString("createId"), is("db/notroot")); // Check String conversion + + /* + * Expected: <[{"Facility":{"id":2852,"createId":"db/notroot","createTime": + * "2019-03-11T15:58:33.000Z","modId":"db/notroot","modTime": + * "2019-03-11T15:58:33.000Z","applications":[],"datafileFormats":[], + * "datasetTypes":[],"daysUntilRelease":90,"facilityCycles":[],"instruments":[], + * "investigationTypes":[],"investigations":[],"name":"Test port facility" + * ,"parameterTypes":[],"sampleTypes":[]}}]> + */ JsonArray fac_response = search(session, "SELECT f from Facility f WHERE f.name = 'Test port facility'", 1); collector.checkThat(fac_response.getJsonObject(0).containsKey("Facility"), is(true)); - /* Expected: <[{"Instrument":{"id":1449,"createId":"db/notroot","createTime":"2019-03-11T15:58:33.000Z","modId":"db/notroot","modTime":"2019-03-11T15:58:33.000Z","fullName":"EDDI - 
Energy Dispersive Diffraction","instrumentScientists":[],"investigationInstruments":[],"name":"EDDI","pid":"ig:0815","shifts":[]}}]> */ + /* + * Expected: <[{"Instrument":{"id":1449,"createId":"db/notroot","createTime": + * "2019-03-11T15:58:33.000Z","modId":"db/notroot","modTime": + * "2019-03-11T15:58:33.000Z","fullName":"EDDI - Energy Dispersive Diffraction" + * ,"instrumentScientists":[],"investigationInstruments":[],"name":"EDDI","pid": + * "ig:0815","shifts":[]}}]> + */ JsonArray inst_response = search(session, "SELECT i from Instrument i WHERE i.name = 'EDDI'", 1); collector.checkThat(inst_response.getJsonObject(0).containsKey("Instrument"), is(true)); - /* Expected: <[{"InvestigationType":{"id":3401,"createId":"db/notroot","createTime":"2019-03-11T15:58:33.000Z","modId":"db/notroot","modTime":"2019-03-11T15:58:33.000Z","investigations":[],"name":"atype"}}]> */ + /* + * Expected: + * <[{"InvestigationType":{"id":3401,"createId":"db/notroot","createTime": + * "2019-03-11T15:58:33.000Z","modId":"db/notroot","modTime": + * "2019-03-11T15:58:33.000Z","investigations":[],"name":"atype"}}]> + */ JsonArray it_response = search(session, "SELECT it from InvestigationType it WHERE it.name = 'atype'", 1); collector.checkThat(it_response.getJsonObject(0).containsKey("InvestigationType"), is(true)); - /* Expected: <[{"ParameterType":{"id":5373,"createId":"db/notroot","createTime":"2019-03-11T15:58:33.000Z","modId":"db/notroot","modTime":"2019-03-11T15:58:33.000Z","applicableToDataCollection":false,"applicableToDatafile":true,"applicableToDataset":true,"applicableToInvestigation":true,"applicableToSample":false,"dataCollectionParameters":[],"datafileParameters":[],"datasetParameters":[],"enforced":false,"investigationParameters":[],"minimumNumericValue":73.4,"name":"temp","permissibleStringValues":[],"pid":"pt:25c","sampleParameters":[],"units":"degrees Kelvin","valueType":"NUMERIC","verified":false}}]> */ + /* + * Expected: 
<[{"ParameterType":{"id":5373,"createId":"db/notroot","createTime": + * "2019-03-11T15:58:33.000Z","modId":"db/notroot","modTime": + * "2019-03-11T15:58:33.000Z","applicableToDataCollection":false, + * "applicableToDatafile":true,"applicableToDataset":true, + * "applicableToInvestigation":true,"applicableToSample":false, + * "dataCollectionParameters":[],"datafileParameters":[],"datasetParameters":[], + * "enforced":false,"investigationParameters":[],"minimumNumericValue":73.4, + * "name":"temp","permissibleStringValues":[],"pid":"pt:25c","sampleParameters": + * [],"units":"degrees Kelvin","valueType":"NUMERIC","verified":false}}]> + */ JsonArray pt_response = search(session, "SELECT pt from ParameterType pt WHERE pt.name = 'temp'", 1); collector.checkThat(pt_response.getJsonObject(0).containsKey("ParameterType"), is(true)); - collector.checkThat((Double) pt_response.getJsonObject(0).getJsonObject("ParameterType").getJsonNumber("minimumNumericValue").doubleValue(), is(73.4)); // Check Double conversion - collector.checkThat((Boolean) pt_response.getJsonObject(0).getJsonObject("ParameterType").getBoolean("enforced"), is(Boolean.FALSE)); // Check boolean conversion - collector.checkThat(pt_response.getJsonObject(0).getJsonObject("ParameterType").getJsonString("valueType").getString(), is("NUMERIC")); // Check ParameterValueType conversion - - /* Expected: <[{"Investigation":{"id":4814,"createId":"db/notroot","createTime":"2019-03-11T15:58:33.000Z","modId":"db/notroot","modTime":"2019-03-11T15:58:33.000Z","datasets":[],"endDate":"2010-12-31T23:59:59.000Z","investigationGroups":[],"investigationInstruments":[],"investigationUsers":[],"keywords":[],"name":"expt1","parameters":[],"publications":[],"samples":[],"shifts":[],"startDate":"2010-01-01T00:00:00.000Z","studyInvestigations":[],"title":"a title at the 
beginning","visitId":"zero"}},{"Investigation":{"id":4815,"createId":"db/notroot","createTime":"2019-03-11T15:58:33.000Z","modId":"db/notroot","modTime":"2019-03-11T15:58:33.000Z","datasets":[],"endDate":"2011-12-31T23:59:59.000Z","investigationGroups":[],"investigationInstruments":[],"investigationUsers":[],"keywords":[],"name":"expt1","parameters":[],"publications":[],"samples":[],"shifts":[],"startDate":"2011-01-01T00:00:00.000Z","studyInvestigations":[],"title":"a title in the middle","visitId":"one"}},{"Investigation":{"id":4816,"createId":"db/notroot","createTime":"2019-03-11T15:58:33.000Z","modId":"db/notroot","modTime":"2019-03-11T15:58:33.000Z","datasets":[],"endDate":"2012-12-31T23:59:59.000Z","investigationGroups":[],"investigationInstruments":[],"investigationUsers":[],"keywords":[],"name":"expt1","parameters":[],"publications":[],"samples":[],"shifts":[],"startDate":"2012-01-01T00:00:00.000Z","studyInvestigations":[],"title":"a title at the end","visitId":"two"}}]> */ + collector.checkThat((Double) pt_response.getJsonObject(0).getJsonObject("ParameterType") + .getJsonNumber("minimumNumericValue").doubleValue(), is(73.4)); // Check Double conversion + collector.checkThat( + (Boolean) pt_response.getJsonObject(0).getJsonObject("ParameterType").getBoolean("enforced"), + is(Boolean.FALSE)); // Check boolean conversion + collector.checkThat( + pt_response.getJsonObject(0).getJsonObject("ParameterType").getJsonString("valueType").getString(), + is("NUMERIC")); // Check ParameterValueType conversion + + /* + * Expected: <[{"Investigation":{"id":4814,"createId":"db/notroot","createTime": + * "2019-03-11T15:58:33.000Z","modId":"db/notroot","modTime": + * "2019-03-11T15:58:33.000Z","datasets":[],"endDate":"2010-12-31T23:59:59.000Z" + * ,"investigationGroups":[],"investigationInstruments":[],"investigationUsers": + * [],"keywords":[],"name":"expt1","parameters":[],"publications":[],"samples":[ + * 
],"shifts":[],"startDate":"2010-01-01T00:00:00.000Z","studyInvestigations":[] + * ,"title":"a title at the beginning","visitId":"zero"}},{"Investigation":{"id" + * :4815,"createId":"db/notroot","createTime":"2019-03-11T15:58:33.000Z","modId" + * :"db/notroot","modTime":"2019-03-11T15:58:33.000Z","datasets":[],"endDate": + * "2011-12-31T23:59:59.000Z","investigationGroups":[], + * "investigationInstruments":[],"investigationUsers":[],"keywords":[],"name": + * "expt1","parameters":[],"publications":[],"samples":[],"shifts":[], + * "startDate":"2011-01-01T00:00:00.000Z","studyInvestigations":[], + * "title":"a title in the middle","visitId":"one"}},{"Investigation":{"id":4816 + * ,"createId":"db/notroot","createTime":"2019-03-11T15:58:33.000Z","modId": + * "db/notroot","modTime":"2019-03-11T15:58:33.000Z","datasets":[],"endDate": + * "2012-12-31T23:59:59.000Z","investigationGroups":[], + * "investigationInstruments":[],"investigationUsers":[],"keywords":[],"name": + * "expt1","parameters":[],"publications":[],"samples":[],"shifts":[], + * "startDate":"2012-01-01T00:00:00.000Z","studyInvestigations":[], + * "title":"a title at the end","visitId":"two"}}]> + */ JsonArray inv_response = search(session, "SELECT inv from Investigation inv WHERE inv.name = 'expt1'", 3); collector.checkThat(inv_response.getJsonObject(0).containsKey("Investigation"), is(true)); - /* Expected: <[{"InvestigationUser":{"id":4723,"createId":"db/notroot","createTime":"2019-03-11T15:58:33.000Z","modId":"db/notroot","modTime":"2019-03-11T15:58:33.000Z","role":"troublemaker"}}]> */ - JsonArray invu_response = search(session, "SELECT invu from InvestigationUser invu WHERE invu.role = 'troublemaker'", 1); + /* + * Expected: + * <[{"InvestigationUser":{"id":4723,"createId":"db/notroot","createTime": + * "2019-03-11T15:58:33.000Z","modId":"db/notroot","modTime": + * "2019-03-11T15:58:33.000Z","role":"troublemaker"}}]> + */ + JsonArray invu_response = search(session, + "SELECT invu from InvestigationUser 
invu WHERE invu.role = 'troublemaker'", 1); collector.checkThat(invu_response.getJsonObject(0).containsKey("InvestigationUser"), is(true)); - /* Expected: <[{"Shift":{"id":2995,"createId":"db/notroot","createTime":"2019-03-11T15:58:33.000Z","modId":"db/notroot","modTime":"2019-03-11T15:58:33.000Z","comment":"waiting","endDate":"2013-12-31T22:59:59.000Z","startDate":"2013-12-31T11:00:00.000Z"}}]> */ + /* + * Expected: <[{"Shift":{"id":2995,"createId":"db/notroot","createTime": + * "2019-03-11T15:58:33.000Z","modId":"db/notroot","modTime": + * "2019-03-11T15:58:33.000Z","comment":"waiting","endDate": + * "2013-12-31T22:59:59.000Z","startDate":"2013-12-31T11:00:00.000Z"}}]> + */ JsonArray shift_response = search(session, "SELECT shift from Shift shift WHERE shift.comment = 'waiting'", 1); collector.checkThat(shift_response.getJsonObject(0).containsKey("Shift"), is(true)); - /* Expected: <[{"SampleType":{"id":3220,"createId":"db/notroot","createTime":"2019-03-11T15:58:33.000Z","modId":"db/notroot","modTime":"2019-03-11T15:58:33.000Z","molecularFormula":"C","name":"diamond","safetyInformation":"fairly harmless","samples":[]}}]> */ + /* + * Expected: <[{"SampleType":{"id":3220,"createId":"db/notroot","createTime": + * "2019-03-11T15:58:33.000Z","modId":"db/notroot","modTime": + * "2019-03-11T15:58:33.000Z","molecularFormula":"C","name":"diamond", + * "safetyInformation":"fairly harmless","samples":[]}}]> + */ JsonArray st_response = search(session, "SELECT st from SampleType st WHERE st.name = 'diamond'", 1); collector.checkThat(st_response.getJsonObject(0).containsKey("SampleType"), is(true)); - /* Expected: <[{"Sample":{"id":2181,"createId":"db/notroot","createTime":"2019-03-11T15:58:33.000Z","modId":"db/notroot","modTime":"2019-03-11T15:58:33.000Z","datasets":[],"name":"Koh-I-Noor","parameters":[],"pid":"sdb:374717"}}]> */ + /* + * Expected: <[{"Sample":{"id":2181,"createId":"db/notroot","createTime": + * "2019-03-11T15:58:33.000Z","modId":"db/notroot","modTime": + * 
"2019-03-11T15:58:33.000Z","datasets":[],"name":"Koh-I-Noor","parameters":[], + * "pid":"sdb:374717"}}]> + */ JsonArray s_response = search(session, "SELECT s from Sample s WHERE s.name = 'Koh-I-Noor'", 1); collector.checkThat(s_response.getJsonObject(0).containsKey("Sample"), is(true)); - /* Expected: <[{"InvestigationParameter":{"id":1123,"createId":"db/notroot","createTime":"2019-03-11T15:58:33.000Z","modId":"db/notroot","modTime":"2019-03-11T15:58:33.000Z","stringValue":"green"}}]> */ - JsonArray invp_response = search(session, "SELECT invp from InvestigationParameter invp WHERE invp.stringValue = 'green'", 1); + /* + * Expected: + * <[{"InvestigationParameter":{"id":1123,"createId":"db/notroot","createTime": + * "2019-03-11T15:58:33.000Z","modId":"db/notroot","modTime": + * "2019-03-11T15:58:33.000Z","stringValue":"green"}}]> + */ + JsonArray invp_response = search(session, + "SELECT invp from InvestigationParameter invp WHERE invp.stringValue = 'green'", 1); collector.checkThat(invp_response.size(), equalTo(1)); collector.checkThat(invp_response.getJsonObject(0).containsKey("InvestigationParameter"), is(true)); - /* Expected: <[{"DatasetType":{"id":1754,"createId":"db/notroot","createTime":"2019-03-11T15:58:33.000Z","modId":"db/notroot","modTime":"2019-03-11T15:58:33.000Z","datasets":[],"name":"calibration"}}]> */ + /* + * Expected: <[{"DatasetType":{"id":1754,"createId":"db/notroot","createTime": + * "2019-03-11T15:58:33.000Z","modId":"db/notroot","modTime": + * "2019-03-11T15:58:33.000Z","datasets":[],"name":"calibration"}}]> + */ JsonArray dst_response = search(session, "SELECT dst from DatasetType dst WHERE dst.name = 'calibration'", 1); collector.checkThat(dst_response.getJsonObject(0).containsKey("DatasetType"), is(true)); - /* Expected: 
<[{"Dataset":{"id":8128,"createId":"db/notroot","createTime":"2019-03-12T11:40:26.000Z","modId":"db/notroot","modTime":"2019-03-12T11:40:26.000Z","complete":true,"dataCollectionDatasets":[],"datafiles":[],"description":"alpha","endDate":"2014-05-16T04:28:26.000Z","name":"ds1","parameters":[],"startDate":"2014-05-16T04:28:26.000Z"}}]> */ + /* + * Expected: <[{"Dataset":{"id":8128,"createId":"db/notroot","createTime": + * "2019-03-12T11:40:26.000Z","modId":"db/notroot","modTime": + * "2019-03-12T11:40:26.000Z","complete":true,"dataCollectionDatasets":[], + * "datafiles":[],"description":"alpha","endDate":"2014-05-16T04:28:26.000Z", + * "name":"ds1","parameters":[],"startDate":"2014-05-16T04:28:26.000Z"}}]> + */ JsonArray ds_response = search(session, "SELECT ds from Dataset ds WHERE ds.name = 'ds1'", 1); collector.checkThat(ds_response.getJsonObject(0).containsKey("Dataset"), is(true)); - collector.checkThat(dft.parse(ds_response.getJsonObject(0).getJsonObject("Dataset").getString("startDate")), isA(Date.class)); //Check Date conversion - - /* Expected: <[{"DatasetParameter":{"id":4632,"createId":"db/notroot","createTime":"2019-03-12T13:30:33.000Z","modId":"db/notroot","modTime":"2019-03-12T13:30:33.000Z","stringValue":"green"}}]> */ - JsonArray dsp_response = search(session, "SELECT dsp from DatasetParameter dsp WHERE dsp.stringValue = 'green'", 1); + collector.checkThat(dft.parse(ds_response.getJsonObject(0).getJsonObject("Dataset").getString("startDate")), + isA(Date.class)); // Check Date conversion + + /* + * Expected: + * <[{"DatasetParameter":{"id":4632,"createId":"db/notroot","createTime": + * "2019-03-12T13:30:33.000Z","modId":"db/notroot","modTime": + * "2019-03-12T13:30:33.000Z","stringValue":"green"}}]> + */ + JsonArray dsp_response = search(session, "SELECT dsp from DatasetParameter dsp WHERE dsp.stringValue = 'green'", + 1); collector.checkThat(dsp_response.size(), equalTo(1)); 
collector.checkThat(dsp_response.getJsonObject(0).containsKey("DatasetParameter"), is(true)); - /* Expected: <[{"Datafile":{"id":15643,"createId":"db/notroot","createTime":"2019-03-12T13:30:33.000Z","modId":"db/notroot","modTime":"2019-03-12T13:30:33.000Z","dataCollectionDatafiles":[],"destDatafiles":[],"fileSize":17,"name":"df2","parameters":[],"sourceDatafiles":[]}}]> */ + /* + * Expected: <[{"Datafile":{"id":15643,"createId":"db/notroot","createTime": + * "2019-03-12T13:30:33.000Z","modId":"db/notroot","modTime": + * "2019-03-12T13:30:33.000Z","dataCollectionDatafiles":[],"destDatafiles":[], + * "fileSize":17,"name":"df2","parameters":[],"sourceDatafiles":[]}}]> + */ JsonArray df_response = search(session, "SELECT df from Datafile df WHERE df.name = 'df2'", 1); collector.checkThat(df_response.size(), equalTo(1)); collector.checkThat(df_response.getJsonObject(0).containsKey("Datafile"), is(true)); - /* Expected: <[{"DatafileParameter":{"id":1938,"createId":"db/notroot","createTime":"2019-03-12T13:30:33.000Z","modId":"db/notroot","modTime":"2019-03-12T13:30:33.000Z","stringValue":"green"}}]> */ - JsonArray dfp_response = search(session, "SELECT dfp from DatafileParameter dfp WHERE dfp.stringValue = 'green'", 1); + /* + * Expected: + * <[{"DatafileParameter":{"id":1938,"createId":"db/notroot","createTime": + * "2019-03-12T13:30:33.000Z","modId":"db/notroot","modTime": + * "2019-03-12T13:30:33.000Z","stringValue":"green"}}]> + */ + JsonArray dfp_response = search(session, + "SELECT dfp from DatafileParameter dfp WHERE dfp.stringValue = 'green'", 1); collector.checkThat(dfp_response.size(), equalTo(1)); collector.checkThat(dfp_response.getJsonObject(0).containsKey("DatafileParameter"), is(true)); - /* Expected: 
<[{"Application":{"id":2972,"createId":"db/notroot","createTime":"2019-03-12T13:30:33.000Z","modId":"db/notroot","modTime":"2019-03-12T13:30:33.000Z","jobs":[],"name":"aprog","version":"1.2.3"}},{"Application":{"id":2973,"createId":"db/notroot","createTime":"2019-03-12T13:30:33.000Z","modId":"db/notroot","modTime":"2019-03-12T13:30:33.000Z","jobs":[],"name":"aprog","version":"1.2.6"}}]> */ + /* + * Expected: <[{"Application":{"id":2972,"createId":"db/notroot","createTime": + * "2019-03-12T13:30:33.000Z","modId":"db/notroot","modTime": + * "2019-03-12T13:30:33.000Z","jobs":[],"name":"aprog","version":"1.2.3"}},{ + * "Application":{"id":2973,"createId":"db/notroot","createTime": + * "2019-03-12T13:30:33.000Z","modId":"db/notroot","modTime": + * "2019-03-12T13:30:33.000Z","jobs":[],"name":"aprog","version":"1.2.6"}}]> + */ JsonArray a_response = search(session, "SELECT a from Application a WHERE a.name = 'aprog'", 2); collector.checkThat(a_response.size(), equalTo(2)); collector.checkThat(a_response.getJsonObject(0).containsKey("Application"), is(true)); - /* Expected: <[{DataCollection":{"id":4485,"createId":"db/notroot","createTime":"2019-03-12T13:30:33.000Z","modId":"db/notroot","modTime":"2019-03-12T13:30:33.000Z","dataCollectionDatafiles":[],"dataCollectionDatasets":[],"jobsAsInput":[],"jobsAsOutput":[],"parameters":[]}},{"DataCollection":{"id":4486,"createId":"db/notroot","createTime":"2019-03-12T13:30:33.000Z","modId":"db/notroot","modTime":"2019-03-12T13:30:33.000Z","dataCollectionDatafiles":[],"dataCollectionDatasets":[],"jobsAsInput":[],"jobsAsOutput":[],"parameters":[]}},{"DataCollection":{"id":4487,"createId":"db/notroot","createTime":"2019-03-12T13:30:33.000Z","modId":"db/notroot","modTime":"2019-03-12T13:30:33.000Z","dataCollectionDatafiles":[],"dataCollectionDatasets":[],"jobsAsInput":[],"jobsAsOutput":[],"parameters":[]}}]> */ + /* + * Expected: + * <[{DataCollection":{"id":4485,"createId":"db/notroot","createTime":"2019-03- + * 
12T13:30:33.000Z","modId":"db/notroot","modTime":"2019-03-12T13:30:33. + * 000Z","dataCollectionDatafiles":[],"dataCollectionDatasets":[],"jobsAsInput":[],"jobsAsOutput":[],"parameters":[]}},{"DataCollection":{"id":4486,"createId":"db + * /notroot","createTime":"2019-03-12T13:30:33.000Z","modId":"db/ + * notroot","modTime":"2019-03-12T13:30:33. + * 000Z","dataCollectionDatafiles":[],"dataCollectionDatasets":[],"jobsAsInput":[],"jobsAsOutput":[],"parameters":[]}},{"DataCollection":{"id":4487,"createId":"db + * /notroot","createTime":"2019-03-12T13:30:33.000Z","modId":"db/ + * notroot","modTime":"2019-03-12T13:30:33. + * 000Z","dataCollectionDatafiles":[],"dataCollectionDatasets":[],"jobsAsInput":[],"jobsAsOutput":[],"parameters + * ":[]}}]> + */ JsonArray dc_response = search(session, "SELECT dc from DataCollection dc", 3); collector.checkThat(dc_response.size(), equalTo(3)); collector.checkThat(dc_response.getJsonObject(0).containsKey("DataCollection"), is(true)); - /* Expected: <[{"DataCollectionDatafile":{"id":4362,"createId":"db/notroot","createTime":"2019-03-12T13:30:33.000Z","modId":"db/notroot","modTime":"2019-03-12T13:30:33.000Z"}},{"DataCollectionDatafile":{"id":4363,"createId":"db/notroot","createTime":"2019-03-12T13:30:33.000Z","modId":"db/notroot","modTime":"2019-03-12T13:30:33.000Z"}}]> */ + /* + * Expected: + * <[{"DataCollectionDatafile":{"id":4362,"createId":"db/notroot","createTime": + * "2019-03-12T13:30:33.000Z","modId":"db/notroot","modTime": + * "2019-03-12T13:30:33.000Z"}},{"DataCollectionDatafile":{"id":4363,"createId": + * "db/notroot","createTime":"2019-03-12T13:30:33.000Z","modId":"db/notroot", + * "modTime":"2019-03-12T13:30:33.000Z"}}]> + */ JsonArray dcdf_response = search(session, "SELECT dcdf from DataCollectionDatafile dcdf", 2); collector.checkThat(dcdf_response.getJsonObject(0).containsKey("DataCollectionDatafile"), is(true)); - /* Expected: 
<[{"Job":{"id":1634,"createId":"db/notroot","createTime":"2019-03-12T13:30:33.000Z","modId":"db/notroot","modTime":"2019-03-12T13:30:33.000Z"}}]> */ + /* + * Expected: <[{"Job":{"id":1634,"createId":"db/notroot","createTime": + * "2019-03-12T13:30:33.000Z","modId":"db/notroot","modTime": + * "2019-03-12T13:30:33.000Z"}}]> + */ JsonArray j_response = search(session, "SELECT j from Job j", 1); collector.checkThat(j_response.getJsonObject(0).containsKey("Job"), is(true)); } @@ -308,6 +506,9 @@ public void testClone() throws Exception { } + /** + * Tests the old lucene/data endpoint + */ @Test public void testLuceneDatafiles() throws Exception { Session session = setupLuceneTest(); @@ -329,32 +530,29 @@ public void testLuceneDatafiles() throws Exception { // Set text and parameters array = searchDatafiles(session, null, "df2", null, null, parameters, 20, 1); checkResultFromLuceneSearch(session, "df2", array, "Datafile", "name"); + + // Search with a user who should not see any results + Session piOneSession = piOneSession(); + searchDatafiles(piOneSession, null, null, null, null, null, 20, 0); } + /** + * Tests the old lucene/data endpoint + */ @Test public void testLuceneDatasets() throws Exception { - Session session = setupLuceneTest(); - DateFormat dft = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ"); - // All datasets searchDatasets(session, null, null, null, null, null, 20, 5); // Use the user - Set names = new HashSet<>(); JsonArray array = searchDatasets(session, "db/tr", null, null, null, null, 20, 3); - for (int i = 0; i < 3; i++) { - long n = array.getJsonObject(i).getJsonNumber("id").longValueExact(); - JsonObject result = Json.createReader(new ByteArrayInputStream(session.get("Dataset", n).getBytes())) - .readObject(); - names.add(result.getJsonObject("Dataset").getString("name")); + JsonObject result = Json.createReader(new StringReader(session.get("Dataset", n))).readObject(); + assertEquals("ds" + (i + 1), 
result.getJsonObject("Dataset").getString("name")); } - assertTrue(names.contains("ds1")); - assertTrue(names.contains("ds2")); - assertTrue(names.contains("ds3")); // Try a bad user searchDatasets(session, "db/fred", null, null, null, null, 20, 0); @@ -364,71 +562,80 @@ public void testLuceneDatasets() throws Exception { // Try parameters List parameters = new ArrayList<>(); - parameters.add(new ParameterForLucene("colour", "name", "green")); - parameters.add(new ParameterForLucene("birthday", "date", dft.parse("2014-05-16T16:58:26+0000"), - dft.parse("2014-05-16T16:58:26+0000"))); - parameters.add(new ParameterForLucene("current", "amps", 140, 165)); - + ParameterForLucene stringParameter = new ParameterForLucene("colour", "name", "green"); + Date birthday = dft.parse("2014-05-16T16:58:26+0000"); + ParameterForLucene dateParameter = new ParameterForLucene("birthday", "date", birthday, birthday); + ParameterForLucene numericParameter = new ParameterForLucene("current", "amps", 140, 165); + array = searchDatasets(session, null, null, null, null, Arrays.asList(stringParameter), 20, 1); + array = searchDatasets(session, null, null, null, null, Arrays.asList(dateParameter), 20, 1); + array = searchDatasets(session, null, null, null, null, Arrays.asList(numericParameter), 20, 1); + parameters.add(stringParameter); + parameters.add(dateParameter); + parameters.add(numericParameter); array = searchDatasets(session, null, null, null, null, parameters, 20, 1); - array = searchDatasets(session, null, "gamma AND ds3", dft.parse("2014-05-16T05:09:03+0000"), - dft.parse("2014-05-16T05:15:26+0000"), parameters, 20, 1); + Date lower = dft.parse("2014-05-16T05:09:03+0000"); + Date upper = dft.parse("2014-05-16T05:15:26+0000"); + array = searchDatasets(session, null, "gamma AND ds3", lower, upper, parameters, 20, 1); checkResultFromLuceneSearch(session, "gamma", array, "Dataset", "description"); + + // Search with a user who should not see any results + Session piOneSession = 
piOneSession(); + searchDatasets(piOneSession, null, null, null, null, null, 20, 0); } + /** + * Tests the old lucene/data endpoint + */ @Test public void testLuceneInvestigations() throws Exception { Session session = setupLuceneTest(); - DateFormat dft = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ"); + Date lowerOrigin = dft.parse("2011-01-01T00:00:00+0000"); + Date lowerSecond = dft.parse("2011-01-01T00:00:01+0000"); + Date lowerMinute = dft.parse("2011-01-01T00:01:00+0000"); + Date upperOrigin = dft.parse("2011-12-31T23:59:59+0000"); + Date upperSecond = dft.parse("2011-12-31T23:59:58+0000"); + Date upperMinute = dft.parse("2011-12-31T23:58:00+0000"); + String textAnd = "title AND one"; + String textTwo = "title AND two"; + String textPlus = "title + one"; searchInvestigations(session, null, null, null, null, null, null, null, 20, 3); List parameters = new ArrayList<>(); parameters.add(new ParameterForLucene("colour", "name", "green")); - JsonArray array = searchInvestigations(session, "db/tr", "title AND one", dft.parse("2011-01-01T00:00:00+0000"), - dft.parse("2011-12-31T23:59:59+0000"), parameters, Arrays.asList("ford AND rust", "koh* AND diamond"), - "Professor", 20, 1); + JsonArray array = searchInvestigations(session, "db/tr", textAnd, lowerOrigin, upperOrigin, parameters, + null, "Professor", 20, 1); checkResultFromLuceneSearch(session, "one", array, "Investigation", "visitId"); // change user - searchInvestigations(session, "db/fred", "title AND one", null, null, parameters, null, null, 20, 0); + searchInvestigations(session, "db/fred", textAnd, null, null, parameters, null, null, 20, 0); // change text - searchInvestigations(session, "db/tr", "title AND two", null, null, parameters, null, null, 20, 0); + searchInvestigations(session, "db/tr", textTwo, null, null, parameters, null, null, 20, 0); // Only working to a minute - array = searchInvestigations(session, "db/tr", "title AND one", dft.parse("2011-01-01T00:00:01+0000"), - 
dft.parse("2011-12-31T23:59:59+0000"), parameters, null, null, 20, 1); + array = searchInvestigations(session, "db/tr", textAnd, lowerSecond, upperOrigin, parameters, null, null, 20, + 1); checkResultFromLuceneSearch(session, "one", array, "Investigation", "visitId"); - array = searchInvestigations(session, "db/tr", "title AND one", dft.parse("2011-01-01T00:00:00+0000"), - dft.parse("2011-12-31T23:59:58+0000"), parameters, null, null, 20, 1); + array = searchInvestigations(session, "db/tr", textAnd, lowerOrigin, upperSecond, parameters, null, null, 20, + 1); checkResultFromLuceneSearch(session, "one", array, "Investigation", "visitId"); - searchInvestigations(session, "db/tr", "title AND one", dft.parse("2011-01-01T00:01:00+0000"), - dft.parse("2011-12-31T23:59:59+0000"), parameters, null, null, 20, 0); + searchInvestigations(session, "db/tr", textAnd, lowerMinute, upperOrigin, parameters, null, null, 20, 0); - searchInvestigations(session, "db/tr", "title AND one", dft.parse("2011-01-01T00:00:00+0000"), - dft.parse("2011-12-31T23:58:00+0000"), parameters, null, null, 20, 0); + searchInvestigations(session, "db/tr", textAnd, lowerOrigin, upperMinute, parameters, null, null, 20, 0); // Change parameters List badParameters = new ArrayList<>(); badParameters.add(new ParameterForLucene("color", "name", "green")); - searchInvestigations(session, "db/tr", "title AND one", dft.parse("2011-01-01T00:00:00+0000"), - dft.parse("2011-12-31T23:59:59+0000"), badParameters, Arrays.asList("ford + rust", "koh + diamond"), - null, 20, 0); - - // Change samples - searchInvestigations(session, "db/tr", "title AND one", dft.parse("2011-01-01T00:00:00+0000"), - dft.parse("2011-12-31T23:59:59+0000"), parameters, Arrays.asList("ford AND rust", "kog* AND diamond"), - null, 20, 0); + searchInvestigations(session, "db/tr", textAnd, lowerOrigin, upperOrigin, badParameters, null, null, 20, 0); // Change userFullName - searchInvestigations(session, "db/tr", "title + one", 
dft.parse("2011-01-01T00:00:00+0000"), - dft.parse("2011-12-31T23:59:59+0000"), parameters, Arrays.asList("ford AND rust", "koh* AND diamond"), - "Doctor", 20, 0); + searchInvestigations(session, "db/tr", textPlus, lowerOrigin, upperOrigin, parameters, null, "Doctor", 20, 0); // Try provoking an error badParameters = new ArrayList<>(); @@ -439,6 +646,477 @@ public void testLuceneInvestigations() throws Exception { } catch (IcatException e) { assertEquals(IcatExceptionType.BAD_PARAMETER, e.getType()); } + + // Search with a user who should not see any results + Session piOneSession = piOneSession(); + searchInvestigations(piOneSession, null, null, null, null, null, null, null, 20, 0); + } + + /** + * Tests the new search/documents endpoint + */ + @Test + public void testSearchDatafiles() throws Exception { + Session session = setupLuceneTest(); + JsonObject responseObject; + JsonValue searchAfter; + Map expectation = new HashMap<>(); + expectation.put("investigation.id", null); + expectation.put("date", "notNull"); + + List parameters = new ArrayList<>(); + parameters.add(new ParameterForLucene("colour", "name", "green")); + + // All data files + searchDatafiles(session, null, null, null, null, null, null, 10, null, null, 3); + + // Use the user + searchDatafiles(session, "db/tr", null, null, null, null, null, 10, null, null, 3); + + // Try a bad user + searchDatafiles(session, "db/fred", null, null, null, null, null, 10, null, null, 0); + + // Set text and parameters + responseObject = searchDatafiles(session, null, "df2", null, null, parameters, null, 10, null, null, 1); + assertFalse(responseObject.containsKey("search_after")); + expectation.put("name", "df2"); + checkResultsSource(responseObject, Arrays.asList(expectation), true); + + // Try sorting and searchAfter + String sort = Json.createObjectBuilder().add("name", "desc").add("date", "asc").add("fileSize", "asc").build() + .toString(); + responseObject = searchDatafiles(session, null, null, null, null, 
null, null, 1, sort, null, 1); + searchAfter = responseObject.get("search_after"); + assertNotNull(searchAfter); + expectation.put("name", "df3"); + checkResultsSource(responseObject, Arrays.asList(expectation), false); + + responseObject = searchDatafiles(session, null, null, null, null, null, searchAfter.toString(), 1, sort, null, + 1); + searchAfter = responseObject.get("search_after"); + assertNotNull(searchAfter); + expectation.put("name", "df2"); + checkResultsSource(responseObject, Arrays.asList(expectation), false); + + // Test that changes to the public steps/tables are reflected in returned fields + PublicStep ps = new PublicStep(); + ps.setOrigin("Datafile"); + ps.setField("dataset"); + + ps.setId(wSession.create(ps)); + responseObject = searchDatafiles(session, null, "df2", null, null, null, null, 10, null, null, 1); + assertFalse(responseObject.containsKey("search_after")); + expectation.put("investigation.id", "notNull"); + checkResultsSource(responseObject, Arrays.asList(expectation), true); + + wSession.delete(ps); + responseObject = searchDatafiles(session, null, "df2", null, null, null, null, 10, null, null, 1); + assertFalse(responseObject.containsKey("search_after")); + expectation.put("investigation.id", null); + checkResultsSource(responseObject, Arrays.asList(expectation), true); + + wSession.addRule(null, "Dataset", "R"); + responseObject = searchDatafiles(session, null, "df2", null, null, null, null, 10, null, null, 1); + assertFalse(responseObject.containsKey("search_after")); + expectation.put("investigation.id", "notNull"); + checkResultsSource(responseObject, Arrays.asList(expectation), true); + + wSession.delRule(null, "Dataset", "R"); + responseObject = searchDatafiles(session, null, "df2", null, null, null, null, 10, null, null, 1); + assertFalse(responseObject.containsKey("search_after")); + expectation.put("investigation.id", null); + checkResultsSource(responseObject, Arrays.asList(expectation), true); + + // Test searching with 
someone without authz for the Datafile(s) + searchDatafiles(piSession(), null, null, null, null, null, null, 10, null, null, 0); + + // Test no facets match on Datafiles + JsonArray facets = buildFacetRequest("Datafile"); + responseObject = searchDatafiles(session, null, null, null, null, null, null, 10, null, facets, 3); + assertFalse(responseObject.containsKey("search_after")); + assertFalse(NO_DIMENSIONS, responseObject.containsKey("dimensions")); + + // Test no facets match on DatafileParameters due to lack of READ access + facets = buildFacetRequest("DatafileParameter"); + responseObject = searchDatafiles(session, null, null, null, null, null, null, 10, null, facets, 3); + assertFalse(responseObject.containsKey("search_after")); + assertFalse(NO_DIMENSIONS, responseObject.containsKey("dimensions")); + + // Test facets match on DatafileParameters + wSession.addRule(null, "DatafileParameter", "R"); + responseObject = searchDatafiles(session, null, null, null, null, null, null, 10, null, facets, 3); + assertFalse(responseObject.containsKey("search_after")); + checkFacets(responseObject, "DatafileParameter.type.name", Arrays.asList("colour"), Arrays.asList(1L)); + } + + /** + * Tests the new search/documents endpoint + */ + @Test + public void testSearchDatasets() throws Exception { + Session session = setupLuceneTest(); + JsonObject responseObject; + JsonValue searchAfter; + Map expectation = new HashMap<>(); + expectation.put("startDate", "notNull"); + expectation.put("endDate", "notNull"); + expectation.put("investigation.id", "notNull"); + expectation.put("sample.name", null); + expectation.put("sample.type.name", null); + expectation.put("type.name", null); + + // All datasets + searchDatasets(session, null, null, null, null, null, null, 10, null, null, 5); + + // Use the user + responseObject = searchDatasets(session, "db/tr", null, null, null, null, null, 10, null, null, 3); + List> expectations = new ArrayList<>(); + expectation.put("name", "ds1"); + 
expectations.add(new HashMap<>(expectation)); + expectation.put("name", "ds2"); + expectations.add(new HashMap<>(expectation)); + expectation.put("name", "ds3"); + expectations.add(new HashMap<>(expectation)); + checkResultsSource(responseObject, expectations, true); + + // Try a bad user + searchDatasets(session, "db/fred", null, null, null, null, null, 10, null, null, 0); + + // Try text + responseObject = searchDatasets(session, null, "gamma AND ds3", null, null, null, null, 10, null, null, 1); + assertFalse(responseObject.containsKey("search_after")); + checkResultsSource(responseObject, Arrays.asList(expectation), true); + + // Try parameters + Date lower = dft.parse("2014-05-16T05:09:03+0000"); + Date upper = dft.parse("2014-05-16T05:15:26+0000"); + List parameters = new ArrayList<>(); + Date parameterDate = dft.parse("2014-05-16T16:58:26+0000"); + parameters.add(new ParameterForLucene("colour", "name", "green")); + responseObject = searchDatasets(session, null, null, null, null, parameters, null, 10, null, null, 1); + assertFalse(responseObject.containsKey("search_after")); + checkResultsSource(responseObject, Arrays.asList(expectation), true); + + parameters.add(new ParameterForLucene("birthday", "date", parameterDate, parameterDate)); + responseObject = searchDatasets(session, null, null, null, null, parameters, null, 10, null, null, 1); + assertFalse(responseObject.containsKey("search_after")); + checkResultsSource(responseObject, Arrays.asList(expectation), true); + + parameters.add(new ParameterForLucene("current", "amps", 140, 165)); + responseObject = searchDatasets(session, null, null, null, null, parameters, null, 10, null, null, 1); + assertFalse(responseObject.containsKey("search_after")); + checkResultsSource(responseObject, Arrays.asList(expectation), true); + + responseObject = searchDatasets(session, null, "gamma AND ds3", lower, upper, parameters, null, 10, null, null, + 1); + assertFalse(responseObject.containsKey("search_after")); + 
checkResultsSource(responseObject, Arrays.asList(expectation), true); + + // Try sorting and searchAfter + String sort = Json.createObjectBuilder().add("name", "desc").add("date", "asc").add("fileSize", "asc").build() + .toString(); + responseObject = searchDatasets(session, null, null, null, null, null, null, 1, sort, null, 1); + searchAfter = responseObject.get("search_after"); + assertNotNull(searchAfter); + expectation.put("name", "ds4"); + checkResultsSource(responseObject, Arrays.asList(expectation), false); + + responseObject = searchDatasets(session, null, null, null, null, null, searchAfter.toString(), 1, sort, null, + 1); + searchAfter = responseObject.get("search_after"); + assertNotNull(searchAfter); + expectation.put("name", "ds3"); + checkResultsSource(responseObject, Arrays.asList(expectation), false); + + // Test that changes to the public steps/tables are reflected in returned fields + PublicStep ps = new PublicStep(); + ps.setOrigin("Dataset"); + ps.setField("type"); + + ps.setId(wSession.create(ps)); + responseObject = searchDatasets(session, null, "ds1", null, null, null, null, 10, null, null, 1); + assertFalse(responseObject.containsKey("search_after")); + expectation.put("name", "ds1"); + expectation.put("type.name", "calibration"); + checkResultsSource(responseObject, Arrays.asList(expectation), true); + + wSession.addRule(null, "Sample", "R"); + responseObject = searchDatasets(session, null, "ds1", null, null, null, null, 10, null, null, 1); + assertFalse(responseObject.containsKey("search_after")); + expectation.put("sample.name", "Koh-I-Noor"); + checkResultsSource(responseObject, Arrays.asList(expectation), true); + + wSession.delete(ps); + responseObject = searchDatasets(session, null, "ds1", null, null, null, null, 10, null, null, 1); + assertFalse(responseObject.containsKey("search_after")); + expectation.put("type.name", null); + checkResultsSource(responseObject, Arrays.asList(expectation), true); + + wSession.delRule(null, "Sample", 
"R"); + responseObject = searchDatasets(session, null, "ds1", null, null, null, null, 10, null, null, 1); + assertFalse(responseObject.containsKey("search_after")); + expectation.put("sample.name", null); + checkResultsSource(responseObject, Arrays.asList(expectation), true); + + // Test searching with someone without authz for the Dataset(s) + searchDatasets(piSession(), null, null, null, null, null, null, 10, null, null, 0); + + // Test facets match on Datasets + JsonArray facets = buildFacetRequest("Dataset"); + responseObject = searchDatasets(session, null, null, null, null, null, null, 10, null, facets, 5); + assertFalse(responseObject.containsKey("search_after")); + checkFacets(responseObject, "Dataset.type.name", Arrays.asList("calibration"), Arrays.asList(5L)); + + // Test no facets match on DatasetParameters due to lack of READ access + facets = buildFacetRequest("DatasetParameter"); + responseObject = searchDatasets(session, null, null, null, null, null, null, 10, null, facets, 5); + assertFalse(responseObject.containsKey("search_after")); + assertFalse(NO_DIMENSIONS, + responseObject.containsKey("dimensions")); + + // Test facets match on DatasetParameters + wSession.addRule(null, "DatasetParameter", "R"); + responseObject = searchDatasets(session, null, null, null, null, null, null, 10, null, facets, 5); + assertFalse(responseObject.containsKey("search_after")); + checkFacets(responseObject, "DatasetParameter.type.name", + Arrays.asList("colour", "birthday", "current"), + Arrays.asList(1L, 1L, 1L)); + } + + /** + * Tests the new search/documents endpoint + */ + @Test + public void testSearchInvestigations() throws Exception { + Session session = setupLuceneTest(); + JsonObject responseObject; + JsonValue searchAfter; + Map expectation = new HashMap<>(); + expectation.put("name", "expt1"); + expectation.put("startDate", "notNull"); + expectation.put("endDate", "notNull"); + expectation.put("type.name", null); + expectation.put("facility.name", null); + + 
Date lowerOrigin = dft.parse("2011-01-01T00:00:00+0000"); + Date lowerSecond = dft.parse("2011-01-01T00:00:01+0000"); + Date lowerMinute = dft.parse("2011-01-01T00:01:00+0000"); + Date upperOrigin = dft.parse("2011-12-31T23:59:59+0000"); + Date upperSecond = dft.parse("2011-12-31T23:59:58+0000"); + Date upperMinute = dft.parse("2011-12-31T23:58:00+0000"); + String samplesSingular = "sample.name:ford AND sample.type.name:rust"; + String samplesMultiple = "sample.name:ford sample.type.name:rust sample.name:koh sample.type.name:diamond"; + String samplesBad = "sample.name:kog* AND sample.type.name:diamond"; + String textAnd = "title AND one"; + String textTwo = "title AND two"; + String textPlus = "title + one"; + + searchInvestigations(session, null, null, null, null, null, null, null, 10, null, null, 3); + + List parameters = new ArrayList<>(); + parameters.add(new ParameterForLucene("colour", "name", "green")); + responseObject = searchInvestigations(session, "db/tr", null, null, null, null, + null, null, 10, null, null, 2); + responseObject = searchInvestigations(session, "db/tr", null, lowerOrigin, upperOrigin, null, + null, null, 10, null, null, 1); + responseObject = searchInvestigations(session, "db/tr", textAnd, lowerOrigin, upperOrigin, null, + null, null, 10, null, null, 1); + responseObject = searchInvestigations(session, "db/tr", textAnd, lowerOrigin, upperOrigin, parameters, + null, null, 10, null, null, 1); + responseObject = searchInvestigations(session, "db/tr", textAnd, lowerOrigin, upperOrigin, parameters, + "Professor", null, 10, null, null, 1); + assertFalse(responseObject.containsKey("search_after")); + checkResultsSource(responseObject, Arrays.asList(expectation), true); + + // change user + searchInvestigations(session, "db/fred", textAnd, null, null, parameters, null, null, 10, null, null, 0); + + // change text + searchInvestigations(session, "db/tr", textTwo, null, null, parameters, null, null, 10, null, null, 0); + + // Only working to a 
minute + responseObject = searchInvestigations(session, "db/tr", textAnd, lowerSecond, upperOrigin, parameters, + null, null, 10, null, null, 1); + assertFalse(responseObject.containsKey("search_after")); + checkResultsSource(responseObject, Arrays.asList(expectation), true); + + responseObject = searchInvestigations(session, "db/tr", textAnd, lowerOrigin, upperSecond, parameters, + null, null, 10, null, null, 1); + assertFalse(responseObject.containsKey("search_after")); + checkResultsSource(responseObject, Arrays.asList(expectation), true); + + searchInvestigations(session, "db/tr", textAnd, lowerMinute, upperOrigin, parameters, null, null, + 10, null, null, 0); + + searchInvestigations(session, "db/tr", textAnd, lowerOrigin, upperMinute, parameters, null, null, + 10, null, null, 0); + + // Change parameters + List badParameters = new ArrayList<>(); + badParameters.add(new ParameterForLucene("color", "name", "green")); + searchInvestigations(session, "db/tr", textAnd, lowerOrigin, upperOrigin, badParameters, null, + null, 10, null, null, 0); + + // Change samples + searchInvestigations(session, "db/tr", samplesSingular, lowerOrigin, upperOrigin, parameters, null, null, + 10, null, null, 1); + searchInvestigations(session, "db/tr", samplesMultiple, lowerOrigin, upperOrigin, parameters, null, null, + 10, null, null, 1); + searchInvestigations(session, "db/tr", samplesBad, lowerOrigin, upperOrigin, parameters, null, null, + 10, null, null, 0); + + // Change userFullName + searchInvestigations(session, "db/tr", textPlus, lowerOrigin, upperOrigin, parameters, "Doctor", + null, 10, null, null, 0); + + // Try sorting and searchAfter + // Note as all the investigations have the same name/date, we cannot + // meaningfully sort them, however still check that the search succeeds in + // returning a non-null searchAfter object + String sort = Json.createObjectBuilder().add("name", "desc").add("date", "asc").add("fileSize", "asc").build() + .toString(); + responseObject = 
searchInvestigations(session, null, null, null, null, null, null, null, 1, sort, null, 1); + searchAfter = responseObject.get("search_after"); + assertNotNull(searchAfter); + checkResultsSource(responseObject, Arrays.asList(expectation), false); + + // Test that changes to the public steps/tables are reflected in returned fields + PublicStep ps = new PublicStep(); + ps.setOrigin("Investigation"); + ps.setField("type"); + + ps.setId(wSession.create(ps)); + responseObject = searchInvestigations(session, null, textAnd, null, null, null, null, null, 10, null, + null, 1); + assertFalse(responseObject.containsKey("search_after")); + expectation.put("type.name", "atype"); + checkResultsSource(responseObject, Arrays.asList(expectation), true); + + wSession.addRule(null, "Facility", "R"); + responseObject = searchInvestigations(session, null, textAnd, null, null, null, null, null, 10, null, + null, 1); + assertFalse(responseObject.containsKey("search_after")); + expectation.put("facility.name", "Test port facility"); + checkResultsSource(responseObject, Arrays.asList(expectation), true); + + wSession.delete(ps); + responseObject = searchInvestigations(session, null, textAnd, null, null, null, null, null, 10, null, + null, 1); + assertFalse(responseObject.containsKey("search_after")); + expectation.put("type.name", null); + checkResultsSource(responseObject, Arrays.asList(expectation), true); + + wSession.delRule(null, "Facility", "R"); + responseObject = searchInvestigations(session, null, textAnd, null, null, null, null, null, 10, null, + null, 1); + assertFalse(responseObject.containsKey("search_after")); + expectation.put("facility.name", null); + checkResultsSource(responseObject, Arrays.asList(expectation), true); + + // Test searching with someone without authz for the Investigation(s) + searchInvestigations(piSession(), null, null, null, null, null, null, null, 10, null, null, 0); + + // Test facets match on Investigations + JsonArray facets = 
buildFacetRequest("Investigation"); + responseObject = searchInvestigations(session, null, null, null, null, null, null, null, 10, null, facets, + 3); + assertFalse(responseObject.containsKey("search_after")); + checkFacets(responseObject, "Investigation.type.name", Arrays.asList("atype"), Arrays.asList(3L)); + + // Test no facets match on InvestigationParameters due to lack of READ access + facets = buildFacetRequest("InvestigationParameter"); + responseObject = searchInvestigations(session, null, null, null, null, null, null, null, 10, null, facets, + 3); + assertFalse(responseObject.containsKey("search_after")); + assertFalse(NO_DIMENSIONS, responseObject.containsKey("dimensions")); + + // Test facets match on InvestigationParameters + wSession.addRule(null, "InvestigationParameter", "R"); + responseObject = searchInvestigations(session, null, null, null, null, null, null, null, 10, null, facets, + 3); + assertFalse(responseObject.containsKey("search_after")); + checkFacets(responseObject, "InvestigationParameter.type.name", Arrays.asList("colour"), + Arrays.asList(1L)); + + // Test no facets match on Sample due to lack of READ access + facets = buildFacetRequest("Sample", "sample.type.name"); + responseObject = searchInvestigations(session, null, null, null, null, null, null, null, 10, null, facets, + 3); + assertFalse(responseObject.containsKey("search_after")); + assertFalse(NO_DIMENSIONS, responseObject.containsKey("dimensions")); + + // Test facets match on Sample + wSession.addRule(null, "Sample", "R"); + responseObject = searchInvestigations(session, null, null, null, null, null, null, null, 10, null, facets, + 3); + assertFalse(responseObject.containsKey("search_after")); + checkFacets(responseObject, "Sample.sample.type.name", Arrays.asList("diamond", "rust"), + Arrays.asList(1L, 1L)); + } + + @Test + public void testSearchParameterValidation() throws Exception { + Session session = setupLuceneTest(); + List badParameters = new ArrayList<>(); + + 
badParameters = Arrays.asList(new ParameterForLucene(null, null, null)); + try { + searchInvestigations(session, null, null, null, null, badParameters, null, null, 10, null, null, 0); + fail("BAD_PARAMETER exception not caught"); + } catch (IcatException e) { + assertEquals(IcatExceptionType.BAD_PARAMETER, e.getType()); + assertEquals("name not set in one of parameters", e.getMessage()); + } + + badParameters = Arrays.asList(new ParameterForLucene("color", null, null)); + try { + searchInvestigations(session, null, null, null, null, badParameters, null, null, 10, null, null, 0); + fail("BAD_PARAMETER exception not caught"); + } catch (IcatException e) { + assertEquals(IcatExceptionType.BAD_PARAMETER, e.getType()); + assertEquals("units not set in parameter 'color'", e.getMessage()); + } + + badParameters = Arrays.asList(new ParameterForLucene("color", "string", null)); + try { + searchInvestigations(session, null, null, null, null, badParameters, null, null, 10, null, null, 0); + fail("BAD_PARAMETER exception not caught"); + } catch (IcatException e) { + assertEquals(IcatExceptionType.BAD_PARAMETER, e.getType()); + assertEquals("value not set in parameter 'color'", e.getMessage()); + } + } + + private JsonArray buildFacetRequest(String target) { + return buildFacetRequest(target, "type.name"); + } + + private JsonArray buildFacetRequest(String target, String dimension) { + JsonObjectBuilder builder = Json.createObjectBuilder(); + JsonObjectBuilder dimensionBuilder = Json.createObjectBuilder().add("dimension", dimension); + JsonArrayBuilder dimensions = Json.createArrayBuilder().add(dimensionBuilder); + builder.add("target", target).add("dimensions", dimensions); + return Json.createArrayBuilder().add(builder).build(); + } + + private void checkFacets(JsonObject responseObject, String dimension, List<String> expectedLabels, + List<Long> expectedCounts) { + Set<String> responseKeys = responseObject.keySet(); + String dimensionsMessage = "Expected responseObject to contain 'dimensions', 
but it had keys " + responseKeys; + assertTrue(dimensionsMessage, responseObject.containsKey("dimensions")); + + JsonObject dimensions = responseObject.getJsonObject("dimensions"); + Set<String> dimensionKeys = dimensions.keySet(); + String dimensionMessage = "Expected 'dimensions' to contain " + dimension + " but keys were " + dimensionKeys; + assertTrue(dimensionMessage, dimensions.containsKey(dimension)); + + JsonObject labelsObject = dimensions.getJsonObject(dimension); + assertEquals(expectedLabels.size(), labelsObject.size()); + for (int i = 0; i < expectedLabels.size(); i++) { + String expectedLabel = expectedLabels.get(i); + assertTrue(labelsObject.containsKey(expectedLabel)); + assertEquals(expectedCounts.get(i), Long.valueOf(labelsObject.getJsonNumber(expectedLabel).longValueExact())); + } } private void checkResultFromLuceneSearch(Session session, String val, JsonArray array, String ename, String field) @@ -448,6 +1126,57 @@ private void checkResultFromLuceneSearch(Session session, String val, JsonArray assertEquals(val, result.getJsonObject(ename).getString(field)); } + private JsonArray checkResultsSize(int n, String responseString) { + JsonArray result = Json.createReader(new ByteArrayInputStream(responseString.getBytes())).readArray(); + assertEquals(n, result.size()); + return result; + } + + private JsonObject checkResultsArraySize(int n, String responseString) { + JsonObject responseObject = Json.createReader(new ByteArrayInputStream(responseString.getBytes())).readObject(); + JsonArray results = responseObject.getJsonArray("results"); + assertEquals(n, results.size()); + return responseObject; + } + + private void checkResultsSource(JsonObject responseObject, List<Map<String, String>> expectations, Boolean scored) { + JsonArray results = responseObject.getJsonArray("results"); + assertEquals(expectations.size(), results.size()); + for (int i = 0; i < expectations.size(); i++) { + JsonObject result = results.getJsonObject(i); + assertTrue("id not present in " + 
result.toString(), result.containsKey("id")); + String message = "score " + (scored ? "not " : "") + "present in " + result.toString(); + assertEquals(message, scored, result.containsKey("score")); + + assertTrue(result.containsKey("source")); + JsonObject source = result.getJsonObject("source"); + assertTrue(source.containsKey("id")); + Map expectation = expectations.get(i); + for (Entry entry : expectation.entrySet()) { + String key = entry.getKey(); + String value = entry.getValue(); + if (value == null) { + assertFalse("Source " + source.toString() + " should NOT contain " + key, + source.containsKey(key)); + } else if (value.equals("notNull")) { + assertTrue("Source " + source.toString() + " should contain " + key, source.containsKey(key)); + } else { + assertTrue("Source " + source.toString() + " should contain " + key, source.containsKey(key)); + assertEquals(value, source.getString(key)); + } + } + } + } + + private Session piSession() throws URISyntaxException, IcatException { + ICAT icat = new ICAT(System.getProperty("serverUrl")); + Map credentials = new HashMap<>(); + credentials.put("username", "piOne"); + credentials.put("password", "piOne"); + Session piSession = icat.login("db", credentials); + return piSession; + } + private Session setupLuceneTest() throws Exception { ICAT icat = new ICAT(System.getProperty("serverUrl")); Map credentials = new HashMap<>(); @@ -460,11 +1189,7 @@ private Session setupLuceneTest() throws Exception { Session rootSession = icat.login("db", credentials); rootSession.luceneClear(); // Stop populating - - String urlString = System.getProperty("luceneUrl"); - URI uribase = new URI(urlString); - LuceneApi luceneApi = new LuceneApi(uribase); - luceneApi.clear(); // Really empty the db + clearSearch(); // Really empty the db List props = wSession.getProperties(); System.out.println(props); @@ -477,24 +1202,68 @@ private Session setupLuceneTest() throws Exception { return session; } - private JsonArray searchDatasets(Session 
session, String user, String text, Date lower, Date upper, + /** + * For use with the old lucene/data endpoint + */ + private JsonArray searchDatafiles(Session session, String user, String text, Date lower, Date upper, List parameters, int maxResults, int n) throws IcatException { - JsonArray result = Json - .createReader(new ByteArrayInputStream( - session.searchDatasets(user, text, lower, upper, parameters, maxResults).getBytes())) - .readArray(); - assertEquals(n, result.size()); - return result; + String responseString = session.searchDatafiles(user, text, lower, upper, parameters, maxResults); + return checkResultsSize(n, responseString); } - private JsonArray searchDatafiles(Session session, String user, String text, Date lower, Date upper, + /** + * For use with the old lucene/data endpoint + */ + private JsonArray searchDatasets(Session session, String user, String text, Date lower, Date upper, List parameters, int maxResults, int n) throws IcatException { - JsonArray result = Json - .createReader(new ByteArrayInputStream( - session.searchDatafiles(user, text, lower, upper, parameters, maxResults).getBytes())) - .readArray(); - assertEquals(n, result.size()); - return result; + String responseString = session.searchDatasets(user, text, lower, upper, parameters, maxResults); + return checkResultsSize(n, responseString); + } + + /** + * For use with the old lucene/data endpoint + */ + private JsonArray searchInvestigations(Session session, String user, String text, Date lower, Date upper, + List parameters, List samples, String userFullName, int maxResults, int n) + throws IcatException { + String responseString = session.searchInvestigations(user, text, lower, upper, parameters, samples, + userFullName, maxResults); + return checkResultsSize(n, responseString); + } + + /** + * For use with the new search/documents endpoint + */ + private JsonObject searchDatafiles(Session session, String user, String text, Date lower, Date upper, + List parameters, String 
searchAfter, int maxCount, String sort, JsonArray facets, int n) + throws IcatException { + String responseString = session.searchDatafiles(user, text, lower, upper, parameters, searchAfter, maxCount, + sort, + facets); + return checkResultsArraySize(n, responseString); + } + + /** + * For use with the new search/documents endpoint + */ + private JsonObject searchDatasets(Session session, String user, String text, Date lower, Date upper, + List parameters, String searchAfter, int maxCount, String sort, JsonArray facets, int n) + throws IcatException { + String responseString = session.searchDatasets(user, text, lower, upper, parameters, searchAfter, maxCount, + sort, + facets); + return checkResultsArraySize(n, responseString); + } + + /** + * For use with the new search/documents endpoint + */ + private JsonObject searchInvestigations(Session session, String user, String text, Date lower, Date upper, + List parameters, String userFullName, String searchAfter, int maxCount, String sort, + JsonArray facets, int n) throws IcatException { + String responseString = session.searchInvestigations(user, text, lower, upper, parameters, userFullName, + searchAfter, maxCount, sort, facets); + return checkResultsArraySize(n, responseString); } @Test @@ -517,9 +1286,10 @@ public void testGet() throws Exception { long fid = search(session, "Facility.id", 1).getJsonNumber(0).longValueExact(); + String query = "Facility INCLUDE InvestigationType"; JsonObject fac = Json .createReader( - new ByteArrayInputStream(session.get("Facility INCLUDE InvestigationType", fid).getBytes())) + new ByteArrayInputStream(session.get(query, fid).getBytes())) .readObject().getJsonObject("Facility"); assertEquals("Test port facility", fac.getString("name")); @@ -533,6 +1303,13 @@ public void testGet() throws Exception { } Collections.sort(names); assertEquals(Arrays.asList("atype", "btype"), names); + + // Search with a user who should not see any results + Session piOneSession = piOneSession(); + 
wSession.addRule(null, "Facility", "R"); + fac = Json.createReader(new StringReader(piOneSession.get(query, fid))).readObject().getJsonObject("Facility"); + its = fac.getJsonArray("investigationTypes"); + assertEquals(0, its.size()); } @Test @@ -583,11 +1360,7 @@ public void testSearchWithNew() throws Exception { @Test public void testWait() throws Exception { - ICAT icat = new ICAT(System.getProperty("serverUrl")); - Map credentials = new HashMap<>(); - credentials.put("username", "root"); - credentials.put("password", "password"); - Session rootSession = icat.login("db", credentials); + Session rootSession = rootSession(); long t = System.currentTimeMillis(); rootSession.waitMillis(1000L); System.out.println(System.currentTimeMillis() - t); @@ -607,14 +1380,15 @@ public void testSearch() throws Exception { JsonArray array; - JsonObject user = search(session, "SELECT u FROM User u WHERE u.name = 'db/lib'", 1).getJsonObject(0).getJsonObject("User"); + JsonObject user = search(session, "SELECT u FROM User u WHERE u.name = 'db/lib'", 1).getJsonObject(0) + .getJsonObject("User"); assertEquals("Horace", user.getString("givenName")); assertEquals("Worblehat", user.getString("familyName")); assertEquals("Unseen University", user.getString("affiliation")); String query = "SELECT inv FROM Investigation inv JOIN inv.shifts AS s " - + "WHERE s.instrument.pid = 'ig:0815' AND s.comment = 'beamtime' " - + "AND s.startDate <= '2014-01-01 12:00:00' AND s.endDate >= '2014-01-01 12:00:00'"; + + "WHERE s.instrument.pid = 'ig:0815' AND s.comment = 'beamtime' " + + "AND s.startDate <= '2014-01-01 12:00:00' AND s.endDate >= '2014-01-01 12:00:00'"; JsonObject inv = search(session, query, 1).getJsonObject(0).getJsonObject("Investigation"); assertEquals("expt1", inv.getString("name")); assertEquals("zero", inv.getString("visitId")); @@ -715,6 +1489,14 @@ public void testSearch() throws Exception { Collections.sort(names); assertEquals(Arrays.asList("atype", "btype"), names); } + + // 
Search with a user who should not see any results + Session piOneSession = piOneSession(); + wSession.addRule(null, "Facility", "R"); + JsonObject searchResult = search(piOneSession, "Facility INCLUDE InvestigationType", 1).getJsonObject(0); + JsonArray investigationTypes = searchResult.getJsonObject("Facility").getJsonArray("investigationTypes"); + System.out.println(investigationTypes); + assertEquals(0, investigationTypes.size()); } @Test @@ -742,14 +1524,15 @@ public void testSearchLists() throws Exception { wSession.addRule(null, "Facility", "R"); search(notrootSession, query, 3); // notroot is in user group giving CRUD to all, so should see all 3 search(piOneSession, query, 0); // piOne should pass for Facility, but not for any Investigation - + wSession.addRule(null, "SELECT i FROM Investigation i WHERE i.visitId = 'zero'", "R"); search(notrootSession, query, 3); // notroot is in user group giving CRUD to all, so should see all 3 JsonArray results = search(piOneSession, query, 1); // piOne should pass for Facility, one Investigation JsonObject result = results.getJsonObject(0); JsonObject investigation = result.getJsonObject("Investigation"); - assertEquals("Wrong visitId in "+ investigation.toString(), "zero", investigation.getString("visitId", null)); - + String visitId = investigation.getString("visitId", null); + assertEquals("Wrong visitId in " + investigation.toString(), "zero", visitId); + query = "SELECT f.investigationTypes FROM Facility f"; wSession.addRule(null, "InvestigationType", "R"); search(notrootSession, query, 2); // notroot is in user group giving CRUD to all, so should see both @@ -1077,17 +1860,6 @@ private JsonArray search(Session session, String query, int n) throws IcatExcept return result; } - private JsonArray searchInvestigations(Session session, String user, String text, Date lower, Date upper, - List parameters, List samples, String userFullName, int maxResults, int n) - throws IcatException { - JsonArray result = 
Json.createReader(new ByteArrayInputStream( - session.searchInvestigations(user, text, lower, upper, parameters, samples, userFullName, maxResults) - .getBytes())) - .readArray(); - assertEquals(n, result.size()); - return result; - } - @Test public void testWriteGood() throws Exception { @@ -1113,7 +1885,7 @@ public void testWriteGood() throws Exception { JsonArray array = search(session, "SELECT it.name, it.facility.name FROM InvestigationType it WHERE it.id = " + newInvTypeId, 1) - .getJsonArray(0); + .getJsonArray(0); assertEquals("ztype", array.getString(0)); assertEquals("Test port facility", array.getString(1)); @@ -1575,14 +2347,65 @@ private void exportMetaDataDump(Map credentials) throws Exceptio Files.delete(dump2); } + @Test + public void exportMetaDataQueryUser() throws Exception { + Session rootSession = rootSession(); + Session piOneSession = piOneSession(); + Path path = Paths.get(this.getClass().getResource("/icat.port").toURI()); + + // Get known configuration + rootSession.importMetaData(path, DuplicateAction.CHECK, Attributes.ALL); + String query = "Investigation INCLUDE Facility, Dataset"; + Path dump1 = Files.createTempFile("dump1", ".tmp"); + Path dump2 = Files.createTempFile("dump2", ".tmp"); + + // piOne should only be able to dump the Investigation, but not have R access to + // Dataset, Facility + wSession.addRule(null, "Investigation", "R"); + try (InputStream stream = piOneSession.exportMetaData(query, Attributes.USER)) { + Files.copy(stream, dump1, StandardCopyOption.REPLACE_EXISTING); + } + // piOne should now be able to dump all due to rules giving R access + wSession.addRule(null, "Facility", "R"); + wSession.addRule(null, "Dataset", "R"); + try (InputStream stream = piOneSession.exportMetaData(query, Attributes.USER)) { + Files.copy(stream, dump2, StandardCopyOption.REPLACE_EXISTING); + } + List restrictedLines = Files.readAllLines(dump1); + List permissiveLines = Files.readAllLines(dump2); + String restrictiveMessage = " appeared 
in export, but piOne use should not have access"; + String permissiveMessage = " did not appear in export, but piOne should have access"; + + boolean containsInvestigations = false; + for (String line : restrictedLines) { + System.out.println(line); + containsInvestigations = containsInvestigations || line.startsWith("Investigation"); + assertFalse("Dataset" + restrictiveMessage, line.startsWith("Dataset")); + assertFalse("Facility" + restrictiveMessage, line.startsWith("Facility")); + } + assertTrue("Investigation" + permissiveMessage, containsInvestigations); + + containsInvestigations = false; + boolean containsDatasets = false; + boolean containsFacilities = false; + for (String line : permissiveLines) { + System.out.println(line); + containsInvestigations = containsInvestigations || line.startsWith("Investigation"); + containsDatasets = containsDatasets || line.startsWith("Dataset"); + containsFacilities = containsFacilities || line.startsWith("Facility"); + } + assertTrue("Investigation" + permissiveMessage, containsInvestigations); + assertTrue("Dataset" + permissiveMessage, containsDatasets); + assertTrue("Facility" + permissiveMessage, containsFacilities); + + Files.delete(dump1); + Files.delete(dump2); + } + @Ignore("Test fails - appears brittle to differences in timezone") @Test public void exportMetaDataQuery() throws Exception { - ICAT icat = new ICAT(System.getProperty("serverUrl")); - Map credentials = new HashMap<>(); - credentials.put("username", "root"); - credentials.put("password", "password"); - Session session = icat.login("db", credentials); + Session session = rootSession(); Path path = Paths.get(this.getClass().getResource("/icat.port").toURI()); // Get known configuration @@ -1613,11 +2436,7 @@ public void exportMetaDataQuery() throws Exception { @Test public void importMetaDataAllNotRoot() throws Exception { - ICAT icat = new ICAT(System.getProperty("serverUrl")); - Map credentials = new HashMap<>(); - credentials.put("username", 
"piOne"); - credentials.put("password", "piOne"); - Session session = icat.login("db", credentials); + Session session = piOneSession(); Path path = Paths.get(this.getClass().getResource("/icat.port").toURI()); try { session.importMetaData(path, DuplicateAction.CHECK, Attributes.ALL); @@ -1628,11 +2447,7 @@ public void importMetaDataAllNotRoot() throws Exception { } private void importMetaData(Attributes attributes, String userName) throws Exception { - ICAT icat = new ICAT(System.getProperty("serverUrl")); - Map credentials = new HashMap<>(); - credentials.put("username", "root"); - credentials.put("password", "password"); - Session session = icat.login("db", credentials); + Session session = rootSession(); Path path = Paths.get(this.getClass().getResource("/icat.port").toURI()); start = System.currentTimeMillis(); @@ -1740,17 +2555,13 @@ public void testLucenePopulate() throws Exception { Session session = icat.login("db", credentials); session.luceneClear(); // Stop populating - - String urlString = System.getProperty("luceneUrl"); - URI uribase = new URI(urlString); - LuceneApi luceneApi = new LuceneApi(uribase); - luceneApi.clear(); // Really empty the db + clearSearch(); // Really empty the db assertTrue(session.luceneGetPopulating().isEmpty()); - session.lucenePopulate("Dataset", -1); - session.lucenePopulate("Datafile", -1); - session.lucenePopulate("Investigation", -1); + session.lucenePopulate("Dataset", 0); + session.lucenePopulate("Datafile", 0); + session.lucenePopulate("Investigation", 0); do { Thread.sleep(1000); diff --git a/src/test/java/org/icatproject/integration/TestWS.java b/src/test/java/org/icatproject/integration/TestWS.java index 130e38193..557cecfcc 100644 --- a/src/test/java/org/icatproject/integration/TestWS.java +++ b/src/test/java/org/icatproject/integration/TestWS.java @@ -71,7 +71,7 @@ */ public class TestWS { - private static final String version = "6.0."; + private static final String version = "6.1."; private static Random random; 
private static WSession session; diff --git a/src/test/java/org/icatproject/integration/WSession.java b/src/test/java/org/icatproject/integration/WSession.java index 196fc1126..f44497b21 100644 --- a/src/test/java/org/icatproject/integration/WSession.java +++ b/src/test/java/org/icatproject/integration/WSession.java @@ -452,9 +452,9 @@ public void logout() throws IcatException_Exception { icat.logout(sessionId); } - // This assumes that the lucene.commitSeconds is set to 1 for testing + // This assumes that the search.commitSeconds is set to 1 for testing // purposes - public void synchLucene() throws InterruptedException { + public void synchSearch() throws InterruptedException { Thread.sleep(2000); } diff --git a/src/test/scripts/prepare_test.py b/src/test/scripts/prepare_test.py index f2bb8cf1f..4ebcefb4f 100644 --- a/src/test/scripts/prepare_test.py +++ b/src/test/scripts/prepare_test.py @@ -8,12 +8,17 @@ from zipfile import ZipFile import subprocess -if len(sys.argv) != 4: +if len(sys.argv) != 5: raise RuntimeError("Wrong number of arguments") containerHome = sys.argv[1] icat_url = sys.argv[2] -lucene_url = sys.argv[3] +search_engine = sys.argv[3] +search_urls = sys.argv[4] + +if search_engine not in ["LUCENE", "OPENSEARCH", "ELASTICSEARCH"]: + raise RuntimeError("Search engine %s unrecognised, " % search_engine + + "should be one of LUCENE, ELASTICSEARCH, OPENSEARCH") subst = dict(os.environ) @@ -23,29 +28,33 @@ shutil.copy("src/main/config/run.properties.example", "src/test/install/run.properties.example") -if not os.path.exists("src/test/install/run.properties"): - with open("src/test/install/run.properties", "w") as f: - contents = [ - "lifetimeMinutes = 120", - "rootUserNames = db/root", - "maxEntities = 10000", - "maxIdsInQuery = 500", - "importCacheSize = 50", - "exportCacheSize = 50", - "authn.list = db", - "authn.db.url = %s" % icat_url, - "notification.list = Dataset Datafile", - "notification.Dataset = CU", - "notification.Datafile = CU", - 
"log.list = SESSION WRITE READ INFO", - "lucene.url = %s" % lucene_url, - "lucene.populateBlockSize = 10000", - "lucene.directory = %s/data/lucene" % subst["HOME"], - "lucene.backlogHandlerIntervalSeconds = 60", - "lucene.enqueuedRequestIntervalSeconds = 3", - "key = wombat" - ] - f.write("\n".join(contents)) +with open("src/test/install/run.properties", "w") as f: + contents = [ + "lifetimeMinutes = 120", + "rootUserNames = db/root", + "maxEntities = 10000", + "maxIdsInQuery = 500", + "importCacheSize = 50", + "exportCacheSize = 50", + "authn.list = db", + "authn.db.url = %s" % icat_url, + "authn.simple.url = %s" % icat_url, + "notification.list = Dataset Datafile", + "notification.Dataset = CU", + "notification.Datafile = CU", + "log.list = SESSION WRITE READ INFO", + "search.engine = %s" % search_engine, + "search.urls = %s" % search_urls, + "search.populateBlockSize = 10000", + "search.searchBlockSize = 1000", + "search.directory = %s/data/search" % subst["HOME"], + "search.backlogHandlerIntervalSeconds = 60", + "search.enqueuedRequestIntervalSeconds = 3", + "search.aggregateFilesIntervalSeconds = 3600", + "search.maxSearchTimeSeconds = 5", + "key = wombat" + ] + f.write("\n".join(contents)) if not os.path.exists("src/test/install/setup.properties"): with open("src/test/install/setup.properties", "w") as f: