Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migration HttpSolrClient to Http2SolrClient and ConcurrentUpdateHttp2SolrClient #10241

Merged
merged 20 commits into from
Jan 22, 2025
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions doc/release-notes/10241-new-solr-client.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[HttpSolrClient](https://solr.apache.org/docs/9_4_1/solrj/org/apache/solr/client/solrj/impl/HttpSolrClient.html) is deprecated as of Solr 9 and will be removed in a future major release of Solr. It is recommended to use [Http2SolrClient](https://solr.apache.org/docs/9_4_1/solrj/org/apache/solr/client/solrj/impl/Http2SolrClient.html) instead.

The [Solr documentation](https://solr.apache.org/guide/solr/latest/deployment-guide/solrj.html#types-of-solrclients) describes it as an _async, non-blocking and general-purpose client that leverage HTTP/2 using the Jetty Http library_.

In Solr 9.4.1, the Http2SolrClient is marked as experimental, but as of Solr 9.6 it no longer carries that label.

The ConcurrentUpdateHttp2SolrClient, which is expected to be more efficient for indexing, is now also used in some cases.

For more information, see issue [#10161](https://github.com/IQSS/dataverse/issues/10161) and pull request [#10241](https://github.com/IQSS/dataverse/pull/10241).
3 changes: 0 additions & 3 deletions doc/sphinx-guides/source/installation/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3356,9 +3356,6 @@ please find all known feature flags below. Any of these flags can be activated u
* - reduce-solr-deletes
- Avoids deleting and recreating solr documents for dataset files when reindexing.
- ``Off``
* - reduce-solr-deletes
- Avoids deleting and recreating solr documents for dataset files when reindexing.
- ``Off``
* - disable-return-to-author-reason
- Removes the reason field in the `Publish/Return To Author` dialog that was added as a required field in v6.2 and makes the reason an optional parameter in the :ref:`return-a-dataset` API call.
- ``Off``
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@
import edu.harvard.iq.dataverse.util.FileMetadataUtil;
import java.util.Comparator;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.impl.BaseHttpSolrClient.RemoteSolrException;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
Expand Down Expand Up @@ -1041,7 +1041,7 @@ public Set<Long> getFileIdsInVersionFromSolr(Long datasetVersionId, String patte

try {
queryResponse = solrClientService.getSolrClient().query(solrQuery);
} catch (HttpSolrClient.RemoteSolrException ex) {
} catch (RemoteSolrException ex) {
logger.fine("Remote Solr Exception: " + ex.getLocalizedMessage());
String msg = ex.getLocalizedMessage();
if (msg.contains(SearchFields.FILE_DELETED)) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package edu.harvard.iq.dataverse.search;

import java.io.IOException;
import java.util.logging.Logger;

import org.apache.solr.client.solrj.SolrClient;

import edu.harvard.iq.dataverse.settings.JvmSettings;
import edu.harvard.iq.dataverse.util.SystemConfig;
import jakarta.ejb.EJB;

/**
 * Common base for the services that hold a {@link SolrClient}.
 * <p>
 * Subclasses own the client instance and implement its lifecycle
 * ({@link #init()}, {@link #close()}) and accessors; this class supplies the
 * shared close helper, the re-initialization sequence and the Solr URL.
 *
 * @author jeromeroucou
 */
public abstract class AbstractSolrClientService {
    private static final Logger logger = Logger.getLogger(AbstractSolrClientService.class.getCanonicalName());

    @EJB
    SystemConfig systemConfig;

    /** Creates the Solr client held by the implementing service. */
    public abstract void init();

    /** Closes the Solr client held by the implementing service. */
    public abstract void close();

    /** @return the Solr client held by the implementing service */
    public abstract SolrClient getSolrClient();

    /** @param solrClient the Solr client the implementing service should hold */
    public abstract void setSolrClient(SolrClient solrClient);

    /**
     * Closes the given client, logging (not rethrowing) any {@link IOException}.
     * <p>
     * Java passes references by value, so this method cannot clear the caller's
     * variable; a caller that wants to drop its reference must do so itself
     * after this call returns.
     *
     * @param solrClient the client to close; {@code null} is ignored
     */
    public void close(SolrClient solrClient) {
        if (solrClient == null) {
            return;
        }
        try {
            solrClient.close();
        } catch (IOException e) {
            logger.warning("Solr closing error: " + e);
        }
    }

    /** Calls {@link #close()} followed by {@link #init()}. */
    public void reInitialize() {
        close();
        init();
    }

    /**
     * Builds the Solr URL as {@code <protocol>://<host:port><path>}.
     *
     * @return the URL assembled from the configured protocol, host/port and path
     */
    public String getSolrUrl() {
        // Get from MPCONFIG. Might be configured by a sysadmin or simply return the
        // default shipped with resources/META-INF/microprofile-config.properties.
        final String protocol = JvmSettings.SOLR_PROT.lookup();
        final String path = JvmSettings.SOLR_PATH.lookup();
        return protocol + "://" + this.systemConfig.getSolrHostColonPort() + path;
    }
}
83 changes: 38 additions & 45 deletions src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
Original file line number Diff line number Diff line change
@@ -1,8 +1,33 @@
package edu.harvard.iq.dataverse.search;

import edu.harvard.iq.dataverse.*;
import edu.harvard.iq.dataverse.ControlledVocabularyValue;
import edu.harvard.iq.dataverse.DataFile;
import edu.harvard.iq.dataverse.DataFileServiceBean;
import edu.harvard.iq.dataverse.DataFileTag;
import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.DatasetField;
import edu.harvard.iq.dataverse.DatasetFieldCompoundValue;
import edu.harvard.iq.dataverse.DatasetFieldConstant;
import edu.harvard.iq.dataverse.DatasetFieldServiceBean;
import edu.harvard.iq.dataverse.DatasetFieldType;
import edu.harvard.iq.dataverse.DatasetFieldValue;
import edu.harvard.iq.dataverse.DatasetFieldValueValidator;
import edu.harvard.iq.dataverse.DatasetLinkingServiceBean;
import edu.harvard.iq.dataverse.DatasetServiceBean;
import edu.harvard.iq.dataverse.DatasetVersion;
import edu.harvard.iq.dataverse.DatasetVersion.VersionState;
import edu.harvard.iq.dataverse.DatasetVersionServiceBean;
import edu.harvard.iq.dataverse.Dataverse;
import edu.harvard.iq.dataverse.DataverseLinkingServiceBean;
import edu.harvard.iq.dataverse.DataverseServiceBean;
import edu.harvard.iq.dataverse.DvObject;
import edu.harvard.iq.dataverse.DvObject.DType;
import edu.harvard.iq.dataverse.DvObjectServiceBean;
import edu.harvard.iq.dataverse.Embargo;
import edu.harvard.iq.dataverse.FileMetadata;
import edu.harvard.iq.dataverse.GlobalId;
import edu.harvard.iq.dataverse.PermissionServiceBean;
import edu.harvard.iq.dataverse.Retention;
import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean;
import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinUserServiceBean;
import edu.harvard.iq.dataverse.batch.util.LoggingUtil;
Expand Down Expand Up @@ -48,8 +73,6 @@
import java.util.logging.Logger;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import jakarta.annotation.PostConstruct;
import jakarta.annotation.PreDestroy;
import jakarta.ejb.AsyncResult;
import jakarta.ejb.Asynchronous;
import jakarta.ejb.EJB;
Expand All @@ -66,11 +89,9 @@

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrQuery.SortClause;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.common.SolrDocument;
Expand Down Expand Up @@ -125,16 +146,15 @@ public class IndexServiceBean {
@EJB
SettingsServiceBean settingsService;
@EJB
SolrClientService solrClientService;
SolrClientService solrClientService; // only for query index on Solr
@EJB
SolrClientIndexService solrClientIndexService; // only for add, update, or remove index on Solr
@EJB
DataFileServiceBean dataFileService;

@EJB
VariableServiceBean variableService;

@EJB
IndexBatchServiceBean indexBatchService;


@EJB
DatasetFieldServiceBean datasetFieldService;

Expand All @@ -157,37 +177,10 @@ public class IndexServiceBean {
private static final String IN_REVIEW_STRING = "In Review";
private static final String DEACCESSIONED_STRING = "Deaccessioned";
public static final String HARVESTED = "Harvested";
private String rootDataverseName;
private Dataverse rootDataverseCached;
SolrClient solrServer;

private VariableMetadataUtil variableMetadataUtil;

@PostConstruct
public void init() {
// Get from MPCONFIG. Might be configured by a sysadmin or simply return the default shipped with
// resources/META-INF/microprofile-config.properties.
String protocol = JvmSettings.SOLR_PROT.lookup();
String path = JvmSettings.SOLR_PATH.lookup();

String urlString = protocol + "://" + systemConfig.getSolrHostColonPort() + path;
solrServer = new HttpSolrClient.Builder(urlString).build();

rootDataverseName = findRootDataverseCached().getName();
}

@PreDestroy
public void close() {
if (solrServer != null) {
try {
solrServer.close();
} catch (IOException e) {
logger.warning("Solr closing error: " + e);
}
solrServer = null;
}
}

@TransactionAttribute(REQUIRES_NEW)
public Future<String> indexDataverseInNewTransaction(Dataverse dataverse) throws SolrServerException, IOException{
return indexDataverse(dataverse, false);
Expand Down Expand Up @@ -326,7 +319,7 @@ public Future<String> indexDataverse(Dataverse dataverse, boolean processPaths)
String status;
try {
if (dataverse.getId() != null) {
solrClientService.getSolrClient().add(docs);
solrClientIndexService.getSolrClient().add(docs);
} else {
logger.info("WARNING: indexing of a dataverse with no id attempted");
}
Expand Down Expand Up @@ -1745,7 +1738,7 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set<Long> d
final SolrInputDocuments docs = toSolrDocs(indexableDataset, datafilesInDraftVersion);

try {
solrClientService.getSolrClient().add(docs.getDocuments());
solrClientIndexService.getSolrClient().add(docs.getDocuments());
} catch (SolrServerException | IOException ex) {
if (ex.getCause() instanceof SolrServerException) {
throw new SolrServerException(ex);
Expand Down Expand Up @@ -2007,7 +2000,7 @@ private void updatePathForExistingSolrDocs(DvObject object) throws SolrServerExc

sid.removeField(SearchFields.SUBTREE);
sid.addField(SearchFields.SUBTREE, paths);
UpdateResponse addResponse = solrClientService.getSolrClient().add(sid);
UpdateResponse addResponse = solrClientIndexService.getSolrClient().add(sid);
if (object.isInstanceofDataset()) {
for (DataFile df : dataset.getFiles()) {
solrQuery.setQuery(SearchUtil.constructQuery(SearchFields.ENTITY_ID, df.getId().toString()));
Expand All @@ -2020,7 +2013,7 @@ private void updatePathForExistingSolrDocs(DvObject object) throws SolrServerExc
}
sid.removeField(SearchFields.SUBTREE);
sid.addField(SearchFields.SUBTREE, paths);
addResponse = solrClientService.getSolrClient().add(sid);
addResponse = solrClientIndexService.getSolrClient().add(sid);
}
}
}
Expand Down Expand Up @@ -2062,7 +2055,7 @@ public String delete(Dataverse doomed) {
logger.fine("deleting Solr document for dataverse " + doomed.getId());
UpdateResponse updateResponse;
try {
updateResponse = solrClientService.getSolrClient().deleteById(solrDocIdentifierDataverse + doomed.getId());
updateResponse = solrClientIndexService.getSolrClient().deleteById(solrDocIdentifierDataverse + doomed.getId());
} catch (SolrServerException | IOException ex) {
return ex.toString();
}
Expand All @@ -2082,7 +2075,7 @@ public String removeSolrDocFromIndex(String doomed) {
logger.fine("deleting Solr document: " + doomed);
UpdateResponse updateResponse;
try {
updateResponse = solrClientService.getSolrClient().deleteById(doomed);
updateResponse = solrClientIndexService.getSolrClient().deleteById(doomed);
} catch (SolrServerException | IOException ex) {
return ex.toString();
}
Expand Down Expand Up @@ -2285,7 +2278,7 @@ public List<String> findPermissionsInSolrOnly() throws SearchException {
boolean done = false;
while (!done) {
q.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
QueryResponse rsp = solrServer.query(q);
QueryResponse rsp = solrClientService.getSolrClient().query(q);
String nextCursorMark = rsp.getNextCursorMark();
logger.fine("Next cursor mark (1K entries): " + nextCursorMark);
SolrDocumentList list = rsp.getResults();
Expand Down Expand Up @@ -2367,7 +2360,7 @@ private List<String> findDvObjectInSolrOnly(String type) throws SearchException
solrQuery.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
QueryResponse rsp = null;
try {
rsp = solrServer.query(solrQuery);
rsp = solrClientService.getSolrClient().query(solrQuery);
} catch (SolrServerException | IOException ex) {
throw new SearchException("Error searching Solr type: " + type, ex);

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package edu.harvard.iq.dataverse.search;

import java.util.logging.Logger;

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.impl.ConcurrentUpdateHttp2SolrClient;
import org.apache.solr.client.solrj.impl.Http2SolrClient;

import jakarta.annotation.PostConstruct;
import jakarta.annotation.PreDestroy;
import jakarta.ejb.Singleton;
import jakarta.inject.Named;

/**
* Solr client to provide insert/update/delete operations.
* Don't use this service with queries to Solr, use {@link SolrClientService} instead.
*/
@Named
@Singleton
public class SolrClientIndexService extends AbstractSolrClientService {

private static final Logger logger = Logger.getLogger(SolrClientIndexService.class.getCanonicalName());

private SolrClient solrClient;

@PostConstruct
public void init() {
solrClient = new ConcurrentUpdateHttp2SolrClient.Builder(
getSolrUrl(), new Http2SolrClient.Builder().build()).build();
}

@PreDestroy
public void close() {
close(solrClient);
}

public SolrClient getSolrClient() {
// Should never happen - but?
if (solrClient == null) {
init();
}
return solrClient;
}

public void setSolrClient(SolrClient solrClient) {
this.solrClient = solrClient;
}

}
Loading
Loading