Skip to content

Commit

Permalink
make dataset type searchable and facetable #10517
Browse files Browse the repository at this point in the history
  • Loading branch information
pdurbin committed Jul 23, 2024
1 parent 25b2ea5 commit 2b83f22
Show file tree
Hide file tree
Showing 8 changed files with 74 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@
<collDate cycle="P1" event="end" date="1006-01-01">1006-01-01</collDate>
<collDate cycle="P2" event="start" date="1006-02-01">1006-02-01</collDate>
<collDate cycle="P2" event="end" date="1006-02-02">1006-02-02</collDate>
<dataKind>software</dataKind>
<dataKind>workflow</dataKind>
<nation>Afghanistan</nation>
<geogCover>GeographicCoverageCity1</geogCover>
<geogCover>GeographicCoverageStateProvince1</geogCover>
Expand Down
2 changes: 2 additions & 0 deletions doc/sphinx-guides/source/user/dataset-types.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ Intro

Datasets can have a dataset type such as "dataset", "software", or "workflow".

When browsing or searching, these types appear under a facet called "Dataset Type".

Enabling Dataset Types
======================

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import edu.harvard.iq.dataverse.dataaccess.DataAccess;
import edu.harvard.iq.dataverse.dataaccess.DataAccessRequest;
import edu.harvard.iq.dataverse.dataaccess.StorageIO;
import edu.harvard.iq.dataverse.dataset.DatasetType;
import edu.harvard.iq.dataverse.datavariable.DataVariable;
import edu.harvard.iq.dataverse.datavariable.VariableMetadata;
import edu.harvard.iq.dataverse.datavariable.VariableMetadataUtil;
Expand Down Expand Up @@ -1000,6 +1001,13 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set<Long
solrInputDocument.addField(SearchFields.METADATA_SOURCE, rdvName); //rootDataverseName);
}

if (FeatureFlags.DATASET_TYPES.enabled()) {
DatasetType datasetType = dataset.getDatasetType();
if (datasetType != null) {
solrInputDocument.addField(SearchFields.DATASET_TYPE, datasetType.getBaseType().toString());
}
}

DatasetVersion datasetVersion = indexableDataset.getDatasetVersion();
String parentDatasetTitle = "TBD";
if (datasetVersion != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,10 @@ more targeted results for just datasets. The format is YYYY (i.e.
public static final String DATASET_PUBLICATION_DATE = "dsPublicationDate";
public static final String DATASET_PERSISTENT_ID = "dsPersistentId";
public static final String DATASET_VERSION_ID = "datasetVersionId";
/**
* Datasets can be software, workflow, etc. See the DatasetType object.
*/
public static final String DATASET_TYPE = "datasetType_s";

public static final String VARIABLE_NAME = "variableName";
public static final String VARIABLE_LABEL = "variableLabel";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,9 @@ public SolrQueryResponse search(
// Facets to Retrieve
// -----------------------------------
solrQuery.addFacetField(SearchFields.METADATA_TYPES);
if (FeatureFlags.DATASET_TYPES.enabled()) {
solrQuery.addFacetField(SearchFields.DATASET_TYPE);
}
solrQuery.addFacetField(SearchFields.DATAVERSE_CATEGORY);
solrQuery.addFacetField(SearchFields.METADATA_SOURCE);
solrQuery.addFacetField(SearchFields.PUBLICATION_YEAR);
Expand Down Expand Up @@ -484,6 +487,7 @@ public SolrQueryResponse search(
String identifier = (String) solrDocument.getFieldValue(SearchFields.IDENTIFIER);
String citation = (String) solrDocument.getFieldValue(SearchFields.DATASET_CITATION);
String citationPlainHtml = (String) solrDocument.getFieldValue(SearchFields.DATASET_CITATION_HTML);
String datasetType = (String) solrDocument.getFieldValue(SearchFields.DATASET_TYPE);
String persistentUrl = (String) solrDocument.getFieldValue(SearchFields.PERSISTENT_URL);
String name = (String) solrDocument.getFieldValue(SearchFields.NAME);
String nameSort = (String) solrDocument.getFieldValue(SearchFields.NAME_SORT);
Expand Down Expand Up @@ -641,6 +645,9 @@ public SolrQueryResponse search(
if (authors != null) {
solrSearchResult.setDatasetAuthors(authors);
}
if (datasetType != null) {
solrSearchResult.setDatasetType(datasetType);
}
} else if (type.equals("files")) {
String parentGlobalId = null;
Object parentGlobalIdObject = solrDocument.getFieldValue(SearchFields.PARENT_IDENTIFIER);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder;

public class SolrSearchResult {
// TODO: remove all tabs from this file
private static final Logger logger = Logger.getLogger(SolrSearchResult.class.getCanonicalName());

private String id;
Expand Down Expand Up @@ -72,6 +73,7 @@ public class SolrSearchResult {
private String dataverseAffiliation;
private String citation;
private String citationHtml;
private String datasetType;
/**
* Files and datasets might have a UNF. Dataverses don't.
*/
Expand Down Expand Up @@ -948,6 +950,14 @@ public void setCitationHtml(String citationHtml) {
this.citationHtml = citationHtml;
}

/**
 * Returns the dataset type (e.g. "dataset", "software", "workflow")
 * as indexed in Solr, or {@code null} if none was set.
 */
public String getDatasetType() {
    return this.datasetType;
}

/**
 * Sets the dataset type for this search result.
 *
 * @param datasetType the type string (e.g. "dataset", "software", "workflow");
 *                    may be {@code null}
 */
public void setDatasetType(String datasetType) {
    this.datasetType = datasetType;
}

public String getFiletype() {
return filetype;
}
Expand Down
3 changes: 2 additions & 1 deletion src/main/java/propertyFiles/staticSearchFields.properties
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@ staticSearchFields.dvObjectType=Type
staticSearchFields.fileTag=File Tag
staticSearchFields.fileAccess=Access
staticSearchFields.publicationStatus=Publication Status
staticSearchFields.subject_ss=Subject
staticSearchFields.subject_ss=Subject
staticSearchFields.datasetType_s=Dataset Type
44 changes: 40 additions & 4 deletions src/test/java/edu/harvard/iq/dataverse/api/DatasetTypesIT.java
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
package edu.harvard.iq.dataverse.api;

import edu.harvard.iq.dataverse.search.SearchFields;
import io.restassured.RestAssured;
import io.restassured.path.json.JsonPath;
import io.restassured.response.Response;
import static jakarta.ws.rs.core.Response.Status.CREATED;
import static jakarta.ws.rs.core.Response.Status.OK;
import org.hamcrest.CoreMatchers;
import static org.hamcrest.CoreMatchers.equalTo;
import static org.junit.jupiter.api.Assertions.assertEquals;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
Expand Down Expand Up @@ -45,10 +48,31 @@ public void testCreateSoftwareDatasetNative() {
String datasetType = JsonPath.from(getDatasetJson.getBody().asString()).getString("data.datasetType");
System.out.println("datasetType: " + datasetType);
assertEquals("software", datasetType);

Response searchDraft = UtilIT.searchAndShowFacets("id:dataset_" + datasetId + "_draft", apiToken);
searchDraft.prettyPrint();
searchDraft.then().assertThat()
.body("data.total_count", CoreMatchers.is(1))
.body("data.count_in_response", CoreMatchers.is(1))
.body("data.facets[0].datasetType_s.friendly", CoreMatchers.is("Dataset Type"))
.body("data.facets[0].datasetType_s.labels[0].software", CoreMatchers.is(1))
.statusCode(OK.getStatusCode());

UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken).then().assertThat().statusCode(OK.getStatusCode());
UtilIT.publishDatasetViaNativeApi(datasetPid, "major", apiToken).then().assertThat().statusCode(OK.getStatusCode());

// Response searchAsGuest = UtilIT.search(SearchFields.DATASET_TYPE + ":software", null);
// searchAsGuest.prettyPrint();
// searchAsGuest.then().assertThat()
// .body("data.total_count", CoreMatchers.is(1))
// .body("data.count_in_response", CoreMatchers.is(1))
// .body("data.facets[0].datasetType_s.friendly", CoreMatchers.is("Dataset Type"))
// .body("data.facets[0].datasetType_s.labels[0].software", CoreMatchers.is(1))
// .statusCode(OK.getStatusCode());
}

@Test
public void testCreateSoftwareDatasetSemantic() {
public void testCreateWorkflowDatasetSemantic() {
Response createUser = UtilIT.createRandomUser();
createUser.then().assertThat().statusCode(OK.getStatusCode());
String username = UtilIT.getUsernameFromResponse(createUser);
Expand Down Expand Up @@ -76,6 +100,7 @@ public void testCreateSoftwareDatasetSemantic() {
String datasetType = JsonPath.from(getDatasetJson.getBody().asString()).getString("data.datasetType");
System.out.println("datasetType: " + datasetType);
assertEquals("software", datasetType);

}

@Test
Expand Down Expand Up @@ -113,7 +138,7 @@ public void testImportJson() {
}

@Test
public void testImportDDI() {
public void testImportDdiWorkflow() {
Response createUser = UtilIT.createRandomUser();
createUser.then().assertThat().statusCode(OK.getStatusCode());
String username = UtilIT.getUsernameFromResponse(createUser);
Expand All @@ -126,11 +151,13 @@ public void testImportDDI() {
String dataverseAlias = UtilIT.getAliasFromResponse(createDataverse);
Integer dataverseId = UtilIT.getDataverseIdFromResponse(createDataverse);

UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken).then().assertThat().statusCode(OK.getStatusCode());

String jsonIn = UtilIT.getDatasetJson("doc/sphinx-guides/source/_static/api/dataset-create-software-ddi.xml");

String randomString = UtilIT.getRandomString(6);

Response importJson = UtilIT.importDatasetDDIViaNativeApi(apiToken, dataverseAlias, jsonIn, "doi:10.5072/FK2/" + randomString, "no");
Response importJson = UtilIT.importDatasetDDIViaNativeApi(apiToken, dataverseAlias, jsonIn, "doi:10.5072/FK2/" + randomString, "yes");
importJson.prettyPrint();
importJson.then().assertThat().statusCode(CREATED.getStatusCode());

Expand All @@ -142,7 +169,16 @@ public void testImportDDI() {
getDatasetJson.then().assertThat().statusCode(OK.getStatusCode());
String datasetType = JsonPath.from(getDatasetJson.getBody().asString()).getString("data.datasetType");
System.out.println("datasetType: " + datasetType);
assertEquals("software", datasetType);
assertEquals("workflow", datasetType);

Response search = UtilIT.searchAndShowFacets("id:dataset_" + datasetId, apiToken);
search.prettyPrint();
search.then().assertThat()
.body("data.total_count", CoreMatchers.is(1))
.body("data.count_in_response", CoreMatchers.is(1))
.body("data.facets[0].datasetType_s.friendly", CoreMatchers.is("Dataset Type"))
.body("data.facets[0].datasetType_s.labels[0].workflow", CoreMatchers.is(1))
.statusCode(OK.getStatusCode());

}

Expand Down

0 comments on commit 2b83f22

Please sign in to comment.