Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2243 schema.org json ld #4252

Merged
merged 23 commits into from
Nov 20, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
1b62596
stub out dataset in json-ld format #3793
pdurbin Oct 31, 2017
5ad88fc
better author name parsing (could be an org!) #3793 #2243
pdurbin Nov 1, 2017
2cc958d
fix a number of issues (listed below) #3793 #2243
pdurbin Nov 1, 2017
ad71c6a
use same date format as meta name="DC.date" #2243
pdurbin Nov 2, 2017
80b5a88
limit to non-published, not just non-drafts #2243
pdurbin Nov 2, 2017
485a5ca
don't even try to figure out if the author is a person or not #2243
pdurbin Nov 2, 2017
171c8f3
move getJsonLd method to DatasetVersion entity #2243
pdurbin Nov 2, 2017
6c5f044
use dateModified and proper schemaVersion URL #2243
pdurbin Nov 2, 2017
8f3083c
delete cruft (unused method) #2243
pdurbin Nov 3, 2017
b1db8ee
rename to publicationDateAsString and improve javadoc #2243
pdurbin Nov 3, 2017
f8ca59f
add tests for getJsonLd and getPublicationDateAsString #2243
pdurbin Nov 3, 2017
1aa323a
remove unused imports used in this branch #2243
pdurbin Nov 3, 2017
c941781
explain why ui:insert lines are in the template #2243
pdurbin Nov 3, 2017
8c74e37
A few quick fixes for getJsonLd() (and the corresponding test in Data…
landreev Nov 7, 2017
9f1d057
one more addition for #2243 - added temporalCoverage.
landreev Nov 7, 2017
7d03e70
consistency between DC.subject and JSON-LD keywords #2243
pdurbin Nov 7, 2017
8b8391f
added topicClassifications and keywords to JSONLD. (#2243)
landreev Nov 15, 2017
67882ff
the ld json fragment should now be structured as specified in the iss…
landreev Nov 16, 2017
e0399c1
...and a quick fix for the "temporalCoverage" entry (#2243)
landreev Nov 16, 2017
a2742c5
latest changes to ld json formatting, making the fragment pass the g…
landreev Nov 17, 2017
0801d56
ldjson will only be embedded into the page if this is the LATE…
landreev Nov 17, 2017
eec1163
Per conversation with jgautier stripped the '@type="person"' attribute…
landreev Nov 17, 2017
a756751
Merge branch 'develop' into 2243-schema.org-json-ld #2243
pdurbin Nov 17, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 33 additions & 27 deletions src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@
import edu.harvard.iq.dataverse.datasetutility.TwoRavensHelper;
import edu.harvard.iq.dataverse.datasetutility.WorldMapPermissionHelper;
import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException;
import edu.harvard.iq.dataverse.engine.command.impl.GetLatestPublishedDatasetVersionCommand;
import edu.harvard.iq.dataverse.engine.command.impl.RequestRsyncScriptCommand;
import edu.harvard.iq.dataverse.engine.command.impl.PublishDatasetResult;
import edu.harvard.iq.dataverse.engine.command.impl.RestrictFileCommand;
Expand Down Expand Up @@ -3944,23 +3945,6 @@ public String getDescription() {
return workingVersion.getDescriptionPlainText();
}

/**
 * Returns the publication (release) date of the version shown on this page.
 *
 * Draft versions, and versions that have no release time recorded, yield an
 * empty string instead of a date.
 *
 * @return the release date formatted as {@code yyyy-MM-dd}, or an empty
 * string when there is nothing to report.
 */
public String getPublicationDate() {
    assert (null != workingVersion);
    if (DatasetVersion.VersionState.DRAFT == workingVersion.getVersionState()) {
        return "";
    }
    Date releaseTime = workingVersion.getReleaseTime();
    // A non-draft version without a release time would otherwise fail with a
    // NullPointerException while the page is rendering.
    if (releaseTime == null) {
        return "";
    }
    // SimpleDateFormat is not thread-safe, so a fresh instance is created per call.
    return new SimpleDateFormat("yyyy-MM-dd").format(releaseTime);
}

/**
* dataset authors
*
Expand All @@ -3971,16 +3955,6 @@ public List<String> getDatasetAuthors() {
return workingVersion.getDatasetAuthorNames();
}

/**
* Subjects of the dataset version shown on this page.
*
* Delegates to {@code workingVersion.getDatasetSubjects()}; the working
* version is expected to be set (checked with an assertion only).
*
* @return list of String containing the subjects for a page
*/
public List<String> getDatasetSubjects() {
assert (null != workingVersion);
return workingVersion.getDatasetSubjects();
}

/**
* publisher (aka - name of root dataverse)
*
Expand Down Expand Up @@ -4066,4 +4040,36 @@ public List<DatasetField> getDatasetSummaryFields() {
return DatasetUtil.getDatasetSummaryFields(workingVersion, customFields);
}

// Cached answer for isThisLatestReleasedVersion(); stays null until the
// first call has computed it.
Boolean thisLatestReleasedVersion = null;

/**
 * Tells whether the version shown on this page is the latest published
 * version of the dataset.
 *
 * The answer is computed once and then served from the
 * {@code thisLatestReleasedVersion} field. A version that is not published is
 * never the latest released one. If the lookup of the latest published
 * version fails, the failure is logged and the answer is {@code false}.
 *
 * @return true only when the working version equals the latest published
 * version returned by {@code GetLatestPublishedDatasetVersionCommand}.
 */
public boolean isThisLatestReleasedVersion() {
    if (thisLatestReleasedVersion != null) {
        return thisLatestReleasedVersion;
    }

    if (!workingVersion.isPublished()) {
        thisLatestReleasedVersion = false;
        return false;
    }

    DatasetVersion latestPublishedVersion = null;
    Command<DatasetVersion> cmd = new GetLatestPublishedDatasetVersionCommand(dvRequestService.getDataverseRequest(), dataset);
    try {
        latestPublishedVersion = commandEngine.submit(cmd);
    } catch (Exception ex) {
        // Best effort: a failed lookup must not break page rendering, but it
        // should leave a trace instead of vanishing silently. The version is
        // then treated as "not the latest" by the comparison below.
        java.util.logging.Logger.getLogger(getClass().getName()).log(
                java.util.logging.Level.WARNING,
                "Unable to look up the latest published dataset version",
                ex);
    }

    // equals(null) is false, so a failed or empty lookup yields false here.
    thisLatestReleasedVersion = workingVersion.equals(latestPublishedVersion);

    return thisLatestReleasedVersion;
}

/**
 * JSON-LD fragment for the page.
 *
 * @return the working version's JSON-LD when that version is the latest
 * released one; an empty string otherwise.
 */
public String getJsonLd() {
    return isThisLatestReleasedVersion() ? workingVersion.getJsonLd() : "";
}
}
264 changes: 263 additions & 1 deletion src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
import edu.harvard.iq.dataverse.util.MarkupChecker;
import edu.harvard.iq.dataverse.DatasetFieldType.FieldType;
import edu.harvard.iq.dataverse.util.StringUtil;
import edu.harvard.iq.dataverse.util.SystemConfig;
import edu.harvard.iq.dataverse.workflows.WorkflowComment;
import java.io.Serializable;
import java.math.BigDecimal;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
Expand All @@ -17,6 +19,9 @@
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.json.Json;
import javax.json.JsonArrayBuilder;
import javax.json.JsonObjectBuilder;
import javax.persistence.CascadeType;
import javax.persistence.Column;
import javax.persistence.Entity;
Expand Down Expand Up @@ -142,6 +147,9 @@ public enum License {

@Transient
private String contributorNames;

@Transient
private String jsonLd;

@OneToMany(mappedBy="datasetVersion", cascade={CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST})
private List<DatasetVersionUser> datasetVersionUsers;
Expand Down Expand Up @@ -417,6 +425,10 @@ public boolean isReleased() {
return versionState.equals(VersionState.RELEASED);
}

/**
* Alias for {@link #isReleased()}.
*
* @return whatever {@code isReleased()} returns for this version.
*/
public boolean isPublished() {
return isReleased();
}

/**
* @return true when this version's state equals {@code VersionState.DRAFT}.
*/
public boolean isDraft() {
// Invoked on versionState itself, so a null state fails here rather than returning false.
return versionState.equals(VersionState.DRAFT);
}
Expand Down Expand Up @@ -706,6 +718,42 @@ public List<DatasetAuthor> getDatasetAuthors() {
return retList;
}

/**
 * Collects the "time period covered" entries of this version.
 *
 * Each compound value contributes one {@code start/end} string. An entry is
 * dropped when its start or end sub-field is present but empty for display.
 *
 * @return list of {@code start/end} strings; empty when none are recorded.
 */
public List<String> getTimePeriodsCovered() {
    List<String> periods = new ArrayList<>();
    for (DatasetField field : this.getDatasetFields()) {
        if (!field.getDatasetFieldType().getName().equals(DatasetFieldConstant.timePeriodCovered)) {
            continue;
        }
        for (DatasetFieldCompoundValue compound : field.getDatasetFieldCompoundValues()) {
            String periodStart = "";
            String periodEnd = "";
            for (DatasetField child : compound.getChildDatasetFields()) {
                String childName = child.getDatasetFieldType().getName();
                // getValue() is used rather than getDisplayValue() because the
                // latter prepends a label such as "Start:" to the value.
                if (childName.equals(DatasetFieldConstant.timePeriodCoveredStart)) {
                    periodStart = child.isEmptyForDisplay() ? null : child.getValue();
                }
                if (childName.equals(DatasetFieldConstant.timePeriodCoveredEnd)) {
                    periodEnd = child.isEmptyForDisplay() ? null : child.getValue();
                }
            }
            if (periodStart == null || periodEnd == null) {
                continue;
            }
            periods.add(periodStart + "/" + periodEnd);
        }
    }
    return periods;
}

/**
* @return List of Strings containing the names of the authors.
*/
Expand All @@ -729,7 +777,55 @@ public List<String> getDatasetSubjects() {
}
return subjects;
}


/**
* Values of the {@code topicClassValue} child field found under the
* {@code topicClassification} compound field of this version.
*
* @return List of Strings containing the version's Topic Classifications
*/
public List<String> getTopicClassifications() {
return getCompoundChildFieldValues(DatasetFieldConstant.topicClassification, DatasetFieldConstant.topicClassValue);
}

/**
* Values of the {@code keywordValue} child field found under the
* {@code keyword} compound field of this version.
*
* @return List of Strings containing the version's Keywords
*/
public List<String> getKeywords() {
return getCompoundChildFieldValues(DatasetFieldConstant.keyword, DatasetFieldConstant.keywordValue);
}

/**
* Values of the {@code publicationCitation} child field found under the
* {@code publication} compound field of this version.
*
* @return List of Strings containing the version's PublicationCitations
*/
public List<String> getPublicationCitationValues() {
return getCompoundChildFieldValues(DatasetFieldConstant.publication, DatasetFieldConstant.publicationCitation);
}

/**
 * Collects the non-empty values of one child field across every compound
 * value of a given parent field of this version.
 *
 * @param parentFieldName compound dataset field A (from DatasetFieldConstant.*)
 * @param childFieldName dataset field B, child field of A (from DatasetFieldConstant.*)
 * @return List of values of the child field; empty when nothing matches
 */
public List<String> getCompoundChildFieldValues(String parentFieldName, String childFieldName) {
    List<String> childValues = new ArrayList<>();
    for (DatasetField dsf : this.getDatasetFields()) {
        if (!dsf.getDatasetFieldType().getName().equals(parentFieldName)) {
            continue;
        }
        for (DatasetFieldCompoundValue compoundValue : dsf.getDatasetFieldCompoundValues()) {
            for (DatasetField subField : compoundValue.getChildDatasetFields()) {
                if (!subField.getDatasetFieldType().getName().equals(childFieldName)) {
                    continue;
                }
                String value = subField.getValue();
                // Field values should NOT be empty or, especially, null,
                // - in the ideal world. But they CAN be null in real life
                // databases. So, a check, just in case:
                if (!StringUtil.isEmpty(value)) {
                    // Reuse the value that was just checked rather than
                    // fetching it from the field a second time.
                    childValues.add(value);
                }
            }
        }
    }
    return childValues;
}

public String getDatasetProducersString(){
String retVal = "";
for (DatasetField dsf : this.getDatasetFields()) {
Expand Down Expand Up @@ -1099,4 +1195,170 @@ public List<WorkflowComment> getWorkflowComments() {
return workflowComments;
}

/**
 * Returns the publication (release) date of this version as text.
 *
 * Draft versions, and versions that have no release time recorded, yield an
 * empty string instead of a date.
 *
 * @return String dataset publication date in ISO 8601 format (yyyy-MM-dd),
 * or an empty string when there is nothing to report.
 */
public String getPublicationDateAsString() {
    if (DatasetVersion.VersionState.DRAFT == this.getVersionState()) {
        return "";
    }
    Date releaseTime = this.getReleaseTime();
    // A non-draft version without a release time would otherwise fail with a
    // NullPointerException here.
    if (releaseTime == null) {
        return "";
    }
    // SimpleDateFormat is not thread-safe, so a fresh instance is created per call.
    return new SimpleDateFormat("yyyy-MM-dd").format(releaseTime);
}

// TODO: Make this more performant by writing the output to the database or a file?
// Agree - now that this has grown into a somewhat complex chunk of formatted
// metadata - and not just a couple of values inserted into the page html -
// it feels like it would make more sense to treat it as another supported
// export format, that can be produced once and cached.
// The problem with that is that the export subsystem assumes there is only
// one metadata export in a given format per dataset (it uses the current
// released (published) version). This JSON fragment is generated for a
// specific released version - and we can have multiple released versions.
// So something will need to be modified to accommodate this. -- L.A.

/**
 * Builds the schema.org "Dataset" JSON-LD fragment describing this version.
 *
 * The fragment is only produced for published versions. Once built it is kept
 * in the transient {@code jsonLd} field and returned as-is on later calls.
 *
 * Optional pieces (author affiliation, citations, temporal coverage, license
 * text) are left out when the underlying metadata is absent, because the JSON
 * builders reject null values.
 *
 * @return the JSON-LD document as a String, or an empty string when this
 * version is not published.
 */
public String getJsonLd() {
    // We show published datasets only for "datePublished" field below.
    if (!this.isPublished()) {
        return "";
    }

    if (jsonLd != null) {
        return jsonLd;
    }
    JsonObjectBuilder job = Json.createObjectBuilder();
    job.add("@context", "http://schema.org");
    job.add("@type", "Dataset");
    job.add("identifier", this.getDataset().getPersistentURL());
    job.add("name", this.getTitle());

    JsonArrayBuilder authors = Json.createArrayBuilder();
    for (DatasetAuthor datasetAuthor : this.getDatasetAuthors()) {
        JsonObjectBuilder author = Json.createObjectBuilder();
        String name = datasetAuthor.getName().getValue();
        // An author may have been entered without an affiliation, in which
        // case there is no affiliation field to read a value from.
        String affiliation = null;
        if (datasetAuthor.getAffiliation() != null) {
            affiliation = datasetAuthor.getAffiliation().getValue();
        }
        // We are aware of "givenName" and "familyName" but instead of a person
        // it might be an organization such as "Gallup Organization", so no
        // "@type": "Person" is emitted for authors.
        author.add("name", name);
        if (!StringUtil.isEmpty(affiliation)) {
            author.add("affiliation", affiliation);
        }
        authors.add(author);
    }
    job.add("author", authors);

    // "datePublished" means "Date of first broadcast/publication", so it is
    // taken from the dataset rather than from this particular version.
    String datePublished = this.getDataset().getPublicationDateFormattedYYYYMMDD();
    if (datePublished != null) {
        job.add("datePublished", datePublished);
    }

    // "dateModified" is more appropriate for a version: "The date on which
    // the CreativeWork was most recently modified or when the item's entry
    // was modified within a DataFeed."
    job.add("dateModified", this.getPublicationDateAsString());
    job.add("version", this.getVersionNumber().toString());
    job.add("description", this.getDescriptionPlainText());

    // "keywords" - contains subject(s), datasetkeyword(s) and
    // topicclassification(s) metadata fields for the version. -- L.A.
    // (see #2243 for details/discussion/feedback from Google)
    JsonArrayBuilder keywords = Json.createArrayBuilder();
    for (String subject : this.getDatasetSubjects()) {
        keywords.add(subject);
    }
    for (String topic : this.getTopicClassifications()) {
        keywords.add(topic);
    }
    for (String keyword : this.getKeywords()) {
        keywords.add(keyword);
    }
    job.add("keywords", keywords);

    // citation: (multiple) publicationCitation values, if present
    List<String> publicationCitations = getPublicationCitationValues();
    if (!publicationCitations.isEmpty()) {
        JsonArrayBuilder citation = Json.createArrayBuilder();
        for (String pubCitation : publicationCitations) {
            citation.add(pubCitation);
        }
        job.add("citation", citation);
    }

    // temporalCoverage (if available)
    List<String> timePeriodsCovered = this.getTimePeriodsCovered();
    if (!timePeriodsCovered.isEmpty()) {
        JsonArrayBuilder temporalCoverage = Json.createArrayBuilder();
        for (String timePeriod : timePeriodsCovered) {
            temporalCoverage.add(timePeriod);
        }
        job.add("temporalCoverage", temporalCoverage);
    }

    // spatialCoverage (if available): TODO (punted, for now - see #2243)

    // funder (if available): TODO (punted, for now - see #2243)

    job.add("schemaVersion", "https://schema.org/version/3.3");

    TermsOfUseAndAccess terms = this.getTermsOfUseAndAccess();
    if (terms != null) {
        JsonObjectBuilder license = Json.createObjectBuilder().add("@type", "Dataset");

        if (TermsOfUseAndAccess.License.CC0.equals(terms.getLicense())) {
            license.add("text", "CC0").add("url", "https://creativecommons.org/publicdomain/zero/1.0/");
        } else {
            // The terms-of-use text may be missing; adding a null value to the
            // builder would throw, so the "text" entry is simply omitted then.
            String termsOfUse = terms.getTermsOfUse();
            if (termsOfUse != null) {
                license.add("text", termsOfUse);
            }
        }

        job.add("license", license);
    }

    job.add("includedInDataCatalog", Json.createObjectBuilder()
            .add("@type", "DataCatalog")
            .add("name", this.getRootDataverseNameforCitation())
            .add("url", SystemConfig.getDataverseSiteUrlStatic())
    );

    job.add("provider", Json.createObjectBuilder()
            .add("@type", "Organization")
            .add("name", "Dataverse")
    );
    jsonLd = job.build().toString();
    return jsonLd;
}

}
Loading