From 1602a4332ec6a8aeac31a32d431ead6884758315 Mon Sep 17 00:00:00 2001 From: Victor Felder Date: Wed, 4 Sep 2019 14:29:04 +0200 Subject: [PATCH] feat(query): get labels with language preferences --- .../__snapshots__/datasetquery.test.ts.snap | 234 +++++++++++++++--- src/__tests__/datasetquery.test.ts | 44 ++++ src/__tests__/filter.test.ts | 3 +- src/dataset.ts | 6 +- src/datasetquery.ts | 162 ++++++++++-- 5 files changed, 391 insertions(+), 58 deletions(-) diff --git a/src/__tests__/__snapshots__/datasetquery.test.ts.snap b/src/__tests__/__snapshots__/datasetquery.test.ts.snap index 7b3f6f1..55aabf9 100644 --- a/src/__tests__/__snapshots__/datasetquery.test.ts.snap +++ b/src/__tests__/__snapshots__/datasetquery.test.ts.snap @@ -10,7 +10,11 @@ WHERE { ?observation ?tmpVar2. ?observation ?tmpVar3. ?observation . - OPTIONAL { ?raum (|) ?raumLabel. } + OPTIONAL { + ?raum (|) ?raumLabelLang. + FILTER((LANG(?raumLabelLang)) = \\"\\"^^) + } + BIND(COALESCE(?raumLabelLang, ?raum) AS ?raumLabel) } GROUP BY (?raum) (?raumLabel) LIMIT 10" @@ -26,7 +30,11 @@ WHERE { ?observation ?tmpVar2. ?observation ?tmpVar3. ?observation . - OPTIONAL { ?raum (|) ?raumLabel. } + OPTIONAL { + ?raum (|) ?raumLabelLang. + FILTER((LANG(?raumLabelLang)) = \\"\\"^^) + } + BIND(COALESCE(?raumLabelLang, ?raum) AS ?raumLabel) FILTER(?raum NOT IN()) } GROUP BY (?raum) (?raumLabel) @@ -50,10 +58,26 @@ WHERE { OPTIONAL { ?observation ?datenstand. } OPTIONAL { ?observation ?erwarteteAktualisierung. } OPTIONAL { ?observation ?korrektur. } - OPTIONAL { ?betriebsart (|) ?betriebsartLabel. } - OPTIONAL { ?geschlecht (|) ?geschlechtLabel. } - OPTIONAL { ?raum (|) ?raumLabel. } - OPTIONAL { ?zeit (|) ?zeitLabel. } + OPTIONAL { + ?betriebsart (|) ?betriebsartLabelLang. + FILTER((LANG(?betriebsartLabelLang)) = \\"\\"^^) + } + BIND(COALESCE(?betriebsartLabelLang, ?betriebsart) AS ?betriebsartLabel) + OPTIONAL { + ?geschlecht (|) ?geschlechtLabelLang. + FILTER((LANG(?geschlechtLabelLang)) = \\"\\"^^) + } + BIND(COALESCE(?geschlechtLabelLang, ?geschlecht) AS ?geschlechtLabel) + OPTIONAL { + ?raum (|) ?raumLabelLang. + FILTER((LANG(?raumLabelLang)) = \\"\\"^^) + } + BIND(COALESCE(?raumLabelLang, ?raum) AS ?raumLabel) + OPTIONAL { + ?zeit (|) ?zeitLabelLang. + FILTER((LANG(?zeitLabelLang)) = \\"\\"^^) + } + BIND(COALESCE(?zeitLabelLang, ?zeit) AS ?zeitLabel) } GROUP BY (?betriebsart) (?betriebsartLabel) (?geschlecht) (?geschlechtLabel) (?raum) (?raumLabel) (?zeit) (?zeitLabel) (?quelle) (?glossar) (?fussnote) (?datenstand) (?erwarteteAktualisierung) (?korrektur) LIMIT 10" @@ -75,10 +99,26 @@ WHERE { OPTIONAL { ?observation ?datenstand. } OPTIONAL { ?observation ?erwarteteAktualisierung. } OPTIONAL { ?observation ?korrektur. } - OPTIONAL { ?betriebsart (|) ?betriebsartLabel. } - OPTIONAL { ?geschlecht (|) ?geschlechtLabel. } - OPTIONAL { ?raum (|) ?raumLabel. } - OPTIONAL { ?zeit (|) ?zeitLabel. } + OPTIONAL { + ?betriebsart (|) ?betriebsartLabelLang. + FILTER((LANG(?betriebsartLabelLang)) = \\"\\"^^) + } + BIND(COALESCE(?betriebsartLabelLang, ?betriebsart) AS ?betriebsartLabel) + OPTIONAL { + ?geschlecht (|) ?geschlechtLabelLang. + FILTER((LANG(?geschlechtLabelLang)) = \\"\\"^^) + } + BIND(COALESCE(?geschlechtLabelLang, ?geschlecht) AS ?geschlechtLabel) + OPTIONAL { + ?raum (|) ?raumLabelLang. + FILTER((LANG(?raumLabelLang)) = \\"\\"^^) + } + BIND(COALESCE(?raumLabelLang, ?raum) AS ?raumLabel) + OPTIONAL { + ?zeit (|) ?zeitLabelLang. + FILTER((LANG(?zeitLabelLang)) = \\"\\"^^) + } + BIND(COALESCE(?zeitLabelLang, ?zeit) AS ?zeitLabel) } LIMIT 10" `; @@ -92,7 +132,11 @@ WHERE { ?observation ?tmpVar1. ?observation ?tmpVar2. ?observation . - OPTIONAL { ?raum (|) ?raumLabel. } + OPTIONAL { + ?raum (|) ?raumLabelLang. + FILTER((LANG(?raumLabelLang)) = \\"\\"^^) + } + BIND(COALESCE(?raumLabelLang, ?raum) AS ?raumLabel) } LIMIT 10" `; @@ -123,10 +167,26 @@ WHERE { OPTIONAL { ?observation ?quelle. } OPTIONAL { ?observation ?glossar. } OPTIONAL { ?observation ?fussnote. } - OPTIONAL { ?betriebsart (|) ?betriebsartLabel. } - OPTIONAL { ?geschlecht (|) ?geschlechtLabel. } - OPTIONAL { ?raum (|) ?raumLabel. } - OPTIONAL { ?zeit (|) ?zeitLabel. } + OPTIONAL { + ?betriebsart (|) ?betriebsartLabelLang. + FILTER((LANG(?betriebsartLabelLang)) = \\"\\"^^) + } + BIND(COALESCE(?betriebsartLabelLang, ?betriebsart) AS ?betriebsartLabel) + OPTIONAL { + ?geschlecht (|) ?geschlechtLabelLang. + FILTER((LANG(?geschlechtLabelLang)) = \\"\\"^^) + } + BIND(COALESCE(?geschlechtLabelLang, ?geschlecht) AS ?geschlechtLabel) + OPTIONAL { + ?raum (|) ?raumLabelLang. + FILTER((LANG(?raumLabelLang)) = \\"\\"^^) + } + BIND(COALESCE(?raumLabelLang, ?raum) AS ?raumLabel) + OPTIONAL { + ?zeit (|) ?zeitLabelLang. + FILTER((LANG(?zeitLabelLang)) = \\"\\"^^) + } + BIND(COALESCE(?zeitLabelLang, ?zeit) AS ?zeitLabel) FILTER((?raum >= \\"12\\"^^) && (?bep >= \\"12\\"^^)) } GROUP BY (?zeit) (?raum) (?betriebsart) (?betriebsartLabel) (?geschlecht) (?geschlechtLabel) (?raumLabel) (?zeitLabel) (?bep) (?quelle) (?glossar) (?fussnote) @@ -146,10 +206,26 @@ WHERE { OPTIONAL { ?observation ?quelle. } OPTIONAL { ?observation ?glossar. } OPTIONAL { ?observation ?fussnote. } - OPTIONAL { ?betriebsart (|) ?betriebsartLabel. } - OPTIONAL { ?geschlecht (|) ?geschlechtLabel. } - OPTIONAL { ?raum (|) ?raumLabel. } - OPTIONAL { ?zeit (|) ?zeitLabel. } + OPTIONAL { + ?betriebsart (|) ?betriebsartLabelLang. + FILTER((LANG(?betriebsartLabelLang)) = \\"\\"^^) + } + BIND(COALESCE(?betriebsartLabelLang, ?betriebsart) AS ?betriebsartLabel) + OPTIONAL { + ?geschlecht (|) ?geschlechtLabelLang. + FILTER((LANG(?geschlechtLabelLang)) = \\"\\"^^) + } + BIND(COALESCE(?geschlechtLabelLang, ?geschlecht) AS ?geschlechtLabel) + OPTIONAL { + ?raum (|) ?raumLabelLang. + FILTER((LANG(?raumLabelLang)) = \\"\\"^^) + } + BIND(COALESCE(?raumLabelLang, ?raum) AS ?raumLabel) + OPTIONAL { + ?zeit (|) ?zeitLabelLang. + FILTER((LANG(?zeitLabelLang)) = \\"\\"^^) + } + BIND(COALESCE(?zeitLabelLang, ?zeit) AS ?zeitLabel) } GROUP BY (?zeit) (?raum) (?betriebsart) (?betriebsartLabel) (?geschlecht) (?geschlechtLabel) (?raumLabel) (?zeitLabel) (?bep) (?quelle) (?glossar) (?fussnote) LIMIT 10" @@ -168,10 +244,26 @@ WHERE { OPTIONAL { ?observation ?quelle. } OPTIONAL { ?observation ?glossar. } OPTIONAL { ?observation ?fussnote. } - OPTIONAL { ?betriebsart (|) ?betriebsartLabel. } - OPTIONAL { ?geschlecht (|) ?geschlechtLabel. } - OPTIONAL { ?raum (|) ?raumLabel. } - OPTIONAL { ?zeit (|) ?zeitLabel. } + OPTIONAL { + ?betriebsart (|) ?betriebsartLabelLang. + FILTER((LANG(?betriebsartLabelLang)) = \\"\\"^^) + } + BIND(COALESCE(?betriebsartLabelLang, ?betriebsart) AS ?betriebsartLabel) + OPTIONAL { + ?geschlecht (|) ?geschlechtLabelLang. + FILTER((LANG(?geschlechtLabelLang)) = \\"\\"^^) + } + BIND(COALESCE(?geschlechtLabelLang, ?geschlecht) AS ?geschlechtLabel) + OPTIONAL { + ?raum (|) ?raumLabelLang. + FILTER((LANG(?raumLabelLang)) = \\"\\"^^) + } + BIND(COALESCE(?raumLabelLang, ?raum) AS ?raumLabel) + OPTIONAL { + ?zeit (|) ?zeitLabelLang. + FILTER((LANG(?zeitLabelLang)) = \\"\\"^^) + } + BIND(COALESCE(?zeitLabelLang, ?zeit) AS ?zeitLabel) } GROUP BY (?zeit) (?raum) (?betriebsart) (?betriebsartLabel) (?geschlecht) (?geschlechtLabel) (?raumLabel) (?zeitLabel) (?bep) (?quelle) (?glossar) (?fussnote) LIMIT 10" @@ -187,14 +279,82 @@ WHERE { ?observation ?tmpVar1. ?observation ?tmpVar2. ?observation . - OPTIONAL { ?raum (|) ?raumLabel. } - OPTIONAL { ?zeit (|) ?zeitLabel. } + OPTIONAL { + ?raum (|) ?raumLabelLang. + FILTER((LANG(?raumLabelLang)) = \\"\\"^^) + } + BIND(COALESCE(?raumLabelLang, ?raum) AS ?raumLabel) + OPTIONAL { + ?zeit (|) ?zeitLabelLang. + FILTER((LANG(?zeitLabelLang)) = \\"\\"^^) + } + BIND(COALESCE(?zeitLabelLang, ?zeit) AS ?zeitLabel) FILTER(?raum = ) } GROUP BY (?raum) (?raumLabel) (?zeit) (?zeitLabel) LIMIT 10" `; +exports[`handles languages one language 1`] = ` +"SELECT ?zeit ?zeitLabel ?tmpVar0 ?tmpVar1 ?tmpVar2 ?bep ?quelle FROM +WHERE { + ?observation . + ?observation ?zeit. + ?observation ?tmpVar0. + ?observation ?tmpVar1. + ?observation ?tmpVar2. + ?observation ?bep. + ?observation . + OPTIONAL { ?observation ?quelle. } + OPTIONAL { + ?zeit (|) ?zeitLabelLang. + FILTER((LANGMATCHES(LANG(?zeitLabelLang), \\"en\\"^^)) || ((LANG(?zeitLabelLang)) = \\"\\"^^)) + } + BIND(COALESCE(?zeitLabelLang, ?zeit) AS ?zeitLabel) +} +LIMIT 10" +`; + +exports[`handles languages three languages 1`] = ` +"SELECT ?zeit ?zeitLabel ?tmpVar0 ?tmpVar1 ?tmpVar2 ?bep ?quelle FROM +WHERE { + ?observation . + ?observation ?zeit. + ?observation ?tmpVar0. + ?observation ?tmpVar1. + ?observation ?tmpVar2. + ?observation ?bep. + ?observation . + OPTIONAL { ?observation ?quelle. } + OPTIONAL { + ?zeit (|) ?zeitLabelLang. + FILTER((((LANGMATCHES(LANG(?zeitLabelLang), \\"fr\\"^^)) || (LANGMATCHES(LANG(?zeitLabelLang), \\"de\\"^^))) || (LANGMATCHES(LANG(?zeitLabelLang), \\"it\\"^^))) || ((LANG(?zeitLabelLang)) = \\"\\"^^)) + } + BIND(COALESCE(?zeitLabelLang, ?zeit) AS ?zeitLabel) +} +LIMIT 10" +`; + +exports[`handles languages two languages 1`] = ` +"SELECT ?zeit ?zeitLabel ?tmpVar0 ?tmpVar1 ?tmpVar2 ?bep ?quelle FROM +WHERE { + ?observation . + ?observation ?zeit. + ?observation ?tmpVar0. + ?observation ?tmpVar1. + ?observation ?tmpVar2. + ?observation ?bep. + ?observation . + OPTIONAL { ?observation ?quelle. } + OPTIONAL { + ?zeit (|) ?zeitLabelLang. + FILTER(((LANGMATCHES(LANG(?zeitLabelLang), \\"en\\"^^)) || (LANGMATCHES(LANG(?zeitLabelLang), \\"de\\"^^))) || ((LANG(?zeitLabelLang)) = \\"\\"^^)) + } + BIND(COALESCE(?zeitLabelLang, ?zeit) AS ?zeitLabel) +} +LIMIT 10" +`; + exports[`ordering group and filter 1`] = ` "SELECT ?betriebsart ?betriebsartLabel ?geschlecht ?geschlechtLabel ?raum ?raumLabel ?zeit ?zeitLabel ?bep ?quelle ?glossar ?fussnote FROM WHERE { @@ -208,10 +368,26 @@ WHERE { OPTIONAL { ?observation ?quelle. } OPTIONAL { ?observation ?glossar. } OPTIONAL { ?observation ?fussnote. } - OPTIONAL { ?betriebsart (|) ?betriebsartLabel. } - OPTIONAL { ?geschlecht (|) ?geschlechtLabel. } - OPTIONAL { ?raum (|) ?raumLabel. } - OPTIONAL { ?zeit (|) ?zeitLabel. } + OPTIONAL { + ?betriebsart (|) ?betriebsartLabelLang. + FILTER((LANG(?betriebsartLabelLang)) = \\"\\"^^) + } + BIND(COALESCE(?betriebsartLabelLang, ?betriebsart) AS ?betriebsartLabel) + OPTIONAL { + ?geschlecht (|) ?geschlechtLabelLang. + FILTER((LANG(?geschlechtLabelLang)) = \\"\\"^^) + } + BIND(COALESCE(?geschlechtLabelLang, ?geschlecht) AS ?geschlechtLabel) + OPTIONAL { + ?raum (|) ?raumLabelLang. + FILTER((LANG(?raumLabelLang)) = \\"\\"^^) + } + BIND(COALESCE(?raumLabelLang, ?raum) AS ?raumLabel) + OPTIONAL { + ?zeit (|) ?zeitLabelLang. + FILTER((LANG(?zeitLabelLang)) = \\"\\"^^) + } + BIND(COALESCE(?zeitLabelLang, ?zeit) AS ?zeitLabel) FILTER((?raum >= \\"12\\"^^) && (?bep >= \\"12\\"^^)) } GROUP BY (?zeit) (?raum) (?betriebsart) (?betriebsartLabel) (?geschlecht) (?geschlechtLabel) (?raumLabel) (?zeitLabel) (?bep) (?quelle) (?glossar) (?fussnote) diff --git a/src/__tests__/datasetquery.test.ts b/src/__tests__/datasetquery.test.ts index 2a9589c..d12c970 100644 --- a/src/__tests__/datasetquery.test.ts +++ b/src/__tests__/datasetquery.test.ts @@ -348,3 +348,47 @@ describe("ordering", () => { expect(sparqlA).not.toBe(sparqlB); }); }); + +describe("handles languages", () => { + test("one language", async () => { + const query = dataset + .query({ languages: ["en"] }) + .select({ + zeit: zeitDimension, + + bep: beschaeftigteMeasure, + + quelle: quelleAttribute, + }); + const sparql = await query.toSparql(); + expect(sparql).toMatchSnapshot(); + }); + + test("two languages", async () => { + const query = dataset + .query({ languages: ["en", "de"] }) + .select({ + zeit: zeitDimension, + + bep: beschaeftigteMeasure, + + quelle: quelleAttribute, + }); + const sparql = await query.toSparql(); + expect(sparql).toMatchSnapshot(); + }); + + test("three languages", async () => { + const query = dataset + .query({ languages: ["fr", "de", "it"] }) + .select({ + zeit: zeitDimension, + + bep: beschaeftigteMeasure, + + quelle: quelleAttribute, + }); + const sparql = await query.toSparql(); + expect(sparql).toMatchSnapshot(); + }); +}); diff --git a/src/__tests__/filter.test.ts b/src/__tests__/filter.test.ts index adfc404..caae71f 100644 --- a/src/__tests__/filter.test.ts +++ b/src/__tests__/filter.test.ts @@ -6,7 +6,8 @@ import DataSet from "../dataset"; function extractFilter(sparql: string) { return sparql .split("\n") - .find((line: string) => line.trim().startsWith("FILTER")) + .filter((line: string) => line.trim().startsWith("FILTER")) + .slice(-1)[0] .trim(); } diff --git a/src/dataset.ts b/src/dataset.ts index 9eb1882..da492b2 100644 --- a/src/dataset.ts +++ b/src/dataset.ts @@ -2,7 +2,7 @@ import { NamedNode, Term } from "rdf-js"; import Attribute from "./components/attribute"; import Dimension from "./components/dimension"; import Measure from "./components/measure"; -import DataSetQuery from "./datasetquery"; +import DataSetQuery, { IQueryOpts } from "./datasetquery"; import SparqlFetcher from "./sparqlfetcher"; class DataSet { @@ -60,8 +60,8 @@ class DataSet { /** * Start a new query on the DataSet. */ - public query(): DataSetQuery { - return new DataSetQuery(this); + public query(opts: IQueryOpts = {}): DataSetQuery { + return new DataSetQuery(this, opts); } private async metadata() { diff --git a/src/datasetquery.ts b/src/datasetquery.ts index a2e31d8..1860e90 100644 --- a/src/datasetquery.ts +++ b/src/datasetquery.ts @@ -1,4 +1,4 @@ -import { namedNode, variable } from "@rdfjs/data-model"; +import { literal, namedNode, variable } from "@rdfjs/data-model"; import clone from "clone"; import { Generator as SparqlGenerator } from "sparqljs"; import Component from "./components/index"; @@ -22,6 +22,10 @@ interface IState { order: Component[]; } +export interface IQueryOpts { + languages?: string[]; +} + const baseState: IState = { selects: {}, filters: [], @@ -98,6 +102,122 @@ function combineFilters(operations: OperationExpression[]): FilterPattern { }; } +function langPrepare(binding, labelBinding, labelLangBinding, langs: string[]) { + // fetch labels when they exist, handling languages + const langMatch = (lang: string): OperationExpression => { + return { + type: "operation", + operator: "langmatches", + args: [ + { + type: "operation", + operator: "lang", + args: [ labelLangBinding ], + }, + literal(lang), + ], + }; + }; + + const langExactMatch = (lang = ""): OperationExpression => { + return { + type: "operation", + operator: "=", + args: [ + { + type: "operation", + operator: "lang", + args: [ labelLangBinding ], + }, + literal(lang), + ], + }; + }; + + function langHandler() { + let languages = langs.filter(Boolean); + if (!languages.length) { + // no lang specified, default to 'empty' lang + return { + type: "filter", + expression: langExactMatch(""), + }; + } + // at least one non-empty lang, add the 'empty' lang as fallback + languages = langs.concat(""); + // we now have at least two langs to work with + const lang1 = languages.shift(); + const lang2 = languages.shift(); + let expression: OperationExpression = { + type: "operation", + operator: "||", + args: [ + lang1 === "" ? langExactMatch(lang1) : langMatch(lang1), + lang2 === "" ? langExactMatch(lang2) : langMatch(lang2), + ], + }; + + let extraLang = languages.shift(); + while (typeof extraLang !== "undefined") { + expression = { + type: "operation", + operator: "||", + args: [ + expression, + extraLang === "" ? langExactMatch("") : langMatch(extraLang), + ], + }; + extraLang = languages.shift(); + } + return { + type: "filter", + expression, + }; + } + + const findLabel = { + type: "optional", + patterns: [ + { + type: "bgp", + triples: [ + { + subject: binding, + predicate: { + type: "path", + pathType: "|", + items: [ + namedNode("http://www.w3.org/2000/01/rdf-schema#label"), + namedNode("http://www.w3.org/2004/02/skos/core#prefLabel"), + ], + }, + object: labelLangBinding, + }, + ], + }, + langHandler(), + ], + }; + + const coalesceLabel = { + type: "bind", + variable: labelBinding, + expression: { + type: "operation", + operator: "coalesce", + args: [ + labelLangBinding, + binding, + ], + }, + }; + + return { + findLabel, + coalesceLabel, + }; +} + /** * A query to a [[DataSet]]. * @class DataSetQuery @@ -110,6 +230,7 @@ class DataSetQuery { private state: IState; private fetcher: SparqlFetcher; private tmpVarCount: number = 0; + private languages: string[]; /** * Creates an instance of DataSetQuery. You should not have to manually create queries, @@ -117,7 +238,8 @@ class DataSetQuery { * to query. * @param dataSet The [[DataSet]] to query. */ - constructor(dataSet: DataSet) { + constructor(dataSet: DataSet, opts: IQueryOpts = {}) { + this.languages = opts.languages || []; this.dataSet = dataSet; this.state = baseState; this.fetcher = new SparqlFetcher(this.dataSet.endpoint); @@ -373,33 +495,20 @@ class DataSetQuery { .forEach(([bindingName, component]) => { this.bindingToComponent[bindingName] = component; this.iriToBinding[component.iri.value] = bindingName; + const binding = variable(bindingName); + addedDimensionsIRIs.push(component.iri.value); mainWhereClauses.triples.push({ subject: variable("observation"), predicate: component.iri, - object: variable(bindingName), + object: binding, }); - // fetch labels when they exist + const labelBinding = variable(`${bindingName}Label`); - fetchLabels.push({ - type: "optional", - patterns: [{ - type: "bgp", - triples: [{ - subject: variable(bindingName), - predicate: { - type: "path", - pathType: "|", - items: [ - namedNode("http://www.w3.org/2000/01/rdf-schema#label"), - namedNode("http://www.w3.org/2004/02/skos/core#prefLabel"), - ], - }, - object: labelBinding, - }], - }], - }); - query.variables.push(variable(bindingName), labelBinding); + const labelLangBinding = variable(`${bindingName}LabelLang`); + const {findLabel, coalesceLabel} = langPrepare(binding, labelBinding, labelLangBinding, this.languages); + fetchLabels.push(findLabel, coalesceLabel); + query.variables.push(binding, labelBinding); }); // add dimensions that haven't been explicitly selected @@ -552,9 +661,12 @@ class DataSetQuery { private clone() { const dsq = new DataSetQuery(this.dataSet); - dsq.state = clone(this.state); - dsq.iriToBinding = clone(this.iriToBinding); dsq.bindingToComponent = clone(this.bindingToComponent); + dsq.iriToBinding = clone(this.iriToBinding); + dsq.state = clone(this.state); + dsq.fetcher = clone(this.fetcher); + dsq.tmpVarCount = clone(this.tmpVarCount); + dsq.languages = clone(this.languages); return dsq; }