Skip to content

Commit

Permalink
Added ability to autodiscover datatypes during parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
Marco De Salvo committed May 27, 2024
1 parent d2a6d98 commit 6aec07c
Show file tree
Hide file tree
Showing 2 changed files with 144 additions and 9 deletions.
99 changes: 98 additions & 1 deletion RDFSharp.Test/Model/RDFGraphTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1548,7 +1548,8 @@ public void ShouldImportFromFile(string fileExtension, RDFModelEnums.RDFFormats
RDFGraph graph1 = new RDFGraph();
RDFTriple triple1 = new RDFTriple(new RDFResource("http://ex/subj/"), new RDFResource("http://ex/pred/"), new RDFPlainLiteral("lit", "en-US"));
RDFTriple triple2 = new RDFTriple(new RDFResource("http://ex/subj/"), new RDFResource("http://ex/pred/"), new RDFResource("http://ex/obj/"));
graph1.AddTriple(triple1).AddTriple(triple2);
graph1.AddTriple(triple1)
.AddTriple(triple2);
graph1.ToFile(format, Path.Combine(Environment.CurrentDirectory, $"RDFGraphTest_ShouldImportFromFile{fileExtension}"));
RDFGraph graph2 = RDFGraph.FromFile(format, Path.Combine(Environment.CurrentDirectory, $"RDFGraphTest_ShouldImportFromFile{fileExtension}"));

Expand All @@ -1569,6 +1570,43 @@ public void ShouldImportFromFile(string fileExtension, RDFModelEnums.RDFFormats
}
}

//Round-trips a graph carrying a custom datatype definition through a file of the given format,
//re-imports it with datatype discovery enabled and checks that the datatype lands in the register.
[DataTestMethod]
[DataRow(".nt", RDFModelEnums.RDFFormats.NTriples)]
[DataRow(".rdf", RDFModelEnums.RDFFormats.RdfXml)]
[DataRow(".trix", RDFModelEnums.RDFFormats.TriX)]
[DataRow(".ttl", RDFModelEnums.RDFFormats.Turtle)]
public void ShouldImportFromFileWithEnabledDatatypeDiscovery(string fileExtension, RDFModelEnums.RDFFormats format)
{
    //Dedicated file name: the plain ShouldImportFromFile test writes "RDFGraphTest_ShouldImportFromFile{ext}",
    //so sharing that name would let the two tests overwrite each other's fixture
    string filePath = Path.Combine(Environment.CurrentDirectory, $"RDFGraphTest_ShouldImportFromFileWithEnabledDatatypeDiscovery{fileExtension}");
    //The format value is part of the name so that each data row uses its own datatype
    string datatypeName = $"ex:mydt{(int)format}";

    RDFGraph graph1 = new RDFGraph();
    RDFTriple triple1 = new RDFTriple(new RDFResource("http://ex/subj/"), new RDFResource("http://ex/pred/"), new RDFPlainLiteral("lit", "en-US"));
    RDFTriple triple2 = new RDFTriple(new RDFResource("http://ex/subj/"), new RDFResource("http://ex/pred/"), new RDFResource("http://ex/obj/"));
    graph1.AddTriple(triple1)
          .AddTriple(triple2)
          .AddDatatype(new RDFDatatype(new Uri(datatypeName), RDFModelEnums.RDFDatatypes.XSD_STRING, [
              new RDFPatternFacet("^ex$") ]));
    graph1.ToFile(format, filePath);
    RDFGraph graph2 = RDFGraph.FromFile(format, filePath, true);

    Assert.IsNotNull(graph2);
    //2 explicit triples; presumably the other 7 are emitted by AddDatatype - TODO confirm
    Assert.AreEqual(9, graph2.TriplesCount);
    //RDF/XML uses xsd:qname for encoding predicates. In this test we demonstrate that
    //triples with a predicate ending with "/" will lose this character once abbreviated:
    //this is correct (being a glitch of RDF/XML specs) so at the end the graphs will differ
    if (format == RDFModelEnums.RDFFormats.RdfXml)
    {
        Assert.IsFalse(graph2.Equals(graph1));
        Assert.AreEqual(0, graph2.SelectTriplesByPredicate(new RDFResource("http://ex/pred/")).TriplesCount);
        Assert.AreEqual(2, graph2.SelectTriplesByPredicate(new RDFResource("http://ex/pred")).TriplesCount);
    }
    else
    {
        Assert.IsTrue(graph2.Equals(graph1));
    }

    //Test that automatic datatype discovery happened successfully
    //(single lookup, checked for null so that a missing registration fails as an assertion)
    var discoveredDatatype = RDFDatatypeRegister.GetDatatype(datatypeName);
    Assert.IsNotNull(discoveredDatatype);
    Assert.AreEqual(RDFModelEnums.RDFDatatypes.XSD_STRING, discoveredDatatype.TargetDatatype);
    Assert.IsTrue(discoveredDatatype.Facets.Single() is RDFPatternFacet fct && fct.Pattern == "^ex$");
}

[DataTestMethod]
[DataRow(".nt", RDFModelEnums.RDFFormats.NTriples)]
[DataRow(".rdf", RDFModelEnums.RDFFormats.RdfXml)]
Expand Down Expand Up @@ -1625,6 +1663,44 @@ public void ShouldImportFromStream(RDFModelEnums.RDFFormats format)
}
}

//Round-trips a graph carrying a custom datatype definition through an in-memory stream of the given format,
//re-imports it with datatype discovery enabled and checks that the datatype lands in the register.
[DataTestMethod]
[DataRow(RDFModelEnums.RDFFormats.NTriples)]
[DataRow(RDFModelEnums.RDFFormats.RdfXml)]
[DataRow(RDFModelEnums.RDFFormats.TriX)]
[DataRow(RDFModelEnums.RDFFormats.Turtle)]
public void ShouldImportFromStreamWithEnabledDatatypeDiscovery(RDFModelEnums.RDFFormats format)
{
    //The format value is part of the name so that each data row uses its own datatype
    string datatypeName = $"ex:mydtT{(int)format}";

    RDFGraph graph1 = new RDFGraph();
    RDFTriple triple1 = new RDFTriple(new RDFResource("http://ex/subj/"), new RDFResource("http://ex/pred/"), new RDFPlainLiteral("lit", "en-US"));
    RDFTriple triple2 = new RDFTriple(new RDFResource("http://ex/subj/"), new RDFResource("http://ex/pred/"), new RDFResource("http://ex/obj/"));
    graph1.AddTriple(triple1)
          .AddTriple(triple2)
          .AddDatatype(new RDFDatatype(new Uri(datatypeName), RDFModelEnums.RDFDatatypes.XSD_STRING, [
              new RDFPatternFacet("^ex$") ]));

    //Both streams are disposed deterministically when the test ends
    using MemoryStream outputStream = new MemoryStream();
    graph1.ToStream(format, outputStream);
    //The serialized bytes are read back through a fresh stream positioned at its start
    using MemoryStream inputStream = new MemoryStream(outputStream.ToArray());
    RDFGraph graph2 = RDFGraph.FromStream(format, inputStream, true);

    Assert.IsNotNull(graph2);
    //2 explicit triples; presumably the other 7 are emitted by AddDatatype - TODO confirm
    Assert.AreEqual(9, graph2.TriplesCount);
    //RDF/XML uses xsd:qname for encoding predicates. In this test we demonstrate that
    //triples with a predicate ending with "/" will lose this character once abbreviated:
    //this is correct (being a glitch of RDF/XML specs) so at the end the graphs will differ
    if (format == RDFModelEnums.RDFFormats.RdfXml)
    {
        Assert.IsFalse(graph2.Equals(graph1));
        Assert.AreEqual(0, graph2.SelectTriplesByPredicate(new RDFResource("http://ex/pred/")).TriplesCount);
        Assert.AreEqual(2, graph2.SelectTriplesByPredicate(new RDFResource("http://ex/pred")).TriplesCount);
    }
    else
    {
        Assert.IsTrue(graph2.Equals(graph1));
    }

    //Test that automatic datatype discovery happened successfully
    //(single lookup, checked for null so that a missing registration fails as an assertion)
    var discoveredDatatype = RDFDatatypeRegister.GetDatatype(datatypeName);
    Assert.IsNotNull(discoveredDatatype);
    Assert.AreEqual(RDFModelEnums.RDFDatatypes.XSD_STRING, discoveredDatatype.TargetDatatype);
    Assert.IsTrue(discoveredDatatype.Facets.Single() is RDFPatternFacet fct && fct.Pattern == "^ex$");
}

[DataTestMethod]
[DataRow(RDFModelEnums.RDFFormats.NTriples)]
[DataRow(RDFModelEnums.RDFFormats.RdfXml)]
Expand Down Expand Up @@ -1660,6 +1736,27 @@ public void ShouldImportFromDataTable()
Assert.IsTrue(graph2.Equals(graph1));
}

//Round-trips a graph carrying a custom datatype definition through a DataTable,
//re-imports it with datatype discovery enabled and checks that the datatype lands in the register.
[TestMethod]
public void ShouldImportFromDataTableWithEnabledDatatypeDiscovery()
{
    const string datatypeName = "ex:mydtZ";

    RDFGraph graph1 = new RDFGraph();
    RDFTriple triple1 = new RDFTriple(new RDFResource("http://subj/"), new RDFResource("http://pred/"), new RDFPlainLiteral("lit", "en-US"));
    RDFTriple triple2 = new RDFTriple(new RDFResource("http://subj/"), new RDFResource("http://pred/"), new RDFResource("http://obj/"));
    graph1.AddTriple(triple1)
          .AddTriple(triple2)
          .AddDatatype(new RDFDatatype(new Uri(datatypeName), RDFModelEnums.RDFDatatypes.XSD_STRING, [
              new RDFPatternFacet("^ex$") ]));
    DataTable table = graph1.ToDataTable();
    RDFGraph graph2 = RDFGraph.FromDataTable(table, true);

    Assert.IsNotNull(graph2);
    //2 explicit triples; presumably the other 7 are emitted by AddDatatype - TODO confirm
    Assert.AreEqual(9, graph2.TriplesCount);
    Assert.IsTrue(graph2.Equals(graph1));

    //Test that automatic datatype discovery happened successfully
    //(single lookup, checked for null so that a missing registration fails as an assertion)
    var discoveredDatatype = RDFDatatypeRegister.GetDatatype(datatypeName);
    Assert.IsNotNull(discoveredDatatype);
    Assert.AreEqual(RDFModelEnums.RDFDatatypes.XSD_STRING, discoveredDatatype.TargetDatatype);
    Assert.IsTrue(discoveredDatatype.Facets.Single() is RDFPatternFacet fct && fct.Pattern == "^ex$");
}

[TestMethod]
public void ShouldImportEmptyFromDataTable()
{
Expand Down
54 changes: 46 additions & 8 deletions RDFSharp/Model/RDFGraph.cs
Original file line number Diff line number Diff line change
Expand Up @@ -631,7 +631,7 @@ public DataTable ToDataTable()
/// <summary>
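/// Reads a graph from a file of the given RDF format. When datatype discovery is enabled, the datatype definitions extracted from the graph are also added to the datatype register.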
/// </summary>
public static RDFGraph FromFile(RDFModelEnums.RDFFormats rdfFormat, string filepath)
public static RDFGraph FromFile(RDFModelEnums.RDFFormats rdfFormat, string filepath, bool enableDatatypeDiscovery=false)
{
#region Guards
if (string.IsNullOrEmpty(filepath))
Expand All @@ -656,14 +656,23 @@ public static RDFGraph FromFile(RDFModelEnums.RDFFormats rdfFormat, string filep
graph = RDFTriX.Deserialize(filepath);
break;
}
return graph;

#region Datatype Discovery
if (enableDatatypeDiscovery)
{
foreach (RDFDatatype datatypeDefinition in graph.ExtractDatatypeDefinitions())
RDFDatatypeRegister.AddDatatype(datatypeDefinition);
}
#endregion

return graph;
}

/// <summary>
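/// Reads a graph from a stream of the given RDF format. When datatype discovery is enabled, the datatype definitions extracted from the graph are also added to the datatype register.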
/// </summary>
public static RDFGraph FromStream(RDFModelEnums.RDFFormats rdfFormat, Stream inputStream) => FromStream(rdfFormat, inputStream, null);
internal static RDFGraph FromStream(RDFModelEnums.RDFFormats rdfFormat, Stream inputStream, Uri graphContext)
public static RDFGraph FromStream(RDFModelEnums.RDFFormats rdfFormat, Stream inputStream, bool enableDatatypeDiscovery=false) => FromStream(rdfFormat, inputStream, null, enableDatatypeDiscovery);
internal static RDFGraph FromStream(RDFModelEnums.RDFFormats rdfFormat, Stream inputStream, Uri graphContext, bool enableDatatypeDiscovery=false)
{
#region Guards
if (inputStream == null)
Expand All @@ -686,13 +695,22 @@ internal static RDFGraph FromStream(RDFModelEnums.RDFFormats rdfFormat, Stream i
graph = RDFTriX.Deserialize(inputStream, graphContext);
break;
}
return graph;

#region Datatype Discovery
if (enableDatatypeDiscovery)
{
foreach (RDFDatatype datatypeDefinition in graph.ExtractDatatypeDefinitions())
RDFDatatypeRegister.AddDatatype(datatypeDefinition);
}
#endregion

return graph;
}

/// <summary>
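/// Reads a graph from a datatable with "Subject-Predicate-Object" columns. When datatype discovery is enabled, the datatype definitions extracted from the graph are also added to the datatype register.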
/// </summary>
public static RDFGraph FromDataTable(DataTable table)
public static RDFGraph FromDataTable(DataTable table, bool enableDatatypeDiscovery=false)
{
#region Guards
if (table == null)
Expand All @@ -705,6 +723,8 @@ public static RDFGraph FromDataTable(DataTable table)

RDFGraph graph = new RDFGraph();

#region Parse Table

#region CONTEXT
//Parse the name of the datatable for Uri, in order to assign the graph name
if (Uri.TryCreate(table.TableName, UriKind.Absolute, out Uri graphUri))
Expand Down Expand Up @@ -745,15 +765,25 @@ public static RDFGraph FromDataTable(DataTable table)
graph.AddTriple(new RDFTriple((RDFResource)rowSubj, (RDFResource)rowPred, (RDFLiteral)rowObj));
#endregion
}
#endregion
#endregion

#endregion

#region Datatype Discovery
if (enableDatatypeDiscovery)
{
foreach (RDFDatatype datatypeDefinition in graph.ExtractDatatypeDefinitions())
RDFDatatypeRegister.AddDatatype(datatypeDefinition);
}
#endregion

return graph;
}

/// <summary>
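/// Reads a graph by trying to dereference the given Uri. When datatype discovery is enabled, the datatype definitions extracted from the graph are also added to the datatype register.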
/// </summary>
public static RDFGraph FromUri(Uri uri, int timeoutMilliseconds = 20000)
public static RDFGraph FromUri(Uri uri, int timeoutMilliseconds=20000, bool enableDatatypeDiscovery=false)
{
#region Guards
if (uri == null)
Expand Down Expand Up @@ -819,6 +849,14 @@ public static RDFGraph FromUri(Uri uri, int timeoutMilliseconds = 20000)
throw new RDFModelException($"Cannot read RDF graph from Uri {uri} because: " + ex.Message);
}

#region Datatype Discovery
if (enableDatatypeDiscovery)
{
foreach (RDFDatatype datatypeDefinition in graph.ExtractDatatypeDefinitions())
RDFDatatypeRegister.AddDatatype(datatypeDefinition);
}
#endregion

return graph;
}
#endregion
Expand Down

0 comments on commit 6aec07c

Please sign in to comment.