// NOTE(review): removed stray "Newer"/"Older" diff-viewer artifacts — they are not valid Java.
package fr.emse.gitlab.saref.jobs;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.charset.MalformedInputException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

import org.apache.jena.atlas.RuntimeIOException;
import org.apache.jena.datatypes.xsd.XSDDatatype;
import org.apache.jena.query.Dataset;
import org.apache.jena.query.QueryExecution;
import org.apache.jena.query.QueryExecutionFactory;
import org.apache.jena.query.QuerySolution;
import org.apache.jena.query.ReadWrite;
import org.apache.jena.query.ResultSet;
import org.apache.jena.rdf.model.Literal;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.NodeIterator;
import org.apache.jena.rdf.model.RDFNode;
import org.apache.jena.rdf.model.ResIterator;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.ResourceFactory;
import org.apache.jena.riot.Lang;
import org.apache.jena.vocabulary.DCTerms;
import org.apache.jena.vocabulary.OWL;
import org.apache.jena.vocabulary.OWL2;
import org.apache.jena.vocabulary.RDF;
import org.apache.jena.vocabulary.RDFS;
import org.topbraid.jenax.util.JenaUtil;
import org.topbraid.shacl.validation.ValidationUtil;
import org.topbraid.shacl.vocabulary.SH;

import fr.emse.gitlab.saref.Constants;
import fr.emse.gitlab.saref.SAREFPipelineException;
import fr.emse.gitlab.saref.entities.git.ReleaseVersion;
import fr.emse.gitlab.saref.entities.git.Version;
import fr.emse.gitlab.saref.vocabs.EX;
import fr.emse.gitlab.saref.vocabs.RDFP;
import fr.emse.gitlab.saref.vocabs.SCHEMA;
import fr.emse.gitlab.saref.vocabs.VANN;
public class ReadOntology extends JobRunner {
static final String SHACL_WARNING = "http://www.w3.org/ns/shacl#Warning";
static final String SHACL_VIOLATION = "http://www.w3.org/ns/shacl#Violation";
static final String SOURCE_ERROR = "There shall be exactly one annotation dcterms:source that points to the ETSI Technical Specification URL `http://www.etsi.org/deliver/etsi_ts/...`.";
static final String SELECT_VIOLATION = "PREFIX sh: <http://www.w3.org/ns/shacl#>\n"
+ "SELECT ?focusNode ?resultMessage ?resultPath ?value ?severity\n" + "WHERE { \n"
+ " ?violation sh:focusNode ?focusNode .\n" + " ?violation sh:resultMessage ?resultMessage .\n"
+ " ?violation sh:resultSeverity ?severity .\n"
+ " OPTIONAL { ?violation sh:resultPath ?resultPath . }\n"
+ " OPTIONAL { ?violation sh:value ?value . } \n" + "}"
+ "ORDER BY ?severity ?resultMessage ?focusNode ";
static final Map<String, String> PREFIXES = new HashMap<String, String>();
static {
PREFIXES.put("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
PREFIXES.put("rdfs", "http://www.w3.org/2000/01/rdf-schema#");
PREFIXES.put("owl", "http://www.w3.org/2002/07/owl#");
PREFIXES.put("xsd", "http://www.w3.org/2001/XMLSchema#");
PREFIXES.put("dcterms", "http://purl.org/dc/terms/");
PREFIXES.put("vann", "http://purl.org/vocab/vann/");
PREFIXES.put("schema", "http://schema.org/");
PREFIXES.put("voaf", "http://purl.org/vocommons/voaf#");
PREFIXES.put("saref", "https://saref.etsi.org/core/");
}
private static final SimpleDateFormat SIMPLE_DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd");
private static final Resource ETSI_URL = ResourceFactory.createResource("https://www.etsi.org/");
private static final Resource ETSI_LICENSE = ResourceFactory
.createResource("https://forge.etsi.org/etsi-software-license");
public ReadOntology(String testSuiteName) {
super(testSuiteName);
}
public void doJob(Dataset dataset, Version version) throws SAREFPipelineException {
String repoName = version.getRepositoryName();
String ontologyName = repoName.equals("saref-core") ? "saref.ttl" : repoName + ".ttl";
File ontologyFile = new File(version.getRepository().getDirectory(), "ontology/" + ontologyName);
Model model = ModelFactory.createDefaultModel();
try (FileInputStream input = new FileInputStream(ontologyFile)) {
model.read(input, Constants.BASE, Lang.TTL.getLabel());
} catch (Exception ex) {
if (ex instanceof RuntimeIOException && ex.getCause() instanceof MalformedInputException) {
try (InputStreamReader input2 = new InputStreamReader(new FileInputStream(ontologyFile),
Charset.defaultCharset())) {
model.read(input2, Constants.BASE, Lang.TTL.getLabel());
} catch (Exception ex2) {
logger.error("Exception while reading the ontology file", ex2);
throw new SAREFPipelineException("Exception while reading the ontology file", ex2);
}
} else {
logger.error("Exception while reading the ontology file", ex);
throw new SAREFPipelineException("Exception while reading the ontology file", ex);
}
}
checkPrefixes(model);
checkShapes(model, version);
// how does the TDB model work? documentation is not precise.
dataset.begin(ReadWrite.WRITE);
Model config = getNamedModel(dataset, Constants.CONFIG);
updateOntologyMetadata(version, model);
dataset.addNamedModel(version.getUri(), model);
augmentTermGraphs(dataset, version, model, config);
dataset.commit();
}
private void checkPrefixes(Model model) {
final Map<String, String> prefixes = model.getNsPrefixMap();
for (String s : PREFIXES.keySet()) {
if (prefixes.containsKey(s)) {
if (!prefixes.get(s).equals(PREFIXES.get(s))) {
logger.error(String.format(
"Prefix `%s:` in the ontology file is expected to be equal to `<%s>`. Got: `<%s>`", s,
PREFIXES.get(s), prefixes.get(s)));
}
}
}
for (Map.Entry<String, String> entry : prefixes.entrySet()) {
String s = entry.getKey();
String l = entry.getValue();
if (l.contains("saref")) {
if (!l.matches(Constants.REGEX_ONTO_SERIES_URI)) {
logger.error(String.format(
"Prefix `%s:` in the ontology file contains string \"saref\", but does not seem to match the official SAREF ontologies namespaces: `\\\\%s\\\\`. Got: `<%s>`",
s, Constants.REGEX_ONTO_SERIES_URI, l));
}
}
}
}
private void checkShapes(Model model, Version version) throws SAREFPipelineException {
Model shapeModel = JenaUtil.createDefaultModel();
try (InputStream in = ReadOntology.class.getClassLoader().getResourceAsStream("sarefShape.ttl")) {
shapeModel.read(in, Constants.BASE, "TTL");
} catch (Exception ex) {
logger.error("Exception while reading the shape file", ex);
throw new SAREFPipelineException("Exception while reading the shape file", ex);
}
Resource reportResource = ValidationUtil.validateModel(model, shapeModel, true);
boolean conforms = reportResource.getProperty(SH.conforms).getBoolean();
if (!conforms) {
Model reportModel = reportResource.getModel();
reportModel.setNsPrefixes(PREFIXES);
reportModel.setNsPrefix("sh", "http://www.w3.org/ns/shacl#");
try (QueryExecution exec = QueryExecutionFactory.create(SELECT_VIOLATION, reportModel);) {
Resource previousSeverity = null;
Literal previousResultMessage = null;
List<Resource> previousFocusNodes = new ArrayList<>();
List<RDFNode> previousvalues = new ArrayList<>();
for (ResultSet resultSet = exec.execSelect(); resultSet.hasNext();) {
QuerySolution sol = resultSet.next();
Resource severity = sol.getResource("severity");
Literal resultMessage = sol.getLiteral("resultMessage");
Resource focusNode = sol.get("focusNode").asResource();
if (severity != null && severity.equals(previousSeverity) && resultMessage != null
&& resultMessage.equals(previousResultMessage)) {
previousFocusNodes.add(focusNode);
previousvalues.add(value);
if (previousSeverity != null) {
String message = String.format("%s: %s", previousResultMessage,
String.join(" - ", previousFocusNodes.stream().map(Object::toString)
.collect(Collectors.toList()).toArray(new String[1])));
if (severity.getURI().equals(SHACL_VIOLATION)) {
logger.error(message);
} else {
logger.warn(message);
}
}
previousSeverity = severity;
previousResultMessage = resultMessage;
previousFocusNodes = new ArrayList<>();
previousvalues = new ArrayList<>();
}
}
if (!previousFocusNodes.isEmpty()) {
String message = String.format("%s: %s", previousResultMessage,
String.join(" - ", previousFocusNodes.stream().map(Object::toString)
.collect(Collectors.toList()).toArray(new String[1])));
if (previousSeverity != null && previousSeverity.getURI().equals(SHACL_VIOLATION)) {
logger.error(message);
} else {
logger.warn(message);
for (ResIterator it = model.listSubjectsWithProperty(RDF.type, OWL2.Ontology); it.hasNext();) {
logger.error("There shall be only one owl:Ontology");
}
// if(version instanceof ReleaseVersion) {
// int source = 0;
// NodeIterator its = model.listObjectsOfProperty(ontology, DCTerms.source);
// if(!its.hasNext()) {
// logger.warn(SOURCE_ERROR);
// }
// while(its.hasNext()) {
// if(source++>1) {
// logger.warn(SOURCE_ERROR);
// }
// RDFNode s = its.next();
// if(!s.isURIResource() || !s.asResource().getURI().startsWith("http://www.etsi.org/deliver/etsi_ts/")) {
// logger.warn(SOURCE_ERROR);
// }
// }
// }
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
}
}
/**
* for each term, add: some triples in the default graph (used to detect naming
* clashes), and some triples in the named graph of the term (used to display
* the page of the term)
*
* @param dataset
* @param version
* @param model
*/
private void augmentTermGraphs(Dataset dataset, Version version, Model model, Model config) {
Set<Resource> definedTerms = new HashSet<>();
Set<Resource> usedTerms = new HashSet<>();
computeTerms(version, model, definedTerms, usedTerms);
config.add(version.getRepository().getResource(), EX.hasVersion, version.getResource());
config.add(version.getResource(), RDF.type, EX.OntologyVersion);
config.add(version.getResource(), EX.repositoryName, version.getRepositoryName());
config.add(version.getResource(), EX.versionInfo, version.getVersionName());
if (version.getPriorVersion() != null) {
config.add(version.getResource(), EX.priorVersion, version.getPriorVersion().getResource());
}
for (Resource t : definedTerms) {
Model termModel = getNamedModel(dataset, t.getURI());
termModel.add(t, RDF.type, RDFS.Resource);
String localName = t.getURI().substring(version.getRepository().getNamespace().length());
termModel.add(t, RDFS.isDefinedBy, version.getResource());
// keep the most recent definition of the term
if (version.getNextVersion() == null) {
termModel.add(DCTerms.modified, RDF.type, OWL.AnnotationProperty);
termModel.add(t, DCTerms.modified, SIMPLE_DATE_FORMAT.format(version.getIssued()), XSDDatatype.XSDdate);
try (QueryExecution exec = QueryExecutionFactory.create("DESCRIBE <" + t.getURI() + ">", model)) {
termModel.add(exec.execDescribe());
}
}
config.add(t, RDF.type, RDFP.Resource);
config.add(t, EX.localName, localName);
config.add(t, RDFS.isDefinedBy, version.getResource());
}
for (Resource t : usedTerms) {
Model termModel = getNamedModel(dataset, t.getURI());
termModel.add(t, EX.isUsedBy, version.getResource());
config.add(t, RDF.type, RDFP.Resource);
config.add(t, EX.isUsedBy, version.getResource());
}
}
private void updateOntologyMetadata(Version version, Model model) {
Resource ontologySeries = version.getRepository().getResource();
model.add(ontologySeries, RDF.type, OWL2.Ontology);
model.removeAll(ontologySeries, OWL2.versionIRI, null);
model.add(ontologySeries, OWL2.versionIRI, version.getResource());
model.removeAll(ontologySeries, OWL2.versionInfo, null);
model.add(ontologySeries, OWL2.versionInfo, version.getVersionName());
model.removeAll(ontologySeries, OWL2.priorVersion, null);
if (version.getPriorVersion() != null) {
model.add(ontologySeries, OWL2.priorVersion, version.getPriorVersion().getResource());
}
model.removeAll(ontologySeries, DCTerms.publisher, null);
model.add(ontologySeries, DCTerms.publisher, ETSI_URL);
model.removeAll(ontologySeries, DCTerms.license, null);
model.add(ontologySeries, DCTerms.license, ETSI_LICENSE);
model.add(VANN.preferredNamespaceUri, RDF.type, OWL.AnnotationProperty);
model.add(VANN.preferredNamespacePrefix, RDF.type, OWL.AnnotationProperty);
model.add(DCTerms.title, RDF.type, OWL.AnnotationProperty);
model.add(DCTerms.title, RDF.type, OWL.AnnotationProperty);
model.add(DCTerms.title, RDF.type, OWL.AnnotationProperty);
model.add(DCTerms.abstract_, RDF.type, OWL.AnnotationProperty);
model.add(DCTerms.source, RDF.type, OWL.AnnotationProperty);
model.add(DCTerms.description, RDF.type, OWL.AnnotationProperty);
model.add(DCTerms.creator, RDF.type, OWL.AnnotationProperty);
model.add(DCTerms.contributor, RDF.type, OWL.AnnotationProperty);
model.add(DCTerms.publisher, RDF.type, OWL.AnnotationProperty);
model.add(DCTerms.issued, RDF.type, OWL.AnnotationProperty);
model.add(DCTerms.modified, RDF.type, OWL.AnnotationProperty);
model.add(DCTerms.license, RDF.type, OWL.AnnotationProperty);
model.add(SCHEMA.Person, RDF.type, OWL.Class);
model.add(SCHEMA.Organization, RDF.type, OWL.Class);
model.add(SCHEMA.affiliation, RDF.type, OWL.ObjectProperty);
model.add(SCHEMA.familyName, RDF.type, OWL.DatatypeProperty);
model.add(SCHEMA.givenName, RDF.type, OWL.DatatypeProperty);
model.add(SCHEMA.name, RDF.type, OWL.DatatypeProperty);
// // list of contributors
// Set<Contributor> contributors = new HashSet<>();
// for (Iterator<RevCommit> it = git.log().add(version.ref.getObjectId()).call().iterator(); it.hasNext();) {
// RevCommit commit = it.next();
// Contributor contributor = new Contributor(commit.getCommitterIdent());
// contributors.add(contributor);
// }
// for (Contributor contributor : contributors) {
// // git can only provide name + email.
// // would need to maintain some list of mapping email -> url in the future.
// Resource anon = onto.createResource();
// onto.add(repo.resource, DCTerms.contributor, anon);
// onto.add(anon, FOAF.name, contributor.getName());
// onto.add(anon, FOAF.mbox, contributor.getEmailAddress());
// }
model.removeAll(ontologySeries, VANN.preferredNamespacePrefix, null);
model.removeAll(ontologySeries, VANN.preferredNamespaceUri, null);
model.add(ontologySeries, VANN.preferredNamespacePrefix, version.getRepository().getPrefix());
model.add(ontologySeries, VANN.preferredNamespaceUri, version.getRepository().getNamespace(),
XSDDatatype.XSDanyURI);
model.removeAll(ontologySeries, DCTerms.issued, null);
model.add(ontologySeries, DCTerms.issued, SIMPLE_DATE_FORMAT.format(version.getIssued()), XSDDatatype.XSDdate);
}
private void computeTerms(Version version, Model model, Set<Resource> definedTerms, Set<Resource> usedTerms) {
model.listStatements().forEachRemaining(stmt -> {
Resource s = stmt.getSubject();
Resource p = stmt.getPredicate();
Resource o = stmt.getObject().isResource() ? (Resource) stmt.getObject() : null;
computeTerms(version, s, definedTerms, usedTerms);
computeTerms(version, p, definedTerms, usedTerms);
computeTerms(version, o, definedTerms, usedTerms);
});
}
private void computeTerms(Version version, Resource t, Set<Resource> definedTerms, Set<Resource> usedTerms) {
if (t == null || !t.isURIResource() || !t.getURI().startsWith(Constants.BASE) || t.getURI().endsWith("/")) {
return;
}
String uri = t.getURI();
if (uri.startsWith(version.getRepository().getNamespace())) {
definedTerms.add(t);
} else {
usedTerms.add(t);
}
}
private Model getNamedModel(Dataset dataset, String uri) {
if (!dataset.containsNamedModel(uri)) {
dataset.addNamedModel(uri, ModelFactory.createDefaultModel());
}
return dataset.getNamedModel(uri);
}
}