package fr.emse.gitlab.saref.jobs;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.charset.MalformedInputException;
import java.text.SimpleDateFormat;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import org.apache.jena.atlas.RuntimeIOException;
import org.apache.jena.datatypes.xsd.XSDDatatype;
import org.apache.jena.query.Dataset;
import org.apache.jena.query.QueryExecution;
import org.apache.jena.query.QueryExecutionFactory;
import org.apache.jena.query.QuerySolution;
import org.apache.jena.query.ReadWrite;
import org.apache.jena.query.ResultSet;
import org.apache.jena.rdf.model.Literal;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.NodeIterator;
import org.apache.jena.rdf.model.RDFNode;
import org.apache.jena.rdf.model.ResIterator;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.ResourceFactory;
import org.apache.jena.riot.Lang;
import org.apache.jena.vocabulary.DCTerms;
import org.apache.jena.vocabulary.OWL2;
import org.apache.jena.vocabulary.RDF;
import org.apache.jena.vocabulary.RDFS;
import org.topbraid.jenax.util.JenaUtil;
import org.topbraid.shacl.validation.ValidationUtil;
import org.topbraid.shacl.vocabulary.SH;

import fr.emse.gitlab.saref.Constants;
import fr.emse.gitlab.saref.SAREFPipelineException;
import fr.emse.gitlab.saref.entities.git.ReleaseVersion;
import fr.emse.gitlab.saref.entities.git.Version;
import fr.emse.gitlab.saref.vocabs.EX;
import fr.emse.gitlab.saref.vocabs.RDFP;
import fr.emse.gitlab.saref.vocabs.VANN;

/**
 * Job that reads the Turtle ontology file of a repository version, checks its
 * prefix declarations, validates it against the SHACL shapes shipped as the
 * {@code sarefShape.ttl} classpath resource, and then stores the ontology and
 * the derived per-term graphs in the dataset.
 */
public class ReadOntology extends JobRunner {

    static final String SHACL_WARNING = "http://www.w3.org/ns/shacl#Warning";
    static final String SHACL_VIOLATION = "http://www.w3.org/ns/shacl#Violation";
    static final String SOURCE_ERROR = "There shall be exactly one annotation dcterms:source that points to the ETSI Technical Specification URL `http://www.etsi.org/deliver/etsi_ts/...`.";

    /**
     * SPARQL query run against the SHACL validation report; yields one row per
     * validation result.
     */
    static final String SELECT_VIOLATION = "PREFIX sh: <http://www.w3.org/ns/shacl#>\n"
            + "SELECT ?focusNode ?resultMessage ?resultPath ?value ?severity\n"
            + "WHERE { \n"
            + " ?violation sh:focusNode ?focusNode .\n"
            + " ?violation sh:resultMessage ?resultMessage .\n"
            + " ?violation sh:resultSeverity ?severity .\n"
            + " OPTIONAL { ?violation sh:resultPath ?resultPath . }\n"
            + " OPTIONAL { ?violation sh:value ?value . } \n"
            + "}";

    /** Expected namespace for each well-known prefix (prefix -> namespace IRI). */
    static final Map<String, String> PREFIXES = new HashMap<String, String>();
    static {
        PREFIXES.put("owl", "http://www.w3.org/2002/07/owl#");
        PREFIXES.put("rdfs", "http://www.w3.org/2000/01/rdf-schema#");
        PREFIXES.put("xsd", "http://www.w3.org/2001/XMLSchema#");
        PREFIXES.put("dcterms", "http://purl.org/dc/terms/");
        PREFIXES.put("vann", "http://purl.org/vocab/vann/");
        PREFIXES.put("schema", "http://schema.org/");
        PREFIXES.put("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
        PREFIXES.put("voaf", "http://purl.org/vocommons/voaf#");
        PREFIXES.put("dce", "http://purl.org/dc/elements/1.1/");
        PREFIXES.put("dct", "http://purl.org/dc/terms/");
        PREFIXES.put("xml", "http://www.w3.org/XML/1998/namespace/");
        PREFIXES.put("saref", "https://saref.etsi.org/core/");
    }

    // NOTE(review): SimpleDateFormat is not thread-safe; confirm that jobs never
    // share this instance across threads.
    private static final SimpleDateFormat SIMPLE_DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd");
    private static final Resource ETSI_URL = ResourceFactory.createResource("https://www.etsi.org/");
    private static final Resource ETSI_LICENSE = ResourceFactory
            .createResource("https://forge.etsi.org/etsi-software-license");

    public ReadOntology(String testSuiteName) {
        super(testSuiteName);
    }

    /**
     * Reads the ontology file of the given version, checks it, and stores it in
     * the dataset inside a single write transaction.
     *
     * @param dataset the dataset that receives the ontology graph, the per-term
     *                graphs and the configuration graph
     * @param version the repository version whose ontology file is processed
     * @throws SAREFPipelineException if the ontology file or the shape file
     *                                cannot be read
     */
    public void doJob(Dataset dataset, Version version) throws SAREFPipelineException {
        String repoName = version.getRepositoryName();
        // The core repository names its file "saref.ttl"; all others use the repository name.
        String ontologyName = repoName.equals("saref-core") ? "saref.ttl" : repoName + ".ttl";
        File ontologyFile = new File(version.getRepository().getDirectory(), "ontology/" + ontologyName);

        Model model = readOntologyFile(ontologyFile);

        checkPrefixes(model);
        checkShapes(model, version);

        dataset.begin(ReadWrite.WRITE);
        try {
            Model config = getNamedModel(dataset, Constants.CONFIG);
            updateOntologyMetadata(version, model);
            dataset.addNamedModel(version.getUri(), model);
            augmentTermGraphs(dataset, version, model, config);
            dataset.commit();
        } finally {
            // Always finish the transaction: after commit() this only closes it,
            // and if an exception prevented the commit the pending write is aborted
            // instead of leaving the transaction open.
            dataset.end();
        }
    }

    /**
     * Parses the ontology file as Turtle. The file is first read as a byte
     * stream; if that fails because of malformed input, a second attempt is made
     * through a reader that uses the platform default charset.
     *
     * @param ontologyFile the Turtle file to parse
     * @return a model holding the parsed ontology
     * @throws SAREFPipelineException if the file cannot be read or parsed
     */
    private Model readOntologyFile(File ontologyFile) throws SAREFPipelineException {
        Model model = ModelFactory.createDefaultModel();
        try (FileInputStream input = new FileInputStream(ontologyFile)) {
            model.read(input, Constants.BASE, Lang.TTL.getLabel());
            return model;
        } catch (Exception ex) {
            boolean malformedInput = ex instanceof RuntimeIOException
                    && ex.getCause() instanceof MalformedInputException;
            if (!malformedInput) {
                logger.error("Exception while reading the ontology file", ex);
                throw new SAREFPipelineException("Exception while reading the ontology file", ex);
            }
        }

        // Retry into a fresh model so that statements (in particular blank nodes)
        // produced by the failed first attempt are not kept alongside the result
        // of the second parse.
        Model retryModel = ModelFactory.createDefaultModel();
        try (FileInputStream input = new FileInputStream(ontologyFile);
                InputStreamReader reader = new InputStreamReader(input, Charset.defaultCharset())) {
            retryModel.read(reader, Constants.BASE, Lang.TTL.getLabel());
            return retryModel;
        } catch (Exception ex2) {
            logger.error("Exception while reading the ontology file", ex2);
            throw new SAREFPipelineException("Exception while reading the ontology file", ex2);
        }
    }

    /**
     * Logs a warning for every prefix of the model that is listed in
     * {@link #PREFIXES} but bound to a different namespace, and for every prefix
     * whose namespace contains "saref" without matching
     * {@code Constants.REGEX_ONTO_SERIES_URI}.
     *
     * @param model the parsed ontology
     */
    private void checkPrefixes(Model model) {
        final Map<String, String> prefixes = model.getNsPrefixMap();

        for (Map.Entry<String, String> expected : PREFIXES.entrySet()) {
            String prefix = expected.getKey();
            if (prefixes.containsKey(prefix) && !prefixes.get(prefix).equals(expected.getValue())) {
                logger.warn(String.format(
                        "Prefix `%s:` in the ontology file is expected to be equal to `<%s>`. Got: `<%s>`", prefix,
                        expected.getValue(), prefixes.get(prefix)));
            }
        }

        for (Map.Entry<String, String> entry : prefixes.entrySet()) {
            String s = entry.getKey();
            String l = entry.getValue();
            if (l.contains("saref") && !l.matches(Constants.REGEX_ONTO_SERIES_URI)) {
                logger.warn(String.format(
                        "Prefix `%s:` in the ontology file contains string \"saref\", but does not seem to match the official SAREF ontologies namespaces: `\\\\%s\\\\`. Got: `<%s>`",
                        s, Constants.REGEX_ONTO_SERIES_URI, l));
            }
        }
    }

    /**
     * Validates the model against the shapes of the {@code sarefShape.ttl}
     * resource and logs each validation result: as an error when its severity is
     * {@code sh:Violation}, as a warning otherwise. Also logs an error when the
     * model declares more than one {@code owl:Ontology}.
     *
     * @param model   the parsed ontology
     * @param version the version being processed (only referenced by the
     *                currently disabled dcterms:source check)
     * @throws SAREFPipelineException if the shape file cannot be read
     */
    private void checkShapes(Model model, Version version) throws SAREFPipelineException {
        Model shapeModel = JenaUtil.createDefaultModel();
        try (InputStream in = ReadOntology.class.getClassLoader().getResourceAsStream("sarefShape.ttl")) {
            shapeModel.read(in, Constants.BASE, "TTL");
        } catch (Exception ex) {
            logger.error("Exception while reading the shape file", ex);
            throw new SAREFPipelineException("Exception while reading the shape file", ex);
        }

        Resource reportResource = ValidationUtil.validateModel(model, shapeModel, true);
        boolean conforms = reportResource.getProperty(SH.conforms).getBoolean();
        if (!conforms) {
            Model reportModel = reportResource.getModel();
            reportModel.setNsPrefixes(PREFIXES);
            reportModel.setNsPrefix("sh", "http://www.w3.org/ns/shacl#");
            try (QueryExecution exec = QueryExecutionFactory.create(SELECT_VIOLATION, reportModel)) {
                ResultSet resultSet = exec.execSelect();
                while (resultSet.hasNext()) {
                    QuerySolution sol = resultSet.next();
                    Resource severity = sol.getResource("severity");
                    Resource focusNode = sol.get("focusNode").asResource();
                    Literal resultMessage = sol.getLiteral("resultMessage");
                    RDFNode value = sol.get("value");
                    // The offending value is only reported when it is an IRI.
                    String gotString = (value != null && value.isURIResource()) ? String.format(" Got: %s", value)
                            : "";
                    String message = String.format("Shape violation on node %s: %s%s", focusNode, resultMessage,
                            gotString);
                    if (severity != null && severity.getURI().equals(SHACL_VIOLATION)) {
                        logger.error(message);
                    } else {
                        logger.warn(message);
                    }
                }
            }
        }

        int ontologyCount = 0;
        for (ResIterator it = model.listSubjectsWithProperty(RDF.type, OWL2.Ontology); it.hasNext();) {
            Resource ontology = it.next();
            // Report every owl:Ontology beyond the first one (the second one included).
            if (++ontologyCount > 1) {
                logger.error("There shall be only one owl:Ontology");
            }
            // Disabled check of the dcterms:source annotation for release versions:
            // if(version instanceof ReleaseVersion) {
            //     int source = 0;
            //     NodeIterator its = model.listObjectsOfProperty(ontology, DCTerms.source);
            //     if(!its.hasNext()) {
            //         logger.warn(SOURCE_ERROR);
            //     }
            //     while(its.hasNext()) {
            //         if(++source > 1) {
            //             logger.warn(SOURCE_ERROR);
            //         }
            //         RDFNode s = its.next();
            //         if(!s.isURIResource() || !s.asResource().getURI().startsWith("http://www.etsi.org/deliver/etsi_ts/")) {
            //             logger.warn(SOURCE_ERROR);
            //         }
            //     }
            // }
        }
    }

    /**
     * For each term, adds some triples to the configuration graph (used to detect
     * naming clashes) and some triples to the named graph of the term (used to
     * display the page of the term).
     *
     * @param dataset the dataset holding the named graph of each term
     * @param version the version being processed
     * @param model   the ontology model of that version
     * @param config  the configuration graph that receives the version and term
     *                metadata
     */
    private void augmentTermGraphs(Dataset dataset, Version version, Model model, Model config) {
        Set<Resource> definedTerms = new HashSet<>();
        Set<Resource> usedTerms = new HashSet<>();
        computeTerms(version, model, definedTerms, usedTerms);

        config.add(version.getRepository().getResource(), EX.hasVersion, version.getResource());
        config.add(version.getResource(), RDF.type, EX.OntologyVersion);
        config.add(version.getResource(), EX.repositoryName, version.getRepositoryName());
        config.add(version.getResource(), EX.versionInfo, version.getVersionName());
        if (version.getPriorVersion() != null) {
            config.add(version.getResource(), EX.priorVersion, version.getPriorVersion().getResource());
        }

        for (Resource t : definedTerms) {
            Model termModel = getNamedModel(dataset, t.getURI());
            termModel.add(t, RDF.type, RDFS.Resource);
            String localName = t.getURI().substring(version.getRepository().getNamespace().length());
            termModel.add(t, RDFS.isDefinedBy, version.getResource());
            // keep the most recent definition of the term
            if (version.getNextVersion() == null) {
                termModel.add(t, DCTerms.modified, SIMPLE_DATE_FORMAT.format(version.getIssued()),
                        XSDDatatype.XSDdate);
                try (QueryExecution exec = QueryExecutionFactory.create("DESCRIBE <" + t.getURI() + ">", model)) {
                    termModel.add(exec.execDescribe());
                }
            }
            config.add(t, RDF.type, RDFP.Resource);
            config.add(t, EX.localName, localName);
            config.add(t, RDFS.isDefinedBy, version.getResource());
        }

        for (Resource t : usedTerms) {
            Model termModel = getNamedModel(dataset, t.getURI());
            termModel.add(t, EX.isUsedBy, version.getResource());
            config.add(t, RDF.type, RDFP.Resource);
            config.add(t, EX.isUsedBy, version.getResource());
        }
    }

    /**
     * Overwrites the ontology-level metadata of the model (version IRI, version
     * info, prior version, publisher, license, preferred namespace prefix/URI
     * and issue date) with values derived from the given version.
     *
     * @param version the version that provides the metadata values
     * @param model   the ontology model to update in place
     */
    private void updateOntologyMetadata(Version version, Model model) {
        Resource ontologySeries = version.getRepository().getResource();
        model.add(ontologySeries, RDF.type, OWL2.Ontology);

        model.removeAll(ontologySeries, OWL2.versionIRI, null);
        model.add(ontologySeries, OWL2.versionIRI, version.getResource());

        model.removeAll(ontologySeries, OWL2.versionInfo, null);
        model.add(ontologySeries, OWL2.versionInfo, version.getVersionName());

        model.removeAll(ontologySeries, OWL2.priorVersion, null);
        if (version.getPriorVersion() != null) {
            model.add(ontologySeries, OWL2.priorVersion, version.getPriorVersion().getResource());
        }

        model.removeAll(ontologySeries, DCTerms.publisher, null);
        model.add(ontologySeries, DCTerms.publisher, ETSI_URL);

        model.removeAll(ontologySeries, DCTerms.license, null);
        model.add(ontologySeries, DCTerms.license, ETSI_LICENSE);

        // Disabled: list of contributors
        // Set<Contributor> contributors = new HashSet<>();
        // for (Iterator<RevCommit> it = git.log().add(version.ref.getObjectId()).call().iterator(); it.hasNext();) {
        //     RevCommit commit = it.next();
        //     Contributor contributor = new Contributor(commit.getCommitterIdent());
        //     contributors.add(contributor);
        // }
        // for (Contributor contributor : contributors) {
        //     // git can only provide name + email.
        //     // would need to maintain some list of mapping email -> url in the future.
        //     Resource anon = onto.createResource();
        //     onto.add(repo.resource, DCTerms.contributor, anon);
        //     onto.add(anon, FOAF.name, contributor.getName());
        //     onto.add(anon, FOAF.mbox, contributor.getEmailAddress());
        // }

        model.removeAll(ontologySeries, VANN.preferredNamespacePrefix, null);
        model.removeAll(ontologySeries, VANN.preferredNamespaceUri, null);
        model.add(ontologySeries, VANN.preferredNamespacePrefix, version.getRepository().getPrefix());
        model.add(ontologySeries, VANN.preferredNamespaceUri, version.getRepository().getNamespace(),
                XSDDatatype.XSDanyURI);

        model.removeAll(ontologySeries, DCTerms.issued, null);
        model.add(ontologySeries, DCTerms.issued, SIMPLE_DATE_FORMAT.format(version.getIssued()),
                XSDDatatype.XSDdate);
    }

    /**
     * Classifies the subject, predicate and resource object of every statement
     * of the model as a defined or a used term.
     *
     * @param version      the version being processed
     * @param model        the ontology model to scan
     * @param definedTerms receives the terms inside the repository namespace
     * @param usedTerms    receives the other terms under {@code Constants.BASE}
     */
    private void computeTerms(Version version, Model model, Set<Resource> definedTerms, Set<Resource> usedTerms) {
        model.listStatements().forEachRemaining(stmt -> {
            Resource s = stmt.getSubject();
            Resource p = stmt.getPredicate();
            // Literal objects are not terms; they are passed on as null and ignored.
            Resource o = stmt.getObject().isResource() ? (Resource) stmt.getObject() : null;
            computeTerms(version, s, definedTerms, usedTerms);
            computeTerms(version, p, definedTerms, usedTerms);
            computeTerms(version, o, definedTerms, usedTerms);
        });
    }

    /**
     * Adds the resource to {@code definedTerms} when its URI starts with the
     * repository namespace, to {@code usedTerms} otherwise. Resources that are
     * null, not URI resources, outside {@code Constants.BASE}, or whose URI ends
     * with "/" are ignored.
     *
     * @param version      the version being processed
     * @param t            the candidate term, may be null
     * @param definedTerms receives terms inside the repository namespace
     * @param usedTerms    receives the other terms under {@code Constants.BASE}
     */
    private void computeTerms(Version version, Resource t, Set<Resource> definedTerms, Set<Resource> usedTerms) {
        if (t == null || !t.isURIResource() || !t.getURI().startsWith(Constants.BASE)
                || t.getURI().endsWith("/")) {
            return;
        }
        String uri = t.getURI();
        if (uri.startsWith(version.getRepository().getNamespace())) {
            definedTerms.add(t);
        } else {
            usedTerms.add(t);
        }
    }

    /**
     * Returns the named model with the given URI, first adding an empty one to
     * the dataset when none exists yet.
     *
     * @param dataset the dataset to look the model up in
     * @param uri     the name of the graph
     * @return the named model of the dataset for that URI
     */
    private Model getNamedModel(Dataset dataset, String uri) {
        if (!dataset.containsNamedModel(uri)) {
            dataset.addNamedModel(uri, ModelFactory.createDefaultModel());
        }
        return dataset.getNamedModel(uri);
    }
}