package fr.emse.gitlab.saref.jobs; import java.io.File; import java.io.FileInputStream; import java.io.InputStream; import java.io.InputStreamReader; import java.nio.charset.Charset; import java.nio.charset.MalformedInputException; import java.nio.charset.StandardCharsets; import java.text.SimpleDateFormat; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; import java.util.regex.Pattern; import org.apache.commons.io.IOUtils; import org.apache.jena.atlas.RuntimeIOException; import org.apache.jena.query.Dataset; import org.apache.jena.query.DatasetFactory; import org.apache.jena.query.QueryExecution; import org.apache.jena.query.QueryExecutionFactory; import org.apache.jena.query.QuerySolution; import org.apache.jena.query.QuerySolutionMap; import org.apache.jena.query.ReadWrite; import org.apache.jena.query.ResultSet; import org.apache.jena.rdf.model.Literal; import org.apache.jena.rdf.model.Model; import org.apache.jena.rdf.model.ModelFactory; import org.apache.jena.rdf.model.NodeIterator; import org.apache.jena.rdf.model.RDFNode; import org.apache.jena.rdf.model.ResIterator; import org.apache.jena.rdf.model.Resource; import org.apache.jena.rdf.model.ResourceFactory; import org.apache.jena.riot.Lang; import org.apache.jena.update.UpdateExecutionFactory; import org.apache.jena.update.UpdateFactory; import org.apache.jena.update.UpdateProcessor; import org.apache.jena.update.UpdateRequest; import org.apache.jena.vocabulary.OWL2; import org.apache.jena.vocabulary.RDF; import org.topbraid.jenax.util.JenaUtil; import org.topbraid.shacl.validation.ValidationUtil; import org.topbraid.shacl.vocabulary.SH; import fr.emse.gitlab.saref.Constants; import fr.emse.gitlab.saref.SAREFPipelineException; import fr.emse.gitlab.saref.entities.git.Version; import fr.emse.gitlab.saref.vocabs.EX; import fr.emse.gitlab.saref.vocabs.RDFP; public class ReadExamples extends JobRunner { static final String SHACL_WARNING = "http://www.w3.org/ns/shacl#Warning"; static final String SHACL_VIOLATION = "http://www.w3.org/ns/shacl#Violation"; public static final String REGEX_TERM_URI = "^" + Constants.BASE + Constants.REGEX_EXT + "/(?[^/]+)$"; static final String SELECT_VIOLATION = "PREFIX sh: \n" + "SELECT ?focusNode ?resultMessage ?resultPath ?value ?severity\n" + "WHERE { \n" + " ?violation sh:focusNode ?focusNode .\n" + " ?violation sh:resultMessage ?resultMessage .\n" + " ?violation sh:resultSeverity ?severity .\n" + " OPTIONAL { ?violation sh:resultPath ?resultPath . }\n" + " OPTIONAL { ?violation sh:value ?value . } \n" + "}"; static final Map PREFIXES = new HashMap(); static { PREFIXES.put("owl", "http://www.w3.org/2002/07/owl#"); PREFIXES.put("rdfs", "http://www.w3.org/2000/01/rdf-schema#"); PREFIXES.put("xsd", "http://www.w3.org/2001/XMLSchema#"); PREFIXES.put("dcterms", "http://purl.org/dc/terms/"); PREFIXES.put("vann", "http://purl.org/vocab/vann/"); PREFIXES.put("foaf", "http://xmlns.com/foaf/0.1/"); PREFIXES.put("schema", "http://schema.org/"); PREFIXES.put("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"); PREFIXES.put("voaf", "http://purl.org/vocommons/voaf#"); PREFIXES.put("dce", "http://purl.org/dc/elements/1.1/"); PREFIXES.put("dct", "http://purl.org/dc/terms/"); PREFIXES.put("xml", "http://www.w3.org/XML/1998/namespace/"); PREFIXES.put("saref", "https://saref.etsi.org/core/"); } private static final SimpleDateFormat SIMPLE_DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd"); public ReadExamples(String testSuiteName) { super(testSuiteName); } public void doJob(Dataset dataset, Version version, String url, File exampleFile) throws SAREFPipelineException { Model model = ModelFactory.createDefaultModel(); try (FileInputStream input = new FileInputStream(exampleFile)) { model.read(input, Constants.BASE, Lang.TTL.getLabel()); } catch (Exception ex) { if(ex instanceof RuntimeIOException && ex.getCause() instanceof MalformedInputException) { try (InputStreamReader input2 = new InputStreamReader(new FileInputStream(exampleFile), Charset.defaultCharset())) { model.read(input2, Constants.BASE, Lang.TTL.getLabel()); } catch (Exception ex2) { logger.error("Exception while reading the example file", ex); throw new SAREFPipelineException("Exception while reading the example file", ex2); } } else { logger.error("Exception while reading the example file", ex); throw new SAREFPipelineException("Exception while reading the example file", ex); } } checkPrefixes(model); checkShapes(model, version); Resource example = ResourceFactory.createResource(url); dataset.begin(ReadWrite.WRITE); Model config = getNamedModel(dataset, Constants.CONFIG); try { updateExampleMetadata(version, model, example); } catch (Exception ex) { logger.error("Error while updating example metadata", ex); throw new SAREFPipelineException("Error while updating example metadata", ex); } dataset.addNamedModel(url, model); augmentTermGraphs(dataset, version, example, model, config, exampleFile); dataset.commit(); } private void checkPrefixes(Model model) { final Map prefixes = model.getNsPrefixMap(); for (String s : PREFIXES.keySet()) { if (prefixes.containsKey(s)) { if (!prefixes.get(s).equals(PREFIXES.get(s))) { logger.warn(String.format( "Prefix `%s:` in the example file is expected to be equal to `<%s>`. Got: `<%s>`", s, PREFIXES.get(s), prefixes.get(s))); } } } for (Map.Entry entry : prefixes.entrySet()) { String s = entry.getKey(); String l = entry.getValue(); if (l.contains("saref")) { if (!l.matches(Constants.REGEX_ONTO_SERIES_URI)) { logger.warn(String.format( "Prefix `%s:` in the example file contains string \"saref\", but does not seem to match the official SAREF ontologies namespaces: `\\\\%s\\\\`. Got: `<%s>`", s, Constants.REGEX_ONTO_SERIES_URI, l)); } } } } private void checkShapes(Model model, Version version) throws SAREFPipelineException { Model shapeModel = JenaUtil.createDefaultModel(); try (InputStream in = ReadOntology.class.getClassLoader().getResourceAsStream("exampleShape.ttl")) { shapeModel.read(in, Constants.BASE, "TTL"); } catch (Exception ex) { logger.error("Exception while reading the example shape file", ex); throw new SAREFPipelineException("Exception while reading the example shape file", ex); } Resource reportResource = ValidationUtil.validateModel(model, shapeModel, true); boolean conforms = reportResource.getProperty(SH.conforms).getBoolean(); boolean hasErrors = false; if (!conforms) { Model reportModel = reportResource.getModel(); reportModel.setNsPrefixes(PREFIXES); reportModel.setNsPrefix("sh", "http://www.w3.org/ns/shacl#"); try (QueryExecution exec = QueryExecutionFactory.create(SELECT_VIOLATION, reportModel);) { for (ResultSet resultSet = exec.execSelect(); resultSet.hasNext();) { QuerySolution sol = resultSet.next(); Resource severity = sol.getResource("severity"); Resource focusNode = sol.get("focusNode").asResource(); Literal resultMessage = sol.getLiteral("resultMessage"); RDFNode value = sol.get("value"); String gotString = (value != null && value.isURIResource()) ? String.format(" Got: %s", value) : ""; if (severity != null && severity.getURI().equals(SHACL_VIOLATION)) { logger.error(String.format("Shape violation on node %s: %s%s", focusNode, resultMessage, gotString)); hasErrors = true; } else { logger.warn(String.format("Shape violation on node %s: %s%s", focusNode, resultMessage, gotString)); } } } } int onto = 0; ResIterator it = model.listSubjectsWithProperty(RDF.type, OWL2.Ontology); if(!it.hasNext()) { logger.error("There shall be exactly one owl:Ontology"); hasErrors = true; } while(it.hasNext()) { Resource ontology = it.next(); if(onto++>1) { logger.error("There shall be exactly one owl:Ontology"); hasErrors = true; } // check that this ontology is imported (without a version) boolean found = false; NodeIterator its = model.listObjectsOfProperty(ontology, OWL2.imports); while(its.hasNext()) { RDFNode s = its.next(); String uri = s.asResource().getURI(); if(uri.equals(version.getRepository().getNamespace())) { found = true; } else { if(!uri.matches("^https://saref.etsi.org/(core|saref4[a-z]{4})/v[1-9][0-9]*\\\\.[1-9][0-9]*\\\\.[1-9][0-9]*/$")) { logger.error("Only SAREF ontologies with specific versions shall be imported. Their URIs conform to the regular expression `^https://saref.etsi.org/(core|saref4[a-z]{4})/v[1-9][0-9]*\\\\.[1-9][0-9]*\\\\.[1-9][0-9]*/$`"); hasErrors = true; } } } if(!found) { logger.error(String.format("The current ontology shall be imported: <%s>", version.getRepository().getNamespace())); hasErrors = true; } } if(hasErrors) { throw new SAREFPipelineException("There were rrrors while checking the example structure"); } } private void updateExampleMetadata(Version version, Model model, Resource example) throws Exception { Dataset dataset = DatasetFactory.create(model); QuerySolutionMap sol = new QuerySolutionMap(); sol.add("IRI", example); String updateString = IOUtils.toString(ReadExamples.class.getClassLoader().getResourceAsStream("queries/examples/updateOntologyIRI.rq"), StandardCharsets.UTF_8); UpdateRequest updateRequest = UpdateFactory.create(updateString); UpdateProcessor exec = UpdateExecutionFactory.create(updateRequest, dataset, sol); exec.execute(); sol = new QuerySolutionMap(); sol.add("ONTOLOGY_SERIES", version.getRepository().getResource()); sol.add("ONTOLOGY_VERSION", version.getResource()); updateString = IOUtils.toString(ReadExamples.class.getClassLoader().getResourceAsStream("queries/examples/updateImportIRI.rq"), StandardCharsets.UTF_8); updateRequest = UpdateFactory.create(updateString); exec = UpdateExecutionFactory.create(updateRequest, dataset, sol); exec.execute(); } private void augmentTermGraphs(Dataset dataset, Version version, Resource example, Model model, Model config, File exampleFile) { Set exemplifiedTerms = new HashSet<>(); computeTerms(version, example, model, exemplifiedTerms); config.add(example, RDF.type, EX.Example); String localName = exampleFile.getName().substring(0,exampleFile.getName().length()-4); config.add(example, EX.localName, localName); for (Resource t : exemplifiedTerms) { Model termModel = getNamedModel(dataset, t.getURI()); termModel.add(t, EX.hasExample, example); config.add(t, RDF.type, RDFP.Resource); config.add(t, EX.hasExample, example); } } private void computeTerms(Version version, Resource example, Model model, Set exemplifiedTerms) { model.listStatements().forEachRemaining(stmt -> { Resource s = stmt.getSubject(); Resource p = stmt.getPredicate(); Resource o = stmt.getObject().isResource() ? (Resource) stmt.getObject() : null; computeTerms(version, example, s, exemplifiedTerms); computeTerms(version, example, p, exemplifiedTerms); computeTerms(version, example, o, exemplifiedTerms); }); } private void computeTerms(Version version, Resource example, Resource t, Set exemplifiedTerms) { if (t == null || !t.isURIResource() || !t.getURI().startsWith(Constants.BASE) || t.getURI().endsWith("/")) { return; } String uri = t.getURI(); if(Pattern.matches(REGEX_TERM_URI, uri)) { exemplifiedTerms.add(t); } // if (uri.startsWith(version.getRepository().getNamespace()) && !uri.startsWith(example.getURI())) { // exemplifiedTerms.add(t); // } } private Model getNamedModel(Dataset dataset, String uri) { if (!dataset.containsNamedModel(uri)) { dataset.addNamedModel(uri, ModelFactory.createDefaultModel()); } return dataset.getNamedModel(uri); } }