// ReadExamples.java — SAREF pipeline job: reads, validates, and registers example files.
package fr.emse.gitlab.saref.jobs;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.charset.MalformedInputException;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

import org.apache.commons.io.IOUtils;
import org.apache.jena.atlas.RuntimeIOException;
import org.apache.jena.query.Dataset;
import org.apache.jena.query.DatasetFactory;
import org.apache.jena.query.QueryExecution;
import org.apache.jena.query.QueryExecutionFactory;
import org.apache.jena.query.QuerySolution;
import org.apache.jena.query.QuerySolutionMap;
import org.apache.jena.query.ReadWrite;
import org.apache.jena.query.ResultSet;
import org.apache.jena.rdf.model.Literal;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.NodeIterator;
import org.apache.jena.rdf.model.RDFNode;
import org.apache.jena.rdf.model.ResIterator;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.ResourceFactory;
import org.apache.jena.riot.Lang;
import org.apache.jena.update.UpdateExecutionFactory;
import org.apache.jena.update.UpdateFactory;
import org.apache.jena.update.UpdateProcessor;
import org.apache.jena.update.UpdateRequest;
import org.apache.jena.vocabulary.OWL2;
import org.apache.jena.vocabulary.RDF;
import org.topbraid.jenax.util.JenaUtil;
import org.topbraid.shacl.validation.ValidationUtil;
import org.topbraid.shacl.vocabulary.SH;

import fr.emse.gitlab.saref.Constants;
import fr.emse.gitlab.saref.SAREFPipelineException;
import fr.emse.gitlab.saref.entities.git.Version;
import fr.emse.gitlab.saref.vocabs.EX;
import fr.emse.gitlab.saref.vocabs.RDFP;

public class ReadExamples extends JobRunner {
	
	static final String SHACL_WARNING = "http://www.w3.org/ns/shacl#Warning";
	static final String SHACL_VIOLATION = "http://www.w3.org/ns/shacl#Violation";
Maxime Lefrançois's avatar
Maxime Lefrançois committed
	public static final String REGEX_TERM_URI = "^" + Constants.BASE + Constants.REGEX_EXT + "/(?<localName>[^/]+)$";
Maxime Lefrançois's avatar
Maxime Lefrançois committed

	static final String SELECT_VIOLATION = "PREFIX sh: <http://www.w3.org/ns/shacl#>\n"
			+ "SELECT ?focusNode ?resultMessage ?resultPath ?value ?severity\n" + "WHERE {   \n"
			+ "    ?violation sh:focusNode ?focusNode .\n" + "    ?violation sh:resultMessage ?resultMessage .\n"
			+ "    ?violation sh:resultSeverity ?severity .\n"
			+ "      OPTIONAL { ?violation sh:resultPath ?resultPath . }\n"
			+ "      OPTIONAL { ?violation sh:value ?value . } \n" + "}";

	static final Map<String, String> PREFIXES = new HashMap<String, String>();
	static {
		PREFIXES.put("owl", "http://www.w3.org/2002/07/owl#");
		PREFIXES.put("rdfs", "http://www.w3.org/2000/01/rdf-schema#");
		PREFIXES.put("xsd", "http://www.w3.org/2001/XMLSchema#");
		PREFIXES.put("dcterms", "http://purl.org/dc/terms/");
		PREFIXES.put("vann", "http://purl.org/vocab/vann/");
		PREFIXES.put("foaf", "http://xmlns.com/foaf/0.1/");
		PREFIXES.put("schema", "http://schema.org/");
		PREFIXES.put("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
		PREFIXES.put("voaf", "http://purl.org/vocommons/voaf#");
		PREFIXES.put("dce", "http://purl.org/dc/elements/1.1/");
		PREFIXES.put("dct", "http://purl.org/dc/terms/");
		PREFIXES.put("xml", "http://www.w3.org/XML/1998/namespace/");
		PREFIXES.put("saref", "https://saref.etsi.org/core/");
	}
	private static final SimpleDateFormat SIMPLE_DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd");

	public ReadExamples(String testSuiteName) {
		super(testSuiteName);
	}

	/**
	 * Reads one example file into a model, validates it, rewrites its metadata
	 * for the given version, and stores it as a named graph in the dataset.
	 *
	 * @param dataset     the target dataset (a WRITE transaction is opened and committed here)
	 * @param version     the ontology version the example belongs to
	 * @param url         the URL under which the example is published; used as graph name
	 * @param exampleFile the Turtle file to read
	 * @throws SAREFPipelineException if the file cannot be parsed or the metadata update fails
	 */
	public void doJob(Dataset dataset, Version version, String url, File exampleFile) throws SAREFPipelineException {
		Model model = ModelFactory.createDefaultModel();
		try (FileInputStream input = new FileInputStream(exampleFile)) {
			model.read(input, Constants.BASE, Lang.TTL.getLabel());
		} catch (Exception ex) {
			// Jena wraps charset decoding failures in RuntimeIOException: retry once,
			// decoding with the platform default charset instead of UTF-8.
			if (ex instanceof RuntimeIOException && ex.getCause() instanceof MalformedInputException) {
				try (InputStreamReader input2 = new InputStreamReader(new FileInputStream(exampleFile),
						Charset.defaultCharset())) {
					model.read(input2, Constants.BASE, Lang.TTL.getLabel());
				} catch (Exception ex2) {
					// Log the failure of the retry itself (the original code logged the first exception here).
					logger.error("Exception while reading the example file", ex2);
					throw new SAREFPipelineException("Exception while reading the example file", ex2);
				}
			} else {
				logger.error("Exception while reading the example file", ex);
				throw new SAREFPipelineException("Exception while reading the example file", ex);
			}
		}
		checkPrefixes(model);
		checkShapes(model, version);

		Resource example = ResourceFactory.createResource(url);

		dataset.begin(ReadWrite.WRITE);
		try {
			Model config = getNamedModel(dataset, Constants.CONFIG);
			try {
				updateExampleMetadata(version, model, example);
			} catch (Exception ex) {
				logger.error("Error while updating example metadata", ex);
				throw new SAREFPipelineException("Error while updating example metadata", ex);
			}
			dataset.addNamedModel(url, model);

			augmentTermGraphs(dataset, version, example, model, config, exampleFile);

			dataset.commit();
		} finally {
			// Fixed: the original left the WRITE transaction open when an exception
			// was thrown between begin() and commit().
			if (dataset.isInTransaction()) {
				dataset.abort();
			}
		}
	}
	
	/**
	 * Warns when the example file rebinds one of the well-known prefixes to a
	 * different namespace, or binds any prefix whose namespace contains "saref"
	 * to something that does not match the official SAREF series namespaces.
	 */
	private void checkPrefixes(Model model) {
		final Map<String, String> declared = model.getNsPrefixMap();
		for (Map.Entry<String, String> expected : PREFIXES.entrySet()) {
			String prefix = expected.getKey();
			String actual = declared.get(prefix);
			if (actual != null && !actual.equals(expected.getValue())) {
				logger.warn(String.format(
						"Prefix `%s:` in the example file is expected to be equal to `<%s>`. Got: `<%s>`", prefix,
						expected.getValue(), actual));
			}
		}
		for (Map.Entry<String, String> binding : declared.entrySet()) {
			String namespace = binding.getValue();
			if (namespace.contains("saref") && !namespace.matches(Constants.REGEX_ONTO_SERIES_URI)) {
				logger.warn(String.format(
						"Prefix `%s:` in the example file contains string \"saref\", but does not seem to match the official SAREF ontologies namespaces: `\\\\%s\\\\`. Got: `<%s>`",
						binding.getKey(), Constants.REGEX_ONTO_SERIES_URI, namespace));
			}
		}
	}

	private void checkShapes(Model model, Version version) throws SAREFPipelineException {
		Model shapeModel = JenaUtil.createDefaultModel();
		try (InputStream in = ReadOntology.class.getClassLoader().getResourceAsStream("exampleShape.ttl")) {
			shapeModel.read(in, Constants.BASE, "TTL");
		} catch (Exception ex) {
			logger.error("Exception while reading the example shape file", ex);
			throw new SAREFPipelineException("Exception while reading the example shape file", ex);
		}
		Resource reportResource = ValidationUtil.validateModel(model, shapeModel, true);
		boolean conforms = reportResource.getProperty(SH.conforms).getBoolean();
		if (!conforms) {
			Model reportModel = reportResource.getModel();
			reportModel.setNsPrefixes(PREFIXES);
			reportModel.setNsPrefix("sh", "http://www.w3.org/ns/shacl#");
			try (QueryExecution exec = QueryExecutionFactory.create(SELECT_VIOLATION, reportModel);) {
				for (ResultSet resultSet = exec.execSelect(); resultSet.hasNext();) {
					QuerySolution sol = resultSet.next();
					Resource severity = sol.getResource("severity");
					Resource focusNode = sol.get("focusNode").asResource();
					Literal resultMessage = sol.getLiteral("resultMessage");
					RDFNode value = sol.get("value");
					String gotString = (value != null && value.isURIResource()) ? String.format(" Got: %s", value)
							: "";
					if (severity != null && severity.getURI().equals(SHACL_VIOLATION)) {
						logger.error(String.format("Shape violation on node %s: %s%s", focusNode, resultMessage,
								gotString));
					} else {
						logger.warn(String.format("Shape violation on node %s: %s%s", focusNode, resultMessage,
								gotString));
					}
				}
			}
		}
		int onto = 0;
		ResIterator it = model.listSubjectsWithProperty(RDF.type, OWL2.Ontology);
		if(!it.hasNext()) {
			logger.error("There shall be exactly one owl:Ontology");
		}
		while(it.hasNext()) {
			Resource ontology = it.next();
			if(onto++>1) {
				logger.error("There shall be exactly one owl:Ontology");
			}
			// check that this ontology is imported (without a version)
			boolean found = false;
			NodeIterator its = model.listObjectsOfProperty(ontology, OWL2.imports);
			while(its.hasNext()) {
				RDFNode s = its.next();
				String uri = s.asResource().getURI();
				if(uri.equals(version.getRepository().getNamespace())) {
					found = true;
//				} else {
//					if(!uri.matches("^https://saref.etsi.org/(core|saref4[a-z]{4})/v[1-9][0-9]*\\\\.[1-9][0-9]*\\\\.[1-9][0-9]*/$")) {
//						logger.error("Only SAREF ontologies with specific versions shall be imported. Their URIs conform to the regular expression `^https://saref.etsi.org/(core|saref4[a-z]{4})/v[1-9][0-9]*\\\\.[1-9][0-9]*\\\\.[1-9][0-9]*/$`");
//					}
Maxime Lefrançois's avatar
Maxime Lefrançois committed
				}
			}
			if(!found) {
				logger.error(String.format("The current ontology shall be imported: <%s>", version.getRepository().getNamespace()));
			}
		}
	}
	
	/**
	 * Rewrites the ontology IRI and the import IRIs of the example model so they
	 * reference the given version of the ontology. The SPARQL UPDATE queries are
	 * loaded from the classpath.
	 *
	 * @throws Exception if a query resource cannot be read or an update fails
	 */
	private void updateExampleMetadata(Version version, Model model, Resource example) throws Exception {
		Dataset dataset = DatasetFactory.create(model);

		QuerySolutionMap sol = new QuerySolutionMap();
		sol.add("IRI", example);
		executeUpdate(dataset, "queries/examples/updateOntologyIRI.rq", sol);

		sol = new QuerySolutionMap();
		sol.add("ONTOLOGY_SERIES", version.getRepository().getResource());
		sol.add("ONTOLOGY_VERSION", version.getResource());
		executeUpdate(dataset, "queries/examples/updateImportIRI.rq", sol);
	}

	/** Loads a SPARQL UPDATE from the classpath and executes it on the dataset with the given bindings. */
	private static void executeUpdate(Dataset dataset, String resourceName, QuerySolutionMap sol) throws Exception {
		// try-with-resources: the original leaked the classpath InputStream.
		try (InputStream in = ReadExamples.class.getClassLoader().getResourceAsStream(resourceName)) {
			if (in == null) {
				// Explicit failure instead of the NullPointerException IOUtils would throw.
				throw new IllegalStateException("Missing classpath resource: " + resourceName);
			}
			String updateString = IOUtils.toString(in, StandardCharsets.UTF_8);
			UpdateRequest updateRequest = UpdateFactory.create(updateString);
			UpdateProcessor exec = UpdateExecutionFactory.create(updateRequest, dataset, sol);
			exec.execute();
		}
	}

	/**
	 * Registers the example in the configuration graph and links every exemplified
	 * SAREF term to it, both in the term's own named graph and in the config graph.
	 */
	private void augmentTermGraphs(Dataset dataset, Version version, Resource example, Model model, Model config, File exampleFile) {
		Set<Resource> exemplifiedTerms = new HashSet<>();
		computeTerms(version, example, model, exemplifiedTerms);
		config.add(example, RDF.type, EX.Example);
		// Drop the 4-character ".ttl" extension to obtain the example's local name.
		String fileName = exampleFile.getName();
		String localName = fileName.substring(0, fileName.length() - 4);
		config.add(example, EX.localName, localName);
		for (Resource term : exemplifiedTerms) {
			getNamedModel(dataset, term.getURI()).add(term, EX.hasExample, example);
			config.add(term, RDF.type, RDFP.Resource);
			config.add(term, EX.hasExample, example);
		}
	}

	/**
	 * Collects exemplified SAREF terms from every subject, predicate and object
	 * of the model into {@code exemplifiedTerms}.
	 */
	private void computeTerms(Version version, Resource example, Model model, Set<Resource> exemplifiedTerms) {
		model.listStatements().forEachRemaining(stmt -> {
			computeTerms(version, example, stmt.getSubject(), exemplifiedTerms);
			computeTerms(version, example, stmt.getPredicate(), exemplifiedTerms);
			RDFNode object = stmt.getObject();
			// Literal objects cannot be terms; pass null so they are skipped.
			computeTerms(version, example, object.isResource() ? object.asResource() : null, exemplifiedTerms);
		});
	}

	private void computeTerms(Version version, Resource example, Resource t, Set<Resource> exemplifiedTerms) {
		if (t == null || !t.isURIResource() || !t.getURI().startsWith(Constants.BASE) || t.getURI().endsWith("/")) {
			return;
		}
		String uri = t.getURI();
Maxime Lefrançois's avatar
Maxime Lefrançois committed
		if(Pattern.matches(REGEX_TERM_URI, uri)) {
Maxime Lefrançois's avatar
Maxime Lefrançois committed
			exemplifiedTerms.add(t);
		}
//		if (uri.startsWith(version.getRepository().getNamespace()) && !uri.startsWith(example.getURI())) {
//			exemplifiedTerms.add(t);
//		}
	}

	/**
	 * Returns the named model for {@code uri}, first adding an empty model to the
	 * dataset if no graph with that name exists yet.
	 */
	private Model getNamedModel(Dataset dataset, String uri) {
		if (dataset.containsNamedModel(uri)) {
			return dataset.getNamedModel(uri);
		}
		dataset.addNamedModel(uri, ModelFactory.createDefaultModel());
		return dataset.getNamedModel(uri);
	}

}