package fr.emse.gitlab.saref.jobs;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.charset.MalformedInputException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;

import org.apache.jena.atlas.RuntimeIOException;
import org.apache.jena.datatypes.xsd.XSDDatatype;
import org.apache.jena.query.Dataset;
import org.apache.jena.query.QueryExecution;
import org.apache.jena.query.QueryExecutionFactory;
import org.apache.jena.query.QuerySolution;
import org.apache.jena.query.ReadWrite;
import org.apache.jena.query.ResultSet;
import org.apache.jena.rdf.model.Literal;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.NodeIterator;
import org.apache.jena.rdf.model.RDFNode;
import org.apache.jena.rdf.model.ResIterator;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.ResourceFactory;
import org.apache.jena.riot.Lang;
import org.apache.jena.vocabulary.DCTerms;
import org.apache.jena.vocabulary.OWL;
import org.apache.jena.vocabulary.OWL2;
import org.apache.jena.vocabulary.RDF;
import org.apache.jena.vocabulary.RDFS;
import org.topbraid.jenax.util.JenaUtil;
import org.topbraid.shacl.validation.ValidationUtil;
import org.topbraid.shacl.vocabulary.SH;

import fr.emse.gitlab.saref.Constants;
import fr.emse.gitlab.saref.SAREFPipelineException;
import fr.emse.gitlab.saref.entities.git.ReleaseVersion;
import fr.emse.gitlab.saref.entities.git.Version;
import fr.emse.gitlab.saref.vocabs.EX;
import fr.emse.gitlab.saref.vocabs.RDFP;
import fr.emse.gitlab.saref.vocabs.SCHEMA;
import fr.emse.gitlab.saref.vocabs.VANN;

public class ReadOntology extends JobRunner {

	static final String SHACL_WARNING = "http://www.w3.org/ns/shacl#Warning";
	static final String SHACL_VIOLATION = "http://www.w3.org/ns/shacl#Violation";
	static final String SOURCE_ERROR = "There shall be exactly one annotation dcterms:source that points to the ETSI Technical Specification URL `http://www.etsi.org/deliver/etsi_ts/...`.";

	static final String SELECT_VIOLATION = "PREFIX sh: <http://www.w3.org/ns/shacl#>\n"
			+ "SELECT ?focusNode ?resultMessage ?resultPath ?value ?severity\n" + "WHERE {   \n"
			+ "    ?violation sh:focusNode ?focusNode .\n" + "    ?violation sh:resultMessage ?resultMessage .\n"
			+ "    ?violation sh:resultSeverity ?severity .\n"
			+ "      OPTIONAL { ?violation sh:resultPath ?resultPath . }\n"
			+ "      OPTIONAL { ?violation sh:value ?value . } \n" + "}"
			+ "ORDER BY ?severity ?resultMessage ?focusNode ";
Maxime Lefrançois's avatar
Maxime Lefrançois committed

	static final Map<String, String> PREFIXES = new HashMap<String, String>();
	static {
		PREFIXES.put("owl", "http://www.w3.org/2002/07/owl#");
		PREFIXES.put("rdfs", "http://www.w3.org/2000/01/rdf-schema#");
		PREFIXES.put("xsd", "http://www.w3.org/2001/XMLSchema#");
		PREFIXES.put("dcterms", "http://purl.org/dc/terms/");
		PREFIXES.put("vann", "http://purl.org/vocab/vann/");
		PREFIXES.put("schema", "http://schema.org/");
		PREFIXES.put("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
		PREFIXES.put("voaf", "http://purl.org/vocommons/voaf#");
		PREFIXES.put("dce", "http://purl.org/dc/elements/1.1/");
		PREFIXES.put("dct", "http://purl.org/dc/terms/");
		PREFIXES.put("xml", "http://www.w3.org/XML/1998/namespace/");
		PREFIXES.put("saref", "https://saref.etsi.org/core/");
	}

	private static final SimpleDateFormat SIMPLE_DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd");
	private static final Resource ETSI_URL = ResourceFactory.createResource("https://www.etsi.org/");
	private static final Resource ETSI_LICENSE = ResourceFactory
			.createResource("https://forge.etsi.org/etsi-software-license");

	/**
	 * Creates the job and forwards the test suite name to the {@code JobRunner}
	 * constructor.
	 *
	 * @param testSuiteName name passed on to the parent class
	 */
	public ReadOntology(String testSuiteName) {
		super(testSuiteName);
	}

	public void doJob(Dataset dataset, Version version) throws SAREFPipelineException {
		String repoName = version.getRepositoryName();
		String ontologyName = repoName.equals("saref-core") ? "saref.ttl" : repoName + ".ttl";

		File ontologyFile = new File(version.getRepository().getDirectory(), "ontology/" + ontologyName);
		Model model = ModelFactory.createDefaultModel();
		try (FileInputStream input = new FileInputStream(ontologyFile)) {
			model.read(input, Constants.BASE, Lang.TTL.getLabel());
		} catch (Exception ex) {
			if (ex instanceof RuntimeIOException && ex.getCause() instanceof MalformedInputException) {
				try (InputStreamReader input2 = new InputStreamReader(new FileInputStream(ontologyFile),
						Charset.defaultCharset())) {
Maxime Lefrançois's avatar
Maxime Lefrançois committed
					model.read(input2, Constants.BASE, Lang.TTL.getLabel());
				} catch (Exception ex2) {
					logger.error("Exception while reading the ontology file", ex2);
					throw new SAREFPipelineException("Exception while reading the ontology file", ex2);
				}
			} else {
				logger.error("Exception while reading the ontology file", ex);
				throw new SAREFPipelineException("Exception while reading the ontology file", ex);
			}
		}
		checkPrefixes(model);
		checkShapes(model, version);
Maxime Lefrançois's avatar
Maxime Lefrançois committed
		// how does the TDB model work? documentation is not precise.
		dataset.begin(ReadWrite.WRITE);
		Model config = getNamedModel(dataset, Constants.CONFIG);
		updateOntologyMetadata(version, model);
		dataset.addNamedModel(version.getUri(), model);
		augmentTermGraphs(dataset, version, model, config);
		dataset.commit();
	}

	/**
	 * Logs a warning for each prefix declared in the model that looks wrong.
	 * <p>
	 * Two checks are made: a prefix listed in {@link #PREFIXES} must be bound to
	 * the namespace registered there, and any namespace containing the string
	 * "saref" must match {@code Constants.REGEX_ONTO_SERIES_URI}.
	 *
	 * @param model the ontology model whose prefix map is inspected
	 */
	private void checkPrefixes(Model model) {
		final Map<String, String> declared = model.getNsPrefixMap();

		// Well-known prefixes must point to their expected namespace.
		for (Map.Entry<String, String> expected : PREFIXES.entrySet()) {
			String prefix = expected.getKey();
			if (!declared.containsKey(prefix)) {
				continue;
			}
			String actual = declared.get(prefix);
			if (actual.equals(expected.getValue())) {
				continue;
			}
			logger.warn(String.format(
					"Prefix `%s:` in the ontology file is expected to be equal to `<%s>`. Got: `<%s>`", prefix,
					expected.getValue(), actual));
		}

		// Namespaces mentioning "saref" must follow the official pattern.
		for (Map.Entry<String, String> binding : declared.entrySet()) {
			String prefix = binding.getKey();
			String namespace = binding.getValue();
			boolean suspicious = namespace.contains("saref") && !namespace.matches(Constants.REGEX_ONTO_SERIES_URI);
			if (suspicious) {
				logger.warn(String.format(
						"Prefix `%s:` in the ontology file contains string \"saref\", but does not seem to match the official SAREF ontologies namespaces: `\\\\%s\\\\`. Got: `<%s>`",
						prefix, Constants.REGEX_ONTO_SERIES_URI, namespace));
			}
		}
	}

	private void checkShapes(Model model, Version version) throws SAREFPipelineException {
		Model shapeModel = JenaUtil.createDefaultModel();
		try (InputStream in = ReadOntology.class.getClassLoader().getResourceAsStream("sarefShape.ttl")) {
			shapeModel.read(in, Constants.BASE, "TTL");
		} catch (Exception ex) {
			logger.error("Exception while reading the shape file", ex);
			throw new SAREFPipelineException("Exception while reading the shape file", ex);
		}
		Resource reportResource = ValidationUtil.validateModel(model, shapeModel, true);
		boolean conforms = reportResource.getProperty(SH.conforms).getBoolean();
		if (!conforms) {
			Model reportModel = reportResource.getModel();
			reportModel.setNsPrefixes(PREFIXES);
			reportModel.setNsPrefix("sh", "http://www.w3.org/ns/shacl#");
			try (QueryExecution exec = QueryExecutionFactory.create(SELECT_VIOLATION, reportModel);) {
				Resource previousSeverity = null;
				Literal previousResultMessage = null;
				List<Resource> previousFocusNodes = new ArrayList<>();
				List<RDFNode> previousvalues = new ArrayList<>();
Maxime Lefrançois's avatar
Maxime Lefrançois committed
				for (ResultSet resultSet = exec.execSelect(); resultSet.hasNext();) {
					QuerySolution sol = resultSet.next();
					Resource severity = sol.getResource("severity");
					Literal resultMessage = sol.getLiteral("resultMessage");
					Resource focusNode = sol.get("focusNode").asResource();
Maxime Lefrançois's avatar
Maxime Lefrançois committed
					RDFNode value = sol.get("value");

					if (severity != null && severity.equals(previousSeverity) && resultMessage != null
							&& resultMessage.equals(previousResultMessage)) {
						previousFocusNodes.add(focusNode);
						previousvalues.add(value);
Maxime Lefrançois's avatar
Maxime Lefrançois committed
					} else {
						if (previousSeverity != null) {
							String message = String.format("%s: %s", previousResultMessage,
									String.join(" - ", previousFocusNodes.stream().map(Object::toString)
											.collect(Collectors.toList()).toArray(new String[1])));
							if (severity.getURI().equals(SHACL_VIOLATION)) {
								logger.error(message);
							} else {
								logger.warn(message);
							}
						}
						previousSeverity = severity;
						previousResultMessage = resultMessage;
						previousFocusNodes = new ArrayList<>();
						previousvalues = new ArrayList<>();
					}
				}
				if (!previousFocusNodes.isEmpty()) {
					String message = String.format("%s: %s", previousResultMessage,
							String.join(" - ", previousFocusNodes.stream().map(Object::toString)
									.collect(Collectors.toList()).toArray(new String[1])));
					if (previousSeverity != null && previousSeverity.getURI().equals(SHACL_VIOLATION)) {
						logger.error(message);
					} else {
						logger.warn(message);
Maxime Lefrançois's avatar
Maxime Lefrançois committed
					}
				}
			}
		}
		int onto = 0;
		for (ResIterator it = model.listSubjectsWithProperty(RDF.type, OWL2.Ontology); it.hasNext();) {
Maxime Lefrançois's avatar
Maxime Lefrançois committed
			Resource ontology = it.next();
			if (onto++ > 1) {
Maxime Lefrançois's avatar
Maxime Lefrançois committed
				logger.error("There shall be only one owl:Ontology");
			}
//			if(version instanceof ReleaseVersion) {
//				int source = 0;
//				NodeIterator its = model.listObjectsOfProperty(ontology, DCTerms.source);
//				if(!its.hasNext()) {
//					logger.warn(SOURCE_ERROR);
//				}
//				while(its.hasNext()) {
//					if(source++>1) {
//						logger.warn(SOURCE_ERROR);
//					}
//					RDFNode s = its.next();
//					if(!s.isURIResource() || !s.asResource().getURI().startsWith("http://www.etsi.org/deliver/etsi_ts/")) {
//						logger.warn(SOURCE_ERROR);
//					}
//				}
//			}
Maxime Lefrançois's avatar
Maxime Lefrançois committed
		}
	}

	/**
	 * for each term, add: some triples in the default graph (used to detect naming
	 * clashes), and some triples in the named graph of the term (used to display
	 * the page of the term)
	 * 
	 * @param dataset
	 * @param version
	 * @param model
	 */
	private void augmentTermGraphs(Dataset dataset, Version version, Model model, Model config) {
		Set<Resource> definedTerms = new HashSet<>();
		Set<Resource> usedTerms = new HashSet<>();
		computeTerms(version, model, definedTerms, usedTerms);
		config.add(version.getRepository().getResource(), EX.hasVersion, version.getResource());
		config.add(version.getResource(), RDF.type, EX.OntologyVersion);
		config.add(version.getResource(), EX.repositoryName, version.getRepositoryName());
		config.add(version.getResource(), EX.versionInfo, version.getVersionName());
		if (version.getPriorVersion() != null) {
			config.add(version.getResource(), EX.priorVersion, version.getPriorVersion().getResource());
		}
		for (Resource t : definedTerms) {
			Model termModel = getNamedModel(dataset, t.getURI());
			termModel.add(t, RDF.type, RDFS.Resource);
			String localName = t.getURI().substring(version.getRepository().getNamespace().length());
			termModel.add(t, RDFS.isDefinedBy, version.getResource());
			// keep the most recent definition of the term
			if (version.getNextVersion() == null) {
Maxime Lefrançois's avatar
Maxime Lefrançois committed
				termModel.add(DCTerms.modified, RDF.type, OWL.AnnotationProperty);
Maxime Lefrançois's avatar
Maxime Lefrançois committed
				termModel.add(t, DCTerms.modified, SIMPLE_DATE_FORMAT.format(version.getIssued()), XSDDatatype.XSDdate);
				try (QueryExecution exec = QueryExecutionFactory.create("DESCRIBE <" + t.getURI() + ">", model)) {
					termModel.add(exec.execDescribe());
				}
			}
			config.add(t, RDF.type, RDFP.Resource);
			config.add(t, EX.localName, localName);
			config.add(t, RDFS.isDefinedBy, version.getResource());
		}
		for (Resource t : usedTerms) {
			Model termModel = getNamedModel(dataset, t.getURI());
			termModel.add(t, EX.isUsedBy, version.getResource());
			config.add(t, RDF.type, RDFP.Resource);
			config.add(t, EX.isUsedBy, version.getResource());
		}
	}
Maxime Lefrançois's avatar
Maxime Lefrançois committed
	private void updateOntologyMetadata(Version version, Model model) {
		Resource ontologySeries = version.getRepository().getResource();
		model.add(ontologySeries, RDF.type, OWL2.Ontology);

		model.removeAll(ontologySeries, OWL2.versionIRI, null);
		model.add(ontologySeries, OWL2.versionIRI, version.getResource());

		model.removeAll(ontologySeries, OWL2.versionInfo, null);
		model.add(ontologySeries, OWL2.versionInfo, version.getVersionName());

		model.removeAll(ontologySeries, OWL2.priorVersion, null);
		if (version.getPriorVersion() != null) {
			model.add(ontologySeries, OWL2.priorVersion, version.getPriorVersion().getResource());
		}

		model.removeAll(ontologySeries, DCTerms.publisher, null);
		model.add(ontologySeries, DCTerms.publisher, ETSI_URL);

		model.removeAll(ontologySeries, DCTerms.license, null);
		model.add(ontologySeries, DCTerms.license, ETSI_LICENSE);

Maxime Lefrançois's avatar
Maxime Lefrançois committed
		model.add(VANN.preferredNamespaceUri, RDF.type, OWL.AnnotationProperty);
		model.add(VANN.preferredNamespacePrefix, RDF.type, OWL.AnnotationProperty);
		model.add(DCTerms.title, RDF.type, OWL.AnnotationProperty);
		model.add(DCTerms.title, RDF.type, OWL.AnnotationProperty);
		model.add(DCTerms.title, RDF.type, OWL.AnnotationProperty);
		model.add(DCTerms.abstract_, RDF.type, OWL.AnnotationProperty);
		model.add(DCTerms.description, RDF.type, OWL.AnnotationProperty);
		model.add(DCTerms.creator, RDF.type, OWL.AnnotationProperty);
		model.add(DCTerms.contributor, RDF.type, OWL.AnnotationProperty);
		model.add(DCTerms.publisher, RDF.type, OWL.AnnotationProperty);
		model.add(DCTerms.issued, RDF.type, OWL.AnnotationProperty);
		model.add(DCTerms.modified, RDF.type, OWL.AnnotationProperty);
		model.add(DCTerms.license, RDF.type, OWL.AnnotationProperty);
		model.add(SCHEMA.Person, RDF.type, OWL.Class);
		model.add(SCHEMA.Organization, RDF.type, OWL.Class);
		model.add(SCHEMA.affiliation, RDF.type, OWL.ObjectProperty);
Maxime Lefrançois's avatar
Maxime Lefrançois committed
		model.add(SCHEMA.familyName, RDF.type, OWL.DatatypeProperty);
		model.add(SCHEMA.givenName, RDF.type, OWL.DatatypeProperty);
		model.add(SCHEMA.name, RDF.type, OWL.DatatypeProperty);

Maxime Lefrançois's avatar
Maxime Lefrançois committed
//		// list of contributors
//		Set<Contributor> contributors = new HashSet<>();
//		for (Iterator<RevCommit> it = git.log().add(version.ref.getObjectId()).call().iterator(); it.hasNext();) {
//			RevCommit commit = it.next();
//			Contributor contributor = new Contributor(commit.getCommitterIdent());
//			contributors.add(contributor);
//		}
//		for (Contributor contributor : contributors) {
//			// git can only provide name + email.
//			// would need to maintain some list of mapping email -> url in the future.
//			Resource anon = onto.createResource();
//			onto.add(repo.resource, DCTerms.contributor, anon);
//			onto.add(anon, FOAF.name, contributor.getName());
//			onto.add(anon, FOAF.mbox, contributor.getEmailAddress());
//		}

		model.removeAll(ontologySeries, VANN.preferredNamespacePrefix, null);
		model.removeAll(ontologySeries, VANN.preferredNamespaceUri, null);
		model.add(ontologySeries, VANN.preferredNamespacePrefix, version.getRepository().getPrefix());
		model.add(ontologySeries, VANN.preferredNamespaceUri, version.getRepository().getNamespace(),
				XSDDatatype.XSDanyURI);
Maxime Lefrançois's avatar
Maxime Lefrançois committed

		model.removeAll(ontologySeries, DCTerms.issued, null);
		model.add(ontologySeries, DCTerms.issued, SIMPLE_DATE_FORMAT.format(version.getIssued()), XSDDatatype.XSDdate);
	}

	private void computeTerms(Version version, Model model, Set<Resource> definedTerms, Set<Resource> usedTerms) {
Maxime Lefrançois's avatar
Maxime Lefrançois committed
		model.listStatements().forEachRemaining(stmt -> {
			Resource s = stmt.getSubject();
			Resource p = stmt.getPredicate();
			Resource o = stmt.getObject().isResource() ? (Resource) stmt.getObject() : null;
			computeTerms(version, s, definedTerms, usedTerms);
			computeTerms(version, p, definedTerms, usedTerms);
			computeTerms(version, o, definedTerms, usedTerms);
		});
	}

	private void computeTerms(Version version, Resource t, Set<Resource> definedTerms, Set<Resource> usedTerms) {
Maxime Lefrançois's avatar
Maxime Lefrançois committed
		if (t == null || !t.isURIResource() || !t.getURI().startsWith(Constants.BASE) || t.getURI().endsWith("/")) {
			return;
		}
		String uri = t.getURI();
		if (uri.startsWith(version.getRepository().getNamespace())) {
			definedTerms.add(t);
		} else {
			usedTerms.add(t);
		}
	}

	/**
	 * Returns the named model stored under the given URI, first adding an empty
	 * model under that URI when the dataset does not contain one.
	 *
	 * @param dataset the dataset to look in
	 * @param uri     the name of the graph
	 * @return the named model obtained from the dataset
	 */
	private Model getNamedModel(Dataset dataset, String uri) {
		final boolean alreadyPresent = dataset.containsNamedModel(uri);
		if (alreadyPresent) {
			return dataset.getNamedModel(uri);
		}
		dataset.addNamedModel(uri, ModelFactory.createDefaultModel());
		return dataset.getNamedModel(uri);
	}

}