/*
 * Copyright 2020 ETSI
 * 
 * Redistribution and use in source and binary forms, with or without 
 * modification, are permitted provided that the following conditions are met:
 * 1. Redistributions of source code must retain the above copyright notice, 
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice, 
 *    this list of conditions and the following disclaimer in the documentation 
 *    and/or other materials provided with the distribution.
 * 3. Neither the name of the copyright holder nor the names of its contributors 
 *    may be used to endorse or promote products derived from this software without 
 *    specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package fr.mines_stetienne.ci.saref.managers;

import java.io.File;
import java.io.IOException;
import java.util.Collection;

import org.apache.commons.io.FileUtils;
import org.apache.jena.graph.Node;
import org.apache.jena.query.Dataset;
import org.apache.jena.query.ReadWrite;
import org.apache.jena.query.text.EntityDefinition;
import org.apache.jena.query.text.TextDatasetFactory;
import org.apache.jena.query.text.TextIndex;
import org.apache.jena.query.text.TextIndexConfig;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.tdb.TDBFactory;
import org.apache.jena.vocabulary.DCTerms;
import org.apache.jena.vocabulary.OWL;
import org.apache.jena.vocabulary.OWL2;
import org.apache.jena.vocabulary.RDF;
import org.apache.jena.vocabulary.RDFS;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.slf4j.Logger;

import fr.mines_stetienne.ci.saref.SAREF;
import fr.mines_stetienne.ci.saref.SAREFErrorLogger;
import fr.mines_stetienne.ci.saref.SAREFPipeline;
import fr.mines_stetienne.ci.saref.entities.SAREFProject;
import fr.mines_stetienne.ci.saref.entities.SAREFRepository;
import fr.mines_stetienne.ci.saref.entities.SAREFTerm;
import fr.mines_stetienne.ci.saref.entities.SAREFVersion;
import fr.mines_stetienne.ci.saref.vocabs.EX;
import fr.mines_stetienne.ci.saref.vocabs.VANN;

/**
 * Creates and fills the RDF dataset of a pipeline run.
 *
 * <p>
 * The dataset is an Apache Jena TDB store wrapped with a Lucene text index.
 * Both are stored below the {@code dataset} directory of the pipeline target
 * directory. {@link #createDataset()} copies a summary of every source
 * repository (its versions and its terms) into the default graph.
 */
public class DatasetManager extends SAREFErrorLogger {

	private static final String NAME_DATASET = "dataset";
	private static final String CONFIG = SAREF.BASE + "config";
	private static final String NAME_TDB = "tdb";
	private static final String NAME_LUCENE = "lucene";
	private static final String NAME_ASSEMBLER_FILE = "assembler.ttl";

	private final File datasetDir;
	private final Dataset dataset;

	private static enum MESSAGE {
		bad_transaction, no_transaction
	}

	/**
	 * Prepare the Apache Jena TDB RDF dataset and its Lucene text index.
	 *
	 * @param pipeline    the running pipeline; its {@code targetDir} is the parent
	 *                    of the dataset directory
	 * @param errorLogger the logger handed over to {@link SAREFErrorLogger}
	 * @throws IOException if the TDB directory cannot be created or the Lucene
	 *                     directory cannot be opened
	 */
	public DatasetManager(SAREFPipeline pipeline, Logger errorLogger) throws IOException {
		super(pipeline, errorLogger);
		datasetDir = new File(pipeline.targetDir, NAME_DATASET);
		File tdbFile = new File(datasetDir, NAME_TDB);
		File luceneFile = new File(datasetDir, NAME_LUCENE);
		FileUtils.forceMkdir(tdbFile);
		Dataset base = TDBFactory.createDataset(tdbFile.getAbsolutePath());

		// Text index layout: entity field "uri", primary text field "text", graph
		// field "graph". rdfs:label is the primary predicate.
		EntityDefinition entDef = new EntityDefinition("uri", "text", "graph");
		entDef.setPrimaryPredicate(RDFS.label.asNode());
		entDef.setUidField("uid");
		entDef.setLangField("lang");
		// rdfs:comment shares the "text" field with rdfs:label. It is registered
		// once, through the EntityDefinition API rather than by mutating the
		// collection returned by getPredicates.
		entDef.set("text", RDFS.comment.asNode());

		TextIndexConfig config = new TextIndexConfig(entDef);
		config.setAnalyzer(new EnglishAnalyzer());
		config.setValueStored(true);
		Directory dir = new SimpleFSDirectory(luceneFile.toPath());
		TextIndex textIndex = TextDatasetFactory.createLuceneIndex(dir, config);
		dataset = TextDatasetFactory.create(base, textIndex);
	}

	/**
	 * @return the text-indexed dataset created by the constructor
	 */
	public Dataset getDataset() {
		return dataset;
	}

	/**
	 * Fills the default graph of the dataset with a description of the versions
	 * and terms of every source repository.
	 *
	 * <p>
	 * All additions happen inside a single write transaction. The transaction is
	 * always ended, so a failure while copying triples does not leave it open; in
	 * that case nothing is committed.
	 */
	public void createDataset() {
		dataset.begin(ReadWrite.WRITE);
		try {
			Model model = dataset.getDefaultModel();
			for (RepositoryManager repositoryManager : sourcesManager.getSourceRepositoryManagers()) {
				SAREFRepository repository = repositoryManager.getRepository();
				describeVersions(model, repository);
				describeTerms(model, repository);
			}
			dataset.commit();
		} finally {
			// After a successful commit this only closes the transaction;
			// otherwise it discards the uncommitted changes.
			dataset.end();
		}
	}

	/**
	 * Adds one owl:Ontology per version of the repository, linked to the version
	 * that precedes it in iteration order, with the title, description and
	 * version info that the version model attaches to the project resource.
	 */
	private void describeVersions(Model model, SAREFRepository repository) {
		SAREFProject project = repository.getProject();
		SAREFVersion priorVersion = null;
		for (SAREFVersion version : repository.getVersions().values()) {
			model.add(version.getResource(), RDF.type, OWL2.Ontology);
			if (priorVersion != null) {
				model.add(version.getResource(), OWL.priorVersion, priorVersion.getResource());
			}
			// The metadata is attached to the project resource in the version
			// model; it is re-attached here to the version resource.
			version.getModel().listObjectsOfProperty(project.getResource(), DCTerms.title)
					.forEachRemaining(node -> model.add(version.getResource(), RDFS.label, node));
			version.getModel().listObjectsOfProperty(project.getResource(), DCTerms.description)
					.forEachRemaining(node -> model.add(version.getResource(), RDFS.comment, node));
			version.getModel().listObjectsOfProperty(project.getResource(), OWL2.versionInfo)
					.forEachRemaining(node -> model.add(version.getResource(), OWL2.versionInfo, node));
			priorVersion = version;
		}
	}

	/**
	 * Adds, for every term of the repository, the links to the versions that
	 * define it, the examples that exemplify it and the versions that use it,
	 * plus the labels and comments found in the version returned by
	 * {@link #getLastVersion(SAREFTerm)}.
	 */
	private void describeTerms(Model model, SAREFRepository repository) {
		for (SAREFTerm term : repository.getTerms().values()) {
			term.getIsDefinedBy()
					.forEach(version -> model.add(term.getResource(), RDFS.isDefinedBy, version.getResource()));
			term.getIsExemplifiedBy()
					.forEach(example -> model.add(term.getResource(), VANN.example, example.getResource()));
			term.getIsUsedBy()
					.forEach(version -> model.add(term.getResource(), EX.isUsedBy, version.getResource()));

			SAREFVersion lastVersion = getLastVersion(term);
			if (lastVersion == null) {
				// No version defines this term: there is no label/comment source.
				continue;
			}
			lastVersion.getModel().listObjectsOfProperty(term.getResource(), RDFS.label)
					.forEachRemaining(node -> model.add(term.getResource(), RDFS.label, node));
			lastVersion.getModel().listObjectsOfProperty(term.getResource(), RDFS.comment)
					.forEachRemaining(node -> model.add(term.getResource(), RDFS.comment, node));
		}
	}

	/**
	 * Returns the defining version of the term whose version name is the greatest
	 * according to {@link String#compareTo(String)}.
	 *
	 * @param term the term whose defining versions are inspected
	 * @return the version with the greatest name, or {@code null} if no version
	 *         defines the term
	 */
	private SAREFVersion getLastVersion(SAREFTerm term) {
		// NOTE(review): this is a plain string comparison. If version names have
		// multi-digit components (e.g. "v1.10.1" vs "v1.9.1") the order is not
		// numeric - confirm against the version name format.
		SAREFVersion lastVersion = null;
		for (SAREFVersion v : term.getIsDefinedBy()) {
			if (lastVersion == null || v.getVersionName().compareTo(lastVersion.getVersionName()) > 0) {
				lastVersion = v;
			}
		}
		return lastVersion;
	}

}
