Unverified Commit f052403a authored by Maxime Lefrançois's avatar Maxime Lefrançois
Browse files

docx generation with ontology reference, and check prefixes and terms exist

parent 292e0c55
Loading
Loading
Loading
Loading
+51 −3
Original line number Diff line number Diff line
import os
from pathlib import Path
import re

from markdown import markdown
from rdflib import RDF, RDFS, URIRef
from saref_pypeline.checkers import BaseChecker
from saref_pypeline.constants import BASE, METADATA, PATTERN_SAREF_GRAPHS
from saref_pypeline.entities import SAREFProjectVersion

from typing import TYPE_CHECKING
@@ -13,10 +19,15 @@ VALID_EXTENSIONS = {".html", ".md"}
# Optional documentation sources, keyed by the file-name prefix that identifies
# them; the value is the human-readable description used in log messages.
# Each key appears exactly once: duplicate keys in a dict literal are legal
# Python, but the later value silently overwrites the earlier one.
OPTIONAL_SOURCES = {
    "creators": "the list of creators.",
    "contributors": "the list of contributors.",
    "scope": "the scope of the document.",
    "references": "a list of references.",
    "terms": "terms used in the document.",
    "symbols": "symbols used in the document.",
    "abbreviations": "the list of abbreviations.",
    "abstract": "a short description of the ontology.",
    "description": "a detailed description of the ontology.",
    "examples": "a detailed description of the examples.",
    "annexes": "informative annexes of the document.",
    "acknowledgement": "a list of acknowledgements.",
}

@@ -65,7 +76,44 @@ class Checker(BaseChecker):
            # Check optional documentation sources
            for source, description in OPTIONAL_SOURCES.items():
                if file_name.startswith(source):
                    self.logger.debug(f"Found optional documentation source '{file}' for {description}.")
                    self.logger.debug(f"Found optional documentation source '{file}' for {description}")

            # Check prefixed names
            # Every "prefix:localName" token found in the documentation source is
            # resolved with the ontology's namespace declarations and sorted into
            # one of four problem buckets, each reported as a single warning.
            namespaces = dict(self.project_version.ontology.namespaces)
            metadata = self.pipeline.dataset.graph(METADATA)
            graph = self.pipeline.dataset.graph(self.project_version.ontology.version_iri_with_imports)
            version_iri = self.project_version.ontology.version_iri
            data = Path(self.project.directory, "documentation", file).read_text()
            unknown_prefixes = set()
            undeclared_terms = set()
            unreferenced_saref_terms = set()
            unseen_term = set()
            # NOTE(review): VALID_EXTENSIONS lists extensions with a leading dot
            # (".md"); confirm that file_extension really is "md" here, otherwise
            # markdown sources are scanned without being rendered first.
            if file_extension == "md":
                data = markdown(data, extensions=["extra"])
            # The pattern MUST be a raw string: in a plain string literal "\b" is
            # the backspace character, not the regex word-boundary assertion, and
            # the expression would (almost) never match anything.
            for prefix, local_name in re.findall(r"\b([a-z][a-z0-9]*):([a-zA-Z0-9][a-zA-Z0-9_-]+)\b", data):
                if prefix not in namespaces:
                    unknown_prefixes.add(prefix)
                    continue
                term = URIRef(namespaces[prefix] + local_name)
                curie = f"{prefix}:{local_name}"
                if prefix == self.project.prefix:
                    # Term in this project's own namespace: the metadata graph must
                    # state that it is defined by this ontology version.
                    if (term, RDFS.isDefinedBy, version_iri) not in metadata:
                        undeclared_terms.add(curie)
                elif term.startswith(BASE) and not PATTERN_SAREF_GRAPHS.match(term):
                    # Term under BASE that is not itself a graph IRI: it must be
                    # typed somewhere in the ontology graph (imports included).
                    if (term, RDF.type, None) not in graph:
                        unreferenced_saref_terms.add(curie)
                else:
                    # Any other term: the metadata graph must link it to this
                    # ontology version through rdfs:seeAlso.
                    if (term, RDFS.seeAlso, version_iri) not in metadata:
                        unseen_term.add(term)
            # Sets are unordered; sort before joining so the log output is stable
            # from one run to the next.
            if unknown_prefixes:
                self.logger.warning(f"""In {file}, the following prefixes are not defined in the ontology: {", ".join(sorted(unknown_prefixes))}""")
            if undeclared_terms:
                self.logger.warning(f"""In {file}, the following {self.project.name} terms are not defined in the ontology: {", ".join(sorted(undeclared_terms))}""")
            if unreferenced_saref_terms:
                self.logger.warning(f"""In {file}, the following terms are not referenced in the ontology: {", ".join(sorted(unreferenced_saref_terms))}""")
            if unseen_term:
                self.logger.warning(f"""In {file}, none of the graph documents in the project version reference the following terms: {", ".join(sorted(unseen_term))}""")

        
    #     # Check correspondence between creators and contributors in ontology and documentation sources
    #     ontology_graph = self.project_version.ontology.graph
+1 −0
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@ NAME_SOURCES = "sources"
NAME_SOURCES_PORTAL = "portal"
NAME_SOURCES_PATTERNS = "patterns"
# Sub-directory of the pipeline target directory that holds the generated site.
NAME_SITE = "site"
# Sub-directory of the pipeline target directory that receives the generated .docx documents.
NAME_TS = "ts"
# File name of the HTML report.
NAME_REPORT_HTML = "report.html"
CONFIGURATION_FILE_NAME = ".saref-repositories.yml"
# A newline-terminated Apache "RewriteCond" directive line.
# NOTE(review): despite the name this holds a directive, not a path - presumably
# it is written into the generated .htaccess file; confirm against the writer.
HTACCESS_PATH = "RewriteCond %{REQUEST_URI} ^(.*/)?[^/]*$\n"
+750 −180

File changed.

Preview size limit exceeded, changes collapsed.

+146 −89
Original line number Diff line number Diff line
from functools import cached_property
from functools import cache
import os
from pathlib import Path
import logging
@@ -9,6 +9,7 @@ from itertools import chain
from bs4 import BeautifulSoup
from rdflib.term import URIRef, BNode, Literal, IdentifiedNode, Node
from rdflib import Graph, RDF, RDFS, OWL, DC, DCTERMS, XSD
from rdflib.namespace import NamespaceManager
from jinja2 import Environment, FileSystemLoader
from saref_pypeline.entities import SAREFGraphDocument, SAREFGraphDocumentType, SAREFProjectVersion
from saref_pypeline.constants import *
@@ -30,6 +31,37 @@ jinja2_env = Environment(
if TYPE_CHECKING:
    from saref_pypeline.docgen import SiteManager


class HTMLEntityDescription(EntityDescription):
    """Entity description that renders itself as small HTML fragments.

    Relies on the ``uri``, ``types``, ``curie``, ``label`` and ``title``
    attributes of the parent ``EntityDescription`` and on the ``a`` / ``sup``
    tag helpers available at module level.
    """

    def __init__(self, uri:URIRef, graph: Graph, namespace_manager=None):
        """Describe ``uri`` as found in ``graph``.

        :param uri: IRI of the entity to describe.
        :param graph: graph the description is read from.
        :param namespace_manager: optional namespace manager, forwarded
            unchanged to the parent constructor.
        """
        super().__init__(uri, graph, namespace_manager)

    def sup(self):
        """Return a ``sup`` tag that flags the kind of entity.

        The entity's types are tested in a fixed priority order (class, object
        property, data property, annotation property, named individual,
        datatype); the first match wins. Entities matching none of them get the
        "undeclared" marker.
        """
        if OWL.Class in self.types:
            return sup("c", title="class", _class="type-c")
        elif OWL.ObjectProperty in self.types:
            return sup("op", title="object property", _class="type-op")
        elif OWL.DatatypeProperty in self.types:
            return sup("dp", title="data property", _class="type-dp")
        elif OWL.AnnotationProperty in self.types:
            return sup("ap", title="annotation property", _class="type-ap")
        elif OWL.Thing in self.types:
            return sup("ni", title="named individual", _class="type-ni")
        elif RDFS.Datatype in self.types or self.uri.startswith(XSD):
            # This branch used to re-test OWL.Thing, which can never be true
            # here because the branch above already returned for it. Datatypes
            # are declared with rdfs:Datatype, so test that instead.
            return sup("Δ", title="datatype", _class="type-dt")
        else:
            # NOTE(review): "type-unkown" is misspelled but kept as-is, since
            # stylesheets may select on this exact class name.
            return sup("?", title="undeclared", _class="type-unkown")

    def href_title(self, onto:URIRef):
        """Return a link to the entity, labelled with its CURIE.

        :param onto: namespace IRI of the document being rendered. Entities
            whose IRI starts with it are linked to an in-page anchor
            (``#<curie>``); all others are linked to their full IRI.
        """
        if self.uri.startswith(onto):
            return a(self.curie, href=f"#{self.curie}", title=self.title)
        else:
            return a(self.curie, href=self.uri, title=self.title)

    def a(self):
        """Return an in-page link (``#<curie>``) whose tooltip is the entity label."""
        # Inside this body ``a`` resolves to the module-level tag helper, not
        # to this method: class attributes are not in a method's name scope.
        return a(self.curie, href=f"#{self.curie}", title=self.label)

class HTMLDocumentationGenerator:
    
    def __init__(self, site_manager: "SiteManager", project_version:SAREFProjectVersion):
@@ -40,17 +72,21 @@ class HTMLDocumentationGenerator:
        self.project = project_version.project
        self.version = project_version.version

        self._entity_cache = {}
        self.nm = NamespaceManager(Graph(), bind_namespaces="none")
        for prefix, ns in self.project_version.ontology.namespaces:
            self.nm.bind(prefix, ns)
        
        # precompute descriptions for OWL entities
        self._description(OWL.Thing, OWL_GRAPH)
        self._description(OWL.Nothing, OWL_GRAPH)
        self._description(OWL.bottomDataProperty, OWL_GRAPH)
        self._description(OWL.bottomObjectProperty, OWL_GRAPH)
        self._description(OWL.bottomDataProperty, OWL_GRAPH)
        self._description(OWL.topDataProperty, OWL_GRAPH)
        self.description(OWL.Thing, OWL_GRAPH)
        self.description(OWL.Nothing, OWL_GRAPH)
        self.description(OWL.bottomDataProperty, OWL_GRAPH)
        self.description(OWL.topDataProperty, OWL_GRAPH)
        self.description(OWL.bottomObjectProperty, OWL_GRAPH)
        self.description(OWL.topObjectProperty, OWL_GRAPH)

    def _description(self, uri:URIRef, graph) -> EntityDescription:
        """Return the cached description of ``uri``, building it on first use.

        The cache is keyed on ``uri`` only: once an entity has been described,
        the same object is returned whatever ``graph`` is passed later.

        :param uri: IRI of the entity to describe.
        :param graph: graph used to build the description on a cache miss.
        """
        try:
            return self._entity_cache[uri]
        except KeyError:
            # Build the description only on a miss. ``dict.setdefault`` would
            # evaluate its default eagerly and construct a throw-away
            # EntityDescription on every call, hit or miss.
            description = self._entity_cache[uri] = EntityDescription(uri, graph)
            return description
    def description(self, uri:URIRef, graph) -> HTMLEntityDescription:
        """Return the memoized ``HTMLEntityDescription`` of ``uri`` in ``graph``.

        Results are memoized per generator instance and per ``(uri, graph)``
        pair, so both arguments must be hashable. Because the graph is part of
        the key, a description built against one graph is not reused for a
        lookup against another graph.

        :param uri: IRI of the entity to describe.
        :param graph: graph the description is read from.
        """
        # A per-instance dict replaces ``functools.cache``: decorating a method
        # with ``cache`` stores ``self`` in a module-lifetime cache, which keeps
        # every generator (and everything it references) alive forever.
        try:
            memo = self._description_memo
        except AttributeError:
            memo = self._description_memo = {}
        key = (uri, graph)
        try:
            return memo[key]
        except KeyError:
            described = memo[key] = HTMLEntityDescription(uri, graph, self.nm)
            return described

    def _format_literal(self, literal):
        if not isinstance(literal, Literal):
@@ -91,7 +127,7 @@ class HTMLDocumentationGenerator:
        if self.pipeline.no_site:
            logger.debug("Skipping documentation generation")
            return
        logger.debug(f"Printing documentation for {document}")
        logger.debug(f"Printing html documentation for {document}")

        template = jinja2_env.get_template('html.j2')
        g = self.dataset.graph(document.version_iri)
@@ -164,6 +200,14 @@ class HTMLDocumentationGenerator:

            toc = ol()

            div(h2("Table of Content"), toc, id='toc2')

            self.file_or_metadata(document,
                                  title="References",
                                  file_name="references",
                                  toc=toc,
                                  toc_id="references")

            self.file_or_metadata(document,
                                  title="Abstract",
                                  file_name="abstract",
@@ -171,8 +215,6 @@ class HTMLDocumentationGenerator:
                                  toc=toc,
                                  toc_id="abstract")

            div(h2("Table of Content"), toc, id='toc2')

            self.file_or_metadata(document,
                                  title="Introduction",
                                  file_name="description",
@@ -187,54 +229,56 @@ class HTMLDocumentationGenerator:
                                  toc=toc,
                                  toc_id="examples")

            self.namespaces(document, toc)
            toc.add(li(a("Ontology Reference", href=f"#ontology-reference")))
            with div(id="ontology-reference"):
                h2("Ontology Reference")
                ref_toc = ol()
                toc.add(ref_toc)

                self.describe_entities(document,
                                   toc,
                                    ref_toc,
                                    toc_id = "classes",
                                    toc_label="Classes",
                                    entity_type = OWL.Class,
                                    print_function = self.describe_class)

                self.describe_entities(document,
                                   toc,
                                    ref_toc,
                                    toc_id = "objectproperties", 
                                    toc_label="Object Properties",
                                    entity_type=OWL.ObjectProperty,
                                    print_function = self.describe_object_property)
                
                self.describe_entities(document,
                                   toc,
                                    ref_toc,
                                    toc_id = "dataproperties", 
                                    toc_label="Data Properties",
                                    entity_type=OWL.DatatypeProperty,
                                    print_function = self.describe_data_property)

                self.describe_entities(document,
                                   toc,
                                    ref_toc,
                                    toc_id = "namedindividuals", 
                                    toc_label="Named Individuals",
                                    entity_type=OWL.Thing,
                                    print_function = self.describe_named_individual)

                self.describe_entities(document,
                                   toc,
                                    ref_toc,
                                    toc_id = "annotationproperties", 
                                    toc_label="Annotation Properties",
                                    entity_type=OWL.AnnotationProperty,
                                    print_function = self.describe_annotation_property)

                self.describe_general_axioms(document,
                                         toc, 
                                            ref_toc, 
                                            toc_id = "generalaxioms", 
                                            toc_label="General Axioms",
                                            print_function = self.describe_annotation_property)

            self.file_or_metadata(document,
                                  title="References",
                                  file_name="references",
                                  toc=toc,
                                  toc_id="references")
                self.namespaces(document, ref_toc)

                # toc.add(li(a("Hierarchies of Terms", href="#toc")))
                
            self.file_or_metadata(document,
                                title="Acknowledgements",
@@ -242,7 +286,7 @@ class HTMLDocumentationGenerator:
                                toc=toc,
                                toc_id="acknowledgements")

            # toc.add(li(a("Hierarchies of Terms", href="#toc")))
                

        with div(_class="tab-content",id="myTabContent") as my_tab_content:
            with div(_class="tab-pane show active",id="toc-toc",role="tabpanel",aria_labelledby="toc-toc-tab"):
@@ -278,7 +322,7 @@ class HTMLDocumentationGenerator:
        if self.pipeline.no_site:
            logger.debug(f"Skipping documentation generation for {document}")
            return
        logger.debug(f"Printing documentation for {document}")
        logger.debug(f"Printing html documentation for {document}")

        template = jinja2_env.get_template('html.j2')
        g = self.dataset.graph(document.version_iri)
@@ -358,11 +402,16 @@ class HTMLDocumentationGenerator:
            value = next(filter(lambda x: isinstance(x, Literal) and x.language == "en", g.objects(document.declaration_iri, property)), None)
            value = value or next(filter(lambda x: isinstance(x, Literal) and x.datatype == None or x.datatype == XSD.string, g.objects(document.declaration_iri, property)), None)
        if value and value.strip():
            h2(title, id=toc_id)
            if toc:
                toc.add(li(a(title, href=f"#{toc_id}")))
                inner_ol = ol()
                soup = BeautifulSoup(value, "html.parser")
                materialize_links(document, soup)
                if soup.h2:
                    soup.h2["id"] = toc_id
                    toc.add(li(a(soup.h2.text, href=f"#{toc_id}")))
                else:
                    h2(title, id=toc_id)
                    toc.add(li(a(title, href=f"#{toc_id}")))
                for i, h3 in enumerate(soup.find_all("h3"), start=1):
                    if not h3.get("id"):
                        text = h3.get_text()
@@ -449,19 +498,27 @@ class HTMLDocumentationGenerator:
        if not all_entities:
            return
        toc.add(li(a(toc_label, href=f"#{toc_id}")))
        # with ol() as toc_az:
        #     for entity in all_entities:
        #         li(self.description(entity, g).a())
        # toc.add(toc_az)
        
        
        with div(id=toc_id):
            h2(toc_label)
            h3(toc_label)
            with ul(_class="hlist"):
                for entity in all_entities:
                    li(self._description(entity, g).a())
                    li(self.description(entity, g).a())

        for entity in all_entities:
            description = self._description(entity, g)
            description = self.description(entity, g)
            with div(id=description.curie, _class="entity"):
                a(name=entity)
                with h3():
                with h4():
                    text(description.curie)
                    raw(" — ")
                    text(description.label)
                    a("🔗", _class="headerlink", href=entity, title="IRI")
                    a(sup("🔗"), _class="headerlink", href=entity, title=entity)
                
                    if (entity, OWL.deprecated, Literal(True)) in g:
                        span("⚠ is deprecated", _class="alert alert-danger", role="alert")
@@ -570,7 +627,7 @@ class HTMLDocumentationGenerator:
                    if other in labels:
                        text(labels[other])
                    else:
                        a(other.n3(g.namespace_manager), href=other)
                        a(other.n3(self.nm), href=other)

                self._dt_dd(document, "has characteristics",
                                 filter(lambda x: x not in [OWL.DatatypeProperty], g.objects(dp, RDF.type)),
@@ -634,7 +691,7 @@ class HTMLDocumentationGenerator:
                    if other in labels:
                        text(labels[other])
                    else:
                        a(other.n3(g.namespace_manager), href=other)
                        a(other.n3(self.nm), href=other)

                self._dt_dd(document, "has characteristics",
                                 filter(lambda x: x not in [OWL.ObjectProperty], g.objects(entity, RDF.type)),
@@ -709,7 +766,7 @@ class HTMLDocumentationGenerator:
    def namespaces(self, document: SAREFGraphDocument, toc):
        toc.add(li(a("Namespace Declarations", href="#namespacedeclarations")))
        with div(id="namespacedeclarations"):
            h2("Namespace Declarations")
            h3("Namespace Declarations")
            with dl():
                for prefix, ns in sorted(document.namespaces, key=lambda x: x[0]):
                    dt(prefix)
@@ -733,9 +790,9 @@ class HTMLDocumentationGenerator:
                    
                    with li():
                        if entity.startswith(document.namespace):
                            strong(self._description(entity, g).href_title(document.namespace))
                            strong(self.description(entity, g).href_title(document.namespace))
                        else:
                            self._description(entity, g).href_title(document.namespace)
                            self.description(entity, g).href_title(document.namespace)
                        self.hierarchy(document, entity, type, p)
            del self._hierarchy_call_count # clean after use 

@@ -754,9 +811,9 @@ class HTMLDocumentationGenerator:

                with li():
                    if sub.startswith(document.namespace):
                        strong(self._description(sub, g).href_title(document.namespace))
                        strong(self.description(sub, g).href_title(document.namespace))
                    else:
                        self._description(sub, g).href_title(document.namespace)
                        self.description(sub, g).href_title(document.namespace)
                    self.hierarchy(document, sub, type, p)
                if last:
                    ctx.__exit__(None, None, None)
@@ -764,7 +821,7 @@ class HTMLDocumentationGenerator:
    def reference_class(self, document: SAREFGraphDocument, entity:URIRef, first_occurrence:bool=True):
        g = self.dataset.graph(document.version_iri_with_imports)
        if isinstance(entity, URIRef):
            description = self._description(entity, g)
            description = self.description(entity, g)
            description.href_title(document.namespace)
            description.sup()

@@ -779,7 +836,7 @@ class HTMLDocumentationGenerator:
                (OWL.intersectionOf, None, " <span class='logic'>and</span> ", None, True),
                (OWL.unionOf, None, " <span class='logic'>or</span> ", None, True),
                (OWL.disjointUnionOf, " <span class='logic'>disjoint union of</span> ", " , ", None, True),
                (OWL.oneOf, " one of { ", ", ", " } ", False)]:
                (OWL.oneOf, " <span class='logic'>one of</span> { ", ", ", " } ", False)]:
                if list:=next(g.objects(entity, property), None):
                    if first:
                        raw(first)
@@ -855,7 +912,7 @@ class HTMLDocumentationGenerator:
    def reference_data_range(self, document: SAREFGraphDocument, entity:URIRef):
        g = self.dataset.graph(document.version_iri_with_imports)
        if isinstance(entity, URIRef):
            description = self._description(entity, g)
            description = self.description(entity, g)
            description.href_title(document.namespace)
            description.sup()

@@ -920,12 +977,12 @@ class HTMLDocumentationGenerator:
    def reference_individual_or_literal(self, document: SAREFGraphDocument, entity:Node):
        g = self.dataset.graph(document.version_iri_with_imports)
        if isinstance(entity, URIRef):
            description = self._description(entity, g)
            description = self.description(entity, g)
            description.href_title(document.namespace)
            description.sup()

        elif isinstance(entity, Literal):
            raw(entity.n3(g.namespace_manager))
            raw(entity.n3(self.nm))

        else:  # fallback
            raw(f"[?]<sup title='named individual' class='type-ni'>ni</sup>")
@@ -934,7 +991,7 @@ class HTMLDocumentationGenerator:
    def reference_property(self, document: SAREFGraphDocument, entity:URIRef):
        g = self.dataset.graph(document.version_iri_with_imports)
        if isinstance(entity, URIRef):
            description = self._description(entity, g)
            description = self.description(entity, g)
            description.href_title(document.namespace)
            description.sup()
            
+10 −3
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@ import logging
from git import Repo, GitCommandError
from saref_pypeline.docgen.docx_generator import DOCXDocumentationGenerator
from saref_pypeline.entities import SAREFProject, SAREFProjectVersion
from saref_pypeline.etsi import WK_FIELD
from saref_pypeline.localization import Localization
from saref_pypeline.constants import *
from typing import TYPE_CHECKING, Dict, TypeVar
@@ -27,6 +28,7 @@ class SiteManager:
    def __init__(self, pipeline: "SAREFPipeline"):
        self.pipeline = pipeline
        self.site_dir = os.path.join(pipeline.target_dir, NAME_SITE)
        self.ts_dir = os.path.join(pipeline.target_dir, NAME_TS)
        self.report_file_html = os.path.join(pipeline.sources_dir, NAME_REPORT_HTML)
        self.dataset = self.pipeline.dataset

@@ -117,14 +119,19 @@ class SiteManager:
    def generate_docx(self, project_version:SAREFProjectVersion, for_filter="docx"):
        """Render the .docx documentation of ``project_version`` and save it.

        The document is written twice: once into the project version's folder
        under the site directory, and once into ``self.ts_dir`` under a name
        built from the generator context
        (``ts_<ONE_DD><DDD><PART>v<m><t><e>_<ontology name>_<HH_MM_SS>.docx``).
        On platforms that provide ``os.startfile`` the saved files are then
        opened with their associated application, on a best-effort basis.

        :param project_version: project version to document.
        :param for_filter: currently unused; kept for interface compatibility.
        """
        from datetime import datetime

        project = project_version.project
        version = project_version.version
        target_dir = os.path.join(self.site_dir, project.path, str(version))
        docxgen = DOCXDocumentationGenerator(self, project_version)
        document = docxgen.render_document()
        now = datetime.now()
        time_formatted = now.strftime("%H_%M_%S")
        ontology_name = project_version.ontology.name

        site_copy = os.path.join(target_dir, f"{ontology_name}_{time_formatted}.docx")
        document.save(site_copy)

        context = docxgen.context
        odd = context[WK_FIELD.ONE_DD]
        # Only the part of the DDD field before the first "-" goes into the name.
        ddd = context[WK_FIELD.DDD].split("-")[0]
        part = context[WK_FIELD.PART].zfill(2) if context[WK_FIELD.PART] else ""
        # "m.t.e" version triple, each component zero-padded to two digits.
        m, t, e = (x.zfill(2) for x in context[WK_FIELD.mte].split("."))
        document_name = f"ts_{odd}{ddd}{part}v{m}{t}{e}_{ontology_name}_{time_formatted}.docx"
        os.makedirs(self.ts_dir, exist_ok=True)
        ts_copy = os.path.join(self.ts_dir, document_name)
        document.save(ts_copy)

        # Open the results for the user. The former
        # ``os.system('cmd.exe /C start "<path>"')`` never did so: ``start``
        # takes its first quoted argument as a window *title*. It was also a
        # shell string built from file names, and Windows-only.
        # ``os.startfile`` exists only on Windows; elsewhere nothing is opened.
        open_file = getattr(os, "startfile", None)
        if open_file is not None:
            for path in (site_copy, ts_copy):
                try:
                    open_file(path)
                except OSError:
                    # Opening is a convenience; never fail the generation for it.
                    logging.getLogger(__name__).debug("Could not open %s", path, exc_info=True)

    def generate_htaccess(self):
      htaccess_path = os.path.join(self.site_dir, ".htaccess")
Loading