Unverified commit 75a1b878, authored by Maxime Lefrançois
Browse files

roundtrip TS->md->TS working

at least on SAREF4AGRI V2.1.1
parent 8bde9cf4
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
from saref_pypeline.docgen.utils import *
from saref_pypeline.docgen.html_generator import HTMLDocumentationGenerator
from saref_pypeline.docgen.docx_generator import DOCXDocumentationGenerator
from saref_pypeline.docgen.website_generator import WebsiteGenerator
from saref_pypeline.docgen.ts_generator import TSGenerator
from saref_pypeline.docgen.site_manager import SiteManager
+6 −6
Original line number Diff line number Diff line
@@ -4,8 +4,8 @@ from pathlib import Path
import shutil
import logging
from git import Repo, GitCommandError
from saref_pypeline.docgen.docx_generator import DOCXDocumentationGenerator
from saref_pypeline.docgen.ts_extractor import TSExtractor
from saref_pypeline.docgen.ts_generator import TSGenerator
from saref_pypeline.docgen.ts2md_extractor import TS2MDExtractor
from saref_pypeline.entities import (
    SAREFCore,
    SAREFPatterns,
@@ -18,7 +18,7 @@ from saref_pypeline.constants import *
from typing import TYPE_CHECKING, Dict, TypeVar
from itertools import chain
from saref_pypeline.docgen.utils import *
from saref_pypeline.docgen.html_generator import HTMLDocumentationGenerator
from saref_pypeline.docgen.website_generator import WebsiteGenerator

T = TypeVar("T")

@@ -133,7 +133,7 @@ class SiteManager:
        except Exception as ex:
            pass

        docgen = HTMLDocumentationGenerator(self, project_version)
        docgen = WebsiteGenerator(self, project_version)

        # html documentation
        html = docgen.render_ontology_documentation(project_version.ontology)
@@ -180,7 +180,7 @@ class SiteManager:
    def generate_ts(self, project_version: SAREFProjectVersion):
        project = project_version.project
        version = project_version.version
        docxgen = DOCXDocumentationGenerator(self, project_version)
        docxgen = TSGenerator(self, project_version)
        document = docxgen.render_document()
        from datetime import datetime

@@ -202,7 +202,7 @@ class SiteManager:
    def extract_from_ts(self, project_version: SAREFProjectVersion):
        project = project_version.project
        version = project_version.version
        docxextract = TSExtractor(self, project_version)
        docxextract = TS2MDExtractor(self, project_version)
        docxextract.extract()
        from datetime import datetime

+20 −6
Original line number Diff line number Diff line
@@ -80,7 +80,7 @@ class ExtractFormat(Enum):


class Markup(Enum):
    STRONG = "bold", ("**", "**"), ("<b>", "</b>"), 
    STRONG = "bold", ("**", "**"), ("<strong>", "</strong>"), 
    EM = "italic", ("_", "_"), ("<em>", "</em>")
    SUP = "superscript", ("<sup>", "</sup>"), ("<sup>", "</sup>")
    CODE = "name", ("`", "`"), ("<code>", "</code>"), lambda x: x == "Courier New" or x == "Consolas"
@@ -271,6 +271,9 @@ def extract_hyperlink(hyperlink: Hyperlink, ctx: RunContext):
    else:
        href = f"#{hyperlink.fragment}"

    ctx.content.extend(ctx.buffer_blank)
    ctx.buffer_blank.clear()

    if ctx.format == ExtractFormat.MD:
        ctx.content.append("[")
    else:
@@ -316,7 +319,7 @@ def extract_run(run: Run, ctx: RunContext):
    text = run.text.replace(" ", " ")

    # extract white space before and after
    before, text, after = re.match(r"^(\s*)(.*?)(\s*)$", text).groups()
    before, text, after = re.match(r"^(\s*)(.*?)(\s*)$", text, re.DOTALL).groups()
    if before:
        ctx.buffer_blank.append(before)

@@ -338,7 +341,7 @@ def extract_run(run: Run, ctx: RunContext):
        


class TSExtractor:
class TS2MDExtractor:
    """_helper class to download the TS and extract the md files"""

    def __init__(
@@ -570,7 +573,14 @@ class TSExtractor:
            method = getattr(self, fname)
            return method(paragraph, extract_format)
        else:
            logger.warning(f"TSExtractor function {fname} not implemented - skipping")
            logger.warning(f"TS2MDExtractor function {fname} not implemented - skipping")

    def extract_hyperlink_necessary(self, hyperlink: "Hyperlink") -> bool:
        """Tell whether ``hyperlink`` should be extracted as an actual link.

        A hyperlink is considered necessary when it has a non-empty ``url``
        and that URL does not start with any namespace declared by the
        ontology of ``self.project_version``.

        Args:
            hyperlink: The hyperlink to inspect; only its ``url`` attribute
                is read.

        Returns:
            ``False`` if the hyperlink has no URL or if its URL starts with
            one of the ontology namespaces, ``True`` otherwise.
        """
        url = hyperlink.url
        if not url:
            # No target URL: there is nothing to link to.
            return False

        # ``namespaces`` is iterated as (prefix, namespace) pairs; only the
        # namespace part is compared against the URL.
        namespaces = self.project_version.ontology.namespaces
        # NOTE(review): an additional condition (also skip links whose URL is
        # identical to their text) used to be sketched here but was disabled;
        # it is intentionally not applied.
        return not any(url.startswith(namespace) for _prefix, namespace in namespaces)
        
    def extract_inner_content(
        self, paragraph: Paragraph, extract_format: ExtractFormat = ExtractFormat.MD
@@ -581,7 +591,11 @@ class TSExtractor:

        for child in paragraph.iter_inner_content():
            if isinstance(child, Hyperlink):
                if self.extract_hyperlink_necessary(child):
                    extract_hyperlink(child, ctx)
                else:
                    for run in child.runs:
                        extract_run(run, ctx)
            elif isinstance(child, Run):
                extract_run(child, ctx)

@@ -796,7 +810,7 @@ class TSExtractor:
    # Figure styles             For formatting figures

    def extract_TF(
        self, paragraph: Paragraph, extract_format: ExtractFormat = ExtractFormat.MD
        self, paragraph: Paragraph, extract_format: ExtractFormat = ExtractFormat.HTML
    ):
        """Figure title"""
        md = self.extract_inner_content(paragraph, extract_format)
+186 −119

File changed and moved.

Preview size limit exceeded, changes collapsed.

+2 −1
Original line number Diff line number Diff line
@@ -70,7 +70,7 @@ class HTMLEntityDescription(EntityDescription):
        return a(self.curie, href=f"#{self.curie}", title=self.label)


class HTMLDocumentationGenerator:
class WebsiteGenerator:

    def __init__(
        self, site_manager: "SiteManager", project_version: SAREFProjectVersion
@@ -311,6 +311,7 @@ class HTMLDocumentationGenerator:
                    "ETSI IPR Policy",
                    href="https://www.etsi.org/intellectual-property-rights",
                ),
                class_: "alert-warning"
            )

            toc = ol()