roundtrip TS->md->TS working (75a1b878) · Commits · SAREF / saref-pypeline

src/saref_pypeline/docgen/__init__.py

+2 −2

Original line number	Diff line number	Diff line
		from saref_pypeline.docgen.utils import *
		from saref_pypeline.docgen.html_generator import HTMLDocumentationGenerator
		from saref_pypeline.docgen.docx_generator import DOCXDocumentationGenerator
		from saref_pypeline.docgen.website_generator import WebsiteGenerator
		from saref_pypeline.docgen.ts_generator import TSGenerator
		from saref_pypeline.docgen.site_manager import SiteManager

src/saref_pypeline/docgen/site_manager.py

+6 −6

Original line number	Diff line number	Diff line
		@@ -4,8 +4,8 @@ from pathlib import Path
		import shutil
		import logging
		from git import Repo, GitCommandError
		from saref_pypeline.docgen.docx_generator import DOCXDocumentationGenerator
		from saref_pypeline.docgen.ts_extractor import TSExtractor
		from saref_pypeline.docgen.ts_generator import TSGenerator
		from saref_pypeline.docgen.ts2md_extractor import TS2MDExtractor
		from saref_pypeline.entities import (
		SAREFCore,
		SAREFPatterns,
		@@ -18,7 +18,7 @@ from saref_pypeline.constants import *
		from typing import TYPE_CHECKING, Dict, TypeVar
		from itertools import chain
		from saref_pypeline.docgen.utils import *
		from saref_pypeline.docgen.html_generator import HTMLDocumentationGenerator
		from saref_pypeline.docgen.website_generator import WebsiteGenerator

		T = TypeVar("T")

		@@ -133,7 +133,7 @@ class SiteManager:
		except Exception as ex:
		pass

		docgen = HTMLDocumentationGenerator(self, project_version)
		docgen = WebsiteGenerator(self, project_version)

		# html documentation
		html = docgen.render_ontology_documentation(project_version.ontology)
		@@ -180,7 +180,7 @@ class SiteManager:
		def generate_ts(self, project_version: SAREFProjectVersion):
		project = project_version.project
		version = project_version.version
		docxgen = DOCXDocumentationGenerator(self, project_version)
		docxgen = TSGenerator(self, project_version)
		document = docxgen.render_document()
		from datetime import datetime

		@@ -202,7 +202,7 @@ class SiteManager:
		def extract_from_ts(self, project_version: SAREFProjectVersion):
		project = project_version.project
		version = project_version.version
		docxextract = TSExtractor(self, project_version)
		docxextract = TS2MDExtractor(self, project_version)
		docxextract.extract()
		from datetime import datetime

src/saref_pypeline/docgen/ts_extractor.py→src/saref_pypeline/docgen/ts2md_extractor.py

+20 −6

Original line number	Diff line number	Diff line
		@@ -80,7 +80,7 @@ class ExtractFormat(Enum):


		class Markup(Enum):
		STRONG = "bold", ("", ""), ("<b>", "</b>"),
		STRONG = "bold", ("", ""), ("<strong>", "</strong>"),
		EM = "italic", ("_", "_"), ("<em>", "</em>")
		SUP = "superscript", ("<sup>", "</sup>"), ("<sup>", "</sup>")
		CODE = "name", ("`", "`"), ("<code>", "</code>"), lambda x: x == "Courier New" or x == "Consolas"
		@@ -271,6 +271,9 @@ def extract_hyperlink(hyperlink: Hyperlink, ctx: RunContext):
		else:
		href = f"#{hyperlink.fragment}"

		ctx.content.extend(ctx.buffer_blank)
		ctx.buffer_blank.clear()

		if ctx.format == ExtractFormat.MD:
		ctx.content.append("[")
		else:
		@@ -316,7 +319,7 @@ def extract_run(run: Run, ctx: RunContext):
		text = run.text.replace(" ", " ")

		# extract white space before and after
		before, text, after = re.match(r"^(\s)(.?)(\s*)$", text).groups()
		before, text, after = re.match(r"^(\s)(.?)(\s*)$", text, re.DOTALL).groups()
		if before:
		ctx.buffer_blank.append(before)

		@@ -338,7 +341,7 @@ def extract_run(run: Run, ctx: RunContext):



		class TSExtractor:
		class TS2MDExtractor:
		"""_helper class to download the TS and extract the md files"""

		def __init__(
		@@ -570,7 +573,14 @@ class TSExtractor:
		method = getattr(self, fname)
		return method(paragraph, extract_format)
		else:
		logger.warning(f"TSExtractor function {fname} not implemented - skipping")
		logger.warning(f"TS2MDExtractor function {fname} not implemented - skipping")

		def extract_hyperlink_necessary(self, hyperlink: "Hyperlink") -> bool:
		    """Tell whether *hyperlink* should be extracted as an explicit link.

		    A hyperlink is kept only when it has a non-empty URL and that URL
		    does not start with any namespace listed in
		    ``self.project_version.ontology.namespaces``.

		    Args:
		        hyperlink: The hyperlink to inspect; only its ``url`` attribute
		            is read.

		    Returns:
		        ``True`` when the link has a URL outside every ontology
		        namespace, ``False`` when the URL is missing/empty or falls
		        inside one of those namespaces.
		    """
		    url = hyperlink.url
		    if not url:
		        # No target URL: nothing to emit as an explicit link.
		        return False

		    # ``namespaces`` is iterated as (prefix, namespace) pairs; only the
		    # namespace part matters for the prefix test.
		    namespaces = self.project_version.ontology.namespaces
		    # NOTE(review): an additional condition "URL differs from the link
		    # text" was left disabled in the previous version and is still not
		    # applied here - confirm whether it is wanted.
		    return not any(url.startswith(namespace) for _prefix, namespace in namespaces)

		def extract_inner_content(
		self, paragraph: Paragraph, extract_format: ExtractFormat = ExtractFormat.MD
		@@ -581,7 +591,11 @@ class TSExtractor:

		for child in paragraph.iter_inner_content():
		if isinstance(child, Hyperlink):
		if self.extract_hyperlink_necessary(child):
		extract_hyperlink(child, ctx)
		else:
		for run in child.runs:
		extract_run(run, ctx)
		elif isinstance(child, Run):
		extract_run(child, ctx)

		@@ -796,7 +810,7 @@ class TSExtractor:
		# Figure styles For formatting figures

		def extract_TF(
		self, paragraph: Paragraph, extract_format: ExtractFormat = ExtractFormat.MD
		self, paragraph: Paragraph, extract_format: ExtractFormat = ExtractFormat.HTML
		):
		"""Figure title"""
		md = self.extract_inner_content(paragraph, extract_format)

src/saref_pypeline/docgen/docx_generator.py→src/saref_pypeline/docgen/ts_generator.py

+186 −119

File changed and moved.

Preview size limit exceeded, changes collapsed.

src/saref_pypeline/docgen/html_generator.py→src/saref_pypeline/docgen/website_generator.py

+2 −1

Original line number	Diff line number	Diff line
		@@ -70,7 +70,7 @@ class HTMLEntityDescription(EntityDescription):
		return a(self.curie, href=f"#{self.curie}", title=self.label)


		class HTMLDocumentationGenerator:
		class WebsiteGenerator:

		def __init__(
		self, site_manager: "SiteManager", project_version: SAREFProjectVersion
		@@ -311,6 +311,7 @@ class HTMLDocumentationGenerator:
		"ETSI IPR Policy",
		href="https://www.etsi.org/intellectual-property-rights",
		),
		class_: "alert-warning"
		)

		toc = ol()