adjustments search and roundtrip SAREF4BLG (6a695e1e) · Commits · SAREF / saref-pypeline

src/saref_pypeline/constants.py

+2 −2

Original line number	Diff line number	Diff line
		@@ -84,5 +84,5 @@ owl:topObjectProperty a owl:ObjectProperty ;
		"""
		)
		PATTERN_TERM_NAME = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_-]+$")
		# Caption matchers: group 1 is the table/figure number, group 2 the caption text.
		# Leading/trailing whitespace and the separator run ("\xa0", spaces, ":") are optional.
		# Earlier single-line variants; both names are rebound by the definitions below.
		CAPTION_TABLE_RE = re.compile(r"^\s*Table[\s\xa0]+([A-Z0-9\.]+)[\xa0\s:]*(.*)\s*$")
		CAPTION_FIGURE_RE = re.compile(r"^\s*Figure[\s\xa0]+([A-Z0-9\.]+)[\xa0\s:]*(.*)\s*$")
		# Effective definitions: [\s\S] lets the caption text span line breaks, and the
		# lazy quantifier leaves trailing whitespace to the final \s*.
		CAPTION_TABLE_RE = re.compile(r"^\s*Table[\s\xa0]+([A-Z0-9\.]+)[\xa0\s:]*([\s\S]*?)\s*$")
		CAPTION_FIGURE_RE = re.compile(r"^\s*Figure[\s\xa0]+([A-Z0-9\.]+)[\xa0\s:]*([\s\S]*?)\s*$")

src/saref_pypeline/etsi.py

+3 −0

Original line number	Diff line number	Diff line
		@@ -208,6 +208,9 @@ class P_STYLE(StrEnum, metaclass=_MetaEnum):
		# Style found in SAREF4AUTO V1.1.1
		List_Paragraph = "List Paragraph"

		# Style found in SAREF4BLDG V2.1.1
		Annotation_Text = "annotation text"


		class C_STYLE(StrEnum, metaclass=_MetaEnum):
		"""ETSI Character Styles, obtained from skeleton"""

src/saref_pypeline/pipeline.py

+1 −0

Original line number	Diff line number	Diff line
		@@ -1132,6 +1132,7 @@ class SAREFPipeline:
		return dumper.represent_str(data.name)

		yaml.SafeDumper.add_representer(WK_FIELD, represent_string)
		os.makedirs(os.path.dirname(metadata_file), exist_ok=True)
		with open(metadata_file, "w") as f:
		yaml.safe_dump(self.projects_metadata, f)

src/saref_pypeline/ts/ts2md_extractor.py

+38 −5

Original line number	Diff line number	Diff line
		@@ -645,7 +645,9 @@ class TS2MDExtractor:
		):
		"""Reference, Example => use "tab" between "item/number" and "text"."""
		md = self.extract_inner_content(paragraph)
		if md.startswith("EXAMPLE"):
		if not md:
		return ""
		elif md.startswith("EXAMPLE"):
		# Example
		left, right = md.split("\t", 1)
		content = right.replace("\n", "\n ")
		@@ -797,7 +799,8 @@ class TS2MDExtractor:
		):
		"""Programming language"""
		md = self.extract_inner_content(paragraph, extract_format)
		return f"```\n{md}\n```"
		md = md.replace("&", "&amp;").replace("<", "&lt;")
		return f"""<pre><code class="language-turtle">{md}</code></pre>""" if md else ""

		def extract_EQ(
		self, paragraph: Paragraph, extract_format: ExtractFormat = ExtractFormat.MD
		@@ -830,7 +833,11 @@ class TS2MDExtractor:
		<figcaption>{label}: {caption}</figcaption>
		</figure>\n"""
		else:
		return f"{md}"
		pre_code = self.get_paragraph_pre_code(paragraph)
		return f"""<figure>
		{pre_code}
		<figcaption>{label}: {caption}</figcaption>
		</figure>\n"""

		def extract_FL(
		self, paragraph: Paragraph, extract_format: ExtractFormat = ExtractFormat.MD
		@@ -997,6 +1004,24 @@ class TS2MDExtractor:
		image_paragraph = get_prev_block(self.document, paragraph)
		return self.get_paragraph_image_info(image_paragraph, i + 1)

		def get_paragraph_pre_code(self, block, i: int = 0):
		    """Return the Turtle listing found at or before ``block`` as HTML.

		    A listing is expected to be a table with exactly one row and one
		    column whose single cell holds the Turtle code. Starting from
		    ``block``, the search steps backwards through the document with
		    ``get_prev_block`` until it reaches a table.

		    Args:
		        block: The block to start from (a table, or a block following one).
		        i: Number of backward steps already taken; the search gives up
		            once the starting block and the three blocks before it have
		            been examined.

		    Returns:
		        ``<pre><code class="language-turtle">…</code></pre>`` with ``&``
		        and ``<`` escaped, or ``""`` when no suitable table is found or
		        the cell is empty.
		    """
		    if i > 3:
		        return ""

		    # Walk backwards until a table is reached, bounded by the step budget.
		    while not isinstance(block, Table):
		        # NOTE(review): get_prev_block appears to yield None when nothing
		        # precedes the target; stop instead of dereferencing it.
		        if block is None or i >= 3:
		            return ""
		        block = get_prev_block(self.document, block)
		        i += 1

		    if len(block.rows) != 1 or len(block.columns) != 1:
		        logger.warning("expected table with one line and one column for Listings")
		        return ""

		    cell = block.cell(0, 0)
		    ttl = "\n".join(p.text for p in cell.paragraphs)
		    # Escape "&" first so the entity produced for "<" is not escaped again.
		    ttl = ttl.replace("&", "&amp;").replace("<", "&lt;")
		    return f"""<pre><code class="language-turtle">{ttl}</code></pre>""" if ttl else ""

		def extract_table(self, table, extract_format: ExtractFormat = ExtractFormat.HTML):
		"""
		Generate HTML for a python-docx Table.
		@@ -1007,6 +1032,10 @@ class TS2MDExtractor:
		"""
		assert isinstance(table, Table)

		# ignore if nrows=1 and ncols=1 like in SAREF4BLDG
		if len(table.rows) == 1 and len(table.columns)==1:
		return

		cap_id, cap_text = self.caption_from_prev_paragraph(table)

		if cap_text and "Prefixes and namespaces" in cap_text:
		@@ -1118,6 +1147,10 @@ class TS2MDExtractor:
		else:
		return ""

		def extract_Annotation_Text(
		    self, paragraph: Paragraph, extract_format: ExtractFormat = ExtractFormat.MD
		):
		    """Extract an Annotation_Text paragraph by delegating to ``extract_Normal``."""
		    rendered = self.extract_Normal(paragraph, extract_format)
		    return rendered

		def print_admonition(classes: str, title: str, md: str):
		prefix = 4 * " "
		@@ -1126,7 +1159,7 @@ def print_admonition(classes: str, title: str, md: str):


		def get_prev_block(document: Document, target):
		"""Return the block (Paragraph or Table) that appears immediately before target_tbl."""
		"""Return the block (Paragraph or Table) that appears immediately before target."""
		prev = None
		for block in iter_block_items(document):
		if isinstance(block, Table | Paragraph) and block._element is target._element:

src/saref_pypeline/ts/ts_generator.py

+12 −3

Original line number	Diff line number	Diff line
		@@ -902,11 +902,11 @@ class TSGenerator:
		informative_soup = BeautifulSoup()
		normative_h2 = normative_soup.find("h2")
		if normative_h2:
		informative_h3 = normative_h2.find_next("h2")
		for el in informative_h3.find_next_siblings():
		informative_h2 = normative_h2.find_next("h2")
		for el in informative_h2.find_next_siblings():
		el.extract()
		informative_soup.append(el)
		for h2 in normative_soup.find_all("h2"):
		for h2 in normative_soup.find_all(re.compile("h1|h2")):
		h2.extract()

		self.insert_references("Normative references", normative_soup)
		@@ -1380,6 +1380,8 @@ class TSGenerator:
		if isinstance(child, Tag):
		if child.name == "img":
		self.insert_soup_img(child, text_width)
		if child.name == "pre":
		self.insert_soup_pre(child)
		elif child.name == "figcaption":
		self.insert_soup_figcaption(child)

		@@ -1463,6 +1465,13 @@ class TSGenerator:
		with Bookmark(self, id):
		self.insert_soup_children(el)

		def insert_soup_pre(self, pre: Tag) -> None:
		    """Insert a ``<pre>`` element as one paragraph styled ``P_STYLE.PL``.

		    The supported shape is a ``<pre>`` whose only child is a ``<code>``
		    element; its text becomes the paragraph. Any other shape is logged
		    as unsupported and rendered best-effort from the whole text of the
		    ``<pre>``, so an empty element no longer raises ``IndexError`` and
		    extra children are not silently dropped.

		    Args:
		        pre: The ``<pre>`` tag to insert.
		    """
		    children = pre.contents
		    # getattr: a child may be a plain text node rather than a tag.
		    if len(children) == 1 and getattr(children[0], "name", None) == "code":
		        code = children[0].text
		    else:
		        logger.warning("Not supported <pre> element")
		        code = pre.get_text()
		    self.new_paragraph(code, style=P_STYLE.PL)


		def insert_soup_a(self, a: Tag) -> None:
		self.ensure_cursor_paragraph()
		if href := a.get("href"):