Loading .gitignore +1 −0 Original line number Diff line number Diff line Loading @@ -4,3 +4,4 @@ __pycache__ .venv .vscode lib src/saref_pypeline.egg-info src/saref_pypeline/entities.py +2 −1 Original line number Diff line number Diff line Loading @@ -4,6 +4,7 @@ from itertools import product from packaging.version import Version import re from typing import Optional, List, Tuple, Dict, TYPE_CHECKING from urllib.parse import quote from git import Repo from rdflib import Graph, Literal, URIRef, RDF, RDFS, XSD, OWL, DCAT, DCTERMS Loading Loading @@ -721,7 +722,7 @@ class EntityDescription: @cached_property def curie_id(self): return self.curie.replace(":", "-") return quote(self.curie.replace(":","-",1)) @cached_property def label(self): Loading src/saref_pypeline/pipeline.py +1 −1 Original line number Diff line number Diff line Loading @@ -427,7 +427,7 @@ class SAREFPipeline: self.site_manager.generate_documents_index() self.site_manager.generate_terms_index() if not self.skip_terms: self.site_manager.generate_terms_pages() self.site_manager.generate_terms_pages(project_version) elif self.mode is PipelineMode.TS: TSGenerator(self, project_version).generate_ts() elif self.mode is PipelineMode.TS2MD: Loading src/saref_pypeline/ts/ts2md_extractor.py +13 −5 Original line number Diff line number Diff line Loading @@ -335,7 +335,7 @@ class TS2MDExtractor: if not confirm: return logger.log(TRACE_LEVEL, f"Extracting from TS {self.project_version} with file {self.file_path}" ) logger.debug(f"... using file {self.file_path}" ) self.extract_figures() Loading Loading @@ -485,7 +485,8 @@ class TS2MDExtractor: def extract_references(self): md_output = [] md_output.append(f"### Normative references\n") md_output.append(f"# References\n") md_output.append(f"## Normative references\n") one_dd = self.context[WK_FIELD.ONE_DD] ddd = self.context[WK_FIELD.DDD] mte = self.context[WK_FIELD.mte] Loading @@ -495,11 +496,11 @@ class TS2MDExtractor: title = f"{title1}; {title2}{ f" {title3}" if title3 else ""}" url = self.get_pdf_url() md_output.append( f"""* <a id="[0]">[0]</a> [ETSI TS {one_dd} {ddd} (V{mte})]({url}): "{title}".""" f"""* <a id="ref-0">[0]</a> [ETSI TS {one_dd} {ddd} (V{mte})]({url}): "{title}".""" ) self.extract_reference(md_output, "Normative references") md_output.append(f"\n\n### Informative references\n") md_output.append(f"\n\n## Informative references\n") self.extract_reference(md_output, "Informative references") Path(self.out_folder, "references.md").write_text( Loading @@ -514,7 +515,14 @@ class TS2MDExtractor: if not started or block_item.style.name not in [P_STYLE.NO, P_STYLE.EX]: continue ref = self.extract_block_item(block_item) ref = re.sub(r"(\[(i\.)?\d+\])", r"""<a id="\1">\1</a>""", ref, count=1) def subst(m:re.Match): m1 = m.group(1) m2 = m.group(2) if m.group(1): return f"""<a id="ref-i-{m2}">[i.{m2}]</a>""" else: return f"""<a id="ref-{m2}">[{m2}]</a>""" ref = re.sub(r"\[(i\.)?(\d+)\]", subst, ref, count=1) md_output.append(ref) # --------------------------------------- Loading src/saref_pypeline/ts/ts_generator.py +46 −29 Original line number Diff line number Diff line Loading @@ -42,6 +42,7 @@ from saref_pypeline.ts.constants import NAME_TS from saref_pypeline.ts.utils import iter_block_items, pprint_xml from saref_pypeline.utils import ( materialize_links, slugify, with_flags, ) from saref_pypeline.entities import ( Loading Loading @@ -606,7 +607,7 @@ class TSGenerator: self.new_paragraph(style=p_style) if not self._is_appendix: with Bookmark(self, f"Clause_{clause}"): with Bookmark(self, slugify(f"clause-{clause}")): self.new_run(clause) self.new_run("\t") if text: Loading Loading @@ -899,14 +900,14 @@ class TSGenerator: # split normative and non normative informative_soup = BeautifulSoup() normative_h3 = normative_soup.find("h3") if normative_h3: informative_h3 = normative_h3.find_next("h3") normative_h2 = normative_soup.find("h2") if normative_h2: informative_h3 = normative_h2.find_next("h2") for el in informative_h3.find_next_siblings(): el.extract() informative_soup.append(el) for h3 in normative_soup.find_all("h3"): h3.extract() for h2 in normative_soup.find_all("h2"): h2.extract() self.insert_references("Normative references", normative_soup) self.insert_references("Informative references", informative_soup) Loading Loading @@ -1104,7 +1105,7 @@ class TSGenerator: elif md_path.exists(): data = markdown( md_path.read_text(encoding="utf-8"), extensions=["extra", "admonition", "codehilite"], extensions=["extra", "admonition"], ) if not data: Loading Loading @@ -1162,22 +1163,32 @@ class TSGenerator: # only a few special cases of admonition are supported if "admonition" in el.get("class"): title = el.find("p", class_="admonition-title") body_parts = [p for p in el.find_all("p") if p is not title] if len(body_parts) >= 1: p = body_parts[0] body_parts = [c for c in el.find_all(recursive=False) if c is not title] if len(body_parts) >= 1 and body_parts[0].name == "p": child = body_parts[0] if title: p.insert(0, *list(title.children)) child.insert(0, *list(title.children)) else: p = title child = title style = P_STYLE.NO if title and "NOTE" in title.getText() else P_STYLE.EX self.pstyling_funcs.append(make_pstyling_func(style)) self.insert_soup(p) self.insert_soup(child) self.pstyling_funcs.pop() for child in body_parts[1:]: if child.name == "p": self.pstyling_funcs.append(make_pstyling_func(P_STYLE.EW)) for p in body_parts[1:]: self.insert_soup(p) self.insert_soup(child) self.pstyling_funcs.pop() elif child.name == "ul": self.pstyling_funcs.append(make_pstyling_func(P_STYLE.B2_plus)) self.insert_soup(child) self.pstyling_funcs.pop() else: logger.warning(f"Unimplemented support of tag {child.name} inside admonition {el.get('class')}.") self.pstyling_funcs.append(make_pstyling_func(P_STYLE.EW)) self.insert_soup(child) self.pstyling_funcs.pop() else: Loading Loading @@ -1219,7 +1230,7 @@ class TSGenerator: self.new_paragraph() self._cursor.style = P_STYLE.Heading_8 annex_number = self.get_next_annex_number() with Bookmark(self, f"Annex_{annex_number}"): with Bookmark(self, slugify(f"annex-{annex_number}")): self.new_run(f"Annex {annex_number}") self.new_run(f" ({"normative" if normative else "informative"}):\n") self.insert_soup_children(el) Loading Loading @@ -1249,11 +1260,17 @@ class TSGenerator: self.insert_soup_h(el) def insert_soup_ul(self, el: Tag) -> None: if self.pstyling_funcs: self.insert_soup_list(el) else: self.pstyling_funcs.append(make_pstyling_func(P_STYLE.B1_plus)) self.insert_soup_list(el) self.pstyling_funcs.pop() def insert_soup_ol(self, el: Tag) -> None: if self.pstyling_funcs: self.insert_soup_list(el) else: self.pstyling_funcs.append(make_pstyling_func(P_STYLE.BN)) self.insert_soup_list(el) self.pstyling_funcs.pop() Loading Loading @@ -1442,7 +1459,7 @@ class TSGenerator: if el.a: el.a.replace_with(el.a.text) match = re.match(r"^Figure ([\d\.-]+)", el.get_text()) id = f"Figure_{match.group(1)}" if match else None id = slugify(f"figure-{match.group(1)}") if match else None with Bookmark(self, id): self.insert_soup_children(el) Loading Loading @@ -1548,7 +1565,7 @@ class TSGenerator: """ self.new_paragraph(style=P_STYLE.TH) match = re.match(r"^Table (\d+)", el.get_text()) id = f"Table_{match.group(1)}" if match else None id = slugify(f"table-{match.group(1)}") if match else None with Bookmark(self, id): self.insert_soup_children(el) Loading Loading @@ -1585,7 +1602,7 @@ class TSGenerator: if colspan > 1: self._add_gridspan(cells[cell_idx], colspan) for i in range(colspan - 1): to_remove = cells[cell_idx + 1]._tc to_remove = cells[cell_idx + i + 1]._tc to_remove.getparent().remove(to_remove) rowspan = int(td.get("rowspan", "1")) Loading Loading @@ -1738,7 +1755,7 @@ class TSGenerator: self.new_paragraph(style=P_STYLE.Heading_3) with Bookmark(self, description.curie): with Bookmark(self, description.curie_id): self.new_run(description.curie) self.new_run(" — ") self.new_run(description.label) Loading Loading @@ -2051,7 +2068,7 @@ class TSGenerator: "http://www.iana.org/assignments/media-types/text/markdown" ): self.insert_soup( markdown(literal, extensions=["extra", "admonition", "codehilite"]) markdown(literal, extensions=["extra", "admonition"]) ) else: self.new_run(literal.replace("\r", "")) Loading Loading
.gitignore +1 −0 Original line number Diff line number Diff line Loading @@ -4,3 +4,4 @@ __pycache__ .venv .vscode lib src/saref_pypeline.egg-info
src/saref_pypeline/entities.py +2 −1 Original line number Diff line number Diff line Loading @@ -4,6 +4,7 @@ from itertools import product from packaging.version import Version import re from typing import Optional, List, Tuple, Dict, TYPE_CHECKING from urllib.parse import quote from git import Repo from rdflib import Graph, Literal, URIRef, RDF, RDFS, XSD, OWL, DCAT, DCTERMS Loading Loading @@ -721,7 +722,7 @@ class EntityDescription: @cached_property def curie_id(self): return self.curie.replace(":", "-") return quote(self.curie.replace(":","-",1)) @cached_property def label(self): Loading
src/saref_pypeline/pipeline.py +1 −1 Original line number Diff line number Diff line Loading @@ -427,7 +427,7 @@ class SAREFPipeline: self.site_manager.generate_documents_index() self.site_manager.generate_terms_index() if not self.skip_terms: self.site_manager.generate_terms_pages() self.site_manager.generate_terms_pages(project_version) elif self.mode is PipelineMode.TS: TSGenerator(self, project_version).generate_ts() elif self.mode is PipelineMode.TS2MD: Loading
src/saref_pypeline/ts/ts2md_extractor.py +13 −5 Original line number Diff line number Diff line Loading @@ -335,7 +335,7 @@ class TS2MDExtractor: if not confirm: return logger.log(TRACE_LEVEL, f"Extracting from TS {self.project_version} with file {self.file_path}" ) logger.debug(f"... using file {self.file_path}" ) self.extract_figures() Loading Loading @@ -485,7 +485,8 @@ class TS2MDExtractor: def extract_references(self): md_output = [] md_output.append(f"### Normative references\n") md_output.append(f"# References\n") md_output.append(f"## Normative references\n") one_dd = self.context[WK_FIELD.ONE_DD] ddd = self.context[WK_FIELD.DDD] mte = self.context[WK_FIELD.mte] Loading @@ -495,11 +496,11 @@ class TS2MDExtractor: title = f"{title1}; {title2}{ f" {title3}" if title3 else ""}" url = self.get_pdf_url() md_output.append( f"""* <a id="[0]">[0]</a> [ETSI TS {one_dd} {ddd} (V{mte})]({url}): "{title}".""" f"""* <a id="ref-0">[0]</a> [ETSI TS {one_dd} {ddd} (V{mte})]({url}): "{title}".""" ) self.extract_reference(md_output, "Normative references") md_output.append(f"\n\n### Informative references\n") md_output.append(f"\n\n## Informative references\n") self.extract_reference(md_output, "Informative references") Path(self.out_folder, "references.md").write_text( Loading @@ -514,7 +515,14 @@ class TS2MDExtractor: if not started or block_item.style.name not in [P_STYLE.NO, P_STYLE.EX]: continue ref = self.extract_block_item(block_item) ref = re.sub(r"(\[(i\.)?\d+\])", r"""<a id="\1">\1</a>""", ref, count=1) def subst(m:re.Match): m1 = m.group(1) m2 = m.group(2) if m.group(1): return f"""<a id="ref-i-{m2}">[i.{m2}]</a>""" else: return f"""<a id="ref-{m2}">[{m2}]</a>""" ref = re.sub(r"\[(i\.)?(\d+)\]", subst, ref, count=1) md_output.append(ref) # --------------------------------------- Loading
src/saref_pypeline/ts/ts_generator.py +46 −29 Original line number Diff line number Diff line Loading @@ -42,6 +42,7 @@ from saref_pypeline.ts.constants import NAME_TS from saref_pypeline.ts.utils import iter_block_items, pprint_xml from saref_pypeline.utils import ( materialize_links, slugify, with_flags, ) from saref_pypeline.entities import ( Loading Loading @@ -606,7 +607,7 @@ class TSGenerator: self.new_paragraph(style=p_style) if not self._is_appendix: with Bookmark(self, f"Clause_{clause}"): with Bookmark(self, slugify(f"clause-{clause}")): self.new_run(clause) self.new_run("\t") if text: Loading Loading @@ -899,14 +900,14 @@ class TSGenerator: # split normative and non normative informative_soup = BeautifulSoup() normative_h3 = normative_soup.find("h3") if normative_h3: informative_h3 = normative_h3.find_next("h3") normative_h2 = normative_soup.find("h2") if normative_h2: informative_h3 = normative_h2.find_next("h2") for el in informative_h3.find_next_siblings(): el.extract() informative_soup.append(el) for h3 in normative_soup.find_all("h3"): h3.extract() for h2 in normative_soup.find_all("h2"): h2.extract() self.insert_references("Normative references", normative_soup) self.insert_references("Informative references", informative_soup) Loading Loading @@ -1104,7 +1105,7 @@ class TSGenerator: elif md_path.exists(): data = markdown( md_path.read_text(encoding="utf-8"), extensions=["extra", "admonition", "codehilite"], extensions=["extra", "admonition"], ) if not data: Loading Loading @@ -1162,22 +1163,32 @@ class TSGenerator: # only a few special cases of admonition are supported if "admonition" in el.get("class"): title = el.find("p", class_="admonition-title") body_parts = [p for p in el.find_all("p") if p is not title] if len(body_parts) >= 1: p = body_parts[0] body_parts = [c for c in el.find_all(recursive=False) if c is not title] if len(body_parts) >= 1 and body_parts[0].name == "p": child = body_parts[0] if title: p.insert(0, *list(title.children)) child.insert(0, *list(title.children)) else: p = title child = title style = P_STYLE.NO if title and "NOTE" in title.getText() else P_STYLE.EX self.pstyling_funcs.append(make_pstyling_func(style)) self.insert_soup(p) self.insert_soup(child) self.pstyling_funcs.pop() for child in body_parts[1:]: if child.name == "p": self.pstyling_funcs.append(make_pstyling_func(P_STYLE.EW)) for p in body_parts[1:]: self.insert_soup(p) self.insert_soup(child) self.pstyling_funcs.pop() elif child.name == "ul": self.pstyling_funcs.append(make_pstyling_func(P_STYLE.B2_plus)) self.insert_soup(child) self.pstyling_funcs.pop() else: logger.warning(f"Unimplemented support of tag {child.name} inside admonition {el.get('class')}.") self.pstyling_funcs.append(make_pstyling_func(P_STYLE.EW)) self.insert_soup(child) self.pstyling_funcs.pop() else: Loading Loading @@ -1219,7 +1230,7 @@ class TSGenerator: self.new_paragraph() self._cursor.style = P_STYLE.Heading_8 annex_number = self.get_next_annex_number() with Bookmark(self, f"Annex_{annex_number}"): with Bookmark(self, slugify(f"annex-{annex_number}")): self.new_run(f"Annex {annex_number}") self.new_run(f" ({"normative" if normative else "informative"}):\n") self.insert_soup_children(el) Loading Loading @@ -1249,11 +1260,17 @@ class TSGenerator: self.insert_soup_h(el) def insert_soup_ul(self, el: Tag) -> None: if self.pstyling_funcs: self.insert_soup_list(el) else: self.pstyling_funcs.append(make_pstyling_func(P_STYLE.B1_plus)) self.insert_soup_list(el) self.pstyling_funcs.pop() def insert_soup_ol(self, el: Tag) -> None: if self.pstyling_funcs: self.insert_soup_list(el) else: self.pstyling_funcs.append(make_pstyling_func(P_STYLE.BN)) self.insert_soup_list(el) self.pstyling_funcs.pop() Loading Loading @@ -1442,7 +1459,7 @@ class TSGenerator: if el.a: el.a.replace_with(el.a.text) match = re.match(r"^Figure ([\d\.-]+)", el.get_text()) id = f"Figure_{match.group(1)}" if match else None id = slugify(f"figure-{match.group(1)}") if match else None with Bookmark(self, id): self.insert_soup_children(el) Loading Loading @@ -1548,7 +1565,7 @@ class TSGenerator: """ self.new_paragraph(style=P_STYLE.TH) match = re.match(r"^Table (\d+)", el.get_text()) id = f"Table_{match.group(1)}" if match else None id = slugify(f"table-{match.group(1)}") if match else None with Bookmark(self, id): self.insert_soup_children(el) Loading Loading @@ -1585,7 +1602,7 @@ class TSGenerator: if colspan > 1: self._add_gridspan(cells[cell_idx], colspan) for i in range(colspan - 1): to_remove = cells[cell_idx + 1]._tc to_remove = cells[cell_idx + i + 1]._tc to_remove.getparent().remove(to_remove) rowspan = int(td.get("rowspan", "1")) Loading Loading @@ -1738,7 +1755,7 @@ class TSGenerator: self.new_paragraph(style=P_STYLE.Heading_3) with Bookmark(self, description.curie): with Bookmark(self, description.curie_id): self.new_run(description.curie) self.new_run(" — ") self.new_run(description.label) Loading Loading @@ -2051,7 +2068,7 @@ class TSGenerator: "http://www.iana.org/assignments/media-types/text/markdown" ): self.insert_soup( markdown(literal, extensions=["extra", "admonition", "codehilite"]) markdown(literal, extensions=["extra", "admonition"]) ) else: self.new_run(literal.replace("\r", "")) Loading