Loading src/saref_pypeline/docgen/ts2md_extractor.py +3 −3 Original line number Diff line number Diff line Loading @@ -300,7 +300,7 @@ def manage_ctx( def extract_run(run: Run, ctx: RunContext): # invariant: last character of last item in ctx.content is a non-blank character # invariant: last character of last item in ctx.content is a non-word character # manage opening/closing tags manage_ctx(ctx, run, Markup.STRONG) Loading @@ -312,7 +312,7 @@ def extract_run(run: Run, ctx: RunContext): text = run.text.replace(" ", " ") # extract white space before and after before, text, after = re.match(r"^(\s*)(.*?)(\s*)$", text, re.DOTALL).groups() before, text, after = re.match(r"^(\W*)(.*?)(\W*)$", text, re.DOTALL).groups() if before: ctx.buffer_blank.append(before) Loading Loading @@ -596,7 +596,7 @@ class TS2MDExtractor: ): ctx = RunContext(extract_format) # keep track of spaces, as md markup needs to be right before/after non-space characters # invariant: last character of last item in ctx.content is a non-blank character # invariant: last character of last item in ctx.content is a non-word character # We cannot use python-docx method iter_inner_content, as it only considers runs and hyperlinks. # For example elements <fldSimple> are ignored Loading src/saref_pypeline/docgen/ts_generator.py +181 −176 Original line number Diff line number Diff line Loading @@ -163,6 +163,23 @@ def strip_outer_text(el: Tag): if el.contents and isinstance(el.contents[-1], NavigableString): el.contents[-1].replace_with(el.contents[-1].rstrip()) def make_pstyling_func(style: P_STYLE) -> Callable[[Paragraph], None]: def f(paragraph:Paragraph): paragraph.style = style return f def make_cstyling_func(style: C_STYLE) -> Callable[[Run], None]: def f(run:Run): run.style = style return f def make_rstyling_function(apply_style:Callable[[Run], None], apply_default_style:Callable[[Run], None]|None) -> Callable[[Run], None]: def f(run:Run): if apply_default_style: apply_default_style(run) apply_style(run) return f class Bookmark: def __init__(self, outer: "TSGenerator", name: str): Loading Loading @@ -215,6 +232,11 @@ class TSGenerator: self.description(OWL.bottomDataProperty, OWL_GRAPH) self.description(OWL.topDataProperty, OWL_GRAPH) # functions to style new paragraphs and runs. # Style with higher index override style with lower index self.pstyling_funcs: List[Callable[[Paragraph], None]] = [] self.rstyling_funcs: List[Callable[[Run], None]] = [] @staticmethod def _add_gridspan(cell: _Cell, val: int): tcPr = cell._tc.get_or_add_tcPr() Loading Loading @@ -301,7 +323,7 @@ class TSGenerator: - If the cursor is a Run → move cursor to its ancestor Paragraph. - If the cursor is a Table → move cursor to a new paragraph. - If the cursor is a Cell_ → move cursor to a new paragraph in the cell. - If the cursor is a Cell → move cursor to a new paragraph in the cell. Returns: bool: if a new paragraph has been created Loading @@ -318,12 +340,16 @@ class TSGenerator: new_p = OxmlElement("w:p") self._cursor._element.addnext(new_p) paragraph = Paragraph(new_p, self._cursor._parent) for pstyling_func in self.pstyling_funcs: pstyling_func(paragraph) self._cursor = paragraph return True elif isinstance(self._cursor, _Cell): new_p = OxmlElement("w:p") self._cursor._element.append(new_p) paragraph = Paragraph(new_p, self._cursor) for pstyling_func in self.pstyling_funcs: pstyling_func(paragraph) self._cursor = paragraph return True raise ValueError() Loading @@ -337,35 +363,8 @@ class TSGenerator: """ self.ensure_cursor_paragraph() self._cursor = self._cursor.add_run() def ensure_pstyle(self, style: P_STYLE, styling: Callable = None) -> Callable: def more_styling(el=None): if styling: styling(el) if isinstance(self._cursor, Paragraph): self._cursor.style = style return more_styling def ensure_rstyle( self, style_run: Callable[[Run], None], styling: Callable = None ) -> Callable: def more_styling(el=None): if styling: styling(el) if isinstance(self._cursor, Run): style_run(self._cursor) return more_styling def ensure_cstyle(self, style: C_STYLE, styling: Callable = None) -> Callable: def more_styling(el=None): if styling: styling(el) if isinstance(self._cursor, Run): self._cursor.style = style return more_styling for rstyling_func in self.rstyling_funcs: rstyling_func(self._cursor) def delete_section(self, text: str, delete_heading: bool = True) -> None: """ Loading Loading @@ -493,6 +492,8 @@ class TSGenerator: new_p = OxmlElement("w:p") self._cursor._element.addnext(new_p) paragraph = Paragraph(new_p, self._cursor._parent) for pstyling_func in self.pstyling_funcs: pstyling_func(paragraph) if style: paragraph.style = style Loading Loading @@ -558,6 +559,8 @@ class TSGenerator: """ self.ensure_cursor_paragraph() run = self._cursor.add_run(text) for rstyling_func in self.rstyling_funcs: rstyling_func(run) for arg_name in kwargs: setattr(run, arg_name, kwargs[arg_name]) self._cursor = run Loading @@ -577,8 +580,7 @@ class TSGenerator: n[-1] += 1 clause = ".".join([str(i) for i in n]) self.new_paragraph() self._cursor.style = p_style self.new_paragraph(style=p_style) if not self._is_appendix: with Bookmark(self, f"Clause_{clause}"): self.new_run(clause) Loading Loading @@ -1089,45 +1091,52 @@ class TSGenerator: return soup def insert_soup( self, el: PageElement | str | None, styling: Callable = None self, el: PageElement | str | None ) -> None: if isinstance(el, BeautifulSoup): self.insert_soup_children(el, styling) self.insert_soup_children(el) elif isinstance(el, NavigableString): if not el or el == "\n": return self.ensure_cursor_paragraph() if styling: styling(el) self.new_run(el) if styling: styling(el) elif isinstance(el, Tag): with Bookmark(self, el.get("id")): if cstyle := el.get("data-docx-cstyle"): self.rstyling_funcs.append(make_cstyling_func(cstyle)) fname = f"insert_soup_{el.name}" if hasattr(self, fname) and callable(getattr(self, fname)): method = getattr(self, fname) method(el, styling) method(el) else: logger.warning( f"TSGenerator function {fname} not implemented - skipping" ) self.insert_soup_children(el, styling) self.insert_soup_children(el) if pstyle := el.get("data-docx-pstyle"): self.ensure_cursor_paragraph() self._cursor.style = pstyle if cstyle: self.rstyling_funcs.pop() elif isinstance(el, str): soup = BeautifulSoup(el, "html.parser") self.insert_soup(soup, styling) self.insert_soup(soup) else: raise ValueError() def insert_soup_children(self, el: Tag, styling: Callable = None) -> None: def insert_soup_children(self, el: Tag) -> None: for child in el.children: self.insert_soup(child, styling) self.insert_soup(child) def insert_soup_div(self, el: Tag, styling: Callable = None) -> None: def insert_soup_div(self, el: Tag) -> None: # only a few special cases of admonition are supported if "admonition" in el.get("class"): title = el.find("p", class_="admonition-title") Loading @@ -1139,33 +1148,26 @@ class TSGenerator: else: p = title style = P_STYLE.NO if title and "NOTE" in title.getText() else P_STYLE.EX self.insert_soup_p(p, self.ensure_pstyle(style)) self.pstyling_funcs.append(make_pstyling_func(style)) self.insert_soup(p) self.pstyling_funcs.pop() self.pstyling_funcs.append(make_pstyling_func(P_STYLE.EW)) for p in body_parts[1:]: self.insert_soup_p(p, self.ensure_pstyle(P_STYLE.EW, styling)) self.insert_soup(p) self.pstyling_funcs.pop() else: self.new_paragraph() self.apply_styles(el, styling) self.insert_soup_children(el, styling) def apply_styles(self, el: Tag, styling: Callable = None) -> None: if styling: styling(el) if isinstance(self._cursor, Paragraph): if style := el.get("data-docx-pstyle", None): self._cursor.style = style if isinstance(self._cursor, Run): if style := el.get("data-docx-cstyle", None): self._cursor.style = style self.insert_soup_children(el) def insert_soup_br(self, el: Tag, styling: Callable = None) -> None: def insert_soup_br(self, el: Tag) -> None: self.new_paragraph() self.apply_styles(el, styling) def insert_soup_p(self, el: Tag, styling: Callable = None) -> None: def insert_soup_p(self, el: Tag) -> None: self.new_paragraph() self.apply_styles(el, styling) add_tab = False if el.getText().startswith("NOTE"): self._cursor.style = P_STYLE.NO Loading @@ -1180,18 +1182,18 @@ class TSGenerator: child.replace_with(re.sub(r":\w*", ":\t", child.text, 1)) break self.insert_soup_children(el, styling) self.insert_soup_children(el) def insert_soup_h(self, el: Tag, styling: Callable = None) -> None: def insert_soup_h(self, el: Tag) -> None: p_style = HEADING_TAG_2_STYLE.get(el.name, None) self.new_heading(p_style) self.insert_soup_children(el, styling) self.insert_soup_children(el) def insert_soup_h1( self, el: Tag, styling: Callable = None, normative: bool = False self, el: Tag, normative: bool = False ) -> None: if not self._is_appendix: self.insert_soup_h(el, styling) self.insert_soup_h(el) else: # add appendix title self.new_paragraph() Loading @@ -1200,72 +1202,78 @@ class TSGenerator: with Bookmark(self, f"Annex_{annex_number}"): self.new_run(f"Annex {annex_number}") self.new_run(f" ({"normative" if normative else "informative"}):\n") self.insert_soup_children(el, styling) self.insert_soup_children(el) def insert_soup_h2(self, el: Tag, styling: Callable = None) -> None: self.insert_soup_h(el, styling) def insert_soup_h2(self, el: Tag) -> None: self.insert_soup_h(el) def insert_soup_h3(self, el: Tag, styling: Callable = None) -> None: self.insert_soup_h(el, styling) def insert_soup_h3(self, el: Tag) -> None: self.insert_soup_h(el) def insert_soup_h4(self, el: Tag, styling: Callable = None) -> None: self.insert_soup_h(el, styling) def insert_soup_h4(self, el: Tag) -> None: self.insert_soup_h(el) def insert_soup_h5(self, el: Tag, styling: Callable = None) -> None: self.insert_soup_h(el, styling) def insert_soup_h5(self, el: Tag) -> None: self.insert_soup_h(el) def insert_soup_h6(self, el: Tag, styling: Callable = None) -> None: self.insert_soup_h(el, styling) def insert_soup_h6(self, el: Tag) -> None: self.insert_soup_h(el) def insert_soup_h7(self, el: Tag, styling: Callable = None) -> None: self.insert_soup_h(el, styling) def insert_soup_h7(self, el: Tag) -> None: self.insert_soup_h(el) def insert_soup_h8(self, el: Tag, styling: Callable = None) -> None: self.insert_soup_h(el, styling) def insert_soup_h8(self, el: Tag) -> None: self.insert_soup_h(el) def insert_soup_h9(self, el: Tag, styling: Callable = None) -> None: self.insert_soup_h(el, styling) def insert_soup_h9(self, el: Tag) -> None: self.insert_soup_h(el) def insert_soup_ul(self, el: Tag, styling: Callable = None) -> None: styling = self.ensure_pstyle(P_STYLE.B1_plus, styling) self.insert_soup_list(el, styling) def insert_soup_ul(self, el: Tag) -> None: self.pstyling_funcs.append(make_pstyling_func(P_STYLE.B1_plus)) self.insert_soup_list(el) self.pstyling_funcs.pop() def insert_soup_ol(self, el: Tag, styling: Callable = None) -> None: styling = self.ensure_pstyle(P_STYLE.BN, styling) self.insert_soup_list(el, styling) def insert_soup_ol(self, el: Tag) -> None: self.pstyling_funcs.append(make_pstyling_func(P_STYLE.BN)) self.insert_soup_list(el) self.pstyling_funcs.pop() def insert_soup_list(self, el: Tag, styling: Callable = None) -> None: def insert_soup_list(self, el: Tag) -> None: for li in el.find_all("li", recursive=False): self.insert_soup_li(li, styling) self.insert_soup(li) def insert_soup_li(self, li: Tag, styling: Callable = None) -> None: def insert_soup_li(self, li: Tag) -> None: self.new_paragraph() self.apply_styles(li, styling) style = self._cursor.style for child in li.children: if isinstance(child, Tag) and child.name == "ul": if isinstance(child, Tag) and child.name in ["ul", "ol"]: if child.name == "ul": sub_p_style = SUB_UL_STYLE.get(style.name, None) more_styling = self.ensure_pstyle(sub_p_style, styling) self.insert_soup_list(child, more_styling) elif isinstance(child, Tag) and child.name == "ol": else: sub_p_style = SUB_OL_STYLE.get(style.name, None) more_styling = self.ensure_pstyle(sub_p_style, styling) self.insert_soup_list(child, more_styling) self.pstyling_funcs.append(make_pstyling_func(sub_p_style)) self.insert_soup_list(child) self.pstyling_funcs.pop() else: self.insert_soup(child, styling) self.insert_soup(child) def insert_soup_em(self, el: Tag, styling: Callable = None) -> None: def insert_soup_em(self, el: Tag) -> None: self.ensure_cursor_run() more_styling = self.ensure_rstyle(lambda r: setattr(r, "italic", True), styling) self.insert_soup_children(el, more_styling) def insert_soup_strong(self, el: Tag, styling: Callable = None) -> None: self.rstyling_funcs.append(lambda r: setattr(r, "italic", True)) self.insert_soup_children(el) self.rstyling_funcs.pop() def insert_soup_strong(self, el: Tag) -> None: self.ensure_cursor_run() more_styling = self.ensure_rstyle(lambda r: setattr(r, "bold", True), styling) self.insert_soup_children(el, more_styling) def insert_soup_code(self, el: Tag, styling: Callable = None) -> None: self.rstyling_funcs.append(lambda r: setattr(r, "bold", True)) self.insert_soup_children(el) self.rstyling_funcs.pop() def insert_soup_code(self, el: Tag) -> None: self.ensure_cursor_run() def run_styling(r): Loading @@ -1273,24 +1281,25 @@ class TSGenerator: # r.font.size = Pt(9) r.font.name = "Courier New" more_styling = self.ensure_rstyle(run_styling, styling) self.insert_soup_children(el, more_styling) self.rstyling_funcs.append(run_styling) self.insert_soup_children(el) self.rstyling_funcs.pop() def insert_soup_sup(self, el: Tag, styling: Callable = None) -> None: def insert_soup_sup(self, el: Tag) -> None: self.ensure_cursor_run() more_styling = self.ensure_rstyle( lambda r: setattr(r.font, "superscript", True), styling ) self.insert_soup_children(el, more_styling) def insert_soup_sub(self, el: Tag, styling: Callable = None) -> None: self.rstyling_funcs.append(lambda r: setattr(r.font, "superscript", True)) self.insert_soup_children(el) self.rstyling_funcs.pop() def insert_soup_sub(self, el: Tag) -> None: self.ensure_cursor_run() more_styling = self.ensure_rstyle( lambda r: setattr(r.font, "subscript", True), styling ) self.insert_soup_children(el, more_styling) def insert_soup_figure(self, el: Tag, styling: Callable = None) -> None: self.rstyling_funcs.append(lambda r: setattr(r.font, "subscript", True)) self.insert_soup_children(el) self.rstyling_funcs.pop() def insert_soup_figure(self, el: Tag) -> None: """ Add a <figure> element to the Word document, including image and optional caption. If data-docx-layout="landscape", wrap the figure in a landscape section, Loading Loading @@ -1335,7 +1344,7 @@ class TSGenerator: if child.name == "img": self.insert_soup_img(child, text_width) elif child.name == "figcaption": self.insert_soup_figcaption(child, styling) self.insert_soup_figcaption(child) if landscape: first_section = self.document.sections[0] Loading Loading @@ -1404,7 +1413,7 @@ class TSGenerator: src_rect.set("b", str(crop_b)) blipfill.insert(1, src_rect) # place after <a:blip> def insert_soup_figcaption(self, el: Tag, styling: Callable = None) -> None: def insert_soup_figcaption(self, el: Tag) -> None: """ Insert a figure caption from <figcaption> into the Word document. Tabs in the HTML are preserved in the Word paragraph. Loading @@ -1415,20 +1424,20 @@ class TSGenerator: match = re.match(r"^Figure (\d+)", el.get_text()) id = f"Figure_{match.group(1)}" if match else None with Bookmark(self, id): self.insert_soup_children(el, styling) self.insert_soup_children(el) def insert_soup_a(self, a: Tag, styling: Callable = None) -> None: def insert_soup_a(self, a: Tag) -> None: self.ensure_cursor_paragraph() if href := a.get("href"): if "://" in href: return self.insert_soup_hyperlink(a.text, href, styling) return self.insert_soup_hyperlink(a.text, href) else: return self.insert_soup_internal_hyperlink(a.text, href, styling) return self.insert_soup_internal_hyperlink(a.text, href) else: return self.insert_soup_children(a, styling) return self.insert_soup_children(a) def insert_soup_hyperlink( self, text: str, url: str, styling: Callable = None self, text: str, url: str ) -> None: # Create the relationship in the document for the hyperlink part = self._cursor.part Loading @@ -1442,22 +1451,18 @@ class TSGenerator: r = OxmlElement("w:r") run = Run(r, self._cursor) run.text = text run.style = "Hyperlink" # style if styling: paragraph = self._cursor self._cursor = run styling() self._cursor = paragraph styling() for rstyling_func in self.rstyling_funcs: rstyling_func(run) run.style = "Hyperlink" # Assemble and append h.append(r) self._cursor._element.append(h) def insert_soup_internal_hyperlink( self, text: str, anchor_name: str, styling: Callable = None self, text: str, anchor_name: str ) -> None: # special case if anchor_name == "#[0]": Loading @@ -1475,25 +1480,22 @@ class TSGenerator: r = OxmlElement("w:r") run = Run(r, self._cursor) run.text = text if text.startswith(f"{self.project.prefix}:"): run.style = "Hyperlink" # style if styling: paragraph = self._cursor self._cursor = run styling() self._cursor = paragraph for rstyling_func in self.rstyling_funcs: rstyling_func(run) if text.startswith(f"{self.project.prefix}:"): run.style = "Hyperlink" h.append(r) self._cursor._element.append(h) def insert_soup_table(self, el: Tag, styling: Callable = None) -> None: def insert_soup_table(self, el: Tag) -> None: """ Insert a <table> into the Word document. """ if el.caption: self.insert_soup_caption(el.caption, styling) self.insert_soup_caption(el.caption) rows = el.find_all("tr") nb_cols = len(rows[0].find_all(["td", "th"])) if not rows: Loading Loading @@ -1521,10 +1523,10 @@ class TSGenerator: spans = len(tbl.columns) * [(0, 1)] # remaining_vertical, colspan for tr in rows: spans = self.insert_soup_tr(tr, spans, tbl, styling) spans = self.insert_soup_tr(tr, spans, tbl) self._cursor = tbl def insert_soup_caption(self, el: Tag, styling: Callable = None) -> None: def insert_soup_caption(self, el: Tag) -> None: """ Insert a table caption (<caption> in HTML). """ Loading @@ -1532,10 +1534,10 @@ class TSGenerator: match = re.match(r"^Table (\d+)", el.get_text()) id = f"Table_{match.group(1)}" if match else None with Bookmark(self, id): self.insert_soup_children(el, styling) self.insert_soup_children(el) def insert_soup_tr( self, el: Tag, spans: List[int], table: Table, styling: Callable = None self, el: Tag, spans: List[int], table: Table ) -> List[int]: """ Add a <tr> (table row) to a python-docx Table. Loading Loading @@ -1581,54 +1583,57 @@ class TSGenerator: strip_outer_text(td) if td.name == "th": self.insert_soup_th(td, cells[cell_idx], styling) self.insert_soup_th(td, cells[cell_idx]) else: self.insert_soup_td(td, cells[cell_idx], styling) self.insert_soup_td(td, cells[cell_idx]) td_id += 1 cell_idx += colspan return spans def insert_soup_th(self, el: Tag, cell: _Cell, styling: Callable = None) -> None: def insert_soup_th(self, el: Tag, cell: _Cell) -> None: """ Add a <th> (table header cell). """ cell.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER self._cursor = cell.paragraphs[0] self._cursor.alignment = WD_ALIGN_PARAGRAPH.CENTER self._cursor.style = P_STYLE.TAH self.insert_soup_children(el, styling) self.insert_soup_thtd(el, cell, P_STYLE.TAH) def insert_soup_td(self, el: Tag, cell: _Cell, styling: Callable = None) -> None: def insert_soup_td(self, el: Tag, cell: _Cell) -> None: """ Add a <td> (table data cell). """ cell.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER # remove default paragraph cell._element.remove(cell.paragraphs[0]._element) self._cursor = cell # Decide style based on HTML attributes style = el.get("style", "").lower() style_attr = el.get("style", "").lower() styles = dict( rule.strip().split(":", 1) for rule in style.split(";") if ":" in rule rule.strip().split(":", 1) for rule in style_attr.split(";") if ":" in rule ) align = styles.get("text-align", "").strip() if align == "center": style = P_STYLE.TAC pstyle = P_STYLE.TAC elif align == "right": style = P_STYLE.TAR pstyle = P_STYLE.TAR else: style = P_STYLE.TAL # default align left self.insert_soup_children(el, self.ensure_pstyle(style, styling)) pstyle = P_STYLE.TAL # default align left self.insert_soup_thtd(el, cell, pstyle) def insert_soup_thtd(self, el: Tag, cell: _Cell, pstyle: P_STYLE) -> None: cell.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER # remove default paragraph cell._element.remove(cell.paragraphs[0]._element) self._cursor = cell self.pstyling_funcs.append(make_pstyling_func(pstyle)) self.insert_soup_children(el) self.pstyling_funcs.pop() # ensure every cell has a default paragraph if not cell.paragraphs: cell.add_paragraph() # --------------------------------------------------------------------- # Methods for the ontology reference annex # --------------------------------------------------------------------- Loading Loading
src/saref_pypeline/docgen/ts2md_extractor.py +3 −3 Original line number Diff line number Diff line Loading @@ -300,7 +300,7 @@ def manage_ctx( def extract_run(run: Run, ctx: RunContext): # invariant: last character of last item in ctx.content is a non-blank character # invariant: last character of last item in ctx.content is a non-word character # manage opening/closing tags manage_ctx(ctx, run, Markup.STRONG) Loading @@ -312,7 +312,7 @@ def extract_run(run: Run, ctx: RunContext): text = run.text.replace(" ", " ") # extract white space before and after before, text, after = re.match(r"^(\s*)(.*?)(\s*)$", text, re.DOTALL).groups() before, text, after = re.match(r"^(\W*)(.*?)(\W*)$", text, re.DOTALL).groups() if before: ctx.buffer_blank.append(before) Loading Loading @@ -596,7 +596,7 @@ class TS2MDExtractor: ): ctx = RunContext(extract_format) # keep track of spaces, as md markup needs to be right before/after non-space characters # invariant: last character of last item in ctx.content is a non-blank character # invariant: last character of last item in ctx.content is a non-word character # We cannot use python-docx method iter_inner_content, as it only considers runs and hyperlinks. # For example elements <fldSimple> are ignored Loading
src/saref_pypeline/docgen/ts_generator.py +181 −176 Original line number Diff line number Diff line Loading @@ -163,6 +163,23 @@ def strip_outer_text(el: Tag): if el.contents and isinstance(el.contents[-1], NavigableString): el.contents[-1].replace_with(el.contents[-1].rstrip()) def make_pstyling_func(style: P_STYLE) -> Callable[[Paragraph], None]: def f(paragraph:Paragraph): paragraph.style = style return f def make_cstyling_func(style: C_STYLE) -> Callable[[Run], None]: def f(run:Run): run.style = style return f def make_rstyling_function(apply_style:Callable[[Run], None], apply_default_style:Callable[[Run], None]|None) -> Callable[[Run], None]: def f(run:Run): if apply_default_style: apply_default_style(run) apply_style(run) return f class Bookmark: def __init__(self, outer: "TSGenerator", name: str): Loading Loading @@ -215,6 +232,11 @@ class TSGenerator: self.description(OWL.bottomDataProperty, OWL_GRAPH) self.description(OWL.topDataProperty, OWL_GRAPH) # functions to style new paragraphs and runs. # Style with higher index override style with lower index self.pstyling_funcs: List[Callable[[Paragraph], None]] = [] self.rstyling_funcs: List[Callable[[Run], None]] = [] @staticmethod def _add_gridspan(cell: _Cell, val: int): tcPr = cell._tc.get_or_add_tcPr() Loading Loading @@ -301,7 +323,7 @@ class TSGenerator: - If the cursor is a Run → move cursor to its ancestor Paragraph. - If the cursor is a Table → move cursor to a new paragraph. - If the cursor is a Cell_ → move cursor to a new paragraph in the cell. - If the cursor is a Cell → move cursor to a new paragraph in the cell. Returns: bool: if a new paragraph has been created Loading @@ -318,12 +340,16 @@ class TSGenerator: new_p = OxmlElement("w:p") self._cursor._element.addnext(new_p) paragraph = Paragraph(new_p, self._cursor._parent) for pstyling_func in self.pstyling_funcs: pstyling_func(paragraph) self._cursor = paragraph return True elif isinstance(self._cursor, _Cell): new_p = OxmlElement("w:p") self._cursor._element.append(new_p) paragraph = Paragraph(new_p, self._cursor) for pstyling_func in self.pstyling_funcs: pstyling_func(paragraph) self._cursor = paragraph return True raise ValueError() Loading @@ -337,35 +363,8 @@ class TSGenerator: """ self.ensure_cursor_paragraph() self._cursor = self._cursor.add_run() def ensure_pstyle(self, style: P_STYLE, styling: Callable = None) -> Callable: def more_styling(el=None): if styling: styling(el) if isinstance(self._cursor, Paragraph): self._cursor.style = style return more_styling def ensure_rstyle( self, style_run: Callable[[Run], None], styling: Callable = None ) -> Callable: def more_styling(el=None): if styling: styling(el) if isinstance(self._cursor, Run): style_run(self._cursor) return more_styling def ensure_cstyle(self, style: C_STYLE, styling: Callable = None) -> Callable: def more_styling(el=None): if styling: styling(el) if isinstance(self._cursor, Run): self._cursor.style = style return more_styling for rstyling_func in self.rstyling_funcs: rstyling_func(self._cursor) def delete_section(self, text: str, delete_heading: bool = True) -> None: """ Loading Loading @@ -493,6 +492,8 @@ class TSGenerator: new_p = OxmlElement("w:p") self._cursor._element.addnext(new_p) paragraph = Paragraph(new_p, self._cursor._parent) for pstyling_func in self.pstyling_funcs: pstyling_func(paragraph) if style: paragraph.style = style Loading Loading @@ -558,6 +559,8 @@ class TSGenerator: """ self.ensure_cursor_paragraph() run = self._cursor.add_run(text) for rstyling_func in self.rstyling_funcs: rstyling_func(run) for arg_name in kwargs: setattr(run, arg_name, kwargs[arg_name]) self._cursor = run Loading @@ -577,8 +580,7 @@ class TSGenerator: n[-1] += 1 clause = ".".join([str(i) for i in n]) self.new_paragraph() self._cursor.style = p_style self.new_paragraph(style=p_style) if not self._is_appendix: with Bookmark(self, f"Clause_{clause}"): self.new_run(clause) Loading Loading @@ -1089,45 +1091,52 @@ class TSGenerator: return soup def insert_soup( self, el: PageElement | str | None, styling: Callable = None self, el: PageElement | str | None ) -> None: if isinstance(el, BeautifulSoup): self.insert_soup_children(el, styling) self.insert_soup_children(el) elif isinstance(el, NavigableString): if not el or el == "\n": return self.ensure_cursor_paragraph() if styling: styling(el) self.new_run(el) if styling: styling(el) elif isinstance(el, Tag): with Bookmark(self, el.get("id")): if cstyle := el.get("data-docx-cstyle"): self.rstyling_funcs.append(make_cstyling_func(cstyle)) fname = f"insert_soup_{el.name}" if hasattr(self, fname) and callable(getattr(self, fname)): method = getattr(self, fname) method(el, styling) method(el) else: logger.warning( f"TSGenerator function {fname} not implemented - skipping" ) self.insert_soup_children(el, styling) self.insert_soup_children(el) if pstyle := el.get("data-docx-pstyle"): self.ensure_cursor_paragraph() self._cursor.style = pstyle if cstyle: self.rstyling_funcs.pop() elif isinstance(el, str): soup = BeautifulSoup(el, "html.parser") self.insert_soup(soup, styling) self.insert_soup(soup) else: raise ValueError() def insert_soup_children(self, el: Tag, styling: Callable = None) -> None: def insert_soup_children(self, el: Tag) -> None: for child in el.children: self.insert_soup(child, styling) self.insert_soup(child) def insert_soup_div(self, el: Tag, styling: Callable = None) -> None: def insert_soup_div(self, el: Tag) -> None: # only a few special cases of admonition are supported if "admonition" in el.get("class"): title = el.find("p", class_="admonition-title") Loading @@ -1139,33 +1148,26 @@ class TSGenerator: else: p = title style = P_STYLE.NO if title and "NOTE" in title.getText() else P_STYLE.EX self.insert_soup_p(p, self.ensure_pstyle(style)) self.pstyling_funcs.append(make_pstyling_func(style)) self.insert_soup(p) self.pstyling_funcs.pop() self.pstyling_funcs.append(make_pstyling_func(P_STYLE.EW)) for p in body_parts[1:]: self.insert_soup_p(p, self.ensure_pstyle(P_STYLE.EW, styling)) self.insert_soup(p) self.pstyling_funcs.pop() else: self.new_paragraph() self.apply_styles(el, styling) self.insert_soup_children(el, styling) def apply_styles(self, el: Tag, styling: Callable = None) -> None: if styling: styling(el) if isinstance(self._cursor, Paragraph): if style := el.get("data-docx-pstyle", None): self._cursor.style = style if isinstance(self._cursor, Run): if style := el.get("data-docx-cstyle", None): self._cursor.style = style self.insert_soup_children(el) def insert_soup_br(self, el: Tag, styling: Callable = None) -> None: def insert_soup_br(self, el: Tag) -> None: self.new_paragraph() self.apply_styles(el, styling) def insert_soup_p(self, el: Tag, styling: Callable = None) -> None: def insert_soup_p(self, el: Tag) -> None: self.new_paragraph() self.apply_styles(el, styling) add_tab = False if el.getText().startswith("NOTE"): self._cursor.style = P_STYLE.NO Loading @@ -1180,18 +1182,18 @@ class TSGenerator: child.replace_with(re.sub(r":\w*", ":\t", child.text, 1)) break self.insert_soup_children(el, styling) self.insert_soup_children(el) def insert_soup_h(self, el: Tag, styling: Callable = None) -> None: def insert_soup_h(self, el: Tag) -> None: p_style = HEADING_TAG_2_STYLE.get(el.name, None) self.new_heading(p_style) self.insert_soup_children(el, styling) self.insert_soup_children(el) def insert_soup_h1( self, el: Tag, styling: Callable = None, normative: bool = False self, el: Tag, normative: bool = False ) -> None: if not self._is_appendix: self.insert_soup_h(el, styling) self.insert_soup_h(el) else: # add appendix title self.new_paragraph() Loading @@ -1200,72 +1202,78 @@ class TSGenerator: with Bookmark(self, f"Annex_{annex_number}"): self.new_run(f"Annex {annex_number}") self.new_run(f" ({"normative" if normative else "informative"}):\n") self.insert_soup_children(el, styling) self.insert_soup_children(el) def insert_soup_h2(self, el: Tag, styling: Callable = None) -> None: self.insert_soup_h(el, styling) def insert_soup_h2(self, el: Tag) -> None: self.insert_soup_h(el) def insert_soup_h3(self, el: Tag, styling: Callable = None) -> None: self.insert_soup_h(el, styling) def insert_soup_h3(self, el: Tag) -> None: self.insert_soup_h(el) def insert_soup_h4(self, el: Tag, styling: Callable = None) -> None: self.insert_soup_h(el, styling) def insert_soup_h4(self, el: Tag) -> None: self.insert_soup_h(el) def insert_soup_h5(self, el: Tag, styling: Callable = None) -> None: self.insert_soup_h(el, styling) def insert_soup_h5(self, el: Tag) -> None: self.insert_soup_h(el) def insert_soup_h6(self, el: Tag, styling: Callable = None) -> None: self.insert_soup_h(el, styling) def insert_soup_h6(self, el: Tag) -> None: self.insert_soup_h(el) def insert_soup_h7(self, el: Tag, styling: Callable = None) -> None: self.insert_soup_h(el, styling) def insert_soup_h7(self, el: Tag) -> None: self.insert_soup_h(el) def insert_soup_h8(self, el: Tag, styling: Callable = None) -> None: self.insert_soup_h(el, styling) def insert_soup_h8(self, el: Tag) -> None: self.insert_soup_h(el) def insert_soup_h9(self, el: Tag, styling: Callable = None) -> None: self.insert_soup_h(el, styling) def insert_soup_h9(self, el: Tag) -> None: self.insert_soup_h(el) def insert_soup_ul(self, el: Tag, styling: Callable = None) -> None: styling = self.ensure_pstyle(P_STYLE.B1_plus, styling) self.insert_soup_list(el, styling) def insert_soup_ul(self, el: Tag) -> None: self.pstyling_funcs.append(make_pstyling_func(P_STYLE.B1_plus)) self.insert_soup_list(el) self.pstyling_funcs.pop() def insert_soup_ol(self, el: Tag, styling: Callable = None) -> None: styling = self.ensure_pstyle(P_STYLE.BN, styling) self.insert_soup_list(el, styling) def insert_soup_ol(self, el: Tag) -> None: self.pstyling_funcs.append(make_pstyling_func(P_STYLE.BN)) self.insert_soup_list(el) self.pstyling_funcs.pop() def insert_soup_list(self, el: Tag, styling: Callable = None) -> None: def insert_soup_list(self, el: Tag) -> None: for li in el.find_all("li", recursive=False): self.insert_soup_li(li, styling) self.insert_soup(li) def insert_soup_li(self, li: Tag, styling: Callable = None) -> None: def insert_soup_li(self, li: Tag) -> None: self.new_paragraph() self.apply_styles(li, styling) style = self._cursor.style for child in li.children: if isinstance(child, Tag) and child.name == "ul": if isinstance(child, Tag) and child.name in ["ul", "ol"]: if child.name == "ul": sub_p_style = SUB_UL_STYLE.get(style.name, None) more_styling = self.ensure_pstyle(sub_p_style, styling) self.insert_soup_list(child, more_styling) elif isinstance(child, Tag) and child.name == "ol": else: sub_p_style = SUB_OL_STYLE.get(style.name, None) more_styling = self.ensure_pstyle(sub_p_style, styling) self.insert_soup_list(child, more_styling) self.pstyling_funcs.append(make_pstyling_func(sub_p_style)) self.insert_soup_list(child) self.pstyling_funcs.pop() else: self.insert_soup(child, styling) self.insert_soup(child) def insert_soup_em(self, el: Tag, styling: Callable = None) -> None: def insert_soup_em(self, el: Tag) -> None: self.ensure_cursor_run() more_styling = self.ensure_rstyle(lambda r: setattr(r, "italic", True), styling) self.insert_soup_children(el, more_styling) def insert_soup_strong(self, el: Tag, styling: Callable = None) -> None: self.rstyling_funcs.append(lambda r: setattr(r, "italic", True)) self.insert_soup_children(el) self.rstyling_funcs.pop() def insert_soup_strong(self, el: Tag) -> None: self.ensure_cursor_run() more_styling = self.ensure_rstyle(lambda r: setattr(r, "bold", True), styling) self.insert_soup_children(el, more_styling) def insert_soup_code(self, el: Tag, styling: Callable = None) -> None: self.rstyling_funcs.append(lambda r: setattr(r, "bold", True)) self.insert_soup_children(el) self.rstyling_funcs.pop() def insert_soup_code(self, el: Tag) -> None: self.ensure_cursor_run() def run_styling(r): Loading @@ -1273,24 +1281,25 @@ class TSGenerator: # r.font.size = Pt(9) r.font.name = "Courier New" more_styling = self.ensure_rstyle(run_styling, styling) self.insert_soup_children(el, more_styling) self.rstyling_funcs.append(run_styling) self.insert_soup_children(el) self.rstyling_funcs.pop() def insert_soup_sup(self, el: Tag, styling: Callable = None) -> None: def insert_soup_sup(self, el: Tag) -> None: self.ensure_cursor_run() more_styling = self.ensure_rstyle( lambda r: setattr(r.font, "superscript", True), styling ) self.insert_soup_children(el, more_styling) def insert_soup_sub(self, el: Tag, styling: Callable = None) -> None: self.rstyling_funcs.append(lambda r: setattr(r.font, "superscript", True)) self.insert_soup_children(el) self.rstyling_funcs.pop() def insert_soup_sub(self, el: Tag) -> None: self.ensure_cursor_run() more_styling = self.ensure_rstyle( lambda r: setattr(r.font, "subscript", True), styling ) self.insert_soup_children(el, more_styling) def insert_soup_figure(self, el: Tag, styling: Callable = None) -> None: self.rstyling_funcs.append(lambda r: setattr(r.font, "subscript", True)) self.insert_soup_children(el) self.rstyling_funcs.pop() def insert_soup_figure(self, el: Tag) -> None: """ Add a <figure> element to the Word document, including image and optional caption. If data-docx-layout="landscape", wrap the figure in a landscape section, Loading Loading @@ -1335,7 +1344,7 @@ class TSGenerator: if child.name == "img": self.insert_soup_img(child, text_width) elif child.name == "figcaption": self.insert_soup_figcaption(child, styling) self.insert_soup_figcaption(child) if landscape: first_section = self.document.sections[0] Loading Loading @@ -1404,7 +1413,7 @@ class TSGenerator: src_rect.set("b", str(crop_b)) blipfill.insert(1, src_rect) # place after <a:blip> def insert_soup_figcaption(self, el: Tag, styling: Callable = None) -> None: def insert_soup_figcaption(self, el: Tag) -> None: """ Insert a figure caption from <figcaption> into the Word document. Tabs in the HTML are preserved in the Word paragraph. Loading @@ -1415,20 +1424,20 @@ class TSGenerator: match = re.match(r"^Figure (\d+)", el.get_text()) id = f"Figure_{match.group(1)}" if match else None with Bookmark(self, id): self.insert_soup_children(el, styling) self.insert_soup_children(el) def insert_soup_a(self, a: Tag, styling: Callable = None) -> None: def insert_soup_a(self, a: Tag) -> None: self.ensure_cursor_paragraph() if href := a.get("href"): if "://" in href: return self.insert_soup_hyperlink(a.text, href, styling) return self.insert_soup_hyperlink(a.text, href) else: return self.insert_soup_internal_hyperlink(a.text, href, styling) return self.insert_soup_internal_hyperlink(a.text, href) else: return self.insert_soup_children(a, styling) return self.insert_soup_children(a) def insert_soup_hyperlink( self, text: str, url: str, styling: Callable = None self, text: str, url: str ) -> None: # Create the relationship in the document for the hyperlink part = self._cursor.part Loading @@ -1442,22 +1451,18 @@ class TSGenerator: r = OxmlElement("w:r") run = Run(r, self._cursor) run.text = text run.style = "Hyperlink" # style if styling: paragraph = self._cursor self._cursor = run styling() self._cursor = paragraph styling() for rstyling_func in self.rstyling_funcs: rstyling_func(run) run.style = "Hyperlink" # Assemble and append h.append(r) self._cursor._element.append(h) def insert_soup_internal_hyperlink( self, text: str, anchor_name: str, styling: Callable = None self, text: str, anchor_name: str ) -> None: # special case if anchor_name == "#[0]": Loading @@ -1475,25 +1480,22 @@ class TSGenerator: r = OxmlElement("w:r") run = Run(r, self._cursor) run.text = text if text.startswith(f"{self.project.prefix}:"): run.style = "Hyperlink" # style if styling: paragraph = self._cursor self._cursor = run styling() self._cursor = paragraph for rstyling_func in self.rstyling_funcs: rstyling_func(run) if text.startswith(f"{self.project.prefix}:"): run.style = "Hyperlink" h.append(r) self._cursor._element.append(h) def insert_soup_table(self, el: Tag, styling: Callable = None) -> None: def insert_soup_table(self, el: Tag) -> None: """ Insert a <table> into the Word document. """ if el.caption: self.insert_soup_caption(el.caption, styling) self.insert_soup_caption(el.caption) rows = el.find_all("tr") nb_cols = len(rows[0].find_all(["td", "th"])) if not rows: Loading Loading @@ -1521,10 +1523,10 @@ class TSGenerator: spans = len(tbl.columns) * [(0, 1)] # remaining_vertical, colspan for tr in rows: spans = self.insert_soup_tr(tr, spans, tbl, styling) spans = self.insert_soup_tr(tr, spans, tbl) self._cursor = tbl def insert_soup_caption(self, el: Tag, styling: Callable = None) -> None: def insert_soup_caption(self, el: Tag) -> None: """ Insert a table caption (<caption> in HTML). """ Loading @@ -1532,10 +1534,10 @@ class TSGenerator: match = re.match(r"^Table (\d+)", el.get_text()) id = f"Table_{match.group(1)}" if match else None with Bookmark(self, id): self.insert_soup_children(el, styling) self.insert_soup_children(el) def insert_soup_tr( self, el: Tag, spans: List[int], table: Table, styling: Callable = None self, el: Tag, spans: List[int], table: Table ) -> List[int]: """ Add a <tr> (table row) to a python-docx Table. Loading Loading @@ -1581,54 +1583,57 @@ class TSGenerator: strip_outer_text(td) if td.name == "th": self.insert_soup_th(td, cells[cell_idx], styling) self.insert_soup_th(td, cells[cell_idx]) else: self.insert_soup_td(td, cells[cell_idx], styling) self.insert_soup_td(td, cells[cell_idx]) td_id += 1 cell_idx += colspan return spans def insert_soup_th(self, el: Tag, cell: _Cell, styling: Callable = None) -> None: def insert_soup_th(self, el: Tag, cell: _Cell) -> None: """ Add a <th> (table header cell). """ cell.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER self._cursor = cell.paragraphs[0] self._cursor.alignment = WD_ALIGN_PARAGRAPH.CENTER self._cursor.style = P_STYLE.TAH self.insert_soup_children(el, styling) self.insert_soup_thtd(el, cell, P_STYLE.TAH) def insert_soup_td(self, el: Tag, cell: _Cell, styling: Callable = None) -> None: def insert_soup_td(self, el: Tag, cell: _Cell) -> None: """ Add a <td> (table data cell). """ cell.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER # remove default paragraph cell._element.remove(cell.paragraphs[0]._element) self._cursor = cell # Decide style based on HTML attributes style = el.get("style", "").lower() style_attr = el.get("style", "").lower() styles = dict( rule.strip().split(":", 1) for rule in style.split(";") if ":" in rule rule.strip().split(":", 1) for rule in style_attr.split(";") if ":" in rule ) align = styles.get("text-align", "").strip() if align == "center": style = P_STYLE.TAC pstyle = P_STYLE.TAC elif align == "right": style = P_STYLE.TAR pstyle = P_STYLE.TAR else: style = P_STYLE.TAL # default align left self.insert_soup_children(el, self.ensure_pstyle(style, styling)) pstyle = P_STYLE.TAL # default align left self.insert_soup_thtd(el, cell, pstyle) def insert_soup_thtd(self, el: Tag, cell: _Cell, pstyle: P_STYLE) -> None: cell.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER # remove default paragraph cell._element.remove(cell.paragraphs[0]._element) self._cursor = cell self.pstyling_funcs.append(make_pstyling_func(pstyle)) self.insert_soup_children(el) self.pstyling_funcs.pop() # ensure every cell has a default paragraph if not cell.paragraphs: cell.add_paragraph() # --------------------------------------------------------------------- # Methods for the ontology reference annex # --------------------------------------------------------------------- Loading