Loading src/saref_pypeline/docgen/ts2md_extractor.py +9 −2 Original line number Diff line number Diff line Loading @@ -853,7 +853,7 @@ class TS2MDExtractor: """Figure title""" md = self.extract_inner_content(paragraph, extract_format) match = re.match(r"(\w+[\s\xa0]+[A-Z0-9\.]+)[: \xa0]*(.*)", md) match = re.match(r"(\w+[\s\xa0]+[A-Z0-9\.-]+)[: \xa0]*(.*)", md) if not match: return "" label, caption = match.group(1), match.group(2) Loading @@ -875,6 +875,13 @@ class TS2MDExtractor: self, paragraph: Paragraph, extract_format: ExtractFormat = ExtractFormat.MD ): """Figure layout, do nothing""" if paragraph.text.startswith("Figure"): logger.warning(f"Wrong style for {paragraph.text}") return self.extract_TF(paragraph, extract_format) elif paragraph.text.strip(): logger.warning(f"Styl FL should only be for Figure layout. Got {paragraph.text}") return self.extract_Normal(paragraph, extract_format) else: return "" def extract_NF( Loading src/saref_pypeline/docgen/ts_generator.py +7 −7 Original line number Diff line number Diff line Loading @@ -935,8 +935,6 @@ class TSGenerator: ) return for p in soup.find_all("p"): p.attrs["class"] = "Normal" for li in soup.find_all("li"): li.string = re.sub(r" *: *", "\t", li.string, 1) li["data-docx-pstyle"] = P_STYLE.EW Loading @@ -957,8 +955,6 @@ class TSGenerator: ) return for p in soup.find_all("p"): p.attrs["class"] = "Normal" for li in soup.find_all("li"): for desc in li.descendants: if isinstance(desc, NavigableString): Loading @@ -966,7 +962,11 @@ class TSGenerator: if n: # replaced once desc.replace_with(new_text) break if li.find_next_sibling("li") is not None: li["data-docx-pstyle"] = P_STYLE.EW else: li["data-docx-pstyle"] = P_STYLE.EX self.insert_soup(soup) def edit_history(self): Loading Loading @@ -1039,7 +1039,7 @@ class TSGenerator: # Methods for inserting soup # --------------------------------------------------------------------- @lru_cache # @lru_cache def get_soup(self, file_name: str) -> BeautifulSoup | None: """ Load a file (HTML or Markdown) as a BeautifulSoup object. Loading Loading @@ -1421,7 +1421,7 @@ class TSGenerator: self.new_paragraph(style=P_STYLE.TF) if el.a: el.a.replace_with(el.a.text) match = re.match(r"^Figure (\d+)", el.get_text()) match = re.match(r"^Figure ([\d\.-]+)", el.get_text()) id = f"Figure_{match.group(1)}" if match else None with Bookmark(self, id): self.insert_soup_children(el) Loading Loading
src/saref_pypeline/docgen/ts2md_extractor.py +9 −2 Original line number Diff line number Diff line Loading @@ -853,7 +853,7 @@ class TS2MDExtractor: """Figure title""" md = self.extract_inner_content(paragraph, extract_format) match = re.match(r"(\w+[\s\xa0]+[A-Z0-9\.]+)[: \xa0]*(.*)", md) match = re.match(r"(\w+[\s\xa0]+[A-Z0-9\.-]+)[: \xa0]*(.*)", md) if not match: return "" label, caption = match.group(1), match.group(2) Loading @@ -875,6 +875,13 @@ class TS2MDExtractor: self, paragraph: Paragraph, extract_format: ExtractFormat = ExtractFormat.MD ): """Figure layout, do nothing""" if paragraph.text.startswith("Figure"): logger.warning(f"Wrong style for {paragraph.text}") return self.extract_TF(paragraph, extract_format) elif paragraph.text.strip(): logger.warning(f"Styl FL should only be for Figure layout. Got {paragraph.text}") return self.extract_Normal(paragraph, extract_format) else: return "" def extract_NF( Loading
src/saref_pypeline/docgen/ts_generator.py +7 −7 Original line number Diff line number Diff line Loading @@ -935,8 +935,6 @@ class TSGenerator: ) return for p in soup.find_all("p"): p.attrs["class"] = "Normal" for li in soup.find_all("li"): li.string = re.sub(r" *: *", "\t", li.string, 1) li["data-docx-pstyle"] = P_STYLE.EW Loading @@ -957,8 +955,6 @@ class TSGenerator: ) return for p in soup.find_all("p"): p.attrs["class"] = "Normal" for li in soup.find_all("li"): for desc in li.descendants: if isinstance(desc, NavigableString): Loading @@ -966,7 +962,11 @@ class TSGenerator: if n: # replaced once desc.replace_with(new_text) break if li.find_next_sibling("li") is not None: li["data-docx-pstyle"] = P_STYLE.EW else: li["data-docx-pstyle"] = P_STYLE.EX self.insert_soup(soup) def edit_history(self): Loading Loading @@ -1039,7 +1039,7 @@ class TSGenerator: # Methods for inserting soup # --------------------------------------------------------------------- @lru_cache # @lru_cache def get_soup(self, file_name: str) -> BeautifulSoup | None: """ Load a file (HTML or Markdown) as a BeautifulSoup object. Loading Loading @@ -1421,7 +1421,7 @@ class TSGenerator: self.new_paragraph(style=P_STYLE.TF) if el.a: el.a.replace_with(el.a.text) match = re.match(r"^Figure (\d+)", el.get_text()) match = re.match(r"^Figure ([\d\.-]+)", el.get_text()) id = f"Figure_{match.group(1)}" if match else None with Bookmark(self, id): self.insert_soup_children(el) Loading