Unverified Commit 292e0c55 authored by Maxime Lefrançois's avatar Maxime Lefrançois
Browse files

ongoing work on docx generation

parent fd02818c
Loading
Loading
Loading
Loading
+918 −191

File changed.

Preview size limit exceeded, changes collapsed.

saref_pypeline/etsi.py

0 → 100644
+293 −0
Original line number Diff line number Diff line
from enum import Enum, EnumMeta, StrEnum
import inspect

class WK_FIELD(StrEnum):
    """Enumeration of fields of interest for a work item.

    For the fields present in the ETSI skeleton for Technical Specifications,
    the member value is the placeholder text found in that skeleton; it needs
    to be replaced by the corresponding value of a work item. The first two
    members are not present in the skeleton.

    The string literal written after each member describes that member.
    """
    # fields not present in the ETSI skeleton
    WK_ID = "Work item ID"
    """Work Item ID on the ETSI portal."""
    Short_Title = "<short title>"
    """Short Title of the Work Item"""

    # fields present in the ETSI skeleton
    ONE_DD = "1DD"
    """First three digits of document number (ex. for TS 103 410-1: "103")"""
    DDD = "DDD"
    """Last three digits of document number, optionally dash part (ex. for TS 103 410-1: "410-1")"""
    PART = "PART"
    """Part of the technical specification (ex. for TS 103 410-1: "1")"""
    mte = "m.t.e"
    """Version number (major, technical, editorial) (ex. for TS 103 410-1 V2.1.1: "2.1.1")"""
    yyyy = "yyyy"
    """Publication year"""
    mm = "mm"
    """Publication month"""
    Title = "Title;"
    """Title 1"""
    Title_Part = "Part #: Part element of title;"
    """Title 2"""
    Title_Sub_Part = "Sub-part #: Sub-part element of title"
    """Title 3"""
    Title_Release = "Release #"
    """Release number"""
    Workitem = "<Workitem>"
    """Workitem number, ex. RTS/SmartM2M-103410-8v211"""
    Keywords = "<keywords>"
    """Keywords, ex. AGEING, HEALTH, IoT, oneM2M, ontology, SAREF, Semantic"""
    TB_type = "ETSI Technical Committee|ETSI Project|<other>"
    """Type of technical body, example "ETSI Technical Committee"."""
    long_techbody = "<long techbody>"
    """Long name of technical body, example "Data Solutions"."""
    short_techbody = "<short techbody>"
    """Short name of technical body, example "DATA"."""

class TBType(Enum):
    """Enumeration of types of ETSI Technical Bodies.

    The value of each member is the name of the type written out in full.
    """
    ETSI_Technical_Committee = "ETSI Technical Committee"
    ETSI_Project = "ETSI Project"

class TB(Enum):
    """Enumeration of ETSI Technical Bodies.

    Every member is declared as a ``(TBType member, long name)`` pair; the
    enum machinery passes the two items of the pair to ``__init__``.

    Attributes:
        type: the ``value`` of the ``TBType`` member given in the declaration.
        long_name: the long name given in the declaration.
    """

    SmartM2M = (
        TBType.ETSI_Technical_Committee,
        "Smart Machine-to-Machine communications",
    )
    DATA = (TBType.ETSI_Technical_Committee, "Data Solutions")

    def __init__(self, type_: TBType, long_name):
        # Only the value of the TBType member is kept, not the member itself.
        self.long_name = long_name
        self.type = type_.value

class _MetaEnum(EnumMeta):
    """Source: https://stackoverflow.com/a/78943193"""
    def __new__(metacls, clsname, bases, classdict):
        cls = super().__new__(metacls, clsname, bases, classdict)

        # Extract source code and split docstrings
        source = inspect.getsource(cls)
        docstrings = source.split('"""')[-2::-2]

        # Assign the docstrings to enum members
        for member_name, doc_str in zip(reversed(cls._member_names_), docstrings):
            enum_member = getattr(cls, member_name)
            enum_member.__doc__ = doc_str.strip()

        return cls
    
class P_STYLE(StrEnum, metaclass=_MetaEnum):
    """ETSI Styles, listed at https://portal.etsi.org/Services/editHelp/Standards-development/Drafting/Styles-listing/Styles-listing

    The value of each member is the name of a style. The string literal
    written after each member describes that style; the class is created by
    ``_MetaEnum``, which reads these literals from the source code of the
    class and stores them as the ``__doc__`` of the members.

    Do not alter existing styles or formats pre-set in the ETSI styles, do not
    add new styles to the ETSI template and do not delete ETSI styles.

    The ETSI Secretariat provides a Microsoft® Word template which contains a
    set of pre-defined styles simplifying the formatting of documents
    according to the ETSI drafting rules:

    - applying the ETSI template from the very beginning of work avoids delay
      throughout the drafting stage;
    - it can be applied to a new or existing ETSI deliverable;
    - it is recommended to attach it to the change request (CR) template;
    - it must only be used for the purpose of the standardization work within
      ETSI.

    For Word for Windows® 2007 and higher use the following file:

    Download ETSIW_2013.dotm (17 kb): https://portal.etsi.org/Portals/0/TBpages/edithelp/Docs/ETSIW_2013.dotm

    ETSI Technical Specification (TS)

    TS (ETSI Technical Specification) is the preferred deliverable when the
    document contains normative provisions and short time to "market",
    validation and maintenance are essential. A TS may later be converted to
    an ES or an EN, or be used to publish the contents of a draft ES being
    sent for vote or a draft EN being sent for Public Enquiry or vote.

    https://portal.etsi.org/Portals/0/TBpages/edithelp/Docs/ETSI_Skeletons/ETSI_TS_skeleton.docx
    """

    # Heading styles: for different headings

    Heading_1 = "Heading 1"
    """Clause"""
    Heading_2 = "Heading 2"
    """Subdivision level 2"""
    Heading_3 = "Heading 3"
    """Subdivision level 3"""
    Heading_4 = "Heading 4"
    """Subdivision level 4"""
    Heading_5 = "Heading 5"
    """Subdivision level 5"""
    H6 = "H6"
    """Subdivision level 6 (not reflected in the table of contents)"""
    Heading_8 = "Heading 8"
    """Annex title (for ENs, HSs, TSs, ESs and GSs only)"""
    Heading_9 = "Heading 9"
    """Annex title (for TRs, EGs, GRs and SRs only)"""

    # Example styles: for examples and abbreviations/symbols lists

    EX = "EX"
    """Reference, Example => use "tab" between "item/number" and "text"."""
    EW = "EW"
    """Symbol, Abbreviation, Example continuation in text => use "tab" between "item/number" and "text"."""
    NO = "NO"
    """Note integrated in the text => use "tab" between "item/number" and "text"."""

    # Figure styles: for formatting figures
    TF = "TF"
    """Figure title"""
    FL = "FL"
    """Figure layout"""
    NF = "NF"
    """Note in figure => use "tab" between "item/number" and "text"."""


    # Table styles: for formatting tables

    TH = "TH"
    """Table title"""
    TAH = "TAH"
    """Heading within table or column heading """
    TAC = "TAC"
    """Centred texts"""
    TAL = "TAL"
    """Left aligned text """
    TAR = "TAR"
    """Right aligned text """
    TB1 = "TB1"
    """List in tables Level 1 """
    TB2 = "TB2"
    """List in tables Level 2 """
    TAN = "TAN"
    """Note in table => use "tab" between "item/number" and "text"."""

    # List styles (indents)

    B1 = "B1"
    """Indent 1"""
    B2 = "B2"
    """Indent 2"""
    B3 = "B3"
    """Indent 3"""
    B4 = "B4"
    """Indent 4"""
    B5 = "B5"
    """Indent 5"""
    B1_plus = "B1+"
    """Bulleted indent 1 (round bullets)"""
    B2_plus = "B2+"
    """Bulleted indent 2 (dashes) """
    B3_plus = "B3+"
    """Bulleted indent 3 (square bullets) """
    BN = "BN"
    """Bulleted (numbers) indent 1 """
    BL = "BL"
    """Bulleted (letters) indent 1 """

    # General styles: for different items
    Normal = "Normal"
    """Standard paragraph, Definition """
    TT = "TT"
    """Contents list title """
    PL = "PL"
    """Programming language """
    EQ = "EQ"
    """Equation """
    Header = "Header"
    """Header (portrait and landscape pages)"""

    # Style which can be user-defined: for formatting defined by the user
    # that will not be altered by the ETSI processing macros
    FP = "FP"
    """Free Paragraph. 
    Style which can be user-defined
    For formatting defined by the user that will not be altered by the ETSI processing macros """

class C_STYLE(StrEnum, metaclass=_MetaEnum):
    """ETSI Character Styles, obtained from skeleton.

    The value of each member is the name of a character style. The
    triple-quoted string written after each member describes how the style
    looks; ``_MetaEnum`` stores it as the ``__doc__`` of the member.
    """

    Normal = "Default Paragraph Font"
    # Triple quotes matter here: with a plain "..." literal the metaclass
    # paired every description with the wrong member.
    """Times New Roman, 10"""

    Hyperlink = "Hyperlink"
    """Blue, Underlined"""

    Guidance = "Guidance"
    """Green, Italics"""

    Strong = "Strong"
    """Bold"""

    HTML_Definition = "HTML Definition"
    """Times, 10, Italics"""

    HTML_Keyboard = "HTML Keyboard"
    """Courier New, 10"""

    HTML_Sample = "HTML Sample"
    """Courier New, 10"""



# All heading styles: the six subdivision levels in increasing depth,
# followed by the two annex-title styles.
HEADING_STYLES = [
    P_STYLE.Heading_1,
    P_STYLE.Heading_2,
    P_STYLE.Heading_3,
    P_STYLE.Heading_4,
    P_STYLE.Heading_5,
    P_STYLE.H6,
    P_STYLE.Heading_8,
    P_STYLE.Heading_9,
]

# Heading tag name -> heading style. The tags "h1" to "h8" follow the order
# of HEADING_STYLES, so "h7" and "h8" give the two annex-title styles.
HEADING_TAG_2_STYLE = {
    f"h{rank}": style for rank, style in enumerate(HEADING_STYLES, start=1)
}

SECTION_ENDING_STYLES = {
    # Subdivision levels: ended by a heading of the same or a shallower
    # level, or by an annex title.
    **{
        style: HEADING_STYLES[:level] + HEADING_STYLES[-2:]
        for level, style in enumerate(HEADING_STYLES[:-2], start=1)
    },
    # Annex titles: ended by a clause heading or by another annex title.
    **{
        style: [P_STYLE.Heading_1] + HEADING_STYLES[-2:]
        for style in HEADING_STYLES[-2:]
    },
}
"""list of sections that announce the end of the given section"""

# List style -> bulleted style one indent deeper.
# NOTE(review): going by the name, this is the style of an unordered list
# nested in an item of the key style - confirm against the callers.
SUB_UL_STYLE = {
    P_STYLE.B1: P_STYLE.B2_plus,
    P_STYLE.B1_plus: P_STYLE.B2_plus,
    P_STYLE.B2_plus: P_STYLE.B3_plus,
    P_STYLE.BN: P_STYLE.B2_plus,
    P_STYLE.BL: P_STYLE.B2_plus,
}

# Left empty for now.
SUB_LI_STYLE = {}
 No newline at end of file
+52 −29
Original line number Diff line number Diff line
@@ -3,12 +3,16 @@ from bs4 import BeautifulSoup
import re
import logging
from saref_pypeline._logging import TRACE_LEVEL
from saref_pypeline.etsi import WK_FIELD, TBType, TB

logger = logging.getLogger(__name__)


def _fetch_keywords(ref, wk_id) -> str:
    logger.log(TRACE_LEVEL, f"fetch for {ref}")
    url = f"https://portal.etsi.org/webapp/WorkProgram/Report_WorkItem.asp?WKI_ID={wk_id}"
    url = (
        f"https://portal.etsi.org/webapp/WorkProgram/Report_WorkItem.asp?WKI_ID={wk_id}"
    )
    resp = requests.get(url)
    try:
        resp.raise_for_status()
@@ -26,6 +30,7 @@ def _fetch_keywords(ref, wk_id) -> str:
    except Exception as e:
        logger.warning(f"Exception while fetching keywords for {ref} {wk_id}: {e}")


def fetch_metadata() -> list:
    result = list()
    for search in ["103264", "103548", "103410"]:
@@ -46,17 +51,26 @@ def fetch_metadata() -> list:
            if not "Drafting" in status and not "Published" in status:
                continue

            pub_date_match = re.search(r"Publication\s+\(([0-9]{4}-[0-9]{2}-[0-9]{2})\)", status)
            pub_date = pub_date_match.group(1) if pub_date_match else None
            pub_date_match = re.search(
                r"Publication\s+\(([0-9]{4})-([0-9]{2})-[0-9]{2}\)", status
            )
            yyyy, mm = pub_date_match.group(1, 2) if pub_date_match else (None, None)

            # Extract version, ref, Work Item ID
            try:
                col_text = cells[1].get_text(separator=" ", strip=True)
                doc_nb = re.search(r"Doc\.\ Nb\.\s*(TS\ [0-9]{3}\ [0-9]{3}(-[0-9]+)?)", col_text).group(1)
                version_match = re.search(r"Ver\.\s*(\d+\.\d+\.\d+)", col_text)
                version = version_match.group(1) if version_match else None
                one_dd, ddd, part = re.search(
                    r"Doc\.\ Nb\.\s*TS\ ([0-9]{3})\ ([0-9]{3}(-([0-9]+))?)", col_text
                ).group(1, 2, 4)
                mte_match = re.search(r"Ver\.\s*(\d+\.\d+\.\d+)", col_text)
                mte = mte_match.group(1) if mte_match else None
                tb = re.search(r"Technical Body:\s*([\w-]+)", col_text).group(1)
                ref = re.search(r"Ref\.\s*([A-Z]+/[\w-]+)", col_text).group(1)
                wk_id = int(re.search(r"WKI_ID=(\d+)", cells[1].find("a", href=True)["href"]).group(1))
                wk_id = int(
                    re.search(
                        r"WKI_ID=(\d+)", cells[1].find("a", href=True)["href"]
                    ).group(1)
                )
            except:
                continue

@@ -66,31 +80,40 @@ def fetch_metadata() -> list:
            title1 = title_lines[0] if len(title_lines) >= 2 else None
            title2 = title_lines[1] if len(title_lines) >= 3 else None
            title3 = title_lines[2] if len(title_lines) >= 4 else None
            try:
                part = int(re.search(r"Part\s+(\d+)", title3).group(1))
            except:
                part = None
            # try:
            #     part = int(re.search(r"Part\s+(\d+)", title3).group(1))
            # except:
            #     part = None

            # fetch keywords
            keywords = _fetch_keywords(ref, wk_id)

            result.append({
                "doc_nb": doc_nb,
                "ref": ref,
                "version": version,
                "pub_date": pub_date,
                "wk_id": wk_id,
                "short_title": short_title,
                "title1": title1,
                "title2": title2,
                "title3": title3,
                "part": part,
                "keywords": keywords,
            })
            result.append(
                {
                    WK_FIELD.WK_ID: wk_id,
                    WK_FIELD.Short_Title: short_title,
                    WK_FIELD.ONE_DD: one_dd,
                    WK_FIELD.DDD: ddd,
                    WK_FIELD.PART: part,
                    WK_FIELD.mte: mte,
                    WK_FIELD.yyyy: yyyy,
                    WK_FIELD.mm: mm,
                    WK_FIELD.Title: title1,
                    WK_FIELD.Title_Part: title2,
                    WK_FIELD.Title_Sub_Part: title3,
                    WK_FIELD.Title_Release: None,
                    WK_FIELD.Workitem: ref,
                    WK_FIELD.Keywords: keywords,
                    WK_FIELD.TB_type: TB[tb].type,
                    WK_FIELD.short_techbody: tb,
                    WK_FIELD.long_techbody: TB[tb].long_name,
                }
            )
    return result


if __name__ == "__main__":
    details = fetch_metadata()
    import pprint
    pprint.pprint(details)

    pprint.pprint(details)
+19 −0
Original line number Diff line number Diff line
@@ -5,6 +5,7 @@ import re
from git import Repo, GitCommandError
from saref_pypeline._logging import TRACE_LEVEL
from saref_pypeline.utils import skip_if_filtered
from saref_pypeline.etsi import WK_FIELD
from saref_pypeline.docgen import SiteManager
from saref_pypeline.dataset import ManagedDataset
from saref_pypeline.constants import *
@@ -509,10 +510,28 @@ class SAREFPipeline:
    def fetch_projects_metadata(self):
        """Load the projects metadata, fetching and caching it when needed.

        The metadata is read from ``projects_metadata.yaml`` in
        ``self.target_dir``; mapping keys that are names of ``WK_FIELD``
        members are turned back into those members. When the file cannot be
        read, cannot be parsed or holds no data, the metadata is obtained
        from ``fetch_metadata()`` and written to that file, each ``WK_FIELD``
        key being stored under its member name.

        The result is stored in ``self.projects_metadata``.
        """
        metadata_file = os.path.join(self.target_dir, "projects_metadata.yaml")
        try:
            def dict_fields_constructor(loader, node):
                # Map string keys back to WK_FIELD members where possible
                mapping = loader.construct_mapping(node)
                return {
                    WK_FIELD[k] if k in WK_FIELD.__members__ else k: v
                    for k, v in mapping.items()
                }

            # NOTE(review): this registers the constructor on the shared
            # yaml.SafeLoader class, so every later yaml.safe_load in the
            # process is affected, not only the call below.
            yaml.SafeLoader.add_constructor(
                "tag:yaml.org,2002:map",
                dict_fields_constructor)
            with open(metadata_file, "r", encoding="utf-8") as f:
                self.projects_metadata = yaml.safe_load(f)
            if not self.projects_metadata:
                # An empty cache is handled like a missing one.
                raise ValueError(f"no metadata found in {metadata_file}")
        except Exception:
            # Best effort: whatever went wrong with the cached file, fall
            # back to a fresh fetch. KeyboardInterrupt and SystemExit are
            # not caught, so the user can still abort.
            self.projects_metadata = fetch_metadata()

            def represent_string(dumper, data):
                # Store a WK_FIELD under its member name, which is the form
                # the constructor above maps back to the member.
                return dumper.represent_str(data.name)

            # NOTE(review): registered on the shared yaml.SafeDumper class,
            # hence process-wide as well.
            yaml.SafeDumper.add_representer(
                WK_FIELD,
                represent_string)
            with open(metadata_file, "w", encoding="utf-8") as f:
                yaml.safe_dump(self.projects_metadata, f)

+127 KiB

File added.

No diff preview for this file type.

Loading