Commit c067c9d5 authored by carignani's avatar carignani
Browse files

support subsections (fix #10)

parent a9d6cf4c
Loading
Loading
Loading
Loading
+51 −16
Original line number Diff line number Diff line
@@ -33,9 +33,21 @@ metadata:
imports:
  {imports}

data_types:
'''

# Titles of the level 2 sections that are treated as subsections.
# is_lvl2_section_hdn compares a heading title against these exact strings,
# so each title only needs to be listed once.
SUBSECTIONS = [
    "Artifact Types",
    "Data Types",
    "Capability Types",
    "Interface Types",
    "Requirements Types",
    "Relationship Types",
    "Node Types",
    "Group Types",
    "Policy Types",
]

# NOTE(review): both start empty; presumably filled in elsewhere in the
# file -- their usage is outside this view, confirm before relying on it.
MODELS = {}
EXAMPLES = {}

@@ -109,8 +121,8 @@ def get_content(doc):
        elif isinstance(element, docx.oxml.table.CT_Tbl):
            ret.append(Table(element, body))
            table_count = table_count + 1
        else:
            print("Non paragraph or table " +  str(type(element)))
        #else:
        #    print("Non paragraph or table " +  str(type(element)))
    print("Paragraphs: " + str(parag_count))
    print("Tables: " + str(table_count))
    return ret
@@ -130,6 +142,17 @@ def find_sect(sect_to_find, start_idx, doc_content):
    print("FOUND " + sect_to_find + " at " + str(start_idx))
    return start_idx

def is_lvl2_section_hdn(txt):
    '''
    Returns True if txt is a level 2 heading that names a known subsection

    After stripping surrounding whitespace, txt must look like
    "<chapter>.<section><TAB><title>", where chapter and section are
    decimal numbers and the title contains only letters and whitespace.
    The title (the text between the first and second tab, if any) must
    also be one of SUBSECTIONS.
    '''
    clean_txt = txt.strip()

    # "[0-9]+" for the chapter number so that chapters 10 and above are
    # recognised too, not only the single-digit ones.
    if re.match(r'^[0-9]+\.[0-9]+\t[a-zA-Z\s]*$', clean_txt) is None:
        return False

    # The pattern guarantees at least one tab, so index 1 always exists.
    subtitle = clean_txt.split('\t')[1]

    return subtitle in SUBSECTIONS

def is_lvl1_section_hdn(txt):
    ''' Returns true if txt is level 1 heading'''
    clean_txt = txt.strip()
@@ -159,6 +182,15 @@ def find_all_sections(doc_content):
    sections.append(Section(start_indx, end_indx-1, doc_content[start_indx].text))
    return sections

def write_subsection_to_file(txt, buf):
    '''
    Writes a subsection header to file buf

    The header is the slugified title followed by ":" and then an empty
    line, e.g. "Data Types" is written as "data_types:" plus a blank line.

    txt: subsection title, surrounding whitespace is ignored
    buf: object with a write() method accepting str
    '''
    # Strip before slugifying: a title that still carries its trailing
    # newline would otherwise push the ":" onto the following line, and
    # stray leading/trailing blanks would turn into underscores.
    buf.write(slugify(txt.strip()) + ":\n\n")

def write_table_to_file(tab, buf):
    '''
    Writes content of table t in utf-8 encoding to file F
@@ -177,28 +209,28 @@ def gen_tables_btwn(a_id, b_id, content, buf):
    definitions_count = 0

    for idx in range(a_id, b_id):

        if idx >= len(content):
            print("A: " + str(a_id))
            print("B: " + str(b_id))
            print("IDX: " + str(idx))
            print("LEN(CONTENT): " + str(len(content)))
            print("ERROR: Paragraph out of range (A: {}, B: {}, IDX: {}, LEN(CONTENT): {})".format(
                a_id, b_id,idx, len(content)))
            return definitions_count

        tmp_elem = content[idx]

        if isinstance(tmp_elem, Paragraph) and is_lvl2_section_hdn(tmp_elem.text):
            print(tmp_elem.text)
            write_subsection_to_file(tmp_elem.text.split("\t")[1], buf)
            definitions_count = definitions_count + 1

        if isinstance(tmp_elem, Table) and is_tosca_def(tmp_elem):
            write_table_to_file(tmp_elem, buf)
            definitions_count = definitions_count + 1

        elif isinstance(tmp_elem, Table):
            txt = tmp_elem.rows[0].cells[0].text
            if txt.strip().startswith("Name") or txt.strip().startswith("Shorthand") or \
                txt.strip().startswith("tosca_def"):
                continue
            # print("----- Filtered out: " + txt.split("\n")[0])
            #if not len(tmp_elem.rows) == 1:
                #print("       Rows count != 1 ")
            #if not len(tmp_elem.columns) == 1:
            #    print("       Columns count != 1 ")
            #if not match_definition_incipit(txt):
            #    print("       Regex != 1 ")
    return definitions_count

def generate_header(
@@ -317,6 +349,9 @@ def parse_version_from_filename(filename):
                .replace("0",".").strip(".").strip("p.docx")
    return ""

def slugify(t):
    '''
    Returns t with every space replaced by an underscore, in lower case
    '''
    underscored = "_".join(t.split(" "))
    return underscored.lower()

if __name__ == "__main__":

    try:
+6 −0
Original line number Diff line number Diff line
@@ -20,6 +20,12 @@ def test_is_lvl1_section_hdn():
    assert d2t.is_lvl1_section_hdn("Annex A (informative)")
    assert d2t.is_lvl1_section_hdn("Annex C (normative):\tConformance\t284")

def test_is_lvl2_section_hdn():
    '''
    Only a level 2 heading whose title is a known subsection is accepted
    '''
    accepted = d2t.is_lvl2_section_hdn("6.3\tData Types")
    assert accepted

    # A level 3 heading, then a level 2 heading with an unknown title.
    for rejected in ("6.4.2\tSomething", "6.4\tSomething"):
        assert not d2t.is_lvl2_section_hdn(rejected)

def test_section_init():

    ssss = d2t.Section(0, 10, "6\tVNFD TOSCA model")