clean up and documentation for doc2tosca

252e6e41 · carignani · 7a1bdf77 · 252e6e41
Commit 252e6e41 authored 6 years ago by carignani
--- a/doc2tosca.py
+++ b/doc2tosca.py
-#!/bin/python2.7
+#!/bin/python3.5
 '''
 Generate tosca definitions from Docx specification
 '''

 import sys
+from io import BytesIO as StringIO
+
 import docx
 from docx.table import Table
 from docx.text.paragraph import Paragraph

-def  is_tosca_def(table):
-    '''
-    Returns true when a table contains TOSCA definitions, i.e.
-    the table contains just one cell and text starts with an 
-    empty space ' '' 
-    '''
-    txt = table.rows[0].cells[0].text[0]
-    return \
-    len(table.rows) == 1 and \
-    len(table.columns) == 1 and \
-    txt.startswith(' ')
-
-
-try:
-    SOL001_FN = sys.argv[1] 
-except:
-    print 'Error: Filename missing or filename not a docx document'
-    print 'Usage: doc2tosca <docx-with-tosca-definitions>'
-    sys.exit(1)
-
-OUT_FN_VNFD = 'try-tosca-export_vnfd.yaml'
-OUT_FN_NSD  = 'try-tosca-export_nsd.yaml'
-OUT_FN_PNFD = 'try-tosca-export_pnfd.yaml'
-OUT_FN_COMM = 'try-tosca-export_comm.yaml'
-
-SOL001 = docx.Document(SOL001_FN)
-
-F_VNFD = open(OUT_FN_VNFD, 'w')
-F_NSD = open(OUT_FN_NSD, 'w')
-F_PNFD = open(OUT_FN_PNFD, 'w')
-F_COMM = open(OUT_FN_COMM, 'w')
-
-HDR_VNFD ='''tosca_definitions_version: tosca_simple_yaml_1_2
-description: ETSI NFV SOL 001 nsd types definitions version 2.5.1
-
-imports:
-  - etsi_nfv_sol001_vnfd_2_5_1_types.yaml
-
-data_types:
-'''
-
-HDR_NSD ='''tosca_definitions_version: tosca_simple_yaml_1_2
-description: ETSI NFV SOL 001 nsd types definitions version 2.5.1
-
-imports:
-  - etsi_nfv_sol001_vnfd_2_5_1_types.yaml
+BASE_FILENAME = "try-tosca-export_{}.yaml"
+TOSCA_VERSION = "tosca_simple_yaml_1_2"
+SPEC_VERSION = "2.5.1"

-data_types:
-'''
+MODEL_NAMES = ['vnfd', 'nsd', 'pnfd', 'common']

-HDR_PNFD ='''tosca_definitions_version: tosca_simple_yaml_1_2
-description: ETSI NFV SOL 001 nsd types definitions version 2.5.1
+HDR = '''tosca_definitions_version: {tosca_version}
+description: ETSI NFV SOL 001 {model} types definitions version {spec_version}

 imports:
-  - etsi_nfv_sol001_vnfd_2_5_1_types.yaml
+{imports}

 data_types:
-'''

-HDR_COMM ='''tosca_definitions_version: tosca_simple_yaml_1_2
-description: ETSI NFV SOL 001 nsd types definitions version 2.5.1
-
-imports:
-  - etsi_nfv_sol001_vnfd_2_5_1_types.yaml
-
-data_types:
 '''

-F_VNFD.write(HDR_VNFD)
-F_NSD.write(HDR_NSD)
-F_PNFD.write(HDR_PNFD)
-F_COMM.write(HDR_COMM)
-
-p_id = 0
-
-cur_sect = "0"
+def  is_tosca_def(table):
+    '''
+    Returns true when a table contains TOSCA definitions, i.e.
+    the table contains just one cell and text starts with an
+    empty space ' '
+    '''
+    txt = table.rows[0].cells[0].text[0]
+    return \
+    len(table.rows) == 1 and \
+    len(table.columns) == 1 and \
+    txt.startswith(' ')

-from sets import Set
+def tosca_model_info(name, imports):
+    '''
+    Returns a dictionary for the information on the model
+    '''
+    return {
+        'name' : name,
+        'fn' : BASE_FILENAME.format(name),
+        'fd' : None,
+        'imports' : imports,
+        'buf' :  StringIO()
+    }

 def get_content(doc):
+    '''
+    Returns a list of all paragraphs and tables in the Document
+    '''
    ret = []
    body = doc._body
-    par = 0
-    tables = 0
+    parag_count = 0
+    table_count = 0
    for element in body._element:
        if isinstance(element, docx.oxml.text.paragraph.CT_P):
            ret.append(Paragraph(element, body))
-            par = par + 1
+            parag_count = parag_count + 1
        elif isinstance(element, docx.oxml.table.CT_Tbl):
            ret.append(Table(element, body))
-            tables = tables + 1
+            table_count = table_count + 1
        else:
-            print "FOUND " +  str(type(element))
-    print "pars " + str(par)
-    print "tables " + str(tables)
+            print "Non paragraph or table " +  str(type(element))
+    print "Paragraphs: " + str(parag_count)
+    print "Tables: " + str(table_count)
    return ret

-content = get_content(SOL001)
-tables=0
-
-while p_id < len(content):
-    elem = content[p_id]
-    if isinstance(elem, Paragraph) and elem.text == "Foreword":
-        break
-    p_id = p_id + 1
-
-if p_id >= len(content):
-    print "FOREWORD NOT FOUND"
-
 def find_sect(sect_to_find, start_idx, doc_content):
+    '''
+    Returns the index in the doc_content list to the first paragraph
+    or heading of the section with title sect_to_find,
+    starting the search from start_idx
+    '''
    while start_idx < len(doc_content):
-        my_elem = content[start_idx]
+        my_elem = doc_content[start_idx]
        if isinstance(my_elem, Paragraph) and my_elem.text == sect_to_find:
            break
        start_idx = start_idx + 1
@@ -127,40 +86,94 @@ def find_sect(sect_to_find, start_idx, doc_content):
    print "FOUND " + sect_to_find + " at " + str(start_idx)
    return start_idx

-sect_6_id = find_sect("6\tVNFD TOSCA model", p_id, content)
+def write_table_to_file(tab, buf):
+    '''
+    Writes content of table tab in utf-8 encoding to buffer buf
+    '''
+    buf.write(tab.rows[0].cells[0].text.encode('utf-8'))
+    buf.write('\n# -------------------- #\n')
+
+def generate_tables_between(a_id, b_id, content, buf):
+    '''
+    Loops over content and writes all tosca definitions to the
+    buf buffer. Returns the number of written definitions
+    '''
+    definitions_count = 0
+    for idx in range(a_id, b_id):
+        tmp_elem = content[idx]
+        if isinstance(tmp_elem, Table) and is_tosca_def(tmp_elem):
+            write_table_to_file(tmp_elem, buf)
+            definitions_count = definitions_count + 1
+    return definitions_count
+
+def dump_header(model_name, buf, imports):
+    '''
+    Writes the header for a specific model to the buffer buf
+    '''
+    buf.write(HDR.format(
+        tosca_version=TOSCA_VERSION,
+        model=model_name,
+        spec_version=SPEC_VERSION,
+        imports=imports))
+
+MODELS = {}

+for mn in MODEL_NAMES:
+    MODELS[mn] = tosca_model_info(mn, '- etsi_nfv_sol001_common_types.yaml')

-sect_7_id = find_sect("7\tNSD TOSCA model", sect_6_id + 1, content)

+if __name__ == "__main__":

-sect_8_id = find_sect("8\tPNFD TOSCA model", sect_7_id + 1, content)
+    try:
+        SOL001_FN = sys.argv[1]
+    except:
+        print 'Error: Filename missing or filename not a docx document'
+        print 'Usage: doc2tosca <docx-with-tosca-definitions>'
+        sys.exit(1)

-annex_a_id = find_sect("Annex A (informative):", sect_7_id + 1, content)
+    print "Opening " + SOL001_FN

-def write_table_to_file(t, F):
-    F.write(t.rows[0].cells[0].text.encode('utf-8'))
-    F.write('\n# -------------------- #\n')
+    SOL001 = docx.Document(SOL001_FN)

-def generate_tables_between(a_id, b_id, FDESC, fn):
-    count=0
-    for idx in range(a_id, b_id):
-        tmp_elem = content[idx]
-        if isinstance(tmp_elem, Table) and is_tosca_def(tmp_elem):
-            write_table_to_file(tmp_elem, FDESC)
-            count = count + 1
-    print "Printed " + str(count) + " type to " + fn 
+    for m in MODELS:
+        dump_header(MODELS[m]['name'], MODELS[m]['buf'], MODELS[m]['imports'])
+
+    p_id = 0
+
+    cur_sect = "0"
+
+    CONTENT = get_content(SOL001)
+    tables=0
+
+    while p_id < len(CONTENT):
+        elem = CONTENT[p_id]
+        if isinstance(elem, Paragraph) and elem.text == "Foreword":
+            break
+        p_id = p_id + 1
+
+    if p_id >= len(CONTENT):
+        print "FOREWORD NOT FOUND"
+
+    sect_6_id = find_sect("6\tVNFD TOSCA model", p_id, CONTENT)
+
+    sect_7_id = find_sect("7\tNSD TOSCA model", sect_6_id + 1, CONTENT)
+
+    sect_8_id = find_sect("8\tPNFD TOSCA model", sect_7_id + 1, CONTENT)
+
+    annex_a_id = find_sect("Annex A (informative):", sect_7_id + 1, CONTENT)

-generate_tables_between(sect_6_id, sect_7_id, F_VNFD, "VNFD")
-generate_tables_between(sect_7_id, sect_8_id, F_NSD, "NSD")
-generate_tables_between(sect_8_id, annex_a_id, F_PNFD, "PNFD")
+    count = generate_tables_between(sect_6_id, sect_7_id, CONTENT, MODELS['vnfd']['buf'])
+    print "Printed " + str(count) + " types to " + "VNFD"

-F_VNFD.write('\n')
-F_NSD.write('\n')
-F_PNFD.write('\n')
-F_COMM.write('\n')
+    count = generate_tables_between(sect_7_id, sect_8_id, CONTENT, MODELS['nsd']['buf'])
+    print "Printed " + str(count) + " types to " + "NSD"

-F_VNFD.close()
-F_NSD.close()
-F_PNFD.close()
-F_COMM.close()
+    count = generate_tables_between(sect_8_id, annex_a_id, CONTENT, MODELS['pnfd']['buf'])
+    print "Printed " + str(count) + " types to " + "PNFD"

+    for m in MODELS:
+        MODELS[m]['fd'] = open(MODELS[m]['fn'], 'w')
+        MODELS[m]['buf'].seek(0)
+        MODELS[m]['fd'].write(MODELS[m]['buf'].read())
+        MODELS[m]['fd'].write('\n')
+        MODELS[m]['fd'].close()