refactor and linting

680ab594 · carignani · d1da74f4 · 680ab594
Commit 680ab594 authored 4 years ago by carignani
--- a/src/doc2tosca.py
+++ b/src/doc2tosca.py
@@ -35,6 +35,14 @@ imports:

 '''

+sections_to_models = {
+    6: 'vnfd',
+    7: 'nsd',
+    8: 'pnfd',
+    9: 'common'
+}
+
+
 SUBSECTIONS = [
    "Artifact Types",
    "Data Types",
@@ -51,6 +59,7 @@ SUBSECTIONS = [
 MODELS = {}
 EXAMPLES = {}

+
 class Section():
    '''
    Defines a section of the base document
@@ -60,7 +69,7 @@ class Section():
        self.from_id = from_id
        self.to_id = to_id
        self.is_annex = title.strip().startswith("Annex")
-        
+
        if not self.is_annex:
            cleaned_title = title.strip().split("\t")
            self.title = cleaned_title[1]
@@ -72,15 +81,23 @@ class Section():

    def __repr__(self):
        if self.is_annex:
-            return "({}, Annex {}, {}-{})".format(self.title,self.letter, self.from_id, self.to_id)
-        return "({}, {}, {}-{})".format(self.title, self.number, self.from_id, self.to_id)
+            return "({}, Annex {}, {}-{})".format(
+                self.title, self.letter, self.from_id, self.to_id
+            )
+        return "({}, {}, {}-{})".format(
+            self.title, self.number, self.from_id, self.to_id
+        )
+

 def match_definition_incipit(txt):
    '''
    Returns true if txt matches the incipit of a definition,
-    identified by the word 'tosca' 
+    identified by the word 'tosca'
    '''
-    return bool(re.match(r'^tosca\.[a-zA-Z\.:0-9\s]*$',txt.split("\n")[0].strip()))
+    return bool(
+        re.match(r'^tosca\.[a-zA-Z\.:0-9\s]*$', txt.split("\n")[0].strip())
+    )
+

 def is_tosca_def(table):
    '''
@@ -92,20 +109,22 @@ def is_tosca_def(table):
    return \
        len(table.rows) == 1 and \
        len(table.columns) == 1 and \
-        match_definition_incipit(txt)    
+        match_definition_incipit(txt)
+

 def tosca_model_info(name, version, imports):
    '''
    Returns a dictionary to hold information on the model
    '''
    return {
-        'name' : name,
-        'fn' : BASE_FILENAME.format(version.replace(".","-"), name),
-        'fd' : None,
-        'imports' : imports,
-        'buf' :  StringIO()
+        'name': name,
+        'fn': BASE_FILENAME.format(version.replace(".", "-"), name),
+        'fd': None,
+        'imports': imports,
+        'buf':  StringIO()
    }

+
 def get_content(doc):
    '''
    Returns a list of all paragraphs and tables in the Document
@@ -121,12 +140,13 @@ def get_content(doc):
        elif isinstance(element, docx.oxml.table.CT_Tbl):
            ret.append(Table(element, body))
            table_count = table_count + 1
-        #else:
+        # else:
        #    print("Non paragraph or table " +  str(type(element)))
    print("Paragraphs: " + str(parag_count))
    print("Tables: " + str(table_count))
    return ret

+
 def find_sect(sect_to_find, start_idx, doc_content):
    '''
    Returns the index in the doc_content list to the first paragraph
@@ -135,29 +155,33 @@ def find_sect(sect_to_find, start_idx, doc_content):
    '''
    while start_idx < len(doc_content):
        my_elem = doc_content[start_idx]
-        if isinstance(my_elem, Paragraph) and my_elem.text.strip() == sect_to_find:
+        if isinstance(my_elem, Paragraph) and \
+           my_elem.text.strip() == sect_to_find:
            break
        start_idx = start_idx + 1

    print("FOUND " + sect_to_find + " at " + str(start_idx))
    return start_idx

+
 def is_lvl2_section_hdn(txt):
    ''' Returns true if txt is level 2 heading'''
    clean_txt = txt.strip()
-    
+
    if not bool(re.match(r'^[0-9]\.[0-9]+\t[a-zA-Z\s]*$', clean_txt)):
        return False

    subtitle = clean_txt.split('\t')[1]
-    
+
    return subtitle in SUBSECTIONS

+
 def is_lvl1_section_hdn(txt):
    ''' Returns true if txt is level 1 heading'''
    clean_txt = txt.strip()
    return bool(re.match(r'^[0-9]+\t[a-zA-Z\s]*$', clean_txt)) or \
-            bool(re.match(r'^Annex[\s]*[A-Z]+[\s\t]+[a-zA-Z\s\(\)]*', clean_txt))
+        bool(re.match(r'^Annex[\s]*[A-Z]+[\s\t]+[a-zA-Z\s\(\)]*', clean_txt))
+

 def find_all_sections(doc_content):
    '''
@@ -171,17 +195,26 @@ def find_all_sections(doc_content):

    while end_indx < len(doc_content):
        my_elem = doc_content[end_indx]
-        if isinstance(my_elem, Paragraph) and is_lvl1_section_hdn(my_elem.text):
+        if isinstance(my_elem, Paragraph) and \
+           is_lvl1_section_hdn(my_elem.text):
            if start_indx != 0:
-                sections.append(Section(start_indx, end_indx-1, doc_content[start_indx].text))
-            
+                sections.append(
+                    Section(
+                        start_indx,
+                        end_indx-1,
+                        doc_content[start_indx].text)
+                )
+
            start_indx = end_indx
-            
+
        end_indx = end_indx + 1
-    
-    sections.append(Section(start_indx, end_indx-1, doc_content[start_indx].text))
+
+    sections.append(
+        Section(start_indx, end_indx-1, doc_content[start_indx].text)
+    )
    return sections

+
 def write_subsection_to_file(txt, buf):
    '''
    Writes a subsection header in utf-8 encoding to file buf
@@ -191,6 +224,7 @@ def write_subsection_to_file(txt, buf):
        buf.write('\n')
    buf.write('\n')

+
 def write_table_to_file(tab, buf):
    '''
    Writes content of table tab in utf-8 encoding to file buf
@@ -201,6 +235,10 @@ def write_table_to_file(tab, buf):
        buf.write('\n')
    buf.write('\n')

+
+range_err_mess = "ERR: Out of range (A: {}, B: {}, IDX: {}, LEN(CONTENT): {})"
+
+
 def gen_tables_btwn(a_id, b_id, content, buf):
    '''
    Loops over content and writes all tosca definitions to the
@@ -211,13 +249,14 @@ def gen_tables_btwn(a_id, b_id, content, buf):
    for idx in range(a_id, b_id):

        if idx >= len(content):
-            print("ERROR: Paragraph out of range (A: {}, B: {}, IDX: {}, LEN(CONTENT): {})".format(
-                a_id, b_id,idx, len(content)))
+            print(
+                range_err_mess.format(a_id, b_id, idx, len(content)))
            return definitions_count

        tmp_elem = content[idx]

-        if isinstance(tmp_elem, Paragraph) and is_lvl2_section_hdn(tmp_elem.text):
+        if isinstance(tmp_elem, Paragraph) and \
+           is_lvl2_section_hdn(tmp_elem.text):
            print(tmp_elem.text)
            write_subsection_to_file(tmp_elem.text.split("\t")[1], buf)
            definitions_count = definitions_count + 1
@@ -228,16 +267,18 @@ def gen_tables_btwn(a_id, b_id, content, buf):

        elif isinstance(tmp_elem, Table):
            txt = tmp_elem.rows[0].cells[0].text
-            if txt.strip().startswith("Name") or txt.strip().startswith("Shorthand") or \
-                txt.strip().startswith("tosca_def"):
+            if txt.strip().startswith("Name") or \
+               txt.strip().startswith("Shorthand") or \
+               txt.strip().startswith("tosca_def"):
                continue
    return definitions_count

+
 def generate_header(
-        model_name, 
-        buf, 
-        spec_version=SPEC_VERSION, 
-        imports=None, 
+        model_name,
+        buf,
+        spec_version=SPEC_VERSION,
+        imports=None,
        tosca_version=DEFAULT_TOSCA_VERSION):
    '''
    Writes the header to the file for a specific model
@@ -248,27 +289,46 @@ def generate_header(
        spec_version=spec_version,
        imports=imports))

+
+def init_models(yaml_root, spec_ver, tosc_ver):
+    '''
+    Registers every model listed in MODEL_NAMES in the MODELS dictionary
+    and writes the header of each registered model to its buffer.
+    The common types import is the bare file name when yaml_root is
+    'local', otherwise a forge.etsi.org URL built from spec_ver.
+    '''
+    import_stmt = 'etsi_nfv_sol001_common_types.yaml'
+    if yaml_root != 'local':
+        import_stmt = \
+            'https://forge.etsi.org/rep/nfv/SOL001/raw/{}/{}'.format(
+                spec_ver, import_stmt
+            )
+
+    # Register inside the loop so that every model gets its own entry
+    for model in MODEL_NAMES:
+        MODELS[model] = tosca_model_info(
+            model, spec_ver, '- ' + import_stmt
+        )
+
+    for mod in MODELS.values():
+        generate_header(
+            mod['name'], mod['buf'], spec_ver, mod['imports'], tosc_ver
+        )
+
+
 def generate_templates(
-        filename, 
-        spec_ver=SPEC_VERSION, 
-        yaml_root='uri', 
+        filename,
+        spec_ver=SPEC_VERSION,
+        yaml_root='uri',
        tosc_ver=DEFAULT_TOSCA_VERSION):
    '''
-    Takes a filename or file object and loads the definition into the MODELS dictionary
+    Takes a filename or file object and loads the definition into
+    the MODELS dictionary
    '''
    if isinstance(filename, str):
        print("Opening " + filename)

-    for mod in MODEL_NAMES:
-        import_stmt = 'etsi_nfv_sol001_common_types.yaml'
-        if yaml_root != 'local':
-            import_stmt = \
-                'https://forge.etsi.org/rep/nfv/SOL001/raw/{}/'.format(spec_ver) + import_stmt
-        MODELS[mod] = tosca_model_info(
-            mod,
-            spec_ver, 
-            '- ' + import_stmt
-        )
+    init_models(yaml_root, spec_ver, tosc_ver)

    try:
        sol_001 = docx.Document(filename)
@@ -276,40 +336,30 @@ def generate_templates(
        print("Error opening the submitted Docx file")
        raise ValueError("Cannot open the submitted Docx file")

-    for mod in MODELS:
-        generate_header(
-            MODELS[mod]['name'], 
-            MODELS[mod]['buf'],
-            spec_ver, 
-            MODELS[mod]['imports'],
-            tosc_ver
-        )
-
    content = get_content(sol_001)
    sections = find_all_sections(content)

-    sections_to_models = {
-        6 : 'vnfd',
-        7 : 'nsd',
-        8 : 'pnfd',
-        9 : 'common'
-    }
-
    for sect in sections:

        if not sect.is_annex:
            if sect.number in sections_to_models.keys():
                model = sections_to_models[sect.number]
-                count = gen_tables_btwn(sect.from_id, sect.to_id, content, MODELS[model]['buf'])
+                count = gen_tables_btwn(
+                    sect.from_id, sect.to_id, content, MODELS[model]['buf']
+                )
                print("Printed " + str(count) + " types to " + model)
        else:
-            if sect.letter == "A":
-                count = generate_examples_between(sect.from_id, sect.to_id, content, EXAMPLES)
-                print("Printed " + str(count) + " types to " + "Annex " + sect.letter)
+            if sect.letter == "A" or sect.letter == "E":
+                count = generate_examples_between(
+                    sect.from_id, sect.to_id, content, EXAMPLES
+                )
+                print("Printed {} types to Annex {}".format(
+                    str(count), sect.letter)
+                )


 def print_to_files(prefix=None):
-    ''' 
+    '''
    Prefix is a path to a folder to work into
    '''
    for key in MODELS:
@@ -335,23 +385,26 @@ def print_to_files(prefix=None):
            newf.write("\n")
            newf.close()

+
 def parse_version_from_filename(filename):
    '''
    Parses the version from the filename
    '''
    base_filename = os.path.basename(filename)

-    if base_filename.startswith("gs_NFV-SOL001v"):         
+    if base_filename.startswith("gs_NFV-SOL001v"):
        return "v" + base_filename.strip("gs_NFV-SOL001v") \
-                .replace("0",".").strip(".").strip("p.docx")
-    if base_filename.startswith("gs_nfv-sol001v"):         
+                .replace("0", ".").strip(".").strip("p.docx")
+    if base_filename.startswith("gs_nfv-sol001v"):
        return "v" + base_filename.strip("gs_nfv-sol001v") \
-                .replace("0",".").strip(".").strip("p.docx")
+                .replace("0", ".").strip(".").strip("p.docx")
    return ""

+
 def slugify(t):
    return t.replace(" ", "_").lower()

+
 if __name__ == "__main__":

    try:
@@ -365,5 +418,3 @@ if __name__ == "__main__":
    generate_templates(SOL001_FN, spec_ver=ver)

    print_to_files()
-
-