From dc7f185f748da5be7f5373b56f9b81237cebc19f Mon Sep 17 00:00:00 2001
From: Michele Carignani <michele.carignani@etsi.org>
Date: Tue, 23 Apr 2019 15:09:35 +0200
Subject: [PATCH] improvements and python3 portability

---
 doc2tosca.py | 75 +++++++++++++++++++++++++++++++++++++---------------
 tosca2doc.py |  7 +++--
 2 files changed, 57 insertions(+), 25 deletions(-)

diff --git a/doc2tosca.py b/doc2tosca.py
index 81ebb88..46f3be7 100644
--- a/doc2tosca.py
+++ b/doc2tosca.py
@@ -4,7 +4,8 @@ Generate tosca definitions from Docx specfication
 '''
 
 import sys
-from io import BytesIO as StringIO
+import re
+from io import StringIO
 
 import docx
 from docx.table import Table
@@ -12,31 +13,37 @@ from docx.text.paragraph import Paragraph
 
 BASE_FILENAME = "try-tosca-export_{}.yaml"
 TOSCA_VERSION = "tosca_simple_yaml_1_2"
-SPEC_VERSION = "2.5.1"
+SPEC_VERSION = "2.6.1"
 
 MODEL_NAMES = ['vnfd', 'nsd', 'pnfd', 'common']
 
 HDR = '''tosca_definitions_version: {tosca_version}
 description: ETSI NFV SOL 001 {model} types definitions version {spec_version}
+metadata:
+  - template_name: {model}
+  - template_author: ETSI_NFV
+  - template_version: {spec_version}
 
 imports:
-{imports}
+  {imports}
 
 data_types:
-
 '''
 
+def match_definition_incipit(txt):
+    return bool(re.match(r'^tosca\.[a-zA-Z\.:0-9\s]*$',txt.split("\n")[0].strip()))
+
 def  is_tosca_def(table):
     '''
     Returns true when a table contains TOSCA definitions, i.e.
     the table contains just one cell and text starts with an
     empty space ' '
     '''
-    txt = table.rows[0].cells[0].text[0]
+    txt = table.rows[0].cells[0].text
     return \
-    len(table.rows) == 1 and \
-    len(table.columns) == 1 and \
-    txt.startswith(' ')
+        len(table.rows) == 1 and \
+        len(table.columns) == 1 and \
+        match_definition_incipit(txt)
 
 def tosca_model_info(name, imports):
     '''
@@ -66,9 +73,9 @@ def get_content(doc):
             ret.append(Table(element, body))
             table_count = table_count + 1
         else:
-            print "Non paragraph or table " +  str(type(element))
-    print "Paragraphs: " + str(parag_count)
-    print "Tables: " + str(table_count)
+            print("Non paragraph or table " +  str(type(element)))
+    print("Paragraphs: " + str(parag_count))
+    print("Tables: " + str(table_count))
     return ret
 
 def find_sect(sect_to_find, start_idx, doc_content):
@@ -83,15 +90,29 @@ def find_sect(sect_to_find, start_idx, doc_content):
             break
         start_idx = start_idx + 1
 
-    print "FOUND " + sect_to_find + " at " + str(start_idx)
+    print("FOUND " + sect_to_find + " at " + str(start_idx))
     return start_idx
 
 def write_table_to_file(tab, buf):
     '''
     Writes content of table t in utf-8 encoding to file F
     '''
-    buf.write(tab.rows[0].cells[0].text.encode('utf-8'))
-    buf.write('\n# -------------------- #\n')
+    def pad2 (txt):
+        if txt.startswith("   "):
+            return " " + txt
+        if txt.startswith("  "):
+            return "  " + txt
+        if txt.startswith(" "):
+            return " " + txt
+        return "  " + txt
+
+    txt = tab.rows[0].cells[0].text
+    # print("+++++  Included in: " + tab.rows[0].cells[0].text.split("\n")[0])
+    buf.write("\n".join([pad2(x) for x in txt.split("\n")]))
+    # buf.write('\n# -------------------- #\n')
+    if not txt.endswith('\n'):
+        buf.write('\n')
+    buf.write('\n')
 
 def generate_tables_between(a_id, b_id, content, buf):
     '''
@@ -104,6 +125,18 @@ def generate_tables_between(a_id, b_id, content, buf):
         if isinstance(tmp_elem, Table) and is_tosca_def(tmp_elem):
             write_table_to_file(tmp_elem, buf)
             definitions_count = definitions_count + 1
+        elif isinstance(tmp_elem, Table):
+            txt = tmp_elem.rows[0].cells[0].text
+            if txt.strip().startswith("Name") or txt.strip().startswith("Shorthand") or \
+                txt.strip().startswith("tosca_def"):
+                continue
+            print("----- Filtered out: " + txt.split("\n")[0])
+            if not len(tmp_elem.rows) == 1:
+                print("       Rows count != 1 ")
+            if not len(tmp_elem.columns) == 1:
+                print("       Columns count != 1 ")
+            if not match_definition_incipit(txt):
+                print("       Regex != 1 ")
     return definitions_count
 
 def dump_header(model_name, buf, imports):
@@ -127,11 +160,11 @@ if __name__ == "__main__":
     try:
         SOL001_FN = sys.argv[1]
     except:
-        print 'Error: Filename missing or filename not a docx document'
-        print 'Usage: doc2tosca <docx-with-tosca-definitions>'
+        print('Error: Filename missing or filename not a docx document')
+        print('Usage: doc2tosca <docx-with-tosca-definitions>')
         sys.exit(1)
 
-    print "Opening " + SOL001_FN
+    print( "Opening " + SOL001_FN)
 
     SOL001 = docx.Document(SOL001_FN)
 
@@ -152,7 +185,7 @@ if __name__ == "__main__":
         p_id = p_id + 1
 
     if p_id >= len(CONTENT):
-        print "FOREWORD NOT FOUND"
+        print( "FOREWORD NOT FOUND")
 
     sect_6_id = find_sect("6\tVNFD TOSCA model", p_id, CONTENT)
 
@@ -163,13 +196,13 @@ if __name__ == "__main__":
     annex_a_id = find_sect("Annex A (informative):", sect_7_id + 1, CONTENT)
 
     count = generate_tables_between(sect_6_id, sect_7_id, CONTENT, MODELS['vnfd']['buf'])
-    print "Printed " + str(count) + " types to " + "VNFD"
+    print("Printed " + str(count) + " types to " + "VNFD\n\n\n")
 
     count = generate_tables_between(sect_7_id, sect_8_id, CONTENT, MODELS['nsd']['buf'])
-    print "Printed " + str(count) + " types to " + "NSD"
+    print("Printed " + str(count) + " types to " + "NSD\n\n\n")
 
     count = generate_tables_between(sect_8_id, annex_a_id, CONTENT, MODELS['pnfd']['buf'])
-    print "Printed " + str(count) + " types to " + "PNFD"
+    print("Printed " + str(count) + " types to " + "PNFD\n\n\n")
 
     for m in MODELS:
         MODELS[m]['fd'] = open(MODELS[m]['fn'], 'w')
diff --git a/tosca2doc.py b/tosca2doc.py
index 475b123..d176ac1 100755
--- a/tosca2doc.py
+++ b/tosca2doc.py
@@ -3,7 +3,6 @@
 Parses a TOSCA template and generates a docx file
 '''
 
-
 import sys
 import toscaparser.utils.yamlparser as yaml
 
@@ -61,7 +60,7 @@ def print_lvl(num, txt):
     if num + 1 > PRINT_TRESHOLD:
         return
     index = ".".join(map(str, IDS[:(num+1)]))
-    print index +  " " + ('-'*(num+1)) + ' ' + txt
+    print(index +  " " + ('-'*(num+1)) + ' ' + txt)
 
 def add_heading_with_num(num, txt, doc):
     '''
@@ -178,12 +177,12 @@ if __name__ == "__main__":
     doc = docx.Document()
 
     if len(sys.argv) < 2:
-        print "Usage: robot2doc <robot_file_or_dir> [<out_file> [spec_section_title]]"
+        print("Usage: robot2doc <robot_file_or_dir> [<out_file> [spec_section_title]]")
         sys.exit()
 
     FOLDR = sys.argv[1]
 
-    print "Using folder: " + sys.argv[1]
+    print("Using folder: " + sys.argv[1])
 
     for fn in ['VNFD', 'NSD', 'PNFD']:
         generate_from_file(fn, doc, FOLDR + '/' + FNS[fn])
-- 
GitLab