#!/usr/bin/env python3
'''
Generate TOSCA definitions from a Docx specification.

The script walks the body of an ETSI NFV SOL001 Word document, collects
every single-cell table whose text starts with a ``tosca.`` type name and
writes the collected definitions to one YAML file per model (VNFD, NSD,
PNFD and common definitions).
'''

import sys
import os
import re
from io import StringIO

import docx
from docx.table import Table
from docx.text.paragraph import Paragraph

BASE_FILENAME = "generated_etsi_nfv_sol001_{}_types.yaml"
TOSCA_VERSION = "tosca_simple_yaml_1_2"
SPEC_VERSION = "2.6.1"

allowed_versions = ["2.6.1", "2.7.1"]

MODEL_NAMES = ['vnfd', 'nsd', 'pnfd', 'common']

# NOTE(review): the second metadata entry reuses the key `template_name` for
# the value ETSI_NFV; it was presumably meant to be `template_author`.
# Left unchanged because it alters the generated files - confirm and fix.
HDR = '''tosca_definitions_version: {tosca_version}
description: ETSI NFV SOL 001 {model} types definitions version {spec_version}

metadata:
  - template_name: etsi_nfv_sol001_{model}_types
  - template_name: ETSI_NFV
  - template_version: {spec_version}

imports:
  {imports}

data_types:

'''

# First line of a definition table: "tosca." followed only by letters,
# digits, dots, colons and whitespace.
_DEFINITION_INCIPIT = re.compile(r'^tosca\.[a-zA-Z\.:0-9\s]*$')

# Tables whose first cell starts with one of these are known non-definition
# tables and are skipped without any diagnostic output.
_SKIPPED_TABLE_PREFIXES = ("Name", "Shorthand", "tosca_def")

# Import entry written into the header of every generated file.
_COMMON_IMPORT = (
    '- https://forge.etsi.org/rep/nfv/SOL001/raw/v{}/'
    'etsi_nfv_sol001_common_types.yaml'
)


def match_definition_incipit(txt):
    '''
    Returns True when the first line of txt (stripped) looks like the
    beginning of a TOSCA definition, i.e. it matches "tosca.<name>".
    '''
    return bool(_DEFINITION_INCIPIT.match(txt.split("\n")[0].strip()))


def _first_cell_text(table):
    '''
    Returns the text of the first cell of table, or an empty string when the
    table has no rows or the first row has no cells.
    '''
    try:
        return table.rows[0].cells[0].text
    except IndexError:
        return ''


def is_tosca_def(table):
    '''
    Returns True when a table contains a TOSCA definition, i.e. the table
    has exactly one row and one column and the first line of its only cell
    matches match_definition_incipit().
    '''
    # The shape is checked first so the cell is only read when it exists.
    return (
        len(table.rows) == 1
        and len(table.columns) == 1
        and match_definition_incipit(table.rows[0].cells[0].text)
    )


def tosca_model_info(name, imports):
    '''
    Returns a dictionary for the information on the model:
    name, output file name ('fn'), file object ('fd', initially None),
    the imports text and an in-memory buffer ('buf') for the content.
    '''
    return {
        'name': name,
        'fn': BASE_FILENAME.format(name),
        'fd': None,
        'imports': imports,
        'buf': StringIO()
    }


def get_content(doc):
    '''
    Returns a list of all paragraphs and tables in the Document, in the
    order in which they appear in the document body. Elements of any other
    kind are reported on stdout and left out of the list.
    '''
    ret = []
    body = doc._body
    parag_count = 0
    table_count = 0
    for element in body._element:
        if isinstance(element, docx.oxml.text.paragraph.CT_P):
            ret.append(Paragraph(element, body))
            parag_count += 1
        elif isinstance(element, docx.oxml.table.CT_Tbl):
            ret.append(Table(element, body))
            table_count += 1
        else:
            print("Non paragraph or table " + str(type(element)))
    print("Paragraphs: " + str(parag_count))
    print("Tables: " + str(table_count))
    return ret


def find_sect(sect_to_find, start_idx, doc_content):
    '''
    Returns the index in the doc_content list of the first paragraph or
    heading whose stripped text equals sect_to_find, starting the search
    from start_idx.

    When no such paragraph exists the returned index is past the end of
    doc_content (len(doc_content), or start_idx if that is larger), so it
    can still be used as an exclusive upper bound by the caller.
    '''
    for idx in range(start_idx, len(doc_content)):
        elem = doc_content[idx]
        if isinstance(elem, Paragraph) and elem.text.strip() == sect_to_find:
            print("FOUND " + sect_to_find + " at " + str(idx))
            return idx
    print("NOT FOUND " + sect_to_find)
    return max(start_idx, len(doc_content))


def write_table_to_file(tab, buf):
    '''
    Writes the text of the first cell of table tab to the buffer buf,
    followed by one empty line. A trailing newline is added to the text
    when it is missing.
    '''
    txt = tab.rows[0].cells[0].text
    buf.write(txt)
    if not txt.endswith('\n'):
        buf.write('\n')
    buf.write('\n')


def _report_filtered_table(table, txt):
    '''
    Prints why a table with first-cell text txt was not accepted as a
    TOSCA definition.
    '''
    print("----- Filtered out: " + txt.split("\n")[0])
    if len(table.rows) != 1:
        print("      Rows count != 1 ")
    if len(table.columns) != 1:
        print("      Columns count != 1 ")
    if not match_definition_incipit(txt):
        print("      Regex != 1 ")


def generate_tables_between(a_id, b_id, content, buf):
    '''
    Loops over content[a_id:b_id] and writes all TOSCA definitions found
    to the buffer buf. Tables that are not definitions are either skipped
    silently (known table kinds) or reported on stdout.

    Returns the number of written definitions.
    '''
    if b_id > len(content):
        # Never index past the end; report the inconsistent bounds and
        # process only what is actually there.
        print("A: " + str(a_id))
        print("B: " + str(b_id))
        print("LEN(CONTENT): " + str(len(content)))
        b_id = len(content)

    definitions_count = 0
    for idx in range(a_id, b_id):
        tmp_elem = content[idx]
        if not isinstance(tmp_elem, Table):
            continue
        if is_tosca_def(tmp_elem):
            write_table_to_file(tmp_elem, buf)
            definitions_count += 1
            continue
        txt = _first_cell_text(tmp_elem)
        if txt.strip().startswith(_SKIPPED_TABLE_PREFIXES):
            continue
        _report_filtered_table(tmp_elem, txt)

    return definitions_count


def dump_header(model_name, buf, spec_version=SPEC_VERSION, imports=None):
    '''
    Writes the header to the buffer buf for a specific model.
    When imports is None an empty imports section is written.
    '''
    buf.write(HDR.format(
        tosca_version=TOSCA_VERSION,
        model=model_name,
        spec_version=spec_version,
        imports='' if imports is None else imports))


MODELS = {}


def generate_templates(filename, spec_version=SPEC_VERSION):
    '''
    Takes a filename or file object and loads the definitions into the
    MODELS dictionary (one entry per name in MODEL_NAMES).

    The document is searched for the sections "6 VNFD TOSCA model",
    "7 NSD TOSCA model", "8 PNFD TOSCA model", "9 Common Definitions" and
    "Annex A (informative):"; the definition tables found between two
    consecutive headings are written to the buffer of the matching model.
    '''
    if isinstance(filename, str):
        print("Opening " + filename)

    # NOTE(review): every model, including 'common' itself, gets an import
    # of the common types file - confirm the self-import is intended.
    for mn in MODEL_NAMES:
        MODELS[mn] = tosca_model_info(mn, _COMMON_IMPORT.format(spec_version))

    sol_001 = docx.Document(filename)

    for info in MODELS.values():
        dump_header(info['name'], info['buf'], spec_version, info['imports'])

    content = get_content(sol_001)

    # The section search starts at the "Foreword" paragraph; anything
    # before it is ignored.
    p_id = next(
        (idx for idx, elem in enumerate(content)
         if isinstance(elem, Paragraph) and elem.text == "Foreword"),
        len(content))
    if p_id >= len(content):
        print("FOREWORD NOT FOUND")

    sect_6_id = find_sect("6\tVNFD TOSCA model", p_id, content)
    sect_7_id = find_sect("7\tNSD TOSCA model", sect_6_id, content)
    sect_8_id = find_sect("8\tPNFD TOSCA model", sect_7_id, content)
    sect_9_id = find_sect("9\tCommon Definitions", sect_8_id, content)
    annex_a_id = find_sect("Annex A (informative):", sect_9_id, content)

    sections = (
        ('vnfd', "VNFD", sect_6_id, sect_7_id),
        ('nsd', "NSD", sect_7_id, sect_8_id),
        ('pnfd', "PNFD", sect_8_id, sect_9_id),
        ('common', "Common", sect_9_id, annex_a_id),
    )
    for model, label, start, end in sections:
        count = generate_tables_between(
            start, end, content, MODELS[model]['buf'])
        print("Printed " + str(count) + " types to " + label + "\n\n\n")


def print_to_files(prefix=None):
    '''
    Writes the buffer of every model in MODELS to its file, UTF-8 encoded.

    Prefix is a path to a folder to work into. When given, the 'fn' entry
    of each model is updated to the path inside that folder. After the
    call the 'fd' entry of each model holds the (closed) file object.
    '''
    for info in MODELS.values():
        if prefix is not None:
            # basename keeps repeated calls from stacking prefixes.
            info['fn'] = os.path.join(prefix, os.path.basename(info['fn']))
        print("Writing to " + info['fn'])
        with open(info['fn'], 'w', encoding='utf-8') as fdesc:
            info['fd'] = fdesc
            fdesc.write(info['buf'].getvalue())
            fdesc.write('\n')


def main():
    '''
    Command line entry point: doc2tosca <docx-file>
    '''
    try:
        sol001_fn = sys.argv[1]
    except IndexError:
        print('Error: Filename missing or filename not a docx document')
        print('Usage: doc2tosca <docx-file>')
        sys.exit(1)

    generate_templates(sol001_fn)
    print_to_files()


if __name__ == "__main__":
    main()