#!/bin/python3.5
'''
Generate tosca definitions from Docx specification
'''

import sys

from io import BytesIO as StringIO

import docx
from docx.table import Table
from docx.text.paragraph import Paragraph

BASE_FILENAME = "try-tosca-export_{}.yaml"
TOSCA_VERSION = "tosca_simple_yaml_1_2"
SPEC_VERSION = "2.5.1"

MODEL_NAMES = ['vnfd', 'nsd', 'pnfd', 'common']

HDR = '''tosca_definitions_version: {tosca_version}
description: ETSI NFV SOL 001 {model} types definitions version {spec_version}

imports:
{imports}

data_types:

'''

# Separator written after every exported definition. It is a bytes
# literal because the per-model buffers are io.BytesIO objects.
TABLE_SEPARATOR = b'\n# -------------------- #\n'


def is_tosca_def(table):
    '''
    Returns true when a table contains TOSCA definitions, i.e.
    the table contains just one cell and text starts with an
    empty space ' '
    '''
    # Check the shape first, so that rows[0].cells[0] is only accessed
    # on a table that is known to have exactly one cell.
    if len(table.rows) != 1 or len(table.columns) != 1:
        return False
    # str.startswith() is False for an empty cell, whereas indexing
    # text[0] would raise IndexError.
    return table.rows[0].cells[0].text.startswith(' ')


def tosca_model_info(name, imports):
    '''
    Returns a dictionary for the information on the model:
      name    - the model name
      fn      - the output file name built from BASE_FILENAME
      fd      - the output file object (None until the file is written)
      imports - the text placed under "imports:" in the header
      buf     - an in-memory bytes buffer collecting the file content
    '''
    return {
        'name': name,
        'fn': BASE_FILENAME.format(name),
        'fd': None,
        'imports': imports,
        'buf': StringIO()
    }


def get_content(doc):
    '''
    Returns a list of all paragraphs and tables in the Document,
    in the order in which they appear in the document body.
    Prints the type of any other body element and, at the end,
    the number of paragraphs and tables found.
    '''
    ret = []
    body = doc._body
    parag_count = 0
    table_count = 0
    for element in body._element:
        if isinstance(element, docx.oxml.text.paragraph.CT_P):
            ret.append(Paragraph(element, body))
            parag_count = parag_count + 1
        elif isinstance(element, docx.oxml.table.CT_Tbl):
            ret.append(Table(element, body))
            table_count = table_count + 1
        else:
            print("Non paragraph or table " + str(type(element)))
    print("Paragraphs: " + str(parag_count))
    print("Tables: " + str(table_count))
    return ret


def _find_paragraph(title, start_idx, doc_content):
    '''
    Returns the index of the first Paragraph in doc_content, at or
    after start_idx, whose text is exactly title. When there is no
    such paragraph the returned index is not a valid position
    (it is len(doc_content), or start_idx if that is larger).
    '''
    idx = start_idx
    while idx < len(doc_content):
        elem = doc_content[idx]
        if isinstance(elem, Paragraph) and elem.text == title:
            break
        idx = idx + 1
    return idx


def find_sect(sect_to_find, start_idx, doc_content):
    '''
    Returns the index in the doc_content list to the first paragraph
    or heading of the section with title sect_to_find,
    starting the search from start_idx
    '''
    start_idx = _find_paragraph(sect_to_find, start_idx, doc_content)
    # NOTE(review): the message is printed even when the title was not
    # found, in which case the index is past the end of doc_content.
    print("FOUND " + sect_to_find + " at " + str(start_idx))
    return start_idx


def write_table_to_file(tab, buf):
    '''
    Writes the text of the first cell of table tab, in utf-8 encoding,
    to the bytes buffer buf, followed by a separator line
    '''
    buf.write(tab.rows[0].cells[0].text.encode('utf-8'))
    buf.write(TABLE_SEPARATOR)


def generate_tables_between(a_id, b_id, content, buf):
    '''
    Loops over content[a_id:b_id] and writes all tosca definitions
    to the buffer buf. Returns the number of written definitions
    '''
    definitions_count = 0
    # A slice is used instead of indexing over range(a_id, b_id) so
    # that an end index past the list (section not found) yields
    # fewer elements rather than an IndexError.
    for tmp_elem in content[a_id:b_id]:
        if isinstance(tmp_elem, Table) and is_tosca_def(tmp_elem):
            write_table_to_file(tmp_elem, buf)
            definitions_count = definitions_count + 1
    return definitions_count


def dump_header(model_name, buf, imports):
    '''
    Writes the header for a specific model to the bytes buffer buf
    '''
    header = HDR.format(
        tosca_version=TOSCA_VERSION,
        model=model_name,
        spec_version=SPEC_VERSION,
        imports=imports)
    # buf is a bytes buffer, so the text has to be encoded first.
    buf.write(header.encode('utf-8'))


MODELS = {
    mn: tosca_model_info(mn, '- etsi_nfv_sol001_common_types.yaml')
    for mn in MODEL_NAMES
}


def main(argv):
    '''
    Reads the docx file named by argv[1] and writes one YAML file
    per model in MODELS. Exits with status 1 when the file name
    argument is missing.
    '''
    try:
        sol001_fn = argv[1]
    except IndexError:
        print('Error: Filename missing or filename not a docx document')
        print('Usage: doc2tosca <docx-with-tosca-definitions>')
        sys.exit(1)

    print("Opening " + sol001_fn)

    sol001 = docx.Document(sol001_fn)

    for info in MODELS.values():
        dump_header(info['name'], info['buf'], info['imports'])

    content = get_content(sol001)

    # The section titles are searched only after the "Foreword"
    # paragraph. If it is missing the search index is past the end
    # of the content and no section will be found.
    p_id = _find_paragraph("Foreword", 0, content)
    if p_id >= len(content):
        print("FOREWORD NOT FOUND")

    sect_6_id = find_sect("6\tVNFD TOSCA model", p_id, content)

    sect_7_id = find_sect("7\tNSD TOSCA model", sect_6_id + 1, content)

    sect_8_id = find_sect("8\tPNFD TOSCA model", sect_7_id + 1, content)

    annex_a_id = find_sect("Annex A (informative):", sect_7_id + 1, content)

    count = generate_tables_between(
        sect_6_id, sect_7_id, content, MODELS['vnfd']['buf'])
    print("Printed " + str(count) + " types to " + "VNFD")

    count = generate_tables_between(
        sect_7_id, sect_8_id, content, MODELS['nsd']['buf'])
    print("Printed " + str(count) + " types to " + "NSD")

    count = generate_tables_between(
        sect_8_id, annex_a_id, content, MODELS['pnfd']['buf'])
    print("Printed " + str(count) + " types to " + "PNFD")

    for info in MODELS.values():
        # The buffers hold utf-8 encoded bytes, so the files are
        # opened in binary mode; "with" closes them even on error.
        with open(info['fn'], 'wb') as fdesc:
            info['fd'] = fdesc
            fdesc.write(info['buf'].getvalue())
            fdesc.write(b'\n')


if __name__ == "__main__":
    main(sys.argv)