Loading doc2tosca.py +128 −115 Original line number Diff line number Diff line #!/bin/python2.7 #!/bin/python3.5 ''' Generate tosca definitions from Docx specfication ''' import sys from io import BytesIO as StringIO import docx from docx.table import Table from docx.text.paragraph import Paragraph BASE_FILENAME = "try-tosca-export_{}.yaml" TOSCA_VERSION = "tosca_simple_yaml_1_2" SPEC_VERSION = "2.5.1" MODEL_NAMES = ['vnfd', 'nsd', 'pnfd', 'common'] HDR = '''tosca_definitions_version: {tosca_version} description: ETSI NFV SOL 001 {model} types definitions version {spec_version} imports: {imports} data_types: ''' def is_tosca_def(table): ''' Returns true when a table contains TOSCA definitions, i.e. the table contains just one cell and text starts with an empty space ' '' empty space ' ' ''' txt = table.rows[0].cells[0].text[0] return \ Loading @@ -20,106 +38,47 @@ def is_tosca_def(table): len(table.columns) == 1 and \ txt.startswith(' ') try: SOL001_FN = sys.argv[1] except: print 'Error: Filename missing or filename not a docx document' print 'Usage: doc2tosca <docx-with-tosca-definitions>' sys.exit(1) OUT_FN_VNFD = 'try-tosca-export_vnfd.yaml' OUT_FN_NSD = 'try-tosca-export_nsd.yaml' OUT_FN_PNFD = 'try-tosca-export_pnfd.yaml' OUT_FN_COMM = 'try-tosca-export_comm.yaml' SOL001 = docx.Document(SOL001_FN) F_VNFD = open(OUT_FN_VNFD, 'w') F_NSD = open(OUT_FN_NSD, 'w') F_PNFD = open(OUT_FN_PNFD, 'w') F_COMM = open(OUT_FN_COMM, 'w') HDR_VNFD ='''tosca_definitions_version: tosca_simple_yaml_1_2 description: ETSI NFV SOL 001 nsd types definitions version 2.5.1 imports: - etsi_nfv_sol001_vnfd_2_5_1_types.yaml data_types: def tosca_model_info(name, imports): ''' HDR_NSD ='''tosca_definitions_version: tosca_simple_yaml_1_2 description: ETSI NFV SOL 001 nsd types definitions version 2.5.1 imports: - etsi_nfv_sol001_vnfd_2_5_1_types.yaml data_types: Returns a dictionary for the information on the model ''' return { 'name' : name, 'fn' : BASE_FILENAME.format(name), 'fd' : None, 'imports' : imports, 'buf' : StringIO() } HDR_PNFD ='''tosca_definitions_version: tosca_simple_yaml_1_2 description: ETSI NFV SOL 001 nsd types definitions version 2.5.1 imports: - etsi_nfv_sol001_vnfd_2_5_1_types.yaml data_types: def get_content(doc): ''' HDR_COMM ='''tosca_definitions_version: tosca_simple_yaml_1_2 description: ETSI NFV SOL 001 nsd types definitions version 2.5.1 imports: - etsi_nfv_sol001_vnfd_2_5_1_types.yaml data_types: Returns a list of all paragraphs and tables in the Document ''' F_VNFD.write(HDR_VNFD) F_NSD.write(HDR_NSD) F_PNFD.write(HDR_PNFD) F_COMM.write(HDR_COMM) p_id = 0 cur_sect = "0" from sets import Set def get_content(doc): ret = [] body = doc._body par = 0 tables = 0 parag_count = 0 table_count = 0 for element in body._element: if isinstance(element, docx.oxml.text.paragraph.CT_P): ret.append(Paragraph(element, body)) par = par + 1 parag_count = parag_count + 1 elif isinstance(element, docx.oxml.table.CT_Tbl): ret.append(Table(element, body)) tables = tables + 1 table_count = table_count + 1 else: print "FOUND " + str(type(element)) print "pars " + str(par) print "tables " + str(tables) print "Non paragraph or table " + str(type(element)) print "Paragraphs: " + str(parag_count) print "Tables: " + str(table_count) return ret content = get_content(SOL001) tables=0 while p_id < len(content): elem = content[p_id] if isinstance(elem, Paragraph) and elem.text == "Foreword": break p_id = p_id + 1 if p_id >= len(content): print "FOREWORD NOT FOUND" def find_sect(sect_to_find, start_idx, doc_content): ''' Returns the index in the doc_content list to the first paragraph or heading of the section with title sect_to_find, starting the research from start_idx ''' while start_idx < len(doc_content): my_elem = content[start_idx] my_elem = doc_content[start_idx] if isinstance(my_elem, Paragraph) and my_elem.text == sect_to_find: break start_idx = start_idx + 1 Loading @@ -127,40 +86,94 @@ def find_sect(sect_to_find, start_idx, doc_content): print "FOUND " + sect_to_find + " at " + str(start_idx) return start_idx sect_6_id = find_sect("6\tVNFD TOSCA model", p_id, content) def write_table_to_file(tab, buf): ''' Writes content of table t in utf-8 encoding to file F ''' buf.write(tab.rows[0].cells[0].text.encode('utf-8')) buf.write('\n# -------------------- #\n') def generate_tables_between(a_id, b_id, content, buf): ''' Loops over content and writes all tosca definitions to the fdesc file. Returns the number of written definitions ''' definitions_count = 0 for idx in range(a_id, b_id): tmp_elem = content[idx] if isinstance(tmp_elem, Table) and is_tosca_def(tmp_elem): write_table_to_file(tmp_elem, buf) definitions_count = definitions_count + 1 return definitions_count sect_7_id = find_sect("7\tNSD TOSCA model", sect_6_id + 1, content) def dump_header(model_name, buf, imports): ''' Writes the header to the file for a specific model ''' buf.write(HDR.format( tosca_version=TOSCA_VERSION, model=model_name, spec_version=SPEC_VERSION, imports=imports)) MODELS = {} sect_8_id = find_sect("8\tPNFD TOSCA model", sect_7_id + 1, content) for mn in MODEL_NAMES: MODELS[mn] = tosca_model_info(mn, '- etsi_nfv_sol001_common_types.yaml') annex_a_id = find_sect("Annex A (informative):", sect_7_id + 1, content) def write_table_to_file(t, F): F.write(t.rows[0].cells[0].text.encode('utf-8')) F.write('\n# -------------------- #\n') if __name__ == "__main__": def generate_tables_between(a_id, b_id, FDESC, fn): count=0 for idx in range(a_id, b_id): tmp_elem = content[idx] if isinstance(tmp_elem, Table) and is_tosca_def(tmp_elem): write_table_to_file(tmp_elem, FDESC) count = count + 1 print "Printed " + str(count) + " type to " + fn try: SOL001_FN = sys.argv[1] except: print 'Error: Filename missing or filename not a docx document' print 'Usage: doc2tosca <docx-with-tosca-definitions>' sys.exit(1) print "Opening " + SOL001_FN SOL001 = docx.Document(SOL001_FN) for m in MODELS: dump_header(MODELS[m]['name'], MODELS[m]['buf'], MODELS[m]['imports']) p_id = 0 cur_sect = "0" CONTENT = get_content(SOL001) tables=0 while p_id < len(CONTENT): elem = CONTENT[p_id] if isinstance(elem, Paragraph) and elem.text == "Foreword": break p_id = p_id + 1 if p_id >= len(CONTENT): print "FOREWORD NOT FOUND" sect_6_id = find_sect("6\tVNFD TOSCA model", p_id, CONTENT) sect_7_id = find_sect("7\tNSD TOSCA model", sect_6_id + 1, CONTENT) sect_8_id = find_sect("8\tPNFD TOSCA model", sect_7_id + 1, CONTENT) annex_a_id = find_sect("Annex A (informative):", sect_7_id + 1, CONTENT) generate_tables_between(sect_6_id, sect_7_id, F_VNFD, "VNFD") generate_tables_between(sect_7_id, sect_8_id, F_NSD, "NSD") generate_tables_between(sect_8_id, annex_a_id, F_PNFD, "PNFD") count = generate_tables_between(sect_6_id, sect_7_id, CONTENT, MODELS['vnfd']['buf']) print "Printed " + str(count) + " types to " + "VNFD" F_VNFD.write('\n') F_NSD.write('\n') F_PNFD.write('\n') F_COMM.write('\n') count = generate_tables_between(sect_7_id, sect_8_id, CONTENT, MODELS['nsd']['buf']) print "Printed " + str(count) + " types to " + "NSD" F_VNFD.close() F_NSD.close() F_PNFD.close() F_COMM.close() count = generate_tables_between(sect_8_id, annex_a_id, CONTENT, MODELS['pnfd']['buf']) print "Printed " + str(count) + " types to " + "PNFD" for m in MODELS: MODELS[m]['fd'] = open(MODELS[m]['fn'], 'w') MODELS[m]['buf'].seek(0) MODELS[m]['fd'].write(MODELS[m]['buf'].read()) MODELS[m]['fd'].write('\n') MODELS[m]['fd'].close() Loading
doc2tosca.py +128 −115 Original line number Diff line number Diff line #!/bin/python2.7 #!/bin/python3.5 ''' Generate tosca definitions from Docx specfication ''' import sys from io import BytesIO as StringIO import docx from docx.table import Table from docx.text.paragraph import Paragraph BASE_FILENAME = "try-tosca-export_{}.yaml" TOSCA_VERSION = "tosca_simple_yaml_1_2" SPEC_VERSION = "2.5.1" MODEL_NAMES = ['vnfd', 'nsd', 'pnfd', 'common'] HDR = '''tosca_definitions_version: {tosca_version} description: ETSI NFV SOL 001 {model} types definitions version {spec_version} imports: {imports} data_types: ''' def is_tosca_def(table): ''' Returns true when a table contains TOSCA definitions, i.e. the table contains just one cell and text starts with an empty space ' '' empty space ' ' ''' txt = table.rows[0].cells[0].text[0] return \ Loading @@ -20,106 +38,47 @@ def is_tosca_def(table): len(table.columns) == 1 and \ txt.startswith(' ') try: SOL001_FN = sys.argv[1] except: print 'Error: Filename missing or filename not a docx document' print 'Usage: doc2tosca <docx-with-tosca-definitions>' sys.exit(1) OUT_FN_VNFD = 'try-tosca-export_vnfd.yaml' OUT_FN_NSD = 'try-tosca-export_nsd.yaml' OUT_FN_PNFD = 'try-tosca-export_pnfd.yaml' OUT_FN_COMM = 'try-tosca-export_comm.yaml' SOL001 = docx.Document(SOL001_FN) F_VNFD = open(OUT_FN_VNFD, 'w') F_NSD = open(OUT_FN_NSD, 'w') F_PNFD = open(OUT_FN_PNFD, 'w') F_COMM = open(OUT_FN_COMM, 'w') HDR_VNFD ='''tosca_definitions_version: tosca_simple_yaml_1_2 description: ETSI NFV SOL 001 nsd types definitions version 2.5.1 imports: - etsi_nfv_sol001_vnfd_2_5_1_types.yaml data_types: def tosca_model_info(name, imports): ''' HDR_NSD ='''tosca_definitions_version: tosca_simple_yaml_1_2 description: ETSI NFV SOL 001 nsd types definitions version 2.5.1 imports: - etsi_nfv_sol001_vnfd_2_5_1_types.yaml data_types: Returns a dictionary for the information on the model ''' return { 'name' : name, 'fn' : BASE_FILENAME.format(name), 'fd' : None, 'imports' : imports, 'buf' : StringIO() } HDR_PNFD ='''tosca_definitions_version: tosca_simple_yaml_1_2 description: ETSI NFV SOL 001 nsd types definitions version 2.5.1 imports: - etsi_nfv_sol001_vnfd_2_5_1_types.yaml data_types: def get_content(doc): ''' HDR_COMM ='''tosca_definitions_version: tosca_simple_yaml_1_2 description: ETSI NFV SOL 001 nsd types definitions version 2.5.1 imports: - etsi_nfv_sol001_vnfd_2_5_1_types.yaml data_types: Returns a list of all paragraphs and tables in the Document ''' F_VNFD.write(HDR_VNFD) F_NSD.write(HDR_NSD) F_PNFD.write(HDR_PNFD) F_COMM.write(HDR_COMM) p_id = 0 cur_sect = "0" from sets import Set def get_content(doc): ret = [] body = doc._body par = 0 tables = 0 parag_count = 0 table_count = 0 for element in body._element: if isinstance(element, docx.oxml.text.paragraph.CT_P): ret.append(Paragraph(element, body)) par = par + 1 parag_count = parag_count + 1 elif isinstance(element, docx.oxml.table.CT_Tbl): ret.append(Table(element, body)) tables = tables + 1 table_count = table_count + 1 else: print "FOUND " + str(type(element)) print "pars " + str(par) print "tables " + str(tables) print "Non paragraph or table " + str(type(element)) print "Paragraphs: " + str(parag_count) print "Tables: " + str(table_count) return ret content = get_content(SOL001) tables=0 while p_id < len(content): elem = content[p_id] if isinstance(elem, Paragraph) and elem.text == "Foreword": break p_id = p_id + 1 if p_id >= len(content): print "FOREWORD NOT FOUND" def find_sect(sect_to_find, start_idx, doc_content): ''' Returns the index in the doc_content list to the first paragraph or heading of the section with title sect_to_find, starting the research from start_idx ''' while start_idx < len(doc_content): my_elem = content[start_idx] my_elem = doc_content[start_idx] if isinstance(my_elem, Paragraph) and my_elem.text == sect_to_find: break start_idx = start_idx + 1 Loading @@ -127,40 +86,94 @@ def find_sect(sect_to_find, start_idx, doc_content): print "FOUND " + sect_to_find + " at " + str(start_idx) return start_idx sect_6_id = find_sect("6\tVNFD TOSCA model", p_id, content) def write_table_to_file(tab, buf): ''' Writes content of table t in utf-8 encoding to file F ''' buf.write(tab.rows[0].cells[0].text.encode('utf-8')) buf.write('\n# -------------------- #\n') def generate_tables_between(a_id, b_id, content, buf): ''' Loops over content and writes all tosca definitions to the fdesc file. Returns the number of written definitions ''' definitions_count = 0 for idx in range(a_id, b_id): tmp_elem = content[idx] if isinstance(tmp_elem, Table) and is_tosca_def(tmp_elem): write_table_to_file(tmp_elem, buf) definitions_count = definitions_count + 1 return definitions_count sect_7_id = find_sect("7\tNSD TOSCA model", sect_6_id + 1, content) def dump_header(model_name, buf, imports): ''' Writes the header to the file for a specific model ''' buf.write(HDR.format( tosca_version=TOSCA_VERSION, model=model_name, spec_version=SPEC_VERSION, imports=imports)) MODELS = {} sect_8_id = find_sect("8\tPNFD TOSCA model", sect_7_id + 1, content) for mn in MODEL_NAMES: MODELS[mn] = tosca_model_info(mn, '- etsi_nfv_sol001_common_types.yaml') annex_a_id = find_sect("Annex A (informative):", sect_7_id + 1, content) def write_table_to_file(t, F): F.write(t.rows[0].cells[0].text.encode('utf-8')) F.write('\n# -------------------- #\n') if __name__ == "__main__": def generate_tables_between(a_id, b_id, FDESC, fn): count=0 for idx in range(a_id, b_id): tmp_elem = content[idx] if isinstance(tmp_elem, Table) and is_tosca_def(tmp_elem): write_table_to_file(tmp_elem, FDESC) count = count + 1 print "Printed " + str(count) + " type to " + fn try: SOL001_FN = sys.argv[1] except: print 'Error: Filename missing or filename not a docx document' print 'Usage: doc2tosca <docx-with-tosca-definitions>' sys.exit(1) print "Opening " + SOL001_FN SOL001 = docx.Document(SOL001_FN) for m in MODELS: dump_header(MODELS[m]['name'], MODELS[m]['buf'], MODELS[m]['imports']) p_id = 0 cur_sect = "0" CONTENT = get_content(SOL001) tables=0 while p_id < len(CONTENT): elem = CONTENT[p_id] if isinstance(elem, Paragraph) and elem.text == "Foreword": break p_id = p_id + 1 if p_id >= len(CONTENT): print "FOREWORD NOT FOUND" sect_6_id = find_sect("6\tVNFD TOSCA model", p_id, CONTENT) sect_7_id = find_sect("7\tNSD TOSCA model", sect_6_id + 1, CONTENT) sect_8_id = find_sect("8\tPNFD TOSCA model", sect_7_id + 1, CONTENT) annex_a_id = find_sect("Annex A (informative):", sect_7_id + 1, CONTENT) generate_tables_between(sect_6_id, sect_7_id, F_VNFD, "VNFD") generate_tables_between(sect_7_id, sect_8_id, F_NSD, "NSD") generate_tables_between(sect_8_id, annex_a_id, F_PNFD, "PNFD") count = generate_tables_between(sect_6_id, sect_7_id, CONTENT, MODELS['vnfd']['buf']) print "Printed " + str(count) + " types to " + "VNFD" F_VNFD.write('\n') F_NSD.write('\n') F_PNFD.write('\n') F_COMM.write('\n') count = generate_tables_between(sect_7_id, sect_8_id, CONTENT, MODELS['nsd']['buf']) print "Printed " + str(count) + " types to " + "NSD" F_VNFD.close() F_NSD.close() F_PNFD.close() F_COMM.close() count = generate_tables_between(sect_8_id, annex_a_id, CONTENT, MODELS['pnfd']['buf']) print "Printed " + str(count) + " types to " + "PNFD" for m in MODELS: MODELS[m]['fd'] = open(MODELS[m]['fn'], 'w') MODELS[m]['buf'].seek(0) MODELS[m]['fd'].write(MODELS[m]['buf'].read()) MODELS[m]['fd'].write('\n') MODELS[m]['fd'].close()