Skip to content
Snippets Groups Projects
Commit 252e6e41 authored by carignani's avatar carignani
Browse files

clean up and documentation for doc2tosca

parent 7a1bdf77
No related branches found
No related tags found
No related merge requests found
#!/bin/python2.7
#!/bin/python3.5
'''
Generate TOSCA definitions from a Docx specification
'''
import sys
from io import BytesIO as StringIO
import docx
from docx.table import Table
from docx.text.paragraph import Paragraph
def is_tosca_def(table):
    '''
    Returns true when a table contains TOSCA definitions, i.e.
    the table contains just one cell and text starts with an
    empty space ' '

    Tables of any other shape, and a single cell with no text at
    all, yield False instead of raising IndexError.
    '''
    # Check the shape first so rows[0] is only read on a 1x1 table.
    if len(table.rows) != 1 or len(table.columns) != 1:
        return False
    # startswith() on the whole text copes with '', unlike text[0].
    return table.rows[0].cells[0].text.startswith(' ')
# The document to convert is the first command line argument.
try:
    SOL001_FN = sys.argv[1]
except IndexError:
    # Only a missing argument can fail here; a bare except would also
    # have trapped KeyboardInterrupt and SystemExit.
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print('Error: Filename missing or filename not a docx document')
    print('Usage: doc2tosca <docx-with-tosca-definitions>')
    sys.exit(1)

# One output file per model, created in the current working directory.
OUT_FN_VNFD = 'try-tosca-export_vnfd.yaml'
OUT_FN_NSD = 'try-tosca-export_nsd.yaml'
OUT_FN_PNFD = 'try-tosca-export_pnfd.yaml'
OUT_FN_COMM = 'try-tosca-export_comm.yaml'

SOL001 = docx.Document(SOL001_FN)

# These handles stay open for the rest of the script; the F_*.close()
# calls near the end of the file release them.
F_VNFD = open(OUT_FN_VNFD, 'w')
F_NSD = open(OUT_FN_NSD, 'w')
F_PNFD = open(OUT_FN_PNFD, 'w')
F_COMM = open(OUT_FN_COMM, 'w')
# Fixed headers written at the top of the F_* output files.
# NOTE(review): all four say "nsd types" and import the vnfd types file,
# which looks like a copy/paste slip; text kept unchanged - confirm.
HDR_VNFD = '''tosca_definitions_version: tosca_simple_yaml_1_2
description: ETSI NFV SOL 001 nsd types definitions version 2.5.1
imports:
- etsi_nfv_sol001_vnfd_2_5_1_types.yaml
data_types:
'''
HDR_NSD = '''tosca_definitions_version: tosca_simple_yaml_1_2
description: ETSI NFV SOL 001 nsd types definitions version 2.5.1
imports:
- etsi_nfv_sol001_vnfd_2_5_1_types.yaml
data_types:
'''
HDR_PNFD = '''tosca_definitions_version: tosca_simple_yaml_1_2
description: ETSI NFV SOL 001 nsd types definitions version 2.5.1
imports:
- etsi_nfv_sol001_vnfd_2_5_1_types.yaml
data_types:
'''
HDR_COMM = '''tosca_definitions_version: tosca_simple_yaml_1_2
description: ETSI NFV SOL 001 nsd types definitions version 2.5.1
imports:
- etsi_nfv_sol001_vnfd_2_5_1_types.yaml
data_types:
'''

# Constants used by tosca_model_info() and dump_header(). They were
# previously swallowed by the surrounding triple-quoted literals and
# therefore never defined.
BASE_FILENAME = "try-tosca-export_{}.yaml"
TOSCA_VERSION = "tosca_simple_yaml_1_2"
SPEC_VERSION = "2.5.1"
MODEL_NAMES = ['vnfd', 'nsd', 'pnfd', 'common']

# Header template; the placeholders are filled in by dump_header().
HDR = '''tosca_definitions_version: {tosca_version}
description: ETSI NFV SOL 001 {model} types definitions version {spec_version}
imports:
{imports}
data_types:
'''

F_VNFD.write(HDR_VNFD)
F_NSD.write(HDR_NSD)
F_PNFD.write(HDR_PNFD)
F_COMM.write(HDR_COMM)

# Cursor into the list of document elements, advanced by the scans below.
p_id = 0
cur_sect = "0"
def is_tosca_def(table):
    '''
    Returns true when a table contains TOSCA definitions, i.e.
    the table contains just one cell and text starts with an
    empty space ' '

    Tables of any other shape, and a single cell with no text at
    all, yield False instead of raising IndexError.
    '''
    # Check the shape first so rows[0] is only read on a 1x1 table.
    if len(table.rows) != 1 or len(table.columns) != 1:
        return False
    # startswith() on the whole text copes with '', unlike text[0].
    return table.rows[0].cells[0].text.startswith(' ')
from sets import Set
def tosca_model_info(name, imports):
    '''
    Builds the bookkeeping record of one model: its name, the
    output file name derived from BASE_FILENAME, an empty file
    descriptor slot, the imports text and a fresh in-memory buffer
    '''
    info = dict(name=name, imports=imports, fd=None)
    info['fn'] = BASE_FILENAME.format(name)
    info['buf'] = StringIO()
    return info
def get_content(doc):
    '''
    Returns a list of all paragraphs and tables in the Document,
    in the order in which the body element yields them. Any other
    kind of element is reported on standard output and left out.
    A short count summary is printed before returning.
    '''
    ret = []
    # NOTE(review): _body and _element are private python-docx
    # attributes - confirm they exist in the library version in use.
    body = doc._body
    # One counter per element kind; the duplicate par/tables pair and
    # its second set of diagnostic prints were redundant and are gone.
    parag_count = 0
    table_count = 0
    for element in body._element:
        if isinstance(element, docx.oxml.text.paragraph.CT_P):
            ret.append(Paragraph(element, body))
            parag_count += 1
        elif isinstance(element, docx.oxml.table.CT_Tbl):
            ret.append(Table(element, body))
            table_count += 1
        else:
            print("Non paragraph or table " + str(type(element)))
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print("Paragraphs: " + str(parag_count))
    print("Tables: " + str(table_count))
    return ret
# Advance p_id to the paragraph whose text is exactly "Foreword";
# if the scan runs off the end of the list a warning is printed.
content = get_content(SOL001)
tables = 0
while p_id < len(content):
    elem = content[p_id]
    if not isinstance(elem, Paragraph) or elem.text != "Foreword":
        p_id += 1
        continue
    break
if not p_id < len(content):
    print("FOREWORD NOT FOUND")
def find_sect(sect_to_find, start_idx, doc_content):
'''
Returns the index in the doc_content list to the first paragraph
or heading of the section with title sect_to_find,
starting the research from start_idx
'''
while start_idx < len(doc_content):
my_elem = content[start_idx]
my_elem = doc_content[start_idx]
if isinstance(my_elem, Paragraph) and my_elem.text == sect_to_find:
break
start_idx = start_idx + 1
......@@ -127,40 +86,94 @@ def find_sect(sect_to_find, start_idx, doc_content):
print "FOUND " + sect_to_find + " at " + str(start_idx)
return start_idx
# Look up the heading "6<TAB>VNFD TOSCA model", starting the search at p_id.
sect_6_id = find_sect("6\tVNFD TOSCA model", p_id, content)
def write_table_to_file(tab, buf):
    '''
    Appends the text of the first cell of table tab, utf-8
    encoded, to buf, followed by a separator comment line
    '''
    first_cell = tab.rows[0].cells[0]
    encoded = first_cell.text.encode('utf-8')
    buf.write(encoded)
    buf.write('\n# -------------------- #\n')
def generate_tables_between(a_id, b_id, content, buf):
    '''
    Walks content from index a_id up to, but not including, b_id
    and writes every table accepted by is_tosca_def to buf.
    Returns the number of tables written
    '''
    written = 0
    for position in range(a_id, b_id):
        candidate = content[position]
        if not isinstance(candidate, Table):
            continue
        if not is_tosca_def(candidate):
            continue
        write_table_to_file(candidate, buf)
        written += 1
    return written
def dump_header(model_name, buf, imports):
    '''
    Renders the HDR template for model_name with the module-wide
    TOSCA and specification versions plus the given imports text,
    and writes the result to buf
    '''
    fields = {
        'tosca_version': TOSCA_VERSION,
        'model': model_name,
        'spec_version': SPEC_VERSION,
        'imports': imports,
    }
    buf.write(HDR.format(**fields))
# NOTE(review): this tail interleaves two variants of the same flow - one
# using lower-case `content` and the F_* file handles, one using upper-case
# `CONTENT` and the MODELS buffers - and the indentation that marked their
# nesting is missing. Confirm the intended structure before running.

# One info record per model name; every model gets the same imports line.
MODELS = {}
for mn in MODEL_NAMES:
MODELS[mn] = tosca_model_info(mn, '- etsi_nfv_sol001_common_types.yaml')
# Heading of clause 7, searched after the clause 6 heading.
sect_7_id = find_sect("7\tNSD TOSCA model", sect_6_id + 1, content)
if __name__ == "__main__":
# Heading of clause 8, searched after the clause 7 heading.
sect_8_id = find_sect("8\tPNFD TOSCA model", sect_7_id + 1, content)
# The input file name is the first command line argument.
# NOTE(review): the bare except also traps SystemExit and KeyboardInterrupt.
try:
SOL001_FN = sys.argv[1]
except:
print 'Error: Filename missing or filename not a docx document'
print 'Usage: doc2tosca <docx-with-tosca-definitions>'
sys.exit(1)
# NOTE(review): the Annex A search starts after clause 7, not clause 8 - confirm.
annex_a_id = find_sect("Annex A (informative):", sect_7_id + 1, content)
print "Opening " + SOL001_FN
# Variant taking (t, F): writes the utf-8 encoded text of the first cell of
# t to F, followed by a separator comment line.
# NOTE(review): reuses the name of the (tab, buf) definition earlier in the file.
def write_table_to_file(t, F):
F.write(t.rows[0].cells[0].text.encode('utf-8'))
F.write('\n# -------------------- #\n')
SOL001 = docx.Document(SOL001_FN)
# Variant taking (a_id, b_id, FDESC, fn): reads the module-level `content`,
# writes matching tables to FDESC and prints the count labelled with fn.
# NOTE(review): reuses the name of the (a_id, b_id, content, buf) definition
# earlier in the file, whose third and fourth parameters mean something else.
def generate_tables_between(a_id, b_id, FDESC, fn):
count=0
for idx in range(a_id, b_id):
tmp_elem = content[idx]
if isinstance(tmp_elem, Table) and is_tosca_def(tmp_elem):
write_table_to_file(tmp_elem, FDESC)
count = count + 1
print "Printed " + str(count) + " type to " + fn
# Write the header into each model's in-memory buffer.
for m in MODELS:
dump_header(MODELS[m]['name'], MODELS[m]['buf'], MODELS[m]['imports'])
# Restart the element cursor and scan for the "Foreword" paragraph.
p_id = 0
cur_sect = "0"
CONTENT = get_content(SOL001)
tables=0
while p_id < len(CONTENT):
elem = CONTENT[p_id]
if isinstance(elem, Paragraph) and elem.text == "Foreword":
break
p_id = p_id + 1
# The loop ended without a break: no "Foreword" paragraph was seen.
if p_id >= len(CONTENT):
print "FOREWORD NOT FOUND"
# Section boundaries; each search starts just after the previous heading.
sect_6_id = find_sect("6\tVNFD TOSCA model", p_id, CONTENT)
sect_7_id = find_sect("7\tNSD TOSCA model", sect_6_id + 1, CONTENT)
sect_8_id = find_sect("8\tPNFD TOSCA model", sect_7_id + 1, CONTENT)
# NOTE(review): starts after clause 7 rather than clause 8 - confirm.
annex_a_id = find_sect("Annex A (informative):", sect_7_id + 1, CONTENT)
# Calls in the (FDESC, fn) form: write directly to the open F_* files.
generate_tables_between(sect_6_id, sect_7_id, F_VNFD, "VNFD")
generate_tables_between(sect_7_id, sect_8_id, F_NSD, "NSD")
generate_tables_between(sect_8_id, annex_a_id, F_PNFD, "PNFD")
# Calls in the (content, buf) form: collect into the per-model buffers.
count = generate_tables_between(sect_6_id, sect_7_id, CONTENT, MODELS['vnfd']['buf'])
print "Printed " + str(count) + " types to " + "VNFD"
# Terminate each F_* output with a newline.
F_VNFD.write('\n')
F_NSD.write('\n')
F_PNFD.write('\n')
F_COMM.write('\n')
count = generate_tables_between(sect_7_id, sect_8_id, CONTENT, MODELS['nsd']['buf'])
print "Printed " + str(count) + " types to " + "NSD"
# Release the F_* file handles.
F_VNFD.close()
F_NSD.close()
F_PNFD.close()
F_COMM.close()
count = generate_tables_between(sect_8_id, annex_a_id, CONTENT, MODELS['pnfd']['buf'])
print "Printed " + str(count) + " types to " + "PNFD"
# Flush every model buffer to its output file, adding a final newline.
for m in MODELS:
MODELS[m]['fd'] = open(MODELS[m]['fn'], 'w')
# Rewind so read() returns everything written to the buffer so far.
MODELS[m]['buf'].seek(0)
MODELS[m]['fd'].write(MODELS[m]['buf'].read())
MODELS[m]['fd'].write('\n')
MODELS[m]['fd'].close()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment