Newer
Older
'''
Generate TOSCA definitions from a Docx specification
'''
import os
import re
import sys
from io import StringIO

import docx
from docx.table import Table
from docx.text.paragraph import Paragraph
# Output file name pattern; "{}" is replaced with the model name.
BASE_FILENAME = "try-tosca-export_{}.yaml"

# Value written as tosca_definitions_version in every generated file.
TOSCA_VERSION = "tosca_simple_yaml_1_2"

# Names of the models for which an output file is generated.
MODEL_NAMES = ['vnfd', 'nsd', 'pnfd', 'common']

# Header template written at the top of every generated file. It is filled
# with str.format() using the fields tosca_version, model and spec_version.
# The metadata section is a YAML map and uses the TOSCA keynames
# template_name / template_author / template_version (the previous text
# emitted template_name twice, as list items).
HDR = '''tosca_definitions_version: {tosca_version}
description: ETSI NFV SOL 001 {model} types definitions version {spec_version}
metadata:
  template_name: {model}
  template_author: ETSI_NFV
  template_version: {spec_version}
data_types:
'''
def match_definition_incipit(txt):
    '''
    Tells whether the first line of txt, once stripped of surrounding
    whitespace, starts with "tosca." and continues only with letters,
    digits, dots, colons and whitespace
    '''
    first_line, _, _ = txt.partition("\n")
    found = re.match(r'^tosca\.[a-zA-Z\.:0-9\s]*$', first_line.strip())
    return found is not None
def is_tosca_def(table):
    '''
    Returns True when a table contains a TOSCA definition, i.e.
    the table has exactly one row and one column and the text of
    its only cell is accepted by match_definition_incipit.
    Returns False otherwise
    '''
    if len(table.rows) != 1 or len(table.columns) != 1:
        return False
    # Read the cell only after the shape check, so rows[0] is known to exist.
    txt = table.rows[0].cells[0].text
    return match_definition_incipit(txt)
def tosca_model_info(name, imports):
    '''
    Builds the record describing one model: its name, its output
    file name derived from BASE_FILENAME, a file descriptor slot
    initialised to None, the imports value as given, and a fresh
    in-memory text buffer
    '''
    info = dict(
        name=name,
        fn=BASE_FILENAME.format(name),
        fd=None,
        imports=imports,
    )
    info['buf'] = StringIO()
    return info
def get_content(doc):
    '''
    Returns a list of all paragraphs and tables in the Document,
    in the order in which they are encountered in the document body.
    Elements that are neither a paragraph nor a table are reported
    on stdout and left out of the list
    '''
    ret = []
    parag_count = 0
    table_count = 0
    # NOTE(review): relies on the private _body/_element attributes of the
    # document object -- presumably the body's child XML elements; confirm
    # against the python-docx version in use.
    body = doc._body
    for element in body._element:
        if isinstance(element, docx.oxml.text.paragraph.CT_P):
            ret.append(Paragraph(element, body))
            parag_count += 1
        elif isinstance(element, docx.oxml.table.CT_Tbl):
            ret.append(Table(element, body))
            table_count += 1
        else:
            print("Non paragraph or table " + str(type(element)))
    print("Paragraphs: " + str(parag_count))
    print("Tables: " + str(table_count))
    return ret
def find_sect(sect_to_find, start_idx, doc_content):
    '''
    Returns the index in the doc_content list of the first Paragraph
    whose text equals sect_to_find, starting the search from start_idx.

    When no such paragraph exists the returned index is not a valid
    position in doc_content: it is len(doc_content), or start_idx
    itself if that was already past the end
    '''
    while start_idx < len(doc_content):
        my_elem = doc_content[start_idx]
        if isinstance(my_elem, Paragraph) and my_elem.text == sect_to_find:
            print("FOUND " + sect_to_find + " at " + str(start_idx))
            return start_idx
        start_idx = start_idx + 1
    # Do not claim success when the whole list was scanned without a match.
    print("NOT FOUND " + sect_to_find)
    return start_idx
def write_table_to_file(tab, buf):
    '''
    Writes the text of the first cell of table tab to buf, with every
    line prefixed by one space. A newline is added when the text does
    not already end with one, then one more newline is written
    '''
    txt = tab.rows[0].cells[0].text
    # NOTE(review): the previous helper tested startswith(" ") three times
    # and returned " " + txt in every branch, so a single-space prefix is
    # the effective behaviour. If different pad widths per indentation
    # level were intended, reintroduce them here.
    buf.write("\n".join(" " + line for line in txt.split("\n")))
    if not txt.endswith('\n'):
        buf.write('\n')
    buf.write('\n')
def generate_tables_between(a_id, b_id, content, buf):
    '''
    Loops over the elements of content from index a_id (included) to
    b_id (excluded) and writes all tosca definitions found to buf.
    Tables that are not recognised as definitions are reported on
    stdout, except those whose first cell starts with "Name",
    "Shorthand" or "tosca_def". Returns the number of written
    definitions
    '''
    definitions_count = 0
    # Slicing tolerates indexes past the end of content, which is what
    # find_sect hands back when a section title is not found.
    for tmp_elem in content[a_id:b_id]:
        if not isinstance(tmp_elem, Table):
            continue
        if is_tosca_def(tmp_elem):
            write_table_to_file(tmp_elem, buf)
            definitions_count += 1
            continue
        txt = tmp_elem.rows[0].cells[0].text
        if txt.strip().startswith(("Name", "Shorthand", "tosca_def")):
            continue
        # Explain why this table was not taken as a definition.
        print("----- Filtered out: " + txt.split("\n")[0])
        if len(tmp_elem.rows) != 1:
            print(" Rows count != 1 ")
        if len(tmp_elem.columns) != 1:
            print(" Columns count != 1 ")
        if not match_definition_incipit(txt):
            print(" Regex != 1 ")
    return definitions_count
def dump_header(model_name, buf, imports=None):
    '''
    Renders the HDR template for the given model and writes the
    result to buf
    '''
    # NOTE(review): SPEC_VERSION is not defined in the visible part of this
    # module -- confirm it is set before this function is called.
    fields = {
        'tosca_version': TOSCA_VERSION,
        'model': model_name,
        'spec_version': SPEC_VERSION,
        # Passed through to format(); it only shows up in the output if
        # HDR contains an {imports} field.
        'imports': imports,
    }
    header = HDR.format(**fields)
    buf.write(header)
# Per-model state (see tosca_model_info), keyed by model name.
MODELS = {}
for mn in MODEL_NAMES:
    MODELS[mn] = tosca_model_info(
        name=mn,
        imports='- etsi_nfv_sol001_common_types.yaml',
    )
def generate_templates(filename):
    '''
    Takes a filename and loads the definition into the MODELS dictionary.

    Writes the header into the buffer of every model, opens the Docx
    document, then copies the TOSCA definition tables found between
    the section headings 6/7, 7/8 and 8/Annex A into the buffers of
    the 'vnfd', 'nsd' and 'pnfd' models respectively
    '''
    if isinstance(filename, str):
        print("Opening " + filename)

    for m in MODELS:
        # MODELS[m]['imports']
        dump_header(MODELS[m]['name'], MODELS[m]['buf'])

    content = get_content(docx.Document(filename))

    # Section headings are searched for only after the "Foreword" paragraph.
    # NOTE(review): presumably this skips the table of contents -- confirm.
    p_id = 0
    while p_id < len(content):
        elem = content[p_id]
        if isinstance(elem, Paragraph) and elem.text == "Foreword":
            break
        p_id = p_id + 1
    if p_id >= len(content):
        print("WARNING: Foreword not found, section headings will not be found")

    sect_6_id = find_sect("6\tVNFD TOSCA model", p_id, content)
    sect_7_id = find_sect("7\tNSD TOSCA model", sect_6_id + 1, content)
    sect_8_id = find_sect("8\tPNFD TOSCA model", sect_7_id + 1, content)
    annex_a_id = find_sect("Annex A (informative):", sect_7_id + 1, content)

    count = generate_tables_between(sect_6_id, sect_7_id, content, MODELS['vnfd']['buf'])
    print("Printed " + str(count) + " types to " + "VNFD\n\n\n")
    count = generate_tables_between(sect_7_id, sect_8_id, content, MODELS['nsd']['buf'])
    print("Printed " + str(count) + " types to " + "NSD\n\n\n")
    count = generate_tables_between(sect_8_id, annex_a_id, content, MODELS['pnfd']['buf'])
    print("Printed " + str(count) + " types to " + "PNFD\n\n\n")
def print_to_files(prefix=None):
    '''
    Writes the buffer of every model in MODELS to the file named by
    its 'fn' entry, followed by a newline.

    Prefix is a path to a folder to work into. When given, it is
    joined in front of each 'fn' entry and the result is stored back
    into MODELS, so a second call with a prefix prepends it again
    '''
    for m in MODELS:
        model = MODELS[m]
        if prefix is not None:
            model['fn'] = os.path.join(prefix, model['fn'])
        print("Writing to " + model['fn'])
        # The context manager closes the file even if a write fails.
        with open(model['fn'], 'w', encoding='utf-8') as fdesc:
            # Kept for callers that inspect 'fd'; it is closed on exit.
            model['fd'] = fdesc
            fdesc.write(model['buf'].getvalue())
            fdesc.write('\n')
if __name__ == "__main__":
    # Script entry point: the first command-line argument is the Docx file.
    try:
        SOL001_FN = sys.argv[1]
    except IndexError:
        # Only a missing argument is expected here; anything else should
        # surface instead of being swallowed.
        print('Error: Filename missing or filename not a docx document')
        print('Usage: doc2tosca <docx-with-tosca-definitions>')
        sys.exit(1)
    generate_templates(SOL001_FN)
    print_to_files()