'''
Generate TOSCA definitions from the ETSI NFV SOL 001 Docx specification
'''
import os
import re
import sys
from io import StringIO
import docx
from docx.table import Table
from docx.text.paragraph import Paragraph
from example import generate_examples_between
BASE_FILENAME = "etsi_nfv_sol001_{}_{}_types.yaml"
TOSCA_VERSION = "tosca_simple_yaml_1_3"
DEFAULT_TOSCA_VERSION = "tosca_simple_yaml_1_3"
allowed_versions = ["v2.6.1", "v2.6.3", "v2.7.1", "v2.8.1", "v3.3.1"]
SPEC_VERSION = allowed_versions[-1]  # assumed default; any allowed version works
MODEL_NAMES = ['vnfd', 'nsd', 'pnfd', 'common']
HDR = '''tosca_definitions_version: {tosca_version}
description: ETSI NFV SOL 001 {model} types definitions version {spec_version}
metadata:
  template_name: ETSI_NFV
  template_version: {spec_version}
imports:
{imports}
'''
sections_to_models = {
6: 'vnfd',
7: 'nsd',
8: 'pnfd',
9: 'common'
}
SUBSECTIONS = [
"Artifact Types",
"Data Types",
"Capability Types",
"Interface Types",
"Requirements Types",
"Relationship Types",
"Interface Types",
"Node Types",
"Group Types",
"Policy Types"
]
MODELS = {}
EXAMPLES = {}
class Section():
'''
Defines a section of the base document
'''
def __init__(self, from_id, to_id, title):
self.from_id = from_id
self.to_id = to_id
self.is_annex = title.strip().startswith("Annex")
if not self.is_annex:
cleaned_title = title.strip().split("\t")
self.title = cleaned_title[1]
self.number = int(cleaned_title[0])
else:
cleaned_title = title.strip().split(" ")
self.title = " ".join(cleaned_title[3:])
self.letter = cleaned_title[1]
def __repr__(self):
if self.is_annex:
return "({}, Annex {}, {}-{})".format(
self.title, self.letter, self.from_id, self.to_id
)
return "({}, {}, {}-{})".format(
self.title, self.number, self.from_id, self.to_id
)
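# Illustrative parsing performed by Section (heading texts are hypothetical):
#   Section(10, 42, "6\tVNFD related TOSCA types") yields number 6 and title
#   "VNFD related TOSCA types", while Section(43, 99, "Annex A (informative): Examples")
#   is flagged as an annex with letter "A" and title "Examples".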
def is_tosca_def_incipit(txt):  # helper name assumed
    '''
    Returns true if txt matches the incipit of a definition,
    i.e. its first line is a TOSCA type name
    '''
    return bool(
        re.match(r'^tosca\.[a-zA-Z\.:0-9\s]*$', txt.split("\n")[0].strip())
    )


def is_tosca_def(table):
    '''
    Returns true when a table contains TOSCA definitions, i.e.
    the table contains just one cell and text starts with an
    empty space ' '
    '''
    return len(table.rows) == 1 and \
        len(table.columns) == 1 and \
        table.rows[0].cells[0].text.startswith(' ')
def tosca_model_info(name, version, imports):
    '''
    Returns a dictionary to hold information on the model
    '''
    return {
        'name': name,
        'fn': BASE_FILENAME.format(version.replace(".", "-"), name),
        'fd': None,
        'imports': imports,
        'buf': StringIO()
    }
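# For instance (illustrative arguments), tosca_model_info('vnfd', 'v2.8.1',
# '- etsi_nfv_sol001_common_types.yaml') maps 'fn' to
# 'etsi_nfv_sol001_v2-8-1_vnfd_types.yaml' and 'buf' to an empty StringIO buffer.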
def get_content(doc):
    '''
    Returns a list of all paragraphs and tables in the Document
    '''
    ret = []
    body = doc._body
    parag_count = 0
    table_count = 0
    for element in body._element:
        if isinstance(element, docx.oxml.text.paragraph.CT_P):
            ret.append(Paragraph(element, body))
            parag_count = parag_count + 1
        elif isinstance(element, docx.oxml.table.CT_Tbl):
            ret.append(Table(element, body))
            table_count = table_count + 1
        # print("Non paragraph or table " + str(type(element)))
    print("Paragraphs: " + str(parag_count))
    print("Tables: " + str(table_count))
    return ret
def find_sect(sect_to_find, start_idx, doc_content):
    '''
    Returns the index in the doc_content list to the first paragraph
    or heading of the section with title sect_to_find,
    starting the search from start_idx
    '''
    while start_idx < len(doc_content):
        my_elem = doc_content[start_idx]
        if isinstance(my_elem, Paragraph) and \
                my_elem.text.strip() == sect_to_find:
            break
        start_idx = start_idx + 1
    print("FOUND " + sect_to_find + " at " + str(start_idx))
    return start_idx
def is_lvl2_section_hdn(txt):
    ''' Returns true if txt is a level 2 heading'''
    clean_txt = txt.strip()
    if not bool(re.match(r'^[0-9]\.[0-9]+\t[a-zA-Z\s]*$', clean_txt)):
        return False
    subtitle = clean_txt.split('\t')[1]
    return subtitle in SUBSECTIONS
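# For instance, a heading such as "6.2\tData Types" (tab separated, illustrative)
# matches the pattern and its subtitle appears in SUBSECTIONS.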
def is_lvl1_section_hdn(txt):
''' Returns true if txt is level 1 heading'''
clean_txt = txt.strip()
return bool(re.match(r'^[0-9]+\t[a-zA-Z\s]*$', clean_txt)) or \
bool(re.match(r'^Annex[\s]*[A-Z]+[\s\t]+[a-zA-Z\s\(\)]*', clean_txt))
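# For instance (illustrative headings): "6\tVNFD related TOSCA types" matches the
# numbered form and "Annex A (informative): Examples" matches the annex form.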
def find_all_sections(doc_content):
    '''
    Scans the body of the document to find level 1 sections
    Returns a list of Section
    '''
    sections = []
    start_indx = 0
    end_indx = 1
    while end_indx < len(doc_content):
        my_elem = doc_content[end_indx]
        if isinstance(my_elem, Paragraph) and \
                is_lvl1_section_hdn(my_elem.text):
            sections.append(
                Section(
                    start_indx,
                    end_indx-1,
                    doc_content[start_indx].text)
            )
            start_indx = end_indx
        end_indx = end_indx + 1
    sections.append(
        Section(start_indx, end_indx-1, doc_content[start_indx].text)
    )
    return sections
def write_subsection_to_file(txt, buf):
'''
    Writes a subsection header to buffer buf
'''
buf.write(slugify(txt)+":")
if not txt.endswith('\n'):
buf.write('\n')
buf.write('\n')
def write_table_to_file(tab, buf):
    '''
    Writes the text of the (single-cell) table tab to buffer buf
    '''
    txt = tab.rows[0].cells[0].text
    buf.write(txt)
    if not txt.endswith('\n'):
        buf.write('\n')
    buf.write('\n')
range_err_mess = "ERR: Out of range (A: {}, B: {}, IDX: {}, LEN(CONTENT): {})"
def gen_tables_btwn(a_id, b_id, content, buf):
    '''
    Loops over content and writes all tosca definitions to the
    buf buffer. Returns the number of written definitions
    '''
    definitions_count = 0
    for idx in range(a_id, b_id + 1):
        if idx >= len(content):
            print(
                range_err_mess.format(a_id, b_id, idx, len(content)))
            break
        tmp_elem = content[idx]
        if isinstance(tmp_elem, Paragraph) and \
                is_lvl2_section_hdn(tmp_elem.text):
            print(tmp_elem.text)
            write_subsection_to_file(tmp_elem.text.split("\t")[1], buf)
            definitions_count = definitions_count + 1
        if isinstance(tmp_elem, Table) and is_tosca_def(tmp_elem):
            write_table_to_file(tmp_elem, buf)
            definitions_count = definitions_count + 1
        elif isinstance(tmp_elem, Table):
            txt = tmp_elem.rows[0].cells[0].text
            if txt.strip().startswith("Name") or \
                    txt.strip().startswith("Shorthand") or \
                    txt.strip().startswith("tosca_def"):
                # tables holding names/shorthands are not full definitions
                continue
    return definitions_count
def generate_header(
model_name,
buf,
spec_version=SPEC_VERSION,
imports=None,
tosca_version=DEFAULT_TOSCA_VERSION):
'''
Writes the header to the file for a specific model
'''
    buf.write(HDR.format(
        tosca_version=tosca_version, model=model_name,
        spec_version=spec_version, imports=imports))
def init_models(yaml_root, spec_ver, tosc_ver):
    '''
    Initialises the MODELS dictionary and writes each model header.
    With yaml_root set to 'local' the common types are imported from a local
    file, otherwise from the ETSI forge URL.
    '''
for model in MODEL_NAMES:
import_stmt = 'etsi_nfv_sol001_common_types.yaml'
if yaml_root != 'local':
import_stmt = \
'https://forge.etsi.org/rep/nfv/SOL001/raw/{}/{}'.format(
spec_ver, import_stmt
)
MODELS[model] = tosca_model_info(
model,
spec_ver,
'- ' + import_stmt
)
for mod in MODELS:
generate_header(
MODELS[mod]['name'],
MODELS[mod]['buf'],
spec_ver,
MODELS[mod]['imports'],
tosc_ver
)
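# With yaml_root left at 'uri' and spec_ver "v2.8.1" (illustrative), each model
# imports the common types from:
#   https://forge.etsi.org/rep/nfv/SOL001/raw/v2.8.1/etsi_nfv_sol001_common_types.yaml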
def generate_templates(
filename,
spec_ver=SPEC_VERSION,
yaml_root='uri',
tosc_ver=DEFAULT_TOSCA_VERSION):
    '''
    Takes a filename or file object and loads the definitions into
    the MODELS dictionary
'''
if isinstance(filename, str):
print("Opening " + filename)
try:
sol_001 = docx.Document(filename)
    except Exception:
print("Error opening the submitted Docx file")
raise ValueError("Cannot open the submitted Docx file")
content = get_content(sol_001)
    sections = find_all_sections(content)
    # populate the MODELS buffers before filling them with definitions
    init_models(yaml_root, spec_ver, tosc_ver)
    for sect in sections:
        if not sect.is_annex:
if sect.number in sections_to_models.keys():
model = sections_to_models[sect.number]
count = gen_tables_btwn(
sect.from_id, sect.to_id, content, MODELS[model]['buf']
)
print("Printed " + str(count) + " types to " + model)
else:
if sect.letter == "A" or sect.letter == "E":
count = generate_examples_between(
sect.from_id, sect.to_id, content, EXAMPLES
)
print("Printed {} types to Annex {}".format(
str(count), sect.letter)
)
def write_models(prefix=None):  # name and signature of this writer are assumed
    '''
    Writes the collected models and examples to files.
    Prefix is a path to a folder to work into
    '''
for key in MODELS:
mod = MODELS[key]
if prefix is not None:
mod['fn'] = os.path.join(prefix, mod['fn'])
print("Writing to " + mod['fn'])
mod['fd'] = open(mod['fn'], 'w')
mod['buf'].seek(0)
mod['fd'].write(mod['buf'].read())
mod['fd'].write('\n')
mod['fd'].close()
for k in EXAMPLES:
        if prefix is not None:
            fnm = os.path.join(prefix, "example_" + EXAMPLES[k].filename)
        else:
            fnm = EXAMPLES[k].filename
print("Writing example file: " + fnm)
        with open(fnm, 'w') as newf:
            newf.write(EXAMPLES[k].text)
            newf.write("\n")
def parse_version_from_filename(filename):
    '''
    Parses the spec version from the filename
    '''
    base_filename = os.path.basename(filename)
    if base_filename.startswith("gs_nfv-sol001v"):
        return "v" + base_filename.strip("gs_nfv-sol001v") \
            .replace("0", ".").strip(".").strip("p.docx")
    return "v" + base_filename.strip("gs_NFV-SOL001v") \
        .replace("0", ".").strip(".").strip("p.docx")
def slugify(t):
return t.replace(" ", "_").lower()
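# For example, slugify("Data Types") returns "data_types", which
# write_subsection_to_file then emits as the "data_types:" label.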
if __name__ == "__main__":
try:
SOL001_FN = sys.argv[1]
    except IndexError:
print('Error: Filename missing or filename not a docx document')
print('Usage: doc2tosca <docx-with-tosca-definitions>')
sys.exit(1)
    ver = parse_version_from_filename(SOL001_FN)
    generate_templates(SOL001_FN, spec_ver=ver)
    write_models()  # dump the generated model buffers and examples to disk
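# Example invocation (module and document file names are illustrative):
#   python doc2tosca.py gs_NFV-SOL001v030301p.docx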