Newer
Older
'''
Generate tosca definitions from Docx specification
'''
import sys
from docx.table import Table
from docx.text.paragraph import Paragraph
# Pattern for the generated file names; tosca_model_info fills it with the
# dashed specification version and the model name, in that order.
BASE_FILENAME = 'generated_etsi_nfv_sol001_{}_{}_types.yaml'

# TOSCA definitions version written into generated headers.
TOSCA_VERSION = 'tosca_simple_yaml_1_2'
DEFAULT_TOSCA_VERSION = 'tosca_simple_yaml_1_2'

# Specification version strings, in the "vX.Y.Z" form.
allowed_versions = [
    'v2.6.1',
    'v2.6.3',
    'v2.7.1',
    'v2.8.1',
    'v3.3.1',
]

# Names of the models for which definitions are generated.
MODEL_NAMES = ["vnfd", "nsd", "pnfd", "common"]

# Header template; placeholders: tosca_version, model, spec_version.
HDR = (
    "tosca_definitions_version: {tosca_version}\n"
    "description: ETSI NFV SOL 001 {model} types definitions version {spec_version}\n"
    "- template_name: ETSI_NFV\n"
    "- template_version: {spec_version}\n"
    "data_types:\n"
)
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# Model information dictionaries, looked up as MODELS[<model name>].
MODELS = dict()
# Parsed Example objects, keyed by their file name.
EXAMPLES = dict()
class Section():
    '''
    Defines a level 1 section (numbered clause or annex) of the base document

    Attributes:
        from_id:  index of the first element of the section, as passed in
        to_id:    index of the last element of the section, as passed in
        is_annex: True when the stripped title starts with "Annex"
        title:    heading text without the leading number / annex prefix
        number:   clause number (int) for numbered sections, None for annexes
        letter:   annex letter for annexes, None for numbered sections
    '''

    def __init__(self, from_id, to_id, title):
        '''
        Parses title, which is either "<number><TAB><title>" or an annex
        heading whose space-separated words are "Annex", the letter, one
        further word and then the title.

        Raises IndexError or ValueError when title has neither shape.
        '''
        self.from_id = from_id
        self.to_id = to_id
        stripped = title.strip()
        self.is_annex = stripped.startswith("Annex")
        # Both identifiers exist on every instance, so reading the one that
        # does not apply yields None instead of raising AttributeError.
        self.number = None
        self.letter = None
        if self.is_annex:
            words = stripped.split(" ")
            # words[0] is "Annex", words[1] the letter, words[2] is skipped
            self.title = " ".join(words[3:])
            self.letter = words[1]
        else:
            fields = stripped.split("\t")
            self.title = fields[1]
            self.number = int(fields[0])

    def __repr__(self):
        if self.is_annex:
            return "({}, Annex {}, {}-{})".format(
                self.title, self.letter, self.from_id, self.to_id)
        return "({}, {}, {}-{})".format(
            self.title, self.number, self.from_id, self.to_id)
class Example():
    '''
    Holds the text of one example together with its file name
    '''

    def __init__(self, filename, text):
        self.filename, self.text = filename, text
# NOTE(review): the `def` line that should introduce this docstring and
# `return` is not visible in this view (presumably `match_definition_incipit`,
# the name used in commented-out code further down) -- confirm before use.
'''
Returns true if txt matches the incipit of a definition,
identified by the word 'tosca'
'''
# Only the first line of txt (whitespace-stripped) is tested: it must be
# "tosca." followed solely by letters, digits, dots, colons or whitespace.
return bool(re.match(r'^tosca\.[a-zA-Z\.:0-9\s]*$',txt.split("\n")[0].strip()))
def is_tosca_example(paragraph):
    '''
    Returns true when the text of the paragraph begins with the
    'tosca_definitions_version: ' key
    '''
    return paragraph.text.startswith("tosca_definitions_version: ")
def is_tosca_def(table):
'''
Returns true when a table contains TOSCA definitions, i.e.
the table contains just one cell and text starts with an
empty space ' '
'''
# NOTE(review): the expression below is not returned and ends with a dangling
# line continuation; the leading `return` and a final condition appear to be
# missing from this view -- restore them from the original file.
len(table.rows) == 1 and \
len(table.columns) == 1 and \
def tosca_model_info(name, version, imports):
    '''
    Returns a dictionary to hold information on the model

    Keys of the returned dictionary:
        'name'    : name, unchanged
        'fn'      : BASE_FILENAME filled with version (dots replaced by
                    dashes) and name
        'fd'      : None
        'imports' : imports, unchanged
        'buf'     : a new, empty StringIO buffer
    '''
    return {
        'name' : name,
        'fn' : BASE_FILENAME.format(version.replace(".","-"), name),
        'fd' : None,
        'imports' : imports,
        'buf' : StringIO()
    }
def get_content(doc):
    '''
    Returns a list of all paragraphs and tables in the Document

    Elements of the document body are visited in order. Paragraph elements
    are wrapped in Paragraph and table elements in Table; any other element
    is reported on stdout and left out of the result. The number of
    paragraphs and tables found is printed before returning.
    '''
    ret = []
    body = doc._body
    parag_count = 0
    table_count = 0
    for element in body._element:
        if isinstance(element, docx.oxml.text.paragraph.CT_P):
            ret.append(Paragraph(element, body))
            # count it, otherwise the summary below always reports 0
            parag_count += 1
        elif isinstance(element, docx.oxml.table.CT_Tbl):
            ret.append(Table(element, body))
            table_count += 1
        else:
            # only elements that are neither paragraph nor table are reported
            print("Non paragraph or table " + str(type(element)))
    print("Paragraphs: " + str(parag_count))
    print("Tables: " + str(table_count))
    return ret
def find_sect(sect_to_find, start_idx, doc_content):
    '''
    Returns the index in the doc_content list to the first paragraph
    or heading of the section with title sect_to_find,
    starting the search from start_idx

    The comparison is made against the whitespace-stripped paragraph text.
    When no paragraph matches, the returned index equals len(doc_content);
    the "FOUND" line is printed in that case as well.
    '''
    while start_idx < len(doc_content):
        # element under test; without this binding the check below
        # raises NameError
        my_elem = doc_content[start_idx]
        if isinstance(my_elem, Paragraph) and my_elem.text.strip() == sect_to_find:
            break
        start_idx = start_idx + 1
    print("FOUND " + sect_to_find + " at " + str(start_idx))
    return start_idx
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
def is_lvl1_section_hdn(txt):
    '''
    Returns true if txt is level 1 heading, i.e. after stripping it is
    either "<digits><TAB><letters and whitespace>" or begins like an
    "Annex <LETTERS> ..." heading
    '''
    candidate = txt.strip()
    heading_patterns = (
        r'^[0-9]+\t[a-zA-Z\s]*$',
        r'^Annex[\s]*[A-Z]+[\s\t]+[a-zA-Z\s\(\)]*',
    )
    return any(re.match(pattern, candidate) for pattern in heading_patterns)
def find_all_sections(doc_content):
    '''
    Scans the body of the document to find level 1 sections
    Returns a list of Section

    The element at index 0 is never tested as a heading, and content that
    precedes the first detected heading is not reported. A closing Section
    running to the last element is always appended; when no heading was
    detected it is built from the element at index 0.
    '''
    found = []
    current_start = 0
    total = len(doc_content)
    for position in range(1, total):
        candidate = doc_content[position]
        is_heading = isinstance(candidate, Paragraph) and \
            is_lvl1_section_hdn(candidate.text)
        if not is_heading:
            continue
        # a new heading closes the section opened by the previous one
        if current_start != 0:
            found.append(
                Section(current_start, position - 1, doc_content[current_start].text))
        current_start = position
    last_index = max(total, 1) - 1
    found.append(Section(current_start, last_index, doc_content[current_start].text))
    return found
def write_table_to_file(tab, buf):
'''
Writes the text of the first cell of the first row of table tab
to the buffer buf.

NOTE(review): in this view only newline padding is written; the statement
that writes the cell text itself (and the call to pad2) appears to be
missing -- confirm against the original file.
'''
def pad2 (txt):
# Prepends padding to txt.
# NOTE(review): all prefix tests and returned paddings look identical here;
# the whitespace inside these string literals may have been collapsed --
# verify against the original file.
if txt.startswith(" "):
return " " + txt
if txt.startswith(" "):
return " " + txt
if txt.startswith(" "):
return " " + txt
return " " + txt
txt = tab.rows[0].cells[0].text
# print("# Included in: " + tab.rows[0].cells[0].text.split("\n")[0])
# buf.write('\n# -------------------- #\n')
# Make sure the output ends with a newline, then add a blank separator line.
if not txt.endswith('\n'):
buf.write('\n')
buf.write('\n')
def generate_tables_between(a_id, b_id, content, buf):
'''
Loops over content and writes all tosca definitions to the
buffer buf. Returns the number of written definitions

NOTE(review): the loop header that binds idx is not visible in this view
(the `continue` below implies one), nor is a final return of
definitions_count; the out-of-range branch returns None. Confirm against
the original file.
'''
definitions_count = 0
# Out-of-range index: dump the state for debugging and give up.
if idx >= len(content):
print("A: " + str(a_id))
print("B: " + str(b_id))
print("IDX: " + str(idx))
print("LEN(CONTENT): " + str(len(content)))
return
tmp_elem = content[idx]
# Tables recognised as TOSCA definitions are written out and counted.
if isinstance(tmp_elem, Table) and is_tosca_def(tmp_elem):
write_table_to_file(tmp_elem, buf)
definitions_count = definitions_count + 1
elif isinstance(tmp_elem, Table):
# Other tables: those whose first cell starts with "Name", "Shorthand"
# or "tosca_def" are skipped explicitly.
txt = tmp_elem.rows[0].cells[0].text
if txt.strip().startswith("Name") or txt.strip().startswith("Shorthand") or \
txt.strip().startswith("tosca_def"):
continue
# print("----- Filtered out: " + txt.split("\n")[0])
#if not len(tmp_elem.rows) == 1:
#print(" Rows count != 1 ")
#if not len(tmp_elem.columns) == 1:
# print(" Columns count != 1 ")
#if not match_definition_incipit(txt):
# print(" Regex != 1 ")
def generate_header(model_name, buf, spec_version=SPEC_VERSION, imports=None, tosca_version=DEFAULT_TOSCA_VERSION):
'''
Writes the header to the buffer buf for a specific model

NOTE(review): the arguments of the HDR.format(...) call and its closing
parentheses are not visible in this view, and SPEC_VERSION is not defined
in the visible part of the file -- confirm against the original file.
'''
buf.write(HDR.format(
def is_start_of_example(line: str):
    '''
    Returns true if line opens an example, i.e. it starts with
    'tosca_definitions_version: '. Raises ValueError if line is not a str
    '''
    if isinstance(line, str):
        return line.startswith("tosca_definitions_version: ")
    raise ValueError("NOT A STRING")
def is_body_of_example(line: str):
    '''
    Returns true if line can belong to the body of an example: it is empty,
    starts with one of the top-level keys below, or starts with a space
    '''
    if line == "":
        return True
    # The single-space prefix is listed twice, mirroring the original checks.
    body_prefixes = (
        "imports:",
        "node_types:",
        "topology_template:",
        "description:",
        " ",
        " ",
    )
    return line.startswith(body_prefixes)
def get_example_file_name(line: str):
    '''
    Returns the first "<name>.yaml" file name found in line, where <name>
    is made of letters, digits and underscores, or "" when there is none

    The dot is matched literally and the name must not be empty, so
    ordinary text such as "in yaml" is not taken for a file name.
    '''
    matches = re.search(r'[a-zA-Z0-9_]+\.yaml', line)
    if matches is None:
        return ""
    return matches.group(0)
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
def parse_all_examples(txt):
    '''
    Collects the examples contained in txt, an iterable of Paragraph
    objects and/or strings (other items are ignored)
    Returns a list of Example

    An example starts at a line accepted by is_start_of_example, provided
    the line before it contains a file name; it continues while lines are
    accepted by is_body_of_example. Its text is prefixed with a
    "# <filename>" comment line.
    '''
    res = []
    new_example = ""
    filename = ""
    # Bound before the loop so an example starting on the very first line
    # does not read an unbound name.
    previous_line = ""
    for line in txt:
        if isinstance(line, Paragraph):
            linetext = str(line.text)
        elif isinstance(line, str):
            linetext = line
        else:
            continue
        if is_start_of_example(linetext):
            # the file name is expected on the line preceding the example
            filename = get_example_file_name(previous_line)
            if filename != "":
                new_example = "# " + filename + "\n" + linetext
        elif new_example != "" and is_body_of_example(linetext):
            new_example = new_example + "\n" + linetext
        elif len(new_example) > 0:
            # first line that is not part of the body closes the example
            res.append(Example(filename, new_example))
            new_example = ""
        previous_line = linetext
    # Input ended while an example was still open: keep it rather than
    # silently dropping it.
    if new_example != "":
        res.append(Example(filename, new_example))
    return res
def generate_examples_between(a_id, b_id, content, EXAMPLES):
    '''
    Parses the examples found in content[a_id:b_id] and stores each one in
    the EXAMPLES dictionary under its file name
    Returns the number of parsed examples, or 0 if parsing failed (the
    traceback is printed in that case)

    NOTE(review): the slice excludes content[b_id] itself -- confirm this
    is intended when b_id is the index of the last element of a section.
    '''
    try:
        examples = parse_all_examples(content[a_id:b_id])
    except Exception:
        # Best effort: report the failure and carry on. KeyboardInterrupt
        # and SystemExit are no longer swallowed.
        print(traceback.format_exc())
        return 0
    for example in examples:
        EXAMPLES[example.filename] = example
    return len(examples)
def generate_templates(filename, spec_version=SPEC_VERSION, yaml_root_path='uri', tosca_version=DEFAULT_TOSCA_VERSION):
'''
Takes a filename or file object and loads the definition into the MODELS dictionary

NOTE(review): several lines of this function are not visible in this view
(the loops that bind `m` and `sect`, and the opening of the call whose
arguments reference MODELS[m]); the comments below describe only the
statements that are visible. Confirm against the original file.
'''
if isinstance(filename, str):
print("Opening " + filename)
# Reference to the common types file; unless yaml_root_path is 'local' it is
# prefixed with the forge.etsi.org raw URL for spec_version.
import_stmt = 'etsi_nfv_sol001_common_types.yaml'
if yaml_root_path != 'local':
import_stmt = 'https://forge.etsi.org/rep/nfv/SOL001/raw/{}/'.format(spec_version) + import_stmt
# Any failure while opening the document is reported as a ValueError.
try:
sol_001 = docx.Document(filename)
except:
print("Error opening the submitted Docx file")
raise ValueError("Cannot open the submitted Docx file")
MODELS[m]['name'],
MODELS[m]['buf'],
spec_version,
MODELS[m]['imports'],
tosca_version
)
content = get_content(sol_001)
sections = find_all_sections(content)
# Maps level 1 section numbers to the model their definitions belong to.
sections_to_models = {
6 : 'vnfd',
7 : 'nsd',
8 : 'pnfd',
9 : 'common'
}
# Numbered sections listed above contribute type definitions to their model
# buffer; Annex A contributes examples to EXAMPLES.
if not sect.is_annex:
if sect.number in sections_to_models.keys():
model = sections_to_models[sect.number]
count = generate_tables_between(sect.from_id, sect.to_id, content, MODELS[model]['buf'])
print("Printed " + str(count) + " types to " + model)
else:
if sect.letter == "A":
count = generate_examples_between(sect.from_id, sect.to_id, content, EXAMPLES)
print("Printed " + str(count) + " types to " + "Annex " + sect.letter)
def print_to_files(prefix=None):
    '''
    Writes every model buffer in MODELS and every example in EXAMPLES
    to its own file

    Prefix is a path to a folder to work into; when given, it is joined in
    front of each file name. For models the joined path is stored back
    into MODELS[m]['fn'], and the (closed) file object is left in
    MODELS[m]['fd'].
    '''
    for m in MODELS:
        if prefix is not None:
            MODELS[m]['fn'] = os.path.join(prefix, MODELS[m]['fn'])
        print("Writing to " + MODELS[m]['fn'])
        # 'with' closes the file even if one of the writes fails
        with open(MODELS[m]['fn'], 'w') as model_file:
            MODELS[m]['fd'] = model_file
            MODELS[m]['buf'].seek(0)
            model_file.write(MODELS[m]['buf'].read())
            model_file.write('\n')
    for example in EXAMPLES.values():
        if prefix is not None:
            fn = os.path.join(prefix, example.filename)
        else:
            fn = example.filename
        print("Writing example file: " + fn)
        with open(fn, 'w') as newf:
            newf.write(example.text)
            newf.write("\n")
def parse_version_from_filename(filename):
    '''
    Parses the version from the filename

    The base name must start with "gs_NFV-SOL001v" or "gs_nfv-sol001v",
    otherwise "" is returned. When the prefix is followed by exactly six
    digits they are read as three two-digit fields, e.g.
    "gs_NFV-SOL001v020601p.docx" gives "v2.6.1". Any other name with a
    known prefix goes through the previous character-stripping logic
    unchanged.
    '''
    import re  # local import: this function's only use of re

    base_filename = os.path.basename(filename)
    for prefix in ("gs_NFV-SOL001v", "gs_nfv-sol001v"):
        if not base_filename.startswith(prefix):
            continue
        remainder = base_filename[len(prefix):]
        encoded = re.match(r'(\d{2})(\d{2})(\d{2})(?!\d)', remainder)
        if encoded:
            # int() drops the leading zero of each field without touching
            # significant zeros or ones (e.g. "10" stays 10)
            return "v" + ".".join(str(int(part)) for part in encoded.groups())
        # Legacy path. str.strip() removes a set of characters rather than
        # a prefix/suffix; kept as it was for names not in six-digit form.
        return "v" + base_filename.strip(prefix) \
            .replace("0", ".").strip(".").strip("p.docx")
    return ""
if __name__ == "__main__":
    # The only failure expected here is a missing command line argument.
    try:
        SOL001_FN = sys.argv[1]
    except IndexError:
        print('Error: Filename missing or filename not a docx document')
        print('Usage: doc2tosca <docx-with-tosca-definitions>')
        sys.exit(1)
    # The specification version is derived from the name of the input file.
    ver = parse_version_from_filename(SOL001_FN)
    generate_templates(SOL001_FN, spec_version=ver)