Newer
Older
'''
Generate TOSCA definitions from a Docx specification
'''
import logging
import os
import re
import sys
from io import StringIO

import docx
from docx.table import Table
from docx.text.paragraph import Paragraph

from example import generate_examples_between
# Pattern of the generated file names: the two placeholders are filled
# with the spec version (dots replaced by dashes) and the model name.
BASE_FILENAME = "etsi_nfv_sol001_{}_{}_types.yaml"
TOSCA_VERSION = "tosca_simple_yaml_1_3"
DEFAULT_TOSCA_VERSION = "tosca_simple_yaml_1_3"
# NOTE(review): SPEC_VERSION is used as a default argument by
# generate_header() and generate_templates() but its definition is not
# visible in this part of the file - confirm it is defined before them.
allowed_versions = ["v2.6.1", "v2.6.3", "v2.7.1", "v2.8.1", "v3.3.1"]
MODEL_NAMES = ['vnfd', 'nsd', 'pnfd', 'common']

# Header written at the top of every generated file; formatted with the
# keywords tosca_version, model and spec_version.
# NOTE(review): the literal was not terminated and looks like it lost
# lines (the key that should introduce the two "- template_..." items,
# and possibly an {imports} placeholder) - confirm against the intended
# output before relying on the generated header.
HDR = '''tosca_definitions_version: {tosca_version}
description: ETSI NFV SOL 001 {model} types definitions version {spec_version}
- template_name: ETSI_NFV
- template_version: {spec_version}
'''

# Number of the level 1 section of the document -> model it describes.
sections_to_models = {
    6: 'vnfd',
    7: 'nsd',
    8: 'pnfd',
    9: 'common'
}

# Titles of the level 2 sections that hold type definitions.
SUBSECTIONS = [
    "Artifact Types",
    "Data Types",
    "Capability Types",
    "Interface Types",
    "Requirements Types",
    "Relationship Types",
    "Interface Types",
    "Node Types",
    "Group Types",
    "Policy Types"
]

# Filled at run time: model name -> model info, and the parsed examples.
MODELS = {}
EXAMPLES = {}
class Section:
    '''
    Defines a section of the base document

    from_id and to_id are stored as given. The heading text decides the
    kind of section: a heading starting with "Annex" is split on spaces
    (second word is the letter, words from the fourth on are the title),
    any other heading is split on tabs into number and title.
    '''

    def __init__(self, from_id, to_id, title):
        self.from_id = from_id
        self.to_id = to_id
        heading = title.strip()
        self.is_annex = heading.startswith("Annex")
        # Both attributes always exist, so either can be read without
        # checking is_annex first; the one that does not apply is None.
        self.number = None
        self.letter = None
        if not self.is_annex:
            cleaned_title = heading.split("\t")
            self.title = cleaned_title[1]
            self.number = int(cleaned_title[0])
        else:
            cleaned_title = heading.split(" ")
            self.title = " ".join(cleaned_title[3:])
            self.letter = cleaned_title[1]

    def __repr__(self):
        if self.is_annex:
            return "({}, Annex {}, {}-{})".format(
                self.title, self.letter, self.from_id, self.to_id
            )
        return "({}, {}, {}-{})".format(
            self.title, self.number, self.from_id, self.to_id
        )
# NOTE(review): the "def" line of this helper was missing; the name is
# restored from the wording of its docstring - confirm.
def match_definition_incipit(txt):
    '''
    Returns true if txt matches the incipit of a definition, i.e. its
    first line, stripped of surrounding whitespace, starts with
    "tosca." and is made only of letters, digits, dots, colons and
    whitespace
    '''
    return bool(
        re.match(r'^tosca\.[a-zA-Z\.:0-9\s]*$', txt.split("\n")[0].strip())
    )


def is_tosca_def(table):
    '''
    Returns true when a table contains TOSCA definitions, i.e.
    the table contains just one cell and the text of that cell
    opens with the incipit of a definition
    '''
    # NOTE(review): the third condition was cut off after the line
    # continuation; the incipit check defined just above is assumed to
    # be the missing test - confirm.
    return \
        len(table.rows) == 1 and \
        len(table.columns) == 1 and \
        match_definition_incipit(table.rows[0].cells[0].text)
def tosca_model_info(name, version, imports):
    '''
    Returns a dictionary to hold information on the model

    Keys: 'name' (the model name), 'fn' (file name built from
    BASE_FILENAME with the dots of version replaced by dashes),
    'fd' (initially None), 'imports' (stored as given) and 'buf'
    (a new, empty StringIO).
    '''
    return {
        'name': name,
        'fn': BASE_FILENAME.format(version.replace(".", "-"), name),
        'fd': None,
        'imports': imports,
        'buf': StringIO()
    }
def get_content(doc):
    '''
    Returns a list of all paragraphs and tables in the Document

    The elements of the document body are visited in order; each one is
    wrapped in a Paragraph or a Table. Elements of any other kind are
    left out of the returned list.
    '''
    ret = []
    body = doc._body
    parag_count = 0
    table_count = 0
    for element in body._element:
        if isinstance(element, docx.oxml.text.paragraph.CT_P):
            ret.append(Paragraph(element, body))
            parag_count += 1
        elif isinstance(element, docx.oxml.table.CT_Tbl):
            ret.append(Table(element, body))
            table_count += 1
        # Anything else is neither a paragraph nor a table: skipped.
    logging.info("Paragraphs: " + str(parag_count))
    logging.info("Tables: " + str(table_count))
    return ret
def find_sect(sect_to_find, start_idx, doc_content):
    '''
    Returns the index in the doc_content list to the first paragraph
    or heading of the section with title sect_to_find,
    starting the research from start_idx

    When no paragraph matches, the returned index is past the end of
    doc_content (or start_idx itself if it already was).
    '''
    while start_idx < len(doc_content):
        my_elem = doc_content[start_idx]
        if isinstance(my_elem, Paragraph) and \
                my_elem.text.strip() == sect_to_find:
            logging.info("FOUND " + sect_to_find + " at " + str(start_idx))
            return start_idx
        start_idx = start_idx + 1
    # Do not report "FOUND" for a search that ran off the end.
    logging.info("NOT FOUND " + sect_to_find)
    return start_idx
def is_lvl2_section_hdn(txt):
    '''
    Returns true if txt is level 2 heading, i.e. it looks like
    "<digit>.<digits><TAB><title>" and the title is one of SUBSECTIONS
    '''
    clean_txt = txt.strip()
    if not bool(re.match(r'^[0-9]\.[0-9]+\t[a-zA-Z\s]*$', clean_txt)):
        return False
    subtitle = clean_txt.split('\t')[1]
    # NOTE(review): the return statement was missing; membership in
    # SUBSECTIONS is assumed to be the intended check - confirm.
    return subtitle in SUBSECTIONS
def is_lvl1_section_hdn(txt):
    '''
    Returns true if txt is level 1 heading: either
    "<digits><TAB><title>" (title made of letters and whitespace only)
    or a text starting with "Annex <LETTERS> "
    '''
    clean_txt = txt.strip()
    if re.match(r'^[0-9]+\t[a-zA-Z\s]*$', clean_txt):
        return True
    # The annex pattern is not anchored at the end: any tail is accepted.
    return bool(
        re.match(r'^Annex[\s]*[A-Z]+[\s\t]+[a-zA-Z\s\(\)]*', clean_txt)
    )
def find_all_sections(doc_content):
    '''
    Scans the body of the document to find level 1 sections
    Returns a list of Section

    Each Section spans from its heading to the element just before the
    next level 1 heading (the last one runs to the end of doc_content).
    Elements preceding the first heading belong to no section; the
    list is empty when there is no heading at all.
    '''
    sections = []
    start_indx = None  # index of the heading of the section being read
    for end_indx, my_elem in enumerate(doc_content):
        if isinstance(my_elem, Paragraph) and \
                is_lvl1_section_hdn(my_elem.text):
            if start_indx is not None:
                sections.append(
                    Section(
                        start_indx,
                        end_indx-1,
                        doc_content[start_indx].text)
                )
            start_indx = end_indx
    # Close the section that was still open when the content ended.
    if start_indx is not None:
        sections.append(
            Section(
                start_indx,
                len(doc_content)-1,
                doc_content[start_indx].text)
        )
    return sections
def write_subsection_to_file(txt, buf):
    '''
    Writes a subsection header to file buf: the slug of txt followed
    by a colon and an empty line
    '''
    # Surrounding whitespace is dropped first, otherwise a trailing
    # space or newline in txt ends up inside the key, before the colon.
    buf.write(slugify(txt.strip())+":")
    buf.write('\n')
    buf.write('\n')
def write_table_to_file(tab, buf):
    '''
    Writes the text of the first cell of table tab to file buf,
    followed by an empty line
    '''
    # NOTE(review): the assignment of txt was missing; the first cell of
    # the first row is read the same way gen_tables_btwn does - confirm.
    txt = tab.rows[0].cells[0].text
    buf.write(txt)
    if not txt.endswith('\n'):
        buf.write('\n')
    buf.write('\n')
range_err_mess = "ERR: Out of range (A: {}, B: {}, IDX: {}, LEN(CONTENT): {})"


def gen_tables_btwn(a_id, b_id, content, buf):
    '''
    Loops over content and writes all tosca definitions to the
    buf file. Returns the number of written items (subsection
    headers are counted together with the definitions)

    NOTE(review): the loop header, the body of the "skip" branch and
    the return statement were missing and have been restored; b_id is
    treated as inclusive - confirm against the callers.
    '''
    definitions_count = 0
    for idx in range(a_id, b_id + 1):
        if idx >= len(content):
            logging.error(
                range_err_mess.format(a_id, b_id, idx, len(content))
            )
            break
        tmp_elem = content[idx]
        if isinstance(tmp_elem, Paragraph) and \
                is_lvl2_section_hdn(tmp_elem.text):
            write_subsection_to_file(tmp_elem.text.split("\t")[1], buf)
            definitions_count = definitions_count + 1
        if isinstance(tmp_elem, Table) and is_tosca_def(tmp_elem):
            write_table_to_file(tmp_elem, buf)
            definitions_count = definitions_count + 1
        elif isinstance(tmp_elem, Table):
            txt = tmp_elem.rows[0].cells[0].text
            if txt.strip().startswith("Name") or \
                    txt.strip().startswith("Shorthand") or \
                    txt.strip().startswith("tosca_def"):
                # Tables opening with one of these words are not
                # definitions: nothing is written for them.
                continue
            logging.debug("Skipped table: " + txt.split("\n")[0])
    return definitions_count
def generate_header(
        model_name,
        buf,
        spec_version=SPEC_VERSION,
        imports=None,
        tosca_version=DEFAULT_TOSCA_VERSION):
    '''
    Writes the header to the file for a specific model

    HDR is formatted with the keywords tosca_version, model,
    spec_version and imports, and the result is written to buf.
    '''
    # NOTE(review): the call was cut off after "HDR.format("; the keyword
    # names come from the placeholders of HDR. imports is passed as well
    # (empty when None); str.format ignores keywords HDR does not use.
    buf.write(HDR.format(
        tosca_version=tosca_version,
        model=model_name,
        spec_version=spec_version,
        imports=imports if imports is not None else ""
    ))
def init_models(yaml_root, spec_ver, tosc_ver):
    '''
    Fills MODELS with one entry per name in MODEL_NAMES and writes the
    header of each model to its buffer

    When yaml_root is 'local' the common types file is imported by its
    bare name, otherwise by its URL on forge.etsi.org for spec_ver.
    '''
    # The import statement is the same for every model: build it once.
    import_stmt = 'etsi_nfv_sol001_common_types.yaml'
    if yaml_root != 'local':
        import_stmt = \
            'https://forge.etsi.org/rep/nfv/SOL001/raw/{}/{}'.format(
                spec_ver, import_stmt
            )
    # NOTE(review): 'common' is in MODEL_NAMES, so the common model gets
    # an import of the common types too - confirm this is intended.
    for model in MODEL_NAMES:
        MODELS[model] = tosca_model_info(
            model,
            spec_ver,
            '- ' + import_stmt
        )
    for mod in MODELS:
        generate_header(
            MODELS[mod]['name'],
            MODELS[mod]['buf'],
            spec_ver,
            MODELS[mod]['imports'],
            tosc_ver
        )
def generate_templates(
        filename,
        spec_ver=SPEC_VERSION,
        yaml_root='uri',
        tosc_ver=DEFAULT_TOSCA_VERSION):
    '''
    Takes a filename or file object and loads the definition into
    the MODELS dictionary

    Sections whose number is a key of sections_to_models are written to
    the buffer of the matching model; annexes A and E are handed to
    generate_examples_between, which fills EXAMPLES.
    Raises ValueError when the document cannot be opened.

    NOTE(review): the loop over the sections, the annex branch and the
    call to init_models were not visible and have been restored from
    the surrounding code - confirm.
    '''
    try:
        sol_001 = docx.Document(filename)
    except Exception as err:
        logging.info("Error opening the submitted Docx file")
        raise ValueError("Cannot open the submitted Docx file") from err

    # The model buffers must exist before any section is written.
    init_models(yaml_root, spec_ver, tosc_ver)

    content = get_content(sol_001)
    sections = find_all_sections(content)
    for sect in sections:
        if not sect.is_annex:
            if sect.number in sections_to_models.keys():
                model = sections_to_models[sect.number]
                count = gen_tables_btwn(
                    sect.from_id, sect.to_id, content, MODELS[model]['buf']
                )
                logging.info("Printed " + str(count) + " types to " + model)
        else:
            if sect.letter == "A" or sect.letter == "E":
                count = generate_examples_between(
                    sect.from_id, sect.to_id, content, EXAMPLES
                )
                logging.info("Printed {} types to Annex {}".format(
                    count, sect.letter
                ))
# NOTE(review): the "def" line and the start of the docstring were
# missing; the function name and the None default of prefix are
# assumptions - confirm against the callers.
def dump_generated_types(prefix=None):
    '''
    Writes the buffer of every model in MODELS and the text of every
    example in EXAMPLES to file.
    Prefix is a path to a folder to work into
    '''
    for key in MODELS:
        mod = MODELS[key]
        if prefix is not None:
            # The stored file name is updated, as callers may read it.
            mod['fn'] = os.path.join(prefix, mod['fn'])
        logging.info("Writing to " + mod['fn'])
        with open(mod['fn'], 'w', encoding='utf-8') as out:
            mod['fd'] = out
            mod['buf'].seek(0)
            out.write(mod['buf'].read())
            out.write('\n')
    for k in EXAMPLES:
        if prefix is not None:
            fnm = os.path.join(prefix, "example_"+EXAMPLES[k].filename)
        else:
            # Without this "else" the prefixed path was always discarded.
            fnm = EXAMPLES[k].filename
        logging.info("Writing example file: " + fnm)
        with open(fnm, 'w', encoding='utf-8') as newf:
            newf.write(EXAMPLES[k].text)
            newf.write("\n")
def parse_version_from_filename(filename):
    '''
    Parses the version from the filename

    The base name is expected to start with "gs_NFV-SOL001v" (any
    letter case) followed by the version written two digits at a time,
    e.g. "gs_NFV-SOL001v020601p.docx" gives "v2.6.1".
    Returns an empty string when the name does not follow the pattern.
    '''
    base_filename = os.path.basename(filename)
    # str.strip(chars) removes a set of characters, not a prefix or a
    # suffix, so the digits are captured explicitly instead.
    found = re.match(
        r'gs_nfv-sol001v((?:[0-9]{2})+)', base_filename, re.IGNORECASE
    )
    if found is None:
        # NOTE(review): the fallback of the original was not visible;
        # an empty string is assumed - confirm.
        return ""
    digits = found.group(1)
    numbers = [
        str(int(digits[pos:pos + 2])) for pos in range(0, len(digits), 2)
    ]
    return "v" + ".".join(numbers)
def slugify(t):
    '''
    Returns t in lower case with every space replaced by an underscore
    '''
    return t.replace(" ", "_").lower()
if __name__ == "__main__":
    try:
        SOL001_FN = sys.argv[1]
    except IndexError:
        logging.info('Error: Filename missing or filename not a docx document')
        logging.info('Usage: doc2tosca <docx-with-tosca-definitions>')
        # Stop here: without a file name there is nothing to process
        # (the original fell through and failed on the undefined name).
        sys.exit('Usage: doc2tosca <docx-with-tosca-definitions>')

    ver = parse_version_from_filename(SOL001_FN)
    generate_templates(SOL001_FN, spec_ver=ver)
    # NOTE(review): nothing is written to disk at this point; the call
    # to the routine that dumps MODELS and EXAMPLES to files seems to be
    # missing here - confirm.