Newer
Older
'''
Generate tosca definitions from Docx specification
'''
import sys
from docx.table import Table
from docx.text.paragraph import Paragraph
# File name pattern of the generated YAML; '{}' receives the model name.
BASE_FILENAME = 'try-tosca-export_{}.yaml'

# Value rendered into the tosca_definitions_version line of the header.
TOSCA_VERSION = 'tosca_simple_yaml_1_2'

# Specification version quoted in the description line of the header.
SPEC_VERSION = '2.5.1'

# Names of the models for which a bookkeeping record is created.
MODEL_NAMES = ['vnfd', 'nsd', 'pnfd', 'common']

# Header template, rendered with str.format() by dump_header().
# NOTE(review): dump_header() also passes an 'imports' keyword, but this
# template has no {imports} placeholder - confirm whether that is intended.
HDR = (
    'tosca_definitions_version: {tosca_version}\n'
    'description: ETSI NFV SOL 001 {model} types definitions version {spec_version}\n'
    'data_types:\n'
)
def is_tosca_def(table):
    '''
    Returns true when a table contains TOSCA definitions, i.e.
    the table contains just one cell and its text starts with an
    empty space ' '.

    The table dimensions are checked before the cell is read, and the
    whole cell text is tested rather than its first character, so a
    table without rows or with an empty single cell yields False
    instead of raising IndexError.
    '''
    if len(table.rows) != 1 or len(table.columns) != 1:
        return False
    # str.startswith() is False for an empty string, which covers the
    # empty-cell case without indexing into the text.
    return table.rows[0].cells[0].text.startswith(' ')
def tosca_model_info(name, imports):
    '''
    Builds the bookkeeping dictionary for one model.

    The dictionary holds the model name ('name'), the output file name
    derived from BASE_FILENAME ('fn'), a file descriptor slot that
    starts out as None ('fd'), the imports value handed in by the
    caller ('imports') and a fresh StringIO buffer ('buf').
    '''
    info = dict(name=name, imports=imports, fd=None)
    info['fn'] = BASE_FILENAME.format(name)
    info['buf'] = StringIO()
    return info
def get_content(doc):
    '''
    Returns a list of all paragraphs and tables in the Document.

    The children of the document body element are visited in iteration
    order; each paragraph element is wrapped in a Paragraph and each
    table element in a Table. Any other element type is reported on
    stdout and left out of the result. The number of paragraphs and
    tables collected is printed before returning.

    Note: this relies on the private attributes doc._body and
    body._element.
    '''
    ret = []
    body = doc._body
    # Counters for the summary printed below; they must exist even when
    # the body has no children.
    parag_count = 0
    table_count = 0
    for element in body._element:
        if isinstance(element, docx.oxml.text.paragraph.CT_P):
            ret.append(Paragraph(element, body))
            parag_count += 1
        elif isinstance(element, docx.oxml.table.CT_Tbl):
            ret.append(Table(element, body))
            table_count += 1
        else:
            # Neither a paragraph nor a table: report it and skip it.
            print("Non paragraph or table " + str(type(element)))
    print("Paragraphs: " + str(parag_count))
    print("Tables: " + str(table_count))
    return ret
def find_sect(sect_to_find, start_idx, doc_content):
    '''
    Returns the index in the doc_content list of the first Paragraph
    whose text equals sect_to_find, starting the search from start_idx.

    When no such paragraph exists, start_idx is advanced to
    len(doc_content) and that value is returned (or start_idx itself is
    returned unchanged if it already lies past the end of the list), so
    callers can detect a miss by comparing against len(doc_content).
    The outcome is reported on stdout.
    '''
    while start_idx < len(doc_content):
        # Fetch the element under inspection for this position.
        my_elem = doc_content[start_idx]
        if isinstance(my_elem, Paragraph) and my_elem.text == sect_to_find:
            print("FOUND " + sect_to_find + " at " + str(start_idx))
            return start_idx
        start_idx = start_idx + 1
    # Do not claim success when the end of the content was reached.
    print("NOT FOUND " + sect_to_find)
    return start_idx
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
def write_table_to_file(tab, buf):
    '''
    Writes the text of the first cell of the first row of tab to buf,
    encoded as utf-8, followed by a separator comment line.
    '''
    first_cell = tab.rows[0].cells[0]
    encoded_text = first_cell.text.encode('utf-8')
    buf.write(encoded_text)
    # Separator that delimits consecutive definitions in the output.
    buf.write('\n# -------------------- #\n')
def generate_tables_between(a_id, b_id, content, buf):
    '''
    Walks content from index a_id (inclusive) to b_id (exclusive) and
    writes every Table that is_tosca_def() accepts to buf.
    Returns the number of definitions written.
    '''
    written = 0
    for position in range(a_id, b_id):
        candidate = content[position]
        # Skip anything that is not a table holding a TOSCA definition.
        if not isinstance(candidate, Table):
            continue
        if not is_tosca_def(candidate):
            continue
        write_table_to_file(candidate, buf)
        written += 1
    return written
def dump_header(model_name, buf, imports):
    '''
    Renders the HDR template for one model and writes it to buf.

    The template receives the TOSCA version, the model name, the
    specification version and the imports value as keyword arguments;
    a value only shows up in the output if HDR contains the matching
    placeholder.
    '''
    fields = {
        'tosca_version': TOSCA_VERSION,
        'model': model_name,
        'spec_version': SPEC_VERSION,
        'imports': imports,
    }
    buf.write(HDR.format(**fields))
# One bookkeeping record per model, keyed by model name. Every model is
# given the same imports line for the common types file.
MODELS = {}
for mn in MODEL_NAMES:
    MODELS[mn] = tosca_model_info(mn, '- etsi_nfv_sol001_common_types.yaml')

# The document to convert is the first command line argument.
try:
    SOL001_FN = sys.argv[1]
except IndexError:
    # Only a missing argument is handled here; a bare except would also
    # swallow SystemExit and KeyboardInterrupt.
    print('Error: Filename missing or filename not a docx document')
    print('Usage: doc2tosca <docx-with-tosca-definitions>')
    sys.exit(1)
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
# Write the header into the buffer of every model.
for m in MODELS:
    model = MODELS[m]
    dump_header(model['name'], model['buf'], model['imports'])

p_id = 0
cur_sect = "0"
# SOL001 is not defined in the visible part of the file; it is expected
# to be bound before this point.
CONTENT = get_content(SOL001)
tables = 0

# Advance p_id to the "Foreword" paragraph; the else branch runs only
# when the loop ends without finding it.
while p_id < len(CONTENT):
    elem = CONTENT[p_id]
    if isinstance(elem, Paragraph) and elem.text == "Foreword":
        break
    p_id += 1
else:
    print("FOREWORD NOT FOUND")

# Locate the section headings (number and title are separated by a tab).
sect_6_id = find_sect("6\tVNFD TOSCA model", p_id, CONTENT)
sect_7_id = find_sect("7\tNSD TOSCA model", sect_6_id + 1, CONTENT)
sect_8_id = find_sect("8\tPNFD TOSCA model", sect_7_id + 1, CONTENT)
# NOTE(review): this search starts after section 7, not after section 8 -
# confirm that this is intended.
annex_a_id = find_sect("Annex A (informative):", sect_7_id + 1, CONTENT)

# Copy the definitions found in each section into the matching buffer.
for (model_key, label, first_id, last_id) in (
        ('vnfd', "VNFD", sect_6_id, sect_7_id),
        ('nsd', "NSD", sect_7_id, sect_8_id),
        ('pnfd', "PNFD", sect_8_id, annex_a_id)):
    count = generate_tables_between(first_id, last_id, CONTENT, MODELS[model_key]['buf'])
    print("Printed " + str(count) + " types to " + label)