# doc2tosca.py

#!/bin/python2.7
'''
Generate TOSCA definitions from a Docx specification
'''

import sys
import docx
from docx.table import Table
from docx.text.paragraph import Paragraph

def is_tosca_def(table):
    '''
    Returns True when a table contains TOSCA definitions, i.e. the table
    consists of exactly one row and one column and the text of its single
    cell starts with a space character (' ').

    The table dimensions are checked before the cell is read, so a table
    without rows, or one whose cell is empty, yields False instead of
    raising IndexError.
    '''
    if len(table.rows) != 1 or len(table.columns) != 1:
        return False
    # startswith() on the whole text is safe for an empty string, unlike
    # indexing its first character.
    return table.rows[0].cells[0].text.startswith(' ')


# The path of the input .docx document is the first command-line argument.
try:
    SOL001_FN = sys.argv[1]
except IndexError:
    # Only a missing argument is handled here; anything else (for example
    # KeyboardInterrupt) must not be swallowed by the usage message.
    print('Error: Filename missing or filename not a docx document')
    print('Usage: doc2tosca <docx-with-tosca-definitions>')
    sys.exit(1)

# Names of the generated YAML files, one per group of type definitions.
OUT_FN_VNFD, OUT_FN_NSD, OUT_FN_PNFD, OUT_FN_COMM = (
    'try-tosca-export_vnfd.yaml',
    'try-tosca-export_nsd.yaml',
    'try-tosca-export_pnfd.yaml',
    'try-tosca-export_comm.yaml',
)

# The input document is loaded before any output file is created or
# truncated.
SOL001 = docx.Document(SOL001_FN)

# Output streams, opened (and truncated) in the same order as the names
# above.
F_VNFD, F_NSD, F_PNFD, F_COMM = (
    open(OUT_FN_VNFD, 'w'),
    open(OUT_FN_NSD, 'w'),
    open(OUT_FN_PNFD, 'w'),
    open(OUT_FN_COMM, 'w'),
)

# Preamble written at the top of every generated file. The only part that
# differs between files is the kind of types named in the description, so
# it is filled in from a single template instead of four pasted copies
# (the copies all said "nsd").
# NOTE(review): every header, including the VNFD one, imports the VNFD
# types file; this is kept exactly as it was -- confirm it is intended.
_HDR_TEMPLATE = '''tosca_definitions_version: tosca_simple_yaml_1_2
description: ETSI NFV SOL 001 %s types definitions version 2.5.1

imports:
  - etsi_nfv_sol001_vnfd_2_5_1_types.yaml

data_types:
'''

HDR_VNFD = _HDR_TEMPLATE % 'vnfd'

HDR_NSD = _HDR_TEMPLATE % 'nsd'

HDR_PNFD = _HDR_TEMPLATE % 'pnfd'

# Assumes "COMM" stands for the common types file -- TODO confirm.
HDR_COMM = _HDR_TEMPLATE % 'common'

# Start every output file with its header.
F_VNFD.write(HDR_VNFD)
F_NSD.write(HDR_NSD)
F_PNFD.write(HDR_PNFD)
F_COMM.write(HDR_COMM)

# Cursor into the document content list; the scan further down advances it.
p_id = 0

# Not referenced anywhere else in the visible source.
cur_sect = "0"

from sets import Set

def get_content(doc):
    '''
    Returns the paragraphs and tables of the document body as one list,
    in the order in which they occur in the body.

    Body children that are neither a paragraph nor a table are left out
    and their type is printed. The number of paragraphs and the number
    of tables collected are printed before returning.
    '''
    body = doc._body
    items = []
    n_pars = 0
    n_tables = 0
    for child in body._element:
        if isinstance(child, docx.oxml.text.paragraph.CT_P):
            items.append(Paragraph(child, body))
            n_pars += 1
            continue
        if isinstance(child, docx.oxml.table.CT_Tbl):
            items.append(Table(child, body))
            n_tables += 1
            continue
        # Anything else is only reported, not collected.
        print("FOUND " + str(type(child)))
    print("pars " + str(n_pars))
    print("tables " + str(n_tables))
    return items

# Flat, ordered list of the paragraphs and tables of the input document.
content = get_content(SOL001)
tables = 0

# Move p_id forward to the paragraph whose text is exactly "Foreword";
# p_id ends up equal to len(content) when there is no such paragraph.
for elem in content[p_id:]:
    if isinstance(elem, Paragraph) and elem.text == "Foreword":
        break
    p_id += 1

# A missing Foreword is only reported; the script carries on.
if p_id >= len(content):
    print("FOREWORD NOT FOUND")

def find_sect(sect_to_find, start_idx, doc_content):
    '''
    Returns the index of the first Paragraph in doc_content, at or after
    start_idx, whose text equals sect_to_find exactly, and prints where
    it was found.

    When there is no such paragraph a "NOT FOUND" line is printed and the
    index at which the scan stopped is returned: len(doc_content), or
    start_idx itself when it was already past the end of the list.
    '''
    while start_idx < len(doc_content):
        # Read from the list that was passed in (the same one that bounds
        # the loop), not from the module-level document content.
        my_elem = doc_content[start_idx]
        if isinstance(my_elem, Paragraph) and my_elem.text == sect_to_find:
            print("FOUND " + sect_to_find + " at " + str(start_idx))
            return start_idx
        start_idx = start_idx + 1

    # Do not claim the heading was found when the scan ran off the end.
    print("NOT FOUND " + sect_to_find)
    return start_idx

# Locate the headings that delimit the sections to export. The search for
# section 6 starts at p_id; sections 7 and 8 are each searched for from the
# element after the previous heading.
sect_6_id = find_sect("6\tVNFD TOSCA model", p_id, content)


sect_7_id = find_sect("7\tNSD TOSCA model", sect_6_id + 1, content)


sect_8_id = find_sect("8\tPNFD TOSCA model", sect_7_id + 1, content)

# NOTE(review): this search starts after section 7, not after section 8 as
# the pattern above would suggest (sect_8_id + 1) -- confirm which one is
# intended.
annex_a_id = find_sect("Annex A (informative):", sect_7_id + 1, content)

def write_table_to_file(t, F):
    '''
    Writes the text of the first cell of the first row of table t to F,
    encoded as UTF-8, followed by a separator comment line.
    '''
    definition = t.rows[0].cells[0].text
    F.write(definition.encode('utf-8'))
    F.write('\n# -------------------- #\n')

def generate_tables_between(a_id, b_id, FDESC, fn):
    '''
    Writes every TOSCA definition table found in content[a_id:b_id] to
    FDESC and prints how many were written.

    a_id  -- index of the first element of the module-level content list
             to look at
    b_id  -- index at which to stop (not looked at itself)
    FDESC -- writable file object that receives the definitions
    fn    -- label used in the printed summary
    '''
    count = 0
    # A slice is clamped to the bounds of the list, so an end index past
    # the last element (find_sect returns such an index when a heading is
    # missing) gives a shorter range instead of an IndexError.
    for tmp_elem in content[a_id:b_id]:
        if isinstance(tmp_elem, Table) and is_tosca_def(tmp_elem):
            write_table_to_file(tmp_elem, FDESC)
            count = count + 1
    print("Printed " + str(count) + " type to " + fn)

# Export the definition tables of sections 6, 7 and 8 to their files and
# terminate every file with a newline. F_COMM receives nothing besides its
# header and this final newline.
try:
    generate_tables_between(sect_6_id, sect_7_id, F_VNFD, "VNFD")
    generate_tables_between(sect_7_id, sect_8_id, F_NSD, "NSD")
    generate_tables_between(sect_8_id, annex_a_id, F_PNFD, "PNFD")

    F_VNFD.write('\n')
    F_NSD.write('\n')
    F_PNFD.write('\n')
    F_COMM.write('\n')
finally:
    # Close the outputs even when the export fails part-way, so whatever
    # was written so far is flushed to disk.
    F_VNFD.close()
    F_NSD.close()
    F_PNFD.close()
    F_COMM.close()