#!/bin/python3
'''
Generate TOSCA definitions from a Docx specification
'''

import os
import re
import sys
from io import StringIO

import docx
from docx.table import Table
from docx.text.paragraph import Paragraph

# Pattern of the output file names; the placeholder receives the model name.
BASE_FILENAME = "generated_etsi_nfv_sol001_{}_types.yaml"
# Value written to the tosca_definitions_version key of every generated file.
TOSCA_VERSION = "tosca_simple_yaml_1_2"
# Default specification version used when the caller does not pass one.
SPEC_VERSION = "2.6.1"
# NOTE(review): not referenced in the visible part of this file; presumably
# the spec versions callers are expected to pass - confirm against callers.
allowed_versions = ["2.6.1","2.7.1"]

# One output buffer/file is produced for each of these model names.
MODEL_NAMES = ['vnfd', 'nsd', 'pnfd', 'common']

# Header written at the top of every generated file (see dump_header).
# Placeholders: tosca_version, model, spec_version, imports.
# - metadata is emitted as a YAML map (TOSCA defines metadata as a map of
#   strings) and the second entry uses the template_author keyname; it was
#   previously a second, conflicting template_name entry.
# - NOTE(review): the literal was unterminated and its tail was missing. The
#   "imports:" key is restored because the imports value is a YAML list item
#   indented under it. Confirm against the upstream file whether further
#   lines (e.g. a section key) followed the imports.
HDR = '''tosca_definitions_version: {tosca_version}
description: ETSI NFV SOL 001 {model} types definitions version {spec_version}
metadata:
  template_name: etsi_nfv_sol001_{model}_types
  template_author: ETSI_NFV
  template_version: {spec_version}

imports:
  {imports}

'''

def match_definition_incipit(txt):
    '''
    Tells whether the first line of txt opens a TOSCA definition.

    Only the text before the first newline is considered, with surrounding
    whitespace removed. It must start with "tosca." and may continue only
    with ASCII letters, digits, dots, colons and whitespace.
    '''
    first_line, _, _ = txt.partition("\n")
    found = re.match(r'^tosca\.[a-zA-Z\.:0-9\s]*$', first_line.strip())
    return found is not None

def is_tosca_def(table):
    '''
    Returns True when a table contains a TOSCA definition, i.e. the
    table has exactly one row and one column and the first line of its
    only cell is accepted by match_definition_incipit.
    '''
    # Check the shape before touching the cell, so a table without rows
    # yields False instead of raising IndexError.
    if len(table.rows) != 1 or len(table.columns) != 1:
        return False
    return bool(match_definition_incipit(table.rows[0].cells[0].text))
def tosca_model_info(name, imports):
    '''
    Returns a dictionary for the information on the model

    Keys: 'name' (model name), 'fn' (output file name built from
    BASE_FILENAME), 'fd' (initially None), 'imports' (text stored as
    given) and 'buf' (a fresh, empty StringIO).
    '''
    return {
        'name' : name,
        'fn' : BASE_FILENAME.format(name),
        'fd' : None,
        'imports' : imports,
        'buf' :  StringIO()
    }


def get_content(doc):
    '''
    Returns a list of all paragraphs and tables in the Document

    The elements are returned in document order, wrapped as Paragraph
    and Table objects. Counters of both kinds are printed.
    '''
    # NOTE(review): the function header and the two initialisations below
    # were missing and have been reconstructed from the call site
    # (get_content(<docx Document>)) and from the use of body._element.
    # Confirm that doc._body is the intended parent object.
    body = doc._body
    ret = []
    parag_count = 0
    table_count = 0
    for element in body._element:
        if isinstance(element, docx.oxml.text.paragraph.CT_P):
            ret.append(Paragraph(element, body))
            parag_count = parag_count + 1
        elif isinstance(element, docx.oxml.table.CT_Tbl):
            ret.append(Table(element, body))
            table_count = table_count + 1
        else:
            # Anything else is only reported, not collected
            print("Non paragraph or table " +  str(type(element)))
    print("Paragraphs: " + str(parag_count))
    print("Tables: " + str(table_count))
    return ret

def find_sect(sect_to_find, start_idx, doc_content):
    '''
    Returns the index in the doc_content list of the first Paragraph
    whose text equals sect_to_find, starting the search from start_idx.

    When no such paragraph exists, the index reached by the scan is
    returned: len(doc_content), or start_idx itself if it was already
    past the end. The outcome is printed in both cases.
    '''
    idx = start_idx
    found = False
    while idx < len(doc_content):
        my_elem = doc_content[idx]
        if isinstance(my_elem, Paragraph) and my_elem.text == sect_to_find:
            found = True
            break
        idx = idx + 1

    if found:
        print("FOUND " + sect_to_find + " at " + str(idx))
    else:
        print("NOT FOUND " + sect_to_find)
    # Callers do arithmetic on the result, so an index is always returned
    return idx
def write_table_to_file(tab, buf):
    '''
    Writes the text of the first cell of table tab to the buffer buf.

    Every line is re-indented by pad2 and the block is terminated by a
    newline (added only if the text does not already end with one)
    followed by one more newline.
    '''
    def pad2(txt):
        # Lines with one or three leading spaces get one extra space,
        # every other line gets two. The checks go from the longest
        # prefix to the shortest, so the first match decides.
        if txt.startswith("   "):
            return " " + txt
        if txt.startswith("  "):
            return "  " + txt
        if txt.startswith(" "):
            return " " + txt
        return "  " + txt

    txt = tab.rows[0].cells[0].text
    buf.write("\n".join(pad2(line) for line in txt.split("\n")))
    if not txt.endswith('\n'):
        buf.write('\n')
    buf.write('\n')

def generate_tables_between(a_id, b_id, content, buf):
    '''
    Loops over content[a_id:b_id] and writes every table recognised by
    is_tosca_def to buf. Returns the number of written definitions.

    Tables that are not written are reported on stdout together with the
    checks they fail, except those whose first cell starts with "Name",
    "Shorthand" or "tosca_def", which are skipped silently.
    '''
    definitions_count = 0

    for idx in range(a_id, b_id):
        tmp_elem = content[idx]
        if not isinstance(tmp_elem, Table):
            continue

        if is_tosca_def(tmp_elem):
            write_table_to_file(tmp_elem, buf)
            definitions_count = definitions_count + 1
            continue

        txt = tmp_elem.rows[0].cells[0].text
        if txt.strip().startswith(("Name", "Shorthand", "tosca_def")):
            continue

        # Explain why the table was rejected
        print("----- Filtered out: " + txt.split("\n")[0])
        if len(tmp_elem.rows) != 1:
            print("       Rows count != 1 ")
        if len(tmp_elem.columns) != 1:
            print("       Columns count != 1 ")
        if not match_definition_incipit(txt):
            print("       Regex != 1 ")
    return definitions_count

def dump_header(model_name, buf, spec_version=SPEC_VERSION, imports=None):
    '''
    Fills the HDR template for one model and writes the result to buf.

    model_name, spec_version and imports are substituted as given; the
    TOSCA version always comes from TOSCA_VERSION.
    '''
    fields = {
        'tosca_version': TOSCA_VERSION,
        'model': model_name,
        'spec_version': spec_version,
        'imports': imports,
    }
    header = HDR.format(**fields)
    buf.write(header)

MODELS = {}


def generate_templates(filename, spec_version=SPEC_VERSION):
    '''
    Takes a filename or file object and loads the definition into the MODELS dictionary

    For every name in MODEL_NAMES a fresh entry is stored in MODELS and
    its buffer receives the header. The tables found between the section
    headings 6, 7, 8, 9 and Annex A are then written to the vnfd, nsd,
    pnfd and common buffers respectively.
    '''
    if isinstance(filename, str):
        print("Opening " + filename)

    for mn in MODEL_NAMES:
        MODELS[mn] = tosca_model_info(
            mn,
            '- https://forge.etsi.org/rep/nfv/SOL001/raw/v{}/etsi_nfv_sol001_common_types.yaml'.format(spec_version)
        )

    sol_001 = docx.Document(filename)

    for model in MODELS.values():
        dump_header(
            model['name'],
            model['buf'],
            spec_version,
            model['imports'])

    content = get_content(sol_001)

    # Skip everything before the "Foreword" paragraph, so that the section
    # titles are not searched in the part of the document preceding it.
    # NOTE(review): the initialisation of p_id was missing; 0 is assumed.
    p_id = 0
    while p_id < len(content):
        elem = content[p_id]
        if isinstance(elem, Paragraph) and elem.text == "Foreword":
            break
        p_id = p_id + 1

    if p_id >= len(content):
        print("FOREWORD NOT FOUND")

    sect_6_id = find_sect("6\tVNFD TOSCA model", p_id, content)

    sect_7_id = find_sect("7\tNSD TOSCA model", sect_6_id + 1, content)

    sect_8_id = find_sect("8\tPNFD TOSCA model", sect_7_id + 1, content)

    sect_9_id = find_sect("9\tCommon Definitions", sect_8_id + 1, content)

    annex_a_id = find_sect("Annex A (informative):", sect_9_id + 1, content)

    # (model key, label used in the log, first index, end index)
    # The common definitions go to the 'common' buffer; they were
    # previously appended to the 'pnfd' one.
    sections = (
        ('vnfd', "VNFD", sect_6_id, sect_7_id),
        ('nsd', "NSD", sect_7_id, sect_8_id),
        ('pnfd', "PNFD", sect_8_id, sect_9_id),
        ('common', "Common", sect_9_id, annex_a_id),
    )
    for model_name, label, first, last in sections:
        count = generate_tables_between(first, last, content, MODELS[model_name]['buf'])
        print("Printed " + str(count) + " types to " + label + "\n\n\n")


def print_to_files(prefix=None):
    '''
    Writes the buffer of every model in MODELS to its file.

    Prefix is a path to a folder to work into: when given, it is joined
    to the file name of each model and the result is stored back in the
    model's 'fn' entry. Each file ends with one additional newline.
    '''
    for m in MODELS:
        if prefix is not None:
            MODELS[m]['fn'] = os.path.join(prefix, MODELS[m]['fn'])

        print("Writing to " + MODELS[m]['fn'])
        # The text comes from a Word document and may contain non-ASCII
        # characters, so the encoding is set explicitly. The with block
        # closes the file even if a write fails.
        with open(MODELS[m]['fn'], 'w', encoding='utf-8') as fdesc:
            # The 'fd' entry keeps referring to the (then closed) file
            MODELS[m]['fd'] = fdesc
            MODELS[m]['buf'].seek(0)
            fdesc.write(MODELS[m]['buf'].read())
            fdesc.write('\n')


if __name__ == "__main__":

    # Only a missing argument can fail here, so only IndexError is caught.
    # A bare except would also swallow KeyboardInterrupt and SystemExit.
    try:
        SOL001_FN = sys.argv[1]
    except IndexError:
        print('Error: Filename missing or filename not a docx document')
        print('Usage: doc2tosca <docx-with-tosca-definitions>')
        sys.exit(1)

    generate_templates(SOL001_FN)

    print_to_files()