example.py

#!/bin/python3
'''
Class and utilities to parse and export examples of data models in TOSCA
'''
import re
import traceback

from docx.text.paragraph import Paragraph

class Example():
    '''
    A single example extracted from the document.

    Attributes:
        filename: name identifying the example (used as the key when
                  examples are collected into a dictionary)
        text:     full text of the example
    '''

    def __init__(self, filename, text):
        self.filename = filename
        self.text = text

    def __repr__(self):
        # Readable representation to ease debugging and logging
        return "{}(filename={!r}, text={!r})".format(
            type(self).__name__, self.filename, self.text
        )

def is_start_of_example(line: str):
    '''
    Tells whether the line is the first line of an example, i.e. whether
    it begins with "tosca_definitions_version:".

    Raises ValueError if the argument is not a string.
    '''
    if isinstance(line, str):
        return line.startswith("tosca_definitions_version:")

    raise ValueError("NOT A STRING")

def is_body_of_example(line: str):
    '''
    Tells whether the line can belong to the body of an example.

    A line qualifies when it is empty, when it begins with ".", "#" or a
    space, or when it begins with a (possibly empty) run of letters and
    underscores immediately followed by a colon.
    '''
    if line == "":
        return True

    # Continuation markers, comments and indented content
    if line.startswith((".", "#", " ")):
        return True

    # Unindented "key:" line
    return re.match(r'^[a-zA-Z_]*:', line) is not None

def get_example_file_name(line: str):
    '''
    Looks for the YAML filename in the line of text.

    Returns the leftmost run of letters, digits, underscores and dots
    that ends with the literal extension ".yaml", or an empty string
    when the line contains no such name.
    '''
    # The dot before "yaml" is escaped: unescaped it matches any character,
    # so plain prose such as "this is yaml text" would yield "is yaml".
    found = re.search(r'[a-zA-Z0-9_.]*\.yaml', line)
    if found is None:
        return ""
    return found.group(0)

def parse_all_examples(txt):
    '''
    Parses TOSCA examples. Txt is a list of Docx items (Paragraph, etc.).
    Returns a list of Example objects

    Plain strings are accepted as lines too; items that are neither a
    Paragraph nor a string are skipped.

    An example begins at a line recognised by is_start_of_example and
    continues while the lines are recognised by is_body_of_example.

    The filename of an example is looked up in the line preceding its
    first line. When none is found there, a two-digit counter followed by
    ".yaml" is used; the counter restarts at 1 at every heading. The
    filename is prefixed with the text before the first tab of the most
    recent heading and a "-". The example text begins with a "# <filename>"
    comment line.
    '''

    res = []
    new_example = ""
    filename = ""
    i = 1
    clause = ""
    # Initialised so that an example on the very first line does not read
    # an unassigned variable
    previous_line = ""
    for line in txt:

        if isinstance(line, Paragraph):
            linetext = str(line.text)
            if "Heading" in line.style.name:
                # New clause: remember its label and restart the numbering
                clause = linetext.split("\t")[0]
                i = 1
        elif isinstance(line, str):
            linetext = line
        else:
            continue

        if is_start_of_example(linetext):
            if new_example:
                # A new example starts while another one is still being
                # collected: store the pending one instead of dropping it
                res.append(Example(filename, new_example))
            filename = get_example_file_name(previous_line)
            if filename == "":
                filename = "{:02d}".format(i) + ".yaml"
                i = i + 1
            filename = clause + "-" + filename
            new_example = "# " + filename + "\n" + linetext
        elif new_example and is_body_of_example(linetext):
            new_example = new_example + "\n" + linetext
        elif new_example:
            # First line that is not part of the body: the example is complete
            res.append(Example(filename, new_example))
            new_example = ""

        previous_line = linetext

    if new_example:
        # The input ended while an example was still open
        res.append(Example(filename, new_example))

    return res

def generate_examples_between(a_id, b_id, content, EXAMPLES):
    '''
    Parses the examples found in content[a_id:b_id] and stores them in
    EXAMPLES, keyed by filename (an existing entry with the same filename
    is replaced).

    Returns the number of examples parsed. If parsing fails, the traceback
    is printed, EXAMPLES is left untouched and 0 is returned.
    '''

    try:
        examples = parse_all_examples(content[a_id:b_id])
    except Exception:
        # Best effort: report the failure and go on. Only Exception is
        # caught so that KeyboardInterrupt and SystemExit still propagate.
        track = traceback.format_exc()
        print(track)
        return 0

    for example in examples:
        EXAMPLES[example.filename] = example

    return len(examples)