Commit c067c9d5 authored by carignani

support subsections (fix #10)

parent a9d6cf4c
@@ -33,9 +33,21 @@ metadata:
 imports:
 {imports}
+data_types:
 '''
+SUBSECTIONS = [
+    "Artifact Types",
+    "Data Types",
+    "Capability Types",
+    "Interface Types",
+    "Requirements Types",
+    "Relationship Types",
+    "Interface Types",
+    "Node Types",
+    "Group Types",
+    "Policy Types"
+]
 MODELS = {}
 EXAMPLES = {}
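Note: the new SUBSECTIONS list enumerates the level-2 heading titles the converter recognises ("Interface Types" appears twice, which is harmless since the list is only used for membership tests), and the header template gains a data_types: key so generated files get that section. A minimal sketch of how a template with an {imports} placeholder is presumably filled via str.format (the template here is abbreviated and the import file name is made up, not the module's real values):

    # Hedged sketch: abbreviated header template with an {imports} placeholder.
    HEADER_TMPL = "imports:\n{imports}\ndata_types:\n"
    print(HEADER_TMPL.format(imports="  - example_types.yaml"))  # hypothetical import file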
@@ -109,8 +121,8 @@ def get_content(doc):
         elif isinstance(element, docx.oxml.table.CT_Tbl):
             ret.append(Table(element, body))
             table_count = table_count + 1
-        else:
-            print("Non paragraph or table " + str(type(element)))
+        #else:
+        #    print("Non paragraph or table " + str(type(element)))
     print("Paragraphs: " + str(parag_count))
     print("Tables: " + str(table_count))
     return ret
@@ -130,6 +142,17 @@ def find_sect(sect_to_find, start_idx, doc_content):
     print("FOUND " + sect_to_find + " at " + str(start_idx))
     return start_idx
+
+def is_lvl2_section_hdn(txt):
+    ''' Returns true if txt is level 2 heading'''
+    clean_txt = txt.strip()
+    if not bool(re.match(r'^[0-9]\.[0-9]+\t[a-zA-Z\s]*$', clean_txt)):
+        return False
+    subtitle = clean_txt.split('\t')[1]
+    return subtitle in SUBSECTIONS
+
 def is_lvl1_section_hdn(txt):
     ''' Returns true if txt is level 1 heading'''
     clean_txt = txt.strip()
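As a quick, standalone illustration of the heading pattern used above (the SUBSECTIONS membership check is omitted, and LVL2_PATTERN is just a name used here for the demo):

    import re

    LVL2_PATTERN = r'^[0-9]\.[0-9]+\t[a-zA-Z\s]*$'
    # "6.3<TAB>Data Types" has a single sub-number, so the pattern matches.
    print(bool(re.match(LVL2_PATTERN, "6.3\tData Types")))   # True
    # "6.4.2<TAB>Something" has a second dot, so it is rejected as a level-3 heading.
    print(bool(re.match(LVL2_PATTERN, "6.4.2\tSomething")))  # False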
@@ -159,6 +182,15 @@ def find_all_sections(doc_content):
         sections.append(Section(start_indx, end_indx-1, doc_content[start_indx].text))
     return sections
+
+def write_subsection_to_file(txt, buf):
+    '''
+    Writes a subsection header in utf-8 encoding to file buf
+    '''
+    buf.write(slugify(txt)+":")
+    if not txt.endswith('\n'):
+        buf.write('\n')
+    buf.write('\n')
+
 def write_table_to_file(tab, buf):
     '''
     Writes content of table t in utf-8 encoding to file F
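For reference, a self-contained rendition of what write_subsection_to_file emits for a heading such as "Data Types" (io.StringIO stands in for the output file, and the slugify call is inlined):

    import io

    buf = io.StringIO()
    txt = "Data Types"
    buf.write(txt.replace(" ", "_").lower() + ":")  # inlined slugify(txt)
    if not txt.endswith('\n'):
        buf.write('\n')
    buf.write('\n')
    print(repr(buf.getvalue()))  # -> 'data_types:\n\n'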
@@ -177,28 +209,28 @@ def gen_tables_btwn(a_id, b_id, content, buf):
     definitions_count = 0
     for idx in range(a_id, b_id):
         if idx >= len(content):
-            print("A: " + str(a_id))
-            print("B: " + str(b_id))
-            print("IDX: " + str(idx))
-            print("LEN(CONTENT): " + str(len(content)))
+            print("ERROR: Paragraph out of range (A: {}, B: {}, IDX: {}, LEN(CONTENT): {})".format(
+                a_id, b_id, idx, len(content)))
             return definitions_count
         tmp_elem = content[idx]
+        if isinstance(tmp_elem, Paragraph) and is_lvl2_section_hdn(tmp_elem.text):
+            print(tmp_elem.text)
+            write_subsection_to_file(tmp_elem.text.split("\t")[1], buf)
+            definitions_count = definitions_count + 1
         if isinstance(tmp_elem, Table) and is_tosca_def(tmp_elem):
             write_table_to_file(tmp_elem, buf)
             definitions_count = definitions_count + 1
         elif isinstance(tmp_elem, Table):
             txt = tmp_elem.rows[0].cells[0].text
             if txt.strip().startswith("Name") or txt.strip().startswith("Shorthand") or \
                txt.strip().startswith("tosca_def"):
                 continue
-            # print("----- Filtered out: " + txt.split("\n")[0])
-            #if not len(tmp_elem.rows) == 1:
-            #    print(" Rows count != 1 ")
-            #if not len(tmp_elem.columns) == 1:
-            #    print(" Columns count != 1 ")
-            #if not match_definition_incipit(txt):
-            #    print(" Regex != 1 ")
     return definitions_count

 def generate_header(
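Two things change in gen_tables_btwn: a new Paragraph branch writes the slugified level-2 heading to the buffer before the TOSCA definition tables that follow it, and the four separate out-of-range debug prints collapse into one formatted error message. A minimal check of that message with made-up values (not from a real run):

    a_id, b_id, idx, content_len = 5, 40, 37, 37  # illustrative values only
    print("ERROR: Paragraph out of range (A: {}, B: {}, IDX: {}, LEN(CONTENT): {})".format(
        a_id, b_id, idx, content_len))
    # -> ERROR: Paragraph out of range (A: 5, B: 40, IDX: 37, LEN(CONTENT): 37)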
@@ -277,7 +309,7 @@ def generate_templates(
 def print_to_files(prefix=None):
     '''
     Prefix is a path to a folder to work into
     '''
     for key in MODELS:
@@ -317,6 +349,9 @@ def parse_version_from_filename(filename):
             .replace("0",".").strip(".").strip("p.docx")
     return ""
+
+def slugify(t):
+    return t.replace(" ", "_").lower()
+
 if __name__ == "__main__":
     try:
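slugify is the small helper that maps the SUBSECTIONS titles onto YAML keys; it is easy to check in isolation:

    def slugify(t):
        return t.replace(" ", "_").lower()

    print(slugify("Data Types"))          # data_types
    print(slugify("Relationship Types"))  # relationship_types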
...
@@ -20,6 +20,12 @@ def test_is_lvl1_section_hdn():
     assert d2t.is_lvl1_section_hdn("Annex A (informative)")
     assert d2t.is_lvl1_section_hdn("Annex C (normative):\tConformance\t284")
+
+def test_is_lvl2_section_hdn():
+    assert d2t.is_lvl2_section_hdn("6.3\tData Types")
+    assert not d2t.is_lvl2_section_hdn("6.4.2\tSomething")
+    assert not d2t.is_lvl2_section_hdn("6.4\tSomething")
+
 def test_section_init():
     ssss = d2t.Section(0, 10, "6\tVNFD TOSCA model")
...