Loading src/doc2tosca.py +123 −72 Original line number Original line Diff line number Diff line Loading @@ -35,6 +35,14 @@ imports: ''' ''' sections_to_models = { 6: 'vnfd', 7: 'nsd', 8: 'pnfd', 9: 'common' } SUBSECTIONS = [ SUBSECTIONS = [ "Artifact Types", "Artifact Types", "Data Types", "Data Types", Loading @@ -51,6 +59,7 @@ SUBSECTIONS = [ MODELS = {} MODELS = {} EXAMPLES = {} EXAMPLES = {} class Section(): class Section(): ''' ''' Defines a section of the base document Defines a section of the base document Loading @@ -72,15 +81,23 @@ class Section(): def __repr__(self): def __repr__(self): if self.is_annex: if self.is_annex: return "({}, Annex {}, {}-{})".format(self.title,self.letter, self.from_id, self.to_id) return "({}, Annex {}, {}-{})".format( return "({}, {}, {}-{})".format(self.title, self.number, self.from_id, self.to_id) self.title, self.letter, self.from_id, self.to_id ) return "({}, {}, {}-{})".format( self.title, self.number, self.from_id, self.to_id ) def match_definition_incipit(txt): def match_definition_incipit(txt): ''' ''' Returns tru if txt matches the incipit of a definition, Returns tru if txt matches the incipit of a definition, identified by the word 'tosca' identified by the word 'tosca' ''' ''' return bool(re.match(r'^tosca\.[a-zA-Z\.:0-9\s]*$',txt.split("\n")[0].strip())) return bool( re.match(r'^tosca\.[a-zA-Z\.:0-9\s]*$', txt.split("\n")[0].strip()) ) def is_tosca_def(table): def is_tosca_def(table): ''' ''' Loading @@ -94,6 +111,7 @@ def is_tosca_def(table): len(table.columns) == 1 and \ len(table.columns) == 1 and \ match_definition_incipit(txt) match_definition_incipit(txt) def tosca_model_info(name, version, imports): def tosca_model_info(name, version, imports): ''' ''' Returns a dictionary to hold information on the model Returns a dictionary to hold information on the model Loading @@ -106,6 +124,7 @@ def tosca_model_info(name, version, imports): 'buf': StringIO() 'buf': StringIO() } } def get_content(doc): def get_content(doc): ''' ''' Returns a list of all paragraphs and tables in the Document Returns a list of all paragraphs and tables in the Document Loading @@ -127,6 +146,7 @@ def get_content(doc): print("Tables: " + str(table_count)) print("Tables: " + str(table_count)) return ret return ret def find_sect(sect_to_find, start_idx, doc_content): def find_sect(sect_to_find, start_idx, doc_content): ''' ''' Returns the index in the doc_content list to the first paragraph Returns the index in the doc_content list to the first paragraph Loading @@ -135,13 +155,15 @@ def find_sect(sect_to_find, start_idx, doc_content): ''' ''' while start_idx < len(doc_content): while start_idx < len(doc_content): my_elem = doc_content[start_idx] my_elem = doc_content[start_idx] if isinstance(my_elem, Paragraph) and my_elem.text.strip() == sect_to_find: if isinstance(my_elem, Paragraph) and \ my_elem.text.strip() == sect_to_find: break break start_idx = start_idx + 1 start_idx = start_idx + 1 print("FOUND " + sect_to_find + " at " + str(start_idx)) print("FOUND " + sect_to_find + " at " + str(start_idx)) return start_idx return start_idx def is_lvl2_section_hdn(txt): def is_lvl2_section_hdn(txt): ''' Returns true if txt is level 2 heading''' ''' Returns true if txt is level 2 heading''' clean_txt = txt.strip() clean_txt = txt.strip() Loading @@ -153,12 +175,14 @@ def is_lvl2_section_hdn(txt): return subtitle in SUBSECTIONS return subtitle in SUBSECTIONS def is_lvl1_section_hdn(txt): def is_lvl1_section_hdn(txt): ''' Returns true if txt is level 1 heading''' ''' Returns true if txt is level 1 heading''' clean_txt = txt.strip() clean_txt = txt.strip() return bool(re.match(r'^[0-9]+\t[a-zA-Z\s]*$', clean_txt)) or \ return bool(re.match(r'^[0-9]+\t[a-zA-Z\s]*$', clean_txt)) or \ bool(re.match(r'^Annex[\s]*[A-Z]+[\s\t]+[a-zA-Z\s\(\)]*', clean_txt)) bool(re.match(r'^Annex[\s]*[A-Z]+[\s\t]+[a-zA-Z\s\(\)]*', clean_txt)) def find_all_sections(doc_content): def find_all_sections(doc_content): ''' ''' Scans the body of the document to find level 1 sections Scans the body of the document to find level 1 sections Loading @@ -171,17 +195,26 @@ def find_all_sections(doc_content): while end_indx < len(doc_content): while end_indx < len(doc_content): my_elem = doc_content[end_indx] my_elem = doc_content[end_indx] if isinstance(my_elem, Paragraph) and is_lvl1_section_hdn(my_elem.text): if isinstance(my_elem, Paragraph) and \ is_lvl1_section_hdn(my_elem.text): if start_indx != 0: if start_indx != 0: sections.append(Section(start_indx, end_indx-1, doc_content[start_indx].text)) sections.append( Section( start_indx, end_indx-1, doc_content[start_indx].text) ) start_indx = end_indx start_indx = end_indx end_indx = end_indx + 1 end_indx = end_indx + 1 sections.append(Section(start_indx, end_indx-1, doc_content[start_indx].text)) sections.append( Section(start_indx, end_indx-1, doc_content[start_indx].text) ) return sections return sections def write_subsection_to_file(txt, buf): def write_subsection_to_file(txt, buf): ''' ''' Writes a subsection header in utf-8 encoding to file buf Writes a subsection header in utf-8 encoding to file buf Loading @@ -191,6 +224,7 @@ def write_subsection_to_file(txt, buf): buf.write('\n') buf.write('\n') buf.write('\n') buf.write('\n') def write_table_to_file(tab, buf): def write_table_to_file(tab, buf): ''' ''' Writes content of table t in utf-8 encoding to file F Writes content of table t in utf-8 encoding to file F Loading @@ -201,6 +235,10 @@ def write_table_to_file(tab, buf): buf.write('\n') buf.write('\n') buf.write('\n') buf.write('\n') range_err_mess = "ERR: Out of range (A: {}, B: {}, IDX: {}, LEN(CONTENT): {})" def gen_tables_btwn(a_id, b_id, content, buf): def gen_tables_btwn(a_id, b_id, content, buf): ''' ''' Loops over content and writes all tosca definitions to the Loops over content and writes all tosca definitions to the Loading @@ -211,13 +249,14 @@ def gen_tables_btwn(a_id, b_id, content, buf): for idx in range(a_id, b_id): for idx in range(a_id, b_id): if idx >= len(content): if idx >= len(content): print("ERROR: Paragraph out of range (A: {}, B: {}, IDX: {}, LEN(CONTENT): {})".format( print( a_id, b_id,idx, len(content))) range_err_mess.format(a_id, b_id, idx, len(content))) return definitions_count return definitions_count tmp_elem = content[idx] tmp_elem = content[idx] if isinstance(tmp_elem, Paragraph) and is_lvl2_section_hdn(tmp_elem.text): if isinstance(tmp_elem, Paragraph) and \ is_lvl2_section_hdn(tmp_elem.text): print(tmp_elem.text) print(tmp_elem.text) write_subsection_to_file(tmp_elem.text.split("\t")[1], buf) write_subsection_to_file(tmp_elem.text.split("\t")[1], buf) definitions_count = definitions_count + 1 definitions_count = definitions_count + 1 Loading @@ -228,11 +267,13 @@ def gen_tables_btwn(a_id, b_id, content, buf): elif isinstance(tmp_elem, Table): elif isinstance(tmp_elem, Table): txt = tmp_elem.rows[0].cells[0].text txt = tmp_elem.rows[0].cells[0].text if txt.strip().startswith("Name") or txt.strip().startswith("Shorthand") or \ if txt.strip().startswith("Name") or \ txt.strip().startswith("Shorthand") or \ txt.strip().startswith("tosca_def"): txt.strip().startswith("tosca_def"): continue continue return definitions_count return definitions_count def generate_header( def generate_header( model_name, model_name, buf, buf, Loading @@ -248,34 +289,23 @@ def generate_header( spec_version=spec_version, spec_version=spec_version, imports=imports)) imports=imports)) def generate_templates( filename, spec_ver=SPEC_VERSION, yaml_root='uri', tosc_ver=DEFAULT_TOSCA_VERSION): ''' Takes a filename or file object and loads the definition into the MODELS dictionary ''' if isinstance(filename, str): print("Opening " + filename) for mod in MODEL_NAMES: def init_models(yaml_root, spec_ver, tosc_ver): for model in MODEL_NAMES: import_stmt = 'etsi_nfv_sol001_common_types.yaml' import_stmt = 'etsi_nfv_sol001_common_types.yaml' if yaml_root != 'local': if yaml_root != 'local': import_stmt = \ import_stmt = \ 'https://forge.etsi.org/rep/nfv/SOL001/raw/{}/'.format(spec_ver) + import_stmt 'https://forge.etsi.org/rep/nfv/SOL001/raw/{}/{}'.format( MODELS[mod] = tosca_model_info( spec_ver, import_stmt mod, ) MODELS[model] = tosca_model_info( model, spec_ver, spec_ver, '- ' + import_stmt '- ' + import_stmt ) ) try: sol_001 = docx.Document(filename) except: print("Error opening the submitted Docx file") raise ValueError("Cannot open the submitted Docx file") for mod in MODELS: for mod in MODELS: generate_header( generate_header( MODELS[mod]['name'], MODELS[mod]['name'], Loading @@ -285,27 +315,47 @@ def generate_templates( tosc_ver tosc_ver ) ) def generate_templates( filename, spec_ver=SPEC_VERSION, yaml_root='uri', tosc_ver=DEFAULT_TOSCA_VERSION): ''' Takes a filename or file object and loads the definition into the MODELS dictionary ''' if isinstance(filename, str): print("Opening " + filename) init_models(yaml_root, spec_ver, tosc_ver) try: sol_001 = docx.Document(filename) except: print("Error opening the submitted Docx file") raise ValueError("Cannot open the submitted Docx file") content = get_content(sol_001) content = get_content(sol_001) sections = find_all_sections(content) sections = find_all_sections(content) sections_to_models = { 6 : 'vnfd', 7 : 'nsd', 8 : 'pnfd', 9 : 'common' } for sect in sections: for sect in sections: if not sect.is_annex: if not sect.is_annex: if sect.number in sections_to_models.keys(): if sect.number in sections_to_models.keys(): model = sections_to_models[sect.number] model = sections_to_models[sect.number] count = gen_tables_btwn(sect.from_id, sect.to_id, content, MODELS[model]['buf']) count = gen_tables_btwn( sect.from_id, sect.to_id, content, MODELS[model]['buf'] ) print("Printed " + str(count) + " types to " + model) print("Printed " + str(count) + " types to " + model) else: else: if sect.letter == "A": if sect.letter == "A" or sect.letter == "E": count = generate_examples_between(sect.from_id, sect.to_id, content, EXAMPLES) count = generate_examples_between( print("Printed " + str(count) + " types to " + "Annex " + sect.letter) sect.from_id, sect.to_id, content, EXAMPLES ) print("Printed {} types to Annex {}".format( str(count), sect.letter) ) def print_to_files(prefix=None): def print_to_files(prefix=None): Loading Loading @@ -335,6 +385,7 @@ def print_to_files(prefix=None): newf.write("\n") newf.write("\n") newf.close() newf.close() def parse_version_from_filename(filename): def parse_version_from_filename(filename): ''' ''' Parses the version from the filename Parses the version from the filename Loading @@ -349,9 +400,11 @@ def parse_version_from_filename(filename): .replace("0", ".").strip(".").strip("p.docx") .replace("0", ".").strip(".").strip("p.docx") return "" return "" def slugify(t): def slugify(t): return t.replace(" ", "_").lower() return t.replace(" ", "_").lower() if __name__ == "__main__": if __name__ == "__main__": try: try: Loading @@ -365,5 +418,3 @@ if __name__ == "__main__": generate_templates(SOL001_FN, spec_ver=ver) generate_templates(SOL001_FN, spec_ver=ver) print_to_files() print_to_files() Loading
src/doc2tosca.py +123 −72 Original line number Original line Diff line number Diff line Loading @@ -35,6 +35,14 @@ imports: ''' ''' sections_to_models = { 6: 'vnfd', 7: 'nsd', 8: 'pnfd', 9: 'common' } SUBSECTIONS = [ SUBSECTIONS = [ "Artifact Types", "Artifact Types", "Data Types", "Data Types", Loading @@ -51,6 +59,7 @@ SUBSECTIONS = [ MODELS = {} MODELS = {} EXAMPLES = {} EXAMPLES = {} class Section(): class Section(): ''' ''' Defines a section of the base document Defines a section of the base document Loading @@ -72,15 +81,23 @@ class Section(): def __repr__(self): def __repr__(self): if self.is_annex: if self.is_annex: return "({}, Annex {}, {}-{})".format(self.title,self.letter, self.from_id, self.to_id) return "({}, Annex {}, {}-{})".format( return "({}, {}, {}-{})".format(self.title, self.number, self.from_id, self.to_id) self.title, self.letter, self.from_id, self.to_id ) return "({}, {}, {}-{})".format( self.title, self.number, self.from_id, self.to_id ) def match_definition_incipit(txt): def match_definition_incipit(txt): ''' ''' Returns tru if txt matches the incipit of a definition, Returns tru if txt matches the incipit of a definition, identified by the word 'tosca' identified by the word 'tosca' ''' ''' return bool(re.match(r'^tosca\.[a-zA-Z\.:0-9\s]*$',txt.split("\n")[0].strip())) return bool( re.match(r'^tosca\.[a-zA-Z\.:0-9\s]*$', txt.split("\n")[0].strip()) ) def is_tosca_def(table): def is_tosca_def(table): ''' ''' Loading @@ -94,6 +111,7 @@ def is_tosca_def(table): len(table.columns) == 1 and \ len(table.columns) == 1 and \ match_definition_incipit(txt) match_definition_incipit(txt) def tosca_model_info(name, version, imports): def tosca_model_info(name, version, imports): ''' ''' Returns a dictionary to hold information on the model Returns a dictionary to hold information on the model Loading @@ -106,6 +124,7 @@ def tosca_model_info(name, version, imports): 'buf': StringIO() 'buf': StringIO() } } def get_content(doc): def get_content(doc): ''' ''' Returns a list of all paragraphs and tables in the Document Returns a list of all paragraphs and tables in the Document Loading @@ -127,6 +146,7 @@ def get_content(doc): print("Tables: " + str(table_count)) print("Tables: " + str(table_count)) return ret return ret def find_sect(sect_to_find, start_idx, doc_content): def find_sect(sect_to_find, start_idx, doc_content): ''' ''' Returns the index in the doc_content list to the first paragraph Returns the index in the doc_content list to the first paragraph Loading @@ -135,13 +155,15 @@ def find_sect(sect_to_find, start_idx, doc_content): ''' ''' while start_idx < len(doc_content): while start_idx < len(doc_content): my_elem = doc_content[start_idx] my_elem = doc_content[start_idx] if isinstance(my_elem, Paragraph) and my_elem.text.strip() == sect_to_find: if isinstance(my_elem, Paragraph) and \ my_elem.text.strip() == sect_to_find: break break start_idx = start_idx + 1 start_idx = start_idx + 1 print("FOUND " + sect_to_find + " at " + str(start_idx)) print("FOUND " + sect_to_find + " at " + str(start_idx)) return start_idx return start_idx def is_lvl2_section_hdn(txt): def is_lvl2_section_hdn(txt): ''' Returns true if txt is level 2 heading''' ''' Returns true if txt is level 2 heading''' clean_txt = txt.strip() clean_txt = txt.strip() Loading @@ -153,12 +175,14 @@ def is_lvl2_section_hdn(txt): return subtitle in SUBSECTIONS return subtitle in SUBSECTIONS def is_lvl1_section_hdn(txt): def is_lvl1_section_hdn(txt): ''' Returns true if txt is level 1 heading''' ''' Returns true if txt is level 1 heading''' clean_txt = txt.strip() clean_txt = txt.strip() return bool(re.match(r'^[0-9]+\t[a-zA-Z\s]*$', clean_txt)) or \ return bool(re.match(r'^[0-9]+\t[a-zA-Z\s]*$', clean_txt)) or \ bool(re.match(r'^Annex[\s]*[A-Z]+[\s\t]+[a-zA-Z\s\(\)]*', clean_txt)) bool(re.match(r'^Annex[\s]*[A-Z]+[\s\t]+[a-zA-Z\s\(\)]*', clean_txt)) def find_all_sections(doc_content): def find_all_sections(doc_content): ''' ''' Scans the body of the document to find level 1 sections Scans the body of the document to find level 1 sections Loading @@ -171,17 +195,26 @@ def find_all_sections(doc_content): while end_indx < len(doc_content): while end_indx < len(doc_content): my_elem = doc_content[end_indx] my_elem = doc_content[end_indx] if isinstance(my_elem, Paragraph) and is_lvl1_section_hdn(my_elem.text): if isinstance(my_elem, Paragraph) and \ is_lvl1_section_hdn(my_elem.text): if start_indx != 0: if start_indx != 0: sections.append(Section(start_indx, end_indx-1, doc_content[start_indx].text)) sections.append( Section( start_indx, end_indx-1, doc_content[start_indx].text) ) start_indx = end_indx start_indx = end_indx end_indx = end_indx + 1 end_indx = end_indx + 1 sections.append(Section(start_indx, end_indx-1, doc_content[start_indx].text)) sections.append( Section(start_indx, end_indx-1, doc_content[start_indx].text) ) return sections return sections def write_subsection_to_file(txt, buf): def write_subsection_to_file(txt, buf): ''' ''' Writes a subsection header in utf-8 encoding to file buf Writes a subsection header in utf-8 encoding to file buf Loading @@ -191,6 +224,7 @@ def write_subsection_to_file(txt, buf): buf.write('\n') buf.write('\n') buf.write('\n') buf.write('\n') def write_table_to_file(tab, buf): def write_table_to_file(tab, buf): ''' ''' Writes content of table t in utf-8 encoding to file F Writes content of table t in utf-8 encoding to file F Loading @@ -201,6 +235,10 @@ def write_table_to_file(tab, buf): buf.write('\n') buf.write('\n') buf.write('\n') buf.write('\n') range_err_mess = "ERR: Out of range (A: {}, B: {}, IDX: {}, LEN(CONTENT): {})" def gen_tables_btwn(a_id, b_id, content, buf): def gen_tables_btwn(a_id, b_id, content, buf): ''' ''' Loops over content and writes all tosca definitions to the Loops over content and writes all tosca definitions to the Loading @@ -211,13 +249,14 @@ def gen_tables_btwn(a_id, b_id, content, buf): for idx in range(a_id, b_id): for idx in range(a_id, b_id): if idx >= len(content): if idx >= len(content): print("ERROR: Paragraph out of range (A: {}, B: {}, IDX: {}, LEN(CONTENT): {})".format( print( a_id, b_id,idx, len(content))) range_err_mess.format(a_id, b_id, idx, len(content))) return definitions_count return definitions_count tmp_elem = content[idx] tmp_elem = content[idx] if isinstance(tmp_elem, Paragraph) and is_lvl2_section_hdn(tmp_elem.text): if isinstance(tmp_elem, Paragraph) and \ is_lvl2_section_hdn(tmp_elem.text): print(tmp_elem.text) print(tmp_elem.text) write_subsection_to_file(tmp_elem.text.split("\t")[1], buf) write_subsection_to_file(tmp_elem.text.split("\t")[1], buf) definitions_count = definitions_count + 1 definitions_count = definitions_count + 1 Loading @@ -228,11 +267,13 @@ def gen_tables_btwn(a_id, b_id, content, buf): elif isinstance(tmp_elem, Table): elif isinstance(tmp_elem, Table): txt = tmp_elem.rows[0].cells[0].text txt = tmp_elem.rows[0].cells[0].text if txt.strip().startswith("Name") or txt.strip().startswith("Shorthand") or \ if txt.strip().startswith("Name") or \ txt.strip().startswith("Shorthand") or \ txt.strip().startswith("tosca_def"): txt.strip().startswith("tosca_def"): continue continue return definitions_count return definitions_count def generate_header( def generate_header( model_name, model_name, buf, buf, Loading @@ -248,34 +289,23 @@ def generate_header( spec_version=spec_version, spec_version=spec_version, imports=imports)) imports=imports)) def generate_templates( filename, spec_ver=SPEC_VERSION, yaml_root='uri', tosc_ver=DEFAULT_TOSCA_VERSION): ''' Takes a filename or file object and loads the definition into the MODELS dictionary ''' if isinstance(filename, str): print("Opening " + filename) for mod in MODEL_NAMES: def init_models(yaml_root, spec_ver, tosc_ver): for model in MODEL_NAMES: import_stmt = 'etsi_nfv_sol001_common_types.yaml' import_stmt = 'etsi_nfv_sol001_common_types.yaml' if yaml_root != 'local': if yaml_root != 'local': import_stmt = \ import_stmt = \ 'https://forge.etsi.org/rep/nfv/SOL001/raw/{}/'.format(spec_ver) + import_stmt 'https://forge.etsi.org/rep/nfv/SOL001/raw/{}/{}'.format( MODELS[mod] = tosca_model_info( spec_ver, import_stmt mod, ) MODELS[model] = tosca_model_info( model, spec_ver, spec_ver, '- ' + import_stmt '- ' + import_stmt ) ) try: sol_001 = docx.Document(filename) except: print("Error opening the submitted Docx file") raise ValueError("Cannot open the submitted Docx file") for mod in MODELS: for mod in MODELS: generate_header( generate_header( MODELS[mod]['name'], MODELS[mod]['name'], Loading @@ -285,27 +315,47 @@ def generate_templates( tosc_ver tosc_ver ) ) def generate_templates( filename, spec_ver=SPEC_VERSION, yaml_root='uri', tosc_ver=DEFAULT_TOSCA_VERSION): ''' Takes a filename or file object and loads the definition into the MODELS dictionary ''' if isinstance(filename, str): print("Opening " + filename) init_models(yaml_root, spec_ver, tosc_ver) try: sol_001 = docx.Document(filename) except: print("Error opening the submitted Docx file") raise ValueError("Cannot open the submitted Docx file") content = get_content(sol_001) content = get_content(sol_001) sections = find_all_sections(content) sections = find_all_sections(content) sections_to_models = { 6 : 'vnfd', 7 : 'nsd', 8 : 'pnfd', 9 : 'common' } for sect in sections: for sect in sections: if not sect.is_annex: if not sect.is_annex: if sect.number in sections_to_models.keys(): if sect.number in sections_to_models.keys(): model = sections_to_models[sect.number] model = sections_to_models[sect.number] count = gen_tables_btwn(sect.from_id, sect.to_id, content, MODELS[model]['buf']) count = gen_tables_btwn( sect.from_id, sect.to_id, content, MODELS[model]['buf'] ) print("Printed " + str(count) + " types to " + model) print("Printed " + str(count) + " types to " + model) else: else: if sect.letter == "A": if sect.letter == "A" or sect.letter == "E": count = generate_examples_between(sect.from_id, sect.to_id, content, EXAMPLES) count = generate_examples_between( print("Printed " + str(count) + " types to " + "Annex " + sect.letter) sect.from_id, sect.to_id, content, EXAMPLES ) print("Printed {} types to Annex {}".format( str(count), sect.letter) ) def print_to_files(prefix=None): def print_to_files(prefix=None): Loading Loading @@ -335,6 +385,7 @@ def print_to_files(prefix=None): newf.write("\n") newf.write("\n") newf.close() newf.close() def parse_version_from_filename(filename): def parse_version_from_filename(filename): ''' ''' Parses the version from the filename Parses the version from the filename Loading @@ -349,9 +400,11 @@ def parse_version_from_filename(filename): .replace("0", ".").strip(".").strip("p.docx") .replace("0", ".").strip(".").strip("p.docx") return "" return "" def slugify(t): def slugify(t): return t.replace(" ", "_").lower() return t.replace(" ", "_").lower() if __name__ == "__main__": if __name__ == "__main__": try: try: Loading @@ -365,5 +418,3 @@ if __name__ == "__main__": generate_templates(SOL001_FN, spec_ver=ver) generate_templates(SOL001_FN, spec_ver=ver) print_to_files() print_to_files()