diff --git a/src/doc2tosca.py b/src/doc2tosca.py
index 33e9a928df5329942dc8a51316acd2c5bee1b772..861d320565fc4677e5a394f6a63ba6328988042d 100644
--- a/src/doc2tosca.py
+++ b/src/doc2tosca.py
@@ -35,6 +35,14 @@ imports:
 '''
 
 
+sections_to_models = {
+    6: 'vnfd',
+    7: 'nsd',
+    8: 'pnfd',
+    9: 'common'
+}
+
+
 SUBSECTIONS = [
     "Artifact Types",
     "Data Types",
@@ -51,6 +59,7 @@ SUBSECTIONS = [
 MODELS = {}
 EXAMPLES = {}
 
+
 class Section():
     '''
     Defines a section of the base document
@@ -60,7 +69,7 @@ class Section():
         self.from_id = from_id
         self.to_id = to_id
         self.is_annex = title.strip().startswith("Annex")
-        
+
         if not self.is_annex:
             cleaned_title = title.strip().split("\t")
             self.title = cleaned_title[1]
@@ -72,15 +81,23 @@ class Section():
 
     def __repr__(self):
         if self.is_annex:
-            return "({}, Annex {}, {}-{})".format(self.title,self.letter, self.from_id, self.to_id)
-        return "({}, {}, {}-{})".format(self.title, self.number, self.from_id, self.to_id)
+            return "({}, Annex {}, {}-{})".format(
+                self.title, self.letter, self.from_id, self.to_id
+            )
+        return "({}, {}, {}-{})".format(
+            self.title, self.number, self.from_id, self.to_id
+        )
+
 
 def match_definition_incipit(txt):
     '''
     Returns tru if txt matches the incipit of a definition,
-    identified by the word 'tosca' 
+    identified by the word 'tosca'
     '''
-    return bool(re.match(r'^tosca\.[a-zA-Z\.:0-9\s]*$',txt.split("\n")[0].strip()))
+    return bool(
+        re.match(r'^tosca\.[a-zA-Z\.:0-9\s]*$', txt.split("\n")[0].strip())
+    )
+
 
 def is_tosca_def(table):
     '''
@@ -92,20 +109,22 @@ def is_tosca_def(table):
     return \
         len(table.rows) == 1 and \
         len(table.columns) == 1 and \
-        match_definition_incipit(txt) 
+        match_definition_incipit(txt)
+
 
 def tosca_model_info(name, version, imports):
     '''
     Returns a dictionary to hold information on the model
     '''
     return {
-        'name' : name,
-        'fn' : BASE_FILENAME.format(version.replace(".","-"), name),
-        'fd' : None,
-        'imports' : imports,
-        'buf' : StringIO()
+        'name': name,
+        'fn': BASE_FILENAME.format(version.replace(".", "-"), name),
+        'fd': None,
+        'imports': imports,
+        'buf': StringIO()
     }
+
 
 def get_content(doc):
     '''
     Returns a list of all paragraphs and tables in the Document
@@ -121,12 +140,13 @@
         elif isinstance(element, docx.oxml.table.CT_Tbl):
             ret.append(Table(element, body))
             table_count = table_count + 1
-        #else:
+        # else:
         #    print("Non paragraph or table " + str(type(element)))
     print("Paragraphs: " + str(parag_count))
     print("Tables: " + str(table_count))
     return ret
+
 
 def find_sect(sect_to_find, start_idx, doc_content):
     '''
     Returns the index in the doc_content list to the first paragraph
@@ -135,29 +155,33 @@
     '''
     while start_idx < len(doc_content):
         my_elem = doc_content[start_idx]
-        if isinstance(my_elem, Paragraph) and my_elem.text.strip() == sect_to_find:
+        if isinstance(my_elem, Paragraph) and \
+                my_elem.text.strip() == sect_to_find:
             break
         start_idx = start_idx + 1
 
     print("FOUND " + sect_to_find + " at " + str(start_idx))
     return start_idx
+
 
 def is_lvl2_section_hdn(txt):
     ''' Returns true if txt is level 2 heading'''
     clean_txt = txt.strip()
-    
+
     if not bool(re.match(r'^[0-9]\.[0-9]+\t[a-zA-Z\s]*$', clean_txt)):
         return False
 
     subtitle = clean_txt.split('\t')[1]
-    
+
     return subtitle in SUBSECTIONS
+
 
 def is_lvl1_section_hdn(txt):
     ''' Returns true if txt is level 1 heading'''
     clean_txt = txt.strip()
     return bool(re.match(r'^[0-9]+\t[a-zA-Z\s]*$', clean_txt)) or \
-    bool(re.match(r'^Annex[\s]*[A-Z]+[\s\t]+[a-zA-Z\s\(\)]*', clean_txt))
+        bool(re.match(r'^Annex[\s]*[A-Z]+[\s\t]+[a-zA-Z\s\(\)]*', clean_txt))
+
 
 def find_all_sections(doc_content):
     '''
@@ -171,17 +195,26 @@
 
     while end_indx < len(doc_content):
         my_elem = doc_content[end_indx]
-        if isinstance(my_elem, Paragraph) and is_lvl1_section_hdn(my_elem.text):
+        if isinstance(my_elem, Paragraph) and \
+                is_lvl1_section_hdn(my_elem.text):
             if start_indx != 0:
-                sections.append(Section(start_indx, end_indx-1, doc_content[start_indx].text))
-                
+                sections.append(
+                    Section(
+                        start_indx,
+                        end_indx-1,
+                        doc_content[start_indx].text)
+                )
+
             start_indx = end_indx
-        
+
         end_indx = end_indx + 1
-    
-    sections.append(Section(start_indx, end_indx-1, doc_content[start_indx].text))
+
+    sections.append(
+        Section(start_indx, end_indx-1, doc_content[start_indx].text)
+    )
     return sections
+
 
 def write_subsection_to_file(txt, buf):
     '''
     Writes a subsection header in utf-8 encoding to file buf
@@ -191,6 +224,7 @@
     buf.write('\n')
     buf.write('\n')
+
 
 def write_table_to_file(tab, buf):
     '''
     Writes content of table t in utf-8 encoding to file F
@@ -201,6 +235,10 @@
     buf.write('\n')
     buf.write('\n')
 
+
+range_err_mess = "ERR: Out of range (A: {}, B: {}, IDX: {}, LEN(CONTENT): {})"
+
+
 def gen_tables_btwn(a_id, b_id, content, buf):
     '''
     Loops over content and writes all tosca definitions to the
@@ -211,13 +249,14 @@
     for idx in range(a_id, b_id):
 
         if idx >= len(content):
-            print("ERROR: Paragraph out of range (A: {}, B: {}, IDX: {}, LEN(CONTENT): {})".format(
-                a_id, b_id,idx, len(content)))
+            print(
+                range_err_mess.format(a_id, b_id, idx, len(content)))
            return definitions_count
 
         tmp_elem = content[idx]
 
-        if isinstance(tmp_elem, Paragraph) and is_lvl2_section_hdn(tmp_elem.text):
+        if isinstance(tmp_elem, Paragraph) and \
+                is_lvl2_section_hdn(tmp_elem.text):
             print(tmp_elem.text)
             write_subsection_to_file(tmp_elem.text.split("\t")[1], buf)
             definitions_count = definitions_count + 1
@@ -228,16 +267,18 @@
         elif isinstance(tmp_elem, Table):
             txt = tmp_elem.rows[0].cells[0].text
-            if txt.strip().startswith("Name") or txt.strip().startswith("Shorthand") or \
-                txt.strip().startswith("tosca_def"):
+            if txt.strip().startswith("Name") or \
+                    txt.strip().startswith("Shorthand") or \
+                    txt.strip().startswith("tosca_def"):
                 continue
 
     return definitions_count
+
 
 def generate_header(
-        model_name, 
-        buf, 
-        spec_version=SPEC_VERSION, 
-        imports=None, 
+        model_name,
+        buf,
+        spec_version=SPEC_VERSION,
+        imports=None,
         tosca_version=DEFAULT_TOSCA_VERSION):
     '''
     Writes the header to the file for a specific model
@@ -248,27 +289,46 @@
         spec_version=spec_version,
         imports=imports))
 
+
+def init_models(yaml_root, spec_ver, tosc_ver):
+    for model in MODEL_NAMES:
+        import_stmt = 'etsi_nfv_sol001_common_types.yaml'
+
+        if yaml_root != 'local':
+            import_stmt = \
+                'https://forge.etsi.org/rep/nfv/SOL001/raw/{}/{}'.format(
+                    spec_ver, import_stmt
+                )
+
+        MODELS[model] = tosca_model_info(
+            model,
+            spec_ver,
+            '- ' + import_stmt
+        )
+
+    for mod in MODELS:
+        generate_header(
+            MODELS[mod]['name'],
+            MODELS[mod]['buf'],
+            spec_ver,
+            MODELS[mod]['imports'],
+            tosc_ver
+        )
+
+
 def generate_templates(
-        filename, 
-        spec_ver=SPEC_VERSION, 
-        yaml_root='uri', 
+        filename,
+        spec_ver=SPEC_VERSION,
+        yaml_root='uri',
         tosc_ver=DEFAULT_TOSCA_VERSION):
     '''
-    Takes a filename or file object and loads the definition into the MODELS dictionary
+    Takes a filename or file object and loads the definition into
+    the MODELS dictionary
     '''
     if isinstance(filename, str):
         print("Opening " + filename)
 
-    for mod in MODEL_NAMES:
-        import_stmt = 'etsi_nfv_sol001_common_types.yaml'
-        if yaml_root != 'local':
-            import_stmt = \
-                'https://forge.etsi.org/rep/nfv/SOL001/raw/{}/'.format(spec_ver) + import_stmt
-        MODELS[mod] = tosca_model_info(
-            mod,
-            spec_ver,
-            '- ' + import_stmt
-        )
+    init_models(yaml_root, spec_ver, tosc_ver)
 
     try:
         sol_001 = docx.Document(filename)
@@ -276,40 +336,30 @@
         print("Error opening the submitted Docx file")
         raise ValueError("Cannot open the submitted Docx file")
 
 
-    for mod in MODELS:
-        generate_header(
-            MODELS[mod]['name'],
-            MODELS[mod]['buf'],
-            spec_ver,
-            MODELS[mod]['imports'],
-            tosc_ver
-        )
-
     content = get_content(sol_001)
     sections = find_all_sections(content)
 
-    sections_to_models = {
-        6 : 'vnfd',
-        7 : 'nsd',
-        8 : 'pnfd',
-        9 : 'common'
-    }
-
     for sect in sections:
         if not sect.is_annex:
             if sect.number in sections_to_models.keys():
                 model = sections_to_models[sect.number]
-                count = gen_tables_btwn(sect.from_id, sect.to_id, content, MODELS[model]['buf'])
+                count = gen_tables_btwn(
+                    sect.from_id, sect.to_id, content, MODELS[model]['buf']
+                )
                 print("Printed " + str(count) + " types to " + model)
         else:
-            if sect.letter == "A":
-                count = generate_examples_between(sect.from_id, sect.to_id, content, EXAMPLES)
-                print("Printed " + str(count) + " types to " + "Annex " + sect.letter)
+            if sect.letter == "A" or sect.letter == "E":
+                count = generate_examples_between(
+                    sect.from_id, sect.to_id, content, EXAMPLES
+                )
+                print("Printed {} types to Annex {}".format(
+                    str(count), sect.letter)
+                )
 
 
 
 def print_to_files(prefix=None):
-    ''' 
+    '''
     Prefix is a path to a folder to work into
     '''
@@ -335,23 +385,26 @@
         newf.write("\n")
         newf.close()
+
 
 def parse_version_from_filename(filename):
     '''
     Parses the version from the filename
     '''
     base_filename = os.path.basename(filename)
 
-    if base_filename.startswith("gs_NFV-SOL001v"): 
+    if base_filename.startswith("gs_NFV-SOL001v"):
        return "v" + base_filename.strip("gs_NFV-SOL001v") \
-            .replace("0",".").strip(".").strip("p.docx")
-    if base_filename.startswith("gs_nfv-sol001v"): 
+            .replace("0", ".").strip(".").strip("p.docx")
+    if base_filename.startswith("gs_nfv-sol001v"):
        return "v" + base_filename.strip("gs_nfv-sol001v") \
-            .replace("0",".").strip(".").strip("p.docx")
+            .replace("0", ".").strip(".").strip("p.docx")
     return ""
+
 
 def slugify(t):
     return t.replace(" ", "_").lower()
+
 
 if __name__ == "__main__":
 
     try:
@@ -365,5 +418,3 @@
 
     generate_templates(SOL001_FN, spec_ver=ver)
     print_to_files()
-
-
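
Reviewer note, not part of the patch: a minimal usage sketch of the refactored flow, assuming src/ is on the import path and a SOL001 GS document in Docx format is available locally. The filename below is hypothetical; the sketch only illustrates the call order now that init_models() has been split out of generate_templates().

    # usage_sketch.py -- exercises the entry points touched by this patch
    import doc2tosca

    # Hypothetical input document; any SOL001 GS .docx should work here.
    docx_path = "gs_NFV-SOL001v030301p.docx"

    # Derive the spec version from the filename, falling back to the
    # module default if the name does not match the expected pattern.
    version = doc2tosca.parse_version_from_filename(docx_path) \
        or doc2tosca.SPEC_VERSION

    # generate_templates() now delegates model set-up (building the MODELS
    # dict and writing each model's TOSCA header) to init_models() before
    # parsing the document.
    doc2tosca.generate_templates(docx_path, spec_ver=version)

    # Write one YAML file per model to the working directory
    # (pass prefix= to write into another folder).
    doc2tosca.print_to_files()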