Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
CTI Tools
tosca2doc
Commits
680ab594
Commit
680ab594
authored
Feb 24, 2021
by
carignani
Browse files
refactor and linting
parent
d1da74f4
Changes
1
Hide whitespace changes
Inline
Side-by-side
src/doc2tosca.py
View file @
680ab594
...
...
@@ -35,6 +35,14 @@ imports:
'''
# Maps the number of a top-level (non-annex) section of the base document
# to the key of the model that receives the definitions of that section.
sections_to_models = {
    6: 'vnfd',
    7: 'nsd',
    8: 'pnfd',
    9: 'common',
}
SUBSECTIONS
=
[
"Artifact Types"
,
"Data Types"
,
...
...
@@ -51,6 +59,7 @@ SUBSECTIONS = [
# Per-model information dictionaries, keyed by model name.
MODELS = dict()

# Container handed to the example generator for annex sections.
EXAMPLES = dict()
class
Section
():
'''
Defines a section of the base document
...
...
@@ -60,7 +69,7 @@ class Section():
self
.
from_id
=
from_id
self
.
to_id
=
to_id
self
.
is_annex
=
title
.
strip
().
startswith
(
"Annex"
)
if
not
self
.
is_annex
:
cleaned_title
=
title
.
strip
().
split
(
"
\t
"
)
self
.
title
=
cleaned_title
[
1
]
...
...
@@ -72,15 +81,23 @@ class Section():
def __repr__(self):
    '''
    Returns a compact textual description of the section.

    Annexes are rendered as "(title, Annex letter, from_id-to_id)",
    every other section as "(title, number, from_id-to_id)".
    '''
    if self.is_annex:
        return "({}, Annex {}, {}-{})".format(
            self.title, self.letter, self.from_id, self.to_id
        )
    return "({}, {}, {}-{})".format(
        self.title, self.number, self.from_id, self.to_id
    )
def match_definition_incipit(txt):
    '''
    Returns True if txt matches the incipit of a definition,
    identified by the word 'tosca'

    Only the first line of txt is examined, with surrounding whitespace
    removed. It must start with "tosca." and may then contain only
    letters, digits, dots, colons and whitespace.
    '''
    first_line = txt.split("\n")[0].strip()
    return bool(re.match(r'^tosca\.[a-zA-Z\.:0-9\s]*$', first_line))
def
is_tosca_def
(
table
):
'''
...
...
@@ -92,20 +109,22 @@ def is_tosca_def(table):
return
\
len
(
table
.
rows
)
==
1
and
\
len
(
table
.
columns
)
==
1
and
\
match_definition_incipit
(
txt
)
match_definition_incipit
(
txt
)
def tosca_model_info(name, version, imports):
    '''
    Returns a dictionary to hold information on the model

    The dictionary has the following keys:
      - 'name':    the model name, as received
      - 'fn':      BASE_FILENAME formatted with the version (dots
                   replaced by dashes) and the model name
      - 'fd':      always None at creation time
      - 'imports': the imports argument, as received
      - 'buf':     a new, empty StringIO buffer
    '''
    dashed_version = version.replace(".", "-")
    return {
        'name': name,
        'fn': BASE_FILENAME.format(dashed_version, name),
        'fd': None,
        'imports': imports,
        'buf': StringIO()
    }
def
get_content
(
doc
):
'''
Returns a list of all paragraphs and tables in the Document
...
...
@@ -121,12 +140,13 @@ def get_content(doc):
elif
isinstance
(
element
,
docx
.
oxml
.
table
.
CT_Tbl
):
ret
.
append
(
Table
(
element
,
body
))
table_count
=
table_count
+
1
#else:
#
else:
# print("Non paragraph or table " + str(type(element)))
print
(
"Paragraphs: "
+
str
(
parag_count
))
print
(
"Tables: "
+
str
(
table_count
))
return
ret
def
find_sect
(
sect_to_find
,
start_idx
,
doc_content
):
'''
Returns the index in the doc_content list to the first paragraph
...
...
@@ -135,29 +155,33 @@ def find_sect(sect_to_find, start_idx, doc_content):
'''
while
start_idx
<
len
(
doc_content
):
my_elem
=
doc_content
[
start_idx
]
if
isinstance
(
my_elem
,
Paragraph
)
and
my_elem
.
text
.
strip
()
==
sect_to_find
:
if
isinstance
(
my_elem
,
Paragraph
)
and
\
my_elem
.
text
.
strip
()
==
sect_to_find
:
break
start_idx
=
start_idx
+
1
print
(
"FOUND "
+
sect_to_find
+
" at "
+
str
(
start_idx
))
return
start_idx
def is_lvl2_section_hdn(txt):
    '''
    Returns True if txt is a level 2 heading.

    The stripped text must be "<digit>.<digits><TAB><title>", where the
    title holds only letters and whitespace, and the text between the
    first and the second tab must be one of SUBSECTIONS.
    '''
    heading = txt.strip()
    if re.match(r'^[0-9]\.[0-9]+\t[a-zA-Z\s]*$', heading) is None:
        return False
    # The pattern guarantees at least one tab, so index 1 always exists
    pieces = heading.split('\t')
    return pieces[1] in SUBSECTIONS
def is_lvl1_section_hdn(txt):
    '''
    Returns True if txt is a level 1 heading.

    After stripping, txt is a level 1 heading if it is either
    "<digits><TAB><title>", where the title holds only letters and
    whitespace, or if it starts with "Annex", optional whitespace,
    one or more capital letters and at least one whitespace character.
    '''
    heading = txt.strip()
    if re.match(r'^[0-9]+\t[a-zA-Z\s]*$', heading):
        return True
    # No '$' anchor here: only the beginning of an annex title is checked
    return bool(
        re.match(r'^Annex[\s]*[A-Z]+[\s\t]+[a-zA-Z\s\(\)]*', heading)
    )
def
find_all_sections
(
doc_content
):
'''
...
...
@@ -171,17 +195,26 @@ def find_all_sections(doc_content):
while
end_indx
<
len
(
doc_content
):
my_elem
=
doc_content
[
end_indx
]
if
isinstance
(
my_elem
,
Paragraph
)
and
is_lvl1_section_hdn
(
my_elem
.
text
):
if
isinstance
(
my_elem
,
Paragraph
)
and
\
is_lvl1_section_hdn
(
my_elem
.
text
):
if
start_indx
!=
0
:
sections
.
append
(
Section
(
start_indx
,
end_indx
-
1
,
doc_content
[
start_indx
].
text
))
sections
.
append
(
Section
(
start_indx
,
end_indx
-
1
,
doc_content
[
start_indx
].
text
)
)
start_indx
=
end_indx
end_indx
=
end_indx
+
1
sections
.
append
(
Section
(
start_indx
,
end_indx
-
1
,
doc_content
[
start_indx
].
text
))
sections
.
append
(
Section
(
start_indx
,
end_indx
-
1
,
doc_content
[
start_indx
].
text
)
)
return
sections
def
write_subsection_to_file
(
txt
,
buf
):
'''
Writes a subsection header in utf-8 encoding to file buf
...
...
@@ -191,6 +224,7 @@ def write_subsection_to_file(txt, buf):
buf
.
write
(
'
\n
'
)
buf
.
write
(
'
\n
'
)
def
write_table_to_file
(
tab
,
buf
):
'''
Writes content of table t in utf-8 encoding to file F
...
...
@@ -201,6 +235,10 @@ def write_table_to_file(tab, buf):
buf
.
write
(
'
\n
'
)
buf
.
write
(
'
\n
'
)
# Template of the message printed when an index runs past the end of the
# content list; placeholders are, in order: A, B, IDX and LEN(CONTENT).
range_err_mess = (
    "ERR: Out of range (A: {}, B: {}, IDX: {}, LEN(CONTENT): {})"
)
def
gen_tables_btwn
(
a_id
,
b_id
,
content
,
buf
):
'''
Loops over content and writes all tosca definitions to the
...
...
@@ -211,13 +249,14 @@ def gen_tables_btwn(a_id, b_id, content, buf):
for
idx
in
range
(
a_id
,
b_id
):
if
idx
>=
len
(
content
):
print
(
"ERROR: Paragraph out of range (A: {}, B: {}, IDX: {}, LEN(CONTENT): {})"
.
format
(
a_id
,
b_id
,
idx
,
len
(
content
)))
print
(
range_err_mess
.
format
(
a_id
,
b_id
,
idx
,
len
(
content
)))
return
definitions_count
tmp_elem
=
content
[
idx
]
if
isinstance
(
tmp_elem
,
Paragraph
)
and
is_lvl2_section_hdn
(
tmp_elem
.
text
):
if
isinstance
(
tmp_elem
,
Paragraph
)
and
\
is_lvl2_section_hdn
(
tmp_elem
.
text
):
print
(
tmp_elem
.
text
)
write_subsection_to_file
(
tmp_elem
.
text
.
split
(
"
\t
"
)[
1
],
buf
)
definitions_count
=
definitions_count
+
1
...
...
@@ -228,16 +267,18 @@ def gen_tables_btwn(a_id, b_id, content, buf):
elif
isinstance
(
tmp_elem
,
Table
):
txt
=
tmp_elem
.
rows
[
0
].
cells
[
0
].
text
if
txt
.
strip
().
startswith
(
"Name"
)
or
txt
.
strip
().
startswith
(
"Shorthand"
)
or
\
txt
.
strip
().
startswith
(
"tosca_def"
):
if
txt
.
strip
().
startswith
(
"Name"
)
or
\
txt
.
strip
().
startswith
(
"Shorthand"
)
or
\
txt
.
strip
().
startswith
(
"tosca_def"
):
continue
return
definitions_count
def
generate_header
(
model_name
,
buf
,
spec_version
=
SPEC_VERSION
,
imports
=
None
,
model_name
,
buf
,
spec_version
=
SPEC_VERSION
,
imports
=
None
,
tosca_version
=
DEFAULT_TOSCA_VERSION
):
'''
Writes the header to the file for a specific model
...
...
@@ -248,27 +289,46 @@ def generate_header(
spec_version
=
spec_version
,
imports
=
imports
))
def init_models(yaml_root, spec_ver, tosc_ver):
    '''
    Creates an entry in MODELS for each name in MODEL_NAMES and then
    writes the header of every model in MODELS to its buffer.

    When yaml_root is 'local' the common types file is imported by its
    bare file name, otherwise through its URL on the ETSI forge for
    spec_ver.
    '''
    # The import line does not depend on the model: compute it only once
    common_types = 'etsi_nfv_sol001_common_types.yaml'
    if yaml_root == 'local':
        import_stmt = common_types
    else:
        import_stmt = \
            'https://forge.etsi.org/rep/nfv/SOL001/raw/{}/{}'.format(
                spec_ver, common_types
            )

    for model_name in MODEL_NAMES:
        MODELS[model_name] = tosca_model_info(
            model_name, spec_ver, '- ' + import_stmt
        )

    # NOTE(review): the header loop is taken to run after the first loop,
    # not inside it - the source indentation was lost; confirm.
    for info in MODELS.values():
        generate_header(
            info['name'],
            info['buf'],
            spec_ver,
            info['imports'],
            tosc_ver
        )
def
generate_templates
(
filename
,
spec_ver
=
SPEC_VERSION
,
yaml_root
=
'uri'
,
filename
,
spec_ver
=
SPEC_VERSION
,
yaml_root
=
'uri'
,
tosc_ver
=
DEFAULT_TOSCA_VERSION
):
'''
Takes a filename or file object and loads the definition into the MODELS dictionary
Takes a filename or file object and loads the definition into
the MODELS dictionary
'''
if
isinstance
(
filename
,
str
):
print
(
"Opening "
+
filename
)
for
mod
in
MODEL_NAMES
:
import_stmt
=
'etsi_nfv_sol001_common_types.yaml'
if
yaml_root
!=
'local'
:
import_stmt
=
\
'https://forge.etsi.org/rep/nfv/SOL001/raw/{}/'
.
format
(
spec_ver
)
+
import_stmt
MODELS
[
mod
]
=
tosca_model_info
(
mod
,
spec_ver
,
'- '
+
import_stmt
)
init_models
(
yaml_root
,
spec_ver
,
tosc_ver
)
try
:
sol_001
=
docx
.
Document
(
filename
)
...
...
@@ -276,40 +336,30 @@ def generate_templates(
print
(
"Error opening the submitted Docx file"
)
raise
ValueError
(
"Cannot open the submitted Docx file"
)
for
mod
in
MODELS
:
generate_header
(
MODELS
[
mod
][
'name'
],
MODELS
[
mod
][
'buf'
],
spec_ver
,
MODELS
[
mod
][
'imports'
],
tosc_ver
)
content
=
get_content
(
sol_001
)
sections
=
find_all_sections
(
content
)
sections_to_models
=
{
6
:
'vnfd'
,
7
:
'nsd'
,
8
:
'pnfd'
,
9
:
'common'
}
for
sect
in
sections
:
if
not
sect
.
is_annex
:
if
sect
.
number
in
sections_to_models
.
keys
():
model
=
sections_to_models
[
sect
.
number
]
count
=
gen_tables_btwn
(
sect
.
from_id
,
sect
.
to_id
,
content
,
MODELS
[
model
][
'buf'
])
count
=
gen_tables_btwn
(
sect
.
from_id
,
sect
.
to_id
,
content
,
MODELS
[
model
][
'buf'
]
)
print
(
"Printed "
+
str
(
count
)
+
" types to "
+
model
)
else
:
if
sect
.
letter
==
"A"
:
count
=
generate_examples_between
(
sect
.
from_id
,
sect
.
to_id
,
content
,
EXAMPLES
)
print
(
"Printed "
+
str
(
count
)
+
" types to "
+
"Annex "
+
sect
.
letter
)
if
sect
.
letter
==
"A"
or
sect
.
letter
==
"E"
:
count
=
generate_examples_between
(
sect
.
from_id
,
sect
.
to_id
,
content
,
EXAMPLES
)
print
(
"Printed {} types to Annex {}"
.
format
(
str
(
count
),
sect
.
letter
)
)
def
print_to_files
(
prefix
=
None
):
'''
'''
Prefix is a path to a folder to work into
'''
for
key
in
MODELS
:
...
...
@@ -335,23 +385,26 @@ def print_to_files(prefix=None):
newf
.
write
(
"
\n
"
)
newf
.
close
()
def parse_version_from_filename(filename):
    '''
    Parses the version from the filename

    The base name of filename must start with "gs_NFV-SOL001v" or
    "gs_nfv-sol001v". The version that follows is accepted either as
    three two-digit groups (e.g. "020601" gives "v2.6.1") or already
    dotted (e.g. "2.6.1" gives "v2.6.1").

    Returns the version as "vX.Y.Z", or an empty string when the
    prefix is missing or no version can be recognised after it.
    '''
    base_filename = os.path.basename(filename)

    # Both accepted prefixes have the same length
    prefixes = ("gs_NFV-SOL001v", "gs_nfv-sol001v")
    if not base_filename.startswith(prefixes):
        return ""
    remainder = base_filename[len(prefixes[0]):]

    # str.strip() removes a set of characters, not a prefix or suffix,
    # and so it also ate version digits (e.g. "010101" became "");
    # the two-digit groups are parsed explicitly instead.
    packed = re.match(r'(\d{2})(\d{2})(\d{2})', remainder)
    if packed:
        return "v{}.{}.{}".format(*(int(grp) for grp in packed.groups()))

    dotted = re.match(r'\d+\.\d+\.\d+', remainder)
    if dotted:
        return "v" + dotted.group(0)

    return ""
def slugify(t):
    '''
    Returns t in lower case, with every space replaced by an underscore
    '''
    lowered = t.lower()
    return "_".join(lowered.split(" "))
if
__name__
==
"__main__"
:
try
:
...
...
@@ -365,5 +418,3 @@ if __name__ == "__main__":
generate_templates
(
SOL001_FN
,
spec_ver
=
ver
)
print_to_files
()
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment