Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
T
tosca2doc
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
CTI Tools
tosca2doc
Commits
680ab594
Commit
680ab594
authored
4 years ago
by
carignani
Browse files
Options
Downloads
Patches
Plain Diff
refactor and linting
parent
d1da74f4
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/doc2tosca.py
+123
-72
123 additions, 72 deletions
src/doc2tosca.py
with
123 additions
and
72 deletions
src/doc2tosca.py
+
123
−
72
View file @
680ab594
...
...
@@ -35,6 +35,14 @@ imports:
'''
# Maps the number of a level 1 section of the base document to the name of
# the model that receives its definitions; generate_templates looks up
# sect.number here to pick the MODELS entry to write to.
sections_to_models = {
    6: 'vnfd',
    7: 'nsd',
    8: 'pnfd',
    9: 'common'
}
SUBSECTIONS
=
[
"
Artifact Types
"
,
"
Data Types
"
,
...
...
@@ -51,6 +59,7 @@ SUBSECTIONS = [
# Model information dictionaries keyed by model name; filled by init_models
# with the result of tosca_model_info.
MODELS = {}

# Container passed to generate_examples_between when annex sections are
# processed in generate_templates.
EXAMPLES = {}
class
Section
():
'''
Defines a section of the base document
...
...
@@ -60,7 +69,7 @@ class Section():
self
.
from_id
=
from_id
self
.
to_id
=
to_id
self
.
is_annex
=
title
.
strip
().
startswith
(
"
Annex
"
)
if
not
self
.
is_annex
:
cleaned_title
=
title
.
strip
().
split
(
"
\t
"
)
self
.
title
=
cleaned_title
[
1
]
...
...
@@ -72,15 +81,23 @@ class Section():
def
__repr__
(
self
):
if
self
.
is_annex
:
return
"
({}, Annex {}, {}-{})
"
.
format
(
self
.
title
,
self
.
letter
,
self
.
from_id
,
self
.
to_id
)
return
"
({}, {}, {}-{})
"
.
format
(
self
.
title
,
self
.
number
,
self
.
from_id
,
self
.
to_id
)
return
"
({}, Annex {}, {}-{})
"
.
format
(
self
.
title
,
self
.
letter
,
self
.
from_id
,
self
.
to_id
)
return
"
({}, {}, {}-{})
"
.
format
(
self
.
title
,
self
.
number
,
self
.
from_id
,
self
.
to_id
)
def match_definition_incipit(txt):
    '''
    Returns True if txt matches the incipit of a definition,
    identified by the word 'tosca'

    Only the first line of txt is examined, after stripping surrounding
    whitespace: it must start with "tosca." and contain only letters,
    digits, dots, colons and whitespace after that.
    '''
    first_line = txt.split("\n")[0].strip()
    return bool(re.match(r'^tosca\.[a-zA-Z\.:0-9\s]*$', first_line))
def
is_tosca_def
(
table
):
'''
...
...
@@ -92,20 +109,22 @@ def is_tosca_def(table):
return
\
len
(
table
.
rows
)
==
1
and
\
len
(
table
.
columns
)
==
1
and
\
match_definition_incipit
(
txt
)
match_definition_incipit
(
txt
)
def tosca_model_info(name, version, imports):
    '''
    Returns a dictionary to hold information on the model

    name is stored as is, version is used (with dots replaced by dashes)
    together with name to fill BASE_FILENAME, and imports is stored as is.
    Every call creates a fresh StringIO buffer under the 'buf' key; 'fd'
    starts out as None.
    '''
    return {
        'name': name,
        'fn': BASE_FILENAME.format(version.replace(".", "-"), name),
        'fd': None,
        'imports': imports,
        'buf': StringIO()
    }
def
get_content
(
doc
):
'''
Returns a list of all paragraphs and tables in the Document
...
...
@@ -121,12 +140,13 @@ def get_content(doc):
elif
isinstance
(
element
,
docx
.
oxml
.
table
.
CT_Tbl
):
ret
.
append
(
Table
(
element
,
body
))
table_count
=
table_count
+
1
#else:
#
else:
# print("Non paragraph or table " + str(type(element)))
print
(
"
Paragraphs:
"
+
str
(
parag_count
))
print
(
"
Tables:
"
+
str
(
table_count
))
return
ret
def
find_sect
(
sect_to_find
,
start_idx
,
doc_content
):
'''
Returns the index in the doc_content list to the first paragraph
...
...
@@ -135,29 +155,33 @@ def find_sect(sect_to_find, start_idx, doc_content):
'''
while
start_idx
<
len
(
doc_content
):
my_elem
=
doc_content
[
start_idx
]
if
isinstance
(
my_elem
,
Paragraph
)
and
my_elem
.
text
.
strip
()
==
sect_to_find
:
if
isinstance
(
my_elem
,
Paragraph
)
and
\
my_elem
.
text
.
strip
()
==
sect_to_find
:
break
start_idx
=
start_idx
+
1
print
(
"
FOUND
"
+
sect_to_find
+
"
at
"
+
str
(
start_idx
))
return
start_idx
def is_lvl2_section_hdn(txt):
    '''
    Tells whether txt is a level 2 heading: a "<digit>.<digits>" number,
    a tab and a title, where the title is one of the known SUBSECTIONS
    '''
    heading = txt.strip()
    if re.match(r'^[0-9]\.[0-9]+\t[a-zA-Z\s]*$', heading) is None:
        return False
    # The title is whatever sits between the first and second tab
    return heading.split('\t')[1] in SUBSECTIONS
def is_lvl1_section_hdn(txt):
    '''
    Returns true if txt is level 1 heading

    After stripping surrounding whitespace, txt is a level 1 heading when
    it is either a section number, a tab and a title made of letters and
    whitespace, or an "Annex <LETTERS>" heading followed by whitespace.
    '''
    clean_txt = txt.strip()
    if re.match(r'^[0-9]+\t[a-zA-Z\s]*$', clean_txt):
        return True
    # The annex pattern is not anchored at the end, so anything may follow
    # the part of the title it recognises
    return bool(
        re.match(r'^Annex[\s]*[A-Z]+[\s\t]+[a-zA-Z\s\(\)]*', clean_txt)
    )
def
find_all_sections
(
doc_content
):
'''
...
...
@@ -171,17 +195,26 @@ def find_all_sections(doc_content):
while
end_indx
<
len
(
doc_content
):
my_elem
=
doc_content
[
end_indx
]
if
isinstance
(
my_elem
,
Paragraph
)
and
is_lvl1_section_hdn
(
my_elem
.
text
):
if
isinstance
(
my_elem
,
Paragraph
)
and
\
is_lvl1_section_hdn
(
my_elem
.
text
):
if
start_indx
!=
0
:
sections
.
append
(
Section
(
start_indx
,
end_indx
-
1
,
doc_content
[
start_indx
].
text
))
sections
.
append
(
Section
(
start_indx
,
end_indx
-
1
,
doc_content
[
start_indx
].
text
)
)
start_indx
=
end_indx
end_indx
=
end_indx
+
1
sections
.
append
(
Section
(
start_indx
,
end_indx
-
1
,
doc_content
[
start_indx
].
text
))
sections
.
append
(
Section
(
start_indx
,
end_indx
-
1
,
doc_content
[
start_indx
].
text
)
)
return
sections
def
write_subsection_to_file
(
txt
,
buf
):
'''
Writes a subsection header in utf-8 encoding to file buf
...
...
@@ -191,6 +224,7 @@ def write_subsection_to_file(txt, buf):
buf
.
write
(
'
\n
'
)
buf
.
write
(
'
\n
'
)
def
write_table_to_file
(
tab
,
buf
):
'''
Writes content of table t in utf-8 encoding to file F
...
...
@@ -201,6 +235,10 @@ def write_table_to_file(tab, buf):
buf
.
write
(
'
\n
'
)
buf
.
write
(
'
\n
'
)
# Template of the message gen_tables_btwn prints when an index is past the
# end of the content; filled with a_id, b_id, idx and len(content).
range_err_mess = "ERR: Out of range (A: {}, B: {}, IDX: {}, LEN(CONTENT): {})"
def
gen_tables_btwn
(
a_id
,
b_id
,
content
,
buf
):
'''
Loops over content and writes all tosca definitions to the
...
...
@@ -211,13 +249,14 @@ def gen_tables_btwn(a_id, b_id, content, buf):
for
idx
in
range
(
a_id
,
b_id
):
if
idx
>=
len
(
content
):
print
(
"
ERROR: Paragraph out of range (A: {}, B: {}, IDX: {}, LEN(CONTENT): {})
"
.
format
(
a_id
,
b_id
,
idx
,
len
(
content
)))
print
(
range_err_mess
.
format
(
a_id
,
b_id
,
idx
,
len
(
content
)))
return
definitions_count
tmp_elem
=
content
[
idx
]
if
isinstance
(
tmp_elem
,
Paragraph
)
and
is_lvl2_section_hdn
(
tmp_elem
.
text
):
if
isinstance
(
tmp_elem
,
Paragraph
)
and
\
is_lvl2_section_hdn
(
tmp_elem
.
text
):
print
(
tmp_elem
.
text
)
write_subsection_to_file
(
tmp_elem
.
text
.
split
(
"
\t
"
)[
1
],
buf
)
definitions_count
=
definitions_count
+
1
...
...
@@ -228,16 +267,18 @@ def gen_tables_btwn(a_id, b_id, content, buf):
elif
isinstance
(
tmp_elem
,
Table
):
txt
=
tmp_elem
.
rows
[
0
].
cells
[
0
].
text
if
txt
.
strip
().
startswith
(
"
Name
"
)
or
txt
.
strip
().
startswith
(
"
Shorthand
"
)
or
\
txt
.
strip
().
startswith
(
"
tosca_def
"
):
if
txt
.
strip
().
startswith
(
"
Name
"
)
or
\
txt
.
strip
().
startswith
(
"
Shorthand
"
)
or
\
txt
.
strip
().
startswith
(
"
tosca_def
"
):
continue
return
definitions_count
def
generate_header
(
model_name
,
buf
,
spec_version
=
SPEC_VERSION
,
imports
=
None
,
model_name
,
buf
,
spec_version
=
SPEC_VERSION
,
imports
=
None
,
tosca_version
=
DEFAULT_TOSCA_VERSION
):
'''
Writes the header to the file for a specific model
...
...
@@ -248,27 +289,46 @@ def generate_header(
spec_version
=
spec_version
,
imports
=
imports
))
def init_models(yaml_root, spec_ver, tosc_ver):
    '''
    Fills MODELS with one entry per name in MODEL_NAMES and writes the
    header of every model in MODELS to its buffer.

    Unless yaml_root is 'local', the common types file is imported from
    the forge URL built with spec_ver; otherwise the bare file name is
    used.
    '''
    common_types = 'etsi_nfv_sol001_common_types.yaml'
    for model in MODEL_NAMES:
        if yaml_root == 'local':
            import_stmt = common_types
        else:
            import_stmt = \
                'https://forge.etsi.org/rep/nfv/SOL001/raw/{}/{}'.format(
                    spec_ver, common_types
                )
        # NOTE(review): confirm the exact text of the list marker prepended
        # here against the repository.
        MODELS[model] = tosca_model_info(model, spec_ver, '- ' + import_stmt)

    for info in MODELS.values():
        generate_header(
            info['name'], info['buf'], spec_ver, info['imports'], tosc_ver
        )
def
generate_templates
(
filename
,
spec_ver
=
SPEC_VERSION
,
yaml_root
=
'
uri
'
,
filename
,
spec_ver
=
SPEC_VERSION
,
yaml_root
=
'
uri
'
,
tosc_ver
=
DEFAULT_TOSCA_VERSION
):
'''
Takes a filename or file object and loads the definition into the MODELS dictionary
Takes a filename or file object and loads the definition into
the MODELS dictionary
'''
if
isinstance
(
filename
,
str
):
print
(
"
Opening
"
+
filename
)
for
mod
in
MODEL_NAMES
:
import_stmt
=
'
etsi_nfv_sol001_common_types.yaml
'
if
yaml_root
!=
'
local
'
:
import_stmt
=
\
'
https://forge.etsi.org/rep/nfv/SOL001/raw/{}/
'
.
format
(
spec_ver
)
+
import_stmt
MODELS
[
mod
]
=
tosca_model_info
(
mod
,
spec_ver
,
'
-
'
+
import_stmt
)
init_models
(
yaml_root
,
spec_ver
,
tosc_ver
)
try
:
sol_001
=
docx
.
Document
(
filename
)
...
...
@@ -276,40 +336,30 @@ def generate_templates(
print
(
"
Error opening the submitted Docx file
"
)
raise
ValueError
(
"
Cannot open the submitted Docx file
"
)
for
mod
in
MODELS
:
generate_header
(
MODELS
[
mod
][
'
name
'
],
MODELS
[
mod
][
'
buf
'
],
spec_ver
,
MODELS
[
mod
][
'
imports
'
],
tosc_ver
)
content
=
get_content
(
sol_001
)
sections
=
find_all_sections
(
content
)
sections_to_models
=
{
6
:
'
vnfd
'
,
7
:
'
nsd
'
,
8
:
'
pnfd
'
,
9
:
'
common
'
}
for
sect
in
sections
:
if
not
sect
.
is_annex
:
if
sect
.
number
in
sections_to_models
.
keys
():
model
=
sections_to_models
[
sect
.
number
]
count
=
gen_tables_btwn
(
sect
.
from_id
,
sect
.
to_id
,
content
,
MODELS
[
model
][
'
buf
'
])
count
=
gen_tables_btwn
(
sect
.
from_id
,
sect
.
to_id
,
content
,
MODELS
[
model
][
'
buf
'
]
)
print
(
"
Printed
"
+
str
(
count
)
+
"
types to
"
+
model
)
else
:
if
sect
.
letter
==
"
A
"
:
count
=
generate_examples_between
(
sect
.
from_id
,
sect
.
to_id
,
content
,
EXAMPLES
)
print
(
"
Printed
"
+
str
(
count
)
+
"
types to
"
+
"
Annex
"
+
sect
.
letter
)
if
sect
.
letter
==
"
A
"
or
sect
.
letter
==
"
E
"
:
count
=
generate_examples_between
(
sect
.
from_id
,
sect
.
to_id
,
content
,
EXAMPLES
)
print
(
"
Printed {} types to Annex {}
"
.
format
(
str
(
count
),
sect
.
letter
)
)
def
print_to_files
(
prefix
=
None
):
'''
'''
Prefix is a path to a folder to work into
'''
for
key
in
MODELS
:
...
...
@@ -335,23 +385,26 @@ def print_to_files(prefix=None):
newf
.
write
(
"
\n
"
)
newf
.
close
()
def parse_version_from_filename(filename):
    '''
    Parses the version from the filename

    Only the base name of filename is considered. It must start with
    "gs_NFV-SOL001v" or "gs_nfv-sol001v"; otherwise an empty string is
    returned. When the prefix is followed by exactly six digits they are
    read as three two-digit components, so "020601" gives "v2.6.1" and
    "021001" gives "v2.10.1". Any other remainder goes through the legacy
    best-effort clean-up (zeros turned into dots, then dots and the
    characters of "p.docx" trimmed from both ends).
    '''
    base_filename = os.path.basename(filename)
    for prefix in ("gs_NFV-SOL001v", "gs_nfv-sol001v"):
        if not base_filename.startswith(prefix):
            continue
        # Slice the prefix off: str.strip() treats its argument as a set of
        # characters and would also eat leading version digits (0 and 1)
        remainder = base_filename[len(prefix):]
        packed = re.match(r'(\d{2})(\d{2})(\d{2})(?!\d)', remainder)
        if packed:
            # int() drops the zero padding without touching zeros that are
            # part of the number (e.g. "10")
            return "v" + ".".join(str(int(part)) for part in packed.groups())
        return "v" + remainder.replace("0", ".").strip(".").strip("p.docx")
    return ""
def slugify(t):
    '''
    Returns t in lower case with every space turned into an underscore
    '''
    words = t.split(" ")
    return "_".join(words).lower()
if
__name__
==
"
__main__
"
:
try
:
...
...
@@ -365,5 +418,3 @@ if __name__ == "__main__":
generate_templates
(
SOL001_FN
,
spec_ver
=
ver
)
print_to_files
()
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment