Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
T
tosca2doc
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
CTI Tools
tosca2doc
Commits
c067c9d5
Commit
c067c9d5
authored
4 years ago
by
carignani
Browse files
Options
Downloads
Patches
Plain Diff
support subsections (fix
#10
)
parent
a9d6cf4c
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/doc2tosca.py
+51
-16
51 additions, 16 deletions
src/doc2tosca.py
src/test_doc2tosca.py
+6
-0
6 additions, 0 deletions
src/test_doc2tosca.py
with
57 additions
and
16 deletions
src/doc2tosca.py
+
51
−
16
View file @
c067c9d5
...
...
@@ -33,9 +33,21 @@ metadata:
imports:
{imports}
data_types:
'''
# Titles of the level 2 headings that is_lvl2_section_hdn accepts as
# subsections. Each title is listed exactly once.
SUBSECTIONS = [
    "Artifact Types",
    "Data Types",
    "Capability Types",
    "Requirements Types",
    "Relationship Types",
    "Interface Types",
    "Node Types",
    "Group Types",
    "Policy Types",
]

MODELS = {}

EXAMPLES = {}
...
...
@@ -109,8 +121,8 @@ def get_content(doc):
elif
isinstance
(
element
,
docx
.
oxml
.
table
.
CT_Tbl
):
ret
.
append
(
Table
(
element
,
body
))
table_count
=
table_count
+
1
else
:
print
(
"
Non paragraph or table
"
+
str
(
type
(
element
)))
#
else:
#
print("Non paragraph or table " + str(type(element)))
print
(
"
Paragraphs:
"
+
str
(
parag_count
))
print
(
"
Tables:
"
+
str
(
table_count
))
return
ret
...
...
@@ -130,6 +142,17 @@ def find_sect(sect_to_find, start_idx, doc_content):
print
(
"
FOUND
"
+
sect_to_find
+
"
at
"
+
str
(
start_idx
))
return
start_idx
def is_lvl2_section_hdn(txt):
    '''
    Returns true if txt is a level 2 heading naming a known subsection.

    After stripping surrounding whitespace, txt must look like
    "<digit>.<digits><TAB><letters and whitespace>" and the field
    following the first tab must be one of SUBSECTIONS.
    '''
    stripped = txt.strip()
    heading = re.match(r'^[0-9]\.[0-9]+\t[a-zA-Z\s]*$', stripped)
    if heading is None:
        return False
    # A successful match guarantees at least one tab, so field 1 exists.
    fields = stripped.split('\t')
    return fields[1] in SUBSECTIONS
def
is_lvl1_section_hdn
(
txt
):
'''
Returns true if txt is level 1 heading
'''
clean_txt
=
txt
.
strip
()
...
...
@@ -159,6 +182,15 @@ def find_all_sections(doc_content):
sections
.
append
(
Section
(
start_indx
,
end_indx
-
1
,
doc_content
[
start_indx
].
text
))
return
sections
def write_subsection_to_file(txt, buf):
    '''
    Writes a subsection header for txt to buf.

    The header is the slugified title followed by a colon on one line,
    then an empty line.

    txt: title of the subsection. A trailing newline, if present, is
         dropped before slugifying so that the colon stays on the same
         line as the title instead of ending up on a line of its own.
    buf: object to write to; only its write method is used.
    '''
    title = txt.rstrip('\n')
    buf.write(slugify(title) + ":\n")
    # Empty line separating the header from what is written next.
    buf.write('\n')
def
write_table_to_file
(
tab
,
buf
):
'''
Writes content of table t in utf-8 encoding to file F
...
...
@@ -177,28 +209,28 @@ def gen_tables_btwn(a_id, b_id, content, buf):
definitions_count
=
0
for
idx
in
range
(
a_id
,
b_id
):
if
idx
>=
len
(
content
):
print
(
"
A:
"
+
str
(
a_id
))
print
(
"
B:
"
+
str
(
b_id
))
print
(
"
IDX:
"
+
str
(
idx
))
print
(
"
LEN(CONTENT):
"
+
str
(
len
(
content
)))
return
definitions_count
print
(
"
ERROR: Paragraph out of range (A: {}, B: {}, IDX: {}, LEN(CONTENT): {})
"
.
format
(
a_id
,
b_id
,
idx
,
len
(
content
)))
return
definitions_count
tmp_elem
=
content
[
idx
]
if
isinstance
(
tmp_elem
,
Paragraph
)
and
is_lvl2_section_hdn
(
tmp_elem
.
text
):
print
(
tmp_elem
.
text
)
write_subsection_to_file
(
tmp_elem
.
text
.
split
(
"
\t
"
)[
1
],
buf
)
definitions_count
=
definitions_count
+
1
if
isinstance
(
tmp_elem
,
Table
)
and
is_tosca_def
(
tmp_elem
):
write_table_to_file
(
tmp_elem
,
buf
)
definitions_count
=
definitions_count
+
1
elif
isinstance
(
tmp_elem
,
Table
):
txt
=
tmp_elem
.
rows
[
0
].
cells
[
0
].
text
if
txt
.
strip
().
startswith
(
"
Name
"
)
or
txt
.
strip
().
startswith
(
"
Shorthand
"
)
or
\
txt
.
strip
().
startswith
(
"
tosca_def
"
):
continue
# print("----- Filtered out: " + txt.split("\n")[0])
#if not len(tmp_elem.rows) == 1:
#print(" Rows count != 1 ")
#if not len(tmp_elem.columns) == 1:
# print(" Columns count != 1 ")
#if not match_definition_incipit(txt):
# print(" Regex != 1 ")
return
definitions_count
def
generate_header
(
...
...
@@ -277,7 +309,7 @@ def generate_templates(
def
print_to_files
(
prefix
=
None
):
'''
'''
Prefix is a path to a folder to work into
'''
for
key
in
MODELS
:
...
...
@@ -317,6 +349,9 @@ def parse_version_from_filename(filename):
.
replace
(
"
0
"
,
"
.
"
).
strip
(
"
.
"
).
strip
(
"
p.docx
"
)
return
""
def slugify(t):
    '''
    Returns t in lower case with every space replaced by an underscore.
    '''
    lowered = t.lower()
    return "_".join(lowered.split(" "))
if
__name__
==
"
__main__
"
:
try
:
...
...
This diff is collapsed.
Click to expand it.
src/test_doc2tosca.py
+
6
−
0
View file @
c067c9d5
...
...
@@ -20,6 +20,12 @@ def test_is_lvl1_section_hdn():
assert
d2t
.
is_lvl1_section_hdn
(
"
Annex A (informative)
"
)
assert
d2t
.
is_lvl1_section_hdn
(
"
Annex C (normative):
\t
Conformance
\t
284
"
)
def test_is_lvl2_section_hdn():
    # One heading that must be accepted, and two that must be rejected:
    # a three-level number and a two-level number with a different title.
    accepted = "6.3\tData Types"
    rejected = ("6.4.2\tSomething", "6.4\tSomething")

    assert d2t.is_lvl2_section_hdn(accepted)
    for heading in rejected:
        assert not d2t.is_lvl2_section_hdn(heading)
def
test_section_init
():
ssss
=
d2t
.
Section
(
0
,
10
,
"
6
\t
VNFD TOSCA model
"
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment