Commit fa2fbda4 in TFS / controller, authored 8 months ago by Konstantinos Poulakakis

Make some changes to the response object that is written to the Kafka topic.

Parent: 18a6c2e6
Included in 2 merge requests: !294 "Release TeraFlowSDN 4.0" and !238 "Automation component skeleton".
Showing 2 changed files with 35 additions and 7 deletions:

src/analytics/backend/service/AnalyticsBackendService.py (+1, −1)
src/analytics/backend/service/DaskStreaming.py (+34, −6)
src/analytics/backend/service/AnalyticsBackendService.py (+1, −1)
@@ -36,7 +36,7 @@ class AnalyticsBackendService(GenericGrpcService):
         port = get_service_port_grpc(ServiceNameEnum.ANALYTICSBACKEND)
         super().__init__(port, cls_name=cls_name)
         self.running_threads = {}   # To keep track of all running analyzers
-        self.kafka_consumer = KafkaConsumer({'bootstrap.servers' : '10.152.183.186:9092',
+        self.kafka_consumer = KafkaConsumer({'bootstrap.servers' : KafkaConfig.get_kafka_address(),
                                              'group.id'          : 'analytics-frontend',
                                              'auto.offset.reset' : 'latest'})
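The only functional change in this file replaces the hardcoded broker address 10.152.183.186:9092 with a call to KafkaConfig.get_kafka_address(). As a rough illustration only (not the controller's actual KafkaConfig, whose implementation is not part of this diff), such a helper could resolve the broker address from the environment; the consumer construction below mirrors the new line, assuming the dict-style config of confluent_kafka's Consumer:

    import os
    from confluent_kafka import Consumer as KafkaConsumer

    class KafkaConfig:
        """Hypothetical stand-in for the real helper: resolves the Kafka bootstrap address."""
        @staticmethod
        def get_kafka_address() -> str:
            # KFK_SERVER_ADDRESS is an assumed variable name, not taken from this commit.
            return os.environ.get('KFK_SERVER_ADDRESS', '127.0.0.1:9092')

    kafka_consumer = KafkaConsumer({
        'bootstrap.servers' : KafkaConfig.get_kafka_address(),
        'group.id'          : 'analytics-frontend',
        'auto.offset.reset' : 'latest',
    })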
src/analytics/backend/service/DaskStreaming.py (+34, −6)
@@ -43,6 +43,7 @@ def GetAggregationMappings(thresholds):
         agg_dict[threshold_key] = ('kpi_value', aggregation)
     return agg_dict

 def ApplyThresholds(aggregated_df, thresholds):
     """
     Apply thresholds (TH-Fall and TH-Raise) based on the thresholds dictionary
@@ -53,12 +54,14 @@ def ApplyThresholds(aggregated_df, thresholds):
     """
     for threshold_key, threshold_values in thresholds.items():
         if threshold_key not in aggregated_df.columns:
             LOGGER.warning(f"Threshold key '{threshold_key}' does not correspond to any aggregation result. Skipping threshold application.")
             continue
         if isinstance(threshold_values, (list, tuple)) and len(threshold_values) == 2:
             fail_th, raise_th = threshold_values
-            aggregated_df[f"{threshold_key}_THRESHOLD_FALL"] = aggregated_df[threshold_key] < fail_th
-            aggregated_df[f"{threshold_key}_THRESHOLD_RAISE"] = aggregated_df[threshold_key] > raise_th
+            aggregated_df["THRESHOLD_FALL"] = aggregated_df[threshold_key] < fail_th
+            aggregated_df["THRESHOLD_RAISE"] = aggregated_df[threshold_key] > raise_th
+            aggregated_df["value"] = aggregated_df[threshold_key]
         else:
             LOGGER.warning(f"Threshold values for '{threshold_key}' are not a list or tuple of length 2. Skipping threshold application.")
     return aggregated_df
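This hunk is the heart of the commit message: the threshold flags are now written under the fixed column names THRESHOLD_FALL and THRESHOLD_RAISE instead of per-key names, and the aggregated KPI value is copied into a value column, so every record produced to the Kafka topic carries the same field names. A small pandas sketch of the effect, using a made-up threshold key and numbers:

    import pandas as pd

    # One aggregated row for a hypothetical 'max_latency_E2E' threshold key.
    aggregated_df = pd.DataFrame({'window_start': ['2024-01-01T00:00:00Z'],
                                  'max_latency_E2E': [42.0]})
    fail_th, raise_th = 10.0, 30.0   # (TH-Fall, TH-Raise) pair from the thresholds dict

    aggregated_df["THRESHOLD_FALL"]  = aggregated_df['max_latency_E2E'] < fail_th    # False
    aggregated_df["THRESHOLD_RAISE"] = aggregated_df['max_latency_E2E'] > raise_th   # True
    aggregated_df["value"]           = aggregated_df['max_latency_E2E']              # 42.0

    print(aggregated_df.to_dict(orient='records'))
    # [{'window_start': '2024-01-01T00:00:00Z', 'max_latency_E2E': 42.0,
    #   'THRESHOLD_FALL': False, 'THRESHOLD_RAISE': True, 'value': 42.0}]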
@@ -96,7 +99,7 @@ def process_batch(batch, agg_mappings, thresholds, key):
     df = pd.DataFrame(batch)
     LOGGER.info(f"df {df}")
-    df['time_stamp'] = pd.to_datetime(df['time_stamp'], errors='coerce', unit='s')
+    df['time_stamp'] = pd.to_datetime(df['time_stamp'], errors='coerce')
     df.dropna(subset=['time_stamp'], inplace=True)
     LOGGER.info(f"df {df}")
     required_columns = {'time_stamp', 'kpi_id', 'kpi_value'}
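The unit='s' argument is dropped from pd.to_datetime, so time_stamp is no longer interpreted as epoch seconds. Assuming the incoming values are ISO-8601 strings, the old call would coerce them to NaT (and the following dropna would discard every row), while the new call parses them; a minimal illustration of that pandas behaviour:

    import pandas as pd

    ts = pd.Series(['2024-01-01T00:00:00Z'])

    # Old call: unit='s' expects numeric epoch seconds, so an ISO string coerces to NaT
    # and would then be dropped by df.dropna(subset=['time_stamp'], inplace=True).
    print(pd.to_datetime(ts, errors='coerce', unit='s'))   # NaT

    # New call: without unit='s', ISO-formatted strings parse normally.
    print(pd.to_datetime(ts, errors='coerce'))             # 2024-01-01 00:00:00+00:00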
@@ -110,19 +113,44 @@ def process_batch(batch, agg_mappings, thresholds, key):
     # Perform aggregations using named aggregation
     try:
         agg_dict = {key: value for key, value in agg_mappings.items()}
-        df_agg = df.groupby(['window_start']).agg(**agg_dict).reset_index()
+        df_agg_ = df.groupby(['window_start']).agg(**agg_dict).reset_index()
+
+        #example: agg_dict = {'min_latency_E2E': ('kpi_value', 'min')
+        #given that threshold has 1 value
+        second_value_tuple = next(iter(agg_dict.values()))[1]
+        #in case we have multiple thresholds!
+        #second_values_tuples = [value[1] for value in agg_dict.values()]
+        if second_value_tuple == "min":
+            df_agg = df_agg_.min(numeric_only=True).to_frame().T
+        elif second_value_tuple == "max":
+            df_agg = df_agg_.max(numeric_only=True).to_frame().T
+        elif second_value_tuple == "std":
+            df_agg = df_agg_.sted(numeric_only=True).to_frame().T
+        else:
+            df_agg = df_agg_.mean(numeric_only=True).to_frame().T
+
+        # Assign the first value of window_start from the original aggregated data
+        df_agg['window_start'] = df_agg_['window_start'].iloc[0]
+
+        # Reorder columns to place 'window_start' first if needed
+        cols = ['window_start'] + [col for col in df_agg.columns if col != 'window_start']
+        df_agg = df_agg[cols]
     except Exception as e:
         LOGGER.error(f"Aggregation error: {e}")
         return []

     # Apply thresholds
     df_thresholded = ApplyThresholds(df_agg, thresholds)
     df_thresholded['kpi_id'] = key
     df_thresholded['window_start'] = df_thresholded['window_start'].dt.strftime('%Y-%m-%dT%H:%M:%SZ')
     # Convert aggregated DataFrame to list of dicts
     result = df_thresholded.to_dict(orient='records')
     LOGGER.info(f"Processed batch with {len(result)} records after aggregation and thresholding.")
     return result

 def produce_result(result, producer, destination_topic):
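The added block inspects the aggregation name of the (single) entry in agg_dict via next(iter(agg_dict.values()))[1] and collapses the per-window aggregation into one row with the matching reduction (note that the "std" branch calls .sted(), which looks like a typo for DataFrame.std()). A self-contained sketch of that flow, reusing the mapping from the code's own comment and invented sample data:

    import pandas as pd

    # Named-aggregation mapping as produced by GetAggregationMappings for one threshold key.
    agg_dict = {'min_latency_E2E': ('kpi_value', 'min')}

    # Aggregation name of the single mapping entry, as the new code extracts it.
    second_value_tuple = next(iter(agg_dict.values()))[1]   # 'min'

    df = pd.DataFrame({
        'window_start': pd.to_datetime(['2024-01-01T00:00:00Z', '2024-01-01T00:00:30Z']),
        'kpi_value':    [12.0, 7.5],
    })

    df_agg_ = df.groupby(['window_start']).agg(**agg_dict).reset_index()

    # Collapse the per-window results into a single row, as the 'min' branch does,
    # then restore window_start from the first aggregated row.
    df_agg = df_agg_.min(numeric_only=True).to_frame().T
    df_agg['window_start'] = df_agg_['window_start'].iloc[0]
    print(df_agg)   # one row: min_latency_E2E == 7.5, window_start == first window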
@@ -197,7 +225,7 @@ def DaskStreamer(key, kpi_list, thresholds, stop_event,
                 continue
             try:
-                message_timestamp = pd.to_datetime(message_value[time_stamp_col], errors='coerce', unit='s')
+                message_timestamp = pd.to_datetime(message_value[time_stamp_col], errors='coerce')
                 LOGGER.warning(f"message_timestamp: {message_timestamp}. Skipping message.")
                 if pd.isna(message_timestamp):