Skip to content

Commit 321006a

Browse files
authored
Merge pull request #4520 from esl/simplify-element-metrics
The main goal is to simplify and unify events for XMPP (XML) elements. The main benefit is that unified metrics become easier to understand, maintain, test and document. Before, there would be pairs of events like c2s_element_in and xmpp_element_size_in. After, there is only one event: xmpp_element_in. The complete list is shown below: c2s_element_in (labels: host_type) and xmpp_element_size_in (labels: connection_type=c2s) are merged to xmpp_element_in (labels: host_type, connection_type=c2s). c2s_element_out (labels: host_type) and xmpp_element_size_out (labels: connection_type=c2s) are merged to xmpp_element_out (labels: host_type, connection_type=c2s). s2s_element_in and xmpp_element_size_in (labels: connection_type=s2s) are merged to xmpp_element_in (labels: host_type, connection_type=s2s). s2s_element_out and xmpp_element_size_out (labels: connection_type=s2s) are merged to xmpp_element_out (labels: host_type, connection_type=s2s). component_element_in and xmpp_element_size_in (labels: connection_type=component) are merged to xmpp_element_in (labels: host_type="", connection_type=component). component_element_out and xmpp_element_size_out (labels: connection_type=component) are merged to xmpp_element_out (labels: host_type="", connection_type=component). Notes: Ad 1, 2: c2s_element_* were missing if host type was unknown (e.g. in case of early stream errors). Now it is no longer the case. Size-related events and all s2s/component events didn't include host types. Now they do if host type is known. If host type is unknown, it is set to an empty binary in the labels. Prometheus sees this as an empty label value, and allows to e.g. display graphs of all events without host types (useful for debugging; I tested this manually). For exometer, we just put global as the prefix in such cases. Docs are updated to describe the new metrics. Some mistakes in the docs are corrected as well. Tests are updated to check the new metrics. Some mistakes in the tests are corrected as well.
2 parents e89f360 + 6134f1d commit 321006a

22 files changed

+393
-390
lines changed

big_tests/tests/bosh_SUITE.erl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -949,6 +949,6 @@ wait_for_zero_bosh_sessions() ->
949949

950950
instrumentation_events() ->
951951
instrument_helper:declared_events(mod_bosh, [])
952-
++ [{c2s_message_processed, #{host_type => domain_helper:host_type()}},
953-
{xmpp_element_size_out, #{connection_type => c2s}, #{metrics => #{byte_size => histogram}}},
954-
{xmpp_element_size_in, #{connection_type => c2s}, #{metrics => #{byte_size => histogram}}}].
952+
++ [{c2s_message_processed, #{host_type => host_type()}},
953+
{xmpp_element_out, #{host_type => host_type(), connection_type => c2s}},
954+
{xmpp_element_in, #{host_type => host_type(), connection_type => c2s}}].

big_tests/tests/component_SUITE.erl

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -113,30 +113,26 @@ register_one_component(Config) ->
113113
CheckServer = fun(#{lserver := S}) -> S =:= ComponentAddr end,
114114

115115
%% Expect events for handshake, but not for 'start stream'.
116-
instrument_helper:assert(xmpp_element_size_out, #{connection_type => component}, FullCheckF,
116+
instrument_helper:assert(xmpp_element_out, ht_labels(), FullCheckF,
117117
#{expected_count => 1, min_timestamp => TS}),
118-
instrument_helper:assert(xmpp_element_size_in, #{connection_type => component}, FullCheckF,
118+
instrument_helper:assert(xmpp_element_in, ht_labels(), FullCheckF,
119119
#{expected_count => 1, min_timestamp => TS}),
120120

121121
instrument_helper:assert(component_auth_failed, #{}, FullCheckF,
122122
#{expected_count => 0, min_timestamp => TS}),
123-
instrument_helper:assert(tcp_data_in, #{connection_type => component}, CheckBytes,
123+
instrument_helper:assert(tcp_data_in, labels(), CheckBytes,
124124
#{min_timestamp => TS}),
125-
instrument_helper:assert(tcp_data_out, #{connection_type => component}, CheckBytes,
125+
instrument_helper:assert(tcp_data_out, labels(), CheckBytes,
126126
#{min_timestamp => TS}),
127127

128128
TS1 = instrument_helper:timestamp(),
129129
verify_component(Config, Component, ComponentAddr),
130130

131131
% Message from Alice
132-
instrument_helper:assert(xmpp_element_size_out, #{connection_type => component}, FullCheckF,
132+
instrument_helper:assert(xmpp_element_out, ht_labels(), FullCheckF,
133133
#{expected_count => 1, min_timestamp => TS1}),
134134
% Reply to Alice
135-
instrument_helper:assert(xmpp_element_size_in, #{connection_type => component}, FullCheckF,
136-
#{expected_count => 1, min_timestamp => TS1}),
137-
instrument_helper:assert(component_element_in, #{}, CheckServer,
138-
#{expected_count => 1, min_timestamp => TS1}),
139-
instrument_helper:assert(component_element_out, #{}, CheckServer,
135+
instrument_helper:assert(xmpp_element_in, ht_labels(), FullCheckF,
140136
#{expected_count => 1, min_timestamp => TS1}),
141137

142138
component_helper:disconnect_component(Component, ComponentAddr).
@@ -151,9 +147,9 @@ register_one_component_tls(Config) ->
151147
end,
152148
CheckBytes = fun(#{byte_size := S}) -> S > 0 end,
153149
CheckServer = fun(#{lserver := S}) -> S =:= ComponentAddr end,
154-
instrument_helper:assert(tls_data_in, #{connection_type => component}, CheckBytes,
150+
instrument_helper:assert(tls_data_in, labels(), CheckBytes,
155151
#{min_timestamp => TS}),
156-
instrument_helper:assert(tls_data_out, #{connection_type => component}, CheckBytes,
152+
instrument_helper:assert(tls_data_out, labels(), CheckBytes,
157153
#{min_timestamp => TS}),
158154

159155
TS1 = instrument_helper:timestamp(),
@@ -187,9 +183,9 @@ intercomponent_communication(Config) ->
187183
FullCheckF = fun(#{byte_size := S, lserver := LServer}) ->
188184
S > 0 andalso LServer =:= CompAddr1 orelse LServer =:= CompAddr2
189185
end,
190-
instrument_helper:assert(xmpp_element_size_out, #{connection_type => component}, FullCheckF,
186+
instrument_helper:assert(xmpp_element_out, ht_labels(), FullCheckF,
191187
#{expected_count => 1, min_timestamp => TS}),
192-
instrument_helper:assert(xmpp_element_size_in, #{connection_type => component}, FullCheckF,
188+
instrument_helper:assert(xmpp_element_in, ht_labels(), FullCheckF,
193189
#{expected_count => 1, min_timestamp => TS}),
194190

195191
component_helper:disconnect_component(Comp1, CompAddr1),
@@ -241,10 +237,10 @@ register_two_components(Config) ->
241237
S > 0 andalso LServer =:= CompAddr1 orelse LServer =:= CompAddr2
242238
end,
243239
% Msg to Alice, msg to Bob
244-
instrument_helper:assert(xmpp_element_size_in, #{connection_type => component}, FullCheckF,
240+
instrument_helper:assert(xmpp_element_in, ht_labels(), FullCheckF,
245241
#{expected_count => 2, min_timestamp => TS}),
246242
% Msg from Bob, msg from Alice
247-
instrument_helper:assert(xmpp_element_size_out, #{connection_type => component}, FullCheckF,
243+
instrument_helper:assert(xmpp_element_out, ht_labels(), FullCheckF,
248244
#{expected_count => 2, min_timestamp => TS}),
249245

250246
component_helper:disconnect_component(Comp1, CompAddr1),
@@ -591,6 +587,14 @@ restore_domain(Config) ->
591587
events() ->
592588
instrument_helper:declared_events(mongoose_component_listener, [#{}]).
593589

590+
%% Data metrics have only the connection_type label
591+
labels() ->
592+
#{connection_type => component}.
593+
594+
%% XMPP element metric labels include host_type, but components don't have host types
595+
ht_labels() ->
596+
(labels())#{host_type => <<>>}.
597+
594598
%%--------------------------------------------------------------------
595599
%% Stanzas
596600
%%--------------------------------------------------------------------

big_tests/tests/graphql_metric_SUITE.erl

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -209,11 +209,11 @@ get_all_metrics_as_dicts(Config) ->
209209
check_node_result_is_valid(ParsedResult, false).
210210

211211
get_by_name_metrics_as_dicts(Config) ->
212-
Name = [<<"c2s_element_in">>, <<"stanza_count">>],
212+
Name = [<<"xmpp_element_in">>, <<"c2s">>, <<"stanza_count">>],
213213
Result = get_metrics_as_dicts_by_name([<<"_">> | Name], Config),
214214
ParsedResult = get_ok_value([data, metric, getMetricsAsDicts], Result),
215215
[_|_] = ParsedResult,
216-
%% Only c2s_element_in type
216+
%% Only xmpp_element_in type
217217
lists:foreach(fun(#{<<"dict">> := Dict, <<"name">> := [_ | N]}) when N =:= Name ->
218218
check_spiral_dict(Dict)
219219
end, ParsedResult).
@@ -227,23 +227,23 @@ get_metrics_as_dicts_with_key_one(Config) ->
227227
Result = get_metrics_as_dicts_with_keys([<<"one">>], Config),
228228
ParsedResult = get_ok_value([data, metric, getMetricsAsDicts], Result),
229229
Map = dict_objects_to_map(ParsedResult),
230-
SentName = [metric_host_type(), <<"c2s_element_out">>, <<"stanza_count">>],
230+
SentName = [metric_host_type(), <<"xmpp_element_out">>, <<"c2s">>, <<"stanza_count">>],
231231
[#{<<"key">> := <<"one">>, <<"value">> := One}] = maps:get(SentName, Map),
232232
?assert(is_integer(One)).
233233

234234
get_metrics_as_dicts_with_nonexistent_key(Config) ->
235235
Result = get_metrics_as_dicts_with_keys([<<"not_existing">>], Config),
236236
ParsedResult = get_ok_value([data, metric, getMetricsAsDicts], Result),
237237
Map = dict_objects_to_map(ParsedResult),
238-
RecvName = [<<"global">>, <<"xmpp_element_size_in">>, <<"c2s">>, <<"byte_size">>],
238+
RecvName = [metric_host_type(), <<"xmpp_element_in">>, <<"c2s">>, <<"byte_size">>],
239239
[] = maps:get(RecvName, Map).
240240

241241
get_metrics_as_dicts_empty_args(Config) ->
242242
%% Empty name
243243
Result = get_metrics_as_dicts([], [<<"median">>], Config),
244244
ParsedResult = get_ok_value([data, metric, getMetricsAsDicts], Result),
245245
Map = dict_objects_to_map(ParsedResult),
246-
RecvName = [<<"global">>, <<"xmpp_element_size_in">>, <<"c2s">>, <<"byte_size">>],
246+
RecvName = [metric_host_type(), <<"xmpp_element_in">>, <<"c2s">>, <<"byte_size">>],
247247
[#{<<"key">> := <<"median">>, <<"value">> := Median}] = maps:get(RecvName, Map),
248248
?assert(is_integer(Median)),
249249
%% Empty keys
@@ -272,13 +272,13 @@ get_cluster_metrics(Config) ->
272272
check_node_result_is_valid(Res2, true).
273273

274274
get_by_name_cluster_metrics_as_dicts(Config) ->
275-
Name = [<<"c2s_element_in">>, <<"stanza_count">>],
275+
Name = [<<"xmpp_element_in">>, <<"c2s">>, <<"stanza_count">>],
276276
Result = get_cluster_metrics_as_dicts_by_name([<<"_">> | Name], Config),
277277
NodeResult = get_ok_value([data, metric, getClusterMetricsAsDicts], Result),
278278
Map = node_objects_to_map(NodeResult),
279279
%% Contains data for at least two nodes
280280
?assert(maps:size(Map) > 1),
281-
%% Only c2s_element_in type
281+
%% Only xmpp_element_in type
282282
maps:map(fun(_Node, [_|_] = NodeRes) ->
283283
lists:foreach(fun(#{<<"dict">> := Dict, <<"name">> := [_ | N]}) when N =:= Name ->
284284
check_spiral_dict(Dict)
@@ -317,7 +317,7 @@ get_cluster_metrics_empty_args(Config) ->
317317
ParsedResult = get_ok_value([data, metric, getClusterMetricsAsDicts], Result),
318318
[#{<<"node">> := Node, <<"result">> := ResList}] = ParsedResult,
319319
Map = dict_objects_to_map(ResList),
320-
SentName = [<<"global">>, <<"c2s_element_in">>, <<"stanza_count">>],
320+
SentName = [<<"global">>, <<"xmpp_element_in">>, <<"c2s">>, <<"stanza_count">>],
321321
[#{<<"key">> := <<"one">>, <<"value">> := One}] = maps:get(SentName, Map),
322322
?assert(is_integer(One)),
323323
%% Empty keys
@@ -351,15 +351,16 @@ get_cluster_metrics_empty_strings(Config) ->
351351
check_node_result_is_valid(ResList, MetricsAreGlobal) ->
352352
%% Check that result contains something
353353
Map = dict_objects_to_map(ResList),
354-
SentName = case MetricsAreGlobal of
355-
true -> [<<"global">>, <<"c2s_element_in">>, <<"stanza_count">>];
356-
false -> [metric_host_type(), <<"c2s_element_in">>, <<"stanza_count">>]
357-
end,
354+
Prefix = case MetricsAreGlobal of
355+
true -> <<"global">>;
356+
false -> metric_host_type()
357+
end,
358+
SentName = [Prefix, <<"xmpp_element_in">>, <<"c2s">>, <<"stanza_count">>],
358359
check_spiral_dict(maps:get(SentName, Map)),
359360
[#{<<"key">> := <<"value">>,<<"value">> := V} | _] =
360-
maps:get([<<"global">>,<<"sm_unique_sessions">>,<<"count">>], Map),
361+
maps:get([<<"global">>, <<"sm_unique_sessions">>, <<"count">>], Map),
361362
?assert(is_integer(V)),
362-
HistObjects = maps:get([<<"global">>, <<"xmpp_element_size_in">>, <<"c2s">>, <<"byte_size">>], Map),
363+
HistObjects = maps:get([Prefix, <<"xmpp_element_in">>, <<"c2s">>, <<"byte_size">>], Map),
363364
check_histogram(kv_objects_to_map(HistObjects)).
364365

365366
check_histogram(Map) ->

big_tests/tests/metrics_api_SUITE.erl

Lines changed: 41 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ non_existent_metrics(_Config) ->
107107
assert_status(404, request(<<"GET">>, "/metrics/global/badMetric")),
108108
assert_status(404, request(<<"GET">>, "/metrics/host_type/badHostType")),
109109
assert_status(404, request(<<"GET">>,
110-
"/metrics/host_type/badHostType/c2s_element_out.stanza_count")),
110+
"/metrics/host_type/badHostType/xmpp_element_out.c2s.stanza_count")),
111111
assert_status(404, request(<<"GET">>, ["/metrics/", HostType, "/", GlobalMetricName])),
112112
assert_status(404, request(<<"GET">>, ["/metrics/", HostType, "/badMetric"])).
113113

@@ -122,14 +122,14 @@ one_client_just_logs_in(Config) ->
122122
(Config, metrics_helper:userspec(1, Config),
123123
fun(_User1) -> end_of_story end,
124124
%% A list of metrics and their expected relative increase
125-
[{'c2s_element_in.iq_count', 0 + user_alpha(2)},
126-
{'c2s_element_out.iq_count', 0 + user_alpha(2)},
127-
{'c2s_element_in.message_count', 0},
128-
{'c2s_element_out.message_count', 0},
129-
{'c2s_element_in.presence_count', 0 + user_alpha(1)},
130-
{'c2s_element_out.presence_count', 0 + user_alpha(1)},
131-
{'c2s_element_in.stanza_count', 0 + user_alpha(3)},
132-
{'c2s_element_out.stanza_count', 0 + user_alpha(3)},
125+
[{'xmpp_element_in.c2s.iq_count', 0 + user_alpha(2)},
126+
{'xmpp_element_out.c2s.iq_count', 0 + user_alpha(2)},
127+
{'xmpp_element_in.c2s.message_count', 0},
128+
{'xmpp_element_out.c2s.message_count', 0},
129+
{'xmpp_element_in.c2s.presence_count', 0 + user_alpha(1)},
130+
{'xmpp_element_out.c2s.presence_count', 0 + user_alpha(1)},
131+
{'xmpp_element_in.c2s.stanza_count', 0 + user_alpha(3)},
132+
{'xmpp_element_out.c2s.stanza_count', 0 + user_alpha(3)},
133133
{'sm_session.logins', 0 + user_alpha(1)},
134134
{'sm_session.logouts', 0 + user_alpha(1)}
135135
]).
@@ -138,14 +138,14 @@ two_clients_just_log_in(Config) ->
138138
instrumented_story
139139
(Config, metrics_helper:userspec(1, 1, Config),
140140
fun(_User1, _User2) -> end_of_story end,
141-
[{'c2s_element_in.iq_count', 0 + user_alpha(4)},
142-
{'c2s_element_out.iq_count', 0 + user_alpha(4)},
143-
{'c2s_element_in.message_count', 0},
144-
{'c2s_element_out.message_count', 0},
145-
{'c2s_element_in.presence_count', 0 + user_alpha(2)},
146-
{'c2s_element_out.presence_count', 0 + user_alpha(2)},
147-
{'c2s_element_in.stanza_count', 0 + user_alpha(6)},
148-
{'c2s_element_out.stanza_count', 0 + user_alpha(6)},
141+
[{'xmpp_element_in.c2s.iq_count', 0 + user_alpha(4)},
142+
{'xmpp_element_out.c2s.iq_count', 0 + user_alpha(4)},
143+
{'xmpp_element_in.c2s.message_count', 0},
144+
{'xmpp_element_out.c2s.message_count', 0},
145+
{'xmpp_element_in.c2s.presence_count', 0 + user_alpha(2)},
146+
{'xmpp_element_out.c2s.presence_count', 0 + user_alpha(2)},
147+
{'xmpp_element_in.c2s.stanza_count', 0 + user_alpha(6)},
148+
{'xmpp_element_out.c2s.stanza_count', 0 + user_alpha(6)},
149149
{'sm_session.logins', 0 + user_alpha(2)},
150150
{'sm_session.logouts', 0 + user_alpha(2)}
151151
]).
@@ -158,8 +158,8 @@ one_message_sent(Config) ->
158158
escalus_client:send(User1, Chat),
159159
escalus_client:wait_for_stanza(User2)
160160
end,
161-
[{'c2s_element_in.message_count', 1},
162-
{'c2s_element_out.message_count', 1}]).
161+
[{'xmpp_element_in.c2s.message_count', 1},
162+
{'xmpp_element_out.c2s.message_count', 1}]).
163163

164164
one_direct_presence_sent(Config) ->
165165
Userspec = metrics_helper:userspec(1, 1, Config),
@@ -170,10 +170,10 @@ one_direct_presence_sent(Config) ->
170170
escalus:send(User1, Presence),
171171
escalus:wait_for_stanza(User2)
172172
end,
173-
[{'c2s_element_in.presence_count', 1 + user_alpha(2)},
174-
{'c2s_element_out.presence_count', 1 + user_alpha(2)},
175-
{'c2s_element_in.stanza_count', 1 + user_alpha(6)},
176-
{'c2s_element_out.stanza_count', 1 + user_alpha(6)}]).
173+
[{'xmpp_element_in.c2s.presence_count', 1 + user_alpha(2)},
174+
{'xmpp_element_out.c2s.presence_count', 1 + user_alpha(2)},
175+
{'xmpp_element_in.c2s.stanza_count', 1 + user_alpha(6)},
176+
{'xmpp_element_out.c2s.stanza_count', 1 + user_alpha(6)}]).
177177

178178
one_iq_sent(Config) ->
179179
instrumented_story
@@ -183,11 +183,11 @@ one_iq_sent(Config) ->
183183
escalus_client:send(User1, RosterIq),
184184
escalus_client:wait_for_stanza(User1)
185185
end,
186-
[{'c2s_element_in.iq_count', 3},
187-
{'c2s_element_out.iq_count', 3},
186+
[{'xmpp_element_in.c2s.iq_count', 3},
187+
{'xmpp_element_out.c2s.iq_count', 3},
188188
{'mod_roster_get.count', 1},
189-
{'c2s_element_in.stanza_count', 1 + user_alpha(3)},
190-
{'c2s_element_out.stanza_count', 1 + user_alpha(3)}]).
189+
{'xmpp_element_in.c2s.stanza_count', 1 + user_alpha(3)},
190+
{'xmpp_element_out.c2s.stanza_count', 1 + user_alpha(3)}]).
191191

192192
one_message_error(Config) ->
193193
instrumented_story
@@ -198,10 +198,10 @@ one_message_error(Config) ->
198198
escalus_client:send(User1, Chat),
199199
escalus_client:wait_for_stanza(User1)
200200
end,
201-
[{'c2s_element_out.error_count', 1},
202-
{'c2s_element_out.iq_error_count', 0},
203-
{'c2s_element_out.message_error_count', 1},
204-
{'c2s_element_out.presence_error_count', 0}]).
201+
[{'xmpp_element_out.c2s.error_count', 1},
202+
{'xmpp_element_out.c2s.iq_error_count', 0},
203+
{'xmpp_element_out.c2s.message_error_count', 1},
204+
{'xmpp_element_out.c2s.presence_error_count', 0}]).
205205

206206
one_iq_error(Config) ->
207207
instrumented_story
@@ -211,10 +211,10 @@ one_iq_error(Config) ->
211211
escalus_client:send(User1, BadIQ),
212212
escalus_client:wait_for_stanza(User1)
213213
end,
214-
[{'c2s_element_out.error_count', 1},
215-
{'c2s_element_out.iq_error_count', 1},
216-
{'c2s_element_out.message_error_count', 0},
217-
{'c2s_element_out.presence_error_count', 0}]).
214+
[{'xmpp_element_out.c2s.error_count', 1},
215+
{'xmpp_element_out.c2s.iq_error_count', 1},
216+
{'xmpp_element_out.c2s.message_error_count', 0},
217+
{'xmpp_element_out.c2s.presence_error_count', 0}]).
218218

219219
one_presence_error(Config) ->
220220
instrumented_story
@@ -225,10 +225,10 @@ one_presence_error(Config) ->
225225
escalus_client:send(User1, BadPres),
226226
escalus_client:wait_for_stanza(User1)
227227
end,
228-
[{'c2s_element_out.error_count', 1},
229-
{'c2s_element_out.iq_error_count', 0},
230-
{'c2s_element_out.message_error_count', 0},
231-
{'c2s_element_out.presence_error_count', 1}]).
228+
[{'xmpp_element_out.c2s.error_count', 1},
229+
{'xmpp_element_out.c2s.iq_error_count', 0},
230+
{'xmpp_element_out.c2s.message_error_count', 0},
231+
{'xmpp_element_out.c2s.presence_error_count', 1}]).
232232

233233
session_counters(Config) ->
234234
escalus:story
@@ -344,9 +344,9 @@ metrics_msg_flow(_Config) ->
344344
user_alpha(NumberOfUsers) ->
345345
%% This represents the overhead of logging in N users via escalus:story/3
346346
%% For each user,
347-
%% c2s_element_(in|out).stanza_count
347+
%% xmpp_element_(in|out).c2s.stanza_count
348348
%% and
349-
%% c2s_element_(in|out).presence_count
349+
%% xmpp_element_(in|out).c2s.presence_count
350350
%% will be bumped by +1 at login.
351351
NumberOfUsers.
352352

0 commit comments

Comments
 (0)