Skip to content

ref: move grouping module into issues #97911

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
20 changes: 10 additions & 10 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -396,15 +396,6 @@ module = [
"sentry.eventtypes.error",
"sentry.feedback.migrations.*",
"sentry.flags.migrations.*",
"sentry.grouping.api",
"sentry.grouping.component",
"sentry.grouping.enhancer.parser",
"sentry.grouping.fingerprinting.*",
"sentry.grouping.grouping_info",
"sentry.grouping.ingest.*",
"sentry.grouping.parameterization",
"sentry.grouping.utils",
"sentry.grouping.variants",
"sentry.hybridcloud.*",
"sentry.identity.discord.*",
"sentry.identity.github_enterprise.*",
Expand Down Expand Up @@ -482,6 +473,15 @@ module = [
"sentry.issues.escalating.escalating_group_forecast",
"sentry.issues.escalating.escalating_issues_alg",
"sentry.issues.escalating.forecasts",
"sentry.issues.grouping.api",
"sentry.issues.grouping.component",
"sentry.issues.grouping.enhancer.parser",
"sentry.issues.grouping.fingerprinting.*",
"sentry.issues.grouping.grouping_info",
"sentry.issues.grouping.ingest.*",
"sentry.issues.grouping.parameterization",
"sentry.issues.grouping.utils",
"sentry.issues.grouping.variants",
"sentry.issues.ignored",
"sentry.issues.ingest",
"sentry.issues.issue_occurrence",
Expand Down Expand Up @@ -682,7 +682,6 @@ module = [
"tests.sentry.eventstream.kafka.*",
"tests.sentry.eventtypes.*",
"tests.sentry.flags.providers.*",
"tests.sentry.grouping.*",
"tests.sentry.hybridcloud.*",
"tests.sentry.incidents.handlers.*",
"tests.sentry.incidents.serializers.*",
Expand All @@ -702,6 +701,7 @@ module = [
"tests.sentry.issues.auto_source_code_config.*",
"tests.sentry.issues.endpoints.*",
"tests.sentry.issues.escalating.*",
"tests.sentry.issues.grouping.*",
"tests.sentry.issues.ownership.*",
"tests.sentry.issues.test_attributes",
"tests.sentry.issues.test_group_attributes_dataset",
Expand Down
2 changes: 1 addition & 1 deletion src/sentry/api/endpoints/project_rule_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from sentry.api.base import region_silo_endpoint
from sentry.api.bases import ProjectAlertRulePermission, ProjectEndpoint
from sentry.api.serializers.rest_framework import DummyRuleSerializer
from sentry.grouping.grouptype import ErrorGroupType
from sentry.issues.grouping.grouptype import ErrorGroupType
from sentry.models.rule import Rule
from sentry.notifications.notification_action.utils import should_fire_workflow_actions
from sentry.notifications.types import TEST_NOTIFICATION_ID
Expand Down
6 changes: 3 additions & 3 deletions src/sentry/core/endpoints/project_details.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@
from sentry.dynamic_sampling import get_supported_biases_ids, get_user_biases
from sentry.dynamic_sampling.types import DynamicSamplingMode
from sentry.dynamic_sampling.utils import has_custom_dynamic_sampling, has_dynamic_sampling
from sentry.grouping.enhancer import Enhancements
from sentry.grouping.enhancer.exceptions import InvalidEnhancerConfig
from sentry.grouping.fingerprinting import FingerprintingRules, InvalidFingerprintingConfig
from sentry.ingest.inbound_filters import FilterTypes
from sentry.issues.grouping.enhancer import Enhancements
from sentry.issues.grouping.enhancer.exceptions import InvalidEnhancerConfig
from sentry.issues.grouping.fingerprinting import FingerprintingRules, InvalidFingerprintingConfig
from sentry.issues.highlights import HighlightContextField
from sentry.lang.native.sources import (
InvalidSourcesError,
Expand Down
34 changes: 20 additions & 14 deletions src/sentry/event_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,35 +49,41 @@
from sentry.eventtypes import EventType
from sentry.eventtypes.transaction import TransactionEvent
from sentry.exceptions import HashDiscarded
from sentry.grouping.api import (
from sentry.ingest.inbound_filters import FilterStatKeys
from sentry.ingest.transaction_clusterer.datasource.redis import (
record_transaction_name as record_transaction_name_for_clustering,
)
from sentry.integrations.tasks.kick_off_status_syncs import kick_off_status_syncs
from sentry.issues.grouping.api import (
NULL_GROUPHASH_INFO,
GroupHashInfo,
GroupingConfig,
get_grouping_config_dict_for_project,
)
from sentry.grouping.enhancer import get_enhancements_version
from sentry.grouping.grouptype import ErrorGroupType
from sentry.grouping.ingest.config import is_in_transition, update_or_set_grouping_config_if_needed
from sentry.grouping.ingest.hashing import (
from sentry.issues.grouping.enhancer import get_enhancements_version
from sentry.issues.grouping.grouptype import ErrorGroupType
from sentry.issues.grouping.ingest.config import (
is_in_transition,
update_or_set_grouping_config_if_needed,
)
from sentry.issues.grouping.ingest.hashing import (
find_grouphash_with_group,
get_or_create_grouphashes,
maybe_run_background_grouping,
maybe_run_secondary_grouping,
run_primary_grouping,
)
from sentry.grouping.ingest.metrics import record_hash_calculation_metrics, record_new_group_metrics
from sentry.grouping.ingest.seer import maybe_check_seer_for_matching_grouphash
from sentry.grouping.ingest.utils import (
from sentry.issues.grouping.ingest.metrics import (
record_hash_calculation_metrics,
record_new_group_metrics,
)
from sentry.issues.grouping.ingest.seer import maybe_check_seer_for_matching_grouphash
from sentry.issues.grouping.ingest.utils import (
add_group_id_to_grouphashes,
check_for_group_creation_load_shed,
is_non_error_type_group,
)
from sentry.grouping.variants import BaseVariant
from sentry.ingest.inbound_filters import FilterStatKeys
from sentry.ingest.transaction_clusterer.datasource.redis import (
record_transaction_name as record_transaction_name_for_clustering,
)
from sentry.integrations.tasks.kick_off_status_syncs import kick_off_status_syncs
from sentry.issues.grouping.variants import BaseVariant
from sentry.issues.issue_occurrence import IssueOccurrence
from sentry.issues.producer import PayloadType, produce_occurrence_to_kafka
from sentry.killswitches import killswitch_matches_context
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
query_recent_feedbacks_with_ai_labels,
query_top_ai_labels_by_feedback_count,
)
from sentry.grouping.utils import hash_from_values
from sentry.issues.grouping.utils import hash_from_values
from sentry.models.organization import Organization
from sentry.seer.seer_setup import has_seer_access
from sentry.seer.signed_seer_api import sign_with_seer_secret
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from sentry.api.bases.organization import OrganizationEndpoint, OrganizationUserReportsPermission
from sentry.api.utils import get_date_range_from_stats_period
from sentry.exceptions import InvalidParams
from sentry.grouping.utils import hash_from_values
from sentry.issues.grouping.utils import hash_from_values
from sentry.issues.grouptype import FeedbackGroup
from sentry.models.group import Group, GroupStatus
from sentry.models.organization import Organization
Expand Down
155 changes: 3 additions & 152 deletions src/sentry/grouping/__init__.py
Original file line number Diff line number Diff line change
@@ -1,152 +1,3 @@
"""
sentry.grouping
~~~~~~~~~~~~~~~

This package implements various grouping-related functionality in Sentry.
It's an evolution of an earlier grouping system and the backwards compatibility
with that grouping system has created a certain amount of complexity.

General Overview
----------------

Events are grouped together into issues by calculating hashes. When a hash
is already associated with a group that group is reused. The grouping code
can however generate more than one hash, the sum of which will be added to a
group. In the database this is represented in the `GroupHash` model.

Grouping hashes can be created from different methods. These are called
variants and are implemented in `sentry.grouping.variants`. The
`get_grouping_variants_for_event` function of the grouping API evaluates all
variants that would be available for the event and returns them.

Afterward each variant can produce hashes which are then used for grouping.
Some variants (like the component variants) will use the component system
to annotate the inputs into the hashing function so that they can be shown
to the user. These values are not persisted, but the configuration inputs are
stored in the event.

For reproducibility an event config is persisted in the event dictionary
consisting of two main sources of configuration: event enhancers and the
version of the grouping config. Enhancers are custom rules applied to the
default grouping algorithm (specifically the stacktrace) and the base version
picks one of the many versions of the algorithm.

The default version of the algorithm is selected on a per project basis.

Variants
--------

`ChecksumVariant`:
This is the legacy variant of the grouping algorithm. Clients at one point
in the past were able to provide a grouping hash (the checksum) which was
used for grouping exclusively.

`HashedChecksumVariant`:
A ChecksumVariant with a hash value which has been normalized by running the
provided checksum through a hashing function.

`FallbackVariant`:
This variant always produces the same hash. It's used if nothing else works.

`ComponentVariant`:
This is the main variant which uses the underlying component-based grouping
strategy system. It produces hashes but can also expose the component
tree with annotations so users can debug it.

`CustomFingerprintVariant`:
This variant is similar to the checksum variant but instead produces
fingerprint values which are then hashed. This can be used both for
events produced by the server as well as events which are fingerprinted
by the SDK. If the special `{{ default }}` value is used then this
variant is not used.

`BuiltInFingerprintVariant`:
Same as the custom fingerprint variant but produced by pre-defined
built-in rules. This is used for server-side fingerprinting.

`SaltedComponentVariant`:
This variant is used when the server or client produces a fingerprint
that refers (with the special `{{ default }}` value) to the default
grouping algorithm and the default grouping algorithm uses the
component based grouping system.

Component-Based Grouping
------------------------

The component-based grouping system produces a tree of nodes (referred to
as a component) to produce the hash. These are defined in
`sentry.grouping.component`. A component has an `id` which is a string
identifying the component in the tree. This `id` is not unique in the tree
but unique on one level of the hierarchy. It has an optional `hint` which is
a string that carries information for the user about why a value was used, not
used, or why it might have been modified. A component can additionally carry `values`,
which are either components or primitives (strings or integers). Lastly a
component has a boolean `contributes` flag. When set to `False`, this flag
removes a component (and its children) entirely from the grouping output.

Here is an example of how components can be used::

function_name = 'lambda$1234'
threads = BaseGroupingComponent(
id="function",
values=[function_name],
contributes=False,
hint='Unused because generated code'
)

Strategies and Strategy Configurations
--------------------------------------

Component-based grouping is triggered by strategies. Strategies define the
logic for how to generate a component for an interface in the event. Each
strategy can only produce a component for one interface. For instance, a
stacktrace strategy can produce a component tree for a stacktrace. Because
events can have different forms and different strategies for the same interface,
strategy configurations define which ones are picked.

For instance, there was a `frame:legacy` strategy, which was the legacy
version of the `frame` strategy. Then there are the new ones (`frame:v1`,
`frame:v2`, etc.). The strategy configuration defines which one is used.
These are in `sentry.grouping.strategies.configurations`. A strategy can
inherit from another one, in which case a lot of behavior is inherited unless
overridden.

This for instance is how one of the configurations is defined::

register_strategy_config(
id="newstyle:SomeDate",
base="newstyle:AnotherDate",
delegates=["frame:v4"],
)

The configuration ID (`newstyle:YYYY-MM-DD`) is defined in the project
options and then becomes the strategy configuration of choice for all new
events. Because in this case it inherits from another strategy, the default
configurations from that strategy are reused. Here the `frame` is changed
to version `v4`.

Note that here the frame is defined as a delegate. A delegate is a strategy
that is used for an interface which by itself is not used for grouping. This
means that just because an event has a frame, the frame strategy does not necessarily
activate. Only if another interface recurses down into a frame will this strategy
be used.

Fingerprinting and Enhancements
-------------------------------

Server-side fingerprinting and enhancements are also folded into this grouping
system. The former is in `sentry.grouping.fingerprinting` and can fundamentally
override the default grouping system. Enhancements are used by the stacktrace
strategies to improve how stacktraces are used for grouping. Enhancements
activate in two places: as part of stacktrace normalization to update the
`in-app` flag and later on during grouping by the stacktrace strategy, to add or remove
frames from the grouping algorithm.

Testing
-------

Tests for grouping are in `tests/sentry/grouping`. They are snapshot-based,
and snapshots exist for all tests and all versions of the grouping algorithm.
If you add a new one, copy the folder in the snapshot directory over to match
the new name, which makes it easier to evaluate differences.
"""
# Shim for backward compatibility with getsentry
# The grouping module has been moved to sentry.issues.grouping
from sentry.issues.grouping import * # noqa: F401, F403
43 changes: 3 additions & 40 deletions src/sentry/grouping/grouptype.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,3 @@
from dataclasses import dataclass
from typing import TypeVar

from sentry.issues.grouptype import GroupCategory, GroupType
from sentry.models.group import DEFAULT_TYPE_ID
from sentry.types.group import PriorityLevel
from sentry.workflow_engine.endpoints.validators.error_detector import ErrorDetectorValidator
from sentry.workflow_engine.handlers.detector.base import DetectorHandler
from sentry.workflow_engine.models.data_source import DataPacket
from sentry.workflow_engine.types import (
DetectorEvaluationResult,
DetectorGroupKey,
DetectorSettings,
)

T = TypeVar("T")


class ErrorDetectorHandler(DetectorHandler):
    """Detector handler wired into ``ErrorGroupType``'s detector settings.

    The evaluation logic is currently a stub: it ignores the incoming data
    packet and reports no results.
    """

    def evaluate(
        self, data_packet: DataPacket[T]
    ) -> dict[DetectorGroupKey, DetectorEvaluationResult]:
        """Return an empty mapping of group key to evaluation result."""
        # Placeholder implementation: nothing is evaluated yet.
        no_results: dict[DetectorGroupKey, DetectorEvaluationResult] = {}
        return no_results


@dataclass(frozen=True)
class ErrorGroupType(GroupType):
    """Group type definition for error issues."""

    # Identification.
    type_id = DEFAULT_TYPE_ID
    slug = "error"
    description = "Error"

    # Both category attributes carry the ERROR category value.
    category = GroupCategory.ERROR.value
    category_v2 = GroupCategory.ERROR.value

    # Defaults and rollout state.
    default_priority = PriorityLevel.MEDIUM
    released = True

    # Detector wiring: handler, validator, and a config schema that accepts
    # only an object with no additional properties.
    detector_settings = DetectorSettings(
        handler=ErrorDetectorHandler,
        validator=ErrorDetectorValidator,
        config_schema={"type": "object", "additionalProperties": False},
    )
# Shim for backward compatibility with getsentry
# The grouping module has been moved to sentry.issues.grouping
from sentry.issues.grouping.grouptype import * # noqa: F401, F403
3 changes: 3 additions & 0 deletions src/sentry/grouping/ingest/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Shim for backward compatibility with getsentry
# The grouping module has been moved to sentry.issues.grouping
from sentry.issues.grouping.ingest import * # noqa: F401, F403
Loading
Loading