From 4f03ada35b840e7d184118a54396e0b1796755f5 Mon Sep 17 00:00:00 2001 From: Jason Little Date: Thu, 4 Dec 2025 09:43:21 -0600 Subject: [PATCH 1/9] poetry dependencies --- poetry.lock | 32 +++++++++++++++++++++++--------- pyproject.toml | 1 + 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/poetry.lock b/poetry.lock index 27fcf1fd74..f3f5aa0a7e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -960,7 +960,7 @@ files = [ {file = "importlib_metadata-6.7.0-py3-none-any.whl", hash = "sha256:cb52082e659e97afc5dac71e79de97d8681de3aa07ff18578330904a9d18e5b5"}, {file = "importlib_metadata-6.7.0.tar.gz", hash = "sha256:1aaf550d4f73e5d6783e7acb77aec43d49da8017410afae93822cc9cca98c4d4"}, ] -markers = {main = "extra == \"opentracing-otlp\" or extra == \"opentelemetry-log-handler\" or extra == \"all\"", dev = "platform_machine != \"ppc64le\" and platform_machine != \"s390x\" and python_version < \"3.12\""} +markers = {dev = "platform_machine != \"ppc64le\" and platform_machine != \"s390x\" and python_version < \"3.12\""} [package.dependencies] zipp = ">=0.5" @@ -1673,10 +1673,9 @@ nicer-shell = ["ipython"] name = "opentelemetry-api" version = "1.34.1" description = "OpenTelemetry Python API" -optional = true +optional = false python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"opentracing-otlp\" or extra == \"opentelemetry-log-handler\" or extra == \"all\"" files = [ {file = "opentelemetry_api-1.34.1-py3-none-any.whl", hash = "sha256:b7df4cb0830d5a6c29ad0c0691dbae874d8daefa934b8b1d642de48323d32a8c"}, {file = "opentelemetry_api-1.34.1.tar.gz", hash = "sha256:64f0bd06d42824843731d05beea88d4d4b6ae59f9fe347ff7dfa2cc14233bbb3"}, @@ -1766,6 +1765,23 @@ opentelemetry-sdk = ">=1.34.1,<1.35.0" requests = ">=2.7,<3.0" typing-extensions = ">=4.5.0" +[[package]] +name = "opentelemetry-exporter-prometheus" +version = "0.55b1" +description = "Prometheus Metric Exporter for OpenTelemetry" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "opentelemetry_exporter_prometheus-0.55b1-py3-none-any.whl", hash = "sha256:f364fbbff9e5de37a112ff104d1185fb1d7e2046c5ab5911e5afebc7ab3ddf0e"}, + {file = "opentelemetry_exporter_prometheus-0.55b1.tar.gz", hash = "sha256:d13ec0b22bf394113ff1ada5da98133a4b051779b803dae183188e26c4bd9ee0"}, +] + +[package.dependencies] +opentelemetry-api = ">=1.12,<2.0" +opentelemetry-sdk = ">=1.34.1,<1.35.0" +prometheus-client = ">=0.5.0,<1.0.0" + [[package]] name = "opentelemetry-opentracing-shim" version = "0.55b1" @@ -1804,10 +1820,9 @@ protobuf = ">=5.0,<6.0" name = "opentelemetry-sdk" version = "1.34.1" description = "OpenTelemetry Python SDK" -optional = true +optional = false python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"opentracing-otlp\" or extra == \"opentelemetry-log-handler\" or extra == \"all\"" files = [ {file = "opentelemetry_sdk-1.34.1-py3-none-any.whl", hash = "sha256:308effad4059562f1d92163c61c8141df649da24ce361827812c40abb2a1e96e"}, {file = "opentelemetry_sdk-1.34.1.tar.gz", hash = "sha256:8091db0d763fcd6098d4781bbc80ff0971f94e260739aa6afe6fd379cdf3aa4d"}, @@ -1822,10 +1837,9 @@ typing-extensions = ">=4.5.0" name = "opentelemetry-semantic-conventions" version = "0.55b1" description = "OpenTelemetry Semantic Conventions" -optional = true +optional = false python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"opentracing-otlp\" or extra == \"opentelemetry-log-handler\" or extra == \"all\"" files = [ {file = "opentelemetry_semantic_conventions-0.55b1-py3-none-any.whl", hash = "sha256:5da81dfdf7d52e3d37f8fe88d5e771e191de924cfff5f550ab0b8f7b2409baed"}, {file = "opentelemetry_semantic_conventions-0.55b1.tar.gz", hash = "sha256:ef95b1f009159c28d7a7849f5cbc71c4c34c845bb514d66adfdf1b3fff3598b3"}, @@ -3517,7 +3531,7 @@ files = [ {file = "zipp-3.19.1-py3-none-any.whl", hash = "sha256:2828e64edb5386ea6a52e7ba7cdb17bb30a73a858f5eb6eb93d8d36f5ea26091"}, {file = "zipp-3.19.1.tar.gz", hash = "sha256:35427f6d5594f4acf82d25541438348c26736fa9b3afa2754bcd63cdb99d8e8f"}, ] -markers = {main = "extra == \"opentracing-otlp\" or extra == \"opentelemetry-log-handler\" or extra == \"all\"", dev = "platform_machine != \"ppc64le\" and platform_machine != \"s390x\" and python_version < \"3.12\""} +markers = {dev = "platform_machine != \"ppc64le\" and platform_machine != \"s390x\" and python_version < \"3.12\""} [package.extras] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] @@ -3638,4 +3652,4 @@ url-preview = ["lxml"] [metadata] lock-version = "2.1" python-versions = "^3.10.0" -content-hash = "7f5d25ff7f67ce8283b4d3bf831e20b1a11367c0996160a333abb7df5569c9c7" +content-hash = "194de82d864503514051cf1361cefca7f1d3a3ae613305b45b0dea2e740ec90a" diff --git a/pyproject.toml b/pyproject.toml index e4afc89312..f3a19ee3c1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -225,6 +225,7 @@ packaging = ">=20.0" # We support pydantic v1 and pydantic v2 via the pydantic.v1 compat module. # See https://github.com/matrix-org/synapse/issues/15858 pydantic = ">=1.7.4, <3" +opentelemetry-exporter-prometheus = "0.55b1" # This is for building the rust components during "poetry install", which # currently ignores the `build-system.requires` directive (c.f. From d793eeedf31bbd5c2ee9e3006977c770c01030b2 Mon Sep 17 00:00:00 2001 From: Jason Little Date: Thu, 4 Dec 2025 09:50:53 -0600 Subject: [PATCH 2/9] Load the metrics from the global registery. I think this grabs from the otel metrics too? Local test seems to support this. Needs better eyes than me --- synapse/metrics/__init__.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index fe673eea19..5639309d6d 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -43,6 +43,12 @@ ) import attr +from opentelemetry import metrics +from opentelemetry.exporter.prometheus import ( + PrometheusMetricReader, +) +from opentelemetry.sdk.metrics import MeterProvider +from opentelemetry.sdk.resources import SERVICE_NAME, Resource as OtelResource from packaging.version import parse as parse_version from prometheus_client import ( CollectorRegistry, @@ -138,6 +144,16 @@ def _set_prometheus_client_use_created_metrics(new_value: bool) -> None: # Set this globally so it applies wherever we generate/collect metrics _set_prometheus_client_use_created_metrics(False) +# This will create a Resource that can do the displaying of the prometheus metrics. The +# start_http_server() that is used by the listen_metrics() call in _base.py will pick +# this up and serve it. +resource = OtelResource(attributes={SERVICE_NAME: "synapse"}) +reader = PrometheusMetricReader() +provider = MeterProvider(resource=resource, metric_readers=[reader]) +metrics.set_meter_provider(provider) +# Global meter for registering otel metrics +meter = provider.get_meter("synapse-otel-meter") + class _RegistryProxy: @staticmethod From c3f745d9c6f2445330a6dafcca94dfbbd32e9cf8 Mon Sep 17 00:00:00 2001 From: Jason Little Date: Thu, 4 Dec 2025 10:01:51 -0600 Subject: [PATCH 3/9] Create a SynapseCounter that simulates the API of a prometheus Counter but goes to a Otel meter instead --- synapse/metrics/__init__.py | 81 +++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 5639309d6d..1a4accf771 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -27,6 +27,7 @@ import threading from importlib import metadata from typing import ( + Any, Callable, Dict, Generic, @@ -63,6 +64,7 @@ GaugeHistogramMetricFamily, GaugeMetricFamily, ) +from typing_extensions import Self from twisted.python.threadpool import ThreadPool from twisted.web.resource import Resource @@ -170,6 +172,84 @@ def collect() -> Iterable[Metric]: RegistryProxy = cast(CollectorRegistry, _RegistryProxy) +def _build_full_name( + metric_type: str, name: str, namespace: str, subsystem: str, unit: str +) -> str: + # Ripped from prometheus_client/metrics.py + if not name: + raise ValueError("Metric name should not be empty") + full_name = "" + if namespace: + full_name += namespace + "_" + if subsystem: + full_name += subsystem + "_" + full_name += name + if metric_type == "counter" and full_name.endswith("_total"): + full_name = full_name[:-6] # Munge to OpenMetrics. + if unit and not full_name.endswith("_" + unit): + full_name += "_" + unit + if unit and metric_type in ("info", "stateset"): + raise ValueError( + "Metric name is of a type that cannot have a unit: " + full_name + ) + return full_name + + +class SynapseCounter: + _type: str = "counter" + + def __init__( + self, + name: str, + documentation: str, + labelnames: Iterable[str] = (), + namespace: str = "", + subsystem: str = "", + unit: str = "", + registry: Optional[CollectorRegistry] = None, + _labelvalues: Optional[Sequence[str]] = None, + ) -> None: + # Here is where we grab the global meter to create a FauxCounter + self._counter = meter.create_counter(name, unit=unit, description=documentation) + self._name = _build_full_name(self._type, name, namespace, subsystem, unit) + + # prom validates these, should we do that? + # labelnames provide a simple way to register that a given set of kwargs call + # from labels can be used. All should be used in a call? + self._labelnames = tuple(labelnames or ()) + + self._labelvalues = tuple(_labelvalues or ()) + + self._current_attributes = () + self._lock = threading.Lock() + + def labels(self, *labelvalues: Any, **labelkwargs: Any) -> Self: + if labelkwargs: + if sorted(labelkwargs) != sorted(self._labelnames): + raise ValueError("Incorrect label names") + labelvalues = tuple(str(labelkwargs[lv]) for lv in self._labelnames) + else: + if len(labelvalues) != len(self._labelnames): + raise ValueError("Incorrect label count") + labelvalues = tuple(str(lv) for lv in labelvalues) + + # The lock needs to block, which means it will wait until it is released for the + # next call. May need a timeout later, dunno + if self._lock.acquire_lock(): + self._current_attributes = labelvalues + + return self + + def inc(self, amount: float = 1.0) -> None: + # Need to verify what happens with Counters that do not have labels as children, + # this may not be appropriate in those cases. Can probably just leave the + # attributes param as empty in that case? + self._counter.add(amount, dict(zip(self._labelnames, self._current_attributes))) + # If this was a "child" metric, then the lock will have been taken in labels() + if self._lock.locked(): + self._lock.release() + + @attr.s(slots=True, hash=True, auto_attribs=True, kw_only=True) class LaterGauge(Collector): """A Gauge which periodically calls a user-provided callback to produce metrics.""" @@ -791,4 +871,5 @@ def render_GET(self, request: Request) -> bytes: "GaugeBucketCollector", "MIN_TIME_BETWEEN_GCS", "install_gc_manager", + "SynapseCounter", ] From 8cf5a4fd0f29080e90e619aa6798d74fcfcf2a72 Mon Sep 17 00:00:00 2001 From: Jason Little Date: Thu, 4 Dec 2025 10:02:18 -0600 Subject: [PATCH 4/9] Try and replace a couple of regular Counters --- synapse/federation/federation_client.py | 5 ++--- synapse/util/metrics.py | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 8c91336dbc..ce4ee58445 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -44,7 +44,6 @@ ) import attr -from prometheus_client import Counter from synapse.api.constants import Direction, EventContentFields, EventTypes, Membership from synapse.api.errors import ( @@ -74,7 +73,7 @@ from synapse.http.client import is_unknown_endpoint from synapse.http.types import QueryParams from synapse.logging.opentracing import SynapseTags, log_kv, set_tag, tag_args, trace -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter from synapse.types import JsonDict, StrCollection, UserID, get_domain_from_id from synapse.types.handlers.policy_server import RECOMMENDATION_OK, RECOMMENDATION_SPAM from synapse.util.async_helpers import concurrently_execute @@ -86,7 +85,7 @@ logger = logging.getLogger(__name__) -sent_queries_counter = Counter( +sent_queries_counter = SynapseCounter( "synapse_federation_client_sent_queries", "", labelnames=["type", SERVER_NAME_LABEL] ) diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index f71380d689..1492fd37b4 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -41,14 +41,14 @@ LoggingContext, current_context, ) -from synapse.metrics import SERVER_NAME_LABEL, InFlightGauge +from synapse.metrics import SERVER_NAME_LABEL, InFlightGauge, SynapseCounter from synapse.util.clock import Clock logger = logging.getLogger(__name__) # Metrics to see the number of and how much time is spend in various blocks of code. # -block_counter = Counter( +block_counter = SynapseCounter( "synapse_util_metrics_block_count", documentation="The number of times this block has been called.", labelnames=["block_name", SERVER_NAME_LABEL], From cec8d65db70185d523e4b9feb71e328b1ec6ec1a Mon Sep 17 00:00:00 2001 From: FrenchgGithubUser Date: Mon, 15 Dec 2025 16:10:34 +0100 Subject: [PATCH 5/9] use `SynapseCounter` everywhere --- synapse/appservice/api.py | 13 ++-- synapse/federation/federation_server.py | 10 +-- synapse/federation/sender/__init__.py | 6 +- .../sender/per_destination_queue.py | 7 +- synapse/handlers/appservice.py | 5 +- synapse/handlers/auth.py | 5 +- synapse/handlers/federation_event.py | 6 +- synapse/handlers/presence.py | 22 +++--- synapse/handlers/register.py | 8 +- synapse/handlers/sync.py | 5 +- synapse/http/client.py | 7 +- synapse/http/matrixfederationclient.py | 7 +- synapse/http/request_metrics.py | 32 ++++---- synapse/metrics/__init__.py | 74 +++++++++++++++++-- synapse/metrics/background_process_metrics.py | 16 ++-- synapse/notifier.py | 7 +- synapse/push/bulk_push_rule_evaluator.py | 8 +- synapse/push/httppusher.py | 12 ++- synapse/push/mailer.py | 5 +- synapse/replication/http/_base.py | 6 +- synapse/replication/tcp/external_cache.py | 8 +- synapse/replication/tcp/handler.py | 14 ++-- synapse/replication/tcp/protocol.py | 9 +-- synapse/replication/tcp/resource.py | 6 +- synapse/state/__init__.py | 10 +-- synapse/storage/controllers/persist_events.py | 16 ++-- synapse/storage/database.py | 8 +- .../databases/main/event_federation.py | 6 +- synapse/storage/databases/main/events.py | 7 +- synapse/util/metrics.py | 14 ++-- synapse/util/ratelimitutils.py | 8 +- 31 files changed, 200 insertions(+), 167 deletions(-) diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py index f08a921998..80b9c6b81d 100644 --- a/synapse/appservice/api.py +++ b/synapse/appservice/api.py @@ -31,7 +31,6 @@ Union, ) -from prometheus_client import Counter from typing_extensions import ParamSpec, TypeGuard from synapse.api.constants import EventTypes, Membership, ThirdPartyEntityKind @@ -45,7 +44,7 @@ from synapse.events.utils import SerializeEventConfig, serialize_event from synapse.http.client import SimpleHttpClient, is_unknown_endpoint from synapse.logging import opentracing -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter from synapse.types import DeviceListUpdates, JsonDict, JsonMapping, ThirdPartyInstanceID from synapse.util.caches.response_cache import ResponseCache @@ -54,31 +53,31 @@ logger = logging.getLogger(__name__) -sent_transactions_counter = Counter( +sent_transactions_counter = SynapseCounter( "synapse_appservice_api_sent_transactions", "Number of /transactions/ requests sent", labelnames=["service", SERVER_NAME_LABEL], ) -failed_transactions_counter = Counter( +failed_transactions_counter = SynapseCounter( "synapse_appservice_api_failed_transactions", "Number of /transactions/ requests that failed to send", labelnames=["service", SERVER_NAME_LABEL], ) -sent_events_counter = Counter( +sent_events_counter = SynapseCounter( "synapse_appservice_api_sent_events", "Number of events sent to the AS", labelnames=["service", SERVER_NAME_LABEL], ) -sent_ephemeral_counter = Counter( +sent_ephemeral_counter = SynapseCounter( "synapse_appservice_api_sent_ephemeral", "Number of ephemeral events sent to the AS", labelnames=["service", SERVER_NAME_LABEL], ) -sent_todevice_counter = Counter( +sent_todevice_counter = SynapseCounter( "synapse_appservice_api_sent_todevice", "Number of todevice messages sent to the AS", labelnames=["service", SERVER_NAME_LABEL], diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 9d1a981d97..cf3eff7e3f 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -32,7 +32,7 @@ Union, ) -from prometheus_client import Counter, Gauge, Histogram +from prometheus_client import Gauge, Histogram from twisted.python import failure @@ -79,7 +79,7 @@ tag_args, trace, ) -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.replication.http.federation import ( ReplicationFederationSendEduRestServlet, @@ -102,15 +102,15 @@ logger = logging.getLogger(__name__) -received_pdus_counter = Counter( +received_pdus_counter = SynapseCounter( "synapse_federation_server_received_pdus", "", labelnames=[SERVER_NAME_LABEL] ) -received_edus_counter = Counter( +received_edus_counter = SynapseCounter( "synapse_federation_server_received_edus", "", labelnames=[SERVER_NAME_LABEL] ) -received_queries_counter = Counter( +received_queries_counter = SynapseCounter( "synapse_federation_server_received_queries", "", labelnames=["type", SERVER_NAME_LABEL], diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index 229ae647c0..e30f75f031 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -142,7 +142,6 @@ ) import attr -from prometheus_client import Counter from twisted.internet import defer @@ -160,6 +159,7 @@ from synapse.metrics import ( SERVER_NAME_LABEL, LaterGauge, + SynapseCounter, event_processing_loop_counter, event_processing_loop_room_count, events_processed_counter, @@ -184,13 +184,13 @@ logger = logging.getLogger(__name__) -sent_pdus_destination_dist_count = Counter( +sent_pdus_destination_dist_count = SynapseCounter( "synapse_federation_client_sent_pdu_destinations_count", "Number of PDUs queued for sending to one or more destinations", labelnames=[SERVER_NAME_LABEL], ) -sent_pdus_destination_dist_total = Counter( +sent_pdus_destination_dist_total = SynapseCounter( "synapse_federation_client_sent_pdu_destinations", "Total number of PDUs queued for sending across all destinations", labelnames=[SERVER_NAME_LABEL], diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py index ecf4789d76..1b69d26333 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py @@ -26,7 +26,6 @@ from typing import TYPE_CHECKING, Hashable, Iterable, Optional import attr -from prometheus_client import Counter from twisted.internet import defer @@ -42,7 +41,7 @@ from synapse.handlers.presence import format_user_presence_state from synapse.logging import issue9533_logger from synapse.logging.opentracing import SynapseTags, set_tag -from synapse.metrics import SERVER_NAME_LABEL, sent_transactions_counter +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter, sent_transactions_counter from synapse.types import JsonDict, ReadReceipt from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter from synapse.visibility import filter_events_for_server @@ -56,13 +55,13 @@ logger = logging.getLogger(__name__) -sent_edus_counter = Counter( +sent_edus_counter = SynapseCounter( "synapse_federation_client_sent_edus", "Total number of EDUs successfully sent", labelnames=[SERVER_NAME_LABEL], ) -sent_edus_by_type = Counter( +sent_edus_by_type = SynapseCounter( "synapse_federation_client_sent_edus_by_type", "Number of sent EDUs successfully sent, by event type", labelnames=["type", SERVER_NAME_LABEL], diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py index 5240178d80..f7cd3ace85 100644 --- a/synapse/handlers/appservice.py +++ b/synapse/handlers/appservice.py @@ -28,8 +28,6 @@ Union, ) -from prometheus_client import Counter - from twisted.internet import defer import synapse @@ -40,6 +38,7 @@ from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.metrics import ( SERVER_NAME_LABEL, + SynapseCounter, event_processing_loop_counter, event_processing_loop_room_count, ) @@ -65,7 +64,7 @@ logger = logging.getLogger(__name__) -events_processed_counter = Counter( +events_processed_counter = SynapseCounter( "synapse_handlers_appservice_events_processed", "", labelnames=[SERVER_NAME_LABEL] ) diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 32ef89bab4..9ccd6bffa9 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -41,7 +41,6 @@ import attr import bcrypt import unpaddedbase64 -from prometheus_client import Counter from twisted.internet.defer import CancelledError from twisted.web.server import Request @@ -66,7 +65,7 @@ from synapse.http.server import finish_request, respond_with_html from synapse.http.site import SynapseRequest from synapse.logging.context import defer_to_thread -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.databases.main.registration import ( LoginTokenExpired, @@ -89,7 +88,7 @@ INVALID_USERNAME_OR_PASSWORD = "Invalid username or password" -invalid_login_token_counter = Counter( +invalid_login_token_counter = SynapseCounter( "synapse_user_login_invalid_login_tokens", "Counts the number of rejected m.login.token on /login", labelnames=["reason", SERVER_NAME_LABEL], diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 32b603e947..228f3484fb 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -32,7 +32,7 @@ Sequence, ) -from prometheus_client import Counter, Histogram +from prometheus_client import Histogram from synapse import event_auth from synapse.api.constants import ( @@ -76,7 +76,7 @@ tag_args, trace, ) -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter from synapse.replication.http.federation import ( ReplicationFederationSendEventsRestServlet, ) @@ -102,7 +102,7 @@ logger = logging.getLogger(__name__) -soft_failed_event_counter = Counter( +soft_failed_event_counter = SynapseCounter( "synapse_federation_soft_failed_events_total", "Events received over federation that we marked as soft_failed", labelnames=[SERVER_NAME_LABEL], diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index d8150a5857..4f366802a7 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -91,8 +91,6 @@ Optional, ) -from prometheus_client import Counter - import synapse.metrics from synapse.api.constants import EduTypes, EventTypes, Membership, PresenceState from synapse.api.errors import SynapseError @@ -100,7 +98,7 @@ from synapse.appservice import ApplicationService from synapse.events.presence_router import PresenceRouter from synapse.logging.context import run_in_background -from synapse.metrics import SERVER_NAME_LABEL, LaterGauge +from synapse.metrics import SERVER_NAME_LABEL, LaterGauge, SynapseCounter from synapse.metrics.background_process_metrics import ( wrap_as_background_process, ) @@ -131,37 +129,37 @@ logger = logging.getLogger(__name__) -notified_presence_counter = Counter( +notified_presence_counter = SynapseCounter( "synapse_handler_presence_notified_presence", "", labelnames=[SERVER_NAME_LABEL] ) -federation_presence_out_counter = Counter( +federation_presence_out_counter = SynapseCounter( "synapse_handler_presence_federation_presence_out", "", labelnames=[SERVER_NAME_LABEL], ) -presence_updates_counter = Counter( +presence_updates_counter = SynapseCounter( "synapse_handler_presence_presence_updates", "", labelnames=[SERVER_NAME_LABEL] ) -timers_fired_counter = Counter( +timers_fired_counter = SynapseCounter( "synapse_handler_presence_timers_fired", "", labelnames=[SERVER_NAME_LABEL] ) -federation_presence_counter = Counter( +federation_presence_counter = SynapseCounter( "synapse_handler_presence_federation_presence", "", labelnames=[SERVER_NAME_LABEL] ) -bump_active_time_counter = Counter( +bump_active_time_counter = SynapseCounter( "synapse_handler_presence_bump_active_time", "", labelnames=[SERVER_NAME_LABEL] ) -get_updates_counter = Counter( +get_updates_counter = SynapseCounter( "synapse_handler_presence_get_updates", "", labelnames=["type", SERVER_NAME_LABEL] ) -notify_reason_counter = Counter( +notify_reason_counter = SynapseCounter( "synapse_handler_presence_notify_reason", "", labelnames=["locality", "reason", SERVER_NAME_LABEL], ) -state_transition_counter = Counter( +state_transition_counter = SynapseCounter( "synapse_handler_presence_state_transition", "", labelnames=["locality", "from", "to", SERVER_NAME_LABEL], diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 8b620a91bc..89116cae6a 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -30,8 +30,6 @@ TypedDict, ) -from prometheus_client import Counter - from synapse import types from synapse.api.constants import ( MAX_USERID_LENGTH, @@ -50,7 +48,7 @@ from synapse.appservice import ApplicationService from synapse.config.server import is_threepid_reserved from synapse.http.servlet import assert_params_in_dict -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter from synapse.replication.http.login import RegisterDeviceReplicationServlet from synapse.replication.http.register import ( ReplicationPostRegisterActionsServlet, @@ -65,13 +63,13 @@ logger = logging.getLogger(__name__) -registration_counter = Counter( +registration_counter = SynapseCounter( "synapse_user_registrations_total", "Number of new users registered (since restart)", labelnames=["guest", "shadow_banned", "auth_provider", SERVER_NAME_LABEL], ) -login_counter = Counter( +login_counter = SynapseCounter( "synapse_user_logins_total", "Number of user logins (since restart)", labelnames=["guest", "auth_provider", SERVER_NAME_LABEL], diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index a19b75203b..8d9f4905fc 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -30,7 +30,6 @@ ) import attr -from prometheus_client import Counter from synapse.api.constants import ( AccountDataTypes, @@ -54,7 +53,7 @@ start_active_span, trace, ) -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter from synapse.storage.databases.main.event_push_actions import RoomNotifCounts from synapse.storage.databases.main.roommember import extract_heroes_from_room_summary from synapse.storage.databases.main.stream import PaginateFunction @@ -91,7 +90,7 @@ # "initial_sync", "full_state_sync" or "incremental_sync", `lazy_loaded` is # "true" or "false" depending on if the request asked for lazy loaded members or # not. -non_empty_sync_counter = Counter( +non_empty_sync_counter = SynapseCounter( "synapse_handlers_sync_nonempty_total", "Count of non empty sync responses. type is initial_sync/full_state_sync" "/incremental_sync. lazy_loaded indicates if lazy loaded members were " diff --git a/synapse/http/client.py b/synapse/http/client.py index ff1f7c7128..2098277053 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -37,7 +37,6 @@ import treq from canonicaljson import encode_canonical_json from netaddr import AddrFormatError, IPAddress, IPSet -from prometheus_client import Counter from zope.interface import implementer from OpenSSL import SSL @@ -81,7 +80,7 @@ from synapse.http.types import QueryParams from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.logging.opentracing import set_tag, start_active_span, tags -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter from synapse.types import ISynapseReactor, StrSequence from synapse.util.async_helpers import timeout_deferred from synapse.util.clock import Clock @@ -106,10 +105,10 @@ logger = logging.getLogger(__name__) -outgoing_requests_counter = Counter( +outgoing_requests_counter = SynapseCounter( "synapse_http_client_requests", "", labelnames=["method", SERVER_NAME_LABEL] ) -incoming_responses_counter = Counter( +incoming_responses_counter = SynapseCounter( "synapse_http_client_responses", "", labelnames=["method", "code", SERVER_NAME_LABEL], diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 562007c74f..23d6401ad0 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -44,7 +44,6 @@ import attr import treq from canonicaljson import encode_canonical_json -from prometheus_client import Counter from signedjson.sign import sign_json from twisted.internet import defer @@ -84,7 +83,7 @@ from synapse.logging import opentracing from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.logging.opentracing import set_tag, start_active_span, tags -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter from synapse.types import JsonDict from synapse.util.async_helpers import AwakenableSleeper, Linearizer, timeout_deferred from synapse.util.clock import Clock @@ -97,12 +96,12 @@ logger = logging.getLogger(__name__) -outgoing_requests_counter = Counter( +outgoing_requests_counter = SynapseCounter( "synapse_http_matrixfederationclient_requests", "", labelnames=["method", SERVER_NAME_LABEL], ) -incoming_responses_counter = Counter( +incoming_responses_counter = SynapseCounter( "synapse_http_matrixfederationclient_responses", "", labelnames=["method", "code", SERVER_NAME_LABEL], diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py index 5cc8a2ebd8..3f2ffd9050 100644 --- a/synapse/http/request_metrics.py +++ b/synapse/http/request_metrics.py @@ -24,28 +24,28 @@ import traceback from typing import Mapping -from prometheus_client.core import Counter, Histogram +from prometheus_client.core import Histogram from synapse.logging.context import current_context -from synapse.metrics import SERVER_NAME_LABEL, LaterGauge +from synapse.metrics import SERVER_NAME_LABEL, LaterGauge, SynapseCounter logger = logging.getLogger(__name__) # total number of responses served, split by method/servlet/tag -response_count = Counter( +response_count = SynapseCounter( "synapse_http_server_response_count", "", labelnames=["method", "servlet", "tag", SERVER_NAME_LABEL], ) -requests_counter = Counter( +requests_counter = SynapseCounter( "synapse_http_server_requests_received", "", labelnames=["method", "servlet", SERVER_NAME_LABEL], ) -outgoing_responses_counter = Counter( +outgoing_responses_counter = SynapseCounter( "synapse_http_server_responses", "", labelnames=["method", "code", SERVER_NAME_LABEL], @@ -57,19 +57,19 @@ labelnames=["method", "servlet", "tag", "code", SERVER_NAME_LABEL], ) -response_ru_utime = Counter( +response_ru_utime = SynapseCounter( "synapse_http_server_response_ru_utime_seconds", "sec", labelnames=["method", "servlet", "tag", SERVER_NAME_LABEL], ) -response_ru_stime = Counter( +response_ru_stime = SynapseCounter( "synapse_http_server_response_ru_stime_seconds", "sec", labelnames=["method", "servlet", "tag", SERVER_NAME_LABEL], ) -response_db_txn_count = Counter( +response_db_txn_count = SynapseCounter( "synapse_http_server_response_db_txn_count", "", labelnames=["method", "servlet", "tag", SERVER_NAME_LABEL], @@ -77,21 +77,21 @@ # seconds spent waiting for db txns, excluding scheduling time, when processing # this request -response_db_txn_duration = Counter( +response_db_txn_duration = SynapseCounter( "synapse_http_server_response_db_txn_duration_seconds", "", labelnames=["method", "servlet", "tag", SERVER_NAME_LABEL], ) # seconds spent waiting for a db connection, when processing this request -response_db_sched_duration = Counter( +response_db_sched_duration = SynapseCounter( "synapse_http_server_response_db_sched_duration_seconds", "", labelnames=["method", "servlet", "tag", SERVER_NAME_LABEL], ) # size in bytes of the response written -response_size = Counter( +response_size = SynapseCounter( "synapse_http_server_response_size", "", labelnames=["method", "servlet", "tag", SERVER_NAME_LABEL], @@ -100,19 +100,19 @@ # In flight metrics are incremented while the requests are in flight, rather # than when the response was written. -in_flight_requests_ru_utime = Counter( +in_flight_requests_ru_utime = SynapseCounter( "synapse_http_server_in_flight_requests_ru_utime_seconds", "", labelnames=["method", "servlet", SERVER_NAME_LABEL], ) -in_flight_requests_ru_stime = Counter( +in_flight_requests_ru_stime = SynapseCounter( "synapse_http_server_in_flight_requests_ru_stime_seconds", "", labelnames=["method", "servlet", SERVER_NAME_LABEL], ) -in_flight_requests_db_txn_count = Counter( +in_flight_requests_db_txn_count = SynapseCounter( "synapse_http_server_in_flight_requests_db_txn_count", "", labelnames=["method", "servlet", SERVER_NAME_LABEL], @@ -120,14 +120,14 @@ # seconds spent waiting for db txns, excluding scheduling time, when processing # this request -in_flight_requests_db_txn_duration = Counter( +in_flight_requests_db_txn_duration = SynapseCounter( "synapse_http_server_in_flight_requests_db_txn_duration_seconds", "", labelnames=["method", "servlet", SERVER_NAME_LABEL], ) # seconds spent waiting for a db connection, when processing this request -in_flight_requests_db_sched_duration = Counter( +in_flight_requests_db_sched_duration = SynapseCounter( "synapse_http_server_in_flight_requests_db_sched_duration_seconds", "", labelnames=["method", "servlet", SERVER_NAME_LABEL], diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 84999594e7..06ce8e0461 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -49,7 +49,6 @@ from packaging.version import parse as parse_version from prometheus_client import ( CollectorRegistry, - Counter, Gauge, Histogram, Metric, @@ -59,6 +58,7 @@ REGISTRY, GaugeHistogramMetricFamily, GaugeMetricFamily, + Sample, ) from typing_extensions import Self @@ -208,6 +208,8 @@ def __init__( # Here is where we grab the global meter to create a FauxCounter self._counter = meter.create_counter(name, unit=unit, description=documentation) self._name = _build_full_name(self._type, name, namespace, subsystem, unit) + self._documentation = documentation + self._unit = unit # prom validates these, should we do that? # labelnames provide a simple way to register that a given set of kwargs call @@ -215,6 +217,7 @@ def __init__( self._labelnames = tuple(labelnames or ()) self._labelvalues = tuple(_labelvalues or ()) + self._metrics = {} # type: ignore[var-annotated] self._current_attributes = () self._lock = threading.Lock() @@ -229,13 +232,68 @@ def labels(self, *labelvalues: Any, **labelkwargs: Any) -> Self: raise ValueError("Incorrect label count") labelvalues = tuple(str(lv) for lv in labelvalues) - # The lock needs to block, which means it will wait until it is released for the - # next call. May need a timeout later, dunno - if self._lock.acquire_lock(): + with self._lock: self._current_attributes = labelvalues return self + def _child_samples(self) -> Iterable[Sample]: # pragma: no cover + raise NotImplementedError("_child_samples() must be implemented by %r" % self) + + def _is_parent(self): # type: ignore[no-untyped-def] + return self._labelnames and not self._labelvalues + + def _samples(self) -> Iterable[Sample]: + if self._is_parent(): + return self._multi_samples() + else: + return self._child_samples() + + def _multi_samples(self) -> Iterable[Sample]: + with self._lock: + metrics = self._metrics.copy() + for labels, metric in metrics.items(): + series_labels = list(zip(self._labelnames, labels)) + for ( + suffix, + sample_labels, + value, + timestamp, + exemplar, + native_histogram_value, + ) in metric._samples(): + yield Sample( + suffix, + dict(series_labels + list(sample_labels.items())), + value, + timestamp, + exemplar, + native_histogram_value, + ) + + def _get_metric(self): # type: ignore[no-untyped-def] + return Metric(self._name, self._documentation, self._type, self._unit) + + def collect(self) -> Iterable[Metric]: + metric = self._get_metric() + for ( + suffix, + labels, + value, + timestamp, + exemplar, + native_histogram_value, + ) in self._samples(): + metric.add_sample( + self._name + suffix, + labels, + value, + timestamp, + exemplar, + native_histogram_value, + ) + return [metric] + def inc(self, amount: float = 1.0) -> None: # Need to verify what happens with Counters that do not have labels as children, # this may not be appropriate in those cases. Can probably just leave the @@ -696,21 +754,21 @@ def collect(self) -> Iterable[Metric]: # Federation Metrics # -sent_transactions_counter = Counter( +sent_transactions_counter = SynapseCounter( "synapse_federation_client_sent_transactions", "", labelnames=[SERVER_NAME_LABEL] ) -events_processed_counter = Counter( +events_processed_counter = SynapseCounter( "synapse_federation_client_events_processed", "", labelnames=[SERVER_NAME_LABEL] ) -event_processing_loop_counter = Counter( +event_processing_loop_counter = SynapseCounter( "synapse_event_processing_loop_count", "Event processing loop iterations", labelnames=["name", SERVER_NAME_LABEL], ) -event_processing_loop_room_count = Counter( +event_processing_loop_room_count = SynapseCounter( "synapse_event_processing_loop_room_count", "Rooms seen per event processing loop iteration", labelnames=["name", SERVER_NAME_LABEL], diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index c871598680..a389836c89 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -38,7 +38,7 @@ ) from prometheus_client import Metric -from prometheus_client.core import REGISTRY, Counter, Gauge +from prometheus_client.core import REGISTRY, Gauge from typing_extensions import Concatenate, ParamSpec from twisted.internet import defer @@ -54,7 +54,7 @@ start_active_span, start_active_span_follows_from, ) -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter from synapse.metrics._types import Collector if TYPE_CHECKING: @@ -74,7 +74,7 @@ logger = logging.getLogger(__name__) -_background_process_start_count = Counter( +_background_process_start_count = SynapseCounter( "synapse_background_process_start_count", "Number of background processes started", labelnames=["name", SERVER_NAME_LABEL], @@ -90,28 +90,28 @@ # the default registry. Instead we collect them all via the CustomCollector, # which ensures that we can update them before they are collected. # -_background_process_ru_utime = Counter( +_background_process_ru_utime = SynapseCounter( "synapse_background_process_ru_utime_seconds", "User CPU time used by background processes, in seconds", labelnames=["name", SERVER_NAME_LABEL], registry=None, ) -_background_process_ru_stime = Counter( +_background_process_ru_stime = SynapseCounter( "synapse_background_process_ru_stime_seconds", "System CPU time used by background processes, in seconds", labelnames=["name", SERVER_NAME_LABEL], registry=None, ) -_background_process_db_txn_count = Counter( +_background_process_db_txn_count = SynapseCounter( "synapse_background_process_db_txn_count", "Number of database transactions done by background processes", labelnames=["name", SERVER_NAME_LABEL], registry=None, ) -_background_process_db_txn_duration = Counter( +_background_process_db_txn_duration = SynapseCounter( "synapse_background_process_db_txn_duration_seconds", ( "Seconds spent by background processes waiting for database " @@ -121,7 +121,7 @@ registry=None, ) -_background_process_db_sched_duration = Counter( +_background_process_db_sched_duration = SynapseCounter( "synapse_background_process_db_sched_duration_seconds", "Seconds spent by background processes waiting for database connections", labelnames=["name", SERVER_NAME_LABEL], diff --git a/synapse/notifier.py b/synapse/notifier.py index 4a75d07e37..2ab5e45e66 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -35,7 +35,6 @@ ) import attr -from prometheus_client import Counter from twisted.internet import defer from twisted.internet.defer import Deferred @@ -47,7 +46,7 @@ from synapse.logging import issue9533_logger from synapse.logging.context import PreserveLoggingContext from synapse.logging.opentracing import log_kv, start_active_span -from synapse.metrics import SERVER_NAME_LABEL, LaterGauge +from synapse.metrics import SERVER_NAME_LABEL, LaterGauge, SynapseCounter from synapse.streams.config import PaginationConfig from synapse.types import ( ISynapseReactor, @@ -72,11 +71,11 @@ logger = logging.getLogger(__name__) # FIXME: Unused metric, remove if not needed. -notified_events_counter = Counter( +notified_events_counter = SynapseCounter( "synapse_notifier_notified_events", "", labelnames=[SERVER_NAME_LABEL] ) -users_woken_by_stream_counter = Counter( +users_woken_by_stream_counter = SynapseCounter( "synapse_notifier_users_woken_by_stream", "", labelnames=["stream", SERVER_NAME_LABEL], diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 9fcd7fdc6e..c62d730ec5 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -31,8 +31,6 @@ cast, ) -from prometheus_client import Counter - from twisted.internet.defer import Deferred from synapse.api.constants import ( @@ -47,7 +45,7 @@ from synapse.events import EventBase, relation_from_event from synapse.events.snapshot import EventContext, EventPersistencePair from synapse.logging.context import make_deferred_yieldable, run_in_background -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter from synapse.state import CREATE_KEY, POWER_KEY from synapse.storage.databases.main.roommember import EventIdMembership from synapse.storage.invite_rule import InviteRule @@ -67,13 +65,13 @@ logger = logging.getLogger(__name__) # FIXME: Unused metric, remove if not needed. -push_rules_invalidation_counter = Counter( +push_rules_invalidation_counter = SynapseCounter( "synapse_push_bulk_push_rule_evaluator_push_rules_invalidation_counter", "", labelnames=[SERVER_NAME_LABEL], ) # FIXME: Unused metric, remove if not needed. -push_rules_state_size_counter = Counter( +push_rules_state_size_counter = SynapseCounter( "synapse_push_bulk_push_rule_evaluator_push_rules_state_size_counter", "", labelnames=[SERVER_NAME_LABEL], diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index 8df106b859..bba47808b0 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -23,15 +23,13 @@ import urllib.parse from typing import TYPE_CHECKING, Optional, Union -from prometheus_client import Counter - from twisted.internet.error import AlreadyCalled, AlreadyCancelled from twisted.internet.interfaces import IDelayedCall from synapse.api.constants import EventTypes from synapse.events import EventBase from synapse.logging import opentracing -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter from synapse.push import Pusher, PusherConfig, PusherConfigException from synapse.storage.databases.main.event_push_actions import HttpPushAction from synapse.types import JsonDict, JsonMapping @@ -43,25 +41,25 @@ logger = logging.getLogger(__name__) -http_push_processed_counter = Counter( +http_push_processed_counter = SynapseCounter( "synapse_http_httppusher_http_pushes_processed", "Number of push notifications successfully sent", labelnames=[SERVER_NAME_LABEL], ) -http_push_failed_counter = Counter( +http_push_failed_counter = SynapseCounter( "synapse_http_httppusher_http_pushes_failed", "Number of push notifications which failed", labelnames=[SERVER_NAME_LABEL], ) -http_badges_processed_counter = Counter( +http_badges_processed_counter = SynapseCounter( "synapse_http_httppusher_badge_updates_processed", "Number of badge updates successfully sent", labelnames=[SERVER_NAME_LABEL], ) -http_badges_failed_counter = Counter( +http_badges_failed_counter = SynapseCounter( "synapse_http_httppusher_badge_updates_failed", "Number of badge updates which failed", labelnames=[SERVER_NAME_LABEL], diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py index 3dac61aed5..73ac13196a 100644 --- a/synapse/push/mailer.py +++ b/synapse/push/mailer.py @@ -26,13 +26,12 @@ import bleach import jinja2 from markupsafe import Markup -from prometheus_client import Counter from synapse.api.constants import EventContentFields, EventTypes, Membership, RoomTypes from synapse.api.errors import StoreError from synapse.config.emailconfig import EmailSubjectConfig from synapse.events import EventBase -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter from synapse.push.presentable_names import ( calculate_room_name, descriptor_from_member_events, @@ -58,7 +57,7 @@ T = TypeVar("T") -emails_sent_counter = Counter( +emails_sent_counter = SynapseCounter( "synapse_emails_sent_total", "Emails sent by type", labelnames=["type", SERVER_NAME_LABEL], diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py index d76b40cf39..f134140731 100644 --- a/synapse/replication/http/_base.py +++ b/synapse/replication/http/_base.py @@ -25,7 +25,7 @@ from inspect import signature from typing import TYPE_CHECKING, Any, Awaitable, Callable, ClassVar -from prometheus_client import Counter, Gauge +from prometheus_client import Gauge from twisted.internet.error import ConnectError, DNSLookupError from twisted.web.server import Request @@ -38,7 +38,7 @@ from synapse.http.site import SynapseRequest from synapse.logging import opentracing from synapse.logging.opentracing import trace_with_opname -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter from synapse.types import JsonDict from synapse.util.caches.response_cache import ResponseCache from synapse.util.cancellation import is_function_cancellable @@ -55,7 +55,7 @@ labelnames=["name", SERVER_NAME_LABEL], ) -_outgoing_request_counter = Counter( +_outgoing_request_counter = SynapseCounter( "synapse_outgoing_replication_requests", "Number of outgoing replication requests, by replication method name and result", labelnames=["name", "code", SERVER_NAME_LABEL], diff --git a/synapse/replication/tcp/external_cache.py b/synapse/replication/tcp/external_cache.py index bcdd55d2e6..9f2b790c70 100644 --- a/synapse/replication/tcp/external_cache.py +++ b/synapse/replication/tcp/external_cache.py @@ -22,11 +22,11 @@ import logging from typing import TYPE_CHECKING, Any, Optional -from prometheus_client import Counter, Histogram +from prometheus_client import Histogram from synapse.logging import opentracing from synapse.logging.context import make_deferred_yieldable -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter from synapse.util.json import json_decoder, json_encoder if TYPE_CHECKING: @@ -34,13 +34,13 @@ from synapse.server import HomeServer -set_counter = Counter( +set_counter = SynapseCounter( "synapse_external_cache_set", "Number of times we set a cache", labelnames=["cache_name", SERVER_NAME_LABEL], ) -get_counter = Counter( +get_counter = SynapseCounter( "synapse_external_cache_get", "Number of times we get a cache", labelnames=["cache_name", "hit", SERVER_NAME_LABEL], diff --git a/synapse/replication/tcp/handler.py b/synapse/replication/tcp/handler.py index bd1ee5ff9d..f0c7ec7748 100644 --- a/synapse/replication/tcp/handler.py +++ b/synapse/replication/tcp/handler.py @@ -32,11 +32,9 @@ Union, ) -from prometheus_client import Counter - from twisted.internet.protocol import ReconnectingClientFactory -from synapse.metrics import SERVER_NAME_LABEL, LaterGauge +from synapse.metrics import SERVER_NAME_LABEL, LaterGauge, SynapseCounter from synapse.replication.tcp.commands import ( ClearUserSyncsCommand, Command, @@ -79,25 +77,25 @@ # number of updates received for each RDATA stream -inbound_rdata_count = Counter( +inbound_rdata_count = SynapseCounter( "synapse_replication_tcp_protocol_inbound_rdata_count", "", labelnames=["stream_name", SERVER_NAME_LABEL], ) -user_sync_counter = Counter( +user_sync_counter = SynapseCounter( "synapse_replication_tcp_resource_user_sync", "", labelnames=[SERVER_NAME_LABEL] ) -federation_ack_counter = Counter( +federation_ack_counter = SynapseCounter( "synapse_replication_tcp_resource_federation_ack", "", labelnames=[SERVER_NAME_LABEL], ) # FIXME: Unused metric, remove if not needed. -remove_pusher_counter = Counter( +remove_pusher_counter = SynapseCounter( "synapse_replication_tcp_resource_remove_pusher", "", labelnames=[SERVER_NAME_LABEL] ) -user_ip_cache_counter = Counter( +user_ip_cache_counter = SynapseCounter( "synapse_replication_tcp_resource_user_ip_cache", "", labelnames=[SERVER_NAME_LABEL] ) diff --git a/synapse/replication/tcp/protocol.py b/synapse/replication/tcp/protocol.py index 733643cb64..aabc0f0201 100644 --- a/synapse/replication/tcp/protocol.py +++ b/synapse/replication/tcp/protocol.py @@ -30,7 +30,6 @@ from inspect import isawaitable from typing import TYPE_CHECKING, Any, Collection, Optional -from prometheus_client import Counter from zope.interface import Interface, implementer from twisted.internet import task @@ -39,7 +38,7 @@ from twisted.python.failure import Failure from synapse.logging.context import PreserveLoggingContext -from synapse.metrics import SERVER_NAME_LABEL, LaterGauge +from synapse.metrics import SERVER_NAME_LABEL, LaterGauge, SynapseCounter from synapse.metrics.background_process_metrics import ( BackgroundProcessLoggingContext, ) @@ -62,19 +61,19 @@ from synapse.server import HomeServer -connection_close_counter = Counter( +connection_close_counter = SynapseCounter( "synapse_replication_tcp_protocol_close_reason", "", labelnames=["reason_type", SERVER_NAME_LABEL], ) -tcp_inbound_commands_counter = Counter( +tcp_inbound_commands_counter = SynapseCounter( "synapse_replication_tcp_protocol_inbound_commands", "Number of commands received from replication, by command and name of process connected to", labelnames=["command", "name", SERVER_NAME_LABEL], ) -tcp_outbound_commands_counter = Counter( +tcp_outbound_commands_counter = SynapseCounter( "synapse_replication_tcp_protocol_outbound_commands", "Number of commands sent to replication, by command and name of process connected to", labelnames=["command", "name", SERVER_NAME_LABEL], diff --git a/synapse/replication/tcp/resource.py b/synapse/replication/tcp/resource.py index 8df0a3853f..f1d0b17e0d 100644 --- a/synapse/replication/tcp/resource.py +++ b/synapse/replication/tcp/resource.py @@ -24,12 +24,10 @@ import random from typing import TYPE_CHECKING, Optional -from prometheus_client import Counter - from twisted.internet.interfaces import IAddress from twisted.internet.protocol import ServerFactory -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter from synapse.replication.tcp.commands import PositionCommand from synapse.replication.tcp.protocol import ServerReplicationStreamProtocol from synapse.replication.tcp.streams import EventsStream @@ -39,7 +37,7 @@ if TYPE_CHECKING: from synapse.server import HomeServer -stream_updates_counter = Counter( +stream_updates_counter = SynapseCounter( "synapse_replication_tcp_resource_stream_updates", "", labelnames=["stream_name", SERVER_NAME_LABEL], diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 991e1f847a..1cdd133304 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -33,7 +33,7 @@ import attr from immutabledict import immutabledict -from prometheus_client import Counter, Histogram +from prometheus_client import Histogram from synapse.api.constants import EventTypes from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, StateResolutionVersions @@ -45,7 +45,7 @@ ) from synapse.logging.context import ContextResourceUsage from synapse.logging.opentracing import tag_args, trace -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter from synapse.replication.http.state import ReplicationUpdateCurrentStateRestServlet from synapse.state import v1, v2 from synapse.storage.databases.main.event_federation import StateDifference @@ -600,13 +600,13 @@ class _StateResMetrics: db_events: int = 0 -_biggest_room_by_cpu_counter = Counter( +_biggest_room_by_cpu_counter = SynapseCounter( "synapse_state_res_cpu_for_biggest_room_seconds", "CPU time spent performing state resolution for the single most expensive " "room for state resolution", labelnames=[SERVER_NAME_LABEL], ) -_biggest_room_by_db_counter = Counter( +_biggest_room_by_db_counter = SynapseCounter( "synapse_state_res_db_for_biggest_room_seconds", "Database time spent performing state resolution for the single most " "expensive room for state resolution", @@ -858,7 +858,7 @@ def _report_biggest( self, extract_key: Callable[[_StateResMetrics], Any], metric_name: str, - prometheus_counter_metric: Counter, + prometheus_counter_metric: SynapseCounter, ) -> None: """Report metrics on the biggest rooms for state res diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index 0daf4830d9..3b6ddf0e01 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -40,7 +40,7 @@ ) import attr -from prometheus_client import Counter, Histogram +from prometheus_client import Histogram from twisted.internet import defer @@ -56,7 +56,7 @@ start_active_span_follows_from, trace, ) -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter from synapse.storage.controllers.state import StateStorageController from synapse.storage.databases import Databases from synapse.storage.databases.main.events import DeltaState @@ -77,13 +77,13 @@ logger = logging.getLogger(__name__) # The number of times we are recalculating the current state -state_delta_counter = Counter( +state_delta_counter = SynapseCounter( "synapse_storage_events_state_delta", "", labelnames=[SERVER_NAME_LABEL] ) # The number of times we are recalculating state when there is only a # single forward extremity -state_delta_single_event_counter = Counter( +state_delta_single_event_counter = SynapseCounter( "synapse_storage_events_state_delta_single_event", "", labelnames=[SERVER_NAME_LABEL], @@ -92,7 +92,7 @@ # The number of times we are reculating state when we could have resonably # calculated the delta when we calculated the state for an event we were # persisting. -state_delta_reuse_delta_counter = Counter( +state_delta_reuse_delta_counter = SynapseCounter( "synapse_storage_events_state_delta_reuse_delta", "", labelnames=[SERVER_NAME_LABEL] ) @@ -113,19 +113,19 @@ buckets=(0, 1, 2, 3, 5, 7, 10, 15, 20, 50, 100, 200, 500, "+Inf"), ) -state_resolutions_during_persistence = Counter( +state_resolutions_during_persistence = SynapseCounter( "synapse_storage_events_state_resolutions_during_persistence", "Number of times we had to do state res to calculate new current state", labelnames=[SERVER_NAME_LABEL], ) -potential_times_prune_extremities = Counter( +potential_times_prune_extremities = SynapseCounter( "synapse_storage_events_potential_times_prune_extremities", "Number of times we might be able to prune extremities", labelnames=[SERVER_NAME_LABEL], ) -times_pruned_extremities = Counter( +times_pruned_extremities = SynapseCounter( "synapse_storage_events_times_pruned_extremities", "Number of times we were actually be able to prune extremities", labelnames=[SERVER_NAME_LABEL], diff --git a/synapse/storage/database.py b/synapse/storage/database.py index b7f870bd26..c10e5b6439 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -43,7 +43,7 @@ ) import attr -from prometheus_client import Counter, Histogram +from prometheus_client import Histogram from typing_extensions import Concatenate, ParamSpec from twisted.enterprise import adbapi @@ -57,7 +57,7 @@ current_context, make_deferred_yieldable, ) -from synapse.metrics import SERVER_NAME_LABEL, register_threadpool +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter, register_threadpool from synapse.storage.background_updates import BackgroundUpdater from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine from synapse.storage.types import Connection, Cursor, SQLQueryParameters @@ -84,12 +84,12 @@ sql_query_timer = Histogram( "synapse_storage_query_time", "sec", labelnames=["verb", SERVER_NAME_LABEL] ) -sql_txn_count = Counter( +sql_txn_count = SynapseCounter( "synapse_storage_transaction_time_count", "sec", labelnames=["desc", SERVER_NAME_LABEL], ) -sql_txn_duration = Counter( +sql_txn_duration = SynapseCounter( "synapse_storage_transaction_time_sum", "sec", labelnames=["desc", SERVER_NAME_LABEL], diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 0a8571f0c8..c945207abc 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -33,14 +33,14 @@ ) import attr -from prometheus_client import Counter, Gauge +from prometheus_client import Gauge from synapse.api.constants import MAX_DEPTH from synapse.api.errors import StoreError from synapse.api.room_versions import EventFormatVersions, RoomVersion from synapse.events import EventBase, make_event_from_dict from synapse.logging.opentracing import tag_args, trace -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.storage._base import db_to_json, make_in_list_sql_clause from synapse.storage.background_updates import ForeignKeyConstraint @@ -75,7 +75,7 @@ labelnames=[SERVER_NAME_LABEL], ) -pdus_pruned_from_federation_queue = Counter( +pdus_pruned_from_federation_queue = SynapseCounter( "synapse_federation_server_number_inbound_pdu_pruned", "The number of events in the inbound federation staging that have been " "pruned due to the queue getting too long", diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index da9ecfbdb9..0013e59f52 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -36,7 +36,6 @@ ) import attr -from prometheus_client import Counter import synapse.metrics from synapse.api.constants import ( @@ -56,7 +55,7 @@ from synapse.events.snapshot import EventPersistencePair from synapse.events.utils import parse_stripped_state_event from synapse.logging.opentracing import trace -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter from synapse.storage._base import db_to_json, make_in_list_sql_clause from synapse.storage.database import ( DatabasePool, @@ -91,10 +90,10 @@ logger = logging.getLogger(__name__) -persist_event_counter = Counter( +persist_event_counter = SynapseCounter( "synapse_storage_events_persisted_events", "", labelnames=[SERVER_NAME_LABEL] ) -event_counter = Counter( +event_counter = SynapseCounter( "synapse_storage_events_persisted_events_sep", "", labelnames=["type", "origin_type", "origin_entity", SERVER_NAME_LABEL], diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index 6f3f4adb0c..7163540ab3 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -31,7 +31,7 @@ TypeVar, ) -from prometheus_client import CollectorRegistry, Counter, Metric +from prometheus_client import CollectorRegistry, Metric from typing_extensions import Concatenate, ParamSpec from synapse.logging.context import ( @@ -53,28 +53,28 @@ ) """The number of times this block has been called.""" -block_timer = Counter( +block_timer = SynapseCounter( "synapse_util_metrics_block_time_seconds", documentation="The cumulative time spent executing this block across all calls, in seconds.", labelnames=["block_name", SERVER_NAME_LABEL], ) """The cumulative time spent executing this block across all calls, in seconds.""" -block_ru_utime = Counter( +block_ru_utime = SynapseCounter( "synapse_util_metrics_block_ru_utime_seconds", documentation="Resource usage: user CPU time in seconds used in this block", labelnames=["block_name", SERVER_NAME_LABEL], ) """Resource usage: user CPU time in seconds used in this block""" -block_ru_stime = Counter( +block_ru_stime = SynapseCounter( "synapse_util_metrics_block_ru_stime_seconds", documentation="Resource usage: system CPU time in seconds used in this block", labelnames=["block_name", SERVER_NAME_LABEL], ) """Resource usage: system CPU time in seconds used in this block""" -block_db_txn_count = Counter( +block_db_txn_count = SynapseCounter( "synapse_util_metrics_block_db_txn_count", documentation="Number of database transactions completed in this block", labelnames=["block_name", SERVER_NAME_LABEL], @@ -82,7 +82,7 @@ """Number of database transactions completed in this block""" # seconds spent waiting for db txns, excluding scheduling time, in this block -block_db_txn_duration = Counter( +block_db_txn_duration = SynapseCounter( "synapse_util_metrics_block_db_txn_duration_seconds", documentation="Seconds spent waiting for database txns, excluding scheduling time, in this block", labelnames=["block_name", SERVER_NAME_LABEL], @@ -90,7 +90,7 @@ """Seconds spent waiting for database txns, excluding scheduling time, in this block""" # seconds spent waiting for a db connection, in this block -block_db_sched_duration = Counter( +block_db_sched_duration = SynapseCounter( "synapse_util_metrics_block_db_sched_duration_seconds", documentation="Seconds spent waiting for a db connection, in this block", labelnames=["block_name", SERVER_NAME_LABEL], diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py index 37d2e4505d..cb751d3a1a 100644 --- a/synapse/util/ratelimitutils.py +++ b/synapse/util/ratelimitutils.py @@ -35,8 +35,6 @@ ) from weakref import WeakSet -from prometheus_client.core import Counter - from twisted.internet import defer from synapse.api.errors import LimitExceededError @@ -47,7 +45,7 @@ run_in_background, ) from synapse.logging.opentracing import start_active_span -from synapse.metrics import SERVER_NAME_LABEL, Histogram, LaterGauge +from synapse.metrics import SERVER_NAME_LABEL, Histogram, LaterGauge, SynapseCounter from synapse.util.clock import Clock if typing.TYPE_CHECKING: @@ -57,12 +55,12 @@ # Track how much the ratelimiter is affecting requests -rate_limit_sleep_counter = Counter( +rate_limit_sleep_counter = SynapseCounter( "synapse_rate_limit_sleep", "Number of requests slept by the rate limiter", labelnames=["rate_limiter_name", SERVER_NAME_LABEL], ) -rate_limit_reject_counter = Counter( +rate_limit_reject_counter = SynapseCounter( "synapse_rate_limit_reject", "Number of requests rejected by the rate limiter", labelnames=["rate_limiter_name", SERVER_NAME_LABEL], From 9c8fc7e270fcb93dce03e13a42bb2c7cb3708268 Mon Sep 17 00:00:00 2001 From: FrenchgGithubUser Date: Tue, 16 Dec 2025 10:38:03 +0100 Subject: [PATCH 6/9] move general methods to wrapper class --- synapse/metrics/__init__.py | 82 ++++++++++++++++++++++++++++--------- 1 file changed, 63 insertions(+), 19 deletions(-) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 06ce8e0461..5d512c2052 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -60,7 +60,7 @@ GaugeMetricFamily, Sample, ) -from typing_extensions import Self +from typing_extensions import Dict, Self from twisted.python.threadpool import ThreadPool from twisted.web.resource import Resource @@ -191,37 +191,50 @@ def _build_full_name( return full_name -class SynapseCounter: - _type: str = "counter" +T = TypeVar("T", bound="SynapseMetricWrapperBase") + +class SynapseMetricWrapperBase: def __init__( - self, + self: T, name: str, documentation: str, labelnames: Iterable[str] = (), namespace: str = "", subsystem: str = "", unit: str = "", - registry: Optional[CollectorRegistry] = None, + registry: Optional[CollectorRegistry] = REGISTRY, _labelvalues: Optional[Sequence[str]] = None, ) -> None: - # Here is where we grab the global meter to create a FauxCounter - self._counter = meter.create_counter(name, unit=unit, description=documentation) + self._type: str = "" + self._original_name = name + self._namespace = namespace + self._subsystem = subsystem self._name = _build_full_name(self._type, name, namespace, subsystem, unit) - self._documentation = documentation - self._unit = unit - # prom validates these, should we do that? # labelnames provide a simple way to register that a given set of kwargs call # from labels can be used. All should be used in a call? self._labelnames = tuple(labelnames or ()) - self._labelvalues = tuple(_labelvalues or ()) + self._kwargs: Dict[str, Any] = {} + self._documentation = documentation + self._unit = unit self._metrics = {} # type: ignore[var-annotated] - - self._current_attributes = () self._lock = threading.Lock() + # if self._is_parent(): + # # Prepare the fields needed for child metrics. + # self._lock = Lock() + # self._metrics: Dict[Sequence[str], T] = {} + + # if self._is_observable(): + # self._metric_init() + + # if not self._labelvalues: + # # Register the multi-wrapper parent metric, or if a label-less metric, the whole shebang. + # if registry: + # registry.register(self) + def labels(self, *labelvalues: Any, **labelkwargs: Any) -> Self: if labelkwargs: if sorted(labelkwargs) != sorted(self._labelnames): @@ -237,12 +250,12 @@ def labels(self, *labelvalues: Any, **labelkwargs: Any) -> Self: return self - def _child_samples(self) -> Iterable[Sample]: # pragma: no cover - raise NotImplementedError("_child_samples() must be implemented by %r" % self) - def _is_parent(self): # type: ignore[no-untyped-def] return self._labelnames and not self._labelvalues + def _child_samples(self) -> Iterable[Sample]: # pragma: no cover + raise NotImplementedError("_child_samples() must be implemented by %r" % self) + def _samples(self) -> Iterable[Sample]: if self._is_parent(): return self._multi_samples() @@ -271,6 +284,37 @@ def _multi_samples(self) -> Iterable[Sample]: native_histogram_value, ) + +class SynapseCounter(SynapseMetricWrapperBase): + def __init__( + self, + name: str, + documentation: str, + labelnames: Iterable[str] = (), + namespace: str = "", + subsystem: str = "", + unit: str = "", + registry: Optional[CollectorRegistry] = REGISTRY, + _labelvalues: Optional[Sequence[str]] = None, + ) -> None: + super().__init__( + name, + documentation, + labelnames, + namespace, + subsystem, + unit, + registry, + _labelvalues, + ) + self._type = "counter" + # Here is where we grab the global meter to create a FauxCounter + self._counter = meter.create_counter( + self._name, unit=self._unit, description=self._documentation + ) + + self._current_attributes = () + def _get_metric(self): # type: ignore[no-untyped-def] return Metric(self._name, self._documentation, self._type, self._unit) @@ -299,9 +343,9 @@ def inc(self, amount: float = 1.0) -> None: # this may not be appropriate in those cases. Can probably just leave the # attributes param as empty in that case? self._counter.add(amount, dict(zip(self._labelnames, self._current_attributes))) - # If this was a "child" metric, then the lock will have been taken in labels() - if self._lock.locked(): - self._lock.release() + # # If this was a "child" metric, then the lock will have been taken in labels() + # if self._lock.locked(): + # self._lock.release() @attr.s(slots=True, hash=True, auto_attribs=True, kw_only=True) From 49eb3cca4f3b2764591c0baab857fe88b4b3c86d Mon Sep 17 00:00:00 2001 From: FrenchgGithubUser Date: Tue, 16 Dec 2025 16:54:28 +0100 Subject: [PATCH 7/9] create mock for Gauge class --- synapse/app/phone_stats_home.py | 12 +- synapse/federation/federation_server.py | 6 +- .../federation/sender/transaction_manager.py | 6 +- synapse/metrics/__init__.py | 335 +++++++++++++++--- synapse/metrics/background_process_metrics.py | 6 +- synapse/metrics/common_usage_metrics.py | 11 +- synapse/push/pusherpool.py | 6 +- synapse/replication/http/_base.py | 6 +- .../databases/main/event_federation.py | 7 +- .../storage/databases/main/events_worker.py | 5 +- synapse/util/batching_queue.py | 12 +- synapse/util/caches/deferred_cache.py | 6 +- tests/handlers/test_stats.py | 12 +- tests/util/test_batching_queue.py | 4 +- 14 files changed, 336 insertions(+), 98 deletions(-) diff --git a/synapse/app/phone_stats_home.py b/synapse/app/phone_stats_home.py index 13a0e3db7c..6931e0f875 100644 --- a/synapse/app/phone_stats_home.py +++ b/synapse/app/phone_stats_home.py @@ -24,11 +24,9 @@ import sys from typing import TYPE_CHECKING, Mapping, Sized -from prometheus_client import Gauge - from twisted.internet import defer -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseGauge from synapse.types import JsonDict from synapse.util.constants import ( MILLISECONDS_PER_SECOND, @@ -57,22 +55,22 @@ _stats_process: list[tuple[int, "resource.struct_rusage"]] = [] # Gauges to expose monthly active user control metrics -current_mau_gauge = Gauge( +current_mau_gauge = SynapseGauge( "synapse_admin_mau_current", "Current MAU", labelnames=[SERVER_NAME_LABEL], ) -current_mau_by_service_gauge = Gauge( +current_mau_by_service_gauge = SynapseGauge( "synapse_admin_mau_current_mau_by_service", "Current MAU by service", labelnames=["app_service", SERVER_NAME_LABEL], ) -max_mau_gauge = Gauge( +max_mau_gauge = SynapseGauge( "synapse_admin_mau_max", "MAU Limit", labelnames=[SERVER_NAME_LABEL], ) -registered_reserved_users_mau_gauge = Gauge( +registered_reserved_users_mau_gauge = SynapseGauge( "synapse_admin_mau_registered_reserved_users", "Registered users with reserved threepids", labelnames=[SERVER_NAME_LABEL], diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index cf3eff7e3f..2111db2c0a 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -32,7 +32,7 @@ Union, ) -from prometheus_client import Gauge, Histogram +from prometheus_client import Histogram from twisted.python import failure @@ -79,7 +79,7 @@ tag_args, trace, ) -from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter, SynapseGauge from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.replication.http.federation import ( ReplicationFederationSendEduRestServlet, @@ -122,7 +122,7 @@ labelnames=[SERVER_NAME_LABEL], ) -last_pdu_ts_metric = Gauge( +last_pdu_ts_metric = SynapseGauge( "synapse_federation_last_received_pdu_time", "The timestamp of the last PDU which was successfully received from the given domain", labelnames=("origin_server_name", SERVER_NAME_LABEL), diff --git a/synapse/federation/sender/transaction_manager.py b/synapse/federation/sender/transaction_manager.py index 99aa05ebd6..47315527db 100644 --- a/synapse/federation/sender/transaction_manager.py +++ b/synapse/federation/sender/transaction_manager.py @@ -20,8 +20,6 @@ import logging from typing import TYPE_CHECKING -from prometheus_client import Gauge - from synapse.api.constants import EduTypes from synapse.api.errors import HttpResponseException from synapse.events import EventBase @@ -34,7 +32,7 @@ tags, whitelisted_homeserver, ) -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseGauge from synapse.types import JsonDict from synapse.util.json import json_decoder from synapse.util.metrics import measure_func @@ -45,7 +43,7 @@ logger = logging.getLogger(__name__) issue_8631_logger = logging.getLogger("synapse.8631_debug") -last_pdu_ts_metric = Gauge( +last_pdu_ts_metric = SynapseGauge( "synapse_federation_last_sent_pdu_time", "The timestamp of the last PDU which was successfully sent to the given domain", labelnames=("destination_server_name", SERVER_NAME_LABEL), diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 5d512c2052..9d107fa285 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -26,11 +26,14 @@ import platform import threading from importlib import metadata +from time import time +from types import MethodType from typing import ( Any, Callable, Generic, Iterable, + Literal, Mapping, Optional, Sequence, @@ -49,7 +52,6 @@ from packaging.version import parse as parse_version from prometheus_client import ( CollectorRegistry, - Gauge, Histogram, Metric, generate_latest, @@ -60,6 +62,8 @@ GaugeMetricFamily, Sample, ) +from prometheus_client.metrics import _get_use_created +from prometheus_client.values import ValueClass from typing_extensions import Dict, Self from twisted.python.threadpool import ThreadPool @@ -195,6 +199,19 @@ def _build_full_name( class SynapseMetricWrapperBase: + def _raise_if_not_observable(self) -> None: + # Functions that mutate the state of the metric, for example incrementing + # a counter, will fail if the metric is not observable, because only if a + # metric is observable will the value be initialized. + if not self._is_observable(): + raise ValueError("%s metric is missing label values" % str(self._type)) + + def _is_observable(self): # type: ignore[no-untyped-def] + # Whether this metric is observable, i.e. + # * a metric without label names and values, or + # * the child of a labelled metric. + return not self._labelnames or (self._labelnames and self._labelvalues) + def __init__( self: T, name: str, @@ -227,28 +244,92 @@ def __init__( # self._lock = Lock() # self._metrics: Dict[Sequence[str], T] = {} - # if self._is_observable(): - # self._metric_init() + if self._is_observable(): + self._metric_init() # if not self._labelvalues: # # Register the multi-wrapper parent metric, or if a label-less metric, the whole shebang. # if registry: # registry.register(self) + self._registry = registry + + def _metric_init(self): # type: ignore[no-untyped-def] # pragma: no cover + """ + Initialize the metric object as a child, i.e. when it has labels (if any) set. + + This is factored as a separate function to allow for deferred initialization. + """ + raise NotImplementedError("_metric_init() must be implemented by %r" % self) def labels(self, *labelvalues: Any, **labelkwargs: Any) -> Self: + if not self._labelnames: + raise ValueError("No label names were set when constructing %s" % self) + + if self._labelvalues: + raise ValueError( + "{} already has labels set ({}); can not chain calls to .labels()".format( + self, dict(zip(self._labelnames, self._labelvalues)) + ) + ) + + if labelvalues and labelkwargs: + raise ValueError("Can't pass both *args and **kwargs") + if labelkwargs: if sorted(labelkwargs) != sorted(self._labelnames): raise ValueError("Incorrect label names") - labelvalues = tuple(str(labelkwargs[lv]) for lv in self._labelnames) + labelvalues = tuple( + str(labelkwargs[lablename]) for lablename in self._labelnames + ) else: if len(labelvalues) != len(self._labelnames): raise ValueError("Incorrect label count") - labelvalues = tuple(str(lv) for lv in labelvalues) - + labelvalues = tuple(str(labelvalue) for labelvalue in labelvalues) with self._lock: - self._current_attributes = labelvalues + if labelvalues not in self._metrics: + original_name = getattr(self, "_original_name", self._name) + namespace = getattr(self, "_namespace", "") + subsystem = getattr(self, "_subsystem", "") + unit = getattr(self, "_unit", "") + + child_kwargs = dict(self._kwargs) if self._kwargs else {} + for k in ("namespace", "subsystem", "unit"): + child_kwargs.pop(k, None) + + self._metrics[labelvalues] = self.__class__( + original_name, + documentation=self._documentation, + labelnames=self._labelnames, + namespace=namespace, + subsystem=subsystem, + unit=unit, + _labelvalues=labelvalues, + **child_kwargs, + ) + return self._metrics[labelvalues] - return self + def _get_metric(self): # type: ignore[no-untyped-def] + return Metric(self._name, self._documentation, self._type, self._unit) + + def collect(self) -> Iterable[Metric]: + metric = self._get_metric() + for ( + suffix, + labels, + value, + timestamp, + exemplar, + native_histogram_value, + ) in self._samples(): + metric.add_sample( + self._name + suffix, + labels, + value, + timestamp, + exemplar, + native_histogram_value, + ) + return [metric] def _is_parent(self): # type: ignore[no-untyped-def] return self._labelnames and not self._labelvalues @@ -284,6 +365,22 @@ def _multi_samples(self) -> Iterable[Sample]: native_histogram_value, ) + # not sure if this is needed, putting it there for now to make the linter happy + def remove(self, *labelvalues: Any) -> None: + if not self._labelnames: + raise ValueError("No label names were set when constructing %s" % self) + + """Remove the given labelset from the metric.""" + if len(labelvalues) != len(self._labelnames): + raise ValueError( + "Incorrect label count (expected %d, got %s)" + % (len(self._labelnames), labelvalues) + ) + labelvalues = tuple(str(labelvalue) for labelvalue in labelvalues) + with self._lock: + if labelvalues in self._metrics: + del self._metrics[labelvalues] + class SynapseCounter(SynapseMetricWrapperBase): def __init__( @@ -315,38 +412,189 @@ def __init__( self._current_attributes = () - def _get_metric(self): # type: ignore[no-untyped-def] - return Metric(self._name, self._documentation, self._type, self._unit) - - def collect(self) -> Iterable[Metric]: - metric = self._get_metric() - for ( - suffix, - labels, - value, - timestamp, - exemplar, - native_histogram_value, - ) in self._samples(): - metric.add_sample( - self._name + suffix, - labels, - value, - timestamp, - exemplar, - native_histogram_value, - ) - return [metric] - def inc(self, amount: float = 1.0) -> None: # Need to verify what happens with Counters that do not have labels as children, # this may not be appropriate in those cases. Can probably just leave the # attributes param as empty in that case? + self._value.inc(amount) self._counter.add(amount, dict(zip(self._labelnames, self._current_attributes))) # # If this was a "child" metric, then the lock will have been taken in labels() # if self._lock.locked(): # self._lock.release() + def _metric_init(self) -> None: + self._value = ValueClass( + self._type, + self._name, + self._name + "_total", + self._labelnames, + self._labelvalues, + self._documentation, + ) + self._created = time() + + def _child_samples(self) -> Iterable[Sample]: + sample = Sample( + "_total", {}, self._value.get(), None, self._value.get_exemplar() + ) + if _get_use_created(): + return (sample, Sample("_created", {}, self._created, None, None)) + return (sample,) + + +F = TypeVar("F", bound=Callable[..., Any]) + + +class InprogressTracker: + def __init__(self, gauge) -> None: # type: ignore[no-untyped-def] + self._gauge = gauge + + def __enter__(self) -> None: + self._gauge.inc() + + def __exit__(self, typ, value, traceback) -> None: # type: ignore[no-untyped-def] + self._gauge.dec() + + # def __call__(self, f: "F") -> "F": + # def wrapped(func, *args, **kwargs): + # with self: + # return func(*args, **kwargs) + + # return decorate(f, wrapped) + + +class SynapseGauge(SynapseMetricWrapperBase): + _MULTIPROC_MODES = frozenset( + ( + "all", + "liveall", + "min", + "livemin", + "max", + "livemax", + "sum", + "livesum", + "mostrecent", + "livemostrecent", + ) + ) + _MOST_RECENT_MODES = frozenset(("mostrecent", "livemostrecent")) + + def __init__( + self, + name: str, + documentation: str, + labelnames: Iterable[str] = (), + namespace: str = "", + subsystem: str = "", + unit: str = "", + registry: Optional[CollectorRegistry] = REGISTRY, + _labelvalues: Optional[Sequence[str]] = None, + multiprocess_mode: Literal[ + "all", + "liveall", + "min", + "livemin", + "max", + "livemax", + "sum", + "livesum", + "mostrecent", + "livemostrecent", + ] = "all", + ): + self._multiprocess_mode = multiprocess_mode + if multiprocess_mode not in self._MULTIPROC_MODES: + raise ValueError("Invalid multiprocess mode: " + multiprocess_mode) + super().__init__( + name=name, + documentation=documentation, + labelnames=labelnames, + namespace=namespace, + subsystem=subsystem, + unit=unit, + registry=registry, + _labelvalues=_labelvalues, + ) + self._type = "gauge" + # Here is where we grab the global meter to create a FauxGauge + self._gauge = meter.create_gauge( + self._name, unit=self._unit, description=self._documentation + ) + self._kwargs["multiprocess_mode"] = self._multiprocess_mode + self._is_most_recent = self._multiprocess_mode in self._MOST_RECENT_MODES + self._gauge_value: float = 0 + + if not self._labelvalues and self._registry: + # TODO: look into what to do here + self._registry.register(self) # type: ignore + + def set(self, value: float) -> None: + """Set gauge to the given value.""" + self._raise_if_not_observable() + if self._is_most_recent: + self._value.set(float(value), timestamp=time()) + else: + self._value.set(float(value)) + self._gauge.set(value) + self._gauge_value = value + # self._value.set(0) + + def inc(self, amount: float = 1) -> None: + """Increment gauge by the given amount.""" + if self._is_most_recent: + raise RuntimeError("inc must not be used with the mostrecent mode") + # self._raise_if_not_observable() + self._value.inc(amount) + self._gauge_value += amount + self._gauge.set(self._gauge_value) + + def dec(self, amount: float = 1) -> None: + """Decrement gauge by the given amount.""" + if self._is_most_recent: + raise RuntimeError("inc must not be used with the mostrecent mode") + # self._raise_if_not_observable() + self._gauge_value -= amount + self._gauge.set(self._gauge_value) + self._value.inc(-amount) + + def track_inprogress(self) -> InprogressTracker: + """Track inprogress blocks of code or functions. + + Can be used as a function decorator or context manager. + Increments the gauge when the code is entered, + and decrements when it is exited. + """ + # self._raise_if_not_observable() + return InprogressTracker(self) + + def set_function(self, f: Callable[[], float]) -> None: + """Call the provided function to return the Gauge value. + + The function must return a float, and may be called from + multiple threads. All other methods of the Gauge become NOOPs. + """ + # self._raise_if_not_observable() + + def samples(_: SynapseGauge) -> Iterable[Sample]: + return (Sample("", {}, float(f()), None, None),) + + self._child_samples = MethodType(samples, self) # type: ignore + + def _child_samples(self) -> Iterable[Sample]: + return (Sample("", {}, self._value.get(), None, None),) + + def _metric_init(self) -> None: + self._value = ValueClass( + self._type, + self._name, + self._name, + self._labelnames, + self._labelvalues, + self._documentation, + multiprocess_mode=self._multiprocess_mode, + ) + @attr.s(slots=True, hash=True, auto_attribs=True, kw_only=True) class LaterGauge(Collector): @@ -821,25 +1069,25 @@ def collect(self) -> Iterable[Metric]: # Used to track where various components have processed in the event stream, # e.g. federation sending, appservice sending, etc. -event_processing_positions = Gauge( +event_processing_positions = SynapseGauge( "synapse_event_processing_positions", "", labelnames=["name", SERVER_NAME_LABEL] ) # Used to track the current max events stream position -event_persisted_position = Gauge( +event_persisted_position = SynapseGauge( "synapse_event_persisted_position", "", labelnames=[SERVER_NAME_LABEL] ) # Used to track the received_ts of the last event processed by various # components -event_processing_last_ts = Gauge( +event_processing_last_ts = SynapseGauge( "synapse_event_processing_last_ts", "", labelnames=["name", SERVER_NAME_LABEL] ) # Used to track the lag processing events. This is the time difference # between the last processed event's received_ts and the time it was # finished being processed. -event_processing_lag = Gauge( +event_processing_lag = SynapseGauge( "synapse_event_processing_lag", "", labelnames=["name", SERVER_NAME_LABEL] ) @@ -854,7 +1102,7 @@ def collect(self) -> Iterable[Metric]: # This is a process-level metric, so it does not have the `SERVER_NAME_LABEL`. We # consider this process-level because all Synapse homeservers running in the process # will use the same Synapse version. -build_info = Gauge( # type: ignore[missing-server-name-label] +build_info = SynapseGauge( "synapse_build_info", "Build information", ["pythonversion", "version", "osversion"] ) build_info.labels( @@ -864,7 +1112,7 @@ def collect(self) -> Iterable[Metric]: ).set(1) # Loaded modules info -module_instances_info = Gauge( +module_instances_info = SynapseGauge( "synapse_module_info", "Information about loaded modules", labelnames=["package_name", "module_name", "module_version", SERVER_NAME_LABEL], @@ -880,38 +1128,38 @@ def collect(self) -> Iterable[Metric]: labelnames=("type", "reason", SERVER_NAME_LABEL), ) -threadpool_total_threads = Gauge( +threadpool_total_threads = SynapseGauge( "synapse_threadpool_total_threads", "Total number of threads currently in the threadpool", labelnames=["name", SERVER_NAME_LABEL], ) -threadpool_total_working_threads = Gauge( +threadpool_total_working_threads = SynapseGauge( "synapse_threadpool_working_threads", "Number of threads currently working in the threadpool", labelnames=["name", SERVER_NAME_LABEL], ) -threadpool_total_min_threads = Gauge( +threadpool_total_min_threads = SynapseGauge( "synapse_threadpool_min_threads", "Minimum number of threads configured in the threadpool", labelnames=["name", SERVER_NAME_LABEL], ) -threadpool_total_max_threads = Gauge( +threadpool_total_max_threads = SynapseGauge( "synapse_threadpool_max_threads", "Maximum number of threads configured in the threadpool", labelnames=["name", SERVER_NAME_LABEL], ) # Gauges for room counts -known_rooms_gauge = Gauge( +known_rooms_gauge = SynapseGauge( "synapse_known_rooms_total", "Total number of rooms", labelnames=[SERVER_NAME_LABEL], ) -locally_joined_rooms_gauge = Gauge( +locally_joined_rooms_gauge = SynapseGauge( "synapse_locally_joined_rooms_total", "Total number of locally joined rooms", labelnames=[SERVER_NAME_LABEL], @@ -970,4 +1218,5 @@ def render_GET(self, request: Request) -> bytes: "MIN_TIME_BETWEEN_GCS", "install_gc_manager", "SynapseCounter", + "SynapseGauge", ] diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index a389836c89..1d793bc172 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -38,7 +38,7 @@ ) from prometheus_client import Metric -from prometheus_client.core import REGISTRY, Gauge +from prometheus_client.core import REGISTRY from typing_extensions import Concatenate, ParamSpec from twisted.internet import defer @@ -54,7 +54,7 @@ start_active_span, start_active_span_follows_from, ) -from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter, SynapseGauge from synapse.metrics._types import Collector if TYPE_CHECKING: @@ -80,7 +80,7 @@ labelnames=["name", SERVER_NAME_LABEL], ) -_background_process_in_flight_count = Gauge( +_background_process_in_flight_count = SynapseGauge( "synapse_background_process_in_flight_count", "Number of background processes in flight", labelnames=["name", SERVER_NAME_LABEL], diff --git a/synapse/metrics/common_usage_metrics.py b/synapse/metrics/common_usage_metrics.py index 0c3f380177..eaa5d2fdf2 100644 --- a/synapse/metrics/common_usage_metrics.py +++ b/synapse/metrics/common_usage_metrics.py @@ -23,35 +23,34 @@ import attr -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseGauge if TYPE_CHECKING: from synapse.server import HomeServer -from prometheus_client import Gauge # Gauge to expose daily active users metrics -current_dau_gauge = Gauge( +current_dau_gauge = SynapseGauge( "synapse_admin_daily_active_users", "Current daily active users count", labelnames=[SERVER_NAME_LABEL], ) # Gauge for users -users_in_status_gauge = Gauge( +users_in_status_gauge = SynapseGauge( "synapse_user_count", "Number of users in active, deactivated, suspended, and locked status", ["status", SERVER_NAME_LABEL], ) -users_in_time_ranges_gauge = Gauge( +users_in_time_ranges_gauge = SynapseGauge( "synapse_active_users", "Number of active users in time ranges in 24h, 7d, and 30d", ["time_range", SERVER_NAME_LABEL], ) # We may want to add additional ranges in the future. -retained_users_gauge = Gauge( +retained_users_gauge = SynapseGauge( "synapse_retained_users", "Number of retained users in 30d", ["time_range", SERVER_NAME_LABEL], diff --git a/synapse/push/pusherpool.py b/synapse/push/pusherpool.py index 6b70de976a..6350e4f5b1 100644 --- a/synapse/push/pusherpool.py +++ b/synapse/push/pusherpool.py @@ -22,10 +22,8 @@ import logging from typing import TYPE_CHECKING, Iterable, Optional -from prometheus_client import Gauge - from synapse.api.errors import Codes, SynapseError -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseGauge from synapse.metrics.background_process_metrics import ( wrap_as_background_process, ) @@ -46,7 +44,7 @@ logger = logging.getLogger(__name__) -synapse_pushers = Gauge( +synapse_pushers = SynapseGauge( "synapse_pushers", "Number of active synapse pushers", labelnames=["kind", "app_id", SERVER_NAME_LABEL], diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py index f134140731..1809605f06 100644 --- a/synapse/replication/http/_base.py +++ b/synapse/replication/http/_base.py @@ -25,8 +25,6 @@ from inspect import signature from typing import TYPE_CHECKING, Any, Awaitable, Callable, ClassVar -from prometheus_client import Gauge - from twisted.internet.error import ConnectError, DNSLookupError from twisted.web.server import Request @@ -38,7 +36,7 @@ from synapse.http.site import SynapseRequest from synapse.logging import opentracing from synapse.logging.opentracing import trace_with_opname -from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter, SynapseGauge from synapse.types import JsonDict from synapse.util.caches.response_cache import ResponseCache from synapse.util.cancellation import is_function_cancellable @@ -49,7 +47,7 @@ logger = logging.getLogger(__name__) -_pending_outgoing_requests = Gauge( +_pending_outgoing_requests = SynapseGauge( "synapse_pending_outgoing_replication_requests", "Number of active outgoing replication requests, by replication method name", labelnames=["name", SERVER_NAME_LABEL], diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index c945207abc..c4eadee5b9 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -33,14 +33,13 @@ ) import attr -from prometheus_client import Gauge from synapse.api.constants import MAX_DEPTH from synapse.api.errors import StoreError from synapse.api.room_versions import EventFormatVersions, RoomVersion from synapse.events import EventBase, make_event_from_dict from synapse.logging.opentracing import tag_args, trace -from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter, SynapseGauge from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.storage._base import db_to_json, make_in_list_sql_clause from synapse.storage.background_updates import ForeignKeyConstraint @@ -63,13 +62,13 @@ if TYPE_CHECKING: from synapse.server import HomeServer -oldest_pdu_in_federation_staging = Gauge( +oldest_pdu_in_federation_staging = SynapseGauge( "synapse_federation_server_oldest_inbound_pdu_in_staging", "The age in seconds since we received the oldest pdu in the federation staging area", labelnames=[SERVER_NAME_LABEL], ) -number_pdus_in_federation_queue = Gauge( +number_pdus_in_federation_queue = SynapseGauge( "synapse_federation_server_number_inbound_pdu_in_staging", "The total number of events in the inbound federation staging", labelnames=[SERVER_NAME_LABEL], diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 005f75a2d8..da7d610e4a 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -37,7 +37,6 @@ ) import attr -from prometheus_client import Gauge from twisted.internet import defer @@ -64,7 +63,7 @@ tag_args, trace, ) -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseGauge from synapse.metrics.background_process_metrics import ( wrap_as_background_process, ) @@ -132,7 +131,7 @@ def __init__( EVENT_QUEUE_TIMEOUT_S = 0.1 # Timeout when waiting for requests for events -event_fetch_ongoing_gauge = Gauge( +event_fetch_ongoing_gauge = SynapseGauge( "synapse_event_fetch_ongoing", "The number of event fetchers that are running", labelnames=[SERVER_NAME_LABEL], diff --git a/synapse/util/batching_queue.py b/synapse/util/batching_queue.py index 514abcbec1..e676ccb71c 100644 --- a/synapse/util/batching_queue.py +++ b/synapse/util/batching_queue.py @@ -29,12 +29,10 @@ TypeVar, ) -from prometheus_client import Gauge - from twisted.internet import defer from synapse.logging.context import PreserveLoggingContext, make_deferred_yieldable -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseGauge from synapse.util.clock import Clock if TYPE_CHECKING: @@ -46,19 +44,19 @@ V = TypeVar("V") R = TypeVar("R") -number_queued = Gauge( +number_queued = SynapseGauge( "synapse_util_batching_queue_number_queued", "The number of items waiting in the queue across all keys", labelnames=("name", SERVER_NAME_LABEL), ) -number_in_flight = Gauge( +number_in_flight = SynapseGauge( "synapse_util_batching_queue_number_pending", "The number of items across all keys either being processed or waiting in a queue", labelnames=("name", SERVER_NAME_LABEL), ) -number_of_keys = Gauge( +number_of_keys = SynapseGauge( "synapse_util_batching_queue_number_of_keys", "The number of distinct keys that have items queued", labelnames=("name", SERVER_NAME_LABEL), @@ -123,7 +121,7 @@ def __init__( name=self._name, **{SERVER_NAME_LABEL: self.server_name} ).set_function(lambda: len(self._next_values)) - self._number_in_flight_metric: Gauge = number_in_flight.labels( + self._number_in_flight_metric: SynapseGauge = number_in_flight.labels( name=self._name, **{SERVER_NAME_LABEL: self.server_name} ) diff --git a/synapse/util/caches/deferred_cache.py b/synapse/util/caches/deferred_cache.py index 380f2a78ca..fdf0fc48d6 100644 --- a/synapse/util/caches/deferred_cache.py +++ b/synapse/util/caches/deferred_cache.py @@ -35,18 +35,16 @@ cast, ) -from prometheus_client import Gauge - from twisted.internet import defer from twisted.python.failure import Failure -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseGauge from synapse.util.async_helpers import ObservableDeferred from synapse.util.caches.lrucache import LruCache from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry from synapse.util.clock import Clock -cache_pending_metric = Gauge( +cache_pending_metric = SynapseGauge( "synapse_util_caches_cache_pending", "Number of lookups currently pending for this cache", labelnames=["name", SERVER_NAME_LABEL], diff --git a/tests/handlers/test_stats.py b/tests/handlers/test_stats.py index bf1fbd9835..b9625082c4 100644 --- a/tests/handlers/test_stats.py +++ b/tests/handlers/test_stats.py @@ -20,10 +20,11 @@ from typing import Any, Optional, cast -from prometheus_client import REGISTRY, Gauge +from prometheus_client import REGISTRY from twisted.internet.testing import MemoryReactor +from synapse.metrics import SynapseGauge from synapse.rest import admin from synapse.rest.client import login, room from synapse.server import HomeServer @@ -60,9 +61,12 @@ def _set_metrics_to_zero(self) -> None: metrics = ["synapse_known_rooms_total", "synapse_locally_joined_rooms_total"] for metric_name in metrics: gauge = REGISTRY._names_to_collectors.get(metric_name) - if gauge is not None and isinstance(gauge, Gauge): - for labels in gauge._metrics: - gauge.labels(*labels).set(0) + if gauge is not None and isinstance(gauge, SynapseGauge): + # if isinstance(gauge, SynapseGauge): + gauge.labels(server_name=self.hs.hostname).set(0) + # else: + # for labels in gauge._metrics: + # gauge.labels(*labels).set(0) def _add_background_updates(self) -> None: """ diff --git a/tests/util/test_batching_queue.py b/tests/util/test_batching_queue.py index 30b07dc6ad..8ce7acaba1 100644 --- a/tests/util/test_batching_queue.py +++ b/tests/util/test_batching_queue.py @@ -19,11 +19,11 @@ # # -from prometheus_client import Gauge from twisted.internet import defer from synapse.logging.context import make_deferred_yieldable +from synapse.metrics import SynapseGauge from synapse.util.batching_queue import ( BatchingQueue, number_in_flight, @@ -59,7 +59,7 @@ async def _process_queue(self, values: list[str]) -> str: self._pending_calls.append((values, d)) return await make_deferred_yieldable(d) - def _get_sample_with_name(self, metric: Gauge, name: str) -> float: + def _get_sample_with_name(self, metric: SynapseGauge, name: str) -> float: """For a prometheus metric get the value of the sample that has a matching "name" label. """ From 62849852c75b1296caaa18e9e4d7c742baf87ec1 Mon Sep 17 00:00:00 2001 From: FrenchgGithubUser Date: Wed, 17 Dec 2025 10:52:56 +0100 Subject: [PATCH 8/9] add SynapseHistogram --- synapse/api/auth/__init__.py | 6 +- synapse/federation/federation_server.py | 11 +- synapse/handlers/federation.py | 6 +- synapse/handlers/federation_event.py | 7 +- synapse/handlers/sliding_sync/__init__.py | 5 +- synapse/http/request_metrics.py | 11 +- synapse/metrics/__init__.py | 149 +++++++++++++++++- synapse/replication/tcp/external_cache.py | 6 +- synapse/rest/client/room.py | 7 +- synapse/state/__init__.py | 11 +- synapse/storage/controllers/persist_events.py | 11 +- synapse/storage/database.py | 12 +- synapse/util/ratelimitutils.py | 10 +- 13 files changed, 196 insertions(+), 56 deletions(-) diff --git a/synapse/api/auth/__init__.py b/synapse/api/auth/__init__.py index cc0c0d4601..4d2a736a79 100644 --- a/synapse/api/auth/__init__.py +++ b/synapse/api/auth/__init__.py @@ -20,13 +20,11 @@ # from typing import TYPE_CHECKING, Optional, Protocol -from prometheus_client import Histogram - from twisted.web.server import Request from synapse.appservice import ApplicationService from synapse.http.site import SynapseRequest -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseHistogram from synapse.types import Requester if TYPE_CHECKING: @@ -36,7 +34,7 @@ GUEST_DEVICE_ID = "guest_device" -introspection_response_timer = Histogram( +introspection_response_timer = SynapseHistogram( "synapse_api_auth_delegated_introspection_response", "Time taken to get a response for an introspection request", labelnames=["code", SERVER_NAME_LABEL], diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 2111db2c0a..743866970f 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -32,8 +32,6 @@ Union, ) -from prometheus_client import Histogram - from twisted.python import failure from synapse.api.constants import ( @@ -79,7 +77,12 @@ tag_args, trace, ) -from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter, SynapseGauge +from synapse.metrics import ( + SERVER_NAME_LABEL, + SynapseCounter, + SynapseGauge, + SynapseHistogram, +) from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.replication.http.federation import ( ReplicationFederationSendEduRestServlet, @@ -116,7 +119,7 @@ labelnames=["type", SERVER_NAME_LABEL], ) -pdu_process_time = Histogram( +pdu_process_time = SynapseHistogram( "synapse_federation_server_pdu_process_time", "Time taken to process an event", labelnames=[SERVER_NAME_LABEL], diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index e325a0e209..6c37be3130 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -36,7 +36,6 @@ ) import attr -from prometheus_client import Histogram from signedjson.key import decode_verify_key_bytes from signedjson.sign import verify_signed_json from unpaddedbase64 import decode_base64 @@ -67,7 +66,7 @@ from synapse.http.servlet import assert_params_in_dict from synapse.logging.context import nested_logging_context from synapse.logging.opentracing import SynapseTags, set_tag, tag_args, trace -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseHistogram from synapse.module_api import NOT_SPAM from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.storage.invite_rule import InviteRule @@ -83,7 +82,7 @@ logger = logging.getLogger(__name__) # Added to debug performance and track progress on optimizations -backfill_processing_before_timer = Histogram( +backfill_processing_before_timer = SynapseHistogram( "synapse_federation_backfill_processing_before_time_seconds", "sec", labelnames=[SERVER_NAME_LABEL], @@ -101,7 +100,6 @@ 40.0, 60.0, 80.0, - "+Inf", ), ) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 228f3484fb..f4ab278207 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -32,8 +32,6 @@ Sequence, ) -from prometheus_client import Histogram - from synapse import event_auth from synapse.api.constants import ( EventContentFields, @@ -76,7 +74,7 @@ tag_args, trace, ) -from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter, SynapseHistogram from synapse.replication.http.federation import ( ReplicationFederationSendEventsRestServlet, ) @@ -109,7 +107,7 @@ ) # Added to debug performance and track progress on optimizations -backfill_processing_after_timer = Histogram( +backfill_processing_after_timer = SynapseHistogram( "synapse_federation_backfill_processing_after_time_seconds", "sec", labelnames=[SERVER_NAME_LABEL], @@ -134,7 +132,6 @@ 120.0, 150.0, 180.0, - "+Inf", ), ) diff --git a/synapse/handlers/sliding_sync/__init__.py b/synapse/handlers/sliding_sync/__init__.py index cea4b857ee..cebe3a2d5f 100644 --- a/synapse/handlers/sliding_sync/__init__.py +++ b/synapse/handlers/sliding_sync/__init__.py @@ -17,7 +17,6 @@ from itertools import chain from typing import TYPE_CHECKING, AbstractSet, Mapping, Optional -from prometheus_client import Histogram from typing_extensions import assert_never from synapse.api.constants import Direction, EventTypes, Membership @@ -38,7 +37,7 @@ tag_args, trace, ) -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseHistogram from synapse.storage.databases.main.roommember import extract_heroes_from_room_summary from synapse.storage.databases.main.state_deltas import StateDelta from synapse.storage.databases.main.stream import PaginateFunction @@ -77,7 +76,7 @@ logger = logging.getLogger(__name__) -sync_processing_time = Histogram( +sync_processing_time = SynapseHistogram( "synapse_sliding_sync_processing_time", "Time taken to generate a sliding sync response, ignoring wait times.", labelnames=["initial", SERVER_NAME_LABEL], diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py index 3f2ffd9050..54995475d4 100644 --- a/synapse/http/request_metrics.py +++ b/synapse/http/request_metrics.py @@ -24,10 +24,13 @@ import traceback from typing import Mapping -from prometheus_client.core import Histogram - from synapse.logging.context import current_context -from synapse.metrics import SERVER_NAME_LABEL, LaterGauge, SynapseCounter +from synapse.metrics import ( + SERVER_NAME_LABEL, + LaterGauge, + SynapseCounter, + SynapseHistogram, +) logger = logging.getLogger(__name__) @@ -51,7 +54,7 @@ labelnames=["method", "code", SERVER_NAME_LABEL], ) -response_timer = Histogram( +response_timer = SynapseHistogram( "synapse_http_server_response_time_seconds", "sec", labelnames=["method", "servlet", "tag", "code", SERVER_NAME_LABEL], diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 9d107fa285..ee517eedeb 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -52,10 +52,11 @@ from packaging.version import parse as parse_version from prometheus_client import ( CollectorRegistry, - Histogram, Metric, generate_latest, + values, ) +from prometheus_client.context_managers import Timer from prometheus_client.core import ( REGISTRY, GaugeHistogramMetricFamily, @@ -63,6 +64,8 @@ Sample, ) from prometheus_client.metrics import _get_use_created +from prometheus_client.samples import Exemplar +from prometheus_client.utils import INF, floatToGoString from prometheus_client.values import ValueClass from typing_extensions import Dict, Self @@ -526,7 +529,7 @@ def __init__( self._gauge_value: float = 0 if not self._labelvalues and self._registry: - # TODO: look into what to do here + # TODO: look into what to do here, and maybe move it to the wrapperbase? self._registry.register(self) # type: ignore def set(self, value: float) -> None: @@ -596,6 +599,143 @@ def _metric_init(self) -> None: ) +class SynapseHistogram(SynapseMetricWrapperBase): + _type = "histogram" + _reserved_labelnames = ["le"] + DEFAULT_BUCKETS = ( + 0.005, + 0.01, + 0.025, + 0.05, + 0.075, + 0.1, + 0.25, + 0.5, + 0.75, + 1.0, + 2.5, + 5.0, + 7.5, + 10.0, + INF, + ) + + def __init__( + self, + name: str, + documentation: str, + labelnames: Iterable[str] = (), + namespace: str = "", + subsystem: str = "", + unit: str = "", + registry: Optional[CollectorRegistry] = REGISTRY, + _labelvalues: Optional[Sequence[str]] = None, + buckets: Sequence[float] = DEFAULT_BUCKETS, + ): + self._prepare_buckets(buckets) + super().__init__( + name=name, + documentation=documentation, + labelnames=labelnames, + namespace=namespace, + subsystem=subsystem, + unit=unit, + registry=registry, + _labelvalues=_labelvalues, + ) + self._histogram = meter.create_histogram( + self._name, + unit=self._unit, + description=self._documentation, + explicit_bucket_boundaries_advisory=buckets, + ) + self._kwargs["buckets"] = buckets + + def _prepare_buckets(self, source_buckets: Sequence[Union[float, str]]) -> None: + buckets = [float(b) for b in source_buckets] + if buckets != sorted(buckets): + # This is probably an error on the part of the user, + # so raise rather than sorting for them. + raise ValueError("Buckets not in sorted order") + if buckets and buckets[-1] != INF: + buckets.append(INF) + if len(buckets) < 2: + raise ValueError("Must have at least two buckets") + self._upper_bounds = buckets + + def _metric_init(self) -> None: + self._buckets: list[values.ValueClass] = [] + self._created = time() + bucket_labelnames = self._labelnames + ("le",) + self._sum = values.ValueClass( + self._type, + self._name, + self._name + "_sum", + self._labelnames, + self._labelvalues, + self._documentation, + ) + for b in self._upper_bounds: + self._buckets.append( + values.ValueClass( + self._type, + self._name, + self._name + "_bucket", + bucket_labelnames, + self._labelvalues + (floatToGoString(b),), + self._documentation, + ) + ) + + def observe(self, amount: float, exemplar: Optional[Dict[str, str]] = None) -> None: + """Observe the given amount. + + The amount is usually positive or zero. Negative values are + accepted but prevent current versions of Prometheus from + properly detecting counter resets in the sum of + observations. See + https://prometheus.io/docs/practices/histograms/#count-and-sum-of-observations + for details. + """ + self._raise_if_not_observable() + self._sum.inc(amount) + for i, bound in enumerate(self._upper_bounds): + if amount <= bound: + self._buckets[i].inc(1) + if exemplar: + # _validate_exemplar(exemplar) + self._buckets[i].set_exemplar(Exemplar(exemplar, amount, time())) + break + + def time(self) -> Timer: + """Time a block of code or function, and observe the duration in seconds. + + Can be used as a function decorator or context manager. + """ + return Timer(self, "observe") + + def _child_samples(self) -> Iterable[Sample]: + samples = [] + acc = 0.0 + for i, bound in enumerate(self._upper_bounds): + acc += self._buckets[i].get() + samples.append( + Sample( + "_bucket", + {"le": floatToGoString(bound)}, + acc, + None, + self._buckets[i].get_exemplar(), + ) + ) + samples.append(Sample("_count", {}, acc, None, None)) + if self._upper_bounds[0] >= 0: + samples.append(Sample("_sum", {}, self._sum.get(), None, None)) + if _get_use_created(): + samples.append(Sample("_created", {}, self._created, None, None)) + return tuple(samples) + + @attr.s(slots=True, hash=True, auto_attribs=True, kw_only=True) class LaterGauge(Collector): """A Gauge which periodically calls a user-provided callback to produce metrics.""" @@ -1091,7 +1231,7 @@ def collect(self) -> Iterable[Metric]: "synapse_event_processing_lag", "", labelnames=["name", SERVER_NAME_LABEL] ) -event_processing_lag_by_event = Histogram( +event_processing_lag_by_event = SynapseHistogram( "synapse_event_processing_lag_by_event", "Time between an event being persisted and it being queued up to be sent to the relevant remote servers", labelnames=["name", SERVER_NAME_LABEL], @@ -1119,7 +1259,7 @@ def collect(self) -> Iterable[Metric]: ) # 3PID send info -threepid_send_requests = Histogram( +threepid_send_requests = SynapseHistogram( "synapse_threepid_send_requests_with_tries", documentation="Number of requests for a 3pid token by try count. Note if" " there is a request with try count of 4, then there would have been one" @@ -1219,4 +1359,5 @@ def render_GET(self, request: Request) -> bytes: "install_gc_manager", "SynapseCounter", "SynapseGauge", + "SynapseHistogram", ] diff --git a/synapse/replication/tcp/external_cache.py b/synapse/replication/tcp/external_cache.py index 9f2b790c70..36ace55c53 100644 --- a/synapse/replication/tcp/external_cache.py +++ b/synapse/replication/tcp/external_cache.py @@ -22,11 +22,9 @@ import logging from typing import TYPE_CHECKING, Any, Optional -from prometheus_client import Histogram - from synapse.logging import opentracing from synapse.logging.context import make_deferred_yieldable -from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter, SynapseHistogram from synapse.util.json import json_decoder, json_encoder if TYPE_CHECKING: @@ -46,7 +44,7 @@ labelnames=["cache_name", "hit", SERVER_NAME_LABEL], ) -response_timer = Histogram( +response_timer = SynapseHistogram( "synapse_external_cache_response_time_seconds", "Time taken to get a response from Redis for a cache get/set request", labelnames=["method", SERVER_NAME_LABEL], diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index 38e315d0e7..bbcf4a6393 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -28,8 +28,6 @@ from typing import TYPE_CHECKING, Awaitable, Optional from urllib import parse as urlparse -from prometheus_client.core import Histogram - from twisted.web.server import Request from synapse import event_auth @@ -65,7 +63,7 @@ from synapse.http.site import SynapseRequest from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.logging.opentracing import set_tag -from synapse.metrics import SERVER_NAME_LABEL +from synapse.metrics import SERVER_NAME_LABEL, SynapseHistogram from synapse.rest.client._base import client_patterns from synapse.rest.client.transactions import HttpTransactionCache from synapse.state import CREATE_KEY, POWER_KEY @@ -113,7 +111,7 @@ def from_member_count(member_count: int) -> "_RoomSize": # greater than 10s. We use a separate dedicated histogram with its own buckets # so that we don't increase the cardinality of the general one because it's # multiplied across hundreds of servlets. -messsages_response_timer = Histogram( +messsages_response_timer = SynapseHistogram( "synapse_room_message_list_rest_servlet_response_time_seconds", "sec", # We have a label for room size so we can try to see a more realistic @@ -141,7 +139,6 @@ def from_member_count(member_count: int) -> "_RoomSize": 120.0, 150.0, 180.0, - "+Inf", ), ) diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 1cdd133304..e6698781b2 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -33,7 +33,6 @@ import attr from immutabledict import immutabledict -from prometheus_client import Histogram from synapse.api.constants import EventTypes from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, StateResolutionVersions @@ -45,7 +44,7 @@ ) from synapse.logging.context import ContextResourceUsage from synapse.logging.opentracing import tag_args, trace -from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter, SynapseHistogram from synapse.replication.http.state import ReplicationUpdateCurrentStateRestServlet from synapse.state import v1, v2 from synapse.storage.databases.main.event_federation import StateDifference @@ -67,11 +66,11 @@ metrics_logger = logging.getLogger("synapse.state.metrics") # Metrics for number of state groups involved in a resolution. -state_groups_histogram = Histogram( +state_groups_histogram = SynapseHistogram( "synapse_state_number_state_groups_in_resolution", "Number of state groups used when performing a state resolution", labelnames=[SERVER_NAME_LABEL], - buckets=(1, 2, 3, 5, 7, 10, 15, 20, 50, 100, 200, 500, "+Inf"), + buckets=(1, 2, 3, 5, 7, 10, 15, 20, 50, 100, 200, 500), ) @@ -613,12 +612,12 @@ class _StateResMetrics: labelnames=[SERVER_NAME_LABEL], ) -_cpu_times = Histogram( +_cpu_times = SynapseHistogram( "synapse_state_res_cpu_for_all_rooms_seconds", "CPU time (utime+stime) spent computing a single state resolution", labelnames=[SERVER_NAME_LABEL], ) -_db_times = Histogram( +_db_times = SynapseHistogram( "synapse_state_res_db_for_all_rooms_seconds", "Database time spent computing a single state resolution", labelnames=[SERVER_NAME_LABEL], diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index 3b6ddf0e01..b7d29a52aa 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -40,7 +40,6 @@ ) import attr -from prometheus_client import Histogram from twisted.internet import defer @@ -56,7 +55,7 @@ start_active_span_follows_from, trace, ) -from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter +from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter, SynapseHistogram from synapse.storage.controllers.state import StateStorageController from synapse.storage.databases import Databases from synapse.storage.databases.main.events import DeltaState @@ -97,20 +96,20 @@ ) # The number of forward extremities for each new event. -forward_extremities_counter = Histogram( +forward_extremities_counter = SynapseHistogram( "synapse_storage_events_forward_extremities_persisted", "Number of forward extremities for each new event", labelnames=[SERVER_NAME_LABEL], - buckets=(1, 2, 3, 5, 7, 10, 15, 20, 50, 100, 200, 500, "+Inf"), + buckets=(1, 2, 3, 5, 7, 10, 15, 20, 50, 100, 200, 500), ) # The number of stale forward extremities for each new event. Stale extremities # are those that were in the previous set of extremities as well as the new. -stale_forward_extremities_counter = Histogram( +stale_forward_extremities_counter = SynapseHistogram( "synapse_storage_events_stale_forward_extremities_persisted", "Number of unchanged forward extremities for each new event", labelnames=[SERVER_NAME_LABEL], - buckets=(0, 1, 2, 3, 5, 7, 10, 15, 20, 50, 100, 200, 500, "+Inf"), + buckets=(0, 1, 2, 3, 5, 7, 10, 15, 20, 50, 100, 200, 500), ) state_resolutions_during_persistence = SynapseCounter( diff --git a/synapse/storage/database.py b/synapse/storage/database.py index c10e5b6439..a7f2eee04d 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -43,7 +43,6 @@ ) import attr -from prometheus_client import Histogram from typing_extensions import Concatenate, ParamSpec from twisted.enterprise import adbapi @@ -57,7 +56,12 @@ current_context, make_deferred_yieldable, ) -from synapse.metrics import SERVER_NAME_LABEL, SynapseCounter, register_threadpool +from synapse.metrics import ( + SERVER_NAME_LABEL, + SynapseCounter, + SynapseHistogram, + register_threadpool, +) from synapse.storage.background_updates import BackgroundUpdater from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine from synapse.storage.types import Connection, Cursor, SQLQueryParameters @@ -77,11 +81,11 @@ transaction_logger = logging.getLogger("synapse.storage.txn") perf_logger = logging.getLogger("synapse.storage.TIME") -sql_scheduling_timer = Histogram( +sql_scheduling_timer = SynapseHistogram( "synapse_storage_schedule_time", "sec", labelnames=[SERVER_NAME_LABEL] ) -sql_query_timer = Histogram( +sql_query_timer = SynapseHistogram( "synapse_storage_query_time", "sec", labelnames=["verb", SERVER_NAME_LABEL] ) sql_txn_count = SynapseCounter( diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py index cb751d3a1a..45bd6fd331 100644 --- a/synapse/util/ratelimitutils.py +++ b/synapse/util/ratelimitutils.py @@ -45,7 +45,12 @@ run_in_background, ) from synapse.logging.opentracing import start_active_span -from synapse.metrics import SERVER_NAME_LABEL, Histogram, LaterGauge, SynapseCounter +from synapse.metrics import ( + SERVER_NAME_LABEL, + LaterGauge, + SynapseCounter, + SynapseHistogram, +) from synapse.util.clock import Clock if typing.TYPE_CHECKING: @@ -65,7 +70,7 @@ "Number of requests rejected by the rate limiter", labelnames=["rate_limiter_name", SERVER_NAME_LABEL], ) -queue_wait_timer = Histogram( +queue_wait_timer = SynapseHistogram( "synapse_rate_limit_queue_wait_time_seconds", "Amount of time spent waiting for the rate limiter to let our request through.", labelnames=["rate_limiter_name", SERVER_NAME_LABEL], @@ -83,7 +88,6 @@ 5.0, 10.0, 20.0, - "+Inf", ), ) From d997f5cfd5f2083ecafef81013427a67aa377265 Mon Sep 17 00:00:00 2001 From: FrenchgGithubUser Date: Wed, 17 Dec 2025 14:15:32 +0100 Subject: [PATCH 9/9] mock classes by directly calling the prometheus methods under the hood --- synapse/handlers/federation.py | 1 + synapse/handlers/federation_event.py | 1 + synapse/metrics/__init__.py | 521 ++---------------- synapse/rest/client/room.py | 1 + synapse/state/__init__.py | 2 +- synapse/storage/controllers/persist_events.py | 4 +- synapse/util/batching_queue.py | 4 +- synapse/util/ratelimitutils.py | 1 + tests/handlers/test_stats.py | 12 +- 9 files changed, 75 insertions(+), 472 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 6c37be3130..190fe49547 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -100,6 +100,7 @@ 40.0, 60.0, 80.0, + "+Inf", ), ) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index f4ab278207..0e0df3d324 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -132,6 +132,7 @@ 120.0, 150.0, 180.0, + "+Inf", ), ) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index ee517eedeb..b7fc4b0048 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -26,8 +26,6 @@ import platform import threading from importlib import metadata -from time import time -from types import MethodType from typing import ( Any, Callable, @@ -52,22 +50,19 @@ from packaging.version import parse as parse_version from prometheus_client import ( CollectorRegistry, + Counter, + Gauge, + Histogram, Metric, generate_latest, - values, ) -from prometheus_client.context_managers import Timer +from prometheus_client.context_managers import InprogressTracker, Timer from prometheus_client.core import ( REGISTRY, GaugeHistogramMetricFamily, GaugeMetricFamily, - Sample, ) -from prometheus_client.metrics import _get_use_created -from prometheus_client.samples import Exemplar -from prometheus_client.utils import INF, floatToGoString -from prometheus_client.values import ValueClass -from typing_extensions import Dict, Self +from prometheus_client.utils import INF from twisted.python.threadpool import ThreadPool from twisted.web.resource import Resource @@ -175,217 +170,7 @@ def collect() -> Iterable[Metric]: RegistryProxy = cast(CollectorRegistry, _RegistryProxy) -def _build_full_name( - metric_type: str, name: str, namespace: str, subsystem: str, unit: str -) -> str: - # Ripped from prometheus_client/metrics.py - if not name: - raise ValueError("Metric name should not be empty") - full_name = "" - if namespace: - full_name += namespace + "_" - if subsystem: - full_name += subsystem + "_" - full_name += name - if metric_type == "counter" and full_name.endswith("_total"): - full_name = full_name[:-6] # Munge to OpenMetrics. - if unit and not full_name.endswith("_" + unit): - full_name += "_" + unit - if unit and metric_type in ("info", "stateset"): - raise ValueError( - "Metric name is of a type that cannot have a unit: " + full_name - ) - return full_name - - -T = TypeVar("T", bound="SynapseMetricWrapperBase") - - -class SynapseMetricWrapperBase: - def _raise_if_not_observable(self) -> None: - # Functions that mutate the state of the metric, for example incrementing - # a counter, will fail if the metric is not observable, because only if a - # metric is observable will the value be initialized. - if not self._is_observable(): - raise ValueError("%s metric is missing label values" % str(self._type)) - - def _is_observable(self): # type: ignore[no-untyped-def] - # Whether this metric is observable, i.e. - # * a metric without label names and values, or - # * the child of a labelled metric. - return not self._labelnames or (self._labelnames and self._labelvalues) - - def __init__( - self: T, - name: str, - documentation: str, - labelnames: Iterable[str] = (), - namespace: str = "", - subsystem: str = "", - unit: str = "", - registry: Optional[CollectorRegistry] = REGISTRY, - _labelvalues: Optional[Sequence[str]] = None, - ) -> None: - self._type: str = "" - self._original_name = name - self._namespace = namespace - self._subsystem = subsystem - self._name = _build_full_name(self._type, name, namespace, subsystem, unit) - # prom validates these, should we do that? - # labelnames provide a simple way to register that a given set of kwargs call - # from labels can be used. All should be used in a call? - self._labelnames = tuple(labelnames or ()) - self._labelvalues = tuple(_labelvalues or ()) - self._kwargs: Dict[str, Any] = {} - self._documentation = documentation - self._unit = unit - self._metrics = {} # type: ignore[var-annotated] - self._lock = threading.Lock() - - # if self._is_parent(): - # # Prepare the fields needed for child metrics. - # self._lock = Lock() - # self._metrics: Dict[Sequence[str], T] = {} - - if self._is_observable(): - self._metric_init() - - # if not self._labelvalues: - # # Register the multi-wrapper parent metric, or if a label-less metric, the whole shebang. - # if registry: - # registry.register(self) - self._registry = registry - - def _metric_init(self): # type: ignore[no-untyped-def] # pragma: no cover - """ - Initialize the metric object as a child, i.e. when it has labels (if any) set. - - This is factored as a separate function to allow for deferred initialization. - """ - raise NotImplementedError("_metric_init() must be implemented by %r" % self) - - def labels(self, *labelvalues: Any, **labelkwargs: Any) -> Self: - if not self._labelnames: - raise ValueError("No label names were set when constructing %s" % self) - - if self._labelvalues: - raise ValueError( - "{} already has labels set ({}); can not chain calls to .labels()".format( - self, dict(zip(self._labelnames, self._labelvalues)) - ) - ) - - if labelvalues and labelkwargs: - raise ValueError("Can't pass both *args and **kwargs") - - if labelkwargs: - if sorted(labelkwargs) != sorted(self._labelnames): - raise ValueError("Incorrect label names") - labelvalues = tuple( - str(labelkwargs[lablename]) for lablename in self._labelnames - ) - else: - if len(labelvalues) != len(self._labelnames): - raise ValueError("Incorrect label count") - labelvalues = tuple(str(labelvalue) for labelvalue in labelvalues) - with self._lock: - if labelvalues not in self._metrics: - original_name = getattr(self, "_original_name", self._name) - namespace = getattr(self, "_namespace", "") - subsystem = getattr(self, "_subsystem", "") - unit = getattr(self, "_unit", "") - - child_kwargs = dict(self._kwargs) if self._kwargs else {} - for k in ("namespace", "subsystem", "unit"): - child_kwargs.pop(k, None) - - self._metrics[labelvalues] = self.__class__( - original_name, - documentation=self._documentation, - labelnames=self._labelnames, - namespace=namespace, - subsystem=subsystem, - unit=unit, - _labelvalues=labelvalues, - **child_kwargs, - ) - return self._metrics[labelvalues] - - def _get_metric(self): # type: ignore[no-untyped-def] - return Metric(self._name, self._documentation, self._type, self._unit) - - def collect(self) -> Iterable[Metric]: - metric = self._get_metric() - for ( - suffix, - labels, - value, - timestamp, - exemplar, - native_histogram_value, - ) in self._samples(): - metric.add_sample( - self._name + suffix, - labels, - value, - timestamp, - exemplar, - native_histogram_value, - ) - return [metric] - - def _is_parent(self): # type: ignore[no-untyped-def] - return self._labelnames and not self._labelvalues - - def _child_samples(self) -> Iterable[Sample]: # pragma: no cover - raise NotImplementedError("_child_samples() must be implemented by %r" % self) - - def _samples(self) -> Iterable[Sample]: - if self._is_parent(): - return self._multi_samples() - else: - return self._child_samples() - - def _multi_samples(self) -> Iterable[Sample]: - with self._lock: - metrics = self._metrics.copy() - for labels, metric in metrics.items(): - series_labels = list(zip(self._labelnames, labels)) - for ( - suffix, - sample_labels, - value, - timestamp, - exemplar, - native_histogram_value, - ) in metric._samples(): - yield Sample( - suffix, - dict(series_labels + list(sample_labels.items())), - value, - timestamp, - exemplar, - native_histogram_value, - ) - - # not sure if this is needed, putting it there for now to make the linter happy - def remove(self, *labelvalues: Any) -> None: - if not self._labelnames: - raise ValueError("No label names were set when constructing %s" % self) - - """Remove the given labelset from the metric.""" - if len(labelvalues) != len(self._labelnames): - raise ValueError( - "Incorrect label count (expected %d, got %s)" - % (len(self._labelnames), labelvalues) - ) - labelvalues = tuple(str(labelvalue) for labelvalue in labelvalues) - with self._lock: - if labelvalues in self._metrics: - del self._metrics[labelvalues] - - -class SynapseCounter(SynapseMetricWrapperBase): +class SynapseCounter: def __init__( self, name: str, @@ -397,92 +182,37 @@ def __init__( registry: Optional[CollectorRegistry] = REGISTRY, _labelvalues: Optional[Sequence[str]] = None, ) -> None: - super().__init__( - name, - documentation, - labelnames, - namespace, - subsystem, - unit, - registry, - _labelvalues, + # TODO: remove the ignore + self._prometheus_counter = Counter( # type: ignore[missing-server-name-label] + name=name, + documentation=documentation, + labelnames=labelnames, + namespace=namespace, + subsystem=subsystem, + unit=unit, + registry=registry, + _labelvalues=_labelvalues, ) - self._type = "counter" # Here is where we grab the global meter to create a FauxCounter - self._counter = meter.create_counter( - self._name, unit=self._unit, description=self._documentation + self._otel_counter = meter.create_counter( + name, unit=unit, description=documentation ) - self._current_attributes = () - def inc(self, amount: float = 1.0) -> None: + self._prometheus_counter.inc(amount=amount) # Need to verify what happens with Counters that do not have labels as children, # this may not be appropriate in those cases. Can probably just leave the # attributes param as empty in that case? - self._value.inc(amount) - self._counter.add(amount, dict(zip(self._labelnames, self._current_attributes))) - # # If this was a "child" metric, then the lock will have been taken in labels() - # if self._lock.locked(): - # self._lock.release() - - def _metric_init(self) -> None: - self._value = ValueClass( - self._type, - self._name, - self._name + "_total", - self._labelnames, - self._labelvalues, - self._documentation, - ) - self._created = time() - - def _child_samples(self) -> Iterable[Sample]: - sample = Sample( - "_total", {}, self._value.get(), None, self._value.get_exemplar() - ) - if _get_use_created(): - return (sample, Sample("_created", {}, self._created, None, None)) - return (sample,) - - -F = TypeVar("F", bound=Callable[..., Any]) - - -class InprogressTracker: - def __init__(self, gauge) -> None: # type: ignore[no-untyped-def] - self._gauge = gauge - - def __enter__(self) -> None: - self._gauge.inc() - - def __exit__(self, typ, value, traceback) -> None: # type: ignore[no-untyped-def] - self._gauge.dec() + self._otel_counter.add(amount) - # def __call__(self, f: "F") -> "F": - # def wrapped(func, *args, **kwargs): - # with self: - # return func(*args, **kwargs) - - # return decorate(f, wrapped) + def labels(self, *labelvalues: Any, **labelkwargs: Any) -> Counter: + return self._prometheus_counter.labels(*labelvalues, **labelkwargs) + def collect(self) -> Iterable[Metric]: + return self._prometheus_counter.collect() -class SynapseGauge(SynapseMetricWrapperBase): - _MULTIPROC_MODES = frozenset( - ( - "all", - "liveall", - "min", - "livemin", - "max", - "livemax", - "sum", - "livesum", - "mostrecent", - "livemostrecent", - ) - ) - _MOST_RECENT_MODES = frozenset(("mostrecent", "livemostrecent")) +class SynapseGauge: def __init__( self, name: str, @@ -506,10 +236,8 @@ def __init__( "livemostrecent", ] = "all", ): - self._multiprocess_mode = multiprocess_mode - if multiprocess_mode not in self._MULTIPROC_MODES: - raise ValueError("Invalid multiprocess mode: " + multiprocess_mode) - super().__init__( + # TODO: remove the type ignore + self._prometheus_gauge = Gauge( # type: ignore[missing-server-name-label] name=name, documentation=documentation, labelnames=labelnames, @@ -518,90 +246,43 @@ def __init__( unit=unit, registry=registry, _labelvalues=_labelvalues, + multiprocess_mode=multiprocess_mode, ) - self._type = "gauge" # Here is where we grab the global meter to create a FauxGauge - self._gauge = meter.create_gauge( - self._name, unit=self._unit, description=self._documentation + self._otel_gauge = meter.create_gauge( + name, unit=unit, description=documentation ) - self._kwargs["multiprocess_mode"] = self._multiprocess_mode - self._is_most_recent = self._multiprocess_mode in self._MOST_RECENT_MODES - self._gauge_value: float = 0 - - if not self._labelvalues and self._registry: - # TODO: look into what to do here, and maybe move it to the wrapperbase? - self._registry.register(self) # type: ignore def set(self, value: float) -> None: - """Set gauge to the given value.""" - self._raise_if_not_observable() - if self._is_most_recent: - self._value.set(float(value), timestamp=time()) - else: - self._value.set(float(value)) - self._gauge.set(value) - self._gauge_value = value - # self._value.set(0) + self._prometheus_gauge.set(value) + self._otel_gauge.set(value) def inc(self, amount: float = 1) -> None: - """Increment gauge by the given amount.""" - if self._is_most_recent: - raise RuntimeError("inc must not be used with the mostrecent mode") - # self._raise_if_not_observable() - self._value.inc(amount) - self._gauge_value += amount - self._gauge.set(self._gauge_value) + self._prometheus_gauge.inc(amount) + self._otel_gauge.set(self._prometheus_gauge._value.get()) def dec(self, amount: float = 1) -> None: - """Decrement gauge by the given amount.""" - if self._is_most_recent: - raise RuntimeError("inc must not be used with the mostrecent mode") - # self._raise_if_not_observable() - self._gauge_value -= amount - self._gauge.set(self._gauge_value) - self._value.inc(-amount) + self._prometheus_gauge.dec(amount) + self._otel_gauge.set(self._prometheus_gauge._value.get()) def track_inprogress(self) -> InprogressTracker: - """Track inprogress blocks of code or functions. - - Can be used as a function decorator or context manager. - Increments the gauge when the code is entered, - and decrements when it is exited. - """ - # self._raise_if_not_observable() - return InprogressTracker(self) + return self._prometheus_gauge.track_inprogress() def set_function(self, f: Callable[[], float]) -> None: - """Call the provided function to return the Gauge value. + self._prometheus_gauge.set_function(f) + # TODO: figure out what's the equivalent for otel here - The function must return a float, and may be called from - multiple threads. All other methods of the Gauge become NOOPs. - """ - # self._raise_if_not_observable() - - def samples(_: SynapseGauge) -> Iterable[Sample]: - return (Sample("", {}, float(f()), None, None),) + def labels(self, *labelvalues: Any, **labelkwargs: Any) -> Gauge: + return self._prometheus_gauge.labels(*labelvalues, **labelkwargs) - self._child_samples = MethodType(samples, self) # type: ignore - - def _child_samples(self) -> Iterable[Sample]: - return (Sample("", {}, self._value.get(), None, None),) + def remove(self, *labelvalues: Any) -> None: + self._prometheus_gauge.remove(*labelvalues) - def _metric_init(self) -> None: - self._value = ValueClass( - self._type, - self._name, - self._name, - self._labelnames, - self._labelvalues, - self._documentation, - multiprocess_mode=self._multiprocess_mode, - ) + def collect(self) -> Iterable[Metric]: + return self._prometheus_gauge.collect() -class SynapseHistogram(SynapseMetricWrapperBase): - _type = "histogram" - _reserved_labelnames = ["le"] +class SynapseHistogram: DEFAULT_BUCKETS = ( 0.005, 0.01, @@ -630,110 +311,30 @@ def __init__( unit: str = "", registry: Optional[CollectorRegistry] = REGISTRY, _labelvalues: Optional[Sequence[str]] = None, - buckets: Sequence[float] = DEFAULT_BUCKETS, + buckets: Sequence[Union[float, str]] = DEFAULT_BUCKETS, ): - self._prepare_buckets(buckets) - super().__init__( + # TODO: remove the type ignore + self._prometheus_histogram = Histogram( # type: ignore[missing-server-name-label] name=name, documentation=documentation, labelnames=labelnames, - namespace=namespace, - subsystem=subsystem, - unit=unit, - registry=registry, - _labelvalues=_labelvalues, - ) - self._histogram = meter.create_histogram( - self._name, - unit=self._unit, - description=self._documentation, - explicit_bucket_boundaries_advisory=buckets, + buckets=buckets, ) - self._kwargs["buckets"] = buckets - - def _prepare_buckets(self, source_buckets: Sequence[Union[float, str]]) -> None: - buckets = [float(b) for b in source_buckets] - if buckets != sorted(buckets): - # This is probably an error on the part of the user, - # so raise rather than sorting for them. - raise ValueError("Buckets not in sorted order") - if buckets and buckets[-1] != INF: - buckets.append(INF) - if len(buckets) < 2: - raise ValueError("Must have at least two buckets") - self._upper_bounds = buckets - - def _metric_init(self) -> None: - self._buckets: list[values.ValueClass] = [] - self._created = time() - bucket_labelnames = self._labelnames + ("le",) - self._sum = values.ValueClass( - self._type, - self._name, - self._name + "_sum", - self._labelnames, - self._labelvalues, - self._documentation, + self._otel_histogram = meter.create_histogram( + name, + unit=unit, + description=documentation, + # prometheus_client accepts "INF" as a boundary, but otel doesn't, we just remove it + explicit_bucket_boundaries_advisory=[ + x for x in buckets if isinstance(x, float) + ], ) - for b in self._upper_bounds: - self._buckets.append( - values.ValueClass( - self._type, - self._name, - self._name + "_bucket", - bucket_labelnames, - self._labelvalues + (floatToGoString(b),), - self._documentation, - ) - ) - - def observe(self, amount: float, exemplar: Optional[Dict[str, str]] = None) -> None: - """Observe the given amount. - - The amount is usually positive or zero. Negative values are - accepted but prevent current versions of Prometheus from - properly detecting counter resets in the sum of - observations. See - https://prometheus.io/docs/practices/histograms/#count-and-sum-of-observations - for details. - """ - self._raise_if_not_observable() - self._sum.inc(amount) - for i, bound in enumerate(self._upper_bounds): - if amount <= bound: - self._buckets[i].inc(1) - if exemplar: - # _validate_exemplar(exemplar) - self._buckets[i].set_exemplar(Exemplar(exemplar, amount, time())) - break def time(self) -> Timer: - """Time a block of code or function, and observe the duration in seconds. + return self._prometheus_histogram.time() - Can be used as a function decorator or context manager. - """ - return Timer(self, "observe") - - def _child_samples(self) -> Iterable[Sample]: - samples = [] - acc = 0.0 - for i, bound in enumerate(self._upper_bounds): - acc += self._buckets[i].get() - samples.append( - Sample( - "_bucket", - {"le": floatToGoString(bound)}, - acc, - None, - self._buckets[i].get_exemplar(), - ) - ) - samples.append(Sample("_count", {}, acc, None, None)) - if self._upper_bounds[0] >= 0: - samples.append(Sample("_sum", {}, self._sum.get(), None, None)) - if _get_use_created(): - samples.append(Sample("_created", {}, self._created, None, None)) - return tuple(samples) + def labels(self, *labelvalues: Any, **labelkwargs: Any) -> Histogram: + return self._prometheus_histogram.labels(*labelvalues, **labelkwargs) @attr.s(slots=True, hash=True, auto_attribs=True, kw_only=True) diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index bbcf4a6393..7f4b8aced0 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -139,6 +139,7 @@ def from_member_count(member_count: int) -> "_RoomSize": 120.0, 150.0, 180.0, + "+Inf", ), ) diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index e6698781b2..1e3c267183 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -70,7 +70,7 @@ "synapse_state_number_state_groups_in_resolution", "Number of state groups used when performing a state resolution", labelnames=[SERVER_NAME_LABEL], - buckets=(1, 2, 3, 5, 7, 10, 15, 20, 50, 100, 200, 500), + buckets=(1, 2, 3, 5, 7, 10, 15, 20, 50, 100, 200, 500, "+Inf"), ) diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index b7d29a52aa..f9d9934b56 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -100,7 +100,7 @@ "synapse_storage_events_forward_extremities_persisted", "Number of forward extremities for each new event", labelnames=[SERVER_NAME_LABEL], - buckets=(1, 2, 3, 5, 7, 10, 15, 20, 50, 100, 200, 500), + buckets=(1, 2, 3, 5, 7, 10, 15, 20, 50, 100, 200, 500, "+Inf"), ) # The number of stale forward extremities for each new event. Stale extremities @@ -109,7 +109,7 @@ "synapse_storage_events_stale_forward_extremities_persisted", "Number of unchanged forward extremities for each new event", labelnames=[SERVER_NAME_LABEL], - buckets=(0, 1, 2, 3, 5, 7, 10, 15, 20, 50, 100, 200, 500), + buckets=(0, 1, 2, 3, 5, 7, 10, 15, 20, 50, 100, 200, 500, "+Inf"), ) state_resolutions_during_persistence = SynapseCounter( diff --git a/synapse/util/batching_queue.py b/synapse/util/batching_queue.py index e676ccb71c..7e96b35086 100644 --- a/synapse/util/batching_queue.py +++ b/synapse/util/batching_queue.py @@ -29,6 +29,8 @@ TypeVar, ) +from prometheus_client import Gauge + from twisted.internet import defer from synapse.logging.context import PreserveLoggingContext, make_deferred_yieldable @@ -121,7 +123,7 @@ def __init__( name=self._name, **{SERVER_NAME_LABEL: self.server_name} ).set_function(lambda: len(self._next_values)) - self._number_in_flight_metric: SynapseGauge = number_in_flight.labels( + self._number_in_flight_metric: Gauge = number_in_flight.labels( name=self._name, **{SERVER_NAME_LABEL: self.server_name} ) diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py index 45bd6fd331..d5309f73f4 100644 --- a/synapse/util/ratelimitutils.py +++ b/synapse/util/ratelimitutils.py @@ -88,6 +88,7 @@ 5.0, 10.0, 20.0, + "+Inf", ), ) diff --git a/tests/handlers/test_stats.py b/tests/handlers/test_stats.py index b9625082c4..bf1fbd9835 100644 --- a/tests/handlers/test_stats.py +++ b/tests/handlers/test_stats.py @@ -20,11 +20,10 @@ from typing import Any, Optional, cast -from prometheus_client import REGISTRY +from prometheus_client import REGISTRY, Gauge from twisted.internet.testing import MemoryReactor -from synapse.metrics import SynapseGauge from synapse.rest import admin from synapse.rest.client import login, room from synapse.server import HomeServer @@ -61,12 +60,9 @@ def _set_metrics_to_zero(self) -> None: metrics = ["synapse_known_rooms_total", "synapse_locally_joined_rooms_total"] for metric_name in metrics: gauge = REGISTRY._names_to_collectors.get(metric_name) - if gauge is not None and isinstance(gauge, SynapseGauge): - # if isinstance(gauge, SynapseGauge): - gauge.labels(server_name=self.hs.hostname).set(0) - # else: - # for labels in gauge._metrics: - # gauge.labels(*labels).set(0) + if gauge is not None and isinstance(gauge, Gauge): + for labels in gauge._metrics: + gauge.labels(*labels).set(0) def _add_background_updates(self) -> None: """