Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ A proxy for transforming, pre-aggregating and routing statsd metrics, like
Currently supports the following transformations:

* Deny- or allow-listing of specific tag keys or metric names
* Deny tags based on prefix or suffix
* Adding hardcoded tags to all metrics
* Basic cardinality limiting, tracking the number of distinct tag values per
key or the number of overall timeseries (=combinations of metrics and tags).
Expand Down
3 changes: 3 additions & 0 deletions example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@

middlewares:
# Remove a list of tag names ("a", "b" and "c") from incoming metrics
# Also removes any tag whose name starts with "foo" or ends with "bar"
- type: deny-tag
tags: [a, b, c]
starts_with: [foo]
ends_with: [bar]
Comment on lines +14 to +15
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm thinking about calling this prefix/suffix, what do you think?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no strong opinion, either is fine. i think the "best" API would be to impl glob support in deny-tag, but probably too complicated and possibly slow.


# Allow a list of tag names ("a", "b" and "c") from incoming metrics, and
# remove all other tags.
Expand Down
11 changes: 11 additions & 0 deletions src/config.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#[cfg(feature = "cli")]
use std::fmt::Formatter;
use std::time::Duration;
#[cfg(feature = "cli")]
Expand Down Expand Up @@ -39,6 +40,10 @@ pub enum MiddlewareConfig {
#[derive(Debug, PartialEq)]
/// Configuration for the tag-denying middleware (`type: deny-tag` in the example config).
pub struct DenyTagConfig {
/// Tag names that are denied when they match exactly.
pub tags: Vec<String>,
/// Prefixes: a tag whose name starts with any of these strings is denied.
/// With the `cli` feature, serde falls back to the field's default (an empty list)
/// when this key is omitted.
#[cfg_attr(feature = "cli", serde(default))]
pub starts_with: Vec<String>,
/// Suffixes: a tag whose name ends with any of these strings is denied.
/// With the `cli` feature, serde falls back to the field's default (an empty list)
/// when this key is omitted.
#[cfg_attr(feature = "cli", serde(default))]
pub ends_with: Vec<String>,
}

#[cfg_attr(feature = "cli", derive(Deserialize))]
Expand Down Expand Up @@ -180,6 +185,12 @@ mod tests {
"b",
"c",
],
starts_with: [
"foo",
],
ends_with: [
"bar",
],
},
),
AllowTag(
Expand Down
137 changes: 130 additions & 7 deletions src/middleware/deny_tag.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
use std::collections::HashSet;
use crate::config::DenyTagConfig;
use crate::middleware::Middleware;
use crate::types::Metric;
use anyhow::Error;
use std::collections::HashSet;

/// A middleware that denies metric tags based on configurable filter rules.
///
/// This middleware allows you to explicitly deny tags from metrics based on predefined
/// filter rules. It's particularly useful when you want to:
/// - Consistently deny specific tags across multiple metric calls
/// - Control metric cardinality by denying high-cardinality tags
/// - Centralize tag denial rules rather than handling them in individual metric calls
///
/// A common use case is managing metric cardinality. For example, you can
/// deny high-cardinality tags (like user IDs) in certain environments while allowing them
/// in others, all without modifying the metric emission code.
///
/// A rule can match a tag name exactly, by prefix, or by suffix.
pub struct DenyTag<M> {
/// Exact tag names to deny, stored as raw bytes.
tags: HashSet<Vec<u8>>,
/// Deny rules built from the configuration; a tag whose name matches any rule is
/// flagged for removal.
filters: HashSet<DenyType>,
/// The wrapped middleware. NOTE(review): presumably receives the metric after
/// filtering; the forwarding code is not visible here, so confirm.
next: M,
}

Expand All @@ -14,10 +25,14 @@ where
M: Middleware,
{
/// Builds a `DenyTag` from `config`, wrapping the `next` middleware.
///
/// Each entry of `config.starts_with`, `config.ends_with` and `config.tags` becomes a
/// `DenyType::StartsWith`, `DenyType::EndsWith` or `DenyType::Equals` rule respectively.
/// The rules are collected into a set, so duplicate entries collapse into a single rule.
pub fn new(config: DenyTagConfig, next: M) -> Self {
// Exact tag names, converted to bytes so they can be compared with raw tag names.
let tags: HashSet<Vec<u8>> =
HashSet::from_iter(config.tags.iter().cloned().map(|tag| tag.into_bytes()));
// Consume the three configured lists and wrap every string in its rule variant.
let filters = config.starts_with.into_iter()
.map(DenyType::StartsWith)
.chain(config.ends_with.into_iter()
.map(DenyType::EndsWith))
.chain(config.tags.into_iter().map(DenyType::Equals))
.collect();

Self { tags, next }
Self { filters, next }
}
}

Expand All @@ -34,7 +49,7 @@ where
let mut rewrite_tags = false;

for tag in metric.tags_iter() {
if self.tags.contains(tag.name()) {
if self.filters.iter().any(|f| f.matches(tag.name())) {
log::debug!("deny_tag: Dropping tag {:?}", tag.name());
rewrite_tags = true;
} else {
Expand All @@ -56,17 +71,40 @@ where
}
}

/// The kinds of rule that can select a metric tag, by name, for removal.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum DenyType {
    /// Selects tags whose name begins with the given string.
    StartsWith(String),
    /// Selects tags whose name finishes with the given string.
    EndsWith(String),
    /// Selects tags whose name is exactly the given string.
    Equals(String),
}

impl DenyType {
    /// Reports whether `value`, a tag name given as raw bytes, is selected by this rule.
    ///
    /// The comparison is byte-wise against the UTF-8 bytes of the rule's string, so it is
    /// case-sensitive. An empty `StartsWith` or `EndsWith` string selects every name.
    pub fn matches(&self, value: &[u8]) -> bool {
        match self {
            DenyType::Equals(exact) => value == exact.as_bytes(),
            DenyType::StartsWith(prefix) => value.starts_with(prefix.as_bytes()),
            DenyType::EndsWith(suffix) => value.ends_with(suffix.as_bytes()),
        }
    }
}

#[cfg(test)]
mod tests {
use std::cell::RefCell;

use super::*;
use crate::testutils::FnStep;

#[test]
fn basic() {
let config = DenyTagConfig {
tags: vec!["nope".to_string()],
starts_with: vec![],
ends_with: vec![]
};

let results = RefCell::new(vec![]);
Expand All @@ -91,4 +129,89 @@ mod tests {
Metric::new(b"servers.online:1|c|#country:china,extra_stuff,,".to_vec())
);
}

#[test]
fn test_filter_starts_with() {
    // Only a prefix rule is configured; the exact and suffix lists stay empty.
    let captured = RefCell::new(Vec::new());
    let sink = FnStep(|m: &mut Metric| captured.borrow_mut().push(m.clone()));
    let mut middleware = DenyTag::new(
        DenyTagConfig {
            tags: Vec::new(),
            starts_with: vec![String::from("hc_")],
            ends_with: Vec::new(),
        },
        sink,
    );

    let mut input = Metric::new(b"foo.bar:1|c|#abc.tag:test,hc_project:1000".to_vec());
    middleware.submit(&mut input);

    // The `hc_project` tag must be gone while `abc.tag` survives.
    let expected = Metric::new(b"foo.bar:1|c|#abc.tag:test".to_vec());
    assert_eq!(captured.borrow()[0], expected);
}

#[test]
fn test_filter_ends_with() {
    // Only a suffix rule is configured; the exact and prefix lists stay empty.
    let captured = RefCell::new(Vec::new());
    let sink = FnStep(|m: &mut Metric| captured.borrow_mut().push(m.clone()));
    let mut middleware = DenyTag::new(
        DenyTagConfig {
            tags: Vec::new(),
            starts_with: Vec::new(),
            ends_with: vec![String::from("_hc")],
        },
        sink,
    );

    let mut input = Metric::new(b"foo.bar:1|c|#abc.tag:test,project_hc:1000".to_vec());
    middleware.submit(&mut input);

    // The `project_hc` tag must be gone while `abc.tag` survives.
    let expected = Metric::new(b"foo.bar:1|c|#abc.tag:test".to_vec());
    assert_eq!(captured.borrow()[0], expected);
}

#[test]
fn test_multiple_filters() {
    // A prefix rule and a suffix rule are active at the same time.
    let captured = RefCell::new(Vec::new());
    let sink = FnStep(|m: &mut Metric| captured.borrow_mut().push(m.clone()));
    let mut middleware = DenyTag::new(
        DenyTagConfig {
            tags: Vec::new(),
            starts_with: vec![String::from("hc_")],
            ends_with: vec![String::from("_with_ending")],
        },
        sink,
    );

    let mut input = Metric::new(
        b"foo.bar:1|c|#abc.tag:test,hc_project:1000,metric_with_ending:12".to_vec(),
    );
    middleware.submit(&mut input);

    // Both the prefixed and the suffixed tag must be removed.
    let expected = Metric::new(b"foo.bar:1|c|#abc.tag:test".to_vec());
    assert_eq!(captured.borrow()[0], expected);
}

#[test]
fn test_deduplication() {
    // Every list repeats its single entry; the middleware should keep one rule per entry.
    let captured = RefCell::new(Vec::new());
    let sink = FnStep(|m: &mut Metric| captured.borrow_mut().push(m.clone()));
    let middleware = DenyTag::new(
        DenyTagConfig {
            tags: vec![String::from("test1"), String::from("test1")],
            starts_with: vec![String::from("start1"), String::from("start1")],
            ends_with: vec![String::from("end1"), String::from("end1")],
        },
        sink,
    );

    let expected: HashSet<DenyType> = vec![
        DenyType::Equals(String::from("test1")),
        DenyType::StartsWith(String::from("start1")),
        DenyType::EndsWith(String::from("end1")),
    ]
    .into_iter()
    .collect();
    assert_eq!(middleware.filters, expected);
}
}