Skip to content

Commit

Permalink
feat(pii): Scrub attachments with explicit rules (#4415)
Browse files Browse the repository at this point in the history
Opt-in projects that have defined explicit `$attachment.'filename.txt'`
rules to non-mindump attachment scrubbing.
  • Loading branch information
jjbayer authored Jan 8, 2025
1 parent a93763a commit 95cc7cb
Show file tree
Hide file tree
Showing 4 changed files with 180 additions and 34 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## Unreleased

**Features**:

- Scrub non-minidump attachments if there are explicit `$attachment` rules. ([#4415](https://github.com/getsentry/relay/pull/4415))

**Internal**:

- Remove the `spool` command from Relay. ([#4423](https://github.com/getsentry/relay/pull/4423))
Expand Down
174 changes: 140 additions & 34 deletions relay-server/src/services/processor/attachment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::error::Error;
use std::sync::Arc;
use std::time::Instant;

use relay_pii::PiiAttachmentsProcessor;
use relay_pii::{PiiAttachmentsProcessor, SelectorPathItem, SelectorSpec};
use relay_statsd::metric;

use crate::envelope::{AttachmentType, ContentType};
Expand Down Expand Up @@ -65,43 +65,149 @@ pub fn scrub<Group>(managed_envelope: &mut TypedEnvelope<Group>, project_info: A
.get_item_by_mut(|item| item.attachment_type() == Some(&AttachmentType::Minidump));

if let Some(item) = minidump {
let filename = item.filename().unwrap_or_default();
let mut payload = item.payload().to_vec();

let processor = PiiAttachmentsProcessor::new(config.compiled());

// Minidump scrubbing can fail if the minidump cannot be parsed. In this case, we
// must be conservative and treat it as a plain attachment. Under extreme
// conditions, this could destroy stack memory.
let start = Instant::now();
match processor.scrub_minidump(filename, &mut payload) {
Ok(modified) => {
metric!(
timer(RelayTimers::MinidumpScrubbing) = start.elapsed(),
status = if modified { "ok" } else { "n/a" },
);
}
Err(scrub_error) => {
metric!(
timer(RelayTimers::MinidumpScrubbing) = start.elapsed(),
status = "error"
);
relay_log::warn!(
error = &scrub_error as &dyn Error,
"failed to scrub minidump",
);
metric!(timer(RelayTimers::AttachmentScrubbing), {
processor.scrub_attachment(filename, &mut payload);
})
scrub_minidump(item, config);
} else if has_simple_attachment_selector(config) {
// We temporarily only scrub attachments to projects that have at least one simple attachment rule,
// such as `$attachments.'foo.txt'`.
// After we have assessed the impact on performance we can relax this condition.
for item in envelope
.items_mut()
.filter(|item| item.attachment_type().is_some())
{
scrub_attachment(item, config);
}
}
}
}

fn scrub_minidump(item: &mut crate::envelope::Item, config: &relay_pii::PiiConfig) {
debug_assert_eq!(item.attachment_type(), Some(&AttachmentType::Minidump));
let filename = item.filename().unwrap_or_default();
let mut payload = item.payload().to_vec();

let processor = PiiAttachmentsProcessor::new(config.compiled());

// Minidump scrubbing can fail if the minidump cannot be parsed. In this case, we
// must be conservative and treat it as a plain attachment. Under extreme
// conditions, this could destroy stack memory.
let start = Instant::now();
match processor.scrub_minidump(filename, &mut payload) {
Ok(modified) => {
metric!(
timer(RelayTimers::MinidumpScrubbing) = start.elapsed(),
status = if modified { "ok" } else { "n/a" },
);
}
Err(scrub_error) => {
metric!(
timer(RelayTimers::MinidumpScrubbing) = start.elapsed(),
status = "error"
);
relay_log::warn!(
error = &scrub_error as &dyn Error,
"failed to scrub minidump",
);
metric!(
timer(RelayTimers::AttachmentScrubbing),
attachment_type = "minidump",
{
processor.scrub_attachment(filename, &mut payload);
}
)
}
}

let content_type = item
.content_type()
.unwrap_or(&ContentType::Minidump)
.clone();

item.set_payload(content_type, payload);
}

fn has_simple_attachment_selector(config: &relay_pii::PiiConfig) -> bool {
for application in &config.applications {
if let SelectorSpec::Path(vec) = &application.0 {
let Some([a, b]) = vec.get(0..2) else {
continue;
};
if matches!(
a,
SelectorPathItem::Type(relay_event_schema::processor::ValueType::Attachments)
) && matches!(b, SelectorPathItem::Key(_))
{
return true;
}
}
}
false
}

let content_type = item
.content_type()
.unwrap_or(&ContentType::Minidump)
.clone();
fn scrub_attachment(item: &mut crate::envelope::Item, config: &relay_pii::PiiConfig) {
let filename = item.filename().unwrap_or_default();
let mut payload = item.payload().to_vec();

item.set_payload(content_type, payload);
let processor = PiiAttachmentsProcessor::new(config.compiled());
let attachment_type_tag = match item.attachment_type() {
Some(t) => t.to_string(),
None => "".to_owned(),
};
metric!(
timer(RelayTimers::AttachmentScrubbing),
attachment_type = &attachment_type_tag,
{
processor.scrub_attachment(filename, &mut payload);
}
);

item.set_payload_without_content_type(payload);
}

#[cfg(test)]
mod tests {
use relay_pii::PiiConfig;

use super::*;

#[test]
fn matches_attachment_selector() {
let config = r#"{
"rules": {"0": {"type": "ip", "redaction": {"method": "remove"}}},
"applications": {"$attachments.'foo.txt'": ["0"]}
}"#;
let config: PiiConfig = serde_json::from_str(config).unwrap();
assert!(has_simple_attachment_selector(&config));
}

#[test]
fn does_not_match_wildcard() {
let config = r#"{
"rules": {},
"applications": {"$attachments.**":["0"]}
}"#;
let config: PiiConfig = serde_json::from_str(config).unwrap();
assert!(!has_simple_attachment_selector(&config));
}

#[test]
fn does_not_match_empty() {
let config = r#"{
"rules": {},
"applications": {}
}"#;
let config: PiiConfig = serde_json::from_str(config).unwrap();
assert!(!has_simple_attachment_selector(&config));
}

#[test]
fn does_not_match_something_else() {
let config = r#"{
"rules": {},
"applications": {
"**": ["0"]
}
}"#;
let config: PiiConfig = serde_json::from_str(config).unwrap();
assert!(!has_simple_attachment_selector(&config));
}
}
4 changes: 4 additions & 0 deletions relay-server/src/statsd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,10 @@ pub enum RelayTimers {
/// applied. Note that minidumps which failed to be parsed (status="error" in
/// scrubbing.minidumps.duration) will be scrubbed as plain attachments and count
/// towards this.
///
/// This metric is tagged with:
///
/// - `attachment_type`: The type of attachment, e.g. "minidump".
AttachmentScrubbing,
/// Total time spent to send request to upstream Relay and handle the response.
///
Expand Down
32 changes: 32 additions & 0 deletions tests/integration/test_attachments.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,38 @@ def test_attachments_ratelimit(
outcomes_consumer.assert_rate_limited("static_disabled_quota")


def test_attachments_pii(mini_sentry, relay):
event_id = "515539018c9b4260a6f999572f1661ee"

project_id = 42
project_config = mini_sentry.add_full_project_config(project_id)
project_config["config"]["piiConfig"] = {
"rules": {"0": {"type": "ip", "redaction": {"method": "remove"}}},
"applications": {"$attachments.'foo.txt'": ["0"]},
}
relay = relay(mini_sentry)

attachments = [
("att_1", "foo.txt", b"here's an IP that should get masked -> 127.0.0.1 <-"),
(
"att_2",
"bar.txt",
b"here's an IP that should not get scrubbed -> 127.0.0.1 <-",
),
]

for attachment in attachments:
relay.send_attachments(project_id, event_id, [attachment])

payloads = {
mini_sentry.captured_events.get().items[0].payload.bytes for _ in range(2)
}
assert payloads == {
b"here's an IP that should get masked -> ********* <-",
b"here's an IP that should not get scrubbed -> 127.0.0.1 <-",
}


def test_attachments_quotas(
mini_sentry,
relay_with_processing,
Expand Down

0 comments on commit 95cc7cb

Please sign in to comment.