diff --git a/crates/server/src/proxy/compute/data_collection/mod.rs b/crates/server/src/proxy/compute/data_collection/mod.rs index 6604e63..a32a41a 100644 --- a/crates/server/src/proxy/compute/data_collection/mod.rs +++ b/crates/server/src/proxy/compute/data_collection/mod.rs @@ -58,7 +58,7 @@ pub async fn process_from_html( .data_collection .as_mut() .unwrap() - .populate_event_contexts(); + .populate_event_contexts("edge"); // check if payload.events is empty, if so, add a page event if payload.data_collection.as_ref().unwrap().events.is_none() { @@ -81,6 +81,7 @@ pub async fn process_from_html( context: payload.data_collection.clone().unwrap().context.clone(), // fill in components with payload.data_collection.unwrap().components if exists components: None, + from: Some("edge".to_string()), }]); } @@ -146,7 +147,6 @@ pub async fn process_from_html( .header("Content-Type", "application/json") .header("Authorization", format!("Basic {}", b64)) .header("X-Edgee-Debug", debug) - .header("X-Edgee-From", "edge") .header("X-Edgee-Host", host) .body(events_json) .send() @@ -181,12 +181,18 @@ pub async fn process_from_json( // add more info from the request payload = add_more_info_from_request(request, payload); + let from = if from_third_party_sdk { + "third" + } else { + "client" + }; + // populate the events with the data collection context payload .data_collection .as_mut() .unwrap() - .populate_event_contexts(); + .populate_event_contexts(from); let mut events = payload .data_collection @@ -250,11 +256,6 @@ pub async fn process_from_json( } else { "false" }; - let from = if from_third_party_sdk { - "third" - } else { - "client" - }; let host = request.get_host().to_string(); // now, we can send the payload to the edgee data-collection-api without waiting for the response tokio::spawn(async move { @@ -263,7 +264,6 @@ pub async fn process_from_json( .header("Content-Type", "application/json") .header("Authorization", format!("Basic {}", b64)) .header("X-Edgee-Debug", debug) - .header("X-Edgee-From", from) .header("X-Edgee-Host", host) .body(events_json) .send() @@ -838,19 +838,6 @@ fn add_more_info_from_request(request: &RequestHandle, mut payload: Payload) -> .unwrap() .ip = Some(request.get_client_ip().to_string()); - // anonymize the ip address - payload - .data_collection - .as_mut() - .unwrap() - .context - .as_mut() - .unwrap() - .client - .as_mut() - .unwrap() - .anonymize_ip(); - // locale let locale = get_preferred_language(request.get_headers()); payload diff --git a/crates/wasmtime/src/components/mod.rs b/crates/wasmtime/src/components/mod.rs index bd60a33..f0124c6 100644 --- a/crates/wasmtime/src/components/mod.rs +++ b/crates/wasmtime/src/components/mod.rs @@ -42,7 +42,7 @@ pub async fn send_data_collection( for event in events { // Convert the event to the one which can be passed to the component - let provider_event: provider::Event = event.clone().into(); + let mut provider_event: provider::Event = event.clone().into(); let client = reqwest::Client::builder() .timeout(Duration::from_secs(5)) @@ -54,7 +54,10 @@ pub async fn send_data_collection( provider::EventType::Track => "track", }; - let anonymized_client_ip = HeaderValue::from_str(&provider_event.context.client.ip)?; + // todo: anonymize ip following the consent mapping + provider_event.context.client.ip = anonymize_ip(provider_event.context.client.ip.clone()); + + let client_ip = HeaderValue::from_str(&provider_event.context.client.ip)?; let user_agent = HeaderValue::from_str(&provider_event.context.client.user_agent)?; for cfg in component_config.data_collection.iter() { @@ -116,7 +119,7 @@ pub async fn send_data_collection( for (key, value) in request.headers.iter() { headers.insert(HeaderName::from_str(key)?, HeaderValue::from_str(value)?); } - insert_expected_headers(&mut headers, event, &anonymized_client_ip, &user_agent)?; + insert_expected_headers(&mut headers, event, &client_ip, &user_agent)?; let client = client.clone(); @@ -204,17 +207,41 @@ pub async fn send_data_collection( Ok(()) } +fn anonymize_ip(ip: String) -> String { + if ip.is_empty() { + return ip; + } + + use std::net::IpAddr; + + const KEEP_IPV4_BYTES: usize = 3; + const KEEP_IPV6_BYTES: usize = 6; + + let ip: IpAddr = ip.clone().parse().unwrap(); + let anonymized_ip = match ip { + IpAddr::V4(ip) => { + let mut data = ip.octets(); + data[KEEP_IPV4_BYTES..].fill(0); + IpAddr::V4(data.into()) + } + IpAddr::V6(ip) => { + let mut data = ip.octets(); + data[KEEP_IPV6_BYTES..].fill(0); + IpAddr::V6(data.into()) + } + }; + + anonymized_ip.to_string() +} + fn insert_expected_headers( headers: &mut HeaderMap, event: &Event, - anonymized_client_ip: &HeaderValue, + client_ip: &HeaderValue, user_agent: &HeaderValue, ) -> anyhow::Result<()> { // Insert client ip in the x-forwarded-for header - headers.insert( - HeaderName::from_str("x-forwarded-for")?, - anonymized_client_ip.clone(), - ); + headers.insert(HeaderName::from_str("x-forwarded-for")?, client_ip.clone()); // Insert User-Agent in the user-agent header headers.insert(header::USER_AGENT, user_agent.clone()); diff --git a/crates/wasmtime/src/payload.rs b/crates/wasmtime/src/payload.rs index 2c739b5..08c43ea 100644 --- a/crates/wasmtime/src/payload.rs +++ b/crates/wasmtime/src/payload.rs @@ -50,7 +50,7 @@ pub struct DataCollection { } impl DataCollection { - pub fn populate_event_contexts(&mut self) { + pub fn populate_event_contexts(&mut self, from: &str) { let components = self.components.clone(); // if events are set, we use the data collection context to fill in the missing fields @@ -58,6 +58,7 @@ impl DataCollection { for event in events.iter_mut() { event.uuid = uuid::Uuid::new_v4().to_string(); event.timestamp = Utc::now(); + event.from = Some(from.to_string()); // fill in the missing context fields if let Some(context) = &mut event.context { @@ -120,6 +121,9 @@ pub struct Event { #[serde(skip_serializing_if = "Option::is_none")] pub components: Option>, + + #[serde(skip_serializing_if = "Option::is_none")] + pub from: Option, } impl<'de> Deserialize<'de> for Event { @@ -134,6 +138,7 @@ impl<'de> Deserialize<'de> for Event { data: Option, context: Option, components: Option>, + from: Option, } let helper = EventHelper::deserialize(deserializer)?; @@ -162,6 +167,7 @@ impl<'de> Deserialize<'de> for Event { data, context: helper.context, components: helper.components, + from: helper.from, }) } } @@ -435,35 +441,6 @@ pub struct Client { pub city: Option, } -impl Client { - pub fn anonymize_ip(&mut self) { - if self.ip.is_none() { - return; - } - - use std::net::IpAddr; - - const KEEP_IPV4_BYTES: usize = 3; - const KEEP_IPV6_BYTES: usize = 6; - - let ip: IpAddr = self.ip.clone().unwrap().parse().unwrap(); - let anonymized_ip = match ip { - IpAddr::V4(ip) => { - let mut data = ip.octets(); - data[KEEP_IPV4_BYTES..].fill(0); - IpAddr::V4(data.into()) - } - IpAddr::V6(ip) => { - let mut data = ip.octets(); - data[KEEP_IPV6_BYTES..].fill(0); - IpAddr::V6(data.into()) - } - }; - - self.ip = Some(anonymized_ip.to_string()); - } -} - #[derive(Serialize, Deserialize, Debug, Default, Clone)] pub struct Session { pub session_id: String,