From 3175d984b41611eb05df0b582aec16502d82d9f0 Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Tue, 16 Jun 2026 11:42:03 -0500 Subject: [PATCH] urls: sanitize known tracker parameters --- README.md | 9 +++++++ urls.go | 38 +++++++++++++++++++++++++++++ urls_test.go | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+) diff --git a/README.md b/README.md index 8183e61..096c74e 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,15 @@ webtools.SetCacheControl(w, 24 * time.Hour) webtools.SetBasicAuth(w, "admin", "passw0rd") ``` +##### sanitize a url + +Use `Sanitize` to remove known trackers from URL parameters. + +```go +u := webtools.Sanitize("https://example.org?utm_source=abc123&page=1") +// utm_source=abc123 is removed from the url +``` + Also helps with crafting `net/url.URL` values with correctly encoded URL paramter values. diff --git a/urls.go b/urls.go index 75f40b3..011edd4 100644 --- a/urls.go +++ b/urls.go @@ -35,3 +35,41 @@ func GetDomain(s string) string { } return u.Host } + +// Sanitize purges known tracker URL parameters. +// +// If s fails to parse as a url, the original string is returned. 
// trackerParameters is the set of query parameter names removed by Sanitize.
var trackerParameters = map[string]struct{}{
	// urchin tracking module
	"utm_source":           {},
	"utm_medium":           {},
	"utm_campaign":         {},
	"utm_term":             {},
	"utm_content":          {},
	"utm_social_handle_id": {},
	"utm_id":               {},
	"utm_source_platform":  {},
	"utm_creative_format":  {},
	"utm_marketing_tactic": {},

	// socials
	"soc_src": {},
	"soc_trk": {},

	// ad clicks
	"gclid":   {},
	"fbclid":  {},
	"msclkid": {},
	"ttclid":  {},
	"twclid":  {},
	"dclid":   {},
	"yclid":   {},

	// mailers
	"mc_cid":  {},
	"mc_eid":  {},
	"_hsenc":  {},
	"mkt_tok": {},
	"_kx":     {},
	"_hsmi":   {},

	// guce
	"guccounter":        {},
	"guce_referrer":     {},
	"guce_referrer_sig": {},
}

// Sanitize purges known tracker URL parameters.
//
// Parameters that survive keep their original order and their original
// encoding; only the pairs whose (decoded) key is a known tracker are dropped.
//
// If s fails to parse as a url, or carries no known tracker parameter, the
// original string is returned unchanged.
func Sanitize(s string) string {
	u, err := url.Parse(s)
	if err != nil || u.RawQuery == "" {
		return s // leave as-is
	}

	cleaned, removed := stripTrackers(u.RawQuery)
	if !removed {
		return s // nothing to purge, do not rewrite the url
	}

	u.RawQuery = cleaned
	return u.String()
}

// stripTrackers returns raw without the "&"-separated pairs whose key is a
// known tracker, and reports whether any pair was removed.
//
// The raw query is filtered pair by pair rather than round-tripped through
// url.Values: Values.Encode sorts by key and re-encodes, and URL.Query
// silently discards pairs it cannot parse.
func stripTrackers(raw string) (string, bool) {
	kept := make([]byte, 0, len(raw))
	removed := false

	for start := 0; start <= len(raw); {
		end := start
		for end < len(raw) && raw[end] != '&' {
			end++
		}
		pair := raw[start:end]
		start = end + 1

		switch {
		case pair == "":
			// empty segment ("&&" or a trailing "&") carries no parameter
		case isTracker(pair):
			removed = true
		default:
			if len(kept) > 0 {
				kept = append(kept, '&')
			}
			kept = append(kept, pair...)
		}
	}

	return string(kept), removed
}

// isTracker reports whether the key of a single raw "key=value" pair names a
// known tracker. A key that cannot be unescaped is never treated as a tracker.
func isTracker(pair string) bool {
	key := pair
	for i := 0; i < len(pair); i++ {
		if pair[i] == '=' {
			key = pair[:i]
			break
		}
	}

	name, err := url.QueryUnescape(key)
	if err != nil {
		return false
	}

	_, ok := trackerParameters[name]
	return ok
}

// Test_Sanitize checks that tracker parameters are removed while everything
// else in the url is left alone.
func Test_Sanitize(t *testing.T) {
	t.Parallel()

	cases := []struct {
		name  string
		input string
		exp   string
	}{
		{
			name:  "broken",
			input: "abc123",
			exp:   "abc123",
		},
		{
			name:  "unparsable",
			input: "http://[::1",
			exp:   "http://[::1",
		},
		{
			name:  "simple",
			input: "https://example.org",
			exp:   "https://example.org",
		},
		{
			name:  "source",
			input: "https://example.org?utm_source=blah",
			exp:   "https://example.org",
		},
		{
			name:  "mix",
			input: "https://example.org?file=AA&utm_term=A",
			exp:   "https://example.org?file=AA",
		},
		{
			name:  "multiple",
			input: "https://example.org?utm_source=blah&file=AA&utm_content=none&page=2",
			exp:   "https://example.org?file=AA&page=2",
		},
		{
			name:  "social handle",
			input: "https://example.org?a=1&utm_social_handle_id=1847478489",
			exp:   "https://example.org?a=1",
		},
		{
			name:  "ad clicks",
			input: "https://example.org?gclid=abc123",
			exp:   "https://example.org",
		},
		{
			name:  "mailers",
			input: "https://example.org?mc_cid=abc234",
			exp:   "https://example.org",
		},
		{
			name:  "soc_src",
			input: "https://example.org?soc_src=123",
			exp:   "https://example.org",
		},
		{
			name:  "soc_trk",
			input: "https://example.org?soc_trk=reddit",
			exp:   "https://example.org",
		},
		{
			name:  "order preserved",
			input: "https://example.org?b=2&a=1&utm_source=x",
			exp:   "https://example.org?b=2&a=1",
		},
		{
			name:  "no trackers untouched",
			input: "https://example.org?q=a%20b&flag",
			exp:   "https://example.org?q=a%20b&flag",
		},
		{
			name:  "semicolon pair kept",
			input: "https://example.org?a=1;b=2&gclid=x",
			exp:   "https://example.org?a=1;b=2",
		},
		{
			name:  "escaped tracker key",
			input: "https://example.org?utm%5Fsource=x&page=1",
			exp:   "https://example.org?page=1",
		},
		{
			name:  "repeated tracker with fragment",
			input: "https://example.org?utm_source=a&&utm_source=b&x=1#top",
			exp:   "https://example.org?x=1#top",
		},
	}

	for _, tc := range cases {
		tc := tc // per-iteration copy for toolchains older than Go 1.22
		t.Run(tc.name, func(t *testing.T) {
			if result := Sanitize(tc.input); result != tc.exp {
				t.Errorf("Sanitize(%q) = %q, want %q", tc.input, result, tc.exp)
			}
		})
	}
}