Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,15 @@ webtools.SetCacheControl(w, 24 * time.Hour)
webtools.SetBasicAuth(w, "admin", "passw0rd")
```

##### sanitize a url

Use `Sanitize` to remove known trackers from URL parameters.

```go
u := webtools.Sanitize("https://example.org?utm_source=abc123&page=1")
// utm_source=abc123 is removed from the url
```

Also helps with crafting `net/url.URL` values with correctly encoded URL
parameter values.

Expand Down
38 changes: 38 additions & 0 deletions urls.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,41 @@ func GetDomain(s string) string {
}
return u.Host
}

// Sanitize purges known tracker URL parameters.
//
// Only the tracker parameters are removed: every other query parameter is
// kept in its original position and with its original encoding, so a URL
// that carries no trackers keeps its query string untouched.
//
// If s fails to parse as a url, the original string is returned.
func Sanitize(s string) string {
	u, err := url.Parse(s)
	if err != nil || u == nil {
		return s // leave as-is
	}

	// Filter the raw query text rather than round-tripping through
	// url.Values: Values.Encode sorts keys and re-encodes values, and
	// URL.Query silently discards pairs it cannot parse (e.g. ones
	// containing ';'), which would drop parameters that are not trackers.
	u.RawQuery = stripTrackerParams(u.RawQuery)
	return u.String()
}

// stripTrackerParams returns rawQuery without the key=value pairs whose
// (unescaped) key is a known tracker parameter. Remaining pairs are copied
// verbatim and joined with '&'; empty pairs are dropped.
func stripTrackerParams(rawQuery string) string {
	kept := make([]byte, 0, len(rawQuery))
	rest := rawQuery

	for rest != "" {
		// Split off the next '&'-delimited pair.
		pair := rest
		rest = ""
		for i := 0; i < len(pair); i++ {
			if pair[i] == '&' {
				pair, rest = pair[:i], pair[i+1:]
				break
			}
		}
		if pair == "" {
			continue
		}

		// The key is everything before the first '='; a pair without '='
		// is a key with no value.
		key := pair
		for i := 0; i < len(pair); i++ {
			if pair[i] == '=' {
				key = pair[:i]
				break
			}
		}

		// Keys that fail to unescape cannot match a tracker name, so they
		// are kept exactly as written.
		if name, err := url.QueryUnescape(key); err == nil && isTrackerParam(name) {
			continue
		}

		if len(kept) > 0 {
			kept = append(kept, '&')
		}
		kept = append(kept, pair...)
	}

	return string(kept)
}

// isTrackerParam reports whether name is a known tracker URL parameter.
// The comparison is case-sensitive.
func isTrackerParam(name string) bool {
	switch name {
	// urchin tracking module
	case "utm_source", "utm_medium", "utm_campaign", "utm_term", "utm_content",
		"utm_social_handle_id", "utm_id", "utm_source_platform",
		"utm_creative_format", "utm_marketing_tactic":
		return true

	// socials
	case "soc_src", "soc_trk":
		return true

	// ad clicks
	case "gclid", "fbclid", "msclkid", "ttclid", "twclid", "dclid", "yclid":
		return true

	// mailers
	case "mc_cid", "mc_eid", "_hsenc", "mkt_tok", "_kx", "_hsmi":
		return true

	// guce
	case "guccounter", "guce_referrer", "guce_referrer_sig":
		return true
	}
	return false
}
68 changes: 68 additions & 0 deletions urls_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,71 @@ func TestGetDomain(t *testing.T) {
result := GetDomain(orig)
must.Eq(t, "stage.example.org", result)
}

// Test_Sanitize runs Sanitize over a table of URLs and checks that tracker
// parameters are stripped while everything else is left in place.
func Test_Sanitize(t *testing.T) {
	t.Parallel()

	type scenario struct {
		label string
		in    string
		want  string
	}

	table := []scenario{
		// input that is not really a url comes back unchanged
		{label: "broken", in: "abc123", want: "abc123"},
		// nothing to strip
		{label: "simple", in: "https://example.org", want: "https://example.org"},
		// a lone tracker parameter
		{label: "source", in: "https://example.org?utm_source=blah", want: "https://example.org"},
		// tracker alongside a regular parameter
		{label: "mix", in: "https://example.org?file=AA&utm_term=A", want: "https://example.org?file=AA"},
		// several trackers interleaved with regular parameters
		{
			label: "multiple",
			in:    "https://example.org?utm_source=blah&file=AA&utm_content=none&page=2",
			want:  "https://example.org?file=AA&page=2",
		},
		{
			label: "social handle",
			in:    "https://example.org?a=1&utm_social_handle_id=1847478489",
			want:  "https://example.org?a=1",
		},
		{label: "ad clicks", in: "https://example.org?gclid=abc123", want: "https://example.org"},
		{label: "mailers", in: "https://example.org?mc_cid=abc234", want: "https://example.org"},
		{label: "soc_src", in: "https://example.org?soc_src=123", want: "https://example.org"},
		{label: "soc_trk", in: "https://example.org?soc_trk=reddit", want: "https://example.org"},
	}

	for i := range table {
		sc := table[i]
		t.Run(sc.label, func(t *testing.T) {
			got := Sanitize(sc.in)
			must.Eq(t, sc.want, got)
		})
	}
}