Skip to content

Commit

Permalink
feat(composer): strip tracking ids on paste (#782)
Browse files Browse the repository at this point in the history
Co-authored-by: Chris Talbot <chris@talbothome.com>
  • Loading branch information
GeopJr and kop316 authored Feb 15, 2024
1 parent b989e7e commit 43ffe5e
Show file tree
Hide file tree
Showing 3 changed files with 199 additions and 1 deletion.
38 changes: 37 additions & 1 deletion src/Dialogs/Composer/EditorPage.vala
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ public class Tuba.EditorPage : ComposerPage {
}
}

public Adw.ToastOverlay toast_overlay;
construct {
// translators: "Text" as in text-based input
title = _("Text");
Expand All @@ -22,6 +23,8 @@ public class Tuba.EditorPage : ComposerPage {
if (char_limit_api > 0)
char_limit = char_limit_api;
remaining_chars = char_limit;

toast_overlay = new Adw.ToastOverlay ();
}

public void set_cursor_at_start () {
Expand Down Expand Up @@ -57,6 +60,37 @@ public class Tuba.EditorPage : ComposerPage {
recount_chars ();
}

/**
 * Handler connected to the editor buffer's paste_done signal.
 *
 * When the strip_tracking setting is enabled, the whole buffer text (not
 * only the pasted part) is run through Tracking.cleanup_content_with_uris.
 * If that changes anything, the buffer content is swapped for the cleaned
 * text inside a single user action and a toast with an "Undo" button is shown.
 *
 * @param clp the clipboard passed along by the signal; unused here
 */
protected void on_paste (Gdk.Clipboard clp) {
    if (!settings.strip_tracking) return;

    // Read the text once; nothing modifies the buffer before the comparison.
    var buffer = editor.buffer;
    string current_text = buffer.text;
    var found_uris = Tracking.extract_uris (current_text);
    string cleaned_text = Tracking.cleanup_content_with_uris (
        current_text,
        found_uris,
        Tracking.CleanupType.STRIP_TRACKING
    );

    // Nothing was stripped, leave the buffer alone.
    if (cleaned_text == current_text) return;

    Gtk.TextIter range_start;
    Gtk.TextIter range_end;
    buffer.get_bounds (out range_start, out range_end);

    // Group delete + insert so that one undo step restores the pasted text.
    buffer.begin_user_action ();
    buffer.delete (ref range_start, ref range_end);
    buffer.insert (ref range_start, cleaned_text, -1);
    buffer.end_user_action ();

    var toast = new Adw.Toast (_("Stripped tracking parameters"));
    toast.timeout = 3;
    toast.button_label = _("Undo");
    toast.button_clicked.connect (undo);
    toast_overlay.add_toast (toast);
}

/**
 * Reverts the last undoable action of the editor buffer.
 * Used as the handler of the "Undo" button on the tracking toast.
 */
private void undo () {
    var buffer = editor.buffer;
    buffer.undo ();
}

public override void on_push () {
status.status = editor.buffer.text;
status.sensitive = cw_button.active;
Expand Down Expand Up @@ -188,6 +222,7 @@ public class Tuba.EditorPage : ComposerPage {
};
bottom_bar.pack_end (char_counter);
editor.buffer.changed.connect (validate);
editor.buffer.paste_done.connect (on_paste);
}

protected void update_style_scheme () {
Expand Down Expand Up @@ -219,7 +254,8 @@ public class Tuba.EditorPage : ComposerPage {
child = editor
};

content.prepend (overlay);
toast_overlay.child = overlay;
content.prepend (toast_overlay);
editor.buffer.text = t_content;
if (force_cursor_at_start) set_cursor_at_start ();
}
Expand Down
75 changes: 75 additions & 0 deletions src/Utils/Tracking.vala
Original file line number Diff line number Diff line change
Expand Up @@ -118,4 +118,79 @@ public class Tuba.Tracking {

return @"$(str.slice(0, -1))$fragment";
}

/**
 * Collects the URIs mentioned in a piece of text.
 *
 * Mastodon's URL regex depends on other libraries and gets computed at
 * runtime; it includes every single TLD among other things. GLib's Uri
 * parser is used here instead, which promotes writing URIs in full
 * (including the scheme).
 *
 * @param content text to scan
 * @return every word containing "://" that GLib.Uri could parse, in order of appearance
 */
public static GLib.Uri[] extract_uris (string content) {
    GLib.Uri[] found = {};
    if (content.length == 0 || !content.contains ("://")) return found;

    // Whitespace, quotes, parentheses and square brackets delimit candidate words.
    string[] candidates = content.split_set (" \n\r\t'\"()[]");
    foreach (var candidate in candidates) {
        if (!candidate.contains ("://")) continue;

        try {
            found += GLib.Uri.parse (candidate, GLib.UriFlags.ENCODED);
        } catch {
            // Best effort: words that fail to parse are simply left out.
        }
    }

    return found;
}

/**
 * Selects how cleanup_content_with_uris rewrites the URIs it finds.
 */
public enum CleanupType {
// Replace each URI that has a query with the result of strip_utm_from_uri.
STRIP_TRACKING,
// Replace each URI with a run of 'X' characters of a caller-chosen length.
SPECIFIC_LENGTH;
}

/**
 * Rewrites the given URIs inside a piece of text.
 *
 * Each URI is looked up by its to_string () form. The search moves forward
 * through the text so that repeated URIs are each handled once; when a URI
 * is not found ahead of the previous match, the search is retried from the
 * start so URIs supplied out of order are still replaced.
 *
 * @param content text that contains the URIs
 * @param uris URIs to look for, ideally in the order they appear in content
 * @param cleanup_type STRIP_TRACKING swaps every URI that has a query for the
 *                     result of strip_utm_from_uri; SPECIFIC_LENGTH swaps
 *                     every URI for a run of 'X' characters
 * @param characters_reserved_per_url placeholder width used by SPECIFIC_LENGTH;
 *                                    values <= 0 leave the content untouched
 * @return the rewritten text, or the original text when nothing matched
 */
public static string cleanup_content_with_uris (owned string content, GLib.Uri[] uris, CleanupType cleanup_type, int characters_reserved_per_url = 23) {
    if (uris.length == 0) return content;

    // A single builder is edited in place instead of copying the whole
    // content into a fresh one for every URI.
    var builder = new StringBuilder (content);
    int cursor = 0;

    switch (cleanup_type) {
        case CleanupType.STRIP_TRACKING:
            foreach (var uri in uris) {
                // Without a query there is nothing to strip.
                if (uri.get_query () == null) continue;

                try {
                    string stripped = strip_utm_from_uri (uri).to_string ();
                    replace_uri_occurrence (builder, uri.to_string (), stripped, ref cursor);
                } catch (GLib.Error e) {
                    // Best effort: keep this URI as it is and carry on with the rest.
                    debug ("Couldn't strip tracking parameters from URI: %s", e.message);
                }
            }
            break;
        case CleanupType.SPECIFIC_LENGTH:
            if (characters_reserved_per_url <= 0) break;

            string placeholder = string.nfill (characters_reserved_per_url, 'X');
            foreach (var uri in uris) {
                replace_uri_occurrence (builder, uri.to_string (), placeholder, ref cursor);
            }
            break;
        default:
            break;
    }

    return builder.str;
}

// Swaps one occurrence of `original` in `builder` for `replacement`. Looks
// forward from `cursor` first and falls back to a search from the start;
// on success `cursor` is moved right after the inserted replacement.
private static void replace_uri_occurrence (StringBuilder builder, string original, string replacement, ref int cursor) {
    int found = builder.str.index_of (original, cursor);
    if (found == -1 && cursor > 0) {
        found = builder.str.index_of (original);
    }
    if (found == -1) return;

    builder.erase (found, original.length);
    builder.insert (found, replacement);
    cursor = found + replacement.length;
}
}
87 changes: 87 additions & 0 deletions tests/Tracking.test.vala
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@ struct TestUrl {
public string result;
}

// One fixture for test_cleanup: an input text plus the expected outcome of
// URI extraction and of both cleanup modes.
struct TestUrlCleanup {
// Input text handed to extract_uris and cleanup_content_with_uris.
public string content;
// URIs that extract_uris is expected to return for `content`, in order.
public GLib.Uri[] uris;
// Expected result of the STRIP_TRACKING cleanup.
public string stripped_content;
// Placeholder width passed to the SPECIFIC_LENGTH cleanup.
public int characters_reserved_per_url;
// Expected result of the SPECIFIC_LENGTH cleanup.
public string replaced_content;
}

const TestUrl[] URLS = {
{ "https://www.gnome.org/", "https://www.gnome.org/" },
{ "https://www.gnome.org/test", "https://www.gnome.org/test" },
Expand All @@ -16,6 +24,52 @@ const TestUrl[] URLS = {
{ "https://www.gnome.org/test?utm_source=tuba&foo=bar&oft_id=1312#main", "https://www.gnome.org/test?foo=bar#main" }
};

// Builds the fixtures consumed by test_cleanup. They are created at runtime
// because each one carries GLib.Uri instances made with GLib.Uri.build.
TestUrlCleanup[] get_cleanup_urls () {
TestUrlCleanup[] res = {};

// Text with no "://" in it: no URIs are expected and both cleanups
// must return the content unchanged.
res += TestUrlCleanup () {
content = "https :/ /www .gnome .org/",
uris = {},
stripped_content = "https :/ /www .gnome .org/",
characters_reserved_per_url = 1,
replaced_content = "https :/ /www .gnome .org/"
};

// A single URI without a query: stripping leaves it as is, the
// placeholder cleanup turns it into 15 'X' characters.
res += TestUrlCleanup () {
content = "https://www.gnome.org/",
uris = {GLib.Uri.build (GLib.UriFlags.ENCODED, "https", null, "www.gnome.org/", -1, "", null, null)},
stripped_content = "https://www.gnome.org/",
characters_reserved_per_url = 15,
replaced_content = "XXXXXXXXXXXXXXX"
};

// Multi-line text with a bare URI, a markdown link and an HTML anchor.
// Only the second URI is expected to change when stripping
// (utm_source and oft_id removed); all three become "XXXXXX" placeholders.
res += TestUrlCleanup () {
content = "Albums:\nDorian Electra - Fanfare https://dorianelectramusic.bandcamp.com/album/fanfare-explicit?foo=bar&fizz=buzz#main\n[bo en - pale machine 2](https://boen.bandcamp.com/album/pale-machine-2?utm_source=tuba&foo=bar&oft_id=1312#main)\n<a href=\"https://osno1.bandcamp.com/album/i-just-dont-wanna-name-it-anything-with-beach-in-the-title?tag=soft_clothes\">laura les - i just dont wanna name it anything with \"beach\" in the title</a>",
uris = {GLib.Uri.build (GLib.UriFlags.ENCODED, "https", null, "dorianelectramusic.bandcamp.com", -1, "/album/fanfare-explicit", "foo=bar&fizz=buzz", "main"), GLib.Uri.build (GLib.UriFlags.ENCODED, "https", null, "boen.bandcamp.com", -1, "/album/pale-machine-2", "utm_source=tuba&foo=bar&oft_id=1312", "main"), GLib.Uri.build (GLib.UriFlags.ENCODED, "https", null, "osno1.bandcamp.com", -1, "/album/i-just-dont-wanna-name-it-anything-with-beach-in-the-title", "tag=soft_clothes", null)},
stripped_content = "Albums:\nDorian Electra - Fanfare https://dorianelectramusic.bandcamp.com/album/fanfare-explicit?foo=bar&fizz=buzz#main\n[bo en - pale machine 2](https://boen.bandcamp.com/album/pale-machine-2?foo=bar#main)\n<a href=\"https://osno1.bandcamp.com/album/i-just-dont-wanna-name-it-anything-with-beach-in-the-title?tag=soft_clothes\">laura les - i just dont wanna name it anything with \"beach\" in the title</a>",
characters_reserved_per_url = 6,
replaced_content = "Albums:\nDorian Electra - Fanfare XXXXXX\n[bo en - pale machine 2](XXXXXX)\n<a href=\"XXXXXX\">laura les - i just dont wanna name it anything with \"beach\" in the title</a>"
};

// A negative placeholder width: the SPECIFIC_LENGTH cleanup is expected
// to leave the content untouched.
res += TestUrlCleanup () {
content = "https://www.gnome.org/",
uris = {GLib.Uri.build (GLib.UriFlags.ENCODED, "https", null, "www.gnome.org/", -1, "", null, null)},
stripped_content = "https://www.gnome.org/",
characters_reserved_per_url = -5,
replaced_content = "https://www.gnome.org/"
};

// Three space-separated URIs sharing the same path; oft_c and ad_id are
// expected to be stripped while foo=bar is kept.
res += TestUrlCleanup () {
content = "https://www.gnome.org/test?oft_c=1312 https://www.gnome.org/test?foo=bar&oft_c=1312 https://www.gnome.org/test?oft_c=1312&foo=bar&ad_id=1312",
uris = {GLib.Uri.build (GLib.UriFlags.ENCODED, "https", null, "www.gnome.org", -1, "/test", "oft_c=1312", null), GLib.Uri.build (GLib.UriFlags.ENCODED, "https", null, "www.gnome.org", -1, "/test", "foo=bar&oft_c=1312", null), GLib.Uri.build (GLib.UriFlags.ENCODED, "https", null, "www.gnome.org", -1, "/test", "oft_c=1312&foo=bar&ad_id=1312", null)},
stripped_content = "https://www.gnome.org/test https://www.gnome.org/test?foo=bar https://www.gnome.org/test?foo=bar",
characters_reserved_per_url = 1,
replaced_content = "X X X"
};

return res;
}

public void test_strip_utm () {
foreach (var test_url in URLS) {
var stripped_url = Tuba.Tracking.strip_utm (test_url.original);
Expand All @@ -32,10 +86,43 @@ public void test_strip_utm_fallback () {
}
}

/**
 * Runs every fixture from get_cleanup_urls through URI extraction and
 * through both cleanup modes, comparing against the expected values.
 */
public void test_cleanup () {
    foreach (var fixture in get_cleanup_urls ()) {
        // Extraction must yield the expected URIs, in the expected order.
        GLib.Uri[] found = Tuba.Tracking.extract_uris (fixture.content);
        assert_cmpint (found.length, CompareOperator.EQ, fixture.uris.length);

        int idx = 0;
        foreach (var expected_uri in fixture.uris) {
            assert_cmpstr (found[idx].to_string (), CompareOperator.EQ, expected_uri.to_string ());
            idx++;
        }

        // Tracking parameters removed.
        string stripped = Tuba.Tracking.cleanup_content_with_uris (
            fixture.content,
            fixture.uris,
            Tuba.Tracking.CleanupType.STRIP_TRACKING
        );
        assert_cmpstr (stripped, CompareOperator.EQ, fixture.stripped_content);

        // URIs swapped for fixed-width placeholders.
        string replaced = Tuba.Tracking.cleanup_content_with_uris (
            fixture.content,
            fixture.uris,
            Tuba.Tracking.CleanupType.SPECIFIC_LENGTH,
            fixture.characters_reserved_per_url
        );
        assert_cmpstr (replaced, CompareOperator.EQ, fixture.replaced_content);
    }
}

/**
 * Test entry point: registers the tracking test cases with GLib.Test and
 * runs them.
 *
 * @param args command line arguments, forwarded to Test.init
 * @return the value returned by Test.run
 */
public int main (string[] args) {
    Test.init (ref args);

    Test.add_func ("/test_strip_utm", test_strip_utm);
    Test.add_func ("/test_strip_utm_fallback", test_strip_utm_fallback);
    Test.add_func ("/test_cleanup", test_cleanup);

    int exit_code = Test.run ();
    return exit_code;
}

0 comments on commit 43ffe5e

Please sign in to comment.