--
-- utm-filter.lua: filter out Google Analytics bullshit from URLs
--
-- Copyright (c) 2015, Přemysl Janouch
--
-- Permission to use, copy, modify, and/or distribute this software for any
-- purpose with or without fee is hereby granted, provided that the above
-- copyright notice and this permission notice appear in all copies.
--
-- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
-- SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
-- OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
-- CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
--
-- URL parameters that can be thrown away, as they don't affect page function;
-- the keys are the parameter names, the values are merely truthy markers
local banned = {}
for _, name in ipairs {
	"gclid",
	"utm_source",
	"utm_medium",
	"utm_term",
	"utm_content",
	"utm_campaign",
} do
	banned[name] = 1
end
-- Filter an '&'-separated parameter list: items whose name is banned are
-- dropped, the rest is joined back with '&'; empty items are skipped
local function do_args (args)
	local kept = {}
	for item in args:gmatch ("[^&]+") do
		-- The name is everything up to the first '=', or the whole item
		local name = item:match ("^[^=]*")
		if not banned[name] then
			kept[#kept + 1] = item
		end
	end
	return table.concat (kept, "&")
end
-- Clean the query and then the fragment part of a single URL; a part that
-- ends up with no parameters is removed together with its delimiter
local function do_single_url (url)
	-- Make a substitution callback gluing the filtered parameters back
	-- onto what precedes them with the given delimiter
	local function rebuild (delimiter)
		return function (head, args)
			local kept = do_args (args)
			if #kept == 0 then
				return head
			end
			return head .. delimiter .. kept
		end
	end

	-- The parentheses discard the substitution count of the first pass
	local without_query = (url:gsub ('^([^?#]*)%?([^#]*)', rebuild ("?"), 1))
	return without_query:gsub ('^([^#]*)#(.*)', rebuild ("#"), 1)
end
-- Run do_single_url over every http(s) URL found in a piece of text;
-- returns the resulting text followed by the number of URLs visited
local function do_text (text)
	-- The frontier only lets an URL begin at the start of the text or
	-- after a non-graphical character, such as a space
	local url_pattern = '%f[%g]https?://%g+'
	return text:gsub (url_pattern, do_single_url)
end
-- Clean URLs within the trailing parameter of IRC lines passing through
-- the hook; lines without a trailing parameter are returned untouched
degesch.hook_irc (function (hook, server, line)
	-- Skip the optional IRCv3 message tags ("@...") and the optional prefix
	-- (":..."), so that the colon introducing the prefix can't be mistaken
	-- for the one introducing the trailing parameter
	local init = line:match ("^@[^ ]* +()") or 1
	init = line:match ("^:[^ ]* +()", init) or init

	-- The trailing parameter starts at the first " :" from there on; any
	-- later " :" (think of smileys) already belongs to the message, which
	-- is why a greedy match up to the last one would skip URLs before it
	local marker = line:find (" :", init, true)
	if not marker then
		return line
	end
	return line:sub (1, marker + 1) .. do_text (line:sub (marker + 2))
end)
-- Register an input hook that hands back the input with its URLs cleaned;
-- neither the hook object nor the buffer are needed for that
degesch.hook_input (function (_, _, typed)
	return do_text (typed)
end)