Files
squid-ssl-bumping-lab/fluent-bit/conf/transform.lua
2025-06-14 17:03:07 +02:00

84 lines
2.7 KiB
Lua

--[[
Lua transform script for Squid logs.
This script enriches parsed logs before sending them to SigNoz.
- Adds standard fields (http.*, client.*)
- Extracts the search term from URLs
- Identifies the search engine used
]]
-- Known search engines, keyed by registrable domain; the value is the
-- display name written to the "search.engine" field.
-- To support another engine, add one more `["domain"] = "Name"` entry.
local search_engines = {
  ["google.com"]     = "Google",
  ["youtube.com"]    = "YouTube",
  ["bing.com"]       = "Bing",
  ["duckduckgo.com"] = "DuckDuckGo",
  ["yahoo.com"]      = "Yahoo",
  ["qwant.com"]      = "Qwant",
  ["ecosia.org"]     = "Ecosia",
}
--- Decodes a URL-encoded (percent-encoded) query component.
-- Every '+' becomes a space and every "%XX" hex escape becomes the byte it
-- encodes. A '%' that is not followed by two hex digits is left untouched.
-- Declared 'local' since it's only used within this file.
-- @param str encoded string, or nil
-- @return the decoded string, or nil when `str` is nil
local function url_decode(str)
  if str == nil then
    return nil
  end
  -- '+' is a quantifier in Lua patterns, so it is escaped to match literally.
  -- This pass must run before the percent pass so that "%2B" decodes to a
  -- real '+' instead of being turned into a space afterwards.
  str = string.gsub(str, "%+", " ")
  str = string.gsub(str, "%%(%x%x)", function(hex)
    return string.char(tonumber(hex, 16))
  end)
  return str
end
--
-- Returns the lower-cased host part of a URL.
-- Strips an optional "scheme://" prefix, then drops everything from the first
-- '/', '?' or '#' onwards, any "userinfo@" prefix and any ":port" suffix.
local function extract_host(url)
  local rest = string.gsub(url, "^%a[%w+.-]*://", "")
  local authority = string.match(rest, "^[^/?#]*")
  authority = string.gsub(authority, "^.*@", "")
  local host = string.match(authority, "^[^:]*")
  return string.lower(host)
end

-- Returns the display name of the search engine serving `url`, or "Unknown".
-- Only the host is inspected, and a domain must match on whole DNS labels, so
-- a domain that merely appears in the path or query string (for example
-- "?q=google.com") can no longer be mistaken for the engine itself.
-- Hosts such as "www.google.com" or "www.google.com.au" still match
-- "google.com".
local function identify_search_engine(url)
  -- Surround the host with dots so every label is delimited on both sides.
  local dotted_host = "." .. extract_host(url) .. "."
  for domain, name in pairs(search_engines) do
    -- Plain (non-pattern) search: the dots in `domain` are literal.
    if string.find(dotted_host, "." .. domain .. ".", 1, true) then
      return name
    end
  end
  return "Unknown"
end

-- Main transform function, called by Fluent Bit for each log.
-- THIS FUNCTION MUST REMAIN GLOBAL to be visible to the Fluent Bit engine.
--
-- Parameters:
--   tag       - tag of the incoming record (unused)
--   timestamp - timestamp of the incoming record, returned as-is
--   record    - table of parsed Squid log fields (method, url, status_code,
--               response_size, client_ip, user_agent)
-- Returns (code, timestamp, record) as expected by the Fluent Bit Lua filter:
--   0, nil, nil              when the record has no "method" field
--                            (the record is left unmodified)
--   2, timestamp, new_record otherwise
--                            (record replaced, original timestamp kept)
---@diagnostic disable-next-line: lowercase-global
function remap_records(tag, timestamp, record)
  if record["method"] == nil then
    return 0, nil, nil
  end

  local url = record["url"]
  local new_record = {}

  -- Field mapping
  new_record["http.method"] = record["method"]
  new_record["http.url"] = url
  new_record["http.status_code"] = record["status_code"]
  new_record["http.response_content_length"] = record["response_size"]
  new_record["client.ip"] = record["client_ip"]
  new_record["log_body"] = "Squid request"
  if record["user_agent"] then
    new_record["http.user_agent"] = record["user_agent"]
  end

  -- Extraction of search term and search engine.
  -- A single match both detects the "q" parameter and captures its raw value.
  if url then
    local search_term = string.match(url, "[?&]q=([^&]*)")
    if search_term and search_term ~= "" then
      new_record["search.query"] = url_decode(search_term)
      new_record["search.engine"] = identify_search_engine(url)
    end
  end

  -- Adding resource data
  new_record["resource"] = {
    ["service.name"] = "squid-ssl-proxy-final",
    ["deployment.environment"] = "lab"
  }

  return 2, timestamp, new_record
end