--[[
Lua transform script for Squid logs.

This script enriches parsed logs before sending them to SigNoz:
- Adds standard fields (http.*, client.*)
- Extracts the search term from URLs
- Identifies the search engine used
]]
|
|
|
|
-- Maps a search engine's domain to the display name reported in
-- "search.engine". Easy to extend: add one `["domain"] = "Name"` entry per
-- engine. Keys are lower-case domain names.
local search_engines = {
  ["google.com"] = "Google",
  ["youtube.com"] = "YouTube",
  ["bing.com"] = "Bing",
  ["duckduckgo.com"] = "DuckDuckGo",
  ["yahoo.com"] = "Yahoo",
  ["qwant.com"] = "Qwant",
  ["ecosia.org"] = "Ecosia",
}
|
|
|
|
--- Decode a URL-encoded (percent-encoded) string.
-- Declared 'local' since it's only used within this file.
--
-- "+" is turned into a space first, and only then is every "%XX" hex escape
-- replaced by the byte it encodes, so an encoded plus sign ("%2B") comes out
-- as a literal "+". A "%" that is not followed by two hex digits is left
-- untouched.
--
-- @param str  the encoded string, or nil
-- @return the decoded string, or nil when `str` is nil
local function url_decode(str)
  if str == nil then
    return nil
  end

  -- "+" is a magic character in Lua patterns, so it is escaped as "%+".
  -- Assigning to a single variable drops gsub's second result (the count).
  local decoded = string.gsub(str, "%+", " ")
  decoded = string.gsub(decoded, "%%(%x%x)", function(hex)
    return string.char(tonumber(hex, 16))
  end)
  return decoded
end
|
|
|
|
--- Return the lower-cased host part of `url`, or "" when there is none.
-- The scheme, userinfo, port, path, query and fragment are stripped.
local function extract_host(url)
  local rest = string.gsub(url, "^%a[%w%+%.%-]*://", "")
  -- The authority ends at the first "/", "?" or "#".
  local authority = string.match(rest, "^[^/?#]*")
  authority = string.gsub(authority, "^.*@", "")
  local host = string.gsub(authority, ":%d*$", "")
  return string.lower(host)
end

--- Return the display name of the search engine serving `url`, or nil.
-- Only the host is inspected, and a domain must match on label boundaries:
-- "www.google.com" and "www.google.com.br" match "google.com", while
-- "notgoogle.com" and a domain that merely appears in the path or query
-- string do not.
local function identify_engine(url)
  local host = extract_host(url)
  if host == "" then
    return nil
  end

  -- Surrounding dots let one plain-text find() enforce label boundaries.
  local padded_host = "." .. host .. "."
  for domain, name in pairs(search_engines) do
    if string.find(padded_host, "." .. domain .. ".", 1, true) then
      return name
    end
  end
  return nil
end

--
-- Main transform function, called by Fluent Bit for each log.
-- THIS FUNCTION MUST REMAIN GLOBAL to be visible to the Fluent Bit engine.
--
-- Builds a new record from the parsed Squid fields:
--   method        -> http.method
--   url           -> http.url
--   status_code   -> http.status_code
--   response_size -> http.response_content_length
--   client_ip     -> client.ip
--   user_agent    -> http.user_agent (only when present)
-- and adds "log_body", "resource" and, when the URL carries a non-empty `q`
-- query parameter, "search.query" (URL-decoded) and "search.engine"
-- ("Unknown" when the host is not a listed engine).
--
-- NOTE(review): only the `q` parameter is inspected; engines that name their
-- search parameter differently are not captured - confirm this is intended.
--
-- @param tag        tag of the record (unused)
-- @param timestamp  timestamp of the record, returned as-is
-- @param record     table holding the parsed log fields
-- @return code, timestamp, record. Records without a "method" field return
--         0, nil, nil; all others return 2, timestamp, new_record. Per the
--         Fluent Bit Lua filter contract, 0 keeps the record unmodified and
--         2 replaces the record while keeping the original timestamp.
---@diagnostic disable-next-line: lowercase-global
function remap_records(tag, timestamp, record)
  if record["method"] == nil then
    return 0, nil, nil
  end

  local url = record["url"]

  -- Field mapping (a nil source value simply leaves the key unset).
  local new_record = {
    ["http.method"] = record["method"],
    ["http.url"] = url,
    ["http.status_code"] = record["status_code"],
    ["http.response_content_length"] = record["response_size"],
    ["client.ip"] = record["client_ip"],
    ["log_body"] = "Squid request",
  }

  if record["user_agent"] then
    new_record["http.user_agent"] = record["user_agent"]
  end

  -- Extraction of search term and search engine.
  if type(url) == "string" then
    -- The value ends at the next parameter ("&") or at the fragment ("#").
    local search_term = string.match(url, "[?&]q=([^&#]*)")
    if search_term and search_term ~= "" then
      new_record["search.query"] = url_decode(search_term)
      new_record["search.engine"] = identify_engine(url) or "Unknown"
    end
  end

  -- Adding resource data
  new_record["resource"] = {
    ["service.name"] = "squid-ssl-proxy-final",
    ["deployment.environment"] = "lab",
  }

  return 2, timestamp, new_record
end
|