--[[
  Lua transform script for Squid logs.

  This script enriches parsed logs before sending them to SigNoz.
    - Adds standard fields (http.*, client.*)
    - Extracts the search term from URLs
    - Identifies the search engine used
]]

-- Maps a search engine's registrable domain to its display name.
-- Keys must be lowercase: hosts are lowercased before lookup.
-- Sub-domains are matched automatically (e.g. "www.google.com" -> "google.com").
local search_engines = {
  ["google.com"] = "Google",
  ["youtube.com"] = "YouTube",
  ["bing.com"] = "Bing",
  ["duckduckgo.com"] = "DuckDuckGo",
  ["yahoo.com"] = "Yahoo",
  ["qwant.com"] = "Qwant",
  ["ecosia.org"] = "Ecosia",
}

--- Decode a URL-encoded (percent-encoded) string.
-- "+" is turned into a space and every "%XX" hex escape into the byte it
-- encodes. Malformed escapes (e.g. "%zz") are left untouched.
-- @param str string|nil encoded text
-- @return string|nil decoded text, or nil when `str` is nil
local function url_decode(str)
  if str == nil then
    return nil
  end
  -- "+" is a pattern quantifier in Lua, so it is escaped to match literally.
  str = string.gsub(str, "%+", " ")
  str = string.gsub(str, "%%(%x%x)", function(hex)
    return string.char(tonumber(hex, 16))
  end)
  return str
end

--- Extract the lowercased host name from a URL.
-- Accepts both full URLs ("https://www.example.com:8443/path?x=1") and
-- scheme-less authorities ("www.example.com:443").
-- @param url string
-- @return string host without scheme, user info, port, path, query or fragment
local function extract_host(url)
  -- Drop the scheme ("https://") when present.
  local rest = string.gsub(url, "^%a[%w+.%-]*://", "")
  -- The authority ends at the first "/", "?" or "#".
  local authority = string.match(rest, "^[^/?#]*")
  -- Drop "user:password@" and a trailing ":port".
  authority = string.gsub(authority, "^.*@", "")
  authority = string.gsub(authority, ":%d*$", "")
  return string.lower(authority)
end

--- Resolve the search engine name for a host.
-- Looks the host up in `search_engines`, then retries with each parent
-- domain ("a.b.google.com" -> "b.google.com" -> "google.com" -> "com").
-- The most specific entry wins, so the result never depends on table
-- iteration order.
-- @param host string lowercased host name
-- @return string engine display name, or "Unknown" when nothing matches
local function identify_engine(host)
  local candidate = host
  while candidate do
    local name = search_engines[candidate]
    if name then
      return name
    end
    -- Strip the left-most label; yields nil once no dot is left.
    candidate = string.match(candidate, "^[^.]*%.(.+)$")
  end
  return "Unknown" -- Engine not listed
end

--
-- Main transform function, called by Fluent Bit for each log.
-- THIS FUNCTION MUST REMAIN GLOBAL to be visible to the Fluent Bit engine.
--
-- @param tag       tag of the incoming record (unused)
-- @param timestamp timestamp of the incoming record, returned unchanged
-- @param record    table holding the parsed fields; reads "method", "url",
--                  "status_code", "response_size", "client_ip" and
--                  "user_agent"
-- @return 0, nil, nil when the record has no "method" field;
--         otherwise 2, the original timestamp and the new record
---@diagnostic disable-next-line: lowercase-global
function remap_records(tag, timestamp, record)
  if record["method"] == nil then
    return 0, nil, nil
  end

  local url = record["url"]

  -- Field mapping
  local new_record = {}
  new_record["http.method"] = record["method"]
  new_record["http.url"] = url
  new_record["http.status_code"] = record["status_code"]
  new_record["http.response_content_length"] = record["response_size"]
  new_record["client.ip"] = record["client_ip"]
  new_record["log_body"] = "Squid request"

  if record["user_agent"] then
    new_record["http.user_agent"] = record["user_agent"]
  end

  -- Extraction of search term and search engine.
  -- NOTE(review): only the "q" query parameter is inspected; engines that use
  -- a different parameter name will not produce a search.query - confirm
  -- whether that is intended for every entry in `search_engines`.
  if type(url) == "string" then
    -- Stop at "&" (next parameter) or "#" (fragment, never part of the query).
    local search_term = string.match(url, "[?&]q=([^&#]*)")
    if search_term and search_term ~= "" then
      new_record["search.query"] = url_decode(search_term)
      -- Match on the host only, so a domain that merely appears in the path
      -- or in the searched text is not mistaken for the engine.
      new_record["search.engine"] = identify_engine(extract_host(url))
    end
  end

  -- Adding resource data
  new_record["resource"] = {
    ["service.name"] = "squid-ssl-proxy-final",
    ["deployment.environment"] = "lab",
  }

  return 2, timestamp, new_record
end