Initial commit

darnodo
2025-06-14 17:03:07 +02:00
commit d999440f13
10 changed files with 449 additions and 0 deletions

fluent-bit.conf

@@ -0,0 +1,40 @@
[SERVICE]
    # Flush buffered data every 5 seconds
    Flush        5
    # Log level for Fluent Bit itself (useful for debugging)
    Log_Level    info
    # Location of the parser definitions
    Parsers_File parsers.conf

# [INPUT] - Where do the logs come from?
[INPUT]
    Name           tail
    Tag            squid.access
    Path           /var/log/squid/access.log
    # Use the parser defined in parsers.conf
    Parser         squid_parser
    # Database file used to track the reading position
    DB             /fluent-bit/db/squid.db
    # Read the file from the beginning instead of only tailing new lines
    Read_from_Head true

# [FILTER] - How do we transform the logs?
[FILTER]
    Name   lua
    Match  squid.access
    # Lua script file to load
    script transform.lua
    # Function to call in the script
    call   remap_records

# [OUTPUT] - Where do we send the logs?
[OUTPUT]
    Name     opentelemetry
    Match    squid.access
    # Host and port of your OTLP/HTTP SigNoz receiver
    Host     my-log-instance.com
    Port     4318
    # API path for logs
    logs_uri /v1/logs
    # Disable TLS for a plain http:// connection
    tls      Off
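
With these [OUTPUT] settings, every record tagged squid.access is POSTed over plain HTTP to http://my-log-instance.com:4318/v1/logs — 4318 is the standard OTLP/HTTP port, and tls Off matches the http:// scheme.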

parsers.conf

@@ -0,0 +1,11 @@
[PARSER]
    Name        squid_parser
    Format      regex
    # Regex adapted to the actual log format (User-Agent in quotes)
    Regex       ^(?<time>\S+)\s+(?<client_ip>\S+)\s+(?<status_code>\S+)\s+(?<response_size>\S+)\s+(?<method>\S+)\s+(?<url>\S+)\s+"(?<user_agent>[^"]*)"$
    # Which field contains the timestamp
    Time_Key    time
    # Time format (Unix timestamp with milliseconds)
    Time_Format %s.%L
    # Automatic type conversion
    Types       response_size:integer
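
As a sanity check, a log line in the expected format (hypothetical values) looks like this:

1718377387.123 192.168.1.10 200 5120 GET https://www.google.com/search?q=fluent+bit "Mozilla/5.0"

The regex captures time=1718377387.123, client_ip=192.168.1.10, status_code=200, response_size=5120 (cast to an integer by Types), method=GET, the URL, and the quoted User-Agent; Time_Format %s.%L then interprets the time field as a Unix timestamp with milliseconds.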

transform.lua

@@ -0,0 +1,83 @@
--[[
Lua transform script for Squid logs.
This script enriches parsed logs before sending them to SigNoz.
- Adds standard fields (http.*, client.*)
- Extracts the search term from URLs
- Identifies the search engine used
]]
-- Table mapping search-engine domains to display names. Easy to extend!
local search_engines = {
    ["google.com"]     = "Google",
    ["youtube.com"]    = "YouTube",
    ["bing.com"]       = "Bing",
    ["duckduckgo.com"] = "DuckDuckGo",
    ["yahoo.com"]      = "Yahoo",
    ["qwant.com"]      = "Qwant",
    ["ecosia.org"]     = "Ecosia"
}
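-- For example (hypothetical addition), one more entry is all Startpage
-- support would need: ["startpage.com"] = "Startpage".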
-- Helper function to decode URL-encoded strings.
-- Declared 'local' since it is only used within this file.
local function url_decode(str)
    if str == nil then return nil end
    str = string.gsub(str, "+", " ")
    str = string.gsub(str, "%%(%x%x)", function(h) return string.char(tonumber(h, 16)) end)
    return str
end
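-- Example: url_decode("fluent+bit%21") returns "fluent bit!".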
-- Main transform function, called by Fluent Bit for each log record.
-- THIS FUNCTION MUST REMAIN GLOBAL to be visible to the Fluent Bit engine.
---@diagnostic disable-next-line: lowercase-global
function remap_records(tag, timestamp, record)
    -- Return code 0 tells Fluent Bit to keep the record unmodified
    -- (nothing to remap when the line was not parsed).
    if record["method"] == nil then
        return 0, nil, nil
    end

    local new_record = {}

    -- Field mapping
    new_record["http.method"] = record["method"]
    new_record["http.url"] = record["url"]
    new_record["http.status_code"] = record["status_code"]
    new_record["http.response_content_length"] = record["response_size"]
    new_record["client.ip"] = record["client_ip"]
    new_record["log_body"] = "Squid request"
    if record["user_agent"] then
        new_record["http.user_agent"] = record["user_agent"]
    end

    -- Extract the search term and identify the search engine
    if record["url"] and string.match(record["url"], "[?&]q=") then
        local search_term = string.match(record["url"], "[?&]q=([^&]*)")
        if search_term and search_term ~= "" then
            new_record["search.query"] = url_decode(search_term)
            -- Search engine identification
            local engine_found = false
            for domain, name in pairs(search_engines) do
                -- string.find(string, pattern, start_pos, plain_search)
                if string.find(record["url"], domain, 1, true) then
                    new_record["search.engine"] = name
                    engine_found = true
                    break -- Stop at the first matching domain
                end
            end
            if not engine_found then
                new_record["search.engine"] = "Unknown" -- Engine not in the table
            end
        end
    end

    -- Attach resource data
    new_record["resource"] = {
        ["service.name"] = "squid-ssl-proxy-final",
        ["deployment.environment"] = "lab"
    }

    -- Return code 2: the record was modified, but the original timestamp is kept.
    return 2, timestamp, new_record
end
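
To sanity-check the script outside Fluent Bit, a snippet like the following (with hypothetical input values) can be appended temporarily to transform.lua and run with a standalone Lua interpreter:

-- Quick local test of remap_records; remove before deploying.
local code, ts, rec = remap_records("squid.access", 1718377387, {
    method        = "GET",
    url           = "https://www.google.com/search?q=fluent+bit",
    status_code   = "200",
    response_size = 5120,
    client_ip     = "192.168.1.10",
    user_agent    = "Mozilla/5.0"
})
print(code, ts, rec["search.query"], rec["search.engine"])
-- Expected output: 2   1718377387   fluent bit   Google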