mirror of
https://github.com/amir20/dozzle.git
synced 2026-06-23 04:10:12 +00:00
fix: rank log level guesses by confidence (#4772)
Deploy VitePress site to Pages / build (push) Has been cancelled
Deploy VitePress site to Pages / Deploy (push) Has been cancelled
Push container / Push branches and PRs (push) Has been cancelled
Test / Typecheck (push) Has been cancelled
Test / JavaScript Tests (push) Has been cancelled
Test / Go Tests (push) Has been cancelled
Test / Go Staticcheck (push) Has been cancelled
Test / Integration Tests (push) Has been cancelled
Deploy VitePress site to Pages / build (push) Has been cancelled
Deploy VitePress site to Pages / Deploy (push) Has been cancelled
Push container / Push branches and PRs (push) Has been cancelled
Test / Typecheck (push) Has been cancelled
Test / JavaScript Tests (push) Has been cancelled
Test / Go Tests (push) Has been cancelled
Test / Go Staticcheck (push) Has been cancelled
Test / Integration Tests (push) Has been cancelled
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -2,6 +2,7 @@ package container
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/rs/zerolog/log"
|
||||
@@ -23,20 +24,30 @@ var logLevels = [][]string{
|
||||
// aliasToCanonical maps every alias to its canonical level name.
|
||||
var aliasToCanonical = map[string]string{}
|
||||
|
||||
// levelRegexes holds one combined regex per canonical level. Each regex is an
|
||||
// alternation of all the shapes a level can take in a log line:
|
||||
// levelMatcher extracts a canonical log level from a line. re must expose a
|
||||
// single capture group holding the level alias, or a single-letter code when
|
||||
// single is true (mapped via singleLetterToLevel).
|
||||
type levelMatcher struct {
|
||||
re *regexp.Regexp
|
||||
single bool
|
||||
}
|
||||
|
||||
// levelTiers groups matchers by how confidently their shape identifies the log
|
||||
// level, highest confidence first:
|
||||
//
|
||||
// (?i:^<alt>[^a-z] // plain prefix: "error: ..."
|
||||
// |\[ ?<alt> ?\] // bracketed: "[ERROR]" / "[ error ]"
|
||||
// | <alt>[/|:-] // separator: " error|", " info:" (z2m)
|
||||
// |:<alt>\s) // colon prefix: "Tag:info " (z2m)
|
||||
// |"<UPPER>" // quoted: "\"ERROR\""
|
||||
// |\s<UPPER>\s // spaced: " ERROR "
|
||||
// 1. ^<level> start-of-line prefix: "ERROR: ...", "INF ..."
|
||||
// 2. [<level>] bracketed tag / single-letter: "[ERROR]", "[E]"
|
||||
// 3. <tag>:<level> structured prefix: "Zigbee2MQTT:info "
|
||||
// 4. "<LEVEL>" quoted upper-case value: LL="ERROR"
|
||||
// 5. <sp><level>[/|:-] separator: " error:", " info|"
|
||||
// 6. <sp><LEVEL><sp> bare upper-case token mid-line: "123 ERROR foo"
|
||||
//
|
||||
// The case-insensitive group covers the boundary-anchored forms; the trailing
|
||||
// uppercase-only branches catch mid-line `ERROR` tokens without false-firing on
|
||||
// the word "error" in prose.
|
||||
var levelRegexes = map[string]*regexp.Regexp{}
|
||||
// guessFromString walks the tiers in order and stops at the first that matches,
|
||||
// so a real level prefix at the front of the line always beats a level word
|
||||
// buried in the message body. Within a tier, two different levels mean the line
|
||||
// is ambiguous and we return "unknown" rather than guess. Match position and
|
||||
// level severity are deliberately not used as tie-breakers.
|
||||
var levelTiers [][]levelMatcher
|
||||
|
||||
// singleLetterBracket matches single-letter levels in brackets, e.g. [I], [E], [W]
|
||||
var singleLetterBracket = regexp.MustCompile(`\[([EWIDFTV])\]`)
|
||||
@@ -48,26 +59,33 @@ var levelKeys = []string{"@l", "level", "log.level", "severity"}
|
||||
|
||||
func init() {
|
||||
SupportedLogLevels = make(map[string]struct{}, len(logLevels)+1)
|
||||
var aliases []string
|
||||
for _, group := range logLevels {
|
||||
canonical := group[0]
|
||||
SupportedLogLevels[canonical] = struct{}{}
|
||||
for _, alias := range group {
|
||||
aliasToCanonical[alias] = canonical
|
||||
aliases = append(aliases, alias)
|
||||
}
|
||||
|
||||
alt := "(?:" + strings.Join(group, "|") + ")"
|
||||
upper := strings.ToUpper(alt)
|
||||
|
||||
levelRegexes[canonical] = regexp.MustCompile(
|
||||
`(?i:^` + alt + `[^a-z]` +
|
||||
`|\[ ?` + alt + ` ?\]` +
|
||||
`| ` + alt + `[/|:-]` +
|
||||
`|:` + alt + `\s)` +
|
||||
`|"` + upper + `"` +
|
||||
`|\s` + upper + `\s`,
|
||||
)
|
||||
}
|
||||
SupportedLogLevels["unknown"] = struct{}{}
|
||||
|
||||
// Longest aliases first so e.g. "warning" is preferred over "warn".
|
||||
sort.SliceStable(aliases, func(i, j int) bool { return len(aliases[i]) > len(aliases[j]) })
|
||||
joined := strings.Join(aliases, "|")
|
||||
upper := strings.ToUpper(joined)
|
||||
|
||||
levelTiers = [][]levelMatcher{
|
||||
{{re: regexp.MustCompile(`(?i)^(` + joined + `)[^a-z]`)}},
|
||||
{
|
||||
{re: regexp.MustCompile(`(?i)\[ ?(` + joined + `) ?\]`)},
|
||||
{re: singleLetterBracket, single: true},
|
||||
},
|
||||
{{re: regexp.MustCompile(`(?i):(` + joined + `)\s`)}},
|
||||
{{re: regexp.MustCompile(`"(` + upper + `)"`)}},
|
||||
{{re: regexp.MustCompile(`(?i) (` + joined + `)[/|:-]`)}},
|
||||
{{re: regexp.MustCompile(`\s(` + upper + `)\s`)}},
|
||||
}
|
||||
}
|
||||
|
||||
func guessLogLevel(logEvent *LogEvent) string {
|
||||
@@ -117,14 +135,30 @@ var singleLetterToLevel = map[byte]string{
|
||||
func guessFromString(value string) string {
|
||||
value = StripANSI(value)
|
||||
value = timestampRegex.ReplaceAllString(value, "")
|
||||
for _, group := range logLevels {
|
||||
if levelRegexes[group[0]].MatchString(value) {
|
||||
return group[0]
|
||||
for _, tier := range levelTiers {
|
||||
level := ""
|
||||
for _, m := range tier {
|
||||
for _, match := range m.re.FindAllStringSubmatch(value, -1) {
|
||||
var canonical string
|
||||
if m.single {
|
||||
canonical = singleLetterToLevel[match[1][0]]
|
||||
} else {
|
||||
canonical = aliasToCanonical[strings.ToLower(match[1])]
|
||||
}
|
||||
if canonical == "" {
|
||||
continue
|
||||
}
|
||||
if level == "" {
|
||||
level = canonical
|
||||
} else if level != canonical {
|
||||
// Two different levels at the same confidence: ambiguous.
|
||||
return "unknown"
|
||||
}
|
||||
}
|
||||
}
|
||||
if level != "" {
|
||||
return level
|
||||
}
|
||||
}
|
||||
|
||||
if m := singleLetterBracket.FindStringSubmatch(value); m != nil {
|
||||
return singleLetterToLevel[m[1][0]]
|
||||
}
|
||||
return "unknown"
|
||||
}
|
||||
|
||||
@@ -107,6 +107,17 @@ func TestGuessLogLevel(t *testing.T) {
|
||||
{"[T] trace message", "trace"},
|
||||
{"[V] verbose message", "trace"},
|
||||
{"12:00:00 [I] starting up", "info"},
|
||||
// Issue #4768: a real level prefix must win over a level word in the message body.
|
||||
{"INFO: connection established, retrying after error: timeout", "info"},
|
||||
{"INFO handling request failed with error: bad gateway", "info"},
|
||||
{"2024-12-30T17:43:16Z INF some message about an error: foo", "info"},
|
||||
{"INFO request completed but contained ERROR token", "info"},
|
||||
{"WARN: connection error: retrying", "warn"},
|
||||
// Symmetric: an ERROR prefix still wins over a later info word.
|
||||
{"ERROR: handler failed, info: will retry", "error"},
|
||||
// Equal confidence between two different levels -> unknown (don't guess).
|
||||
{"saw info: here and error: there", "unknown"},
|
||||
{"[INFO] [DEBUG] both bracketed", "unknown"},
|
||||
{orderedmap.New[string, any](
|
||||
orderedmap.WithInitialData(
|
||||
orderedmap.Pair[string, any]{Key: "key", Value: "value"},
|
||||
|
||||
Reference in New Issue
Block a user