[format] order the formats based on sample matches

This commit is contained in:
Timothy Stack 2015-04-05 07:12:20 -07:00
parent fd55176df8
commit a300ce9977
10 changed files with 181 additions and 3 deletions

4
NEWS
View File

@ -31,6 +31,10 @@ lnav v0.7.3:
specially on OS X. If you come across this error, use the
'--with-readline=prefix' switch to specify the path to the correct
location of libreadline.
* The order that log formats are tried against a log file is now
automatically determined so that more specific formats are tested
before more general ones. The order is determined on startup based on
how each format matches each other formats sample lines.
lnav v0.7.2:
* Added log formats for vdsm, openstack, and the vmkernel.

View File

@ -185,3 +185,21 @@ Executing the format file should then install it automatically::
$ chmod ugo+rx myformat.json
$ ./myformat.json
info: installed: /home/example/.lnav/formats/installed/myformat_log.json
Format Order When Scanning a File
---------------------------------
When **lnav** loads a file, it tries each log format against the first ~1000
lines of the file trying to find a match. When a match is found, that log
format will be locked in and used for the rest of the lines in that file.
Since there may be overlap between formats, **lnav** performs a test on
startup to determine which formats match each others sample lines. Using
this information it will create an ordering of the formats so that the more
specific formats are tried before the more generic ones. For example, a
format that matches certain syslog messages will match its own sample lines,
but not the ones in the syslog samples. On the other hand, the syslog format
will match its own samples and those in the more specific format. You can
see the order of the format by enabling debugging and checking the **lnav**
log file for the "Format order" message::
$ lnav -d /tmp/lnav.log

View File

@ -1213,8 +1213,9 @@ void external_log_format::build(std::vector<std::string> &errors)
++pat_iter) {
pattern &pat = *(*pat_iter);
if (!pat.p_pcre)
if (!pat.p_pcre) {
continue;
}
std::string line_partial = iter->s_line;
@ -1243,6 +1244,32 @@ void external_log_format::build(std::vector<std::string> &errors)
}
}
bool external_log_format::match_samples(const vector<sample> &samples) const
{
for (vector<sample>::const_iterator sample_iter = samples.begin();
sample_iter != samples.end();
++sample_iter) {
for (std::vector<external_log_format::pattern *>::const_iterator pat_iter = this->elf_pattern_order.begin();
pat_iter != this->elf_pattern_order.end();
++pat_iter) {
pattern &pat = *(*pat_iter);
if (!pat.p_pcre) {
continue;
}
pcre_context_static<128> pc;
pcre_input pi(sample_iter->s_line);
if (pat.p_pcre->match(pc, pi)) {
return true;
}
}
}
return false;
}
class external_log_table : public log_vtab_impl {
public:
external_log_table(const external_log_format &elf) :

View File

@ -40,6 +40,7 @@
#include <sys/types.h>
#include <set>
#include <list>
#include <string>
#include <vector>
#include <memory>
@ -688,6 +689,8 @@ public:
void build(std::vector<std::string> &errors);
bool match_samples(const std::vector<sample> &samples) const;
std::auto_ptr<log_format> specialized() {
external_log_format *elf = new external_log_format(*this);
std::auto_ptr<log_format> retval((log_format *)elf);
@ -744,6 +747,7 @@ public:
};
std::set<std::string> elf_source_path;
std::list<std::string> elf_collision;
std::string elf_file_pattern;
pcrepp *elf_filename_pcre;
std::map<std::string, pattern> elf_patterns;

View File

@ -541,13 +541,78 @@ void load_formats(const std::vector<std::string> &extra_paths,
load_from_path(*path_iter, errors);
}
vector<external_log_format *> alpha_ordered_formats;
for (map<string, external_log_format *>::iterator iter = LOG_FORMATS.begin();
iter != LOG_FORMATS.end();
++iter) {
iter->second->build(errors);
external_log_format *elf = iter->second;
elf->build(errors);
for (map<string, external_log_format *>::iterator check_iter = LOG_FORMATS.begin();
check_iter != LOG_FORMATS.end();
++check_iter) {
if (iter->first == check_iter->first) {
continue;
}
external_log_format *check_elf = check_iter->second;
if (elf->match_samples(check_elf->elf_samples)) {
log_warning("Format collision, format '%s' matches sample from '%s'",
elf->get_name().c_str(),
check_elf->get_name().c_str());
elf->elf_collision.push_back(check_elf->get_name());
}
}
if (errors.empty()) {
log_format::get_root_formats().insert(log_format::get_root_formats().begin(), iter->second);
alpha_ordered_formats.push_back(elf);
}
}
vector<external_log_format *> graph_ordered_formats;
while (!alpha_ordered_formats.empty()) {
vector<string> popped_formats;
for (vector<external_log_format *>::iterator iter = alpha_ordered_formats.begin();
iter != alpha_ordered_formats.end();) {
external_log_format *elf = *iter;
if (elf->elf_collision.empty()) {
iter = alpha_ordered_formats.erase(iter);
popped_formats.push_back(elf->get_name());
graph_ordered_formats.push_back(elf);
}
else {
++iter;
}
}
if (popped_formats.empty() && !alpha_ordered_formats.empty()) {
external_log_format *elf = alpha_ordered_formats.front();
log_warning("Detected a cycle, breaking by picking %s",
elf->get_name().c_str());
elf->elf_collision.clear();
}
for (vector<external_log_format *>::iterator iter = alpha_ordered_formats.begin();
iter != alpha_ordered_formats.end();
++iter) {
external_log_format *elf = *iter;
for (vector<string>::iterator pop_iter = popped_formats.begin();
pop_iter != popped_formats.end();
++pop_iter) {
elf->elf_collision.remove(*pop_iter);
}
}
}
log_info("Format order:")
for (vector<external_log_format *>::iterator iter = graph_ordered_formats.begin();
iter != graph_ordered_formats.end();
++iter) {
log_info(" %s", (*iter)->get_name().c_str());
}
vector<log_format *> &roots = log_format::get_root_formats();
roots.insert(roots.begin(), graph_ordered_formats.begin(), graph_ordered_formats.end());
}

View File

@ -222,6 +222,7 @@ dist_noinst_DATA = \
listview_output.6 \
logfile_access_log.0 \
logfile_access_log.1 \
logfile_blued.0 \
logfile_empty.0 \
logfile_filter.0 \
logfile_for_join.0 \
@ -247,6 +248,7 @@ dist_noinst_DATA = \
vt52_curses_input.1 \
vt52_curses_output.0 \
vt52_curses_output.1 \
formats/collision/format.json \
formats/jsontest/format.json \
log-samples/sample-27353a72ba4025448f261dcfa6ea16e474187795.txt \
log-samples/sample-70c906b3c1a1cf03f15bde92ee78edfa6f9b7960.txt \

View File

@ -67,6 +67,7 @@ int main(int argc, char *argv[])
{
std::vector<std::string> paths, errors;
paths.push_back(getenv("test_dir"));
load_formats(paths, errors);
}

View File

@ -0,0 +1,47 @@
{
"zblued_log" : {
"title" : "blued",
"regex" : {
"std" : {
"pattern" : "^(?<timestamp>\\w{3}\\s+\\d{1,2} \\d{2}:\\d{2}:\\d{2})(?: (?<log_hostname>[a-zA-Z0-9:][^ ]+[a-zA-Z0-9]))? blued(?:\\[(?<log_pid>\\d+)])?:(?<body>(?:.|\\n)*)$"
}
},
"level-field" : "body",
"level" : {
"error" : "(?:failed|failure|error)",
"warning" : "(?:warn|not responding|init: cannot execute)"
},
"value" : {
"log_hostname" : {
"kind" : "string",
"collate" : "ipaddress",
"identifier" : true
}
},
"sample" : [
{
"line" : "Apr 4 20:02:32 Tim-Stacks-iMac.local blued[59]: Release the WiFi lock"
}
]
},
"xerror_log" : {
"title" : "Common Error Log",
"description" : "The default web error log format for servers like Apache.",
"regex" : {
"cups" : {
"pattern" : "^(?<level>\\w) \\[(?<timestamp>[^\\]]+)\\] (?<body>.*)"
}
},
"level-field": "level",
"level" : {
"error" : "E",
"warning" : "W",
"info" : "I"
},
"sample" : [
{
"line" : "E [08/Jun/2013:11:28:58 -0700] Unknown directive BrowseOrder on line 22 of /private/etc/cups/cupsd.conf."
}
]
}
}

6
test/logfile_blued.0 Normal file
View File

@ -0,0 +1,6 @@
Apr 4 20:02:32 Tim-Stacks-iMac.local blued[59]: Release the WiFi lock
Apr 4 20:02:32 Tim-Stacks-iMac.local blued[59]: Acquiring the wifi lock
Apr 4 20:02:32 Tim-Stacks-iMac.local blued[59]: Acquired the wifi lock
Apr 4 20:02:42 Tim-Stacks-iMac.local blued[59]: Release the WiFi lock
Apr 4 20:02:42 Tim-Stacks-iMac.local blued[59]: Acquiring the wifi lock
Apr 4 20:02:42 Tim-Stacks-iMac.local blued[59]: Acquired the wifi lock

View File

@ -16,6 +16,10 @@ run_test ./drive_logfile -f strace_log ${srcdir}/logfile_strace_log.0
on_error_fail_with "Didn't infer strace_log log format?"
run_test ./drive_logfile -f zblued_log ${srcdir}/logfile_blued.0
on_error_fail_with "Didn't infer blued_log that collides with syslog?"
run_test ./drive_logfile ${srcdir}/logfile_empty.0