From 21c5c528433df3167249c1b9e26a98e68b63b1a3 Mon Sep 17 00:00:00 2001 From: Timothy Stack Date: Wed, 13 Mar 2019 16:37:41 -0700 Subject: [PATCH] [format] allow multiple log format patterns per log file This tries to address #624 --- NEWS | 4 + src/all_logs_vtab.hh | 6 +- src/default-log-formats.json | 165 ++++++++++++++++++++++++++++++++++- src/field_overlay_source.cc | 6 +- src/hotkeys.cc | 6 +- src/lnav.cc | 2 +- src/lnav_commands.cc | 4 +- src/log_data_helper.hh | 2 +- src/log_data_table.hh | 11 ++- src/log_format.cc | 89 +++++++++++++------ src/log_format.hh | 57 +++++++----- src/log_format_impls.cc | 19 ++-- src/log_search_table.hh | 12 +-- src/log_vtab_impl.cc | 6 +- src/log_vtab_impl.hh | 9 +- src/logfile_sub_source.cc | 3 +- test/Makefile.am | 1 + test/drive_data_scanner.cc | 2 +- test/logfile_haproxy.0 | 17 ++++ test/test_logfile.sh | 9 ++ 20 files changed, 345 insertions(+), 85 deletions(-) create mode 100644 test/logfile_haproxy.0 diff --git a/NEWS b/NEWS index 4268015a..33153fd6 100644 --- a/NEWS +++ b/NEWS @@ -23,6 +23,10 @@ lnav v0.8.5: given file. This command is mostly useful in scripts where one might want to redirect all output from commands like ":echo" and ":write-to -" to a single file. + * If a log file format has multiple patterns for matching log messages, + each pattern is now tried to match a message in a file. Previously, + only one pattern was ever used for an entire file. + * Added haproxy log format from Peter Hoffmann. * Added 'spooky_hash()' and 'group_spooky_hash()' SQL functions to generate a hash of their parameters. * Added 'time_offset' to the 'lnav_file' table so that the timestamps in diff --git a/src/all_logs_vtab.hh b/src/all_logs_vtab.hh index 85eb88b3..69a0929b 100644 --- a/src/all_logs_vtab.hh +++ b/src/all_logs_vtab.hh @@ -48,7 +48,9 @@ public: cols.emplace_back(this->alv_schema_name.get(), SQLITE3_TEXT, nullptr, true); }; - void extract(std::shared_ptr lf, shared_buffer_ref &line, + void extract(std::shared_ptr lf, + uint64_t line_number, + shared_buffer_ref &line, std::vector &values) { log_format *format = lf->get_format(); values.emplace_back(this->alv_value_name, format->get_name(), 0); @@ -57,7 +59,7 @@ public: struct line_range body; string_attrs_t sa; - format->annotate(line, sa, sub_values); + format->annotate(line_number, line, sa, sub_values, false); body = find_string_attr_range(sa, &textview_curses::SA_BODY); if (body.lr_start == -1) { diff --git a/src/default-log-formats.json b/src/default-log-formats.json index 4661ebbb..9b7af6e0 100644 --- a/src/default-log-formats.json +++ b/src/default-log-formats.json @@ -644,11 +644,174 @@ }, "sample" : [ { - "line" : "E0517 15:04:22.619632 1952452992 logging_unittest.cc:253] Log every 3, iteration 19", + "line": "E0517 15:04:22.619632 1952452992 logging_unittest.cc:253] Log every 3, iteration 19" + }, + { "line" : "E0517 15:04:22.619632 52992 logging_unittest.cc:253] Log every 3, iteration 19" } ] }, + "haproxy_log" : { + "title" : "HAProxy HTTP Log Format", + "description" : "The HAProxy log format", + "url" : "http://www.haproxy.org/download/1.4/doc/configuration.txt", + "regex" : { + "event_started" : { + "pattern" : "(?\\w{3} \\d{2} \\d{2}:\\d{2}:\\d{2}) (?[^ ]+) (?\\w+)\\[(?\\d+)\\]: Proxy (?[^ ]+) started." + }, + "event_stopping": { + "pattern": "(?\\w{3} \\d{2} \\d{2}:\\d{2}:\\d{2}) (?[^ ]+) (?\\w+)\\[(?\\d+)\\]: Stopping frontend (?[^ ]+) in (?\\d+) ms." + }, + "event_stopped": { + "pattern": "(?\\w{3} \\d{2} \\d{2}:\\d{2}:\\d{2}) (?[^ ]+) (?\\w+)\\[(?\\d+)\\]: Proxy (?[^ ]+) stopped \\(FE: (?\\d+) conns, BE: (?\\d+) conns\\)." + }, + "tcp": { + "pattern": "(?\\w{3} \\d{2} \\d{2}:\\d{2}:\\d{2}) (?[^ ]+) (?\\w+)\\[(?\\d+)\\]: (?[^:]+):(?\\d+) \\[(?\\d{2}\\/\\w{3}\\/\\d{4}:\\d{2}:\\d{2}:\\d{2}.\\d{3})\\] (?[^ ]+) (?[^ ]+)\\/(?[^ ]+) (?\\d+)\\/(?\\d+)\\/(?\\d+) (?\\d+) (?..) (?\\d+)\\/(?\\d+)\\/(?\\d+)\\/(?\\d+)\\/(?\\d+) (?\\d+)\\/(?\\d+)" + }, + "http": { + "pattern": "(?\\w{3} \\d{2} \\d{2}:\\d{2}:\\d{2}) (?[^ ]+) (?\\w+)\\[(?\\d+)\\]: (?[^:]+):(?\\d+) \\[(?\\d{2}\\/\\w{3}\\/\\d{4}:\\d{2}:\\d{2}:\\d{2}.\\d{3})\\] (?[^ ]+)(?~)? (?[^ ]+)\\/(?[^ ]+) (?-?\\d+)\\/(?-?\\d+)\\/(?-?\\d+)\\/(?-?\\d+)\\/(?\\d+) (?\\d{3}|-1) (?\\d+) (?.*) (?.*) (?....) (?\\d+)\\/(?\\d+)\\/(?\\d+)\\/(?\\d+)\\/(?\\d+) (?\\d+)\\/(?\\d+) (?:\\{(?.*)\\} \\{(?.*)\\} )?\"(?[A-Z<>]+)(?: (?.*?))?(?: (?HTTP\\/\\d+.\\d+))?\"?$" + }, + "ssl": { + "pattern": "(?\\w{3} \\d{2} \\d{2}:\\d{2}:\\d{2}) (?[^ ]+) (?\\w+)\\[(?\\d+)\\]: (?[^:]+):(?\\d+) \\[(?\\d{2}\\/\\w{3}\\/\\d{4}:\\d{2}:\\d{2}:\\d{2}.\\d{3})\\] (?[^ ]+)\\/(?[^ ]+): (?.+)$" + } + }, + "json": false, + "value" : { + "stopping_timeout" : { + "kind" : "integer" + }, + "frontend_connections" : { + "kind" : "integer" + }, + "backend_connections" : { + "kind" : "integer" + }, + "logging_host" : { + "kind" : "string" + }, + "process_name" : { + "kind" : "string" + }, + "pid" : { + "kind" : "integer" + }, + "client_ip" : { + "kind" : "string" + }, + "client_port" : { + "kind" : "integer" + }, + "accept_date" : { + "kind" : "string" + }, + "frontend_name" : { + "kind" : "string", + "identifier" : true + }, + "ssl" : { + "kind" : "string" + }, + "ssl_error" : { + "kind" : "string" + }, + "backend_name" : { + "kind" : "string", + "identifier" : true + }, + "server_name" : { + "kind" : "string", + "identifier" : true + }, + "tq" : { + "kind" : "integer" + }, + "tw" : { + "kind" : "integer" + }, + "tc" : { + "kind" : "integer" + }, + "tr" : { + "kind" : "integer" + }, + "tt" : { + "kind" : "integer" + }, + "status_code": { + "kind" : "integer", + "identifier" : true + }, + "bytes_read": { + "kind" : "integer" + }, + "captured_request_cookie": { + "kind" : "string" + }, + "captured_response_cookie": { + "kind" : "string" + }, + "termination_state": { + "kind" : "string" + }, + "actconn" : { + "kind" : "integer" + }, + "feconn" : { + "kind" : "integer" + }, + "beconn": { + "kind" : "integer" + }, + "srv_conn": { + "kind" : "integer" + }, + "retries": { + "kind" : "integer" + }, + "srv_queue": { + "kind" : "integer" + }, + "backend_queue": { + "kind" : "integer" + }, + "captured_request_headers": { + "kind" : "string" + }, + "captured_response_headers": { + "kind" : "string" + }, + "http_method": { + "kind" : "string", + "identifier" : true + }, + "http_url": { + "kind" : "string" + }, + "http_version": { + "kind" : "string" + } + }, + "sample" : [ + { + "line" : "Feb 26 10:07:24 192.168.8.2 haproxy[1]: Proxy prod_http_in started." + }, + { + "line" : "Feb 26 10:00:47 192.168.8.2 haproxy[7]: Stopping frontend prod_http_in in 0 ms." + }, + { + "line" : "Feb 26 10:00:47 192.168.8.2 haproxy[7]: Proxy prod_http_in stopped (FE: 847876 conns, BE: 0 conns)." + }, + { + "line" : "Feb 26 23:08:47 192.168.8.2 haproxy[7]: 178.203.144.192:50210 [26/Feb/2019:23:08:47.266] prod_http_in/slsp: Connection closed during SSL handshake" + }, + { + "line" : "Feb 26 23:16:16 192.168.8.2 haproxy[7]: 178.203.144.192:50210 [26/Feb/2019:23:16:15.321] prod_ssh_in prod_ssh_out/ssh1 1/1/861 1485 -- 2/1/0/0/0 0/0" + }, + { + "line" : "Feb 26 00:29:44 192.168.8.2 haproxy[7]: 178.203.144.192:50210 [26/Feb/2019:00:29:44.326] prod_http_in~ prod_http_out/nginx1 0/0/1/48/49 200 3313 - - ---- 3/2/0/0/0 0/0 {Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0} {} \"POST /schulportal/?Script=934&lehrer=126537&anm=3235&onlinetest=admin HTTP/1.1\"" + } + ] + }, "java_log" : { "title" : "Java log format", "description" : "Log format used by log4j and output by most java programs", diff --git a/src/field_overlay_source.cc b/src/field_overlay_source.cc index 7edbc5e8..f62f7248 100644 --- a/src/field_overlay_source.cc +++ b/src/field_overlay_source.cc @@ -358,12 +358,12 @@ void field_overlay_source::build_field_lines(const listview_curses &lv) } log_format *lf = this->fos_log_helper.ldh_file->get_format(); - if (!lf->get_pattern_regex().empty()) { + if (!lf->get_pattern_regex(cl).empty()) { attr_line_t pattern_al; std::string &pattern_str = pattern_al.get_string(); - pattern_str = " Pattern: " + lf->get_pattern_name() + " = "; + pattern_str = " Pattern: " + lf->get_pattern_name(cl) + " = "; int skip = pattern_str.length(); - pattern_str += lf->get_pattern_regex(); + pattern_str += lf->get_pattern_regex(cl); readline_regex_highlighter(pattern_al, skip); this->fos_lines.emplace_back(pattern_al); } diff --git a/src/hotkeys.cc b/src/hotkeys.cc index 5d9d4f2f..c894ca8c 100644 --- a/src/hotkeys.cc +++ b/src/hotkeys.cc @@ -83,9 +83,11 @@ public: auto ll = lf->begin() + cl; log_format *format = lf->get_format(); lf->read_full_message(ll, this->lh_msg_buffer); - format->annotate(this->lh_msg_buffer, + format->annotate(cl, + this->lh_msg_buffer, this->lh_string_attrs, - this->lh_line_values); + this->lh_line_values, + false); }; std::string to_string(const struct line_range &lr) { diff --git a/src/lnav.cc b/src/lnav.cc index 2cceda22..fbd0a614 100644 --- a/src/lnav.cc +++ b/src/lnav.cc @@ -2321,7 +2321,7 @@ int main(int argc, char *argv[]) fprintf(stderr, "error:%s:%ld:line did not match format %s\n", lf->get_filename().c_str(), line_number, - fmt->get_pattern_name().c_str()); + fmt->get_pattern_name(line_number).c_str()); fprintf(stderr, "error:%s:%ld: line -- %s\n", lf->get_filename().c_str(), line_number, diff --git a/src/lnav_commands.cc b/src/lnav_commands.cc index aa906a29..f08c9e60 100644 --- a/src/lnav_commands.cc +++ b/src/lnav_commands.cc @@ -3421,7 +3421,7 @@ public: lf->read_full_message(ll, sbr); sa.clear(); values.clear(); - format->annotate(sbr, sa, values); + format->annotate(cl, sbr, sa, values, false); vector::iterator lv_iter; @@ -3474,7 +3474,7 @@ public: lf->read_full_message(ll, sbr); sa.clear(); values.clear(); - format->annotate(sbr, sa, values); + format->annotate(cl, sbr, sa, values, false); vector::iterator lv_iter; diff --git a/src/log_data_helper.hh b/src/log_data_helper.hh index 0ee3d760..f80e2a30 100644 --- a/src/log_data_helper.hh +++ b/src/log_data_helper.hh @@ -99,7 +99,7 @@ public: this->ldh_line_attrs.clear(); this->ldh_line_values.clear(); this->ldh_file->read_full_message(ll, this->ldh_msg); - format->annotate(this->ldh_msg, sa, this->ldh_line_values); + format->annotate(this->ldh_line_index, this->ldh_msg, sa, this->ldh_line_values, false); body = find_string_attr_range(sa, &textview_curses::SA_BODY); if (body.lr_start == -1) { diff --git a/src/log_data_table.hh b/src/log_data_table.hh index 9060526b..e8e37d37 100644 --- a/src/log_data_table.hh +++ b/src/log_data_table.hh @@ -76,7 +76,7 @@ public: } this->ldt_parent_column_count = cols.size(); lf->read_full_message(lf->begin() + cl_copy, line); - format->annotate(line, sa, line_values); + format->annotate(cl_copy, line, sa, line_values, false); body = find_string_attr_range(sa, &textview_curses::SA_BODY); if (body.lr_end == -1) { this->ldt_schema_id.clear(); @@ -161,7 +161,11 @@ public: std::vector line_values; lf->read_full_message(lf_iter, this->ldt_current_line); - lf->get_format()->annotate(this->ldt_current_line, sa, line_values); + lf->get_format()->annotate(cl, + this->ldt_current_line, + sa, + line_values, + false); body = find_string_attr_range(sa, &textview_curses::SA_BODY); if (body.lr_end == -1) { return false; @@ -187,6 +191,7 @@ public: }; void extract(std::shared_ptr lf, + uint64_t line_number, shared_buffer_ref &line, std::vector &values) { @@ -194,7 +199,7 @@ public: int next_column = this->ldt_parent_column_count; - this->ldt_format_impl->extract(lf, line, values); + this->ldt_format_impl->extract(lf, line_number, line, values); values.emplace_back(instance_name, this->ldt_instance); logline_value &lv = values.back(); lv.lv_column = next_column++; diff --git a/src/log_format.cc b/src/log_format.cc index 937555ab..6dc9a597 100644 --- a/src/log_format.cc +++ b/src/log_format.cc @@ -169,14 +169,15 @@ const char *log_format::log_scanf(const char *line, struct timeval *tv_out, ...) { - int curr_fmt = -1; - const char * retval = NULL; + int curr_fmt = -1; + const char *retval = NULL; bool done = false; pcre_input pi(line, 0, len); pcre_context_static<128> pc; va_list args; + int pat_index = this->last_pattern_index(); - while (!done && next_format(fmt, curr_fmt, this->lf_fmt_lock)) { + while (!done && next_format(fmt, curr_fmt, pat_index)) { va_start(args, tv_out); pi.reset(line, 0, len); @@ -197,7 +198,9 @@ const char *log_format::log_scanf(const char *line, pi.get_substr_start(ts), ts->length(), NULL, tm_out, *tv_out); if (retval) { - this->lf_fmt_lock = curr_fmt; + if (curr_fmt != pat_index) { + this->lf_pattern_locks.emplace_back(0, curr_fmt); + } this->lf_timestamp_flags = tm_out->et_flags; done = true; } @@ -480,7 +483,7 @@ bool external_log_format::scan_for_partial(shared_buffer_ref &sbr, size_t &len_o return false; } - auto pat = this->elf_pattern_order[this->lf_fmt_lock]; + auto pat = this->elf_pattern_order[this->last_pattern_index()]; pcre_input pi(sbr.get_data(), 0, sbr.length()); if (!this->elf_multiline) { @@ -512,7 +515,7 @@ log_format::scan_result_t external_log_format::scan(nonstd::optional return log_format::SCAN_INCOMPLETE; } - const unsigned char *line_data = (const unsigned char *) sbr.get_data(); + const auto *line_data = (const unsigned char *) sbr.get_data(); size_t line_end = sbr.length(); if (line_end > 0) { @@ -570,9 +573,10 @@ log_format::scan_result_t external_log_format::scan(nonstd::optional pcre_input pi(sbr.get_data(), 0, sbr.length()); pcre_context_static<128> pc; - int curr_fmt = -1; + int curr_fmt = -1, orig_lock = this->last_pattern_index(); + int pat_index = orig_lock; - while (::next_format(this->elf_pattern_order, curr_fmt, this->lf_fmt_lock)) { + while (::next_format(this->elf_pattern_order, curr_fmt, pat_index)) { auto fpat = this->elf_pattern_order[curr_fmt]; pcrepp *pat = fpat->p_pcre; @@ -581,6 +585,11 @@ log_format::scan_result_t external_log_format::scan(nonstd::optional } if (!pat->match(pc, pi)) { + if (!this->lf_pattern_locks.empty() && pat_index != -1) { + log_debug("no match on pattern %d", pat_index); + curr_fmt = -1; + pat_index = -1; + } continue; } @@ -620,11 +629,11 @@ log_format::scan_result_t external_log_format::scan(nonstd::optional this->check_for_new_year(dst, log_time_tm, log_tv); } - if (opid_cap != NULL) { + if (opid_cap != nullptr) { opid = hash_str(pi.get_substr_start(opid_cap), opid_cap->length()); } - if (mod_cap != NULL) { + if (mod_cap != nullptr) { intern_string_t mod_name = intern_string::lookup( pi.get_substr_start(mod_cap), mod_cap->length()); auto mod_iter = MODULE_FORMATS.find(mod_name); @@ -642,13 +651,13 @@ log_format::scan_result_t external_log_format::scan(nonstd::optional const value_def &vd = *ivd.ivd_value_def; pcre_context::capture_t *num_cap = pc[ivd.ivd_index]; - if (num_cap != NULL && num_cap->is_valid()) { - const struct scaling_factor *scaling = NULL; + if (num_cap != nullptr && num_cap->is_valid()) { + const struct scaling_factor *scaling = nullptr; if (ivd.ivd_unit_field_index >= 0) { pcre_context::iterator unit_cap = pc[ivd.ivd_unit_field_index]; - if (unit_cap != NULL && unit_cap->is_valid()) { + if (unit_cap != nullptr && unit_cap->is_valid()) { intern_string_t unit_val = intern_string::lookup( pi.get_substr_start(unit_cap), unit_cap->length()); std::map::const_iterator unit_iter; @@ -677,7 +686,10 @@ log_format::scan_result_t external_log_format::scan(nonstd::optional dst.emplace_back(offset, log_tv, level, mod_index, opid); - this->lf_fmt_lock = curr_fmt; + if (orig_lock != curr_fmt) { + log_debug("changing pattern lock %d -> %d", orig_lock, curr_fmt); + this->lf_pattern_locks.emplace_back(dst.size() - 1, curr_fmt); + } return log_format::SCAN_MATCH; } @@ -728,10 +740,8 @@ uint8_t external_log_format::module_scan(const pcre_input &pi, return 0; } -void external_log_format::annotate(shared_buffer_ref &line, - string_attrs_t &sa, - std::vector &values, - bool annotate_module) const +void external_log_format::annotate(uint64_t line_number, shared_buffer_ref &line, string_attrs_t &sa, + std::vector &values, bool annotate_module) const { pcre_context_static<128> pc; pcre_input pi(line.get_data(), 0, line.length()); @@ -744,7 +754,8 @@ void external_log_format::annotate(shared_buffer_ref &line, return; } - pattern &pat = *this->elf_pattern_order[this->lf_fmt_lock]; + int pat_index = this->pattern_index_for_line(line_number); + pattern &pat = *this->elf_pattern_order[pat_index]; if (!pat.p_pcre->match(pc, pi)) { // A continued line still needs a body. @@ -840,7 +851,7 @@ void external_log_format::annotate(shared_buffer_ref &line, body_cap->ltrim(line.get_data()); body_ref.subset(line, body_cap->c_begin, body_cap->length()); - mf.mf_mod_format->annotate(body_ref, sa, values, false); + mf.mf_mod_format->annotate(line_number, body_ref, sa, values, false); for (auto &value : values) { if (!value.lv_from_module) { continue; @@ -1875,7 +1886,7 @@ public: lf->read_line(lf_iter, line); this->vi_attrs.clear(); - format->annotate(line, this->vi_attrs, values, false); + format->annotate(cl, line, this->vi_attrs, values, false); this->elt_container_body = find_string_attr_range(this->vi_attrs, &textview_curses::SA_BODY); if (!this->elt_container_body.is_valid()) { return false; @@ -1905,23 +1916,28 @@ public: }; virtual void extract(shared_ptr lf, + uint64_t line_number, shared_buffer_ref &line, std::vector &values) { log_format *format = lf->get_format(); - if (this->elt_module_format.mf_mod_format != NULL) { + if (this->elt_module_format.mf_mod_format != nullptr) { shared_buffer_ref body_ref; body_ref.subset(line, this->elt_container_body.lr_start, this->elt_container_body.length()); this->vi_attrs.clear(); values.clear(); - this->elt_module_format.mf_mod_format->annotate(body_ref, this->vi_attrs, values); + this->elt_module_format.mf_mod_format->annotate(line_number, + body_ref, + this->vi_attrs, + values, + false); } else { this->vi_attrs.clear(); - format->annotate(line, this->vi_attrs, values, false); + format->annotate(line_number, line, this->vi_attrs, values, false); } }; @@ -1930,10 +1946,33 @@ public: struct line_range elt_container_body; }; -log_vtab_impl *external_log_format::get_vtab_impl(void) const +log_vtab_impl *external_log_format::get_vtab_impl() const { return new external_log_table(*this); } +int log_format::pattern_index_for_line(uint64_t line_number) const +{ + auto iter = lower_bound(this->lf_pattern_locks.cbegin(), + this->lf_pattern_locks.cend(), + line_number, + [](const pattern_for_lines &pfl, uint32_t line) { + return pfl.pfl_line < line; + }); + + if (iter == this->lf_pattern_locks.end() || + iter->pfl_line != line_number) { + --iter; + } + + return iter->pfl_pat_index; +} + +log_format::pattern_for_lines::pattern_for_lines( + uint32_t pfl_line, uint32_t pfl_pat_index) : + pfl_line(pfl_line), pfl_pat_index(pfl_pat_index) +{ +} + /* XXX */ #include "log_format_impls.cc" diff --git a/src/log_format.hh b/src/log_format.hh index 302897ca..0cb80e26 100644 --- a/src/log_format.hh +++ b/src/log_format.hh @@ -665,7 +665,6 @@ public: }; log_format() : lf_mod_index(0), - lf_fmt_lock(-1), lf_timestamp_field(intern_string::lookup("timestamp", -1)), lf_timestamp_flags(0), lf_is_self_describing(false), @@ -674,9 +673,9 @@ public: virtual ~log_format() { }; - virtual void clear(void) + virtual void clear() { - this->lf_fmt_lock = -1; + this->lf_pattern_locks.clear(); this->lf_date_time.clear(); }; @@ -723,10 +722,9 @@ public: */ virtual void scrub(std::string &line) { }; - virtual void annotate(shared_buffer_ref &sbr, - string_attrs_t &sa, - std::vector &values, - bool annotate_module = true) const + virtual void + annotate(uint64_t line_number, shared_buffer_ref &sbr, string_attrs_t &sa, + std::vector &values, bool annotate_module = true) const { }; virtual void rewrite(exec_context &ec, @@ -776,19 +774,37 @@ public: void check_for_new_year(std::vector &dst, exttm log_tv, timeval timeval1); - virtual std::string get_pattern_name() const { + virtual std::string get_pattern_name(uint64_t line_number) const { + int pat_index = this->pattern_index_for_line(line_number); char name[32]; - snprintf(name, sizeof(name), "builtin (%d)", this->lf_fmt_lock); + snprintf(name, sizeof(name), "builtin (%d)", pat_index); return name; }; - virtual std::string get_pattern_regex() const { + virtual std::string get_pattern_regex(uint64_t line_number) const { return ""; }; + struct pattern_for_lines { + pattern_for_lines(uint32_t pfl_line, uint32_t pfl_pat_index); + + uint32_t pfl_line; + int pfl_pat_index; + }; + + int last_pattern_index() const { + if (this->lf_pattern_locks.empty()) { + return -1; + } + + return this->lf_pattern_locks.back().pfl_pat_index; + } + + int pattern_index_for_line(uint64_t line_number) const; + uint8_t lf_mod_index; date_time_scanner lf_date_time; - int lf_fmt_lock; + std::vector lf_pattern_locks; intern_string_t lf_timestamp_field; std::vector lf_timestamp_format; int lf_timestamp_flags; @@ -951,10 +967,8 @@ public: bool scan_for_partial(shared_buffer_ref &sbr, size_t &len_out); - void annotate(shared_buffer_ref &line, - string_attrs_t &sa, - std::vector &values, - bool annotate_module = true) const; + void annotate(uint64_t line_number, shared_buffer_ref &line, string_attrs_t &sa, + std::vector &values, bool annotate_module = true) const; void rewrite(exec_context &ec, shared_buffer_ref &line, @@ -983,8 +997,9 @@ public: external_log_format *elf = new external_log_format(*this); std::unique_ptr retval(elf); + this->lf_pattern_locks.clear(); if (fmt_lock != -1) { - elf->lf_fmt_lock = fmt_lock; + elf->lf_pattern_locks.emplace_back(0, fmt_lock); } if (this->elf_type == ELF_TYPE_JSON) { @@ -1136,18 +1151,20 @@ public: return iter != this->elf_value_defs.end(); }; - std::string get_pattern_name() const { + std::string get_pattern_name(uint64_t line_number) const { if (this->elf_type != ELF_TYPE_TEXT) { return "structured"; } - return this->elf_pattern_order[this->lf_fmt_lock]->p_config_path; + int pat_index = this->pattern_index_for_line(line_number); + return this->elf_pattern_order[pat_index]->p_config_path; } - std::string get_pattern_regex() const { + std::string get_pattern_regex(uint64_t line_number) const { if (this->elf_type != ELF_TYPE_TEXT) { return ""; } - return this->elf_pattern_order[this->lf_fmt_lock]->p_string; + int pat_index = this->pattern_index_for_line(line_number); + return this->elf_pattern_order[pat_index]->p_string; } log_level_t convert_level(const pcre_input &pi, pcre_context::capture_t *level_cap) const { diff --git a/src/log_format_impls.cc b/src/log_format_impls.cc index 6932107d..9db7e5ea 100644 --- a/src/log_format_impls.cc +++ b/src/log_format_impls.cc @@ -111,8 +111,9 @@ class generic_log_format : public log_format { return log_fmt; }; - std::string get_pattern_regex() const { - return get_pcre_log_formats()[this->lf_fmt_lock].name; + std::string get_pattern_regex(uint64_t line_number) const { + int pat_index = this->pattern_index_for_line(line_number); + return get_pcre_log_formats()[pat_index].name; } intern_string_t get_name() const { @@ -172,12 +173,11 @@ class generic_log_format : public log_format { return SCAN_NO_MATCH; }; - void annotate(shared_buffer_ref &line, - string_attrs_t &sa, - std::vector &values, - bool annotate_module) const + void annotate(uint64_t line_number, shared_buffer_ref &line, string_attrs_t &sa, + std::vector &values, bool annotate_module) const { - pcre_format &fmt = get_pcre_log_formats()[this->lf_fmt_lock]; + int pat_index = this->pattern_index_for_line(line_number); + pcre_format &fmt = get_pcre_log_formats()[pat_index]; struct line_range lr; int prefix_len = 0; pcre_input pi(line.get_data(), 0, line.length()); @@ -602,9 +602,8 @@ public: return SCAN_NO_MATCH; }; - void annotate(shared_buffer_ref &sbr, string_attrs_t &sa, - std::vector &values, - bool annotate_module) const { + void annotate(uint64_t line_number, shared_buffer_ref &sbr, string_attrs_t &sa, + std::vector &values, bool annotate_module) const { static const intern_string_t TS = intern_string::lookup("bro_ts"); static const intern_string_t UID = intern_string::lookup("bro_uid"); diff --git a/src/log_search_table.hh b/src/log_search_table.hh index c3dd9638..a09a316f 100644 --- a/src/log_search_table.hh +++ b/src/log_search_table.hh @@ -116,7 +116,7 @@ public: cl = lss.at(lc.lc_curr_line); std::shared_ptr lf = lss.find(cl); - logfile::iterator lf_iter = lf->begin() + cl; + auto lf_iter = lf->begin() + cl; if (lf_iter->is_continued()) { return false; @@ -126,7 +126,8 @@ public: std::vector line_values; lf->read_full_message(lf_iter, this->lst_current_line); - lf->get_format()->annotate(this->lst_current_line, sa, line_values); + lf->get_format()->annotate(cl, this->lst_current_line, sa, line_values, + false); pcre_input pi(this->lst_current_line.get_data(), 0, this->lst_current_line.length()); @@ -141,6 +142,7 @@ public: }; void extract(std::shared_ptr lf, + uint64_t line_number, shared_buffer_ref &line, std::vector &values) { @@ -152,16 +154,14 @@ public: this->lst_current_line.length()); int next_column = 0; - values.push_back(logline_value(instance_name, this->lst_instance)); + values.emplace_back(instance_name, this->lst_instance); values.back().lv_column = next_column++; for (int lpc = 0; lpc < this->lst_regex.get_capture_count(); lpc++) { pcre_context::capture_t *cap = this->lst_match_context[lpc]; shared_buffer_ref value_sbr; value_sbr.subset(line, cap->c_begin, cap->length()); - values.push_back(logline_value(empty, - this->lst_column_types[lpc], - value_sbr)); + values.emplace_back(empty, this->lst_column_types[lpc], value_sbr); values.back().lv_column = next_column++; } }; diff --git a/src/log_vtab_impl.cc b/src/log_vtab_impl.cc index 2851636e..a9d4f220 100644 --- a/src/log_vtab_impl.cc +++ b/src/log_vtab_impl.cc @@ -369,7 +369,7 @@ static int vt_column(sqlite3_vtab_cursor *cur, sqlite3_context *ctx, int col) if (ll->is_time_skewed()) { if (vc->line_values.empty()) { lf->read_full_message(ll, vc->log_msg); - vt->vi->extract(lf, vc->log_msg, vc->line_values); + vt->vi->extract(lf, line_number, vc->log_msg, vc->line_values); } struct line_range time_range; @@ -540,7 +540,7 @@ static int vt_column(sqlite3_vtab_cursor *cur, sqlite3_context *ctx, int col) case 2: { if (vc->line_values.empty()) { lf->read_full_message(ll, vc->log_msg); - vt->vi->extract(lf, vc->log_msg, vc->line_values); + vt->vi->extract(lf, line_number, vc->log_msg, vc->line_values); } struct line_range body_range; @@ -565,7 +565,7 @@ static int vt_column(sqlite3_vtab_cursor *cur, sqlite3_context *ctx, int col) else { if (vc->line_values.empty()) { lf->read_full_message(ll, vc->log_msg); - vt->vi->extract(lf, vc->log_msg, vc->line_values); + vt->vi->extract(lf, line_number, vc->log_msg, vc->line_values); } size_t sub_col = col - VT_COL_MAX; diff --git a/src/log_vtab_impl.hh b/src/log_vtab_impl.hh index 3b1a47d5..2127a60e 100644 --- a/src/log_vtab_impl.hh +++ b/src/log_vtab_impl.hh @@ -128,19 +128,20 @@ public: virtual void get_foreign_keys(std::vector &keys_inout) const { - keys_inout.push_back("log_line"); - keys_inout.push_back("min(log_line)"); - keys_inout.push_back("log_mark"); + keys_inout.emplace_back("log_line"); + keys_inout.emplace_back("min(log_line)"); + keys_inout.emplace_back("log_mark"); }; virtual void extract(std::shared_ptr lf, + uint64_t line_number, shared_buffer_ref &line, std::vector &values) { log_format *format = lf->get_format(); this->vi_attrs.clear(); - format->annotate(line, this->vi_attrs, values); + format->annotate(line_number, line, this->vi_attrs, values, false); }; bool vi_supports_indexes; diff --git a/src/logfile_sub_source.cc b/src/logfile_sub_source.cc index dad9f53b..4ee47a58 100644 --- a/src/logfile_sub_source.cc +++ b/src/logfile_sub_source.cc @@ -202,7 +202,8 @@ void logfile_sub_source::text_value_for_line(textview_curses &tc, sbr.share(this->lss_share_manager, (char *)this->lss_token_value.c_str(), this->lss_token_value.size()); - format->annotate(sbr, this->lss_token_attrs, this->lss_token_values); + format->annotate(line, sbr, this->lss_token_attrs, this->lss_token_values, + false); if (this->lss_token_line->get_sub_offset() != 0) { this->lss_token_attrs.clear(); } diff --git a/test/Makefile.am b/test/Makefile.am index bc9e87b7..f5302a35 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -303,6 +303,7 @@ dist_noinst_DATA = \ logfile_generic.1 \ logfile_generic.2 \ logfile_glog.0 \ + logfile_haproxy.0 \ logfile_journald.json \ logfile_json.json \ logfile_json2.json \ diff --git a/test/drive_data_scanner.cc b/test/drive_data_scanner.cc index 98a58dfa..16580844 100644 --- a/test/drive_data_scanner.cc +++ b/test/drive_data_scanner.cc @@ -165,7 +165,7 @@ int main(int argc, char *argv[]) string_attrs_t sa; if (format.get() != NULL) { - format->annotate(sbr, sa, ll_values); + format->annotate(0, sbr, sa, ll_values); body = find_string_attr_range(sa, &textview_curses::SA_BODY); } diff --git a/test/logfile_haproxy.0 b/test/logfile_haproxy.0 new file mode 100644 index 00000000..a9ee9e6c --- /dev/null +++ b/test/logfile_haproxy.0 @@ -0,0 +1,17 @@ +Feb 25 16:19:38 192.168.4.2 haproxy[1]: Proxy prod_http_in started. +Feb 25 16:19:38 192.168.4.2 haproxy[1]: Proxy tools_http_frnt started. +Feb 25 16:19:38 192.168.4.2 haproxy[1]: Proxy git_inio_ssh_frnt started. +Feb 25 16:19:49 192.168.4.2 haproxy[7]: 141.35.244.171:53332 [25/Feb/2019:16:19:48.143] prod_http_in~ bk_admin/nginx_sonst 0/0/7/1457/1487 200 11394 - - ---- 2/1/0/0/0 0/0 {Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0} {} "GET /admin/colt/administration?survey=130&input_stats=1&ajax=1&&ajax=1&refresh=1&ihandle=1 HTTP/1.1" +Feb 25 16:19:49 192.168.4.2 haproxy[7]: 92.193.212.151:59841 [25/Feb/2019:16:19:49.851] prod_http_in~ bk_ktest_kt/nginx_sonst 0/0/0/62/62 200 306 - - ---- 3/2/0/0/0 0/0 {Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36} {} "POST /portal/?Script=934&onlinetest=speichereKorrektur&anm=10347 HTTP/1.1" +Feb 25 16:19:57 192.168.4.2 haproxy[7]: 92.193.212.151:59842 [25/Feb/2019:16:19:57.515] prod_http_in~ bk_ktest_kt/nginx_sonst 0/0/1/37/38 200 306 - - ---- 2/1/0/0/0 0/0 {Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36} {} "POST /portal/?Script=934&onlinetest=speichereKorrektur&anm=10347 HTTP/1.1" +Feb 25 16:20:02 192.168.4.2 haproxy[7]: 79.246.138.36:7891 [25/Feb/2019:16:20:02.367] prod_http_in~ bk_ktest_kt/nginx_sonst 0/0/1/327/328 200 306 - - ---- 1/1/0/0/0 0/0 {Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0} {} "POST /portal/?Script=934&onlinetest=speichereKorrektur&anm=13531 HTTP/1.1" +Feb 25 16:20:04 192.168.4.2 haproxy[7]: 141.35.244.171:53337 [25/Feb/2019:16:20:03.208] prod_http_in~ bk_admin/nginx_sonst 0/0/1/1031/1039 200 11394 - - ---- 3/2/0/0/0 0/0 {Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0} {} "GET /admin/colt/administration?survey=130&input_stats=1&ajax=1&&ajax=1&refresh=1&ihandle=1 HTTP/1.1" +Feb 25 16:20:09 192.168.4.2 haproxy[7]: 89.247.124.65:15564 [25/Feb/2019:16:20:06.321] prod_http_in~ bk_ktest_kt/nginx_sonst 0/0/1/43/2707 200 26170 - - ---- 3/3/1/1/0 0/0 {Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0} {} "GET /portal/?Script=934&onlinetest=korrektur&anm=13915¤tPage=0 HTTP/1.1" +Feb 25 16:20:11 192.168.4.2 haproxy[7]: 89.247.124.65:15565 [25/Feb/2019:16:20:08.872] prod_http_in~ bk_ktest_kt/nginx_sonst 0/0/1/26/2442 200 26170 - - ---- 3/2/1/1/0 0/0 {Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0} {} "GET /portal/?Script=934&onlinetest=korrektur&anm=13915¤tPage=0 HTTP/1.1" +Feb 25 16:20:12 192.168.4.2 haproxy[7]: 87.183.41.77:50186 [25/Feb/2019:16:20:11.910] prod_http_in~ bk_ktest_kt/nginx_sonst 0/0/0/236/236 200 4586 - - ---- 4/4/3/3/0 0/0 {Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0} {} "GET /portal/?Script=934&lehrer=77798 HTTP/1.1" +Feb 25 16:20:12 192.168.4.2 haproxy[7]: 87.183.41.77:50186 [25/Feb/2019:16:20:12.234] prod_http_in~ bk_ktest_sonst/nginx_sonst 0/0/1/0/1 200 16416 - - ---- 4/4/0/0/0 0/0 {Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0} {} "GET /media/core/bootstrap_3.3.7/css/bootstrap.css?1550939643 HTTP/1.1" +Feb 25 16:20:12 192.168.4.2 haproxy[7]: 87.183.41.77:50186 [25/Feb/2019:16:20:12.317] prod_http_in~ bk_ktest_sonst/nginx_sonst 0/0/0/1/1 200 11065 - - ---- 9/9/0/0/0 0/0 {Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0} {} "GET /media/core/jquery/jquery-ui-1.12.1.js?1550939557 HTTP/1.1" +Feb 25 16:20:12 192.168.4.2 haproxy[7]: 95.216.197.33:56224 [25/Feb/2019:16:20:10.111] prod_http_in/sktst2: SSL handshake failure +Feb 25 16:20:12 192.168.4.2 haproxy[7]: 87.183.41.77:50188 [25/Feb/2019:16:20:12.321] prod_http_in~ bk_ktest_sonst/nginx_sonst 0/0/1/0/1 200 5959 - - ---- 9/9/0/0/0 0/0 {Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0} {} "GET /media/pi_fontawesome/css/font-awesome.css?1550939694 HTTP/1.1" +Feb 25 16:20:12 192.168.4.2 haproxy[7]: 87.183.41.77:50187 [25/Feb/2019:16:20:12.325] prod_http_in~ bk_ktest_sonst/nginx_sonst 0/0/1/0/1 200 1859 - - ---- 9/9/0/0/0 0/0 {Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0} {} "GET /media/pi_popup/1.1.0/magnific-popup.css?1550939704 HTTP/1.1" +Feb 25 16:20:12 192.168.4.2 haproxy[7]: 87.183.41.77:50189 [25/Feb/2019:16:20:12.331] prod_http_in~ bk_ktest_sonst/nginx_sonst 0/0/1/0/1 200 2496 - - ---- 9/9/0/0/0 0/0 {Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0} {} "GET /media/core/core.css?1550939640 HTTP/1.1" diff --git a/test/test_logfile.sh b/test/test_logfile.sh index 503dc7d7..e86c7f24 100644 --- a/test/test_logfile.sh +++ b/test/test_logfile.sh @@ -369,6 +369,15 @@ check_output "CR-LF line-endings not handled?" <