From 9eb734ef7e448025bda79614c755fea2a30578d7 Mon Sep 17 00:00:00 2001 From: Tim Stack Date: Sun, 2 Oct 2022 21:58:10 -0700 Subject: [PATCH] [log_format] support for a separate sub-second field --- NEWS.md | 3 ++ docs/schemas/format-v1.schema.json | 15 ++++++++ docs/source/formats.rst | 10 ++++++ src/log_format.cc | 35 +++++++++++++++++++ src/log_format.hh | 8 +++++ src/log_format_loader.cc | 24 ++++++++++--- src/yajlpp/yajlpp.cc | 2 +- src/yajlpp/yajlpp.hh | 15 ++++++++ src/yajlpp/yajlpp_def.hh | 19 ++++++++-- test/Makefile.am | 2 ++ .../formats/invalid-properties/format.json | 1 + test/expected/expected.am | 2 ++ ...92e2695b7e6cf1f3520dbb87af8fc2b8f27088.err | 11 +++--- ...a23804c39b0f74642286d69865ee9d0961a58a.err | 0 ...a23804c39b0f74642286d69865ee9d0961a58a.out | 2 ++ test/formats/jsontest-subsec/format.json | 27 ++++++++++++++ test/logfile_json_subsec.json | 2 ++ test/test_json_format.sh | 4 +++ 18 files changed, 170 insertions(+), 12 deletions(-) create mode 100644 test/expected/test_json_format.sh_c1a23804c39b0f74642286d69865ee9d0961a58a.err create mode 100644 test/expected/test_json_format.sh_c1a23804c39b0f74642286d69865ee9d0961a58a.out create mode 100644 test/formats/jsontest-subsec/format.json create mode 100644 test/logfile_json_subsec.json diff --git a/NEWS.md b/NEWS.md index 61e141cb..89aefc20 100644 --- a/NEWS.md +++ b/NEWS.md @@ -22,6 +22,9 @@ Features: that the implementation relies on libcurl which has some limitations, like not supporting all types of schemes (e.g. `mailto:`). +* Added the `subsecond-field` and `subsecond-units` log + format properties to allow for specifying a separate + field for the sub-second portion of a timestamp. * Added a keymap for Swedish keyboards. Breaking changes: diff --git a/docs/schemas/format-v1.schema.json b/docs/schemas/format-v1.schema.json index 506e6adb..089251ff 100644 --- a/docs/schemas/format-v1.schema.json +++ b/docs/schemas/format-v1.schema.json @@ -101,6 +101,21 @@ "description": "The name of the timestamp field in the log message pattern", "type": "string" }, + "subsecond-field": { + "title": "//subsecond-field", + "description": "The path to the property in a JSON-lines log message that contains the sub-second time value", + "type": "string" + }, + "subsecond-units": { + "title": "//subsecond-units", + "description": "The units of the subsecond-field property value", + "type": "string", + "enum": [ + "milli", + "micro", + "nano" + ] + }, "time-field": { "title": "//time-field", "description": "The name of the time field in the log message pattern. This field should only be specified if the timestamp field only contains a date.", diff --git a/docs/source/formats.rst b/docs/source/formats.rst index e8168f37..89607617 100644 --- a/docs/source/formats.rst +++ b/docs/source/formats.rst @@ -202,6 +202,16 @@ should be another object with the following fields: to divide the timestamp by to get the number of seconds and fractional seconds. + :subsecond-field: (v0.11.1+) The path to the property in a JSON-lines log + message that contains the sub-second time value + + :subsecond-units: (v0.11.1+) The units of the subsecond-field property value. + The following values are supported: + + :milli: for milliseconds + :micro: for microseconds + :nano: for nanoseconds + :ordered-by-time: (v0.8.3+) Indicates that the order of messages in the file is time-based. Files that are not naturally ordered by time will be sorted in order to display them in the correct order. Note that this sorting can diff --git a/src/log_format.cc b/src/log_format.cc index 204ddbdf..c1083971 100644 --- a/src/log_format.cc +++ b/src/log_format.cc @@ -480,6 +480,24 @@ read_json_int(yajlpp_parse_context* ypc, long long val) tv.tv_sec = tm2sec(<m); } jlu->jlu_base_line->set_time(tv); + } else if (jlu->jlu_format->lf_subsecond_field == field_name) { + uint64_t millis = 0; + switch (jlu->jlu_format->lf_subsecond_unit.value()) { + case log_format::subsecond_unit::milli: + millis = val; + break; + case log_format::subsecond_unit::micro: + millis = std::chrono::duration_cast( + std::chrono::microseconds(val)) + .count(); + break; + case log_format::subsecond_unit::nano: + millis = std::chrono::duration_cast( + std::chrono::nanoseconds(val)) + .count(); + break; + } + jlu->jlu_base_line->set_millis(millis); } else if (jlu->jlu_format->elf_level_field == field_name) { if (jlu->jlu_format->elf_level_pairs.empty()) { char level_buf[128]; @@ -2041,6 +2059,23 @@ external_log_format::build(std::vector& errors) .with_snippets(this->get_snippets())); } + if (!this->lf_subsecond_field.empty() + && !this->lf_subsecond_unit.has_value()) + { + errors.emplace_back( + lnav::console::user_message::error( + attr_line_t() + .append_quoted( + lnav::roles::symbol(this->elf_name.to_string())) + .append(" is not a valid log format")) + .with_reason(attr_line_t() + .append_quoted("subsecond-unit"_symbol) + .append(" must be set when ") + .append_quoted("subsecond-field"_symbol) + .append(" is used")) + .with_snippets(this->get_snippets())); + } + for (size_t sample_index = 0; sample_index < this->elf_samples.size(); sample_index += 1) { diff --git a/src/log_format.hh b/src/log_format.hh index 88ca877a..5dfe89b2 100644 --- a/src/log_format.hh +++ b/src/log_format.hh @@ -498,6 +498,12 @@ public: return intern_string_t::case_lt(lhs->get_name(), rhs->get_name()); } + enum class subsecond_unit { + milli, + micro, + nano, + }; + std::string lf_description; uint8_t lf_mod_index{0}; bool lf_multiline{true}; @@ -505,6 +511,8 @@ public: date_time_scanner lf_time_scanner; std::vector lf_pattern_locks; intern_string_t lf_timestamp_field{intern_string::lookup("timestamp", -1)}; + intern_string_t lf_subsecond_field; + nonstd::optional lf_subsecond_unit; intern_string_t lf_time_field; std::vector lf_timestamp_format; unsigned int lf_timestamp_flags{0}; diff --git a/src/log_format_loader.cc b/src/log_format_loader.cc index df907ef9..c9a49893 100644 --- a/src/log_format_loader.cc +++ b/src/log_format_loader.cc @@ -419,6 +419,14 @@ static struct json_path_container pattern_handlers = { .for_field(&external_log_format::pattern::p_module_format), }; +static const json_path_handler_base::enum_value_t SUBSECOND_UNIT_ENUM[] = { + {"milli", log_format::subsecond_unit::milli}, + {"micro", log_format::subsecond_unit::micro}, + {"nano", log_format::subsecond_unit::nano}, + + json_path_handler_base::ENUM_TERMINATOR, +}; + static const json_path_handler_base::enum_value_t ALIGN_ENUM[] = { {"left", external_log_format::json_format_element::align_t::LEFT}, {"right", external_log_format::json_format_element::align_t::RIGHT}, @@ -839,7 +847,7 @@ struct json_path_container format_handlers = { json_path_handler("mime-types#", read_format_field) .with_description("A list of mime-types this format should be used for") .with_enum_values(MIME_TYPE_ENUM), - json_path_handler("level-field", read_format_field) + json_path_handler("level-field") .with_description( "The name of the level field in the log message pattern") .for_field(&external_log_format::elf_level_field), @@ -847,17 +855,25 @@ struct json_path_container format_handlers = { .with_description("A regular-expression that matches the JSON-pointer " "of the level property") .for_field(&external_log_format::elf_level_pointer), - json_path_handler("timestamp-field", read_format_field) + json_path_handler("timestamp-field") .with_description( "The name of the timestamp field in the log message pattern") .for_field(&log_format::lf_timestamp_field), - json_path_handler("time-field", read_format_field) + json_path_handler("subsecond-field") + .with_description("The path to the property in a JSON-lines log " + "message that contains the sub-second time value") + .for_field(&log_format::lf_subsecond_field), + json_path_handler("subsecond-units") + .with_description("The units of the subsecond-field property value") + .with_enum_values(SUBSECOND_UNIT_ENUM) + .for_field(&log_format::lf_subsecond_unit), + json_path_handler("time-field") .with_description( "The name of the time field in the log message pattern. This " "field should only be specified if the timestamp field only " "contains a date.") .for_field(&log_format::lf_time_field), - json_path_handler("body-field", read_format_field) + json_path_handler("body-field") .with_description( "The name of the body field in the log message pattern") .for_field(&external_log_format::elf_body_field), diff --git a/src/yajlpp/yajlpp.cc b/src/yajlpp/yajlpp.cc index 1f5e76a2..bc7130d3 100644 --- a/src/yajlpp/yajlpp.cc +++ b/src/yajlpp/yajlpp.cc @@ -541,7 +541,7 @@ nonstd::optional json_path_handler_base::to_enum_value(const string_fragment& sf) const { for (int lpc = 0; this->jph_enum_values[lpc].first; lpc++) { - const enum_value_t& ev = this->jph_enum_values[lpc]; + const auto& ev = this->jph_enum_values[lpc]; if (sf == ev.first) { return ev.second; diff --git a/src/yajlpp/yajlpp.hh b/src/yajlpp/yajlpp.hh index 4c8180ca..5baee0b7 100644 --- a/src/yajlpp/yajlpp.hh +++ b/src/yajlpp/yajlpp.hh @@ -50,6 +50,7 @@ #include "base/lnav.console.hh" #include "base/lnav.console.into.hh" #include "base/lnav_log.hh" +#include "base/opt_util.hh" #include "json_ptr.hh" #include "optional.hpp" #include "pcrepp/pcre2pp.hh" @@ -210,6 +211,20 @@ struct json_path_handler_base { nonstd::optional to_enum_value(const string_fragment& sf) const; const char* to_enum_string(int value) const; + template + std::enable_if_t::value, const char*> + to_enum_string(T value) const + { + return this->to_enum_string((int) value); + } + + template + std::enable_if_t::value, const char*> to_enum_string( + T value) const + { + return this->to_enum_string((int) value.value()); + } + yajl_gen_status gen(yajlpp_gen_context& ygc, yajl_gen handle) const; yajl_gen_status gen_schema(yajlpp_gen_context& ygc) const; yajl_gen_status gen_schema_type(yajlpp_gen_context& ygc) const; diff --git a/src/yajlpp/yajlpp_def.hh b/src/yajlpp/yajlpp_def.hh index 16665525..3d3bf962 100644 --- a/src/yajlpp/yajlpp_def.hh +++ b/src/yajlpp/yajlpp_def.hh @@ -553,11 +553,21 @@ struct json_path_handler : public json_path_handler_base { template struct LastIsEnum { + using value_type = typename LastIsEnum::value_type; static constexpr bool value = LastIsEnum::value; }; template struct LastIsEnum { + using value_type = U; + + static constexpr bool value = std::is_enum::value; + }; + + template + struct LastIsEnum T::*> { + using value_type = U; + static constexpr bool value = std::is_enum::value; }; @@ -1414,8 +1424,7 @@ struct json_path_handler : public json_path_handler_base { if (res) { json_path_handler::get_field(obj, args...) - = (decltype(json_path_handler::get_field( - obj, args...))) res.value(); + = (typename LastIsEnum::value_type) res.value(); } else { handler->report_enum_error(ypc, std::string((const char*) str, len)); @@ -1438,13 +1447,17 @@ struct json_path_handler : public json_path_handler_base { } } + if (!is_field_set(field)) { + return yajl_gen_status_ok; + } + if (ygc.ygc_depth) { yajl_gen_string(handle, jph.jph_property); } yajlpp_generator gen(handle); - return gen(jph.to_enum_string((int) field)); + return gen(jph.to_enum_string(field)); }; this->jph_field_getter = [args...](void* root, nonstd::optional name) { diff --git a/test/Makefile.am b/test/Makefile.am index 913d5966..f0d5fe4a 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -309,6 +309,7 @@ dist_noinst_DATA = \ logfile_json.json \ logfile_json2.json \ logfile_json3.json \ + logfile_json_subsec.json \ logfile_leveltest.0 \ logfile_logfmt.0 \ logfile_multiline.0 \ @@ -370,6 +371,7 @@ dist_noinst_DATA = \ formats/jsontest/rewrite-user.lnav \ formats/jsontest2/format.json \ formats/jsontest3/format.json \ + formats/jsontest-subsec/format.json \ formats/nestedjson/format.json \ formats/scripts/multiline-echo.lnav \ formats/scripts/redirecting.lnav \ diff --git a/test/bad-config/formats/invalid-properties/format.json b/test/bad-config/formats/invalid-properties/format.json index c1b03810..a69a179a 100644 --- a/test/bad-config/formats/invalid-properties/format.json +++ b/test/bad-config/formats/invalid-properties/format.json @@ -8,6 +8,7 @@ } }, "timestamp-field": "ts", + "subsecond-field": "ts-sub", "sample": [ { "line": "1428634687123: 1234 abc" diff --git a/test/expected/expected.am b/test/expected/expected.am index 6d4d0a01..1280cda6 100644 --- a/test/expected/expected.am +++ b/test/expected/expected.am @@ -280,6 +280,8 @@ EXPECTED_FILES = \ $(srcdir)/%reldir%/test_json_format.sh_989e52d167582648b73c5d025cc0e814c642b3c8.out \ $(srcdir)/%reldir%/test_json_format.sh_a06b3cdd46b387e72d6faa4cce648b8b11ae870b.err \ $(srcdir)/%reldir%/test_json_format.sh_a06b3cdd46b387e72d6faa4cce648b8b11ae870b.out \ + $(srcdir)/%reldir%/test_json_format.sh_c1a23804c39b0f74642286d69865ee9d0961a58a.err \ + $(srcdir)/%reldir%/test_json_format.sh_c1a23804c39b0f74642286d69865ee9d0961a58a.out \ $(srcdir)/%reldir%/test_json_format.sh_c60050b3469f37c5b0864e1dc7eb354e91d6ec81.err \ $(srcdir)/%reldir%/test_json_format.sh_c60050b3469f37c5b0864e1dc7eb354e91d6ec81.out \ $(srcdir)/%reldir%/test_json_format.sh_d0ec34389274affb70a5a76ba4789d51fd60f602.err \ diff --git a/test/expected/test_format_loader.sh_5992e2695b7e6cf1f3520dbb87af8fc2b8f27088.err b/test/expected/test_format_loader.sh_5992e2695b7e6cf1f3520dbb87af8fc2b8f27088.err index e922dae2..b0860d95 100644 --- a/test/expected/test_format_loader.sh_5992e2695b7e6cf1f3520dbb87af8fc2b8f27088.err +++ b/test/expected/test_format_loader.sh_5992e2695b7e6cf1f3520dbb87af8fc2b8f27088.err @@ -3,7 +3,7 @@  --> /invalid_props_log/tags/badtag3/pattern  | invalid(abc   |  ^ missing closing parenthesis - --> {test_dir}/bad-config/formats/invalid-properties/format.json:35 + --> {test_dir}/bad-config/formats/invalid-properties/format.json:36  |  "pattern": "invalid(abc"  = help: Property Synopsis /invalid_props_log/tags/badtag3/pattern  @@ -16,7 +16,7 @@  --> /invalid_props_log/search-table/bad_table_regex/pattern  | abc(def   |  ^ missing closing parenthesis  - --> {test_dir}/bad-config/formats/invalid-properties/format.json:40 + --> {test_dir}/bad-config/formats/invalid-properties/format.json:41  |  "pattern": "abc(def"   = help: Property Synopsis /invalid_props_log/search-table/bad_table_regex/pattern  @@ -139,6 +139,9 @@ ✘ error: invalid tag definition “/invalid_props_log/tags/badtag3” reason: tag definitions must have a non-empty pattern  --> {test_dir}/bad-config/formats/invalid-properties/format.json:4 +✘ error: “invalid_props_log” is not a valid log format + reason: “subsecond-unit” must be set when “subsecond-field” is used + --> {test_dir}/bad-config/formats/invalid-properties/format.json:4 ✘ error: invalid value for property “/invalid_props_log/timestamp-field” reason: “ts” was not found in the pattern at /invalid_props_log/regex/std  --> {test_dir}/bad-config/formats/invalid-properties/format.json:4 @@ -146,10 +149,10 @@ body, pid, timestamp ✘ error: “not a color” is not a valid color value for property “/invalid_props_log/highlights/hl1/color” reason: Unknown color: 'not a color'. See https://jonasjacek.github.io/colors/ for a list of supported color names - --> {test_dir}/bad-config/formats/invalid-properties/format.json:23 + --> {test_dir}/bad-config/formats/invalid-properties/format.json:24 ✘ error: “also not a color” is not a valid color value for property “/invalid_props_log/highlights/hl1/background-color” reason: Unknown color: 'also not a color'. See https://jonasjacek.github.io/colors/ for a list of supported color names - --> {test_dir}/bad-config/formats/invalid-properties/format.json:24 + --> {test_dir}/bad-config/formats/invalid-properties/format.json:25 ✘ error: “no_regexes_log” is not a valid log format reason: no regexes specified  --> {test_dir}/bad-config/formats/no-regexes/format.json:4 diff --git a/test/expected/test_json_format.sh_c1a23804c39b0f74642286d69865ee9d0961a58a.err b/test/expected/test_json_format.sh_c1a23804c39b0f74642286d69865ee9d0961a58a.err new file mode 100644 index 00000000..e69de29b diff --git a/test/expected/test_json_format.sh_c1a23804c39b0f74642286d69865ee9d0961a58a.out b/test/expected/test_json_format.sh_c1a23804c39b0f74642286d69865ee9d0961a58a.out new file mode 100644 index 00000000..889eb99d --- /dev/null +++ b/test/expected/test_json_format.sh_c1a23804c39b0f74642286d69865ee9d0961a58a.out @@ -0,0 +1,2 @@ +2022-09-24T00:00:09.484 Hello, World! +2022-09-24T00:00:19.222 Goodbye, World! diff --git a/test/formats/jsontest-subsec/format.json b/test/formats/jsontest-subsec/format.json new file mode 100644 index 00000000..8469c7f5 --- /dev/null +++ b/test/formats/jsontest-subsec/format.json @@ -0,0 +1,27 @@ +{ + "$schema": "https://lnav.org/schemas/format-v1.schema.json", + "subsec_json_log": { + "title": "JSON Log with subsecond field", + "json": true, + "file-pattern": "logfile_json_subsec\\.json", + "line-format": [ + { + "field": "__timestamp__" + }, + " ", + { + "field": "msg" + } + ], + "timestamp-field": "instant/epochSecond", + "subsecond-field": "instant/nanoOfSecond", + "subsecond-units": "nano", + "body-field": "msg", + "value": { + "instant": { + "kind": "json", + "hidden": true + } + } + } +} \ No newline at end of file diff --git a/test/logfile_json_subsec.json b/test/logfile_json_subsec.json new file mode 100644 index 00000000..3ddc190d --- /dev/null +++ b/test/logfile_json_subsec.json @@ -0,0 +1,2 @@ +{"instant":{"epochSecond": 1663977609,"nanoOfSecond": 484000000}, "msg": "Hello, World!"} +{"instant":{"epochSecond": 1663977619,"nanoOfSecond": 222000000}, "msg": "Goodbye, World!"} diff --git a/test/test_json_format.sh b/test/test_json_format.sh index 1ef206a8..03873875 100644 --- a/test/test_json_format.sh +++ b/test/test_json_format.sh @@ -135,3 +135,7 @@ run_cap_test ${lnav_test} -n \ run_cap_test ${lnav_test} -n \ -I ${test_dir} \ ${test_dir}/logfile_mixed_json2.json + +run_cap_test ${lnav_test} -n \ + -I ${test_dir} \ + ${test_dir}/logfile_json_subsec.json