[logfile] plumb gzip header through file meta

This commit is contained in:
Tim Stack 2022-09-20 20:07:44 -07:00
parent 3f3e6dcbdc
commit d940d9d5eb
21 changed files with 312 additions and 76 deletions

View File

@ -530,7 +530,7 @@ public:
attr_line_t& append_quoted(S s)
{
this->al_string.append("\u201c");
this->al_string.append(std::move(s));
this->append(std::move(s));
this->al_string.append("\u201d");
return *this;

View File

@ -35,6 +35,48 @@
#include "config.h"
namespace lnav {
/**
 * Render a timestamp as "YYYY-MM-DD<sep>HH:MM:SS.mmm".
 *
 * @param buffer The destination for the NUL-terminated string.
 * @param buffer_size The number of bytes available in buffer.  At least 24
 *   bytes are needed (23 characters plus the terminator).
 * @param tim The time in seconds, broken down with secs2tm().
 * @param millis The sub-second part; only its three least-significant
 *   decimal digits are written.
 * @param sep The character placed between the date and the time.
 * @return The number of characters written, not counting the terminator, or
 *   zero if the buffer is missing or too small to hold the full timestamp.
 */
ssize_t
strftime_rfc3339(
    char* buffer, size_t buffer_size, lnav::time64_t tim, int millis, char sep)
{
    // "YYYY-MM-DD" + sep + "HH:MM:SS" + ".mmm" + NUL
    constexpr size_t REQUIRED_SIZE = 10 + 1 + 8 + 4 + 1;

    if (buffer == nullptr || buffer_size < REQUIRED_SIZE) {
        // Never write past the caller's buffer; hand back an empty string
        // when there is at least room for the terminator.
        if (buffer != nullptr && buffer_size > 0) {
            buffer[0] = '\0';
        }
        return 0;
    }

    struct tm gmtm;
    int index = 0;

    secs2tm(tim, &gmtm);

    // Emits the decimal digits of value starting at the given power of ten.
    auto put_digits = [&buffer, &index](int value, int divisor) {
        for (; divisor > 0; divisor /= 10) {
            buffer[index++] = '0' + ((value / divisor) % 10);
        }
    };

    put_digits(gmtm.tm_year + 1900, 1000);
    buffer[index++] = '-';
    put_digits(gmtm.tm_mon + 1, 10);
    buffer[index++] = '-';
    put_digits(gmtm.tm_mday, 10);
    buffer[index++] = sep;
    put_digits(gmtm.tm_hour, 10);
    buffer[index++] = ':';
    put_digits(gmtm.tm_min, 10);
    buffer[index++] = ':';
    put_digits(gmtm.tm_sec, 10);
    buffer[index++] = '.';
    put_digits(millis, 100);
    buffer[index] = '\0';

    return index;
}
}
static time_t BAD_DATE = -1;
time_t

View File

@ -42,7 +42,13 @@ namespace lnav {
using time64_t = uint64_t;
}
/**
 * Format a timestamp as "YYYY-MM-DD<sep>HH:MM:SS.mmm".
 *
 * @param buffer The destination buffer; it must have room for 24 bytes
 *   (23 characters plus the NUL terminator).
 * @param buffer_size The size of the destination buffer in bytes.
 * @param tim The time in seconds.
 * @param millis The sub-second part; only its three least-significant
 *   decimal digits are emitted.
 * @param sep The separator written between the date and the time.
 * @return The number of characters written, excluding the terminator.
 */
ssize_t strftime_rfc3339(char* buffer,
size_t buffer_size,
lnav::time64_t tim,
int millis,
char sep = ' ');
} // namespace lnav
struct tm* secs2tm(lnav::time64_t tim, struct tm* res);
/**
@ -103,10 +109,7 @@ struct exttm {
unsigned int et_flags{0};
long et_gmtoff{0};
exttm()
{
memset(&this->et_tm, 0, sizeof(this->et_tm));
}
exttm() { memset(&this->et_tm, 0, sizeof(this->et_tm)); }
bool operator==(const exttm& other) const
{
@ -141,6 +144,12 @@ operator!=(const struct timeval& left, const struct timeval& right)
return left.tv_sec != right.tv_sec || left.tv_usec != right.tv_usec;
}
/**
 * Equality for timeval values.
 *
 * Two values are equal only when both the seconds and the microseconds
 * match, which makes this the exact negation of operator!=.
 */
inline bool
operator==(const struct timeval& left, const struct timeval& right)
{
    return left.tv_sec == right.tv_sec && left.tv_usec == right.tv_usec;
}
inline struct timeval
operator-(const struct timeval& lhs, const struct timeval& rhs)
{

View File

@ -131,13 +131,6 @@ private:
};
/* XXX END */
/**
 * Decode a 32-bit little-endian integer from four consecutive bytes.
 *
 * The bytes are widened to an unsigned 32-bit type before shifting so that a
 * most-significant byte of 0x80 or above does not shift into the sign bit of
 * a promoted int.
 *
 * @param data Pointer to at least four readable bytes.
 * @return The decoded value, reinterpreted as a signed 32-bit integer.
 */
static int32_t
read_le32(const unsigned char* data)
{
    const uint32_t value = (static_cast<uint32_t>(data[0]) << 0)
        | (static_cast<uint32_t>(data[1]) << 8)
        | (static_cast<uint32_t>(data[2]) << 16)
        | (static_cast<uint32_t>(data[3]) << 24);

    return static_cast<int32_t>(value);
}
#define Z_BUFSIZE 65536U
#define SYNCPOINT_SIZE (1024 * 1024)
line_buffer::gz_indexed::gz_indexed()
@ -198,11 +191,66 @@ line_buffer::gz_indexed::continue_stream()
}
void
line_buffer::gz_indexed::open(int fd)
/*
 * Attach this indexed gzip reader to a file descriptor and capture the
 * gzip header fields (mtime, original name, comment) in `hd`.
 *
 * The header is read by inflating the first bytes of the file with a
 * throw-away stream state; the stream is then re-initialized so regular
 * decoding starts from scratch.  Failures are logged and leave `hd`
 * untouched.
 */
line_buffer::gz_indexed::open(int fd, header_data& hd)
{
    this->close();
    this->init_stream();
    this->gz_fd = fd;

    // zlib does not write to these buffers when the field is absent from the
    // header (it sets the gz_header pointer to Z_NULL instead) and it omits
    // the terminator when the field is longer than the advertised maximum.
    // Zero-fill them and advertise one byte less than their size so they
    // always hold a valid, NUL-terminated C string.
    unsigned char name[1024] = {};
    unsigned char comment[4096] = {};

    gz_header gz_hd;
    memset(&gz_hd, 0, sizeof(gz_hd));
    gz_hd.name = name;
    gz_hd.name_max = sizeof(name) - 1;
    gz_hd.comment = comment;
    gz_hd.comm_max = sizeof(comment) - 1;

    Bytef inbuf[8192];
    Bytef outbuf[8192];

    this->strm.next_out = outbuf;
    this->strm.total_out = 0;
    this->strm.avail_out = sizeof(outbuf);
    this->strm.next_in = inbuf;
    this->strm.total_in = 0;

    if (inflateGetHeader(&this->strm, &gz_hd) != Z_OK) {
        log_error("%d: unable to get gzip header", fd);
        return;
    }

    auto rc = pread(fd, inbuf, sizeof(inbuf), 0);
    if (rc < 0) {
        log_error("%d: failed to read gzip header from file: %s",
                  fd,
                  strerror(errno));
        return;
    }

    // Inflate up to the first block boundary, which is enough for zlib to
    // process the header, then throw the stream state away.
    this->strm.avail_in = rc;
    inflate(&this->strm, Z_BLOCK);
    inflateEnd(&this->strm);

    // The stream pointed at stack buffers; reset it before it is reused.
    this->strm.next_out = Z_NULL;
    this->strm.next_in = Z_NULL;
    this->strm.total_in = 0;
    this->strm.avail_in = 0;
    this->init_stream();

    switch (gz_hd.done) {
        case 0:
            log_debug("%d: no gzip header data", fd);
            break;
        case 1:
            hd.hd_mtime.tv_sec = gz_hd.time;
            hd.hd_name = std::string((char*) name);
            hd.hd_comment = std::string((char*) comment);
            break;
        default:
            log_error("%d: failed to read gzip header data", fd);
            break;
    }
}
int
@ -365,10 +413,9 @@ line_buffer::set_fd(auto_fd& fd)
close(gzfd);
throw error(errno);
}
this->lb_gz_file.writeAccess()->open(gzfd);
this->lb_gz_file.writeAccess()->open(gzfd, this->lb_header);
this->lb_compressed = true;
this->lb_file_time
= read_le32((const unsigned char*) &gz_id[4]);
this->lb_file_time = this->lb_header.hd_mtime.tv_sec;
if (this->lb_file_time < 0) {
this->lb_file_time = 0;
}

View File

@ -72,11 +72,24 @@ public:
static const ssize_t MAX_LINE_BUFFER_SIZE;
class error : public std::exception {
public:
error(int err) : e_err(err){};
explicit error(int err) : e_err(err) {}
int e_err;
};
/** Metadata carried in the header of a compressed file. */
struct header_data {
    /** Modification time recorded in the header. */
    timeval hd_mtime{};
    /** Extra header bytes; starts out as a zero-length buffer. */
    auto_buffer hd_extra{auto_buffer::alloc(0)};
    /** File name recorded in the header. */
    std::string hd_name;
    /** Free-form comment recorded in the header. */
    std::string hd_comment;

    /** @return true when none of the fields carry any information. */
    bool empty() const
    {
        if (this->hd_mtime.tv_sec != 0) {
            return false;
        }
        if (!this->hd_extra.empty()) {
            return false;
        }
        if (!this->hd_name.empty()) {
            return false;
        }
        return this->hd_comment.empty();
    }
};
#define GZ_WINSIZE 32768U /*> gzip's max supported dictionary is 15-bits */
#define GZ_RAW_MODE (-15) /*> Raw inflate data mode */
#define GZ_HEADER_MODE (15 + 32) /*> Automatic zlib or gzip decoding */
@ -104,7 +117,7 @@ public:
void close();
void init_stream();
void continue_stream();
void open(int fd);
void open(int fd, header_data& hd);
int stream_data(void* buf, size_t size);
void seek(off_t offset);
@ -242,6 +255,8 @@ public:
/** @return The value reported by lb_buffer.size(). */
size_t get_buffer_size() const { return this->lb_buffer.size(); }
/**
 * @return A read-only reference to the header metadata stored in lb_header;
 *   the reference is to a member of this line_buffer.
 */
const header_data& get_header_data() const { return this->lb_header; }
void enable_cache();
static void cleanup_cache();
@ -348,6 +363,8 @@ private:
stats lb_stats;
nonstd::optional<auto_fd> lb_cached_fd;
header_data lb_header;
};
#endif

View File

@ -51,11 +51,22 @@
#include "log.watch.hh"
#include "log_format.hh"
#include "logfile.cfg.hh"
#include "yajlpp/yajlpp_def.hh"
static auto intern_lifetime = intern_string::get_table_lifetime();
static const size_t INDEX_RESERVE_INCREMENT = 1024;
/**
 * JSON mapping for line_buffer::header_data.  The "name", "mtime", and
 * "comment" properties are bound to the hd_name, hd_mtime, and hd_comment
 * fields respectively; hd_extra has no handler in this table.
 */
static const typed_json_path_container<line_buffer::header_data>
file_header_handlers = {
yajlpp::property_handler("name").for_field(
&line_buffer::header_data::hd_name),
yajlpp::property_handler("mtime").for_field(
&line_buffer::header_data::hd_mtime),
yajlpp::property_handler("comment").for_field(
&line_buffer::header_data::hd_comment),
};
Result<std::shared_ptr<logfile>, std::string>
logfile::open(std::string filename, logfile_open_options& loo)
{
@ -120,6 +131,12 @@ logfile::open(std::string filename, logfile_open_options& loo)
lf->lf_indexing = lf->lf_options.loo_is_visible;
const auto& hdr = lf->lf_line_buffer.get_header_data();
if (!hdr.empty()) {
lf->lf_embedded_metadata["net.zlib.gzip.header"]
= {text_format_t::TF_JSON, file_header_handlers.to_string(hdr)};
}
ensure(lf->invariant());
return Ok(lf);

View File

@ -526,44 +526,6 @@ attach_sqlite_db(sqlite3* db, const std::string& filename)
}
}
/**
 * Render a timestamp as "YYYY-MM-DD<sep>HH:MM:SS.mmm".
 *
 * @param buffer The destination for the NUL-terminated string.
 * @param buffer_size The number of bytes available in buffer.  At least 24
 *   bytes are needed (23 characters plus the terminator).
 * @param tim The time in seconds, broken down with secs2tm().
 * @param millis The sub-second part; only its three least-significant
 *   decimal digits are written.
 * @param sep The character placed between the date and the time.
 * @return The number of characters written, not counting the terminator, or
 *   zero if the buffer is missing or too small to hold the full timestamp.
 */
ssize_t
sql_strftime(
    char* buffer, size_t buffer_size, lnav::time64_t tim, int millis, char sep)
{
    // "YYYY-MM-DD" + sep + "HH:MM:SS" + ".mmm" + NUL
    constexpr size_t REQUIRED_SIZE = 10 + 1 + 8 + 4 + 1;

    if (buffer == nullptr || buffer_size < REQUIRED_SIZE) {
        // Never write past the caller's buffer; hand back an empty string
        // when there is at least room for the terminator.
        if (buffer != nullptr && buffer_size > 0) {
            buffer[0] = '\0';
        }
        return 0;
    }

    struct tm gmtm;
    int index = 0;

    secs2tm(tim, &gmtm);

    // Emits the decimal digits of value starting at the given power of ten.
    auto put_digits = [&buffer, &index](int value, int divisor) {
        for (; divisor > 0; divisor /= 10) {
            buffer[index++] = '0' + ((value / divisor) % 10);
        }
    };

    put_digits(gmtm.tm_year + 1900, 1000);
    buffer[index++] = '-';
    put_digits(gmtm.tm_mon + 1, 10);
    buffer[index++] = '-';
    put_digits(gmtm.tm_mday, 10);
    buffer[index++] = sep;
    put_digits(gmtm.tm_hour, 10);
    buffer[index++] = ':';
    put_digits(gmtm.tm_min, 10);
    buffer[index++] = ':';
    put_digits(gmtm.tm_sec, 10);
    buffer[index++] = '.';
    put_digits(millis, 100);
    buffer[index] = '\0';

    return index;
}
static void
sqlite_logger(void* dummy, int code, const char* msg)
{

View File

@ -82,11 +82,15 @@ void dump_sqlite_schema(sqlite3* db, std::string& schema_out);
void attach_sqlite_db(sqlite3* db, const std::string& filename);
ssize_t sql_strftime(char* buffer,
size_t buffer_size,
lnav::time64_t tim,
int millis,
char sep = ' ');
/**
 * Thin wrapper that forwards all of its arguments, unchanged, to
 * lnav::strftime_rfc3339() and returns that function's result.
 *
 * @param buffer The destination buffer for the formatted timestamp.
 * @param buffer_size The size of the destination buffer in bytes.
 * @param tim The time in seconds.
 * @param millis The sub-second part of the timestamp.
 * @param sep The separator between the date and the time; defaults to a
 *   space.
 */
inline ssize_t
sql_strftime(char* buffer,
size_t buffer_size,
lnav::time64_t tim,
int millis,
char sep = ' ')
{
return lnav::strftime_rfc3339(buffer, buffer_size, tim, millis, sep);
}
inline ssize_t
sql_strftime(char* buffer,

View File

@ -54,6 +54,7 @@ enum class text_format_t {
TF_SQL,
TF_XML,
TF_YAML,
TF_TOML,
};
namespace fmt {
@ -103,6 +104,9 @@ struct formatter<text_format_t> : formatter<string_view> {
case text_format_t::TF_YAML:
name = "application/yaml";
break;
case text_format_t::TF_TOML:
name = "application/toml";
break;
}
return formatter<string_view>::format(name, ctx);
}

View File

@ -520,20 +520,62 @@ textfile_sub_source::rescan_files(
auto read_res = lf->read_file();
if (read_res.isOk()) {
auto content = read_res.unwrap();
auto content_sf = string_fragment{content};
std::string frontmatter;
auto front_matter_terminator = content.length() > 8
? content.find("\n---\n", 4)
: std::string::npos;
static const auto FRONT_MATTER_RE
= lnav::pcre2pp::code::from_const(
R"((?:^---\n(.*)\n---\n|^\+\+\+\n(.*)\n\+\+\+\n))",
PCRE2_MULTILINE | PCRE2_DOTALL);
static thread_local auto md
= FRONT_MATTER_RE.create_match_data();
if (startswith(content, "---\n")
&& front_matter_terminator != std::string::npos)
{
frontmatter
= content.substr(4, front_matter_terminator - 3);
content_sf
= content_sf.substr(front_matter_terminator + 4);
auto content = read_res.unwrap();
auto content_sf = string_fragment::from_str(content);
std::string frontmatter;
text_format_t frontmatter_format;
auto cap_res = FRONT_MATTER_RE.capture_from(content_sf)
.into(md)
.matches()
.ignore_error();
if (cap_res) {
if (md[1]) {
frontmatter_format = text_format_t::TF_YAML;
frontmatter = md[1]->to_string();
} else if (md[2]) {
frontmatter_format = text_format_t::TF_TOML;
frontmatter = md[2]->to_string();
} else {
}
content_sf = cap_res->f_remaining;
} else if (content_sf.startswith("{")) {
yajlpp_parse_context ypc(
intern_string::lookup(lf->get_filename()));
auto_mem<yajl_handle_t> handle(yajl_free);
handle = yajl_alloc(&ypc.ypc_callbacks, nullptr, &ypc);
yajl_config(
handle.in(), yajl_allow_trailing_garbage, 1);
ypc.with_ignore_unused(true)
.with_handle(handle.in())
.with_error_reporter(
[&lf](const auto& ypc, const auto& um) {
log_error(
"%s: failed to parse JSON front matter "
"-- %s",
lf->get_filename().c_str(),
um.um_reason.al_string.c_str());
});
if (ypc.parse_doc(content_sf)) {
auto consumed = ypc.ypc_total_consumed;
if (consumed < content_sf.length()
&& content_sf[consumed] == '\n')
{
frontmatter_format = text_format_t::TF_JSON;
frontmatter = string_fragment::from_str_range(
content, 0, consumed)
.to_string();
content_sf = content_sf.substr(consumed);
}
}
}
md2attr_line mdal;
@ -553,7 +595,7 @@ textfile_sub_source::rescan_files(
if (!frontmatter.empty()) {
lf_meta["net.daringfireball.markdown.frontmatter"]
= {text_format_t::TF_YAML, frontmatter};
= {frontmatter_format, frontmatter};
}
lnav::events::publish(

View File

@ -1040,6 +1040,7 @@ yajlpp_parse_context::parse_doc(const string_fragment& sf)
auto rc = yajl_parse(this->ypc_handle, this->ypc_json_text, sf.length());
size_t consumed = yajl_get_bytes_consumed(this->ypc_handle);
this->ypc_total_consumed += consumed;
this->ypc_line_number += std::count(
&this->ypc_json_text[0], &this->ypc_json_text[consumed], '\n');

View File

@ -442,6 +442,7 @@ public:
yajl_handle ypc_handle{nullptr};
const unsigned char* ypc_json_text{nullptr};
size_t ypc_json_text_len{0};
size_t ypc_total_consumed{0};
yajl_callbacks ypc_callbacks;
yajl_callbacks ypc_alt_callbacks;
std::vector<char> ypc_path;

View File

@ -34,6 +34,8 @@
#include <chrono>
#include "base/date_time_scanner.hh"
#include "base/time_util.hh"
#include "config.h"
#include "mapbox/variant.hpp"
#include "relative_time.hh"
@ -965,6 +967,69 @@ struct json_path_handler : public json_path_handler_base {
return *this;
}
/**
 * Bind this property to a `timeval` member.
 *
 * Parsing: the JSON string is scanned with a date_time_scanner and, on
 * success, stored in the field.  An unrecognized value is reported through
 * the parse context and the field is left unchanged.
 *
 * Generation: the field is written as a string produced by
 * lnav::strftime_rfc3339() with a 'T' separator.  Nothing is emitted when a
 * default object is available and the field equals its default.
 *
 * @param args The chain of member pointers that leads to the timeval field.
 * @return This handler, to allow call chaining.
 */
template<typename... Args,
         std::enable_if_t<LastIs<timeval, Args...>::value, bool> = true>
json_path_handler& for_field(Args... args)
{
    this->add_cb(str_field_cb2);
    this->jph_str_cb = [args...](yajlpp_parse_context* ypc,
                                 const unsigned char* str,
                                 size_t len) {
        auto obj = ypc->ypc_obj_stack.top();
        auto jph = ypc->ypc_current_handler;

        date_time_scanner dts;
        timeval tv{};
        exttm tm;

        if (dts.scan((char*) str, len, nullptr, &tm, tv) == nullptr) {
            ypc->report_error(
                lnav::console::user_message::error(
                    attr_line_t("unrecognized timestamp ")
                        .append_quoted(
                            string_fragment::from_bytes(str, len)))
                    .with_snippet(ypc->get_snippet())
                    .with_help(jph->get_help_text(ypc)));
        } else {
            json_path_handler::get_field(obj, args...) = tv;
        }

        return 1;
    };
    this->jph_gen_callback = [args...](yajlpp_gen_context& ygc,
                                       const json_path_handler_base& jph,
                                       yajl_gen handle) {
        const auto& field = json_path_handler::get_field(
            ygc.ygc_obj_stack.top(), args...);

        if (!ygc.ygc_default_stack.empty()) {
            const auto& field_def = json_path_handler::get_field(
                ygc.ygc_default_stack.top(), args...);

            // Skip values that match the default.
            if (field == field_def) {
                return yajl_gen_status_ok;
            }
        }

        if (ygc.ygc_depth) {
            yajl_gen_string(handle, jph.jph_property);
        }

        yajlpp_generator gen(handle);
        char buf[64];

        // tv_usec holds microseconds while the formatter expects
        // milliseconds, so scale it down before formatting.
        auto buf_len = lnav::strftime_rfc3339(
            buf, sizeof(buf), field.tv_sec, field.tv_usec / 1000, 'T');

        return gen(string_fragment::from_bytes(buf, buf_len));
    };
    this->jph_field_getter
        = [args...](void* root, nonstd::optional<std::string> name) {
              return (void*) &json_path_handler::get_field(root, args...);
          };

    return *this;
}
template<
typename... Args,
std::enable_if_t<LastIs<nonstd::optional<std::string>, Args...>::value,

View File

@ -292,6 +292,8 @@ EXPECTED_FILES = \
$(srcdir)/%reldir%/test_logfile.sh_08d731a04c877a34819b35de185e30a74c9fd497.out \
$(srcdir)/%reldir%/test_logfile.sh_09bd16e044302f6b121092534708594bdad11b5a.err \
$(srcdir)/%reldir%/test_logfile.sh_09bd16e044302f6b121092534708594bdad11b5a.out \
$(srcdir)/%reldir%/test_logfile.sh_1c6eee38f66356fcd9a9f0faedaea6dbcc901060.err \
$(srcdir)/%reldir%/test_logfile.sh_1c6eee38f66356fcd9a9f0faedaea6dbcc901060.out \
$(srcdir)/%reldir%/test_logfile.sh_290a3c49e53c2229a7400c107338fa0bb38375e2.err \
$(srcdir)/%reldir%/test_logfile.sh_290a3c49e53c2229a7400c107338fa0bb38375e2.out \
$(srcdir)/%reldir%/test_logfile.sh_3fc6bfd8a6160817211f3e14fde957af75b9dbe7.err \
@ -984,6 +986,8 @@ EXPECTED_FILES = \
$(srcdir)/%reldir%/test_text_file.sh_6a24078983cf1b7a80b6fb65d5186cd125498136.out \
$(srcdir)/%reldir%/test_text_file.sh_87943c6be50d701a03e901f16493314c839af1ab.err \
$(srcdir)/%reldir%/test_text_file.sh_87943c6be50d701a03e901f16493314c839af1ab.out \
$(srcdir)/%reldir%/test_text_file.sh_8b2cd055e6a1db2ed9b2af2a917f8556395fa653.err \
$(srcdir)/%reldir%/test_text_file.sh_8b2cd055e6a1db2ed9b2af2a917f8556395fa653.out \
$(srcdir)/%reldir%/test_text_file.sh_ac486314c4e02e480d829ea2f077b86c49fedcec.err \
$(srcdir)/%reldir%/test_text_file.sh_ac486314c4e02e480d829ea2f077b86c49fedcec.out \
$(srcdir)/%reldir%/test_text_file.sh_ac872aadda29b9a824361a2c711d62ec1c75d40f.err \

View File

@ -0,0 +1,2 @@
 filepath  descriptor  mimetype  content 
{test_dir}/logfile_syslog.1.gz net.zlib.gzip.header application/json {"name":"logfile_syslog.1","mtime":"2007-11-03T16:23:00.000","comment":""} 

View File

@ -0,0 +1,2 @@
 filepath  descriptor  mimetype  content 
{test_dir}/textfile_0.md net.daringfireball.markdown.frontmatter application/json {␊ "comment": "This is JSON front-matter"␊} 

View File

@ -695,3 +695,7 @@ run_cap_test ${lnav_test} -n \
run_cap_test ${lnav_test} -n \
-c ':switch-to-view pretty' \
${test_dir}/logfile_ansi.1
run_cap_test ${lnav_test} -n \
-c ';SELECT * FROM lnav_file_metadata' \
logfile_syslog.1.gz

View File

@ -27,3 +27,7 @@ run_cap_test ${lnav_test} -n \
cp ${test_dir}/UTF-8-test.txt UTF-8-test.md
run_cap_test ${lnav_test} -n \
UTF-8-test.md
run_cap_test ${lnav_test} -n \
-c ';SELECT * FROM lnav_file_metadata' \
${test_dir}/textfile_0.md

9
test/textfile_0.md Normal file
View File

@ -0,0 +1,9 @@
{
"comment": "This is JSON front-matter"
}
## Test
* One
* Two
* Three