lnav/test/drive_data_scanner.cc

311 lines
11 KiB
C++
Raw Permalink Normal View History

2013-05-03 08:02:03 +02:00
/**
* Copyright (c) 2007-2012, Timothy Stack
*
* All rights reserved.
2013-06-03 16:45:19 +02:00
*
2013-05-03 08:02:03 +02:00
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
2013-06-03 16:45:19 +02:00
*
2013-05-03 08:02:03 +02:00
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* * Neither the name of Timothy Stack nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
2013-06-03 16:45:19 +02:00
*
2013-05-03 08:02:03 +02:00
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
2022-03-16 23:38:08 +01:00
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2013-05-03 08:02:03 +02:00
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
2011-06-13 16:46:03 +02:00
2022-02-20 00:35:40 +01:00
#ifdef __CYGWIN__
2022-03-16 23:38:08 +01:00
# include <alloca.h>
2022-02-20 00:35:40 +01:00
#endif
2022-03-16 23:38:08 +01:00
#include <fstream>
#include <iostream>
2011-06-13 16:46:03 +02:00
#include <stdio.h>
#include <stdlib.h>
2021-10-03 08:17:33 +02:00
#include "base/injector.hh"
2022-03-16 23:38:08 +01:00
#include "config.h"
2022-05-11 06:58:32 +02:00
#include "data_parser.hh"
2022-03-16 23:38:08 +01:00
#include "data_scanner.hh"
#include "elem_to_json.hh"
2013-06-03 16:45:19 +02:00
#include "log_format.hh"
2013-06-29 20:00:34 +02:00
#include "log_format_loader.hh"
2022-05-11 06:58:32 +02:00
#include "logfile.hh"
#include "pretty_printer.hh"
#include "shared_buffer.hh"
2022-05-11 06:58:32 +02:00
#include "view_curses.hh"
2011-06-13 16:46:03 +02:00
2022-03-16 23:38:08 +01:00
// Name of the scratch file that main() writes the generated scanner/parser
// report to.  The report is then compared against the input file with
// `diff -u` and, if the user accepts the change, renamed over the input file.
const char* TMP_NAME = "scanned.tmp";
2022-03-16 23:38:08 +01:00
int
main(int argc, char* argv[])
2011-06-13 16:46:03 +02:00
{
2022-03-16 23:38:08 +01:00
int c, retval = EXIT_SUCCESS;
bool prompt = false, is_log = false, pretty_print = false;
bool scanner_details = false;
2013-06-03 16:45:19 +02:00
2021-10-03 08:17:33 +02:00
{
2022-03-16 23:38:08 +01:00
static auto builtin_formats
= injector::get<std::vector<std::shared_ptr<log_format>>>();
2021-10-03 08:17:33 +02:00
auto& root_formats = log_format::get_root_formats();
2022-03-16 23:38:08 +01:00
log_format::get_root_formats().insert(root_formats.begin(),
builtin_formats.begin(),
builtin_formats.end());
2021-10-03 08:17:33 +02:00
builtin_formats.clear();
}
2013-06-29 20:00:34 +02:00
{
2020-10-21 07:55:46 +02:00
std::vector<ghc::filesystem::path> paths;
2022-04-13 01:07:13 +02:00
std::vector<lnav::console::user_message> errors;
2013-06-29 20:00:34 +02:00
load_formats(paths, errors);
2013-06-29 20:00:34 +02:00
}
while ((c = getopt(argc, argv, "pPls")) != -1) {
2013-06-03 16:45:19 +02:00
switch (c) {
2022-03-16 23:38:08 +01:00
case 'p':
prompt = true;
break;
2013-06-03 16:45:19 +02:00
2022-03-16 23:38:08 +01:00
case 'P':
pretty_print = true;
break;
2022-03-16 23:38:08 +01:00
case 'l':
is_log = true;
break;
case 's':
scanner_details = true;
break;
2022-03-16 23:38:08 +01:00
default:
retval = EXIT_FAILURE;
break;
2013-06-03 16:45:19 +02:00
}
2011-06-13 16:46:03 +02:00
}
2013-06-03 16:45:19 +02:00
argc -= optind;
argv += optind;
2022-03-16 23:38:08 +01:00
if (retval != EXIT_SUCCESS) {
} else if (argc < 1) {
fprintf(stderr, "error: expecting file name argument(s)\n");
2013-06-03 16:45:19 +02:00
retval = EXIT_FAILURE;
2022-03-16 23:38:08 +01:00
} else {
for (int lpc = 0; lpc < argc; lpc++) {
2022-03-31 17:59:19 +02:00
std::unique_ptr<std::ifstream> in_ptr;
std::istream* in;
2022-03-16 23:38:08 +01:00
FILE* out;
2013-06-03 16:45:19 +02:00
if (strcmp(argv[lpc], "-") == 0) {
2022-03-31 17:59:19 +02:00
in = &std::cin;
2022-03-16 23:38:08 +01:00
} else {
2022-03-31 17:59:19 +02:00
auto ifs = std::make_unique<std::ifstream>(argv[lpc]);
2013-06-03 16:45:19 +02:00
if (!ifs->is_open()) {
fprintf(stderr, "error: unable to open file\n");
retval = EXIT_FAILURE;
2022-03-16 23:38:08 +01:00
} else {
2021-02-26 07:22:16 +01:00
in_ptr = std::move(ifs);
in = in_ptr.get();
}
2013-06-03 16:45:19 +02:00
}
2021-02-26 07:22:16 +01:00
if ((out = fopen(TMP_NAME, "w")) == nullptr) {
2022-03-16 23:38:08 +01:00
fprintf(stderr,
"error: unable to temporary file for writing\n");
retval = EXIT_FAILURE;
2022-03-16 23:38:08 +01:00
} else {
2022-03-31 17:59:19 +02:00
std::shared_ptr<log_format> format;
2022-03-16 23:38:08 +01:00
char* log_line;
bool found = false;
2022-03-16 23:38:08 +01:00
char cmd[2048];
2022-03-31 17:59:19 +02:00
std::string line;
2022-03-16 23:38:08 +01:00
int rc;
getline(*in, line);
if (strcmp(argv[lpc], "-") == 0) {
line = " " + line;
}
2013-06-03 16:45:19 +02:00
2022-03-16 23:38:08 +01:00
log_line = (char*) alloca(line.length());
strcpy(log_line, &line[13]);
2022-03-31 17:59:19 +02:00
auto sub_line = line.substr(13);
struct line_range body(0, sub_line.length());
shared_buffer share_manager;
logline_value_vector ll_values;
auto& sbr = ll_values.lvv_sbr;
2022-03-16 23:38:08 +01:00
sbr.share(
share_manager, (char*) sub_line.c_str(), sub_line.size());
2013-06-03 16:45:19 +02:00
2022-03-16 23:38:08 +01:00
auto& root_formats = log_format::get_root_formats();
2022-03-31 17:59:19 +02:00
std::vector<std::shared_ptr<log_format>>::iterator iter;
std::vector<logline> index;
2013-06-03 16:45:19 +02:00
if (is_log) {
2022-05-11 06:58:32 +02:00
logfile_open_options loo;
auto open_res = logfile::open(argv[lpc], loo);
auto lf = open_res.unwrap();
ArenaAlloc::Alloc<char> allocator;
scan_batch_context sbc{allocator};
for (iter = root_formats.begin();
iter != root_formats.end() && !found;
++iter)
{
2022-03-16 23:38:08 +01:00
line_info li = {{13}};
(*iter)->clear();
if ((*iter)->scan(*lf, index, li, sbr, sbc)
== log_format::SCAN_MATCH)
{
format = (*iter)->specialized();
found = true;
}
}
if (!found) {
fprintf(stderr, "error: log sample does not match\n");
return EXIT_FAILURE;
}
}
2013-06-03 16:45:19 +02:00
string_attrs_t sa;
2013-06-03 16:45:19 +02:00
2021-02-26 07:22:16 +01:00
if (format.get() != nullptr) {
format->annotate(0, sa, ll_values);
body = find_string_attr_range(sa, &SA_BODY);
}
2013-06-03 16:45:19 +02:00
data_parser::TRACE_FILE = fopen("scanned.dpt", "w");
data_scanner ds(sub_line, body.lr_start);
if (scanner_details) {
fprintf(out,
" %s\n",
ds.get_input().to_string().c_str());
while (true) {
auto tok_res = ds.tokenize2();
if (!tok_res) {
break;
}
fprintf(out,
"%4s %3d:%-3d ",
data_scanner::token2name(tok_res->tr_token),
tok_res->tr_capture.c_begin,
tok_res->tr_capture.c_end);
size_t cap_index = 0;
for (; cap_index < tok_res->tr_capture.c_end;
cap_index++)
{
if (cap_index == tok_res->tr_capture.c_begin) {
fputc('^', out);
} else if (cap_index
== (tok_res->tr_capture.c_end - 1))
{
fputc('^', out);
} else if (cap_index > tok_res->tr_capture.c_begin)
{
fputc('-', out);
} else {
fputc(' ', out);
}
}
for (; cap_index < (int) ds.get_input().length();
cap_index++)
{
fputc(' ', out);
}
auto sub = tok_res->to_string();
fprintf(out, " %s\n", sub.c_str());
}
}
ds.reset();
2022-03-16 23:38:08 +01:00
data_parser dp(&ds);
2022-03-31 17:59:19 +02:00
std::string msg_format;
dp.dp_msg_format = &msg_format;
dp.parse();
dp.print(out, dp.dp_pairs);
2022-03-16 23:38:08 +01:00
fprintf(
out, "msg :%s\n", sub_line.c_str() + body.lr_start);
fprintf(out, "format :%s\n", msg_format.c_str());
if (pretty_print) {
data_scanner ds2(sub_line, body.lr_start);
pretty_printer pp(&ds2, sa);
attr_line_t pretty_out;
pp.append_to(pretty_out);
fprintf(out, "\n--\n%s", pretty_out.get_string().c_str());
}
auto_mem<yajl_gen_t> gen(yajl_gen_free);
2021-02-26 07:22:16 +01:00
gen = yajl_gen_alloc(nullptr);
yajl_gen_config(gen.in(), yajl_gen_beautify, true);
elements_to_json(gen, dp, &dp.dp_pairs);
2022-03-16 23:38:08 +01:00
const unsigned char* buf;
size_t len;
yajl_gen_get_buf(gen, &buf, &len);
fwrite(buf, 1, len, out);
fclose(out);
sprintf(cmd, "diff -u %s %s", argv[lpc], TMP_NAME);
rc = system(cmd);
if (rc != 0) {
if (prompt) {
char resp[4];
2022-03-16 23:38:08 +01:00
printf("\nOriginal line:\n%s\n",
sub_line.c_str() + body.lr_start);
printf(
"Would you like to update the original file? "
"(y/N) ");
fflush(stdout);
2015-04-02 22:44:27 +02:00
log_perror(scanf("%3s", resp));
2013-06-05 04:45:04 +02:00
if (strcasecmp(resp, "y") == 0) {
rename(TMP_NAME, argv[lpc]);
2022-03-16 23:38:08 +01:00
} else {
retval = EXIT_FAILURE;
}
2022-03-16 23:38:08 +01:00
} else {
fprintf(stderr, "error: mismatch\n");
2013-06-03 16:45:19 +02:00
retval = EXIT_FAILURE;
}
}
fclose(data_parser::TRACE_FILE);
2021-02-26 07:22:16 +01:00
data_parser::TRACE_FILE = nullptr;
2013-06-03 16:45:19 +02:00
}
}
}
return retval;
2011-06-13 16:46:03 +02:00
}