// Source: lnav/src/logfile.hh
/**
 * Copyright (c) 2007-2012, Timothy Stack
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice, this
 *   list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 * * Neither the name of Timothy Stack nor the names of its contributors
 *   may be used to endorse or promote products derived from this software
 *   without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * @file logfile.hh
 */
#ifndef logfile_hh
#define logfile_hh
2009-09-14 03:07:32 +02:00
2022-03-16 23:38:08 +01:00
#include <string>
#include <utility>
#include <vector>
2009-09-14 03:07:32 +02:00
#include <stdint.h>
2022-03-16 23:38:08 +01:00
#include <stdio.h>
#include <sys/resource.h>
#include <sys/stat.h>
2009-09-14 03:07:32 +02:00
#include <sys/types.h>
2019-05-08 14:30:59 +02:00
#include "base/lnav_log.hh"
#include "base/result.h"
#include "byte_array.hh"
2020-10-21 07:55:46 +02:00
#include "ghc/filesystem.hpp"
2022-03-16 23:38:08 +01:00
#include "line_buffer.hh"
#include "log_format_fwd.hh"
2022-03-16 23:38:08 +01:00
#include "logfile_fwd.hh"
#include "safe/safe.h"
2022-03-16 23:38:08 +01:00
#include "shared_buffer.hh"
#include "text_format.hh"
#include "unique_path.hh"
2009-09-14 03:07:32 +02:00
/**
* Observer interface for logfile indexing progress.
*
* @see logfile
*/
class logfile_observer {
public:
virtual ~logfile_observer() = default;
2009-09-14 03:07:32 +02:00
enum class indexing_result {
CONTINUE,
BREAK,
};
2009-09-14 03:07:32 +02:00
/**
* @param lf The logfile object that is doing the indexing.
* @param off The current offset in the file being processed.
* @param total The total size of the file.
* @return false
2009-09-14 03:07:32 +02:00
*/
virtual indexing_result logfile_indexing(const std::shared_ptr<logfile>& lf,
file_off_t off,
2022-03-16 23:38:08 +01:00
file_size_t total)
= 0;
2009-09-14 03:07:32 +02:00
};
/**
 * Activity counters kept for a logfile.  All fields are zero-initialized.
 */
struct logfile_activity {
    /** Poll counter (incremented outside of this header). */
    int64_t la_polls{0};
    /** Read counter (incremented outside of this header). */
    int64_t la_reads{0};
    /**
     * Resource usage snapshot, value-initialized to all zeroes.
     * NOTE(review): presumably filled in once the initial index pass has
     * completed -- confirm against the logfile implementation.
     */
    struct rusage la_initial_index_rusage{};
};
2009-09-14 03:07:32 +02:00
/**
* Container for the lines in a log file and some metadata.
*/
2022-03-16 23:38:08 +01:00
class logfile
: public unique_path_source
, public std::enable_shared_from_this<logfile> {
2009-09-14 03:07:32 +02:00
public:
2022-03-16 23:38:08 +01:00
typedef std::vector<logline>::iterator iterator;
2009-09-14 03:07:32 +02:00
typedef std::vector<logline>::const_iterator const_iterator;
/**
* Construct a logfile with the given arguments.
*
* @param filename The name of the log file.
* @param fd The file descriptor for accessing the file or -1 if the
* constructor should open the file specified by 'filename'. The
* descriptor needs to be seekable.
*/
static Result<std::shared_ptr<logfile>, std::string> open(
2022-03-16 23:38:08 +01:00
std::string filename, logfile_open_options& loo);
2009-09-14 03:07:32 +02:00
~logfile() override;
2009-09-14 03:07:32 +02:00
const logfile_activity& get_activity() const { return this->lf_activity; }
2022-03-16 23:38:08 +01:00
nonstd::optional<ghc::filesystem::path> get_actual_path() const
{
return this->lf_actual_path;
}
2009-09-14 03:07:32 +02:00
/** @return The filename as given in the constructor. */
const std::string& get_filename() const { return this->lf_filename; }
2022-03-16 23:38:08 +01:00
/** @return The filename as given in the constructor, excluding the path
* prefix. */
const std::string& get_basename() const { return this->lf_basename; }
int get_fd() const { return this->lf_line_buffer.get_fd(); }
2012-07-13 18:26:47 +02:00
/** @param filename The new filename for this log file. */
void set_filename(const std::string& filename);
2012-07-13 18:26:47 +02:00
const std::string& get_content_id() const { return this->lf_content_id; }
/** @return The inode for this log file. */
const struct stat& get_stat() const { return this->lf_stat; }
2009-09-14 03:07:32 +02:00
size_t get_longest_line_length() const { return this->lf_longest_line; }
bool is_compressed() const { return this->lf_line_buffer.is_compressed(); }
bool is_valid_filename() const { return this->lf_valid_filename; }
file_off_t get_index_size() const { return this->lf_index_size; }
2020-10-29 05:21:57 +01:00
2009-09-14 03:07:32 +02:00
/**
* @return The detected format, rebuild_index() must be called before this
* will return a value other than NULL.
2009-09-14 03:07:32 +02:00
*/
std::shared_ptr<log_format> get_format() const { return this->lf_format; }
2009-09-14 03:07:32 +02:00
intern_string_t get_format_name() const;
text_format_t get_text_format() const { return this->lf_text_format; }
2009-09-14 03:07:32 +02:00
/**
* @return The last modified time of the file when the file was last
* indexed.
*/
time_t get_modified_time() const { return this->lf_index_time; }
2009-09-14 03:07:32 +02:00
int get_time_offset_line() const { return this->lf_time_offset_line; }
2013-07-14 06:31:59 +02:00
2022-03-16 23:38:08 +01:00
const struct timeval& get_time_offset() const
{
return this->lf_time_offset;
}
2013-07-14 06:31:59 +02:00
void adjust_content_time(int line,
2022-03-16 23:38:08 +01:00
const struct timeval& tv,
bool abs_offset = true);
2022-03-16 23:38:08 +01:00
void clear_time_offset()
{
struct timeval tv = {0, 0};
2013-07-14 06:31:59 +02:00
this->adjust_content_time(-1, tv);
}
void mark_as_duplicate(const std::string& name);
2020-10-29 05:23:25 +01:00
2022-03-16 23:38:08 +01:00
const logfile_open_options& get_open_options() const
{
2021-01-09 07:42:28 +01:00
return this->lf_options;
}
2020-10-29 05:23:25 +01:00
void reset_state();
2022-03-16 23:38:08 +01:00
bool is_time_adjusted() const
{
return (this->lf_time_offset.tv_sec != 0
|| this->lf_time_offset.tv_usec != 0);
}
2022-03-16 23:38:08 +01:00
iterator begin()
{
return this->lf_index.begin();
}
2009-09-14 03:07:32 +02:00
2022-03-16 23:38:08 +01:00
const_iterator begin() const
{
return this->lf_index.begin();
}
2009-09-14 03:07:32 +02:00
2022-03-16 23:38:08 +01:00
const_iterator cbegin() const
{
return this->lf_index.begin();
}
2020-11-29 22:20:07 +01:00
2022-03-16 23:38:08 +01:00
iterator end()
{
return this->lf_index.end();
}
2009-09-14 03:07:32 +02:00
2022-03-16 23:38:08 +01:00
const_iterator end() const
{
return this->lf_index.end();
}
2009-09-14 03:07:32 +02:00
2022-03-16 23:38:08 +01:00
const_iterator cend() const
{
return this->lf_index.end();
}
2020-11-29 22:20:07 +01:00
2009-09-14 03:07:32 +02:00
/** @return The number of lines in the index. */
2022-03-16 23:38:08 +01:00
size_t size() const
{
return this->lf_index.size();
}
2009-09-14 03:07:32 +02:00
2022-03-16 23:38:08 +01:00
nonstd::optional<const_iterator> find_from_time(
const struct timeval& tv) const;
logline& operator[](int index) { return this->lf_index[index]; }
2009-09-14 03:07:32 +02:00
logline& front() { return this->lf_index.front(); }
2020-10-29 05:21:57 +01:00
logline& back() { return this->lf_index.back(); }
/** @return True if this log file still exists. */
bool exists() const;
void close() { this->lf_is_closed = true; }
2013-07-14 06:31:59 +02:00
bool is_closed() const { return this->lf_is_closed; }
2013-07-14 06:31:59 +02:00
struct timeval original_line_time(iterator ll);
2013-07-14 06:31:59 +02:00
Result<shared_buffer_ref, std::string> read_line(iterator ll);
2009-09-14 03:07:32 +02:00
2022-03-16 23:38:08 +01:00
iterator line_base(iterator ll)
{
auto retval = ll;
2014-02-20 05:23:19 +01:00
while (retval != this->begin() && retval->get_sub_offset() != 0) {
--retval;
}
return retval;
}
2014-02-20 05:23:19 +01:00
2022-03-16 23:38:08 +01:00
iterator message_start(iterator ll)
{
auto retval = ll;
2022-03-16 23:38:08 +01:00
while (retval != this->begin()
&& (retval->get_sub_offset() != 0 || !retval->is_message()))
{
--retval;
}
return retval;
}
2020-11-29 22:20:07 +01:00
size_t line_length(const_iterator ll, bool include_continues = true);
2022-03-16 23:38:08 +01:00
file_range get_file_range(const_iterator ll, bool include_continues = true)
{
return {
ll->get_offset(),
(file_ssize_t) this->line_length(ll, include_continues),
};
}
2022-03-16 23:38:08 +01:00
void read_full_message(const_iterator ll,
shared_buffer_ref& msg_out,
int max_lines = 50);
2014-02-01 15:41:11 +01:00
Result<shared_buffer_ref, std::string> read_raw_message(const_iterator ll);
enum class rebuild_result_t {
INVALID,
NO_NEW_LINES,
NEW_LINES,
NEW_ORDER,
};
2009-09-14 03:07:32 +02:00
/**
* Index any new data in the log file.
*
* @param lo The observer object that will be called regularly during
* indexing.
* @return True if any new lines were indexed.
*/
2022-03-16 23:38:08 +01:00
rebuild_result_t rebuild_index(
nonstd::optional<ui_clock::time_point> deadline = nonstd::nullopt);
2009-09-14 03:07:32 +02:00
void reobserve_from(iterator iter);
2022-03-16 23:38:08 +01:00
void set_logfile_observer(logfile_observer* lo)
{
this->lf_logfile_observer = lo;
}
2022-03-16 23:38:08 +01:00
void set_logline_observer(logline_observer* llo);
2022-03-16 23:38:08 +01:00
logline_observer* get_logline_observer() const
{
return this->lf_logline_observer;
}
2022-03-16 23:38:08 +01:00
bool operator<(const logfile& rhs) const
2009-09-14 03:07:32 +02:00
{
2013-05-28 06:35:00 +02:00
bool retval;
if (this->lf_index.empty()) {
retval = true;
2022-03-16 23:38:08 +01:00
} else if (rhs.lf_index.empty()) {
2013-05-28 06:35:00 +02:00
retval = false;
2022-03-16 23:38:08 +01:00
} else {
2013-05-28 06:35:00 +02:00
retval = this->lf_index[0].get_time() < rhs.lf_index[0].get_time();
}
return retval;
}
2009-09-14 03:07:32 +02:00
2022-03-16 23:38:08 +01:00
bool is_indexing() const
{
2020-11-25 23:47:39 +01:00
return this->lf_indexing;
}
2009-09-14 03:07:32 +02:00
/** Check the invariants for this object. */
2018-10-17 16:03:33 +02:00
bool invariant()
2009-09-14 03:07:32 +02:00
{
2018-10-17 16:03:33 +02:00
require(!this->lf_filename.empty());
2009-09-14 03:07:32 +02:00
2013-05-28 06:35:00 +02:00
return true;
}
2020-10-21 07:55:46 +02:00
ghc::filesystem::path get_path() const override;
2009-09-14 03:07:32 +02:00
enum class note_type {
indexing_disabled,
duplicate,
2021-05-31 00:07:09 +02:00
not_utf,
};
using note_map = std::map<note_type, std::string>;
using safe_notes = safe::Safe<note_map>;
2022-03-16 23:38:08 +01:00
note_map get_notes() const
{
return *this->lf_notes.readAccess();
}
2009-09-14 03:07:32 +02:00
protected:
/**
* Process a line from the file.
*
* @param offset The offset of the line in the file.
* @param prefix The contents of the line.
* @param len The length of the 'prefix' string.
*/
2022-03-16 23:38:08 +01:00
bool process_prefix(shared_buffer_ref& sbr, const line_info& li);
2009-09-14 03:07:32 +02:00
2022-03-16 23:38:08 +01:00
void set_format_base_time(log_format* lf);
private:
2022-03-16 23:38:08 +01:00
logfile(std::string filename, logfile_open_options& loo);
std::string lf_filename;
logfile_open_options lf_options;
logfile_activity lf_activity;
2022-03-16 23:38:08 +01:00
bool lf_named_file{true};
bool lf_valid_filename{true};
nonstd::optional<ghc::filesystem::path> lf_actual_path;
std::string lf_basename;
std::string lf_content_id;
2022-03-16 23:38:08 +01:00
struct stat lf_stat {
};
std::shared_ptr<log_format> lf_format;
2022-03-16 23:38:08 +01:00
std::vector<logline> lf_index;
time_t lf_index_time{0};
file_off_t lf_index_size{0};
bool lf_sort_needed{false};
2013-05-28 06:35:00 +02:00
line_buffer lf_line_buffer;
int lf_time_offset_line{0};
2022-03-16 23:38:08 +01:00
struct timeval lf_time_offset {
0, 0
};
bool lf_is_closed{false};
2020-11-25 23:47:39 +01:00
bool lf_indexing{true};
bool lf_partial_line{false};
2022-03-16 23:38:08 +01:00
logline_observer* lf_logline_observer{nullptr};
logfile_observer* lf_logfile_observer{nullptr};
size_t lf_longest_line{0};
text_format_t lf_text_format{text_format_t::TF_UNKNOWN};
2018-10-17 16:03:33 +02:00
uint32_t lf_out_of_time_order_count{0};
safe_notes lf_notes;
nonstd::optional<std::pair<file_off_t, size_t>> lf_next_line_cache;
2009-09-14 03:07:32 +02:00
};
class logline_observer {
public:
virtual ~logline_observer() = default;
2022-03-16 23:38:08 +01:00
virtual void logline_restart(const logfile& lf, file_size_t rollback_size)
= 0;
2022-03-16 23:38:08 +01:00
virtual void logline_new_lines(const logfile& lf,
logfile::const_iterator ll_begin,
logfile::const_iterator ll_end,
shared_buffer_ref& sbr)
= 0;
2022-03-16 23:38:08 +01:00
virtual void logline_eof(const logfile& lf) = 0;
};
2009-09-14 03:07:32 +02:00
#endif