/** * Copyright (c) 2020, Timothy Stack * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * Neither the name of Timothy Stack nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * @file archive_manager.cc */ #include #include "config.h" #if HAVE_ARCHIVE_H # include "archive.h" # include "archive_entry.h" #endif #include "archive_manager.cfg.hh" #include "archive_manager.hh" #include "auto_fd.hh" #include "auto_mem.hh" #include "base/fs_util.hh" #include "base/humanize.hh" #include "base/injector.hh" #include "base/lnav_log.hh" #include "base/paths.hh" #include "fmt/format.h" #include "lnav_util.hh" namespace fs = ghc::filesystem; namespace archive_manager { class archive_lock { public: class guard { public: explicit guard(archive_lock& arc_lock) : g_lock(arc_lock) { this->g_lock.lock(); }; ~guard() { this->g_lock.unlock(); }; private: archive_lock& g_lock; }; void lock() const { lockf(this->lh_fd, F_LOCK, 0); }; void unlock() const { lockf(this->lh_fd, F_ULOCK, 0); }; explicit archive_lock(const fs::path& archive_path) { auto lock_path = archive_path; lock_path += ".lck"; this->lh_fd = lnav::filesystem::openp(lock_path, O_CREAT | O_RDWR, 0600); log_perror(fcntl(this->lh_fd, F_SETFD, FD_CLOEXEC)); }; auto_fd lh_fd; }; #if HAVE_ARCHIVE_H /** * Enables a subset of the supported archive formats to speed up detection, * since some formats, like xar are unlikely to be used. */ static void enable_desired_archive_formats(archive* arc) { archive_read_support_format_7zip(arc); archive_read_support_format_cpio(arc); archive_read_support_format_lha(arc); archive_read_support_format_rar(arc); archive_read_support_format_tar(arc); archive_read_support_format_zip(arc); } #endif bool is_archive(const fs::path& filename) { #if HAVE_ARCHIVE_H auto_mem arc(archive_read_free); arc = archive_read_new(); archive_read_support_filter_all(arc); enable_desired_archive_formats(arc); archive_read_support_format_raw(arc); log_debug("read open %s", filename.c_str()); auto r = archive_read_open_filename(arc, filename.c_str(), 128 * 1024); if (r == ARCHIVE_OK) { struct archive_entry* entry; auto format_name = archive_format_name(arc); log_debug("read next header %s %s", format_name, filename.c_str()); if (archive_read_next_header(arc, &entry) == ARCHIVE_OK) { log_debug("read next done %s", filename.c_str()); static const auto RAW_FORMAT_NAME = string_fragment("raw"); static const auto GZ_FILTER_NAME = string_fragment("gzip"); format_name = archive_format_name(arc); if (RAW_FORMAT_NAME == format_name) { auto filter_count = archive_filter_count(arc); if (filter_count == 1) { return false; } auto first_filter_name = archive_filter_name(arc, 0); if (filter_count == 2 && GZ_FILTER_NAME == first_filter_name) { return false; } } log_info( "detected archive: %s -- %s", filename.c_str(), format_name); return true; } else { log_info("archive read header failed: %s -- %s", filename.c_str(), archive_error_string(arc)); } } else { log_info("archive open failed: %s -- %s", filename.c_str(), archive_error_string(arc)); } #endif return false; } static fs::path archive_cache_path() { return lnav::paths::workdir() / "archives"; } fs::path filename_to_tmp_path(const std::string& filename) { auto fn_path = fs::path(filename); auto basename = fn_path.filename().string(); hasher h; h.update(basename); auto fd = auto_fd(lnav::filesystem::openp(filename, O_RDONLY)); if (fd != -1) { char buffer[1024]; int rc; rc = read(fd, buffer, sizeof(buffer)); if (rc >= 0) { h.update(buffer, rc); } } basename = fmt::format("arc-{}-{}", h.to_string(), basename); return archive_cache_path() / basename; } #if HAVE_ARCHIVE_H static walk_result_t copy_data(const std::string& filename, struct archive* ar, struct archive_entry* entry, struct archive* aw, const fs::path& entry_path, struct extract_progress* ep) { int r; const void* buff; size_t size, total = 0, next_space_check = 0; la_int64_t offset; for (;;) { if (total >= next_space_check) { auto& cfg = injector::get(); auto tmp_space = fs::space(entry_path); if (tmp_space.available < cfg.amc_min_free_space) { return Err(fmt::format( FMT_STRING("available space on disk ({}) is below the " "minimum-free threshold ({}). Unable to unpack " "'{}' to '{}'"), humanize::file_size(tmp_space.available), humanize::file_size(cfg.amc_min_free_space), entry_path.filename().string(), entry_path.parent_path().string())); } next_space_check += 1024 * 1024; } r = archive_read_data_block(ar, &buff, &size, &offset); if (r == ARCHIVE_EOF) { return Ok(); } if (r != ARCHIVE_OK) { return Err(fmt::format("failed to read file: {} >> {} -- {}", filename, archive_entry_pathname_utf8(entry), archive_error_string(ar))); } r = archive_write_data_block(aw, buff, size, offset); if (r != ARCHIVE_OK) { return Err(fmt::format("failed to write file: {} -- {}", entry_path.string(), archive_error_string(aw))); } total += size; ep->ep_out_size.fetch_add(size); } } static walk_result_t extract(const std::string& filename, const extract_cb& cb) { static int FLAGS = ARCHIVE_EXTRACT_TIME | ARCHIVE_EXTRACT_PERM | ARCHIVE_EXTRACT_ACL | ARCHIVE_EXTRACT_FFLAGS; auto tmp_path = filename_to_tmp_path(filename); auto arc_lock = archive_lock(tmp_path); auto lock_guard = archive_lock::guard(arc_lock); auto done_path = tmp_path; done_path += ".done"; if (fs::exists(done_path)) { size_t file_count = 0; if (fs::is_directory(tmp_path)) { for (const auto& entry : fs::directory_iterator(tmp_path)) { (void) entry; file_count += 1; } } if (file_count > 0) { fs::last_write_time(done_path, std::chrono::system_clock::now()); log_info("%s: archive has already been extracted!", done_path.c_str()); return Ok(); } else { log_warning("%s: archive cache has been damaged, re-extracting", done_path.c_str()); } fs::remove(done_path); } auto_mem arc(archive_free); auto_mem ext(archive_free); arc = archive_read_new(); enable_desired_archive_formats(arc); archive_read_support_format_raw(arc); archive_read_support_filter_all(arc); ext = archive_write_disk_new(); archive_write_disk_set_options(ext, FLAGS); archive_write_disk_set_standard_lookup(ext); if (archive_read_open_filename(arc, filename.c_str(), 10240) != ARCHIVE_OK) { return Err(fmt::format("unable to open archive: {} -- {}", filename, archive_error_string(arc))); } log_info("extracting %s to %s", filename.c_str(), tmp_path.c_str()); while (true) { struct archive_entry* entry; auto r = archive_read_next_header(arc, &entry); if (r == ARCHIVE_EOF) { log_info("all done"); break; } if (r != ARCHIVE_OK) { return Err(fmt::format("unable to read entry header: {} -- {}", filename, archive_error_string(arc))); } auto format_name = archive_format_name(arc); auto filter_count = archive_filter_count(arc); auto_mem wentry(archive_entry_free); wentry = archive_entry_clone(entry); auto desired_pathname = fs::path(archive_entry_pathname(entry)); if (strcmp(format_name, "raw") == 0 && filter_count >= 2) { desired_pathname = fs::path(filename).filename(); } auto entry_path = tmp_path / desired_pathname; auto prog = cb( entry_path, archive_entry_size_is_set(entry) ? archive_entry_size(entry) : -1); archive_entry_copy_pathname(wentry, entry_path.c_str()); auto entry_mode = archive_entry_mode(wentry); archive_entry_set_perm( wentry, S_IRUSR | (S_ISDIR(entry_mode) ? S_IXUSR | S_IWUSR : 0)); r = archive_write_header(ext, wentry); if (r < ARCHIVE_OK) { return Err(fmt::format("unable to write entry: {} -- {}", entry_path.string(), archive_error_string(ext))); } else if (!archive_entry_size_is_set(entry) || archive_entry_size(entry) > 0) { TRY(copy_data(filename, arc, entry, ext, entry_path, prog)); } r = archive_write_finish_entry(ext); if (r != ARCHIVE_OK) { return Err(fmt::format("unable to finish entry: {} -- {}", entry_path.string(), archive_error_string(ext))); } } archive_read_close(arc); archive_write_close(ext); auto_fd(open(done_path.c_str(), O_CREAT | O_WRONLY, 0600)); return Ok(); } #endif walk_result_t walk_archive_files( const std::string& filename, const extract_cb& cb, const std::function& callback) { #if HAVE_ARCHIVE_H auto tmp_path = filename_to_tmp_path(filename); auto result = extract(filename, cb); if (result.isErr()) { fs::remove_all(tmp_path); return result; } for (const auto& entry : fs::recursive_directory_iterator(tmp_path)) { if (!entry.is_regular_file()) { continue; } callback(tmp_path, entry); } return Ok(); #else return Err(std::string("not compiled with libarchive")); #endif } void cleanup_cache() { (void) std::async(std::launch::async, []() { auto now = std::chrono::system_clock::now(); auto cache_path = archive_cache_path(); auto& cfg = injector::get(); std::vector to_remove; log_debug("cache-ttl %d", cfg.amc_cache_ttl.count()); for (const auto& entry : fs::directory_iterator(cache_path)) { if (entry.path().extension() != ".done") { continue; } auto mtime = fs::last_write_time(entry.path()); auto exp_time = mtime + cfg.amc_cache_ttl; if (now < exp_time) { continue; } to_remove.emplace_back(entry.path()); } for (auto& entry : to_remove) { log_debug("removing cached archive: %s", entry.c_str()); fs::remove(entry); entry.replace_extension(".lck"); fs::remove(entry); entry.replace_extension(); fs::remove_all(entry); } }); } } // namespace archive_manager