[sysclip] support osc 52 for copying to the clipboard

Fixes #825
2022-07-12 13:16:13 -07:00 · 2022-07-12 13:16:13 -07:00 · 3c54f20fce
parent 3b9235b6e6
commit 3c54f20fce
60 changed files with 4775 additions and 61 deletions
--- a/4
+++ b/4
@ -51,6 +51,10 @@ lnav v0.10.2:
     * Initial indexing of large files should be faster.  Decompression
       and searching for line-endings are now pipelined, so they happen
       in a thread that is separate from the regular expression matcher.
+     * Writing to the clipboard now falls back to the OSC 52 escape
+       sequence if none of the clipboard commands could be detected.
+       Your terminal software will need to support the sequence and you
+       may need to explicitly enable it in the terminal.

     Breaking Changes:
     * Added a 'language' column to the lnav_view_filters table that
--- a/aminclude_static.am
+++ b/aminclude_static.am
@ -1,6 +1,6 @@

 # aminclude_static.am generated automatically by Autoconf
-# from AX_AM_MACROS_STATIC on Fri Jul  8 15:54:09 PDT 2022
+# from AX_AM_MACROS_STATIC on Tue Jul 12 09:53:25 PDT 2022


 # Code coverage
--- a/configure.ac
+++ b/configure.ac
@ -324,6 +324,7 @@ AC_CONFIG_FILES([src/tailer/Makefile])
 AC_CONFIG_FILES([src/tools/Makefile])
 AC_CONFIG_FILES([src/yajl/Makefile])
 AC_CONFIG_FILES([src/yajlpp/Makefile])
+AC_CONFIG_FILES([src/third-party/base64/lib/Makefile])
 AC_CONFIG_FILES([test/Makefile])

 AC_OUTPUT
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -236,6 +236,15 @@ add_library(
 )
 target_include_directories(cppfmt PUBLIC fmtlib)

+add_library(
+        base64 STATIC
+        third-party/base64/lib/lib.c
+        third-party/base64/lib/arch/generic/codec.c
+        third-party/base64/lib/tables/tables.c
+)
+target_include_directories(base64 PRIVATE third-party/base64/lib)
+target_include_directories(base64 PUBLIC third-party/base64/include)
+
 add_library(lnavfileio STATIC
        grep_proc.hh
        line_buffer.hh
@ -400,6 +409,7 @@ add_library(
        lnav.management_cli.hh
        lnav_config.hh
        lnav_config_fwd.hh
+        lnav_util.hh
        log.watch.hh
        log_actions.hh
        log_data_helper.hh
@ -513,7 +523,7 @@ add_library(
 set(lnav_SRCS lnav.cc)

 target_include_directories(diag PUBLIC . fmtlib ${CMAKE_CURRENT_BINARY_DIR}
-        third-party)
+        third-party third-party/base64/include)

 target_link_libraries(
        diag
@ -527,6 +537,7 @@ target_link_libraries(
        logfmt
        yajlpp
        cppfmt
+        base64
        ${lnav_LIBS})
 target_compile_definitions(diag PRIVATE SQLITE_OMIT_LOAD_EXTENSION)

--- a/src/Makefile.am
+++ b/src/Makefile.am
@ -3,7 +3,7 @@ include $(top_srcdir)/aminclude_static.am

 CXXFLAGS =

-SUBDIRS = tools fmtlib pcrepp base tailer pugixml yajl yajlpp formats/logfmt .
+SUBDIRS = tools fmtlib third-party/base64/lib pcrepp base tailer pugixml yajl yajlpp formats/logfmt .

 bin_PROGRAMS = lnav

@ -131,6 +131,7 @@ AM_CPPFLAGS = \
 	-DSQLITE_OMIT_LOAD_EXTENSION \
 	-I$(srcdir)/fmtlib \
 	-I$(srcdir)/third-party \
+	-I$(srcdir)/third-party/base64/include \
 	-Wall \
 	$(CODE_COVERAGE_CPPFLAGS) \
 	$(LIBARCHIVE_CFLAGS) \
@ -151,6 +152,7 @@ LDADD = \
 	tailer/libtailerpp.a \
 	yajl/libyajl.a \
 	yajlpp/libyajlpp.a \
+	third-party/base64/lib/libbase64.a \
 	$(READLINE_LIBS) \
 	$(CURSES_LIB) \
 	$(SQLITE3_LIBS) \
--- a/src/base/auto_mem.hh
+++ b/src/base/auto_mem.hh
@ -107,6 +107,12 @@ public:
        return &this->am_ptr;
    }

+    /**
+     * Return the free function stored in this holder, converted to the
+     * caller-selected type F with a C-style cast.
+     *
+     * NOTE(review): the cast is unchecked, so the caller presumably has
+     * to pick an F that is call-compatible with the function that was
+     * originally installed -- confirm against am_free_func's declaration.
+     *
+     * @tparam F The type to cast the stored free function to.
+     * @return this->am_free_func cast to F.
+     */
+    template<typename F>
+    F get_free_func() const
+    {
+        return (F) this->am_free_func;
+    }
+
    void reset(T* ptr = nullptr)
    {
        if (this->am_ptr != ptr) {
--- a/src/lnav_commands.cc
+++ b/src/lnav_commands.cc
@ -1029,14 +1029,15 @@ com_save_to(exec_context& ec,
            lnav_data.ld_stdout_used = true;
        }
    } else if (split_args[0] == "/dev/clipboard") {
-        toclose = outfile = sysclip::open(sysclip::type_t::GENERAL);
-        closer = pclose;
-        if (!outfile) {
+        auto open_res = sysclip::open(sysclip::type_t::GENERAL);
+        if (open_res.isErr()) {
            alerter::singleton().chime();
-            return ec.make_error(
-                "Unable to copy to clipboard.  "
-                "Make sure xclip or pbcopy is installed.");
+            return ec.make_error("Unable to copy to clipboard: {}",
+                                 open_res.unwrapErr());
        }
+        auto holder = open_res.unwrap();
+        toclose = outfile = holder.release();
+        closer = holder.get_free_func<int(*)(FILE*)>();
    } else if ((outfile = fopen(split_args[0].c_str(), mode)) == nullptr) {
        return ec.make_error("unable to open file -- {}", split_args[0]);
    } else {
@ -1586,14 +1587,14 @@ com_redirect_to(exec_context& ec,
        ec.clear_output();
    } else if (split_args[0] == "/dev/clipboard") {
        auto out = sysclip::open(sysclip::type_t::GENERAL);
-        if (!out) {
+        if (out.isErr()) {
            alerter::singleton().chime();
            return ec.make_error(
-                "Unable to copy to clipboard.  "
-                "Make sure xclip or pbcopy is installed.");
+                "Unable to copy to clipboard: {}", out.unwrapErr());
        }

-        ec.set_output(split_args[0], out, pclose);
+        auto holder = out.unwrap();
+        ec.set_output(split_args[0], holder.release(), holder.get_free_func<int(*)(FILE*)>());
    } else {
        FILE* file = fopen(split_args[0].c_str(), "w");
        if (file == nullptr) {
--- a/src/lnav_util.cc
+++ b/src/lnav_util.cc
@ -31,8 +31,6 @@
 * Dumping ground for useful functions with no other home.
 */

-#include <algorithm>
-
 #include "lnav_util.hh"

 #include <stdio.h>
@ -114,16 +112,6 @@ err_to_ok(const lnav::console::user_message msg)
    return Ok(msg.to_attr_line().get_string());
 }

-short
-pollfd_revents(const std::vector<struct pollfd>& pollfds, int fd)
-{
-    return pollfds | lnav::itertools::find_if([fd](const auto& entry) {
-               return entry.fd == fd;
-           })
-        | lnav::itertools::deref() | lnav::itertools::map(&pollfd::revents)
-        | lnav::itertools::unwrap_or((short) 0);
-}
-
 void
 write_line_to(FILE* outfile, const attr_line_t& al)
 {
@ -139,15 +127,6 @@ write_line_to(FILE* outfile, const attr_line_t& al)
    }
 }

-bool
-pollfd_ready(const std::vector<struct pollfd>& pollfds, int fd, short events)
-{
-    return std::any_of(
-        pollfds.begin(), pollfds.end(), [fd, events](const auto& entry) {
-            return entry.fd == fd && entry.revents & events;
-        });
-}
-
 namespace lnav {

 std::string
--- a/src/lnav_util.hh
+++ b/src/lnav_util.hh
@ -137,13 +137,7 @@ is_glob(const std::string& fn)
    return (fn.find('*') != std::string::npos
            || fn.find('?') != std::string::npos
            || fn.find('[') != std::string::npos);
-};
-
-short pollfd_revents(const std::vector<struct pollfd>& pollfds, int fd);
-
-bool pollfd_ready(const std::vector<struct pollfd>& pollfds,
-                  int fd,
-                  short events = POLLIN | POLLHUP);
+}

 inline void
 rusagesub(const struct rusage& left,
--- a/src/pollable.cc
+++ b/src/pollable.cc
@ -31,6 +31,7 @@

 #include "pollable.hh"

+#include "base/itertools.hh"
 #include "base/lnav_log.hh"

 pollable::pollable(std::shared_ptr<pollable_supervisor> supervisor,
@ -103,3 +104,22 @@ pollable_supervisor::count(pollable::category cat)

    return retval;
 }
+
+/**
+ * Look up the poll results for a file descriptor.
+ *
+ * The itertools pipeline searches pollfds for an entry whose fd member
+ * equals the given fd and maps it to that entry's revents member.  The
+ * trailing unwrap_or() supplies (short) 0 as the fallback value
+ * (presumably used when no entry matched -- see base/itertools.hh).
+ *
+ * @param pollfds The pollfd structures to search.
+ * @param fd The file descriptor to look for.
+ * @return The revents of the matching entry, or the 0 fallback.
+ */
+short
+pollfd_revents(const std::vector<struct pollfd>& pollfds, int fd)
+{
+    return pollfds | lnav::itertools::find_if([fd](const auto& entry) {
+               return entry.fd == fd;
+           })
+        | lnav::itertools::deref() | lnav::itertools::map(&pollfd::revents)
+        | lnav::itertools::unwrap_or((short) 0);
+}
+
+/**
+ * Check whether the poll results report any of the given events for a
+ * file descriptor.
+ *
+ * @param pollfds The pollfd structures to examine.
+ * @param fd The file descriptor of interest.
+ * @param events Bitmask of events to test against each entry's revents.
+ * @return true if some entry for fd has at least one of the requested
+ * events set in its revents, false otherwise.
+ */
+bool
+pollfd_ready(const std::vector<struct pollfd>& pollfds, int fd, short events)
+{
+    for (const auto& pfd : pollfds) {
+        if (pfd.fd != fd) {
+            // Not the descriptor we were asked about.
+            continue;
+        }
+        if ((pfd.revents & events) != 0) {
+            return true;
+        }
+    }
+
+    return false;
+}
--- a/src/pollable.hh
+++ b/src/pollable.hh
@ -77,4 +77,10 @@ public:
    size_t count(pollable::category cat);
 };

+short pollfd_revents(const std::vector<struct pollfd>& pollfds, int fd);
+
+bool pollfd_ready(const std::vector<struct pollfd>& pollfds,
+                  int fd,
+                  short events = POLLIN | POLLHUP);
+
 #endif
--- a/src/readline_callbacks.cc
+++ b/src/readline_callbacks.cc
@ -43,7 +43,6 @@
 #include "service_tags.hh"
 #include "sql_help.hh"
 #include "sqlite-extension-func.hh"
-#include "sysclip.hh"
 #include "tailer/tailer.looper.hh"
 #include "view_helpers.examples.hh"
 #include "vtab_module.hh"
@ -635,17 +634,11 @@ rl_callback_int(readline_curses* rc, bool is_alt)
        case ln_mode_t::CAPTURE:
            rl_search_internal(rc, old_mode, true);
            if (!rc->get_value().empty()) {
-                auto_mem<FILE> pfile(pclose);
-                vis_bookmarks& bm = tc->get_bookmarks();
+                auto& bm = tc->get_bookmarks();
                const auto& bv = bm[&textview_curses::BM_SEARCH];
                auto vl = is_alt ? bv.prev(tc->get_top())
                                 : bv.next(tc->get_top());

-                pfile = sysclip::open(sysclip::type_t::FIND);
-                if (pfile.in() != nullptr) {
-                    fmt::print(
-                        pfile, FMT_STRING("{}"), rc->get_value().get_string());
-                }
                if (vl) {
                    tc->set_top(vl.value());
                } else {
--- a/src/root-config.json
+++ b/src/root-config.json
@ -51,7 +51,7 @@
                    }
                },
                "tmux": {
-                    "test": "test -n \"$TMUX\"",
+                    "test": "test -n \"$TMUX\" -a -z \"$SSH_CLIENT\"",
                    "general": {
                        "write": "tmux load-buffer -",
                        "read": "tmux save-buffer -"
--- a/src/sysclip.cc
+++ b/src/sysclip.cc
@ -32,19 +32,23 @@
 #include "sysclip.hh"

 #include <stdio.h>
+#include <unistd.h>

 #include "base/injector.hh"
 #include "base/lnav_log.hh"
 #include "config.h"
 #include "fmt/format.h"
+#include "libbase64.h"
 #include "sysclip.cfg.hh"

+#define ANSI_OSC "\x1b]"
+
 namespace sysclip {

 static nonstd::optional<clipboard>
 get_commands()
 {
-    auto& cfg = injector::get<const config&>();
+    const auto& cfg = injector::get<const config&>();

    for (const auto& pair : cfg.c_clipboard_impls) {
        const auto full_cmd = fmt::format(FMT_STRING("{} > /dev/null 2>&1"),
@ -62,26 +66,76 @@ get_commands()
    return nonstd::nullopt;
 }

+/**
+ * Write a buffer to stdout in full, retrying after partial writes and
+ * EINTR so that the escape sequence is not silently truncated.
+ *
+ * @param buf The bytes to write.
+ * @param len The number of bytes in buf.
+ * @return true if every byte was written, false if write() failed.
+ */
+static bool
+osc52_write(const char* buf, size_t len)
+{
+    while (len > 0) {
+        auto rc = write(STDOUT_FILENO, buf, len);
+
+        if (rc < 0 && errno == EINTR) {
+            continue;
+        }
+        if (rc <= 0) {
+            log_error("unable to write OSC 52 data: %s", strerror(errno));
+            return false;
+        }
+        buf += rc;
+        len -= rc;
+    }
+
+    return true;
+}
+
+/**
+ * "Close" function for the temporary file that backs the OSC 52
+ * clipboard fallback.  The file's contents are base64-encoded and sent
+ * to stdout wrapped in an OSC 52 sequence, then the file is closed.
+ *
+ * @param file The temporary file holding the data to copy.
+ * @return The result of fclose() on the file.
+ */
+static int
+osc52_close(FILE* file)
+{
+    static const char ANSI_OSC_COPY_TO_CLIP[] = ANSI_OSC "52;c;";
+
+    // ftell() returns a long, so it must be logged with "%ld".
+    log_debug("writing %ld bytes of clipboard data using OSC 52",
+              ftell(file));
+    // sizeof - 1 is the length of the prefix without its NUL terminator.
+    auto ok = osc52_write(ANSI_OSC_COPY_TO_CLIP,
+                          sizeof(ANSI_OSC_COPY_TO_CLIP) - 1);
+
+    base64_state b64state{};
+    base64_stream_encode_init(&b64state, 0);
+
+    // Rewind so the data that was written to the file can be read back.
+    fseek(file, 0, SEEK_SET);
+
+    while (ok) {
+        // The encoder needs an output buffer of at least 4/3 the input
+        // size plus some margin; twice the size is comfortably enough.
+        char in_buffer[1024];
+        char out_buffer[2048];
+        size_t outlen = 0;
+
+        auto rc = fread(in_buffer, 1, sizeof(in_buffer), file);
+        if (rc == 0) {
+            // EOF (or a read error): flush any pending padding and stop.
+            base64_stream_encode_final(&b64state, out_buffer, &outlen);
+            osc52_write(out_buffer, outlen);
+            break;
+        }
+
+        base64_stream_encode(&b64state, in_buffer, rc, out_buffer, &outlen);
+        ok = osc52_write(out_buffer, outlen);
+    }
+
+    // Always try to send the BEL terminator so the terminal is not left
+    // in the middle of an OSC sequence.
+    osc52_write("\a", 1);
+
+    return fclose(file);
+}
+
 /* XXX For one, this code is kinda crappy.  For two, we should probably link
 * directly with X so we don't need to have xclip installed and it'll work if
 * we're ssh'd into a box.
 */
-FILE*
+Result<auto_mem<FILE>, std::string>
 open(type_t type, op_t op)
 {
    const char* mode = op == op_t::WRITE ? "w" : "r";
    static const auto clip_opt = sysclip::get_commands();

-    if (!clip_opt) {
-        log_error("unable to detect clipboard implementation");
-        return nullptr;
+    std::string cmd;
+
+    if (clip_opt) {
+        cmd = clip_opt.value().select(type).select(op);
+        if (cmd.empty()) {
+            log_info("configured clipboard does not support type/op");
+        }
+    } else {
+        log_info("unable to detect clipboard");
    }

-    auto cmd = clip_opt.value().select(type).select(op);
-
    if (cmd.empty()) {
-        log_error("clipboard does not support type/op");
-        return nullptr;
+        log_info("  ... falling back to OSC 52");
+        auto_mem<FILE> retval(osc52_close);
+
+        retval = tmpfile();
+        if (retval.in() == nullptr) {
+            return Err(
+                fmt::format(FMT_STRING("unable to open temporary file: {}"),
+                            strerror(errno)));
+        }
+
+        return Ok(std::move(retval));
    }

    switch (op) {
@ -93,7 +147,17 @@ open(type_t type, op_t op)
            break;
    }

-    return popen(cmd.c_str(), mode);
+    auto_mem<FILE> retval(pclose);
+
+    log_debug("trying detected clipboard command: %s", cmd.c_str());
+    retval = popen(cmd.c_str(), mode);
+    if (retval.in() == nullptr) {
+        return Err(fmt::format(FMT_STRING("failed to open clipboard: {} -- {}"),
+                               cmd,
+                               strerror(errno)));
+    }
+
+    return Ok(std::move(retval));
 }

 }  // namespace sysclip
--- a/src/sysclip.hh
+++ b/src/sysclip.hh
@ -33,6 +33,10 @@
 #define sysclip_hh

 #include <cstdio>
+#include <string>
+
+#include "base/auto_mem.hh"
+#include "base/result.h"

 namespace sysclip {

@ -46,7 +50,7 @@ enum class op_t {
    READ,
 };

-FILE* open(type_t type, op_t op = op_t::WRITE);
+Result<auto_mem<FILE>, std::string> open(type_t type, op_t op = op_t::WRITE);

 }  // namespace sysclip

--- a/src/third-party/base64/LICENSE
+++ b/src/third-party/base64/LICENSE
@ -0,0 +1,28 @@
+Copyright (c) 2005-2007, Nick Galbreath
+Copyright (c) 2013-2019, Alfred Klomp
+Copyright (c) 2015-2017, Wojciech Mula
+Copyright (c) 2016-2017, Matthieu Darbois
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+- Redistributions of source code must retain the above copyright notice,
+  this list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/src/third-party/base64/include/libbase64.h
+++ b/src/third-party/base64/include/libbase64.h
@ -0,0 +1,145 @@
+#ifndef LIBBASE64_H
+#define LIBBASE64_H
+
+#include <stddef.h>	/* size_t */
+
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+#define BASE64_SYMBOL_IMPORT __declspec(dllimport)
+#define BASE64_SYMBOL_EXPORT __declspec(dllexport)
+#define BASE64_SYMBOL_PRIVATE
+
+#elif __GNUC__ >= 4
+#define BASE64_SYMBOL_IMPORT   __attribute__ ((visibility ("default")))
+#define BASE64_SYMBOL_EXPORT   __attribute__ ((visibility ("default")))
+#define BASE64_SYMBOL_PRIVATE  __attribute__ ((visibility ("hidden")))
+
+#else
+#define BASE64_SYMBOL_IMPORT
+#define BASE64_SYMBOL_EXPORT
+#define BASE64_SYMBOL_PRIVATE
+#endif
+
+#if defined(BASE64_STATIC_DEFINE)
+#define BASE64_EXPORT
+#define BASE64_NO_EXPORT
+
+#else
+#if defined(BASE64_EXPORTS) // defined if we are building the shared library
+#define BASE64_EXPORT BASE64_SYMBOL_EXPORT
+
+#else
+#define BASE64_EXPORT BASE64_SYMBOL_IMPORT
+#endif
+
+#define BASE64_NO_EXPORT BASE64_SYMBOL_PRIVATE
+#endif
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* These are the flags that can be passed in the `flags` argument. The values
+ * below force the use of a given codec, even if that codec is a no-op in the
+ * current build. Used in testing. Set to 0 for the default behavior, which is
+ * runtime feature detection on x86, a compile-time fixed codec on ARM, and
+ * the plain codec on other platforms: */
+#define BASE64_FORCE_AVX2	(1 << 0)
+#define BASE64_FORCE_NEON32	(1 << 1)
+#define BASE64_FORCE_NEON64	(1 << 2)
+#define BASE64_FORCE_PLAIN	(1 << 3)
+#define BASE64_FORCE_SSSE3	(1 << 4)
+#define BASE64_FORCE_SSE41	(1 << 5)
+#define BASE64_FORCE_SSE42	(1 << 6)
+#define BASE64_FORCE_AVX	(1 << 7)
+
+struct base64_state {
+	int eof;
+	int bytes;
+	int flags;
+	unsigned char carry;
+};
+
+/* Wrapper function to encode a plain string of given length. Output is written
+ * to *out without trailing zero. Output length in bytes is written to *outlen.
+ * The buffer in `out` has been allocated by the caller and is at least 4/3 the
+ * size of the input. See above for `flags`; set to 0 for default operation: */
+void BASE64_EXPORT base64_encode
+	( const char		*src
+	, size_t		 srclen
+	, char			*out
+	, size_t		*outlen
+	, int			 flags
+	) ;
+
+/* Call this before calling base64_stream_encode() to init the state. See above
+ * for `flags`; set to 0 for default operation: */
+void BASE64_EXPORT base64_stream_encode_init
+	( struct base64_state	*state
+	, int			 flags
+	) ;
+
+/* Encodes the block of data of given length at `src`, into the buffer at
+ * `out`. Caller is responsible for allocating a large enough out-buffer; it
+ * must be at least 4/3 the size of the in-buffer, but take some margin. Places
+ * the number of new bytes written into `outlen` (which is set to zero when the
+ * function starts). Does not zero-terminate or finalize the output. */
+void BASE64_EXPORT base64_stream_encode
+	( struct base64_state	*state
+	, const char		*src
+	, size_t		 srclen
+	, char			*out
+	, size_t		*outlen
+	) ;
+
+/* Finalizes the output begun by previous calls to `base64_stream_encode()`.
+ * Adds the required end-of-stream markers if appropriate. `outlen` is modified
+ * and will contain the number of new bytes written at `out` (which will quite
+ * often be zero). */
+void BASE64_EXPORT base64_stream_encode_final
+	( struct base64_state	*state
+	, char			*out
+	, size_t		*outlen
+	) ;
+
+/* Wrapper function to decode a plain string of given length. Output is written
+ * to *out without trailing zero. Output length in bytes is written to *outlen.
+ * The buffer in `out` has been allocated by the caller and is at least 3/4 the
+ * size of the input. See above for `flags`, set to 0 for default operation: */
+int BASE64_EXPORT base64_decode
+	( const char		*src
+	, size_t		 srclen
+	, char			*out
+	, size_t		*outlen
+	, int			 flags
+	) ;
+
+/* Call this before calling base64_stream_decode() to init the state. See above
+ * for `flags`; set to 0 for default operation: */
+void BASE64_EXPORT base64_stream_decode_init
+	( struct base64_state	*state
+	, int			 flags
+	) ;
+
+/* Decodes the block of data of given length at `src`, into the buffer at
+ * `out`. Caller is responsible for allocating a large enough out-buffer; it
+ * must be at least 3/4 the size of the in-buffer, but take some margin. Places
+ * the number of new bytes written into `outlen` (which is set to zero when the
+ * function starts). Does not zero-terminate the output. Returns 1 if all is
+ * well, and 0 if a decoding error was found, such as an invalid character.
+ * Returns -1 if the chosen codec is not included in the current build. Used by
+ * the test harness to check whether a codec is available for testing. */
+int BASE64_EXPORT base64_stream_decode
+	( struct base64_state	*state
+	, const char		*src
+	, size_t		 srclen
+	, char			*out
+	, size_t		*outlen
+	) ;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* LIBBASE64_H */
--- a/src/third-party/base64/lib/Makefile.am
+++ b/src/third-party/base64/lib/Makefile.am
@ -0,0 +1,22 @@
+
+noinst_HEADERS = \
+    ../include/libbase64.h \
+    arch/generic/32/enc_loop.c \
+    arch/generic/32/dec_loop.c \
+    arch/generic/enc_tail.c \
+    arch/generic/dec_tail.c \
+    arch/generic/64/enc_loop.c \
+    arch/generic/enc_head.c \
+    arch/generic/dec_head.c \
+    tables/tables.h \
+    tables/table_enc_12bit.h \
+    codecs.h \
+    config.h \
+    env.h
+
+noinst_LIBRARIES = libbase64.a
+
+libbase64_a_SOURCES = \
+    lib.c \
+    arch/generic/codec.c \
+    tables/tables.c
--- a/src/third-party/base64/lib/arch/avx/codec.c
+++ b/src/third-party/base64/lib/arch/avx/codec.c
@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "../../../include/libbase64.h"
+#include "../../tables/tables.h"
+#include "../../codecs.h"
+#include "config.h"
+#include "../../env.h"
+
+#if HAVE_AVX
+#include <immintrin.h>
+
+#include "../ssse3/dec_reshuffle.c"
+#include "../ssse3/dec_loop.c"
+#include "../ssse3/enc_translate.c"
+#include "../ssse3/enc_reshuffle.c"
+#include "../ssse3/enc_loop.c"
+
+#endif	// HAVE_AVX
+
+BASE64_ENC_FUNCTION(avx)
+{
+#if HAVE_AVX
+	#include "../generic/enc_head.c"
+	enc_loop_ssse3(&s, &slen, &o, &olen);
+	#include "../generic/enc_tail.c"
+#else
+	BASE64_ENC_STUB
+#endif
+}
+
+BASE64_DEC_FUNCTION(avx)
+{
+#if HAVE_AVX
+	#include "../generic/dec_head.c"
+	dec_loop_ssse3(&s, &slen, &o, &olen);
+	#include "../generic/dec_tail.c"
+#else
+	BASE64_DEC_STUB
+#endif
+}
--- a/src/third-party/base64/lib/arch/avx2/codec.c
+++ b/src/third-party/base64/lib/arch/avx2/codec.c
@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "../../../include/libbase64.h"
+#include "../../tables/tables.h"
+#include "../../codecs.h"
+#include "config.h"
+#include "../../env.h"
+
+#if HAVE_AVX2
+#include <immintrin.h>
+
+#include "dec_reshuffle.c"
+#include "dec_loop.c"
+#include "enc_translate.c"
+#include "enc_reshuffle.c"
+#include "enc_loop.c"
+
+#endif	// HAVE_AVX2
+
+BASE64_ENC_FUNCTION(avx2)
+{
+#if HAVE_AVX2
+	#include "../generic/enc_head.c"
+	enc_loop_avx2(&s, &slen, &o, &olen);
+	#include "../generic/enc_tail.c"
+#else
+	BASE64_ENC_STUB
+#endif
+}
+
+BASE64_DEC_FUNCTION(avx2)
+{
+#if HAVE_AVX2
+	#include "../generic/dec_head.c"
+	dec_loop_avx2(&s, &slen, &o, &olen);
+	#include "../generic/dec_tail.c"
+#else
+	BASE64_DEC_STUB
+#endif
+}
--- a/src/third-party/base64/lib/arch/avx2/dec_loop.c
+++ b/src/third-party/base64/lib/arch/avx2/dec_loop.c
@ -0,0 +1,110 @@
+static inline int
+dec_loop_avx2_inner (const uint8_t **s, uint8_t **o, size_t *rounds)
+{
+	const __m256i lut_lo = _mm256_setr_epi8(
+		0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
+		0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A,
+		0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
+		0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A);
+
+	const __m256i lut_hi = _mm256_setr_epi8(
+		0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
+		0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+		0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
+		0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10);
+
+	const __m256i lut_roll = _mm256_setr_epi8(
+		0,  16,  19,   4, -65, -65, -71, -71,
+		0,   0,   0,   0,   0,   0,   0,   0,
+		0,  16,  19,   4, -65, -65, -71, -71,
+		0,   0,   0,   0,   0,   0,   0,   0);
+
+	const __m256i mask_2F = _mm256_set1_epi8(0x2F);
+
+	// Load input:
+	__m256i str = _mm256_loadu_si256((__m256i *) *s);
+
+	// See the SSSE3 decoder for an explanation of the algorithm.
+	const __m256i hi_nibbles = _mm256_and_si256(_mm256_srli_epi32(str, 4), mask_2F);
+	const __m256i lo_nibbles = _mm256_and_si256(str, mask_2F);
+	const __m256i hi         = _mm256_shuffle_epi8(lut_hi, hi_nibbles);
+	const __m256i lo         = _mm256_shuffle_epi8(lut_lo, lo_nibbles);
+
+	if (!_mm256_testz_si256(lo, hi)) {
+		return 0;
+	}
+
+	const __m256i eq_2F = _mm256_cmpeq_epi8(str, mask_2F);
+	const __m256i roll  = _mm256_shuffle_epi8(lut_roll, _mm256_add_epi8(eq_2F, hi_nibbles));
+
+	// Now simply add the delta values to the input:
+	str = _mm256_add_epi8(str, roll);
+
+	// Reshuffle the input to packed 12-byte output format:
+	str = dec_reshuffle(str);
+
+	// Store the output:
+	_mm256_storeu_si256((__m256i *) *o, str);
+
+	*s += 32;
+	*o += 24;
+	*rounds -= 1;
+
+	return 1;
+}
+
+static inline void
+dec_loop_avx2 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
+{
+	if (*slen < 45) {
+		return;
+	}
+
+	// Process blocks of 32 bytes per round. Because 8 extra zero bytes are
+	// written after the output, ensure that there will be at least 13
+	// bytes of input data left to cover the gap. (11 data bytes and up to
+	// two end-of-string markers.)
+	size_t rounds = (*slen - 13) / 32;
+
+	*slen -= rounds * 32;	// 32 bytes consumed per round
+	*olen += rounds * 24;	// 24 bytes produced per round
+
+	do {
+		if (rounds >= 8) {
+			if (dec_loop_avx2_inner(s, o, &rounds) &&
+			    dec_loop_avx2_inner(s, o, &rounds) &&
+			    dec_loop_avx2_inner(s, o, &rounds) &&
+			    dec_loop_avx2_inner(s, o, &rounds) &&
+			    dec_loop_avx2_inner(s, o, &rounds) &&
+			    dec_loop_avx2_inner(s, o, &rounds) &&
+			    dec_loop_avx2_inner(s, o, &rounds) &&
+			    dec_loop_avx2_inner(s, o, &rounds)) {
+				continue;
+			}
+			break;
+		}
+		if (rounds >= 4) {
+			if (dec_loop_avx2_inner(s, o, &rounds) &&
+			    dec_loop_avx2_inner(s, o, &rounds) &&
+			    dec_loop_avx2_inner(s, o, &rounds) &&
+			    dec_loop_avx2_inner(s, o, &rounds)) {
+				continue;
+			}
+			break;
+		}
+		if (rounds >= 2) {
+			if (dec_loop_avx2_inner(s, o, &rounds) &&
+			    dec_loop_avx2_inner(s, o, &rounds)) {
+				continue;
+			}
+			break;
+		}
+		dec_loop_avx2_inner(s, o, &rounds);
+		break;
+
+	} while (rounds > 0);
+
+	// Adjust for any rounds that were skipped:
+	*slen += rounds * 32;
+	*olen -= rounds * 24;
+}
--- a/src/third-party/base64/lib/arch/avx2/dec_reshuffle.c
+++ b/src/third-party/base64/lib/arch/avx2/dec_reshuffle.c
@ -0,0 +1,34 @@
+static inline __m256i
+dec_reshuffle (const __m256i in)
+{
+	// in, lower lane, bits, upper case are most significant bits, lower
+	// case are least significant bits:
+	// 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
+	// 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
+	// 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
+	// 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA
+
+	const __m256i merge_ab_and_bc = _mm256_maddubs_epi16(in, _mm256_set1_epi32(0x01400140));
+	// 0000kkkk LLllllll 0000JJJJ JJjjKKKK
+	// 0000hhhh IIiiiiii 0000GGGG GGggHHHH
+	// 0000eeee FFffffff 0000DDDD DDddEEEE
+	// 0000bbbb CCcccccc 0000AAAA AAaaBBBB
+
+	__m256i out = _mm256_madd_epi16(merge_ab_and_bc, _mm256_set1_epi32(0x00011000));
+	// 00000000 JJJJJJjj KKKKkkkk LLllllll
+	// 00000000 GGGGGGgg HHHHhhhh IIiiiiii
+	// 00000000 DDDDDDdd EEEEeeee FFffffff
+	// 00000000 AAAAAAaa BBBBbbbb CCcccccc
+
+	// Pack bytes together in each lane:
+	out = _mm256_shuffle_epi8(out, _mm256_setr_epi8(
+		2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1,
+		2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1));
+	// 00000000 00000000 00000000 00000000
+	// LLllllll KKKKkkkk JJJJJJjj IIiiiiii
+	// HHHHhhhh GGGGGGgg FFffffff EEEEeeee
+	// DDDDDDdd CCcccccc BBBBbbbb AAAAAAaa
+
+	// Pack lanes:
+	return _mm256_permutevar8x32_epi32(out, _mm256_setr_epi32(0, 1, 2, 4, 5, 6, -1, -1));
+}
--- a/src/third-party/base64/lib/arch/avx2/enc_loop.c
+++ b/src/third-party/base64/lib/arch/avx2/enc_loop.c
@ -0,0 +1,89 @@
+static inline void
+enc_loop_avx2_inner_first (const uint8_t **s, uint8_t **o)
+{
+	// First load is done at s - 0 to not get a segfault:
+	__m256i src = _mm256_loadu_si256((__m256i *) *s);
+
+	// Shift by 4 bytes, as required by enc_reshuffle:
+	src = _mm256_permutevar8x32_epi32(src, _mm256_setr_epi32(0, 0, 1, 2, 3, 4, 5, 6));
+
+	// Reshuffle, translate, store:
+	src = enc_reshuffle(src);
+	src = enc_translate(src);
+	_mm256_storeu_si256((__m256i *) *o, src);
+
+	// Subsequent loads will be done at s - 4, set pointer for next round:
+	*s += 20;
+	*o += 32;
+}
+
+static inline void
+enc_loop_avx2_inner (const uint8_t **s, uint8_t **o)
+{
+	// Load input:
+	__m256i src = _mm256_loadu_si256((__m256i *) *s);
+
+	// Reshuffle, translate, store:
+	src = enc_reshuffle(src);
+	src = enc_translate(src);
+	_mm256_storeu_si256((__m256i *) *o, src);
+
+	*s += 24;
+	*o += 32;
+}
+
+static inline void
+enc_loop_avx2 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
+{
+	if (*slen < 32) {
+		return;
+	}
+
+	// Process blocks of 24 bytes at a time. Because blocks are loaded 32
+	// bytes at a time an offset of -4, ensure that there will be at least
+	// 4 remaining bytes after the last round, so that the final read will
+	// not pass beyond the bounds of the input buffer:
+	size_t rounds = (*slen - 4) / 24;
+
+	*slen -= rounds * 24;   // 24 bytes consumed per round
+	*olen += rounds * 32;   // 32 bytes produced per round
+
+	// The first loop iteration requires special handling to ensure that
+	// the read, which is done at an offset, does not underflow the buffer:
+	enc_loop_avx2_inner_first(s, o);
+	rounds--;
+
+	while (rounds > 0) {
+		if (rounds >= 8) {
+			enc_loop_avx2_inner(s, o);
+			enc_loop_avx2_inner(s, o);
+			enc_loop_avx2_inner(s, o);
+			enc_loop_avx2_inner(s, o);
+			enc_loop_avx2_inner(s, o);
+			enc_loop_avx2_inner(s, o);
+			enc_loop_avx2_inner(s, o);
+			enc_loop_avx2_inner(s, o);
+			rounds -= 8;
+			continue;
+		}
+		if (rounds >= 4) {
+			enc_loop_avx2_inner(s, o);
+			enc_loop_avx2_inner(s, o);
+			enc_loop_avx2_inner(s, o);
+			enc_loop_avx2_inner(s, o);
+			rounds -= 4;
+			continue;
+		}
+		if (rounds >= 2) {
+			enc_loop_avx2_inner(s, o);
+			enc_loop_avx2_inner(s, o);
+			rounds -= 2;
+			continue;
+		}
+		enc_loop_avx2_inner(s, o);
+		break;
+	}
+
+	// Add the offset back:
+	*s += 4;
+}
--- a/src/third-party/base64/lib/arch/avx2/enc_reshuffle.c
+++ b/src/third-party/base64/lib/arch/avx2/enc_reshuffle.c
@ -0,0 +1,83 @@
+static inline __m256i
+enc_reshuffle (const __m256i input)
+{
+	// Translation of the SSSE3 reshuffling algorithm to AVX2. This one
+	// works with shifted (4 bytes) input in order to be able to work
+	// efficiently in the two 128-bit lanes.
+
+	// Input, bytes MSB to LSB:
+	// 0 0 0 0 x w v u t s r q p o n m
+	// l k j i h g f e d c b a 0 0 0 0
+
+	const __m256i in = _mm256_shuffle_epi8(input, _mm256_set_epi8(
+		10, 11,  9, 10,
+		 7,  8,  6,  7,
+		 4,  5,  3,  4,
+		 1,  2,  0,  1,
+
+		14, 15, 13, 14,
+		11, 12, 10, 11,
+		 8,  9,  7,  8,
+		 5,  6,  4,  5));
+	// in, bytes MSB to LSB:
+	// w x v w
+	// t u s t
+	// q r p q
+	// n o m n
+	// k l j k
+	// h i g h
+	// e f d e
+	// b c a b
+
+	const __m256i t0 = _mm256_and_si256(in, _mm256_set1_epi32(0x0FC0FC00));
+	// bits, upper case are most significant bits, lower case are least
+	// significant bits.
+	// 0000wwww XX000000 VVVVVV00 00000000
+	// 0000tttt UU000000 SSSSSS00 00000000
+	// 0000qqqq RR000000 PPPPPP00 00000000
+	// 0000nnnn OO000000 MMMMMM00 00000000
+	// 0000kkkk LL000000 JJJJJJ00 00000000
+	// 0000hhhh II000000 GGGGGG00 00000000
+	// 0000eeee FF000000 DDDDDD00 00000000
+	// 0000bbbb CC000000 AAAAAA00 00000000
+
+	const __m256i t1 = _mm256_mulhi_epu16(t0, _mm256_set1_epi32(0x04000040));
+	// 00000000 00wwwwXX 00000000 00VVVVVV
+	// 00000000 00ttttUU 00000000 00SSSSSS
+	// 00000000 00qqqqRR 00000000 00PPPPPP
+	// 00000000 00nnnnOO 00000000 00MMMMMM
+	// 00000000 00kkkkLL 00000000 00JJJJJJ
+	// 00000000 00hhhhII 00000000 00GGGGGG
+	// 00000000 00eeeeFF 00000000 00DDDDDD
+	// 00000000 00bbbbCC 00000000 00AAAAAA
+
+	const __m256i t2 = _mm256_and_si256(in, _mm256_set1_epi32(0x003F03F0));
+	// 00000000 00xxxxxx 000000vv WWWW0000
+	// 00000000 00uuuuuu 000000ss TTTT0000
+	// 00000000 00rrrrrr 000000pp QQQQ0000
+	// 00000000 00oooooo 000000mm NNNN0000
+	// 00000000 00llllll 000000jj KKKK0000
+	// 00000000 00iiiiii 000000gg HHHH0000
+	// 00000000 00ffffff 000000dd EEEE0000
+	// 00000000 00cccccc 000000aa BBBB0000
+
+	const __m256i t3 = _mm256_mullo_epi16(t2, _mm256_set1_epi32(0x01000010));
+	// 00xxxxxx 00000000 00vvWWWW 00000000
+	// 00uuuuuu 00000000 00ssTTTT 00000000
+	// 00rrrrrr 00000000 00ppQQQQ 00000000
+	// 00oooooo 00000000 00mmNNNN 00000000
+	// 00llllll 00000000 00jjKKKK 00000000
+	// 00iiiiii 00000000 00ggHHHH 00000000
+	// 00ffffff 00000000 00ddEEEE 00000000
+	// 00cccccc 00000000 00aaBBBB 00000000
+
+	return _mm256_or_si256(t1, t3);
+	// 00xxxxxx 00wwwwXX 00vvWWWW 00VVVVVV
+	// 00uuuuuu 00ttttUU 00ssTTTT 00SSSSSS
+	// 00rrrrrr 00qqqqRR 00ppQQQQ 00PPPPPP
+	// 00oooooo 00nnnnOO 00mmNNNN 00MMMMMM
+	// 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
+	// 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
+	// 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
+	// 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA
+}
--- a/src/third-party/base64/lib/arch/avx2/enc_translate.c
+++ b/src/third-party/base64/lib/arch/avx2/enc_translate.c
@ -0,0 +1,30 @@
+// Convert 32 six-bit values (one per byte, each in 0..63) into their ASCII
+// Base64 characters by adding a range-dependent offset to every byte.
+static inline __m256i
+enc_translate (const __m256i in)
+{
+	// The values 0..63 fall into five ranges, each a fixed distance away
+	// from its ASCII characters:
+	// #  From      To         Abs    Index  Characters
+	// 0  [0..25]   [65..90]   +65        0  ABCDEFGHIJKLMNOPQRSTUVWXYZ
+	// 1  [26..51]  [97..122]  +71        1  abcdefghijklmnopqrstuvwxyz
+	// 2  [52..61]  [48..57]    -4  [2..11]  0123456789
+	// 3  [62]      [43]       -19       12  +
+	// 4  [63]      [47]       -16       13  /
+
+	// Offsets addressed by the "Index" column. The table is repeated for
+	// the upper 16 bytes so both halves of the shuffle see the same data:
+	const __m256i offsets = _mm256_setr_epi8(
+		65, 71, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -19, -16, 0, 0,
+		65, 71, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -19, -16, 0, 0);
+
+	// 0xFF (-1) for every value outside range #0, 0x00 inside range #0:
+	const __m256i past_upper = _mm256_cmpgt_epi8(in, _mm256_set1_epi8(25));
+
+	// Saturating subtraction: yields 0 for ranges #0 and #1 and 1..12 for
+	// the rest, which is one less than the wanted index everywhere except
+	// in range #0:
+	const __m256i reduced = _mm256_subs_epu8(in, _mm256_set1_epi8(51));
+
+	// Subtracting the -1 mask adds the missing 1 outside range #0:
+	const __m256i slots = _mm256_sub_epi8(reduced, past_upper);
+
+	// Fetch the offset for every byte and apply it to the input:
+	return _mm256_add_epi8(in, _mm256_shuffle_epi8(offsets, slots));
+}
--- a/src/third-party/base64/lib/arch/generic/32/dec_loop.c
+++ b/src/third-party/base64/lib/arch/generic/32/dec_loop.c
@ -0,0 +1,86 @@
+// Decode one group of four Base64 characters into three output bytes.
+//
+// On success the input pointer advances by 4, the output pointer by 3, the
+// round counter drops by one and 1 is returned. If any of the four characters
+// is invalid, 0 is returned and nothing is written or advanced.
+static inline int
+dec_loop_generic_32_inner (const uint8_t **s, uint8_t **o, size_t *rounds)
+{
+	// The lookup tables flag an invalid character in a single marker bit
+	// whose position depends on the byte order of the host:
+#if BASE64_LITTLE_ENDIAN
+	const uint32_t invalid_bit = UINT32_C(0x80000000);
+#else
+	const uint32_t invalid_bit = UINT32_C(1);
+#endif
+	const uint8_t *in = *s;
+
+	// Every table already holds its character's contribution shifted into
+	// place, so OR-ing the four lookups assembles the decoded word:
+	uint32_t word = base64_table_dec_32bit_d0[in[0]];
+
+	word |= base64_table_dec_32bit_d1[in[1]];
+	word |= base64_table_dec_32bit_d2[in[2]];
+	word |= base64_table_dec_32bit_d3[in[3]];
+
+	if ((word & invalid_bit) != 0) {
+		return 0;
+	}
+
+	// The whole word is stored, so four bytes are written even though
+	// the output pointer only moves forward by three:
+	memcpy(*o, &word, sizeof (word));
+
+	*s = in + 4;
+	*o += 3;
+	--*rounds;
+
+	return 1;
+}
+
+// Bulk decoder: consumes groups of four characters for as long as they are
+// valid, updating the pointers and the remaining/produced byte counts. It
+// stops in front of the first group that contains an invalid character and
+// leaves that group, and everything after it, to the caller.
+static inline void
+dec_loop_generic_32 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
+{
+	size_t rounds;
+	int ok;
+
+	if (*slen < 8) {
+		return;
+	}
+
+	// Each round stores four bytes but only keeps three of them, so one
+	// stray byte lands behind the output. Holding back at least four
+	// input bytes (two data bytes and up to two end-of-string markers)
+	// guarantees that later output covers that gap:
+	rounds = (*slen - 4) / 4;
+
+	// Book all rounds up front; rounds that end up not being run are
+	// refunded at the bottom:
+	*slen -= rounds * 4;	// 4 bytes consumed per round
+	*olen += rounds * 3;	// 3 bytes produced per round
+
+	// Run the largest unrolled batch that still fits. The inner function
+	// decrements `rounds` itself and the && chain stops at the first
+	// failing group:
+	for (;;) {
+		if (rounds >= 8) {
+			ok = dec_loop_generic_32_inner(s, o, &rounds) &&
+			     dec_loop_generic_32_inner(s, o, &rounds) &&
+			     dec_loop_generic_32_inner(s, o, &rounds) &&
+			     dec_loop_generic_32_inner(s, o, &rounds) &&
+			     dec_loop_generic_32_inner(s, o, &rounds) &&
+			     dec_loop_generic_32_inner(s, o, &rounds) &&
+			     dec_loop_generic_32_inner(s, o, &rounds) &&
+			     dec_loop_generic_32_inner(s, o, &rounds);
+		}
+		else if (rounds >= 4) {
+			ok = dec_loop_generic_32_inner(s, o, &rounds) &&
+			     dec_loop_generic_32_inner(s, o, &rounds) &&
+			     dec_loop_generic_32_inner(s, o, &rounds) &&
+			     dec_loop_generic_32_inner(s, o, &rounds);
+		}
+		else if (rounds >= 2) {
+			ok = dec_loop_generic_32_inner(s, o, &rounds) &&
+			     dec_loop_generic_32_inner(s, o, &rounds);
+		}
+		else {
+			// Exactly one round is left; its outcome no longer
+			// matters for the loop:
+			dec_loop_generic_32_inner(s, o, &rounds);
+			break;
+		}
+
+		if (!ok || rounds == 0) {
+			break;
+		}
+	}
+
+	// Refund the rounds that were booked but never completed:
+	*slen += rounds * 4;
+	*olen -= rounds * 3;
+}
--- a/src/third-party/base64/lib/arch/generic/32/enc_loop.c
+++ b/src/third-party/base64/lib/arch/generic/32/enc_loop.c
@ -0,0 +1,73 @@
+// Encode three input bytes into four Base64 characters using two lookups in
+// the 12-bit table. Four bytes are read from the input although only three
+// are consumed; the caller has to make sure that extra byte is readable.
+static inline void
+enc_loop_generic_32_inner (const uint8_t **s, uint8_t **o)
+{
+	uint32_t word;
+	uint8_t *dst = *o;
+
+	// Fetch four input bytes in one go:
+	memcpy(&word, *s, sizeof (word));
+
+	// Bring the word into big-endian order so that the first input byte
+	// sits in the most significant position and the 12-bit groups can be
+	// peeled off with plain shifts:
+	word = BASE64_HTOBE32(word);
+
+	// Bits 31..20 and 19..8 each select a table entry, and two bytes are
+	// copied out of the table for each of them:
+	memcpy(dst + 0, &base64_table_enc_12bit[(word >> 20) & 0xFFFU], 2);
+	memcpy(dst + 2, &base64_table_enc_12bit[(word >>  8) & 0xFFFU], 2);
+
+	*s += 3;
+	*o = dst + 4;
+}
+
+// Bulk encoder for 32-bit words: encodes as many 3-byte groups as possible
+// and updates the pointers and the remaining/produced byte counts. At least
+// one input byte is always left over for the caller.
+static inline void
+enc_loop_generic_32 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
+{
+	size_t rounds;
+
+	if (*slen < 4) {
+		return;
+	}
+
+	// A round consumes three bytes but loads four. Keeping at least one
+	// byte in reserve stops the last load from reaching past the end of
+	// the input buffer:
+	rounds = (*slen - 1) / 3;
+
+	*slen -= rounds * 3;	// 3 bytes consumed per round
+	*olen += rounds * 4;	// 4 bytes produced per round
+
+	// Work through the rounds in unrolled batches of 8, then 4, 2 and 1:
+	for (; rounds >= 8; rounds -= 8) {
+		enc_loop_generic_32_inner(s, o);
+		enc_loop_generic_32_inner(s, o);
+		enc_loop_generic_32_inner(s, o);
+		enc_loop_generic_32_inner(s, o);
+		enc_loop_generic_32_inner(s, o);
+		enc_loop_generic_32_inner(s, o);
+		enc_loop_generic_32_inner(s, o);
+		enc_loop_generic_32_inner(s, o);
+	}
+	if (rounds >= 4) {
+		enc_loop_generic_32_inner(s, o);
+		enc_loop_generic_32_inner(s, o);
+		enc_loop_generic_32_inner(s, o);
+		enc_loop_generic_32_inner(s, o);
+		rounds -= 4;
+	}
+	if (rounds >= 2) {
+		enc_loop_generic_32_inner(s, o);
+		enc_loop_generic_32_inner(s, o);
+		rounds -= 2;
+	}
+	if (rounds > 0) {
+		enc_loop_generic_32_inner(s, o);
+	}
+}
--- a/src/third-party/base64/lib/arch/generic/64/enc_loop.c
+++ b/src/third-party/base64/lib/arch/generic/64/enc_loop.c
@ -0,0 +1,77 @@
+// Encode six input bytes into eight Base64 characters using four lookups in
+// the 12-bit table. Eight bytes are read from the input although only six
+// are consumed; the caller has to make sure the extra two are readable.
+static inline void
+enc_loop_generic_64_inner (const uint8_t **s, uint8_t **o)
+{
+	uint64_t word;
+	uint8_t *dst = *o;
+
+	// Fetch eight input bytes in one go:
+	memcpy(&word, *s, sizeof (word));
+
+	// Bring the word into big-endian order so that the first input byte
+	// sits in the most significant position and the 12-bit groups can be
+	// peeled off with plain shifts:
+	word = BASE64_HTOBE64(word);
+
+	// The top 48 bits form four 12-bit table indices, and two bytes are
+	// copied out of the table for each of them:
+	memcpy(dst + 0, &base64_table_enc_12bit[(word >> 52) & 0xFFFU], 2);
+	memcpy(dst + 2, &base64_table_enc_12bit[(word >> 40) & 0xFFFU], 2);
+	memcpy(dst + 4, &base64_table_enc_12bit[(word >> 28) & 0xFFFU], 2);
+	memcpy(dst + 6, &base64_table_enc_12bit[(word >> 16) & 0xFFFU], 2);
+
+	*s += 6;
+	*o = dst + 8;
+}
+
+// Bulk encoder for 64-bit words: encodes as many 6-byte groups as possible
+// and updates the pointers and the remaining/produced byte counts. At least
+// two input bytes are always left over for the caller.
+static inline void
+enc_loop_generic_64 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
+{
+	size_t rounds;
+
+	if (*slen < 8) {
+		return;
+	}
+
+	// A round consumes six bytes but loads eight. Keeping at least two
+	// bytes in reserve stops the last load from reaching past the end of
+	// the input buffer:
+	rounds = (*slen - 2) / 6;
+
+	*slen -= rounds * 6;	// 6 bytes consumed per round
+	*olen += rounds * 8;	// 8 bytes produced per round
+
+	// Work through the rounds in unrolled batches of 8, then 4, 2 and 1:
+	for (; rounds >= 8; rounds -= 8) {
+		enc_loop_generic_64_inner(s, o);
+		enc_loop_generic_64_inner(s, o);
+		enc_loop_generic_64_inner(s, o);
+		enc_loop_generic_64_inner(s, o);
+		enc_loop_generic_64_inner(s, o);
+		enc_loop_generic_64_inner(s, o);
+		enc_loop_generic_64_inner(s, o);
+		enc_loop_generic_64_inner(s, o);
+	}
+	if (rounds >= 4) {
+		enc_loop_generic_64_inner(s, o);
+		enc_loop_generic_64_inner(s, o);
+		enc_loop_generic_64_inner(s, o);
+		enc_loop_generic_64_inner(s, o);
+		rounds -= 4;
+	}
+	if (rounds >= 2) {
+		enc_loop_generic_64_inner(s, o);
+		enc_loop_generic_64_inner(s, o);
+		rounds -= 2;
+	}
+	if (rounds > 0) {
+		enc_loop_generic_64_inner(s, o);
+	}
+}
--- a/src/third-party/base64/lib/arch/generic/codec.c
+++ b/src/third-party/base64/lib/arch/generic/codec.c
@ -0,0 +1,39 @@
+#include <stdint.h>
+#include <stddef.h>
+#include <string.h>
+
+#include "../../../include/libbase64.h"
+#include "../../tables/tables.h"
+#include "../../codecs.h"
+#include "config.h"
+#include "../../env.h"
+
+#if BASE64_WORDSIZE == 32
+#  include "32/enc_loop.c"
+#elif BASE64_WORDSIZE == 64
+#  include "64/enc_loop.c"
+#endif
+
+#if BASE64_WORDSIZE >= 32
+#  include "32/dec_loop.c"
+#endif
+
+// Encoder of the "plain" (no SIMD) codec. The body is assembled textually:
+// enc_head.c declares the locals (s, slen, o, olen, st) and opens the bytewise
+// state machine up to its `case 0:` label, the word-sized bulk loop is spliced
+// in at that point, and enc_tail.c supplies the rest of the state machine and
+// writes the state and the output length back.
+BASE64_ENC_FUNCTION(plain)
+{
+	#include "enc_head.c"
+#if BASE64_WORDSIZE == 32
+	// Bulk-encode 3 bytes per round; leftovers go to the bytewise tail:
+	enc_loop_generic_32(&s, &slen, &o, &olen);
+#elif BASE64_WORDSIZE == 64
+	// Bulk-encode 6 bytes per round; leftovers go to the bytewise tail:
+	enc_loop_generic_64(&s, &slen, &o, &olen);
+#endif
+	// For any other word size only the bytewise code below runs.
+	#include "enc_tail.c"
+}
+
+// Decoder of the "plain" (no SIMD) codec. The body is assembled textually:
+// dec_head.c declares the locals (ret, s, slen, o, olen, q, st), handles a
+// stream that already hit EOF and opens the bytewise state machine up to its
+// `case 0:` label; the bulk loop is spliced in at that point; dec_tail.c
+// supplies the rest of the state machine and returns the result.
+BASE64_DEC_FUNCTION(plain)
+{
+	#include "dec_head.c"
+#if BASE64_WORDSIZE >= 32
+	// Bulk-decode 4 characters per round. The loop stops in front of the
+	// first group with an invalid character so that the bytewise tail can
+	// classify it:
+	dec_loop_generic_32(&s, &slen, &o, &olen);
+#endif
+	#include "dec_tail.c"
+}
--- a/src/third-party/base64/lib/arch/generic/dec_head.c
+++ b/src/third-party/base64/lib/arch/generic/dec_head.c
@ -0,0 +1,37 @@
+// Opening half of the bytewise decoder. This fragment is #included at the top
+// of a decoder function body and relies on `state`, `src`, `srclen`, `out` and
+// `outlen` being in scope. It deliberately ends inside an open switch/for
+// construct, directly after `case 0:`; dec_tail.c closes it.
+int ret = 0;
+const uint8_t *s = (const uint8_t *) src;
+uint8_t *o = (uint8_t *) out;
+uint8_t q;	// table value of the current character (254 is '=', 255 is invalid)
+
+// Use local temporaries to avoid cache thrashing:
+size_t olen = 0;
+size_t slen = srclen;
+struct base64_state st;
+st.eof = state->eof;
+st.bytes = state->bytes;
+st.carry = state->carry;
+
+// If we previously saw an EOF or an invalid character, bail out:
+if (st.eof) {
+	*outlen = 0;
+	ret = 0;
+	// If there was a trailing '=' to check, check it:
+	if (slen && (st.eof == BASE64_AEOF)) {
+		state->bytes = 0;
+		state->eof = BASE64_EOF;
+		// Valid only if this call delivers exactly one byte and that
+		// byte is the second '=':
+		ret = ((base64_table_dec_8bit[*s++] == 254) && (slen == 1)) ? 1 : 0;
+	}
+	return ret;
+}
+
+// Turn four 6-bit numbers into three bytes:
+// out[0] = 11111122
+// out[1] = 22223333
+// out[2] = 33444444
+
+// Duff's device again: st.bytes is the position within the current group of
+// four characters, so the switch resumes mid-group where the previous call
+// stopped:
+switch (st.bytes)
+{
+	for (;;)
+	{
+	case 0:
--- a/src/third-party/base64/lib/arch/generic/dec_tail.c
+++ b/src/third-party/base64/lib/arch/generic/dec_tail.c
@ -0,0 +1,91 @@
+		// Closing half of the bytewise decoder; continues `case 0:`
+		// opened by dec_head.c. Every `break` below leaves the
+		// for (;;) loop and falls through to the state write-back.
+		// ret stays 0 unless a branch explicitly sets it to 1.
+
+		// Character 1 of 4: supplies the upper six bits of byte 0.
+		if (slen-- == 0) {
+			ret = 1;
+			break;
+		}
+		if ((q = base64_table_dec_8bit[*s++]) >= 254) {
+			st.eof = BASE64_EOF;
+			// Treat character '=' as invalid for byte 0:
+			break;
+		}
+		st.carry = q << 2;
+		st.bytes++;
+
+		// Deliberate fallthrough:
+		BASE64_FALLTHROUGH
+
+	// Character 2 of 4: completes byte 0, starts byte 1.
+	case 1:	if (slen-- == 0) {
+			ret = 1;
+			break;
+		}
+		if ((q = base64_table_dec_8bit[*s++]) >= 254) {
+			st.eof = BASE64_EOF;
+			// Treat character '=' as invalid for byte 1:
+			break;
+		}
+		*o++ = st.carry | (q >> 4);
+		st.carry = q << 4;
+		st.bytes++;
+		olen++;
+
+		// Deliberate fallthrough:
+		BASE64_FALLTHROUGH
+
+	// Character 3 of 4: completes byte 1, starts byte 2. A '=' here must
+	// be followed by exactly one more '=' that ends the input.
+	case 2:	if (slen-- == 0) {
+			ret = 1;
+			break;
+		}
+		if ((q = base64_table_dec_8bit[*s++]) >= 254) {
+			st.bytes++;
+			// When q == 254, the input char is '='.
+			// Check if next byte is also '=':
+			if (q == 254) {
+				if (slen-- != 0) {
+					st.bytes = 0;
+					// EOF:
+					st.eof = BASE64_EOF;
+					q = base64_table_dec_8bit[*s++];
+					// Succeed only if that byte is '=' and
+					// was the last byte of the input:
+					ret = ((q == 254) && (slen == 0)) ? 1 : 0;
+					break;
+				}
+				else {
+					// Almost EOF: the second '=' has not
+					// arrived yet; dec_head.c checks it at
+					// the start of the next call.
+					st.eof = BASE64_AEOF;
+					ret = 1;
+					break;
+				}
+			}
+			// If we get here, there was an error:
+			break;
+		}
+		*o++ = st.carry | (q >> 2);
+		st.carry = q << 6;
+		st.bytes++;
+		olen++;
+
+		// Deliberate fallthrough:
+		BASE64_FALLTHROUGH
+
+	// Character 4 of 4: completes byte 2. A '=' here is valid padding
+	// only if it is the last byte of the input.
+	case 3:	if (slen-- == 0) {
+			ret = 1;
+			break;
+		}
+		if ((q = base64_table_dec_8bit[*s++]) >= 254) {
+			st.bytes = 0;
+			st.eof = BASE64_EOF;
+			// When q == 254, the input char is '='. Return 1 and EOF.
+			// When q == 255, the input char is invalid. Return 0 and EOF.
+			ret = ((q == 254) && (slen == 0)) ? 1 : 0;
+			break;
+		}
+		*o++ = st.carry | q;
+		st.carry = 0;
+		st.bytes = 0;
+		olen++;
+		// Group complete: the for (;;) loops back to `case 0:`.
+	}
+}
+
+// Publish the local working copies back to the caller's state and report how
+// many bytes were produced:
+state->eof = st.eof;
+state->bytes = st.bytes;
+state->carry = st.carry;
+*outlen = olen;
+return ret;
--- a/src/third-party/base64/lib/arch/generic/enc_head.c
+++ b/src/third-party/base64/lib/arch/generic/enc_head.c
@ -0,0 +1,24 @@
+// Opening half of the bytewise encoder. This fragment is #included at the top
+// of an encoder function body and relies on `state`, `src`, `srclen` and `out`
+// being in scope. It deliberately ends inside an open switch/for construct,
+// directly after `case 0:`; enc_tail.c closes it.
+//
+// Assume that *out is large enough to contain the output.
+// Theoretically it should be 4/3 the length of src.
+const uint8_t *s = (const uint8_t *) src;
+uint8_t *o = (uint8_t *) out;
+
+// Use local temporaries to avoid cache thrashing:
+size_t olen = 0;
+size_t slen = srclen;
+struct base64_state st;
+st.bytes = state->bytes;
+st.carry = state->carry;
+
+// Turn three bytes into four 6-bit numbers:
+// in[0] = 00111111
+// in[1] = 00112222
+// in[2] = 00222233
+// in[3] = 00333333
+
+// Duff's device, a for() loop inside a switch() statement. Legal!
+// st.bytes is the position within the current group of three input bytes, so
+// the switch resumes mid-group where the previous call stopped:
+switch (st.bytes)
+{
+	for (;;)
+	{
+	case 0:
--- a/src/third-party/base64/lib/arch/generic/enc_tail.c
+++ b/src/third-party/base64/lib/arch/generic/enc_tail.c
@ -0,0 +1,34 @@
+		// Closing half of the bytewise encoder; continues `case 0:`
+		// opened by enc_head.c. Every `break` below leaves the
+		// for (;;) loop once the input is exhausted.
+
+		// Input byte 1 of 3: emit its upper six bits and carry the
+		// lower two, pre-shifted into bits 5..4.
+		if (slen-- == 0) {
+			break;
+		}
+		*o++ = base64_table_enc_6bit[*s >> 2];
+		st.carry = (*s++ << 4) & 0x30;
+		st.bytes++;
+		olen += 1;
+
+		// Deliberate fallthrough:
+		BASE64_FALLTHROUGH
+
+	// Input byte 2 of 3: emit the carried bits plus its upper four bits and
+	// carry the lower four, pre-shifted into bits 5..2.
+	case 1:	if (slen-- == 0) {
+			break;
+		}
+		*o++ = base64_table_enc_6bit[st.carry | (*s >> 4)];
+		st.carry = (*s++ << 2) & 0x3C;
+		st.bytes++;
+		olen += 1;
+
+		// Deliberate fallthrough:
+		BASE64_FALLTHROUGH
+
+	// Input byte 3 of 3: emit the carried bits plus its upper two bits,
+	// then its lower six bits as the fourth character of the group.
+	case 2:	if (slen-- == 0) {
+			break;
+		}
+		*o++ = base64_table_enc_6bit[st.carry | (*s >> 6)];
+		*o++ = base64_table_enc_6bit[*s++ & 0x3F];
+		st.bytes = 0;
+		olen += 2;
+		// Group complete: the for (;;) loops back to `case 0:`.
+	}
+}
+// Save the position within the group and the pending bits so that a later
+// call can continue from here, and report how many characters were written.
+// No '=' padding is emitted by this fragment.
+state->bytes = st.bytes;
+state->carry = st.carry;
+*outlen = olen;
--- a/src/third-party/base64/lib/arch/neon32/codec.c
+++ b/src/third-party/base64/lib/arch/neon32/codec.c
@ -0,0 +1,77 @@
+#include <stdint.h>
+#include <stddef.h>
+#include <string.h>
+
+#include "../../../include/libbase64.h"
+#include "../../tables/tables.h"
+#include "../../codecs.h"
+#include "config.h"
+#include "../../env.h"
+
+#ifdef __arm__
+#  if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && HAVE_NEON32
+#    define BASE64_USE_NEON32
+#  endif
+#endif
+
+#ifdef BASE64_USE_NEON32
+#include <arm_neon.h>
+
+// Only enable inline assembly on supported compilers.
+#if defined(__GNUC__) || defined(__clang__)
+#define BASE64_NEON32_USE_ASM
+#endif
+
+// Stand-in for the NEON64 `vqtbl1q_u8` intrinsic: a 16-byte table lookup
+// with 16 index bytes. NEON32 table lookups only return 8 bytes at a time,
+// so the index vector is split in half and looked up in two steps.
+static inline uint8x16_t
+vqtbl1q_u8 (const uint8x16_t lut, const uint8x16_t indices)
+{
+	uint8x8x2_t table;
+
+	// Present the 16-byte table as a pair of 8-byte registers:
+	table.val[0] = vget_low_u8(lut);
+	table.val[1] = vget_high_u8(lut);
+
+	// Look up each half of the indices and glue the results together:
+	return vcombine_u8(
+		vtbl2_u8(table, vget_low_u8(indices)),
+		vtbl2_u8(table, vget_high_u8(indices)));
+}
+
+#include "../generic/32/dec_loop.c"
+#include "../generic/32/enc_loop.c"
+#include "dec_loop.c"
+#include "enc_reshuffle.c"
+#include "enc_translate.c"
+#include "enc_loop.c"
+
+#endif	// BASE64_USE_NEON32
+
+// Stride size is so large on these NEON 32-bit functions
+// (48 bytes encode, 64 bytes decode) that we inline the
+// uint32 codec to stay performant on smaller inputs.
+
+// Encoder of the NEON32 codec. When NEON32 support is compiled in, the body
+// is assembled from the generic bytewise head/tail fragments with two bulk
+// loops spliced in between: the NEON loop (48 input bytes per round) followed
+// by the generic 32-bit loop (3 input bytes per round) for what the NEON loop
+// left over.
+BASE64_ENC_FUNCTION(neon32)
+{
+#ifdef BASE64_USE_NEON32
+	#include "../generic/enc_head.c"
+	enc_loop_neon32(&s, &slen, &o, &olen);
+	enc_loop_generic_32(&s, &slen, &o, &olen);
+	#include "../generic/enc_tail.c"
+#else
+	// NEON32 not available in this build; BASE64_ENC_STUB is defined
+	// elsewhere (presumably a placeholder body, confirm in codecs.h).
+	BASE64_ENC_STUB
+#endif
+}
+
+// Decoder of the NEON32 codec. When NEON32 support is compiled in, the body
+// is assembled from the generic bytewise head/tail fragments with two bulk
+// loops spliced in between: the NEON loop (64 input bytes per round) followed
+// by the generic 32-bit loop (4 input bytes per round). Both bulk loops stop
+// in front of invalid input and leave it to the bytewise tail.
+BASE64_DEC_FUNCTION(neon32)
+{
+#ifdef BASE64_USE_NEON32
+	#include "../generic/dec_head.c"
+	dec_loop_neon32(&s, &slen, &o, &olen);
+	dec_loop_generic_32(&s, &slen, &o, &olen);
+	#include "../generic/dec_tail.c"
+#else
+	// NEON32 not available in this build; BASE64_DEC_STUB is defined
+	// elsewhere (presumably a placeholder body, confirm in codecs.h).
+	BASE64_DEC_STUB
+#endif
+}
--- a/src/third-party/base64/lib/arch/neon32/dec_loop.c
+++ b/src/third-party/base64/lib/arch/neon32/dec_loop.c
@ -0,0 +1,106 @@
+// Return 1 if any byte of the vector is nonzero, 0 if all 16 are zero.
+static inline int
+is_nonzero (const uint8x16_t v)
+{
+	uint64_t folded;
+
+	// View the vector as two 64-bit lanes and narrow each one to 32 bits
+	// with saturation. Saturation keeps any nonzero lane nonzero, so the
+	// whole vector collapses into a single 64-bit scalar:
+	vst1_u64(&folded,
+		vreinterpret_u64_u32(vqmovn_u64(vreinterpretq_u64_u8(v))));
+
+	return folded != 0;
+}
+
+// Replace every index byte by the delta that turns a Base64 character into
+// its 6-bit value. The decoder passes the character's high nibble as the
+// index, lowered by one for '/':
+//   index 1: '/'        +16      index 4, 5: 'A'..'Z'  -65
+//   index 2: '+'        +19      index 6, 7: 'a'..'z'  -71
+//   index 3: '0'..'9'    +4      index 0:    delta 0
+static inline uint8x16_t
+delta_lookup (const uint8x16_t v)
+{
+	const uint8x8_t deltas = {
+		0, 16, 19, 4, (uint8_t) -65, (uint8_t) -65, (uint8_t) -71, (uint8_t) -71,
+	};
+
+	// The table has eight entries, so each half of the index vector is
+	// looked up on its own:
+	const uint8x8_t lower_half = vtbl1_u8(deltas, vget_low_u8(v));
+	const uint8x8_t upper_half = vtbl1_u8(deltas, vget_high_u8(v));
+
+	return vcombine_u8(lower_half, upper_half);
+}
+
+// Decode 16 Base64 characters in place: each byte of *lane is replaced by
+// its 6-bit value. The returned vector classifies the input; the caller
+// treats any nonzero byte in it as an invalid character.
+// See the SSSE3 decoder for an explanation of the algorithm.
+static inline uint8x16_t
+dec_loop_neon32_lane (uint8x16_t *lane)
+{
+	// Class bits selected by the low nibble of a character:
+	const uint8x16_t class_by_lo = {
+		0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
+		0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A
+	};
+
+	// Class bits selected by the high nibble of a character:
+	const uint8x16_t class_by_hi = {
+		0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
+		0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10
+	};
+
+	const uint8x16_t chars = *lane;
+
+	const uint8x16_t upper    = vshrq_n_u8(chars, 4);
+	const uint8x16_t lower    = vandq_u8(chars, vdupq_n_u8(0x0F));
+	const uint8x16_t is_slash = vceqq_u8(chars, vdupq_n_u8(0x2F));
+
+	// A character passes only if its two class values share no bit:
+	const uint8x16_t verdict = vandq_u8(
+		vqtbl1q_u8(class_by_lo, lower),
+		vqtbl1q_u8(class_by_hi, upper));
+
+	// The delta is selected by the high nibble. '/' shares its high
+	// nibble with '+', so the 0xFF compare result is added to lower its
+	// index by one and give it an entry of its own:
+	*lane = vaddq_u8(chars, delta_lookup(vaddq_u8(is_slash, upper)));
+
+	return verdict;
+}
+
+// Bulk NEON decoder: turns 64 Base64 characters into 48 bytes per round for
+// as long as every character is valid, updating the pointers and the
+// remaining/produced byte counts. It stops in front of the first round that
+// contains an invalid character and leaves it to the caller.
+static inline void
+dec_loop_neon32 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
+{
+	size_t rounds;
+
+	if (*slen < 64) {
+		return;
+	}
+
+	// Every round stores exactly the 48 bytes it produces, so no input
+	// has to be held back to cover extra trailing output:
+	rounds = *slen / 64;
+
+	// Book all rounds up front; rounds that end up not being run are
+	// refunded at the bottom:
+	*slen -= rounds * 64;	// 64 bytes consumed per round
+	*olen += rounds * 48;	// 48 bytes produced per round
+
+	while (rounds > 0) {
+		uint8x16x3_t bytes;
+		uint8x16_t flagged;
+
+		// Deinterleaving load: val[k] receives the k-th character of
+		// each of the sixteen 4-character groups:
+		uint8x16x4_t sextets = vld4q_u8(*s);
+
+		// Decode the four vectors in place and merge their
+		// classification results:
+		flagged = dec_loop_neon32_lane(&sextets.val[0]);
+		flagged = vorrq_u8(flagged, dec_loop_neon32_lane(&sextets.val[1]));
+		flagged = vorrq_u8(flagged, dec_loop_neon32_lane(&sextets.val[2]));
+		flagged = vorrq_u8(flagged, dec_loop_neon32_lane(&sextets.val[3]));
+
+		// Any nonzero byte marks an invalid character. Stop here and
+		// let the bytewise code do the error checking and reporting:
+		if (is_nonzero(flagged)) {
+			break;
+		}
+
+		// Pack four 6-bit values into three bytes:
+		bytes.val[0] = vorrq_u8(vshlq_n_u8(sextets.val[0], 2), vshrq_n_u8(sextets.val[1], 4));
+		bytes.val[1] = vorrq_u8(vshlq_n_u8(sextets.val[1], 4), vshrq_n_u8(sextets.val[2], 2));
+		bytes.val[2] = vorrq_u8(vshlq_n_u8(sextets.val[2], 6), sextets.val[3]);
+
+		// Interleaving store of the 48 decoded bytes:
+		vst3q_u8(*o, bytes);
+
+		*s += 64;
+		*o += 48;
+		rounds--;
+	}
+
+	// Refund the rounds that were booked but never completed:
+	*slen += rounds * 64;
+	*olen -= rounds * 48;
+}
--- a/src/third-party/base64/lib/arch/neon32/enc_loop.c
+++ b/src/third-party/base64/lib/arch/neon32/enc_loop.c
@ -0,0 +1,169 @@
+#ifdef BASE64_NEON32_USE_ASM
+// Encode 48 input bytes into 64 Base64 characters with hand-written NEON
+// assembly. Reads 48 bytes at *s, writes 64 bytes at *o and advances both
+// pointers accordingly (the post-incrementing loads and stores update them).
+static inline void
+enc_loop_neon32_inner_asm (const uint8_t **s, uint8_t **o)
+{
+	// This function duplicates the functionality of enc_loop_neon32_inner,
+	// but entirely with inline assembly. This gives a significant speedup
+	// over using NEON intrinsics, which do not always generate very good
+	// code. The logic of the assembly is directly lifted from the
+	// intrinsics version, so it can be used as a guide to this code.
+
+	// Temporary registers, used as scratch space.
+	uint8x16_t tmp0, tmp1, tmp2, tmp3;
+	uint8x16_t mask0, mask1, mask2, mask3;
+
+	// A lookup table containing the absolute offsets for all ranges.
+	const uint8x16_t lut = {
+		  65U,  71U, 252U, 252U,
+		 252U, 252U, 252U, 252U,
+		 252U, 252U, 252U, 252U,
+		 237U, 240U,   0U,   0U
+	};
+
+	// Numeric constants.
+	const uint8x16_t n51 = vdupq_n_u8(51);
+	const uint8x16_t n25 = vdupq_n_u8(25);
+	const uint8x16_t n63 = vdupq_n_u8(63);
+
+	__asm__ (
+
+		// Load 48 bytes and deinterleave. The bytes are loaded to
+		// hard-coded registers q12, q13 and q14, to ensure that they
+		// are contiguous. Increment the source pointer.
+		"vld3.8 {d24, d26, d28}, [%[src]]! \n\t"
+		"vld3.8 {d25, d27, d29}, [%[src]]! \n\t"
+
+		// Reshuffle the bytes using temporaries.
+		"vshr.u8 %q[t0], q12,    #2      \n\t"
+		"vshr.u8 %q[t1], q13,    #4      \n\t"
+		"vshr.u8 %q[t2], q14,    #6      \n\t"
+		"vsli.8  %q[t1], q12,    #4      \n\t"
+		"vsli.8  %q[t2], q13,    #2      \n\t"
+		"vand.u8 %q[t1], %q[t1], %q[n63] \n\t"
+		"vand.u8 %q[t2], %q[t2], %q[n63] \n\t"
+		"vand.u8 %q[t3], q14,    %q[n63] \n\t"
+
+		// t0..t3 are the reshuffled inputs. Create LUT indices.
+		"vqsub.u8 q12, %q[t0], %q[n51] \n\t"
+		"vqsub.u8 q13, %q[t1], %q[n51] \n\t"
+		"vqsub.u8 q14, %q[t2], %q[n51] \n\t"
+		"vqsub.u8 q15, %q[t3], %q[n51] \n\t"
+
+		// Create the masks: 0xFF for values above 25 (ranges #1..#4),
+		// 0x00 for range #0.
+		"vcgt.u8 %q[m0], %q[t0], %q[n25] \n\t"
+		"vcgt.u8 %q[m1], %q[t1], %q[n25] \n\t"
+		"vcgt.u8 %q[m2], %q[t2], %q[n25] \n\t"
+		"vcgt.u8 %q[m3], %q[t3], %q[n25] \n\t"
+
+		// Subtract -1 to correct the LUT indices.
+		"vsub.u8 q12, %q[m0] \n\t"
+		"vsub.u8 q13, %q[m1] \n\t"
+		"vsub.u8 q14, %q[m2] \n\t"
+		"vsub.u8 q15, %q[m3] \n\t"
+
+		// Lookup the delta values.
+		"vtbl.u8 d24, {%q[lut]}, d24 \n\t"
+		"vtbl.u8 d25, {%q[lut]}, d25 \n\t"
+		"vtbl.u8 d26, {%q[lut]}, d26 \n\t"
+		"vtbl.u8 d27, {%q[lut]}, d27 \n\t"
+		"vtbl.u8 d28, {%q[lut]}, d28 \n\t"
+		"vtbl.u8 d29, {%q[lut]}, d29 \n\t"
+		"vtbl.u8 d30, {%q[lut]}, d30 \n\t"
+		"vtbl.u8 d31, {%q[lut]}, d31 \n\t"
+
+		// Add the delta values.
+		"vadd.u8 q12, %q[t0] \n\t"
+		"vadd.u8 q13, %q[t1] \n\t"
+		"vadd.u8 q14, %q[t2] \n\t"
+		"vadd.u8 q15, %q[t3] \n\t"
+
+		// Store 64 bytes and interleave. Increment the dest pointer.
+		"vst4.8 {d24, d26, d28, d30}, [%[dst]]! \n\t"
+		"vst4.8 {d25, d27, d29, d31}, [%[dst]]! \n\t"
+
+		// Outputs (modified).
+		: [src] "+r"  (*s),
+		  [dst] "+r"  (*o),
+		  [t0]  "=&w" (tmp0),
+		  [t1]  "=&w" (tmp1),
+		  [t2]  "=&w" (tmp2),
+		  [t3]  "=&w" (tmp3),
+		  [m0]  "=&w" (mask0),
+		  [m1]  "=&w" (mask1),
+		  [m2]  "=&w" (mask2),
+		  [m3]  "=&w" (mask3)
+
+		// Inputs (not modified).
+		: [lut] "w" (lut),
+		  [n25] "w" (n25),
+		  [n51] "w" (n51),
+		  [n63] "w" (n63)
+
+		// Clobbers. The source buffer is read and the destination
+		// buffer is written through pointer registers only, so the
+		// compiler cannot see those accesses. "memory" tells it not to
+		// cache or reorder memory contents across this statement.
+		: "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
+		  "memory"
+	);
+}
+#endif
+
+// Encode one 48-byte block into 64 Base64 characters and advance both
+// pointers. Uses the inline-assembly implementation when it is enabled and
+// the intrinsics implementation otherwise.
+static inline void
+enc_loop_neon32_inner (const uint8_t **s, uint8_t **o)
+{
+#ifdef BASE64_NEON32_USE_ASM
+	// The assembly version advances *s and *o on its own:
+	enc_loop_neon32_inner_asm(s, o);
+#else
+	// Deinterleaving load of 48 bytes into three 16-byte vectors:
+	const uint8x16x3_t planes = vld3q_u8(*s);
+
+	// Split into 6-bit values, then map those to Base64 characters:
+	const uint8x16x4_t chars = enc_translate(enc_reshuffle(planes));
+
+	// Interleaving store of the 64 output characters:
+	vst4q_u8(*o, chars);
+
+	*s += 48;
+	*o += 64;
+#endif
+}
+
+// Bulk NEON encoder: encodes every complete 48-byte block of the input and
+// updates the remaining/produced byte counts. Inputs shorter than 48 bytes
+// are left untouched.
+static inline void
+enc_loop_neon32 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
+{
+	size_t rounds = *slen / 48;
+
+	*slen -= rounds * 48;	// 48 bytes consumed per round
+	*olen += rounds * 64;	// 64 bytes produced per round
+
+	// Work through the rounds in unrolled batches of 8, then 4, 2 and 1:
+	for (; rounds >= 8; rounds -= 8) {
+		enc_loop_neon32_inner(s, o);
+		enc_loop_neon32_inner(s, o);
+		enc_loop_neon32_inner(s, o);
+		enc_loop_neon32_inner(s, o);
+		enc_loop_neon32_inner(s, o);
+		enc_loop_neon32_inner(s, o);
+		enc_loop_neon32_inner(s, o);
+		enc_loop_neon32_inner(s, o);
+	}
+	if (rounds >= 4) {
+		enc_loop_neon32_inner(s, o);
+		enc_loop_neon32_inner(s, o);
+		enc_loop_neon32_inner(s, o);
+		enc_loop_neon32_inner(s, o);
+		rounds -= 4;
+	}
+	if (rounds >= 2) {
+		enc_loop_neon32_inner(s, o);
+		enc_loop_neon32_inner(s, o);
+		rounds -= 2;
+	}
+	if (rounds > 0) {
+		enc_loop_neon32_inner(s, o);
+	}
+}
--- a/src/third-party/base64/lib/arch/neon32/enc_reshuffle.c
+++ b/src/third-party/base64/lib/arch/neon32/enc_reshuffle.c
@ -0,0 +1,31 @@
+// Split sixteen 3-byte groups, delivered as three deinterleaved vectors,
+// into four vectors of 6-bit values (one value per byte, upper two bits
+// cleared).
+static inline uint8x16x4_t
+enc_reshuffle (uint8x16x3_t in)
+{
+	// Bit layout, per byte lane:
+	// in[0]  = a7 a6 a5 a4 a3 a2 a1 a0
+	// in[1]  = b7 b6 b5 b4 b3 b2 b1 b0
+	// in[2]  = c7 c6 c5 c4 c3 c2 c1 c0
+	//
+	// out[0] = 00 00 a7 a6 a5 a4 a3 a2
+	// out[1] = 00 00 a1 a0 b7 b6 b5 b4
+	// out[2] = 00 00 b3 b2 b1 b0 c7 c6
+	// out[3] = 00 00 c5 c4 c3 c2 c1 c0
+
+	const uint8x16_t low6 = vdupq_n_u8(0x3F);
+	uint8x16x4_t out;
+
+	// First value: a plain shift, which already clears the top bits.
+	out.val[0] = vshrq_n_u8(in.val[0], 2);
+
+	// Second and third value: shift the lower contributor into place,
+	// shift-insert the upper contributor above it, then mask off the two
+	// surplus high bits that the insert brought along.
+	out.val[1] = vandq_u8(
+		vsliq_n_u8(vshrq_n_u8(in.val[1], 4), in.val[0], 4), low6);
+	out.val[2] = vandq_u8(
+		vsliq_n_u8(vshrq_n_u8(in.val[2], 6), in.val[1], 2), low6);
+
+	// Fourth value: simply the low six bits of the third input byte.
+	out.val[3] = vandq_u8(in.val[2], low6);
+
+	return out;
+}
--- a/src/third-party/base64/lib/arch/neon32/enc_translate.c
+++ b/src/third-party/base64/lib/arch/neon32/enc_translate.c
@ -0,0 +1,57 @@
+// Map four vectors of 6-bit values (each byte in 0..63) to their ASCII
+// Base64 characters by adding a range-dependent offset to every byte.
+static inline uint8x16x4_t
+enc_translate (const uint8x16x4_t in)
+{
+	// The values 0..63 fall into five ranges, each a fixed distance away
+	// from its ASCII characters:
+	// #  From      To         Abs    Index  Characters
+	// 0  [0..25]   [65..90]   +65        0  ABCDEFGHIJKLMNOPQRSTUVWXYZ
+	// 1  [26..51]  [97..122]  +71        1  abcdefghijklmnopqrstuvwxyz
+	// 2  [52..61]  [48..57]    -4  [2..11]  0123456789
+	// 3  [62]      [43]       -19       12  +
+	// 4  [63]      [47]       -16       13  /
+
+	// Offsets addressed by the "Index" column, stored modulo 256:
+	const uint8x16_t lut = {
+		 65U,  71U, 252U, 252U,
+		252U, 252U, 252U, 252U,
+		252U, 252U, 252U, 252U,
+		237U, 240U,   0U,   0U
+	};
+
+	const uint8x16_t n51 = vdupq_n_u8(51);
+	const uint8x16_t n25 = vdupq_n_u8(25);
+
+	uint8x16x4_t out;
+	int i;
+
+	for (i = 0; i < 4; i++) {
+		const uint8x16_t values = in.val[i];
+
+		// Saturating subtraction: 0 for ranges #0 and #1, 1..12 for
+		// the rest, which is one less than the wanted index
+		// everywhere except in range #0:
+		const uint8x16_t reduced = vqsubq_u8(values, n51);
+
+		// 0xFF (-1) outside range #0, 0x00 inside range #0:
+		const uint8x16_t past_upper = vcgtq_u8(values, n25);
+
+		// Subtracting the -1 mask adds the missing 1:
+		const uint8x16_t slots = vsubq_u8(reduced, past_upper);
+
+		// Fetch the offsets and apply them to the input:
+		out.val[i] = vaddq_u8(values, vqtbl1q_u8(lut, slots));
+	}
+
+	return out;
+}
--- a/src/third-party/base64/lib/arch/neon64/codec.c
+++ b/src/third-party/base64/lib/arch/neon64/codec.c
@ -0,0 +1,92 @@
+#include <stdint.h>
+#include <stddef.h>
+#include <string.h>
+
+#include "../../../include/libbase64.h"
+#include "../../tables/tables.h"
+#include "../../codecs.h"
+#include "config.h"
+#include "../../env.h"
+
+#ifdef __aarch64__
+#  if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && HAVE_NEON64
+#    define BASE64_USE_NEON64
+#  endif
+#endif
+
+#ifdef BASE64_USE_NEON64
+#include <arm_neon.h>
+
+// Only enable inline assembly on supported compilers.
+#if defined(__GNUC__) || defined(__clang__)
+#define BASE64_NEON64_USE_ASM
+#endif
+
+// Load the 64 bytes starting at `p` into four consecutive 16-byte vectors and
+// return them as one uint8x16x4_t. With inline assembly enabled, the vectors
+// are pinned to registers v8..v11; otherwise a plain intrinsic load is used.
+static inline uint8x16x4_t
+load_64byte_table (const uint8_t *p)
+{
+#ifdef BASE64_NEON64_USE_ASM
+
+	// Force the table to be loaded into contiguous registers. GCC will not
+	// normally allocate contiguous registers for a `uint8x16x4_t'. These
+	// registers are chosen to not conflict with the ones in the enc loop.
+	register uint8x16_t t0 __asm__ ("v8");
+	register uint8x16_t t1 __asm__ ("v9");
+	register uint8x16_t t2 __asm__ ("v10");
+	register uint8x16_t t3 __asm__ ("v11");
+
+	// One post-incrementing load fills all four registers. `p` is a local
+	// copy, so the increment is not visible to the caller.
+	// NOTE(review): the asm reads memory through `p` without a memory
+	// operand or "memory" clobber; fine if callers only pass constant
+	// tables, confirm at the call sites.
+	__asm__ (
+		"ld1 {%[t0].16b, %[t1].16b, %[t2].16b, %[t3].16b}, [%[src]], #64 \n\t"
+		: [src] "+r" (p),
+		  [t0]  "=w" (t0),
+		  [t1]  "=w" (t1),
+		  [t2]  "=w" (t2),
+		  [t3]  "=w" (t3)
+	);
+
+	return (uint8x16x4_t) {
+		.val[0] = t0,
+		.val[1] = t1,
+		.val[2] = t2,
+		.val[3] = t3,
+	};
+#else
+	return vld1q_u8_x4(p);
+#endif
+}
+
+#include "../generic/32/dec_loop.c"
+#include "../generic/64/enc_loop.c"
+#include "dec_loop.c"
+#include "enc_reshuffle.c"
+#include "enc_loop.c"
+
+#endif	// BASE64_USE_NEON64
+
+// Stride size is so large on these NEON 64-bit functions
+// (48 bytes encode, 64 bytes decode) that we inline the
+// uint64 codec to stay performant on smaller inputs.
+
+// Encoder of the NEON64 codec. When NEON64 support is compiled in, the body
+// is assembled from the generic bytewise head/tail fragments with two bulk
+// loops spliced in between: the NEON loop (from the included enc_loop.c)
+// followed by the generic 64-bit loop (6 input bytes per round) for what the
+// NEON loop left over.
+BASE64_ENC_FUNCTION(neon64)
+{
+#ifdef BASE64_USE_NEON64
+	#include "../generic/enc_head.c"
+	enc_loop_neon64(&s, &slen, &o, &olen);
+	enc_loop_generic_64(&s, &slen, &o, &olen);
+	#include "../generic/enc_tail.c"
+#else
+	// NEON64 not available in this build; BASE64_ENC_STUB is defined
+	// elsewhere (presumably a placeholder body, confirm in codecs.h).
+	BASE64_ENC_STUB
+#endif
+}
+
+// Decoder of the NEON64 codec. When NEON64 support is compiled in, the body
+// is assembled from the generic bytewise head/tail fragments with two bulk
+// loops spliced in between: the NEON loop (from the included dec_loop.c)
+// followed by the generic 32-bit loop (4 input bytes per round), which is
+// used here as well; no 64-bit generic decoder is included by this file.
+BASE64_DEC_FUNCTION(neon64)
+{
+#ifdef BASE64_USE_NEON64
+	#include "../generic/dec_head.c"
+	dec_loop_neon64(&s, &slen, &o, &olen);
+	dec_loop_generic_32(&s, &slen, &o, &olen);
+	#include "../generic/dec_tail.c"
+#else
+	// NEON64 not available in this build; BASE64_DEC_STUB is defined
+	// elsewhere (presumably a placeholder body, confirm in codecs.h).
+	BASE64_DEC_STUB
+#endif
+}
--- a/src/third-party/base64/lib/arch/neon64/dec_loop.c
+++ b/src/third-party/base64/lib/arch/neon64/dec_loop.c
@ -0,0 +1,129 @@
+// The input consists of five valid character sets in the Base64 alphabet,
+// which we need to map back to the 6-bit values they represent.
+// There are three ranges, two singles, and then there's the rest.
+//
+//   #  From       To        LUT  Characters
+//   1  [0..42]    [255]      #1  invalid input
+//   2  [43]       [62]       #1  +
+//   3  [44..46]   [255]      #1  invalid input
+//   4  [47]       [63]       #1  /
+//   5  [48..57]   [52..61]   #1  0..9
+//   6  [58..63]   [255]      #1  invalid input
+//   7  [64]       [255]      #2  invalid input
+//   8  [65..90]   [0..25]    #2  A..Z
+//   9  [91..96]   [255]      #2  invalid input
+//  10  [97..122]  [26..51]   #2  a..z
+//  11  [123..126] [255]      #2  invalid input
+// (12) Everything else => invalid input
+
+// The first LUT will use the VTBL instruction (out of range indices are set to
+// 0 in destination). It is indexed directly by the input byte and therefore
+// covers input values [0..63]; dec_loop_neon64 performs the lookup with
+// `vqtbl4q_u8'.
+static const uint8_t dec_lut1[] = {
+	255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U,
+	255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U,
+	255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U, 255U,  62U, 255U, 255U, 255U,  63U,
+	 52U,  53U,  54U,  55U,  56U,  57U,  58U,  59U,  60U,  61U, 255U, 255U, 255U, 255U, 255U, 255U,
+};
+
+// The second LUT will use the VTBX instruction (out of range indices will be
+// unchanged in destination). Input [64..126] will be mapped to index [1..63]
+// in this LUT. Index 0 means that value comes from LUT #1.
+// dec_loop_neon64 derives the index with a saturating subtract of 63 and
+// performs the lookup with `vqtbx4q_u8'.
+static const uint8_t dec_lut2[] = {
+	  0U, 255U,   0U,   1U,   2U,   3U,   4U,   5U,   6U,   7U,   8U,   9U,  10U,  11U,  12U,  13U,
+	 14U,  15U,  16U,  17U,  18U,  19U,  20U,  21U,  22U,  23U,  24U,  25U, 255U, 255U, 255U, 255U,
+	255U, 255U,  26U,  27U,  28U,  29U,  30U,  31U,  32U,  33U,  34U,  35U,  36U,  37U,  38U,  39U,
+	 40U,  41U,  42U,  43U,  44U,  45U,  46U,  47U,  48U,  49U,  50U,  51U, 255U, 255U, 255U, 255U,
+};
+
+// All input values in range for the first look-up will be 0U in the second
+// look-up result. All input values out of range for the first look-up will be
+// 0U in the first look-up result. Thus, the two results can be ORed without
+// conflicts.
+//
+// Invalid characters that are in the valid range for either look-up will be
+// set to 255U in the combined result. Other invalid characters will just be
+// passed through with the second look-up result (using the VTBX instruction).
+// Since the second LUT is 64 bytes, those passed-through values are guaranteed
+// to have a value greater than 63U. Therefore, valid characters will be mapped
+// to the valid [0..63] range and all invalid characters will be mapped to
+// values greater than 63.
+
+// Decode whole 64-byte blocks of Base64 text into 48 output bytes each.
+//
+// `*s' / `*o' are advanced past the blocks that were decoded, `*slen' is
+// reduced and `*olen' increased accordingly. Decoding stops at the first
+// block that contains a character outside the Base64 alphabet; that block and
+// everything after it is left untouched and is not accounted for in
+// `*slen' / `*olen'.
+static inline void
+dec_loop_neon64 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
+{
+	if (*slen < 64) {
+		return;
+	}
+
+	// Process blocks of 64 bytes per round. Unlike the SSE codecs, no
+	// extra trailing zero bytes are written, so it is not necessary to
+	// reserve extra input bytes:
+	size_t rounds = *slen / 64;
+
+	// Optimistically account for all rounds up front; rounds that end up
+	// being skipped are given back after the loop.
+	*slen -= rounds * 64;	// 64 bytes consumed per round
+	*olen += rounds * 48;	// 48 bytes produced per round
+
+	const uint8x16x4_t tbl_dec1 = load_64byte_table(dec_lut1);
+	const uint8x16x4_t tbl_dec2 = load_64byte_table(dec_lut2);
+
+	do {
+		const uint8x16_t offset = vdupq_n_u8(63U);
+		uint8x16x4_t dec1, dec2;
+		uint8x16x3_t dec;
+
+		// Load 64 bytes and deinterleave:
+		uint8x16x4_t str = vld4q_u8((uint8_t *) *s);
+
+		// Get indices for second LUT:
+		// (saturating subtract: inputs <= 63 all become index 0)
+		dec2.val[0] = vqsubq_u8(str.val[0], offset);
+		dec2.val[1] = vqsubq_u8(str.val[1], offset);
+		dec2.val[2] = vqsubq_u8(str.val[2], offset);
+		dec2.val[3] = vqsubq_u8(str.val[3], offset);
+
+		// Get values from first LUT:
+		dec1.val[0] = vqtbl4q_u8(tbl_dec1, str.val[0]);
+		dec1.val[1] = vqtbl4q_u8(tbl_dec1, str.val[1]);
+		dec1.val[2] = vqtbl4q_u8(tbl_dec1, str.val[2]);
+		dec1.val[3] = vqtbl4q_u8(tbl_dec1, str.val[3]);
+
+		// Get values from second LUT:
+		// (the index vector doubles as the fallback value for
+		// out-of-range indices)
+		dec2.val[0] = vqtbx4q_u8(dec2.val[0], tbl_dec2, dec2.val[0]);
+		dec2.val[1] = vqtbx4q_u8(dec2.val[1], tbl_dec2, dec2.val[1]);
+		dec2.val[2] = vqtbx4q_u8(dec2.val[2], tbl_dec2, dec2.val[2]);
+		dec2.val[3] = vqtbx4q_u8(dec2.val[3], tbl_dec2, dec2.val[3]);
+
+		// Get final values:
+		str.val[0] = vorrq_u8(dec1.val[0], dec2.val[0]);
+		str.val[1] = vorrq_u8(dec1.val[1], dec2.val[1]);
+		str.val[2] = vorrq_u8(dec1.val[2], dec2.val[2]);
+		str.val[3] = vorrq_u8(dec1.val[3], dec2.val[3]);
+
+		// Check for invalid input, any value larger than 63:
+		const uint8x16_t classified
+			= vcgtq_u8(str.val[0], vdupq_n_u8(63))
+			| vcgtq_u8(str.val[1], vdupq_n_u8(63))
+			| vcgtq_u8(str.val[2], vdupq_n_u8(63))
+			| vcgtq_u8(str.val[3], vdupq_n_u8(63));
+
+		// Check that all bits are zero:
+		// (bail out before storing or advancing anything, so this
+		// block still counts as a skipped round below)
+		if (vmaxvq_u8(classified) != 0U) {
+			break;
+		}
+
+		// Compress four bytes into three:
+		dec.val[0] = vshlq_n_u8(str.val[0], 2) | vshrq_n_u8(str.val[1], 4);
+		dec.val[1] = vshlq_n_u8(str.val[1], 4) | vshrq_n_u8(str.val[2], 2);
+		dec.val[2] = vshlq_n_u8(str.val[2], 6) | str.val[3];
+
+		// Interleave and store decoded result:
+		vst3q_u8((uint8_t *) *o, dec);
+
+		*s += 64;
+		*o += 48;
+
+	} while (--rounds > 0);
+
+	// Adjust for any rounds that were skipped:
+	*slen += rounds * 64;
+	*olen -= rounds * 48;
+}
--- a/src/third-party/base64/lib/arch/neon64/enc_loop.c
+++ b/src/third-party/base64/lib/arch/neon64/enc_loop.c
@ -0,0 +1,133 @@
+#ifdef BASE64_NEON64_USE_ASM
+// Encode one 48-byte input block into 64 Base64 characters using inline
+// assembly.
+//
+//   s        in/out: source pointer, advanced by 48
+//   o        in/out: destination pointer, advanced by 64
+//   tbl_enc  the 64-entry encoding alphabet, one 16-byte vector per quarter
+static inline void
+enc_loop_neon64_inner_asm (const uint8_t **s, uint8_t **o, const uint8x16x4_t tbl_enc)
+{
+	// This function duplicates the functionality of enc_loop_neon64_inner,
+	// but entirely with inline assembly. This gives a significant speedup
+	// over using NEON intrinsics, which do not always generate very good
+	// code. The logic of the assembly is directly lifted from the
+	// intrinsics version, so it can be used as a guide to this code.
+
+	// Temporary registers, used as scratch space.
+	uint8x16_t tmp0, tmp1, tmp2, tmp3;
+
+	// Numeric constant.
+	const uint8x16_t n63 = vdupq_n_u8(63);
+
+	__asm__ (
+
+		// Load 48 bytes and deinterleave. The bytes are loaded to
+		// hard-coded registers v12, v13 and v14, to ensure that they
+		// are contiguous. Increment the source pointer.
+		"ld3 {v12.16b, v13.16b, v14.16b}, [%[src]], #48 \n\t"
+
+		// Reshuffle the bytes using temporaries.
+		"ushr %[t0].16b, v12.16b,   #2         \n\t"
+		"ushr %[t1].16b, v13.16b,   #4         \n\t"
+		"ushr %[t2].16b, v14.16b,   #6         \n\t"
+		"sli  %[t1].16b, v12.16b,   #4         \n\t"
+		"sli  %[t2].16b, v13.16b,   #2         \n\t"
+		"and  %[t1].16b, %[t1].16b, %[n63].16b \n\t"
+		"and  %[t2].16b, %[t2].16b, %[n63].16b \n\t"
+		"and  %[t3].16b, v14.16b,   %[n63].16b \n\t"
+
+		// Translate the values to the Base64 alphabet.
+		"tbl v12.16b, {%[l0].16b, %[l1].16b, %[l2].16b, %[l3].16b}, %[t0].16b \n\t"
+		"tbl v13.16b, {%[l0].16b, %[l1].16b, %[l2].16b, %[l3].16b}, %[t1].16b \n\t"
+		"tbl v14.16b, {%[l0].16b, %[l1].16b, %[l2].16b, %[l3].16b}, %[t2].16b \n\t"
+		"tbl v15.16b, {%[l0].16b, %[l1].16b, %[l2].16b, %[l3].16b}, %[t3].16b \n\t"
+
+		// Store 64 bytes and interleave. Increment the dest pointer.
+		"st4 {v12.16b, v13.16b, v14.16b, v15.16b}, [%[dst]], #64 \n\t"
+
+		// Outputs (modified).
+		: [src] "+r"  (*s),
+		  [dst] "+r"  (*o),
+		  [t0]  "=&w" (tmp0),
+		  [t1]  "=&w" (tmp1),
+		  [t2]  "=&w" (tmp2),
+		  [t3]  "=&w" (tmp3)
+
+		// Inputs (not modified).
+		: [n63] "w" (n63),
+		  [l0]  "w" (tbl_enc.val[0]),
+		  [l1]  "w" (tbl_enc.val[1]),
+		  [l2]  "w" (tbl_enc.val[2]),
+		  [l3]  "w" (tbl_enc.val[3])
+
+		// Clobbers. "memory" is required because the source and
+		// destination buffers are accessed through pointers only; they
+		// are not listed as memory operands, so without it the
+		// compiler does not know that the statement reads and writes
+		// memory.
+		: "v12", "v13", "v14", "v15", "memory"
+	);
+}
+#endif
+
+// Encode one 48-byte input block into 64 Base64 characters and advance `*s'
+// by 48 and `*o' by 64. Dispatches to the inline assembly version when
+// BASE64_NEON64_USE_ASM is defined, otherwise uses NEON intrinsics.
+static inline void
+enc_loop_neon64_inner (const uint8_t **s, uint8_t **o, const uint8x16x4_t tbl_enc)
+{
+#ifdef BASE64_NEON64_USE_ASM
+	enc_loop_neon64_inner_asm(s, o, tbl_enc);
+#else
+	// Load 48 bytes and deinterleave:
+	uint8x16x3_t src = vld3q_u8(*s);
+
+	// Divide bits of three input bytes over four output bytes:
+	uint8x16x4_t out = enc_reshuffle(src);
+
+	// The bits have now been shifted to the right locations;
+	// translate their values 0..63 to the Base64 alphabet.
+	// Use a 64-byte table lookup:
+	out.val[0] = vqtbl4q_u8(tbl_enc, out.val[0]);
+	out.val[1] = vqtbl4q_u8(tbl_enc, out.val[1]);
+	out.val[2] = vqtbl4q_u8(tbl_enc, out.val[2]);
+	out.val[3] = vqtbl4q_u8(tbl_enc, out.val[3]);
+
+	// Interleave and store output:
+	vst4q_u8(*o, out);
+
+	*s += 48;
+	*o += 64;
+#endif
+}
+
+// Encode every whole 48-byte block available in the input.
+//
+// `*slen' and `*olen' are updated for all blocks before any work is done;
+// `*s' and `*o' are advanced block by block by enc_loop_neon64_inner.
+static inline void
+enc_loop_neon64 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
+{
+	size_t blocks = *slen / 48;
+
+	*slen -= blocks * 48;	// each block consumes 48 input bytes
+	*olen += blocks * 64;	// ... and produces 64 output bytes
+
+	// The encoding alphabet, kept in registers for the whole loop:
+	const uint8x16x4_t tbl_enc = load_64byte_table(base64_table_enc_6bit);
+
+	// Unrolled eight times for as long as enough blocks remain:
+	for (; blocks >= 8; blocks -= 8) {
+		enc_loop_neon64_inner(s, o, tbl_enc);
+		enc_loop_neon64_inner(s, o, tbl_enc);
+		enc_loop_neon64_inner(s, o, tbl_enc);
+		enc_loop_neon64_inner(s, o, tbl_enc);
+		enc_loop_neon64_inner(s, o, tbl_enc);
+		enc_loop_neon64_inner(s, o, tbl_enc);
+		enc_loop_neon64_inner(s, o, tbl_enc);
+		enc_loop_neon64_inner(s, o, tbl_enc);
+	}
+
+	// At most seven blocks are left: peel off four, two and one.
+	if (blocks >= 4) {
+		enc_loop_neon64_inner(s, o, tbl_enc);
+		enc_loop_neon64_inner(s, o, tbl_enc);
+		enc_loop_neon64_inner(s, o, tbl_enc);
+		enc_loop_neon64_inner(s, o, tbl_enc);
+		blocks -= 4;
+	}
+	if (blocks >= 2) {
+		enc_loop_neon64_inner(s, o, tbl_enc);
+		enc_loop_neon64_inner(s, o, tbl_enc);
+		blocks -= 2;
+	}
+	if (blocks >= 1) {
+		enc_loop_neon64_inner(s, o, tbl_enc);
+	}
+}
--- a/src/third-party/base64/lib/arch/neon64/enc_reshuffle.c
+++ b/src/third-party/base64/lib/arch/neon64/enc_reshuffle.c
@ -0,0 +1,31 @@
+// Spread the bits of three deinterleaved input vectors over four output
+// vectors, so that every output byte holds one 6-bit value:
+//
+//   in[0]  = a7 a6 a5 a4 a3 a2 a1 a0
+//   in[1]  = b7 b6 b5 b4 b3 b2 b1 b0
+//   in[2]  = c7 c6 c5 c4 c3 c2 c1 c0
+//
+//   out[0] = 00 00 a7 a6 a5 a4 a3 a2
+//   out[1] = 00 00 a1 a0 b7 b6 b5 b4
+//   out[2] = 00 00 b3 b2 b1 b0 c7 c6
+//   out[3] = 00 00 c5 c4 c3 c2 c1 c0
+static inline uint8x16x4_t
+enc_reshuffle (const uint8x16x3_t in)
+{
+	// Keeps the low six bits of every byte:
+	const uint8x16_t low6 = vdupq_n_u8(0x3F);
+	uint8x16x4_t out;
+
+	// First output: a plain shift already leaves the top two bits clear.
+	out.val[0] = vshrq_n_u8(in.val[0], 2);
+
+	// Second and third output: shift the low part into place, insert the
+	// high part from the preceding input byte, then drop the top two bits.
+	out.val[1] = vandq_u8(vsliq_n_u8(vshrq_n_u8(in.val[1], 4), in.val[0], 4), low6);
+	out.val[2] = vandq_u8(vsliq_n_u8(vshrq_n_u8(in.val[2], 6), in.val[1], 2), low6);
+
+	// Fourth output: just the low six bits of the third input byte.
+	out.val[3] = vandq_u8(in.val[2], low6);
+
+	return out;
+}
--- a/src/third-party/base64/lib/arch/sse41/codec.c
+++ b/src/third-party/base64/lib/arch/sse41/codec.c
@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "../../../include/libbase64.h"
+#include "../../tables/tables.h"
+#include "../../codecs.h"
+#include "config.h"
+#include "../../env.h"
+
+#if HAVE_SSE41
+#include <smmintrin.h>
+
+#include "../ssse3/dec_reshuffle.c"
+#include "../ssse3/dec_loop.c"
+#include "../ssse3/enc_translate.c"
+#include "../ssse3/enc_reshuffle.c"
+#include "../ssse3/enc_loop.c"
+
+#endif	// HAVE_SSE41
+
+// Streaming encoder entry point for the SSE4.1 codec. It runs the SSSE3
+// encoding loop; when HAVE_SSE41 is zero the body is BASE64_ENC_STUB.
+BASE64_ENC_FUNCTION(sse41)
+{
+#if HAVE_SSE41
+	#include "../generic/enc_head.c"
+	enc_loop_ssse3(&s, &slen, &o, &olen);
+	#include "../generic/enc_tail.c"
+#else
+	BASE64_ENC_STUB
+#endif
+}
+
+// Streaming decoder entry point for the SSE4.1 codec. It runs the SSSE3
+// decoding loop; when HAVE_SSE41 is zero the body is BASE64_DEC_STUB.
+BASE64_DEC_FUNCTION(sse41)
+{
+#if HAVE_SSE41
+	#include "../generic/dec_head.c"
+	dec_loop_ssse3(&s, &slen, &o, &olen);
+	#include "../generic/dec_tail.c"
+#else
+	BASE64_DEC_STUB
+#endif
+}
--- a/src/third-party/base64/lib/arch/sse42/codec.c
+++ b/src/third-party/base64/lib/arch/sse42/codec.c
@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "../../../include/libbase64.h"
+#include "../../tables/tables.h"
+#include "../../codecs.h"
+#include "config.h"
+#include "../../env.h"
+
+#if HAVE_SSE42
+#include <nmmintrin.h>
+
+#include "../ssse3/dec_reshuffle.c"
+#include "../ssse3/dec_loop.c"
+#include "../ssse3/enc_translate.c"
+#include "../ssse3/enc_reshuffle.c"
+#include "../ssse3/enc_loop.c"
+
+#endif	// HAVE_SSE42
+
+// Streaming encoder entry point for the SSE4.2 codec. It runs the SSSE3
+// encoding loop; when HAVE_SSE42 is zero the body is BASE64_ENC_STUB.
+BASE64_ENC_FUNCTION(sse42)
+{
+#if HAVE_SSE42
+	#include "../generic/enc_head.c"
+	enc_loop_ssse3(&s, &slen, &o, &olen);
+	#include "../generic/enc_tail.c"
+#else
+	BASE64_ENC_STUB
+#endif
+}
+
+// Streaming decoder entry point for the SSE4.2 codec. It runs the SSSE3
+// decoding loop; when HAVE_SSE42 is zero the body is BASE64_DEC_STUB.
+BASE64_DEC_FUNCTION(sse42)
+{
+#if HAVE_SSE42
+	#include "../generic/dec_head.c"
+	dec_loop_ssse3(&s, &slen, &o, &olen);
+	#include "../generic/dec_tail.c"
+#else
+	BASE64_DEC_STUB
+#endif
+}
--- a/src/third-party/base64/lib/arch/ssse3/codec.c
+++ b/src/third-party/base64/lib/arch/ssse3/codec.c
@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "../../../include/libbase64.h"
+#include "../../tables/tables.h"
+#include "../../codecs.h"
+#include "config.h"
+#include "../../env.h"
+
+#if HAVE_SSSE3
+#include <tmmintrin.h>
+
+#include "dec_reshuffle.c"
+#include "dec_loop.c"
+#include "enc_reshuffle.c"
+#include "enc_translate.c"
+#include "enc_loop.c"
+
+#endif	// HAVE_SSSE3
+
+// Streaming encoder entry point for the SSSE3 codec. When HAVE_SSSE3 is zero
+// the body is BASE64_ENC_STUB.
+BASE64_ENC_FUNCTION(ssse3)
+{
+#if HAVE_SSSE3
+	#include "../generic/enc_head.c"
+	enc_loop_ssse3(&s, &slen, &o, &olen);
+	#include "../generic/enc_tail.c"
+#else
+	BASE64_ENC_STUB
+#endif
+}
+
+// Streaming decoder entry point for the SSSE3 codec. When HAVE_SSSE3 is zero
+// the body is BASE64_DEC_STUB.
+BASE64_DEC_FUNCTION(ssse3)
+{
+#if HAVE_SSSE3
+	#include "../generic/dec_head.c"
+	dec_loop_ssse3(&s, &slen, &o, &olen);
+	#include "../generic/dec_tail.c"
+#else
+	BASE64_DEC_STUB
+#endif
+}
--- a/src/third-party/base64/lib/arch/ssse3/dec_loop.c
+++ b/src/third-party/base64/lib/arch/ssse3/dec_loop.c
@ -0,0 +1,173 @@
+// The input consists of six character sets in the Base64 alphabet, which we
+// need to map back to the 6-bit values they represent. There are three ranges,
+// two singles, and then there's the rest.
+//
+//  #  From       To        Add  Characters
+//  1  [43]       [62]      +19  +
+//  2  [47]       [63]      +16  /
+//  3  [48..57]   [52..61]   +4  0..9
+//  4  [65..90]   [0..25]   -65  A..Z
+//  5  [97..122]  [26..51]  -71  a..z
+// (6) Everything else => invalid input
+//
+// We will use lookup tables for character validation and offset computation.
+// Remember that 0x2X and 0x0X select the same index in _mm_shuffle_epi8; this
+// allows masking with 0x2F instead of 0x0F, which saves one constant
+// declaration (register and/or memory access).
+//
+// For offsets:
+// Perfect hash for lut = ((src >> 4) & 0x2F) + ((src == 0x2F) ? 0xFF : 0x00)
+// 0000 = garbage
+// 0001 = /
+// 0010 = +
+// 0011 = 0-9
+// 0100 = A-Z
+// 0101 = A-Z
+// 0110 = a-z
+// 0111 = a-z
+// 1000 >= garbage
+//
+// For validation, here's the table.
+// A character is valid if and only if the AND of the 2 lookups equals 0:
+//
+// hi \ lo              0000 0001 0010 0011 0100 0101 0110 0111 1000 1001 1010 1011 1100 1101 1110 1111
+//      LUT             0x15 0x11 0x11 0x11 0x11 0x11 0x11 0x11 0x11 0x11 0x13 0x1A 0x1B 0x1B 0x1B 0x1A
+//
+// 0000 0x10 char        NUL  SOH  STX  ETX  EOT  ENQ  ACK  BEL   BS   HT   LF   VT   FF   CR   SO   SI
+//           andlut     0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
+//
+// 0001 0x10 char        DLE  DC1  DC2  DC3  DC4  NAK  SYN  ETB  CAN   EM  SUB  ESC   FS   GS   RS   US
+//           andlut     0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
+//
+// 0010 0x01 char               !    "    #    $    %    &    '    (    )    *    +    ,    -    .    /
+//           andlut     0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x00 0x01 0x01 0x01 0x00
+//
+// 0011 0x02 char          0    1    2    3    4    5    6    7    8    9    :    ;    <    =    >    ?
+//           andlut     0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x02 0x02 0x02 0x02 0x02 0x02
+//
+// 0100 0x04 char          @    A    B    C    D    E    F    G    H    I    J    K    L    M    N    O
+//           andlut     0x04 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
+//
+// 0101 0x08 char          P    Q    R    S    T    U    V    W    X    Y    Z    [    \    ]    ^    _
+//           andlut     0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x08 0x08 0x08 0x08 0x08
+//
+// 0110 0x04 char          `    a    b    c    d    e    f    g    h    i    j    k    l    m    n    o
+//           andlut     0x04 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
+//
+// 0111 0x08 char          p    q    r    s    t    u    v    w    x    y    z    {    |    }    ~  DEL
+//           andlut     0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x08 0x08 0x08 0x08 0x08
+//
+// 1000 0x10 andlut     0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
+// 1001 0x10 andlut     0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
+// 1010 0x10 andlut     0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
+// 1011 0x10 andlut     0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
+// 1100 0x10 andlut     0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
+// 1101 0x10 andlut     0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
+// 1110 0x10 andlut     0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
+// 1111 0x10 andlut     0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10
+
+// Decode one 16-byte block of Base64 text into 12 output bytes.
+//
+// Returns 1 on success, after advancing `*s' by 16 and `*o' by 12 and
+// decrementing `*rounds'. Returns 0 if the block contains a byte that fails
+// validation; in that case nothing is stored and none of the arguments are
+// modified. Note that a full 16 bytes are stored at `*o' on success, although
+// the output pointer only advances by 12.
+static inline int
+dec_loop_ssse3_inner (const uint8_t **s, uint8_t **o, size_t *rounds)
+{
+	// Validation tables, indexed by the low and high nibble of each byte:
+	const __m128i lut_lo = _mm_setr_epi8(
+		0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
+		0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A);
+
+	const __m128i lut_hi = _mm_setr_epi8(
+		0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
+		0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10);
+
+	// Offsets to add to each input byte, indexed by the hashed high nibble:
+	const __m128i lut_roll = _mm_setr_epi8(
+		0,  16,  19,   4, -65, -65, -71, -71,
+		0,   0,   0,   0,   0,   0,   0,   0);
+
+	const __m128i mask_2F = _mm_set1_epi8(0x2F);
+
+	// Load input:
+	__m128i str = _mm_loadu_si128((__m128i *) *s);
+
+	// Table lookups:
+	const __m128i hi_nibbles = _mm_and_si128(_mm_srli_epi32(str, 4), mask_2F);
+	const __m128i lo_nibbles = _mm_and_si128(str, mask_2F);
+	const __m128i hi         = _mm_shuffle_epi8(lut_hi, hi_nibbles);
+	const __m128i lo         = _mm_shuffle_epi8(lut_lo, lo_nibbles);
+
+	// Check for invalid input: if any "and" values from lo and hi are not
+	// zero, fall back on bytewise code to do error checking and reporting:
+	if (_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_and_si128(lo, hi), _mm_setzero_si128())) != 0) {
+		return 0;
+	}
+
+	// eq_2F is 0xFF (-1) for '/' bytes, so adding it moves the index for
+	// '/' one slot down from the one shared with '+':
+	const __m128i eq_2F = _mm_cmpeq_epi8(str, mask_2F);
+	const __m128i roll  = _mm_shuffle_epi8(lut_roll, _mm_add_epi8(eq_2F, hi_nibbles));
+
+	// Now simply add the delta values to the input:
+	str = _mm_add_epi8(str, roll);
+
+	// Reshuffle the input to packed 12-byte output format:
+	str = dec_reshuffle(str);
+
+	// Store the output:
+	_mm_storeu_si128((__m128i *) *o, str);
+
+	*s += 16;
+	*o += 12;
+	*rounds -= 1;
+
+	return 1;
+}
+
+// Decode as many 16-byte blocks of Base64 text as possible, 12 output bytes
+// per block.
+//
+// Does nothing when fewer than 24 input bytes are available. `*slen' and
+// `*olen' are first adjusted for all planned rounds; if a block fails
+// validation the loop stops and the rounds that were not executed are given
+// back, so on return the four in/out arguments describe exactly the data that
+// was decoded.
+static inline void
+dec_loop_ssse3 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
+{
+	if (*slen < 24) {
+		return;
+	}
+
+	// Process blocks of 16 bytes per round. Because 4 extra zero bytes are
+	// written after the output, ensure that there will be at least 8 bytes
+	// of input data left to cover the gap. (6 data bytes and up to two
+	// end-of-string markers.)
+	size_t rounds = (*slen - 8) / 16;
+
+	*slen -= rounds * 16;	// 16 bytes consumed per round
+	*olen += rounds * 12;	// 12 bytes produced per round
+
+	// Manually unrolled by 8, 4 and 2. Each inner call decrements `rounds'
+	// itself on success; the `&&' chains stop at the first failing block.
+	do {
+		if (rounds >= 8) {
+			if (dec_loop_ssse3_inner(s, o, &rounds) &&
+			    dec_loop_ssse3_inner(s, o, &rounds) &&
+			    dec_loop_ssse3_inner(s, o, &rounds) &&
+			    dec_loop_ssse3_inner(s, o, &rounds) &&
+			    dec_loop_ssse3_inner(s, o, &rounds) &&
+			    dec_loop_ssse3_inner(s, o, &rounds) &&
+			    dec_loop_ssse3_inner(s, o, &rounds) &&
+			    dec_loop_ssse3_inner(s, o, &rounds)) {
+				continue;
+			}
+			break;
+		}
+		if (rounds >= 4) {
+			if (dec_loop_ssse3_inner(s, o, &rounds) &&
+			    dec_loop_ssse3_inner(s, o, &rounds) &&
+			    dec_loop_ssse3_inner(s, o, &rounds) &&
+			    dec_loop_ssse3_inner(s, o, &rounds)) {
+				continue;
+			}
+			break;
+		}
+		if (rounds >= 2) {
+			if (dec_loop_ssse3_inner(s, o, &rounds) &&
+			    dec_loop_ssse3_inner(s, o, &rounds)) {
+				continue;
+			}
+			break;
+		}
+		// Last round: the result is deliberately ignored, a failure
+		// simply leaves `rounds' at 1 for the adjustment below.
+		dec_loop_ssse3_inner(s, o, &rounds);
+		break;
+
+	} while (rounds > 0);
+
+	// Adjust for any rounds that were skipped:
+	*slen += rounds * 16;
+	*olen -= rounds * 12;
+}
--- a/src/third-party/base64/lib/arch/ssse3/dec_reshuffle.c
+++ b/src/third-party/base64/lib/arch/ssse3/dec_reshuffle.c
@ -0,0 +1,33 @@
+// Pack sixteen 6-bit values (one per input byte) into twelve output bytes.
+// The twelve bytes end up in the low part of the result; the remaining four
+// bytes are zero.
+static inline __m128i
+dec_reshuffle (const __m128i in)
+{
+	// in, bits, upper case are most significant bits, lower case are least significant bits
+	// 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
+	// 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
+	// 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
+	// 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA
+
+	// Multiply-add adjacent bytes: merges each pair of 6-bit values into
+	// one 12-bit value per 16-bit lane.
+	const __m128i merge_ab_and_bc = _mm_maddubs_epi16(in, _mm_set1_epi32(0x01400140));
+	// 0000kkkk LLllllll 0000JJJJ JJjjKKKK
+	// 0000hhhh IIiiiiii 0000GGGG GGggHHHH
+	// 0000eeee FFffffff 0000DDDD DDddEEEE
+	// 0000bbbb CCcccccc 0000AAAA AAaaBBBB
+
+	// Multiply-add adjacent 16-bit lanes: merges each pair of 12-bit
+	// values into one 24-bit value per 32-bit lane.
+	const __m128i out = _mm_madd_epi16(merge_ab_and_bc, _mm_set1_epi32(0x00011000));
+	// 00000000 JJJJJJjj KKKKkkkk LLllllll
+	// 00000000 GGGGGGgg HHHHhhhh IIiiiiii
+	// 00000000 DDDDDDdd EEEEeeee FFffffff
+	// 00000000 AAAAAAaa BBBBbbbb CCcccccc
+
+	// Pack bytes together:
+	// (index -1 makes the shuffle write a zero byte)
+	return  _mm_shuffle_epi8(out, _mm_setr_epi8(
+		 2,  1,  0,
+		 6,  5,  4,
+		10,  9,  8,
+		14, 13, 12,
+		-1, -1, -1, -1));
+	// 00000000 00000000 00000000 00000000
+	// LLllllll KKKKkkkk JJJJJJjj IIiiiiii
+	// HHHHhhhh GGGGGGgg FFffffff EEEEeeee
+	// DDDDDDdd CCcccccc BBBBbbbb AAAAAAaa
+}
--- a/src/third-party/base64/lib/arch/ssse3/enc_loop.c
+++ b/src/third-party/base64/lib/arch/ssse3/enc_loop.c
@ -0,0 +1,67 @@
+// Encode one 12-byte input block into 16 Base64 characters, then advance
+// `*s' by 12 and `*o' by 16. A full 16 bytes are read from `*s'.
+static inline void
+enc_loop_ssse3_inner (const uint8_t **s, uint8_t **o)
+{
+	// Unaligned 16-byte load of the raw input:
+	const __m128i raw = _mm_loadu_si128((__m128i *) *s);
+
+	// One 6-bit value per byte:
+	const __m128i sextets = enc_reshuffle(raw);
+
+	// Map every 6-bit value to its Base64 character:
+	const __m128i encoded = enc_translate(sextets);
+
+	// Unaligned 16-byte store of the result:
+	_mm_storeu_si128((__m128i *) *o, encoded);
+
+	*s += 12;
+	*o += 16;
+}
+
+// Encode as many 12-byte input blocks as can safely be read 16 bytes at a
+// time.
+//
+// Does nothing when fewer than 16 input bytes are available. `*slen' and
+// `*olen' are updated for all blocks before any work is done; `*s' and `*o'
+// are advanced block by block by enc_loop_ssse3_inner.
+static inline void
+enc_loop_ssse3 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
+{
+	if (*slen < 16) {
+		return;
+	}
+
+	// Every block consumes 12 bytes but loads 16, so keep 4 bytes in
+	// reserve: the last load must not run past the end of the input
+	// buffer. The guard above makes this at least one block.
+	size_t blocks = (*slen - 4) / 12;
+
+	*slen -= blocks * 12;	// each block consumes 12 input bytes
+	*olen += blocks * 16;	// ... and produces 16 output bytes
+
+	// Unrolled eight times for as long as enough blocks remain:
+	for (; blocks >= 8; blocks -= 8) {
+		enc_loop_ssse3_inner(s, o);
+		enc_loop_ssse3_inner(s, o);
+		enc_loop_ssse3_inner(s, o);
+		enc_loop_ssse3_inner(s, o);
+		enc_loop_ssse3_inner(s, o);
+		enc_loop_ssse3_inner(s, o);
+		enc_loop_ssse3_inner(s, o);
+		enc_loop_ssse3_inner(s, o);
+	}
+
+	// At most seven blocks are left: peel off four, two and one.
+	if (blocks >= 4) {
+		enc_loop_ssse3_inner(s, o);
+		enc_loop_ssse3_inner(s, o);
+		enc_loop_ssse3_inner(s, o);
+		enc_loop_ssse3_inner(s, o);
+		blocks -= 4;
+	}
+	if (blocks >= 2) {
+		enc_loop_ssse3_inner(s, o);
+		enc_loop_ssse3_inner(s, o);
+		blocks -= 2;
+	}
+	if (blocks >= 1) {
+		enc_loop_ssse3_inner(s, o);
+	}
+}
--- a/src/third-party/base64/lib/arch/ssse3/enc_reshuffle.c
+++ b/src/third-party/base64/lib/arch/ssse3/enc_reshuffle.c
@ -0,0 +1,48 @@
+// Spread the low twelve bytes of `in' over sixteen bytes, each holding one
+// 6-bit value in its low bits. The shuffle below only references bytes
+// 0..11 of `in', so bytes 12..15 do not influence the result.
+static inline __m128i
+enc_reshuffle (__m128i in)
+{
+	// Input, bytes MSB to LSB:
+	// 0 0 0 0 l k j i h g f e d c b a
+
+	in = _mm_shuffle_epi8(in, _mm_set_epi8(
+		10, 11,  9, 10,
+		 7,  8,  6,  7,
+		 4,  5,  3,  4,
+		 1,  2,  0,  1));
+	// in, bytes MSB to LSB:
+	// k l j k
+	// h i g h
+	// e f d e
+	// b c a b
+
+	const __m128i t0 = _mm_and_si128(in, _mm_set1_epi32(0x0FC0FC00));
+	// bits, upper case are most significant bits, lower case are least significant bits
+	// 0000kkkk LL000000 JJJJJJ00 00000000
+	// 0000hhhh II000000 GGGGGG00 00000000
+	// 0000eeee FF000000 DDDDDD00 00000000
+	// 0000bbbb CC000000 AAAAAA00 00000000
+
+	// High half of a 16-bit multiply acts as a per-lane right shift:
+	const __m128i t1 = _mm_mulhi_epu16(t0, _mm_set1_epi32(0x04000040));
+	// 00000000 00kkkkLL 00000000 00JJJJJJ
+	// 00000000 00hhhhII 00000000 00GGGGGG
+	// 00000000 00eeeeFF 00000000 00DDDDDD
+	// 00000000 00bbbbCC 00000000 00AAAAAA
+
+	const __m128i t2 = _mm_and_si128(in, _mm_set1_epi32(0x003F03F0));
+	// 00000000 00llllll 000000jj KKKK0000
+	// 00000000 00iiiiii 000000gg HHHH0000
+	// 00000000 00ffffff 000000dd EEEE0000
+	// 00000000 00cccccc 000000aa BBBB0000
+
+	// Low half of a 16-bit multiply acts as a per-lane left shift:
+	const __m128i t3 = _mm_mullo_epi16(t2, _mm_set1_epi32(0x01000010));
+	// 00llllll 00000000 00jjKKKK 00000000
+	// 00iiiiii 00000000 00ggHHHH 00000000
+	// 00ffffff 00000000 00ddEEEE 00000000
+	// 00cccccc 00000000 00aaBBBB 00000000
+
+	return _mm_or_si128(t1, t3);
+	// 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
+	// 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
+	// 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
+	// 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA
+}
--- a/src/third-party/base64/lib/arch/ssse3/enc_translate.c
+++ b/src/third-party/base64/lib/arch/ssse3/enc_translate.c
@ -0,0 +1,33 @@
+// Translate sixteen 6-bit values (one per byte, range 0..63) to the
+// corresponding characters of the Base64 alphabet.
+static inline __m128i
+enc_translate (const __m128i in)
+{
+	// A lookup table containing the absolute offsets for all ranges:
+	const __m128i lut = _mm_setr_epi8(
+		 65,  71, -4, -4,
+		 -4,  -4, -4, -4,
+		 -4,  -4, -4, -4,
+		-19, -16,  0,  0
+	);
+
+	// Translate values 0..63 to the Base64 alphabet. There are five sets:
+	// #  From      To         Abs    Index  Characters
+	// 0  [0..25]   [65..90]   +65        0  ABCDEFGHIJKLMNOPQRSTUVWXYZ
+	// 1  [26..51]  [97..122]  +71        1  abcdefghijklmnopqrstuvwxyz
+	// 2  [52..61]  [48..57]    -4  [2..11]  0123456789
+	// 3  [62]      [43]       -19       12  +
+	// 4  [63]      [47]       -16       13  /
+
+	// Create LUT indices from the input. The index for range #0 is right,
+	// others are 1 less than expected:
+	// (saturating subtract: everything up to 51 becomes index 0)
+	__m128i indices = _mm_subs_epu8(in, _mm_set1_epi8(51));
+
+	// mask is 0xFF (-1) for range #[1..4] and 0x00 for range #0:
+	__m128i mask = _mm_cmpgt_epi8(in, _mm_set1_epi8(25));
+
+	// Subtract -1, so add 1 to indices for range #[1..4]. All indices are
+	// now correct:
+	indices = _mm_sub_epi8(indices, mask);
+
+	// Add offsets to input values:
+	return _mm_add_epi8(in, _mm_shuffle_epi8(lut, indices));
+}
--- a/src/third-party/base64/lib/codec_choose.c
+++ b/src/third-party/base64/lib/codec_choose.c
@ -0,0 +1,281 @@
+#include <stdbool.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "../include/libbase64.h"
+#include "codecs.h"
+#include "config.h"
+#include "env.h"
+
+// Detect x86 targets. MSVC predefines `_M_IX86' (not `_M_X86') when targeting
+// 32-bit x86, so that macro has to be tested for those builds to be
+// recognized; `_M_X86' is kept so that nothing that relied on it changes.
+// BASE64_X86_SIMD is only defined when at least one x86 SIMD codec is enabled
+// in config.h.
+#if (__x86_64__ || __i386__ || _M_IX86 || _M_X86 || _M_X64)
+  #define BASE64_X86
+  #if (HAVE_SSSE3 || HAVE_SSE41 || HAVE_SSE42 || HAVE_AVX || HAVE_AVX2)
+    #define BASE64_X86_SIMD
+  #endif
+#endif
+
+#ifdef BASE64_X86
+#ifdef _MSC_VER
+	#include <intrin.h>
+	// MSVC: provide `__cpuid_count' and `__cpuid' macros with the same
+	// argument order as the ones used in the non-MSVC branch, implemented
+	// on top of the `__cpuidex' intrinsic. The four result registers are
+	// written to the lvalues passed as the last four arguments.
+	#define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx) \
+	{						\
+		int info[4];				\
+		__cpuidex(info, __level, __count);	\
+		__eax = info[0];			\
+		__ebx = info[1];			\
+		__ecx = info[2];			\
+		__edx = info[3];			\
+	}
+	#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \
+		__cpuid_count(__level, 0, __eax, __ebx, __ecx, __edx)
+#else
+	#include <cpuid.h>
+	#if HAVE_AVX2 || HAVE_AVX
+		#if ((__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 2) || (__clang_major__ >= 3))
+			// Execute XGETBV with `index' in ECX and return
+			// the 64-bit result assembled from EDX:EAX.
+			static inline uint64_t _xgetbv (uint32_t index)
+			{
+				uint32_t eax, edx;
+				__asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
+				return ((uint64_t)edx << 32) | eax;
+			}
+		#else
+			#error "Platform not supported"
+		#endif
+	#endif
+#endif
+
+// Feature bits as tested in codec_choose_x86. Each one is only defined here
+// if no header has provided it already.
+
+// Tested against EBX of cpuid leaf 7:
+#ifndef bit_AVX2
+#define bit_AVX2 (1 << 5)
+#endif
+// The following are tested against ECX of cpuid leaf 1:
+#ifndef bit_SSSE3
+#define bit_SSSE3 (1 << 9)
+#endif
+#ifndef bit_SSE41
+#define bit_SSE41 (1 << 19)
+#endif
+#ifndef bit_SSE42
+#define bit_SSE42 (1 << 20)
+#endif
+#ifndef bit_AVX
+#define bit_AVX (1 << 28)
+#endif
+
+// Also tested against ECX of cpuid leaf 1, before XGETBV is executed:
+#define bit_XSAVE_XRSTORE (1 << 27)
+
+// Index passed to _xgetbv:
+#ifndef _XCR_XFEATURE_ENABLED_MASK
+#define _XCR_XFEATURE_ENABLED_MASK 0
+#endif
+
+// Bits 1 and 2 of the value returned by _xgetbv:
+#define _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS 0x6
+#endif
+
+// Function declarations:
+// BASE64_CODEC_FUNCS(arch) declares the encode and decode entry points of one
+// codec. Every codec is declared unconditionally; codec_choose_forced refers
+// to all of them.
+#define BASE64_CODEC_FUNCS(arch)	\
+	BASE64_ENC_FUNCTION(arch);	\
+	BASE64_DEC_FUNCTION(arch);	\
+
+BASE64_CODEC_FUNCS(avx2)
+BASE64_CODEC_FUNCS(neon32)
+BASE64_CODEC_FUNCS(neon64)
+BASE64_CODEC_FUNCS(plain)
+BASE64_CODEC_FUNCS(ssse3)
+BASE64_CODEC_FUNCS(sse41)
+BASE64_CODEC_FUNCS(sse42)
+BASE64_CODEC_FUNCS(avx)
+
+// Select the codec explicitly requested through the BASE64_FORCE_* bits in
+// `flags', if any.
+//
+// A forced codec is always honoured, even when it was compiled out and is
+// therefore only a stub; this exists for testing. When several bits are set,
+// the first match in the order below wins.
+//
+// Returns true and fills in `codec' if a codec was forced, false otherwise
+// (in which case `codec' is left untouched).
+static bool
+codec_choose_forced (struct codec *codec, int flags)
+{
+	// Nothing set in the low byte: no codec was requested.
+	if ((flags & 0xFF) == 0) {
+		return false;
+	}
+
+	if (flags & BASE64_FORCE_AVX2) {
+		codec->enc = base64_stream_encode_avx2;
+		codec->dec = base64_stream_decode_avx2;
+	}
+	else if (flags & BASE64_FORCE_NEON32) {
+		codec->enc = base64_stream_encode_neon32;
+		codec->dec = base64_stream_decode_neon32;
+	}
+	else if (flags & BASE64_FORCE_NEON64) {
+		codec->enc = base64_stream_encode_neon64;
+		codec->dec = base64_stream_decode_neon64;
+	}
+	else if (flags & BASE64_FORCE_PLAIN) {
+		codec->enc = base64_stream_encode_plain;
+		codec->dec = base64_stream_decode_plain;
+	}
+	else if (flags & BASE64_FORCE_SSSE3) {
+		codec->enc = base64_stream_encode_ssse3;
+		codec->dec = base64_stream_decode_ssse3;
+	}
+	else if (flags & BASE64_FORCE_SSE41) {
+		codec->enc = base64_stream_encode_sse41;
+		codec->dec = base64_stream_decode_sse41;
+	}
+	else if (flags & BASE64_FORCE_SSE42) {
+		codec->enc = base64_stream_encode_sse42;
+		codec->dec = base64_stream_decode_sse42;
+	}
+	else if (flags & BASE64_FORCE_AVX) {
+		codec->enc = base64_stream_encode_avx;
+		codec->dec = base64_stream_decode_avx;
+	}
+	else {
+		// Some low bit was set, but none that names a codec.
+		return false;
+	}
+
+	return true;
+}
+
+// Select a NEON codec on ARM builds.
+//
+// The decision is made entirely at compile time: the NEON64 codec is used
+// when building for AArch64 with HAVE_NEON64 enabled, otherwise the NEON32
+// codec when HAVE_NEON32 is enabled. Returns true if `codec' was filled in,
+// false (leaving `codec' untouched) when no NEON codec applies.
+static bool
+codec_choose_arm (struct codec *codec)
+{
+#if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && ((defined(__aarch64__) && HAVE_NEON64) || HAVE_NEON32)
+
+	// Unfortunately there is no portable way to check for NEON
+	// support at runtime from userland in the same way that x86
+	// has cpuid, so just stick to the compile-time configuration:
+
+	#if defined(__aarch64__) && HAVE_NEON64
+	codec->enc = base64_stream_encode_neon64;
+	codec->dec = base64_stream_decode_neon64;
+	#else
+	codec->enc = base64_stream_encode_neon32;
+	codec->dec = base64_stream_decode_neon32;
+	#endif
+
+	return true;
+
+#else
+	// Silence the unused-parameter warning:
+	(void)codec;
+	return false;
+#endif
+}
+
+// Select the best x86 SIMD codec that is both compiled in (HAVE_* in
+// config.h) and reported as supported by cpuid at runtime.
+//
+// Codecs are tried in the order AVX2, AVX, SSE4.2, SSE4.1, SSSE3. Returns
+// true if `codec' was filled in, false (leaving `codec' untouched) when no
+// SIMD codec is usable or none was compiled in.
+static bool
+codec_choose_x86 (struct codec *codec)
+{
+#ifdef BASE64_X86_SIMD
+
+	unsigned int eax, ebx = 0, ecx = 0, edx;
+	unsigned int max_level;
+
+	// Highest cpuid leaf supported by this CPU:
+	#ifdef _MSC_VER
+	int info[4];
+	__cpuidex(info, 0, 0);
+	max_level = info[0];
+	#else
+	max_level = __get_cpuid_max(0, NULL);
+	#endif
+
+	#if HAVE_AVX2 || HAVE_AVX
+	// Check for AVX/AVX2 support:
+	// Checking for AVX requires 3 things:
+	// 1) CPUID indicates that the OS uses XSAVE and XRSTORE instructions
+	//    (allowing saving YMM registers on context switch)
+	// 2) CPUID indicates support for AVX
+	// 3) XGETBV indicates the AVX registers will be saved and restored on
+	//    context switch
+	//
+	// Note that XGETBV is only available on 686 or later CPUs, so the
+	// instruction needs to be conditionally run.
+	if (max_level >= 1) {
+		__cpuid_count(1, 0, eax, ebx, ecx, edx);
+		if (ecx & bit_XSAVE_XRSTORE) {
+			uint64_t xcr_mask;
+			xcr_mask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
+			// Both the XMM and the YMM state bit have to be set;
+			// a plain non-zero test would also accept the case
+			// where only one of them is enabled:
+			if ((xcr_mask & _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS) == _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS) {
+				#if HAVE_AVX2
+				if (max_level >= 7) {
+					__cpuid_count(7, 0, eax, ebx, ecx, edx);
+					if (ebx & bit_AVX2) {
+						codec->enc = base64_stream_encode_avx2;
+						codec->dec = base64_stream_decode_avx2;
+						return true;
+					}
+				}
+				#endif
+				#if HAVE_AVX
+				// Query leaf 1 again: the leaf 7 query above
+				// may have overwritten the registers.
+				__cpuid_count(1, 0, eax, ebx, ecx, edx);
+				if (ecx & bit_AVX) {
+					codec->enc = base64_stream_encode_avx;
+					codec->dec = base64_stream_decode_avx;
+					return true;
+				}
+				#endif
+			}
+		}
+	}
+	#endif
+
+	#if HAVE_SSE42
+	// Check for SSE42 support:
+	if (max_level >= 1) {
+		__cpuid(1, eax, ebx, ecx, edx);
+		if (ecx & bit_SSE42) {
+			codec->enc = base64_stream_encode_sse42;
+			codec->dec = base64_stream_decode_sse42;
+			return true;
+		}
+	}
+	#endif
+
+	#if HAVE_SSE41
+	// Check for SSE41 support:
+	if (max_level >= 1) {
+		__cpuid(1, eax, ebx, ecx, edx);
+		if (ecx & bit_SSE41) {
+			codec->enc = base64_stream_encode_sse41;
+			codec->dec = base64_stream_decode_sse41;
+			return true;
+		}
+	}
+	#endif
+
+	#if HAVE_SSSE3
+	// Check for SSSE3 support:
+	if (max_level >= 1) {
+		__cpuid(1, eax, ebx, ecx, edx);
+		if (ecx & bit_SSSE3) {
+			codec->enc = base64_stream_encode_ssse3;
+			codec->dec = base64_stream_decode_ssse3;
+			return true;
+		}
+	}
+	#endif
+
+#else
+	// Silence the unused-parameter warning:
+	(void)codec;
+#endif
+
+	return false;
+}
+
+// Fill in `codec' with the encode/decode pair to use.
+//
+// Preference order: a codec forced through `flags', then the ARM selection,
+// then the x86 selection. If none of them picks a codec, the plain codec is
+// used.
+void
+codec_choose (struct codec *codec, int flags)
+{
+	// Short-circuit evaluation keeps the order and stops at the first
+	// selector that fills in the codec:
+	const bool chosen =
+		codec_choose_forced(codec, flags) ||
+		codec_choose_arm(codec) ||
+		codec_choose_x86(codec);
+
+	if (!chosen) {
+		codec->enc = base64_stream_encode_plain;
+		codec->dec = base64_stream_decode_plain;
+	}
+}
--- a/src/third-party/base64/lib/codecs.h
+++ b/src/third-party/base64/lib/codecs.h
@ -0,0 +1,65 @@
+#ifndef BASE64_CODECS_H
+#define BASE64_CODECS_H
+
+// Internal declarations shared by the codec implementations and the codec
+// selection code: the parameter lists and signatures of the stream functions,
+// stub bodies for codecs that are compiled out, and the `struct codec'
+// dispatch pair. The include guard keeps `struct codec' from being defined
+// twice when this header is reached more than once in a translation unit.
+
+#include <stdint.h>
+#include <stddef.h>
+
+#include "../include/libbase64.h"
+#include "config.h"
+
+// Function parameters for encoding functions:
+#define BASE64_ENC_PARAMS			\
+	( struct base64_state	*state		\
+	, const char		*src		\
+	, size_t		 srclen		\
+	, char			*out		\
+	, size_t		*outlen		\
+	)
+
+// Function parameters for decoding functions:
+#define BASE64_DEC_PARAMS			\
+	( struct base64_state	*state		\
+	, const char		*src		\
+	, size_t		 srclen		\
+	, char			*out		\
+	, size_t		*outlen		\
+	)
+
+// Function signature for encoding functions. Expands to
+// `void base64_stream_encode_<arch> (...)':
+#define BASE64_ENC_FUNCTION(arch)		\
+	void					\
+	base64_stream_encode_ ## arch		\
+	BASE64_ENC_PARAMS
+
+// Function signature for decoding functions. Expands to
+// `int base64_stream_decode_<arch> (...)':
+#define BASE64_DEC_FUNCTION(arch)		\
+	int					\
+	base64_stream_decode_ ## arch		\
+	BASE64_DEC_PARAMS
+
+// Cast away unused variable, silence compiler:
+#define UNUSED(x)		((void)(x))
+
+// Stub function body when encoder arch unsupported. Touches no input and
+// reports zero bytes written through `outlen':
+#define BASE64_ENC_STUB				\
+	UNUSED(state);				\
+	UNUSED(src);				\
+	UNUSED(srclen);				\
+	UNUSED(out);				\
+						\
+	*outlen = 0;
+
+// Stub function body when decoder arch unsupported. Touches nothing and
+// returns -1:
+#define BASE64_DEC_STUB				\
+	UNUSED(state);				\
+	UNUSED(src);				\
+	UNUSED(srclen);				\
+	UNUSED(out);				\
+	UNUSED(outlen);				\
+						\
+	return -1;
+
+// A matched pair of stream encoder and stream decoder function pointers:
+struct codec
+{
+	void (* enc) BASE64_ENC_PARAMS;
+	int  (* dec) BASE64_DEC_PARAMS;
+};
+
+// Fills in `enc' and `dec' of the given codec; `flags' may force a choice:
+extern void codec_choose (struct codec *, int flags);
+
+#endif	// BASE64_CODECS_H
--- a/src/third-party/base64/lib/config.h
+++ b/src/third-party/base64/lib/config.h
@ -0,0 +1,7 @@
+// Compile-time switches for the optional SIMD codecs. Every switch is 0 here,
+// so none of the `#if HAVE_...' sections guarded by them is compiled.
+
+// Switches tested by the x86 codec selection:
+#define HAVE_SSSE3  0
+#define HAVE_SSE41  0
+#define HAVE_SSE42  0
+#define HAVE_AVX    0
+#define HAVE_AVX2   0
+
+// NEON switches:
+#define HAVE_NEON32 0
+#define HAVE_NEON64 0
--- a/src/third-party/base64/lib/env.h
+++ b/src/third-party/base64/lib/env.h
@ -0,0 +1,74 @@
+#ifndef BASE64_ENV_H
+#define BASE64_ENV_H
+
+// This header file contains macro definitions that describe certain aspects of
+// the compile-time environment. Compatibility and portability macros go here.
+
+// Define machine endianness as BASE64_LITTLE_ENDIAN (1 or 0). The macro is
+// defined exactly once, so the preprocessor never sees a conflicting
+// redefinition, and the byte-order macros are only compared when they exist
+// (two undefined macros would otherwise compare equal as 0 == 0).
+#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__)
+// GCC, and any other compiler that provides the byte-order macros:
+#  if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+#    define BASE64_LITTLE_ENDIAN 1
+#  else
+#    define BASE64_LITTLE_ENDIAN 0
+#  endif
+#elif defined(__BIG_ENDIAN__)
+// Compilers that only announce big endian through __BIG_ENDIAN__:
+#  define BASE64_LITTLE_ENDIAN 0
+#else
+// Either __LITTLE_ENDIAN__ is defined or nothing is known about the byte
+// order; an undetected byte order is treated as little endian.
+#  define BASE64_LITTLE_ENDIAN 1
+#endif
+
+// Host-to-big-endian conversion macros, BASE64_HTOBE32() and BASE64_HTOBE64():
+#if !BASE64_LITTLE_ENDIAN
+// Not little endian, no conversion needed:
+#  define BASE64_HTOBE32(x)	(x)
+#  define BASE64_HTOBE64(x)	(x)
+#elif defined(_MSC_VER)
+// Little endian, Microsoft Visual C++. MSVC++ needs intrin.h for
+// _byteswap_uint64 (issue #68):
+#  include <intrin.h>
+#  define BASE64_HTOBE32(x)	_byteswap_ulong(x)
+#  define BASE64_HTOBE64(x)	_byteswap_uint64(x)
+#else
+// Little endian, GCC and Clang:
+#  define BASE64_HTOBE32(x)	__builtin_bswap32(x)
+#  define BASE64_HTOBE64(x)	__builtin_bswap64(x)
+#endif
+
+// Detect word size and publish it as BASE64_WORDSIZE. The first branch that
+// matches wins, so the order of the checks below is significant. When no
+// branch matches, compilation stops with an #error.
+#if defined (__x86_64__)
+// This also works for the x32 ABI, which has a 64-bit word size.
+#  define BASE64_WORDSIZE 64
+// NOTE(review): _INTEGRAL_MAX_BITS is presumably the MSVC-provided macro;
+// confirm that it reflects the word size on 32-bit targets.
+#elif defined (_INTEGRAL_MAX_BITS)
+#  define BASE64_WORDSIZE _INTEGRAL_MAX_BITS
+#elif defined (__WORDSIZE)
+#  define BASE64_WORDSIZE __WORDSIZE
+#elif defined (__SIZE_WIDTH__)
+#  define BASE64_WORDSIZE __SIZE_WIDTH__
+#else
+#  error BASE64_WORDSIZE_NOT_DEFINED
+#endif
+
+// End-of-file definitions.
+// Almost end-of-file when waiting for the last '=' character:
+#define BASE64_AEOF 1
+// End-of-file when stream end has been reached or invalid input provided:
+#define BASE64_EOF 2
+
+// GCC 7 defaults to issuing a warning for fallthrough in switch statements,
+// unless the fallthrough cases are marked with an attribute. As we use
+// fallthrough deliberately, define an alias for the attribute.
+// Note that the non-empty expansion already ends in a semicolon, while the
+// fallback expands to nothing, so the macro is written without a trailing
+// semicolon of its own at the point of use:
+#if __GNUC__ >= 7
+#  define BASE64_FALLTHROUGH  __attribute__((fallthrough));
+#else
+#  define BASE64_FALLTHROUGH
+#endif
+
+#endif	// BASE64_ENV_H
--- a/src/third-party/base64/lib/lib.c
+++ b/src/third-party/base64/lib/lib.c
@ -0,0 +1,175 @@
+#include <stdint.h>
+#include <stddef.h>
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+#include "../include/libbase64.h"
+#include "tables/tables.h"
+#include "codecs.h"
+#include "env.h"
+
+// These static function pointers are initialized once when the library is
+// first used, and remain in use for the remaining lifetime of the program.
+// The idea being that CPU features don't change at runtime.
+static struct codec codec = { NULL, NULL };
+
+// Function declarations. Declares the stream encoder and the stream decoder
+// of one codec architecture. The last line of the macro carries no line
+// continuation, so the definition ends here and cannot absorb whatever line
+// happens to follow it:
+#define BASE64_CODEC_FUNCS(arch)	\
+	BASE64_ENC_FUNCTION(arch);	\
+	BASE64_DEC_FUNCTION(arch);
+
+BASE64_CODEC_FUNCS(plain)
+
+// Prepare `state' for a new encoding stream and make sure a codec is
+// installed. The call to codec_choose() is disabled here, so the plain
+// encoder/decoder pair is always the one that gets installed.
+void
+base64_stream_encode_init (struct base64_state *state, int flags)
+{
+	// (Re)install the codec when none is set yet, or when any of the
+	// codec flags (low eight bits of `flags') are set:
+	const int install = (codec.enc == NULL) || ((flags & 0xFF) != 0);
+
+	if (install) {
+		codec.enc = base64_stream_encode_plain;
+		codec.dec = base64_stream_decode_plain;
+	}
+
+	// Start from a clean stream state, remembering the caller's flags:
+	state->flags = flags;
+	state->carry = 0;
+	state->bytes = 0;
+	state->eof = 0;
+}
+
+// Encode `srclen' bytes from `src' into `out' by forwarding all arguments to
+// the encoder currently installed in the file-level `codec'; that encoder
+// reports the output length through `outlen'.
+// `codec.enc' starts out as NULL and is only set by the stream init
+// functions, so one of them has to run before this is called.
+void
+base64_stream_encode
+	( struct base64_state	*state
+	, const char		*src
+	, size_t		 srclen
+	, char			*out
+	, size_t		*outlen
+	)
+{
+	codec.enc(state, src, srclen, out, outlen);
+}
+
+// Finish an encoding stream: write the trailer that is still owed for the
+// input bytes recorded in `state->bytes' to `out', and store the number of
+// characters written (0, 2 or 3) in `outlen'.
+void
+base64_stream_encode_final
+	( struct base64_state	*state
+	, char			*out
+	, size_t		*outlen
+	)
+{
+	uint8_t *dst = (uint8_t *)out;
+
+	switch (state->bytes) {
+	case 1:
+		// One pending byte: carry character plus two padding characters.
+		dst[0] = base64_table_enc_6bit[state->carry];
+		dst[1] = '=';
+		dst[2] = '=';
+		*outlen = 3;
+		break;
+
+	case 2:
+		// Two pending bytes: carry character plus one padding character.
+		dst[0] = base64_table_enc_6bit[state->carry];
+		dst[1] = '=';
+		*outlen = 2;
+		break;
+
+	default:
+		// Nothing pending, no trailer to write.
+		*outlen = 0;
+		break;
+	}
+}
+
+// Prepare `state' for a new decoding stream and make sure a codec is
+// installed. The call to codec_choose() is disabled here, so the plain
+// encoder/decoder pair is always the one that gets installed.
+void
+base64_stream_decode_init (struct base64_state *state, int flags)
+{
+	// (Re)install the codec when no decoder is set yet, or when any of
+	// the codec flags (low eight bits of `flags') are set:
+	const int install = (codec.dec == NULL) || ((flags & 0xFF) != 0);
+
+	if (install) {
+		codec.enc = base64_stream_encode_plain;
+		codec.dec = base64_stream_decode_plain;
+	}
+
+	// Start from a clean stream state, remembering the caller's flags:
+	state->flags = flags;
+	state->carry = 0;
+	state->bytes = 0;
+	state->eof = 0;
+}
+
+// Decode `srclen' characters from `src' into `out' by forwarding all
+// arguments to the decoder currently installed in the file-level `codec',
+// and return that decoder's result unchanged.
+// `codec.dec' starts out as NULL and is only set by the stream init
+// functions, so one of them has to run before this is called.
+int
+base64_stream_decode
+	( struct base64_state	*state
+	, const char		*src
+	, size_t		 srclen
+	, char			*out
+	, size_t		*outlen
+	)
+{
+	return codec.dec(state, src, srclen, out, outlen);
+}
+
+// Everything in this section only exists in builds where _OPENMP is defined.
+#ifdef _OPENMP
+
+	// Due to the overhead of initializing OpenMP and creating a team of
+	// threads, we require the data length to be larger than a threshold.
+	// base64_encode() and base64_decode() take the OpenMP path when
+	// `srclen >= OMP_THRESHOLD':
+	#define OMP_THRESHOLD 20000
+
+	// Conditionally include OpenMP-accelerated codec implementations.
+	// The .c file is included textually, so its static functions become
+	// part of this translation unit:
+	#include "lib_openmp.c"
+#endif
+
+// One-shot encoder: encode `srclen' bytes from `src' into `out' and store the
+// total number of characters written, trailer included, in `outlen'.
+// In OpenMP builds, inputs of at least OMP_THRESHOLD bytes are handed to
+// base64_encode_openmp() instead.
+void
+base64_encode
+	( const char	*src
+	, size_t	 srclen
+	, char		*out
+	, size_t	*outlen
+	, int		 flags
+	)
+{
+	struct base64_state state;
+	size_t body_len;
+	size_t tail_len;
+
+	#ifdef _OPENMP
+	if (srclen >= OMP_THRESHOLD) {
+		base64_encode_openmp(src, srclen, out, outlen, flags);
+		return;
+	}
+	#endif
+
+	// Set up the stream, push the whole input through it in a single
+	// call, then append the trailer (if any) right behind the body:
+	base64_stream_encode_init(&state, flags);
+	base64_stream_encode(&state, src, srclen, out, &body_len);
+	base64_stream_encode_final(&state, out + body_len, &tail_len);
+
+	// The result is the stream output plus the trailer:
+	*outlen = body_len + tail_len;
+}
+
+// One-shot decoder: decode `srclen' characters from `src' into `out'; the
+// stream decoder reports the output length through `outlen'.
+// Returns the stream decoder's result when no partial input group is left
+// pending (`state.bytes == 0'), and 0 otherwise.
+// In OpenMP builds, inputs of at least OMP_THRESHOLD characters are handed to
+// base64_decode_openmp() instead.
+int
+base64_decode
+	( const char	*src
+	, size_t	 srclen
+	, char		*out
+	, size_t	*outlen
+	, int		 flags
+	)
+{
+	struct base64_state state;
+	int status;
+
+	#ifdef _OPENMP
+	if (srclen >= OMP_THRESHOLD) {
+		return base64_decode_openmp(src, srclen, out, outlen, flags);
+	}
+	#endif
+
+	// Set up the stream and push the whole input through it in one call:
+	base64_stream_decode_init(&state, flags);
+	status = base64_stream_decode(&state, src, srclen, out, outlen);
+
+	// A whole buffer was supplied, so a decoder that is still waiting in
+	// the middle of a group means failure:
+	return (state.bytes == 0) ? status : 0;
+}
--- a/src/third-party/base64/lib/lib_openmp.c
+++ b/src/third-party/base64/lib/lib_openmp.c
@ -0,0 +1,149 @@
+// This code makes some assumptions on the implementation of
+// base64_stream_encode_init(), base64_stream_encode() and base64_stream_decode().
+// Basically these assumptions boil down to that when breaking the src into
+// parts, out parts can be written without side effects.
+// This is met when:
+// 1) base64_stream_encode() and base64_stream_decode() don't use globals;
+// 2) the shared variables src and out are not read or written outside of the
+//    bounds of their parts, i.e.  when base64_stream_encode() reads a multiple
+//    of 3 bytes, it must write no more than a multiple of 4 bytes, not even
+//    temporarily;
+// 3) the state flag can be discarded after base64_stream_encode() and
+//    base64_stream_decode() on the parts.
+
+// OpenMP variant of base64_encode(). The input is cut into `num_threads'
+// chunks of `len' bytes each (`len' is a multiple of 3); every loop iteration
+// encodes one chunk with its own copy of the freshly initialized state and
+// writes at output offset `i * len * 4 / 3'. The `last_len' bytes that remain
+// are encoded afterwards, starting again from the initial state, followed by
+// the trailer. The total number of characters written goes to `outlen'.
+static inline void
+base64_encode_openmp
+	( const char	*src
+	, size_t	 srclen
+	, char		*out
+	, size_t	*outlen
+	, int		 flags
+	)
+{
+	size_t s;
+	size_t t;
+	size_t sum = 0, len, last_len;
+	struct base64_state state, initial_state;
+	int num_threads, i;
+
+	// Request a number of threads but not necessarily get them:
+	#pragma omp parallel
+	{
+		// Get the number of threads used from one thread only,
+		// as num_threads is a shared var:
+		#pragma omp single
+		{
+			num_threads = omp_get_num_threads();
+
+			// Split the input string into num_threads parts, each
+			// part a multiple of 3 bytes. The remaining bytes will
+			// be done later:
+			len = srclen / (num_threads * 3);
+			len *= 3;
+			last_len = srclen - num_threads * len;
+
+			// Init the stream reader:
+			base64_stream_encode_init(&state, flags);
+			initial_state = state;
+		}
+
+		// Single has an implicit barrier for all threads to wait here
+		// for the above to complete:
+		// (firstprivate gives each thread its own copy of the
+		// initialized state; the per-chunk lengths are summed by the
+		// reduction.)
+		#pragma omp for firstprivate(state) private(s) reduction(+:sum) schedule(static,1)
+		for (i = 0; i < num_threads; i++)
+		{
+			// Feed each part of the string to the stream reader:
+			base64_stream_encode(&state, src + i * len, len, out + i * len * 4 / 3, &s);
+			sum += s;
+		}
+	}
+
+	// As encoding should never fail and we encode an exact multiple
+	// of 3 bytes, we can discard state:
+	state = initial_state;
+
+	// Encode the remaining bytes:
+	base64_stream_encode(&state, src + num_threads * len, last_len, out + num_threads * len * 4 / 3, &s);
+
+	// Finalize the stream by writing trailer if any:
+	base64_stream_encode_final(&state, out + num_threads * len * 4 / 3 + s, &t);
+
+	// Final output length is stream length plus tail:
+	sum += s + t;
+	*outlen = sum;
+}
+
+// OpenMP variant of base64_decode(). The input is cut into `num_threads'
+// chunks of `len' characters each (`len' is a multiple of 4); every loop
+// iteration decodes one chunk with its own copy of the freshly initialized
+// state and writes at output offset `i * len * 3 / 4'. The per-chunk return
+// values are added up in `result' to tell the outcomes apart. The `last_len'
+// characters that remain are decoded afterwards, starting again from the
+// initial state.
+// Returns -1 when every chunk returned -1, 0 when the chunk results do not
+// add up to `num_threads', and otherwise the outcome of decoding the
+// remainder (its result if `state.bytes == 0', else 0).
+// NOTE(review): the two early returns below leave `*outlen' unwritten --
+// confirm that callers do not read it after a failed decode.
+static inline int
+base64_decode_openmp
+	( const char	*src
+	, size_t	 srclen
+	, char		*out
+	, size_t	*outlen
+	, int		 flags
+	)
+{
+	int num_threads, result = 0, i;
+	size_t sum = 0, len, last_len, s;
+	struct base64_state state, initial_state;
+
+	// Request a number of threads but not necessarily get them:
+	#pragma omp parallel
+	{
+		// Get the number of threads used from one thread only,
+		// as num_threads is a shared var:
+		#pragma omp single
+		{
+			num_threads = omp_get_num_threads();
+
+			// Split the input string into num_threads parts, each
+			// part a multiple of 4 bytes. The remaining bytes will
+			// be done later:
+			len = srclen / (num_threads * 4);
+			len *= 4;
+			last_len = srclen - num_threads * len;
+
+			// Init the stream reader:
+			base64_stream_decode_init(&state, flags);
+
+			initial_state = state;
+		}
+
+		// Single has an implicit barrier to wait here for the above to
+		// complete:
+		// (firstprivate gives each thread its own copy of the
+		// initialized state; output lengths and return values are
+		// both summed by the reduction.)
+		#pragma omp for firstprivate(state) private(s) reduction(+:sum, result) schedule(static,1)
+		for (i = 0; i < num_threads; i++)
+		{
+			int this_result;
+
+			// Feed each part of the string to the stream reader:
+			this_result = base64_stream_decode(&state, src + i * len, len, out + i * len * 3 / 4, &s);
+			sum += s;
+			result += this_result;
+		}
+	}
+
+	// If `result' equals `-num_threads', then all threads returned -1,
+	// indicating that the requested codec is not available:
+	if (result == -num_threads) {
+		return -1;
+	}
+
+	// If `result' does not equal `num_threads', then at least one of the
+	// threads hit a decode error:
+	if (result != num_threads) {
+		return 0;
+	}
+
+	// So far so good, now decode whatever remains in the buffer. Reuse the
+	// initial state, since we are at a 4-byte boundary:
+	state = initial_state;
+	result = base64_stream_decode(&state, src + num_threads * len, last_len, out + num_threads * len * 3 / 4, &s);
+	sum += s;
+	*outlen = sum;
+
+	// If when decoding a whole block, we're still waiting for input then fail:
+	if (result && (state.bytes == 0)) {
+		return result;
+	}
+	return 0;
+}
--- a/src/third-party/base64/lib/tables/table_dec_32bit.h
+++ b/src/third-party/base64/lib/tables/table_dec_32bit.h
@ -0,0 +1,393 @@
+#include <stdint.h>
+#define CHAR62 '+'
+#define CHAR63 '/'
+#define CHARPAD '='
+
+
+#if BASE64_LITTLE_ENDIAN
+
+
+/* SPECIAL DECODE TABLES FOR LITTLE ENDIAN (INTEL) CPUS */
+
+const uint32_t base64_table_dec_32bit_d0[256] = {
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0x000000f8, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000fc,
+0x000000d0, 0x000000d4, 0x000000d8, 0x000000dc, 0x000000e0, 0x000000e4,
+0x000000e8, 0x000000ec, 0x000000f0, 0x000000f4, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
+0x00000004, 0x00000008, 0x0000000c, 0x00000010, 0x00000014, 0x00000018,
+0x0000001c, 0x00000020, 0x00000024, 0x00000028, 0x0000002c, 0x00000030,
+0x00000034, 0x00000038, 0x0000003c, 0x00000040, 0x00000044, 0x00000048,
+0x0000004c, 0x00000050, 0x00000054, 0x00000058, 0x0000005c, 0x00000060,
+0x00000064, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0x00000068, 0x0000006c, 0x00000070, 0x00000074, 0x00000078,
+0x0000007c, 0x00000080, 0x00000084, 0x00000088, 0x0000008c, 0x00000090,
+0x00000094, 0x00000098, 0x0000009c, 0x000000a0, 0x000000a4, 0x000000a8,
+0x000000ac, 0x000000b0, 0x000000b4, 0x000000b8, 0x000000bc, 0x000000c0,
+0x000000c4, 0x000000c8, 0x000000cc, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
+};
+
+
+const uint32_t base64_table_dec_32bit_d1[256] = {
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0x0000e003, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000f003,
+0x00004003, 0x00005003, 0x00006003, 0x00007003, 0x00008003, 0x00009003,
+0x0000a003, 0x0000b003, 0x0000c003, 0x0000d003, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
+0x00001000, 0x00002000, 0x00003000, 0x00004000, 0x00005000, 0x00006000,
+0x00007000, 0x00008000, 0x00009000, 0x0000a000, 0x0000b000, 0x0000c000,
+0x0000d000, 0x0000e000, 0x0000f000, 0x00000001, 0x00001001, 0x00002001,
+0x00003001, 0x00004001, 0x00005001, 0x00006001, 0x00007001, 0x00008001,
+0x00009001, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0x0000a001, 0x0000b001, 0x0000c001, 0x0000d001, 0x0000e001,
+0x0000f001, 0x00000002, 0x00001002, 0x00002002, 0x00003002, 0x00004002,
+0x00005002, 0x00006002, 0x00007002, 0x00008002, 0x00009002, 0x0000a002,
+0x0000b002, 0x0000c002, 0x0000d002, 0x0000e002, 0x0000f002, 0x00000003,
+0x00001003, 0x00002003, 0x00003003, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
+};
+
+
+const uint32_t base64_table_dec_32bit_d2[256] = {
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0x00800f00, 0xffffffff, 0xffffffff, 0xffffffff, 0x00c00f00,
+0x00000d00, 0x00400d00, 0x00800d00, 0x00c00d00, 0x00000e00, 0x00400e00,
+0x00800e00, 0x00c00e00, 0x00000f00, 0x00400f00, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
+0x00400000, 0x00800000, 0x00c00000, 0x00000100, 0x00400100, 0x00800100,
+0x00c00100, 0x00000200, 0x00400200, 0x00800200, 0x00c00200, 0x00000300,
+0x00400300, 0x00800300, 0x00c00300, 0x00000400, 0x00400400, 0x00800400,
+0x00c00400, 0x00000500, 0x00400500, 0x00800500, 0x00c00500, 0x00000600,
+0x00400600, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0x00800600, 0x00c00600, 0x00000700, 0x00400700, 0x00800700,
+0x00c00700, 0x00000800, 0x00400800, 0x00800800, 0x00c00800, 0x00000900,
+0x00400900, 0x00800900, 0x00c00900, 0x00000a00, 0x00400a00, 0x00800a00,
+0x00c00a00, 0x00000b00, 0x00400b00, 0x00800b00, 0x00c00b00, 0x00000c00,
+0x00400c00, 0x00800c00, 0x00c00c00, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
+};
+
+
+const uint32_t base64_table_dec_32bit_d3[256] = {
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0x003e0000, 0xffffffff, 0xffffffff, 0xffffffff, 0x003f0000,
+0x00340000, 0x00350000, 0x00360000, 0x00370000, 0x00380000, 0x00390000,
+0x003a0000, 0x003b0000, 0x003c0000, 0x003d0000, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
+0x00010000, 0x00020000, 0x00030000, 0x00040000, 0x00050000, 0x00060000,
+0x00070000, 0x00080000, 0x00090000, 0x000a0000, 0x000b0000, 0x000c0000,
+0x000d0000, 0x000e0000, 0x000f0000, 0x00100000, 0x00110000, 0x00120000,
+0x00130000, 0x00140000, 0x00150000, 0x00160000, 0x00170000, 0x00180000,
+0x00190000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0x001a0000, 0x001b0000, 0x001c0000, 0x001d0000, 0x001e0000,
+0x001f0000, 0x00200000, 0x00210000, 0x00220000, 0x00230000, 0x00240000,
+0x00250000, 0x00260000, 0x00270000, 0x00280000, 0x00290000, 0x002a0000,
+0x002b0000, 0x002c0000, 0x002d0000, 0x002e0000, 0x002f0000, 0x00300000,
+0x00310000, 0x00320000, 0x00330000, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
+};
+
+
+#else
+
+
+/* SPECIAL DECODE TABLES FOR BIG ENDIAN (IBM/MOTOROLA/SUN) CPUS */
+
+const uint32_t base64_table_dec_32bit_d0[256] = {
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xf8000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xfc000000,
+0xd0000000, 0xd4000000, 0xd8000000, 0xdc000000, 0xe0000000, 0xe4000000,
+0xe8000000, 0xec000000, 0xf0000000, 0xf4000000, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
+0x04000000, 0x08000000, 0x0c000000, 0x10000000, 0x14000000, 0x18000000,
+0x1c000000, 0x20000000, 0x24000000, 0x28000000, 0x2c000000, 0x30000000,
+0x34000000, 0x38000000, 0x3c000000, 0x40000000, 0x44000000, 0x48000000,
+0x4c000000, 0x50000000, 0x54000000, 0x58000000, 0x5c000000, 0x60000000,
+0x64000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0x68000000, 0x6c000000, 0x70000000, 0x74000000, 0x78000000,
+0x7c000000, 0x80000000, 0x84000000, 0x88000000, 0x8c000000, 0x90000000,
+0x94000000, 0x98000000, 0x9c000000, 0xa0000000, 0xa4000000, 0xa8000000,
+0xac000000, 0xb0000000, 0xb4000000, 0xb8000000, 0xbc000000, 0xc0000000,
+0xc4000000, 0xc8000000, 0xcc000000, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
+};
+
+
+const uint32_t base64_table_dec_32bit_d1[256] = {
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0x03e00000, 0xffffffff, 0xffffffff, 0xffffffff, 0x03f00000,
+0x03400000, 0x03500000, 0x03600000, 0x03700000, 0x03800000, 0x03900000,
+0x03a00000, 0x03b00000, 0x03c00000, 0x03d00000, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
+0x00100000, 0x00200000, 0x00300000, 0x00400000, 0x00500000, 0x00600000,
+0x00700000, 0x00800000, 0x00900000, 0x00a00000, 0x00b00000, 0x00c00000,
+0x00d00000, 0x00e00000, 0x00f00000, 0x01000000, 0x01100000, 0x01200000,
+0x01300000, 0x01400000, 0x01500000, 0x01600000, 0x01700000, 0x01800000,
+0x01900000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0x01a00000, 0x01b00000, 0x01c00000, 0x01d00000, 0x01e00000,
+0x01f00000, 0x02000000, 0x02100000, 0x02200000, 0x02300000, 0x02400000,
+0x02500000, 0x02600000, 0x02700000, 0x02800000, 0x02900000, 0x02a00000,
+0x02b00000, 0x02c00000, 0x02d00000, 0x02e00000, 0x02f00000, 0x03000000,
+0x03100000, 0x03200000, 0x03300000, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
+};
+
+
+const uint32_t base64_table_dec_32bit_d2[256] = {	// indexed by input byte: (6-bit Base64 value << 14), or 0xffffffff if the byte is not in the alphabet
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0x000f8000, 0xffffffff, 0xffffffff, 0xffffffff, 0x000fc000,	// bytes 42..47: '+' (43) = 62 << 14, '/' (47) = 63 << 14
+0x000d0000, 0x000d4000, 0x000d8000, 0x000dc000, 0x000e0000, 0x000e4000,	// bytes 48..53: digits start at '0' = 52 << 14
+0x000e8000, 0x000ec000, 0x000f0000, 0x000f4000, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,	// bytes 60..65: '=' (61) is flagged invalid in this table; 'A' (65) = 0
+0x00004000, 0x00008000, 0x0000c000, 0x00010000, 0x00014000, 0x00018000,
+0x0001c000, 0x00020000, 0x00024000, 0x00028000, 0x0002c000, 0x00030000,
+0x00034000, 0x00038000, 0x0003c000, 0x00040000, 0x00044000, 0x00048000,
+0x0004c000, 0x00050000, 0x00054000, 0x00058000, 0x0005c000, 0x00060000,
+0x00064000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0x00068000, 0x0006c000, 0x00070000, 0x00074000, 0x00078000,	// bytes 96..101: lowercase starts at 'a' (97) = 26 << 14
+0x0007c000, 0x00080000, 0x00084000, 0x00088000, 0x0008c000, 0x00090000,
+0x00094000, 0x00098000, 0x0009c000, 0x000a0000, 0x000a4000, 0x000a8000,
+0x000ac000, 0x000b0000, 0x000b4000, 0x000b8000, 0x000bc000, 0x000c0000,
+0x000c4000, 0x000c8000, 0x000cc000, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,	// bytes 126..255: everything from here on is invalid
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
+};
+
+
+const uint32_t base64_table_dec_32bit_d3[256] = {	// indexed by input byte: (6-bit Base64 value << 8), or 0xffffffff if the byte is not in the alphabet
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0x00003e00, 0xffffffff, 0xffffffff, 0xffffffff, 0x00003f00,	// bytes 42..47: '+' (43) = 62 << 8, '/' (47) = 63 << 8
+0x00003400, 0x00003500, 0x00003600, 0x00003700, 0x00003800, 0x00003900,	// bytes 48..53: digits start at '0' = 52 << 8
+0x00003a00, 0x00003b00, 0x00003c00, 0x00003d00, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,	// bytes 60..65: '=' (61) is flagged invalid in this table; 'A' (65) = 0
+0x00000100, 0x00000200, 0x00000300, 0x00000400, 0x00000500, 0x00000600,
+0x00000700, 0x00000800, 0x00000900, 0x00000a00, 0x00000b00, 0x00000c00,
+0x00000d00, 0x00000e00, 0x00000f00, 0x00001000, 0x00001100, 0x00001200,
+0x00001300, 0x00001400, 0x00001500, 0x00001600, 0x00001700, 0x00001800,
+0x00001900, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0x00001a00, 0x00001b00, 0x00001c00, 0x00001d00, 0x00001e00,	// bytes 96..101: lowercase starts at 'a' (97) = 26 << 8
+0x00001f00, 0x00002000, 0x00002100, 0x00002200, 0x00002300, 0x00002400,
+0x00002500, 0x00002600, 0x00002700, 0x00002800, 0x00002900, 0x00002a00,
+0x00002b00, 0x00002c00, 0x00002d00, 0x00002e00, 0x00002f00, 0x00003000,
+0x00003100, 0x00003200, 0x00003300, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,	// bytes 126..255: everything from here on is invalid
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
+0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
+};
+
+
+#endif
--- a/src/third-party/base64/lib/tables/table_enc_12bit.h
+++ b/src/third-party/base64/lib/tables/table_enc_12bit.h
--- a/src/third-party/base64/lib/tables/tables.c
+++ b/src/third-party/base64/lib/tables/tables.c
@ -0,0 +1,40 @@
+#include "tables.h"
+
+const uint8_t
+base64_table_enc_6bit[] =	// encode table: index is a 6-bit value, element is the Base64 character for it
+	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"	// values  0..25
+	"abcdefghijklmnopqrstuvwxyz"	// values 26..51
+	"0123456789"	// values 52..61
+	"+/";	// values 62..63; being a string literal, a terminating NUL follows the 64 symbols
+
+// In the lookup table below, note that the value for '=' (character 61) is
+// 254, not 255. This character is used for in-band signaling of the end of
+// the datastream, and we will use that later. The characters A-Z, a-z, 0-9
+// and + / are mapped to their "decoded" values. The other bytes all map to
+// the value 255, which flags them as "invalid input".
+
+const uint8_t
+base64_table_dec_8bit[] =
+{
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,		//   0..15
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,		//  16..31
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,  62, 255, 255, 255,  63,		//  32..47
+	 52,  53,  54,  55,  56,  57,  58,  59,  60,  61, 255, 255, 255, 254, 255, 255,		//  48..63
+	255,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,		//  64..79
+	 15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25, 255, 255, 255, 255, 255,		//  80..95
+	255,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,		//  96..111
+	 41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51, 255, 255, 255, 255, 255,		// 112..127
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,		// 128..143
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,		// 144..159
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,		// 160..175
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,		// 176..191
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,		// 192..207
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,		// 208..223
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,		// 224..239
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,		// 240..255
+};
+
+#if BASE64_WORDSIZE >= 32	// same guard as the extern declarations of the 32-bit/12-bit tables in tables.h
+#  include "table_dec_32bit.h"	// presumably supplies base64_table_dec_32bit_d0..d3 -- verify against the header
+#  include "table_enc_12bit.h"	// presumably supplies base64_table_enc_12bit -- verify against the header
+#endif	// BASE64_WORDSIZE >= 32
--- a/src/third-party/base64/lib/tables/tables.h
+++ b/src/third-party/base64/lib/tables/tables.h
@ -0,0 +1,23 @@
+#ifndef BASE64_TABLES_H
+#define BASE64_TABLES_H
+
+#include <stdint.h>
+
+#include "../env.h"	// presumably where BASE64_WORDSIZE is defined -- TODO confirm
+
+// These tables are used by all codecs for fallback plain encoding/decoding:
+extern const uint8_t base64_table_enc_6bit[];	// 6-bit value -> Base64 character
+extern const uint8_t base64_table_dec_8bit[];	// input byte -> 6-bit value; 254 marks '=', 255 marks an invalid byte
+
+// These tables are used for the 32-bit and 64-bit generic decoders:
+#if BASE64_WORDSIZE >= 32
+extern const uint32_t base64_table_dec_32bit_d0[];
+extern const uint32_t base64_table_dec_32bit_d1[];
+extern const uint32_t base64_table_dec_32bit_d2[];	// input byte -> (6-bit value << 14); 0xffffffff marks an invalid byte
+extern const uint32_t base64_table_dec_32bit_d3[];	// input byte -> (6-bit value << 8); 0xffffffff marks an invalid byte
+
+// This table is used by the 32 and 64-bit generic encoders:
+extern const uint16_t base64_table_enc_12bit[];
+#endif	// BASE64_WORDSIZE >= 32
+
+#endif	// BASE64_TABLES_H