Change the code to use SpookyHash.

* This change removes the external dependency on OpenSSL, which should
  allow the code to compile on Mac OS X without the nonsense of
  specifying MAC_OS_X_VERSION_MIN_REQUIRED.
* SpookyHash should be faster than SHA-1, which uses the Merkle-Damgård
  construction.
* SpookyHash produces only 128 bits as opposed to SHA-1's 160 bits, so
  memory consumption should be lower. I doubt the shorter digest will
  lead to too many collisions.
* The docs are updated to reflect the fact that OpenSSL is no longer a
  dependency.
This commit is contained in:
Suresh Sundriyal 2014-02-03 22:29:59 -08:00
parent 35584d6482
commit 13f5ad0dce
11 changed files with 39 additions and 41 deletions

2
README
View File

@ -23,8 +23,6 @@ Lnav requires the following software packages:
readline - The readline line editing library.
zlib - The zlib compression library.
bz2 - The bzip2 compression library.
openssl - The OpenSSL cryptographic library. (Only the hash
functions from this library are used.)
INSTALLATION

View File

@ -20,7 +20,6 @@ When compiling from source, the following dependencies are required:
* `PCRE <http://www.pcre.org>`_ -- Versions greater than 8.20 give better
performance since the PCRE JIT will be leveraged.
* `SQLite <http://www.sqlite.org>`_
* `OpenSSL <http://www.openssl.org>`_
* `ZLib <http://www.zlib.net>`_
* `Bzip2 <http://www.bzip.org>`_
* `Readline <http://www.gnu.org/s/readline>`_

View File

@ -32,7 +32,7 @@
#include <stdio.h>
#include <openssl/sha.h>
#include "spookyhash/SpookyV2.h"
#include <list>
#include <vector>
@ -173,7 +173,7 @@ public:
static FILE *TRACE_FILE;
typedef byte_array<SHA_DIGEST_LENGTH> schema_id_t;
typedef byte_array<16> schema_id_t;
struct element;
/* typedef std::list<element> element_list_t; */
@ -421,7 +421,7 @@ private:
element_list_t ELEMENT_LIST_T(el_stack), ELEMENT_LIST_T(free_row),
ELEMENT_LIST_T(key_comps), ELEMENT_LIST_T(value),
ELEMENT_LIST_T(prefix);
SHA_CTX context;
SpookyHash context;
POINT_TRACE("pairup_start");
@ -558,7 +558,7 @@ private:
POINT_TRACE("pairup_stack");
SHA_Init(&context);
context.Init(0, 0);
while (!el_stack.empty()) {
element_list_t::iterator kv_iter = el_stack.begin();
if (kv_iter->e_token == DNT_VALUE) {
@ -595,7 +595,7 @@ private:
if (schema != NULL) {
SHA_Update(&context, key_val.c_str(), key_val.length());
context.Update(key_val.c_str(), key_val.length());
}
while (!free_row.empty()) {
@ -658,7 +658,7 @@ private:
value.e_sub_elements->begin(),
value.e_sub_elements->end());
pairs_out.clear();
SHA_Init(&context);
context.Init(0, 0);
}
}
@ -700,7 +700,7 @@ private:
// columns is significant. I don't think we want to
// use the token ID since some columns values might vary
// between rows.
SHA_Update(&context, " ", 1);
context.Update(" ", 1);
}
break;
@ -711,7 +711,7 @@ private:
std::string key_val = this->get_element_string(
free_row.front());
SHA_Update(&context, key_val.c_str(), key_val.length());
context.Update(key_val.c_str(), key_val.length());
}
break;
}
@ -733,7 +733,8 @@ private:
}
if (schema != NULL) {
SHA_Final(this->dp_schema_id.ba_data, &context);
context.Final((uint64 *)(this->dp_schema_id.ba_data),
(uint64 *)((uint64 *)(this->dp_schema_id.ba_data)+1));
}
};

View File

@ -44,12 +44,12 @@
std::string hash_string(const std::string &str)
{
byte_array<SHA_DIGEST_LENGTH> hash;
SHA_CTX context;
byte_array<16> hash;
SpookyHash context;
SHA_Init(&context);
SHA_Update(&context, str.c_str(), str.length());
SHA_Final(hash.out(), &context);
context.Init(0, 0);
context.Update(str.c_str(), str.length());
context.Final((uint64 *)hash.out(), (uint64 *)((uint64 *)(hash.out())+1));
return hash.to_string();
}

View File

@ -37,7 +37,7 @@
#include <time.h>
#include <sys/types.h>
#include <openssl/sha.h>
#include "spookyhash/SpookyV2.h"
#include <string>
@ -90,15 +90,15 @@ std::string time_ago(time_t last_time);
#error "off_t has unhandled size..."
#endif
struct sha_updater {
sha_updater(SHA_CTX *context) : su_context(context) { };
struct hash_updater {
hash_updater(SpookyHash *context) : su_context(context) { };
void operator()(const std::string &str)
{
SHA_Update(this->su_context, str.c_str(), str.length());
this->su_context->Update(str.c_str(), str.length());
}
SHA_CTX *su_context;
SpookyHash *su_context;
};
std::string hash_string(const std::string &str);

View File

@ -241,7 +241,7 @@ public:
*
* @param ba The SpookyHash digest of the constant parts of this log line.
*/
void set_schema(const byte_array<20> &ba)
void set_schema(const byte_array<16> &ba)
{
memcpy(this->ll_schema, ba.in(), sizeof(this->ll_schema));
};
@ -255,7 +255,7 @@ public:
* @return True if the first four bytes of the given schema match the
* schema stored in this log line.
*/
bool match_schema(const byte_array<20> &ba) const
bool match_schema(const byte_array<16> &ba) const
{
return memcmp(this->ll_schema, ba.in(), sizeof(this->ll_schema)) == 0;
}

View File

@ -29,7 +29,7 @@
#include "config.h"
#include <openssl/sha.h>
#include "spookyhash/SpookyV2.h"
#include "sequence_matcher.hh"
@ -65,18 +65,18 @@ sequence_matcher::sequence_matcher(field_col_t &example)
void sequence_matcher::identity(const std::vector<string> &values,
id_t &id_out)
{
SHA_CTX context;
SpookyHash context;
int lpc = 0;
SHA_Init(&context);
context.Init(0, 0);
for (std::list<field>::iterator iter = sm_fields.begin();
iter != sm_fields.end();
++iter, lpc++) {
if (iter->sf_type == FT_VARIABLE) {
SHA_Update(&context,
values[lpc].c_str(),
values[lpc].length() + 1);
context.Update( values[lpc].c_str(),
values[lpc].length() + 1);
}
}
SHA_Final(id_out.ba_data, &context);
context.Final((uint64 *)id_out.ba_data,
(uint64 *)((uint64 *)(id_out.ba_data)+1));
}

View File

@ -41,7 +41,7 @@ public:
typedef std::vector<std::string> field_row_t;
typedef std::list<field_row_t> field_col_t;
typedef byte_array<20> id_t;
typedef byte_array<16> id_t;
enum field_type_t {
FT_VARIABLE,

View File

@ -37,7 +37,7 @@
#include <sys/types.h>
#include <dirent.h>
#include <openssl/sha.h>
#include "spookyhash/SpookyV2.h"
#include <algorithm>
@ -243,17 +243,17 @@ static void cleanup_session_data(void)
void init_session(void)
{
byte_array<SHA_DIGEST_LENGTH> hash;
SHA_CTX context;
byte_array<16> hash;
SpookyHash context;
lnav_data.ld_session_time = time(NULL);
SHA_Init(&context);
sha_updater updater(&context);
context.Init(0, 0);
hash_updater updater(&context);
for_each(lnav_data.ld_file_names.begin(),
lnav_data.ld_file_names.end(),
object_field(updater, &pair<string, int>::first));
SHA_Final(hash.out(), &context);
context.Final((uint64 *)hash.out(), (uint64 *)((uint64 *)(hash.out())+1));
lnav_data.ld_session_id = hash.to_string();
}

View File

@ -26,6 +26,8 @@
// slower than MD5.
//
#ifndef _SPOOKYHASH_V2_H
#define _SPOOKYHASH_V2_H
#include <stddef.h>
#ifdef _MSC_VER
@ -296,4 +298,4 @@ private:
};
#endif

View File

@ -38,8 +38,6 @@
#include <unistd.h>
#include <string.h>
#include <openssl/sha.h>
#include <map>
#include <list>
#include <vector>