From: Julian Andres Klode Date: Tue, 22 Nov 2016 21:57:46 +0000 (+0100) Subject: Merge commit 'e2073b0276226b625897ef475f225bf8f508719e' as 'triehash' X-Git-Tag: 1.4_beta1~24 X-Git-Url: https://git.saurik.com/apt.git/commitdiff_plain/e07f3d5a9ed2870a0e2909cc1e5e55e826086c53 Merge commit 'e2073b0276226b625897ef475f225bf8f508719e' as 'triehash' --- e07f3d5a9ed2870a0e2909cc1e5e55e826086c53 diff --cc triehash/.travis.yml index 000000000,000000000..c66ea9697 new file mode 100644 --- /dev/null +++ b/triehash/.travis.yml @@@ -1,0 -1,0 +1,14 @@@ ++language: perl ++perl: ++ - "5.24" ++ - "5.22" ++ - "5.20" ++ - "5.18" ++ - "5.16" ++ - "5.14" ++install: ++ - cpanm --quiet --notest --skip-satisfied Devel::Cover Devel::Cover::Report::Codecov ++script: ++ - ./tests/run-tests.sh ++after_script: ++ - cover -report codecov diff --cc triehash/LICENSE.md index 000000000,000000000..89de35479 new file mode 100644 --- /dev/null +++ b/triehash/LICENSE.md @@@ -1,0 -1,0 +1,17 @@@ ++Permission is hereby granted, free of charge, to any person obtaining a copy ++of this software and associated documentation files (the "Software"), to deal ++in the Software without restriction, including without limitation the rights ++to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++copies of the Software, and to permit persons to whom the Software is ++furnished to do so, subject to the following conditions: ++ ++The above copyright notice and this permission notice shall be included in ++all copies or substantial portions of the Software. ++ ++THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++THE SOFTWARE. diff --cc triehash/README.md index 000000000,000000000..d6f6e7fc3 new file mode 100644 --- /dev/null +++ b/triehash/README.md @@@ -1,0 -1,0 +1,58 @@@ ++# Order-preserving minimal perfect hash function generator ++ ++Build order-preserving minimal perfect hash functions. ++ ++[![codecov](https://codecov.io/gh/julian-klode/triehash/branch/master/graph/badge.svg)](https://codecov.io/gh/julian-klode/triehash) ++[![Build Status](https://travis-ci.org/julian-klode/triehash.svg?branch=master)](https://travis-ci.org/julian-klode/triehash) ++ ++## Performance ++ ++Performance was evaluated against other hash functions. As an input set, the ++fields of Debian Packages and Sources files was used, and each hash function ++was run 1,000,000 times for each word. The byte count of the words were then ++summed up and divided by the total number of nanoseconds each function ran, so ++all speeds below are given in bytes per nanosecond, AKA gigabyte per second. ++ ++arch/function|jak-x230 (amd64)|backup (amd64)|asachi.d.o (arm64)|asachi.d.o (armel)|asachi.d.o (armhf)|plummer.d.o (ppc64el)|eller.d.o (mipsel) ++-------------|----------------|--------------|------------------|------------------|------------------|---------------------|------------------ ++Trie | 2.4 | 1.9 | 1.2 | 0.9 | 0.8 | 2.0 | 0.2 ++Trie (*) | 2.2 | 1.7 | 0.8 | 0.7 | 0.7 | 1.8 | 0.2 ++re2c | 1.7 | 1.3 | 0.9 | 0.9 | 0.7 | 1.6 | 0.2 ++re2c (*) | 1.2 | 0.9 | 0.6 | 0.6 | 0.5 | 1.1 | 0.1 ++gperf (*) | 0.7 | 0.5 | 0.2 | 0.2 | 0.2 | 0.5 | 0.1 ++gperf | 1.3 | 0.9 | 0.3 | 0.3 | 0.2 | 0.4 | 0.1 ++djb (*) | 0.7 | 0.5 | 0.3 | 0.3 | 0.3 | 0.5 | 0.1 ++djb (**) | 1.0 | 0.7 | 0.4 | 0.5 | 0.5 | 0.6 | 0.2 ++djb | 0.9 | 0.7 | 0.5 | 0.5 | 0.5 | 0.7 | 0.2 ++apt (*) | 1.2 | 0.9 | 0.7 | 0.7 | 0.7 | 1.1 | 0.2 ++apt (**) | 2.3 | 1.7 | 0.7 | 0.9 | 0.8 | 1.9 | 0.2 ++ ++And transposed: ++ ++function/arch |Trie |Trie (*) |re2c |re2c (*) |gperf (*)|gperf |djb (*) |djb (**) |djb |apt (*) |apt (**) ++---------------------|---------|---------|---------|---------|---------|---------|---------|---------|---------|---------|--------- ++jak-x230 (amd64) | 2.4| 2.2| 1.7| 1.2| 0.7| 1.3| 0.7| 1.0| 0.9| 1.2| 2.3 ++backup (amd64) | 1.9| 1.7| 1.3| 0.9| 0.5| 0.9| 0.5| 0.7| 0.7| 0.9| 1.7 ++asachi.d.o (arm64) | 1.2| 0.8| 0.9| 0.6| 0.2| 0.3| 0.3| 0.4| 0.5| 0.7| 0.7 ++asachi.d.o (armel) | 0.9| 0.7| 0.9| 0.6| 0.2| 0.3| 0.3| 0.5| 0.5| 0.7| 0.9 ++asachi.d.o (armhf) | 0.8| 0.7| 0.7| 0.5| 0.2| 0.2| 0.3| 0.5| 0.5| 0.7| 0.8 ++plummer.d.o (ppc64el)| 2.0| 1.8| 1.6| 1.1| 0.5| 0.4| 0.5| 0.6| 0.7| 1.1| 1.9 ++eller.d.o (mipsel) | 0.2| 0.2| 0.2| 0.1| 0.1| 0.1| 0.1| 0.2| 0.2| 0.2| 0.2 ++ ++ ++Legend: ++ ++* The (*) variants are case-insensitive, (**) are more optimised versions ++ of the (*) versions. ++* DJB (*) is a DJB Hash with naive lowercase conversion, DJB (**) just ORs one ++ bit into each value to get alphabetical characters to be lowercase ++* APT (*) is the AlphaHash function from APT which hashes the last 8 bytes in a ++ word in a case-insensitive manner. APT (**) is the same function unrolled. ++* All hosts except the x230 are Debian porterboxes. The x230 has a Core i5-3320M, ++ barriere has an Opteron 23xx. ++ ++Notes: ++ ++* The overhead is larger than needed on some platforms due to gcc inserting ++ unneeded zero extend instructions, see: ++ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77729 diff --cc triehash/tests/framework.sh index 000000000,000000000..51d4580a6 new file mode 100644 --- /dev/null +++ b/triehash/tests/framework.sh @@@ -1,0 -1,0 +1,84 @@@ ++#!/bin/sh ++# Simple integration test framework ++ ++set -e ++ ++ ++cleanup() { ++ rm -f test.output test.c test.h test.tree ++} ++ ++dumpone() { ++ if [ -e "$@" ]; then ++ echo "Content of $@:" ++ cat "$@" | sed "s#^#\t#g" ++ fi ++ ++} ++ ++dump() { ++ dumpone test.output ++ dumpone test.c ++ dumpone test.h ++ dumpone test.tree ++ return 1 ++} ++ ++testsuccess() { ++ [ "$INNER" ] || cleanup ++ [ "$INNER" ] || echo "Testing success of $@" ++ if ! "$@" > test.output 2>&1; then ++ echo "ERROR: Running $@ failed with error $?, messages were:" >&2 ++ dump ++ return 1 ++ fi ++} ++ ++testfailure() { ++ [ "$INNER" ] || cleanup ++ [ "$INNER" ] || echo "Testing failure of $@" ++ if "$@" > test.output 2>&1; then ++ echo "ERROR: Running $@ unexpectedly succeeded, messages were:" >&2 ++ dump ++ return 1 ++ fi ++} ++ ++testfileequal() { ++ [ "$INNER" ] || echo "Testing output of $2" ++ printf "%b\n" "$1" > expected ++ if ! diff -u "expected" "$2" > test.diff; then ++ echo "ERROR: Differences between expected output and and $2:" >&2 ++ cat test.diff | sed "s#^#\t#g" ++ dump ++ return 1 ++ fi ++} ++ ++testgrep() { ++ [ "$INNER" ] || echo "Testing grep $@" ++ INNER=1 testsuccess grep "$@" ++ unset INNER ++} ++ ++testsuccessequal() { ++ expect="$1" ++ shift ++ cleanup ++ echo "Testing success and output of $@" ++ INNER=1 testsuccess "$@" ++ INNER=1 testfileequal "$expect" test.output ++ unset INNER ++} ++ ++ ++WORDS="Word-_0 ++Word = 42 ++VeryLongWord ++Label ~ Word2 ++= -9" ++ ++triehash() { ++ printf "%b\n" "$WORDS" | perl -MDevel::Cover=-summary,0,-silent,1 $(dirname $(dirname $(readlink -f $0)))/triehash.pl "$@" || return $? ++ return $? ++} diff --cc triehash/tests/run-tests.sh index 000000000,000000000..b9c1ec309 new file mode 100755 --- /dev/null +++ b/triehash/tests/run-tests.sh @@@ -1,0 -1,0 +1,22 @@@ ++#!/bin/sh ++DIR=$(dirname $(readlink -f $0)) ++ ++# Let's go into triehash.pl's dir ++cd $(dirname "$DIR") ++ ++rm -rf cover_db ++ ++count=$(cd "$DIR" && echo test-* | wc -w) ++i=1 ++ ++for test in $DIR/test-*; do ++ echo "[$i/$count] Running testcase $test" ++ if ! $test > test.summary 2>&1; then ++ cat test.summary ++ exit 1 ++ fi ++ i=$((i + 1)) ++done ++ ++ ++cover diff --cc triehash/tests/test-basic index 000000000,000000000..19cb08684 new file mode 100755 --- /dev/null +++ b/triehash/tests/test-basic @@@ -1,0 -1,0 +1,245 @@@ ++#!/bin/sh ++. $(dirname $(readlink -f $0))/framework.sh ++ ++# Check for non-existing files ++testfailure triehash -C /does/not/exist1 -H /does/not/exist1 /does/not/exist/input ++ ++# Check that we can specify - for -C and -H ++testsuccessequal "#ifndef TRIE_HASH_PerfectHash ++#define TRIE_HASH_PerfectHash ++#include ++#include ++enum PerfectKey { ++ Unknown = -1, ++}; ++static enum PerfectKey PerfectHash(const char *string, size_t length); ++static enum PerfectKey PerfectHash(const char *string, size_t length) ++{ ++ switch (length) { ++ default: ++ return Unknown; ++ } ++} ++#endif /* TRIE_HASH_PerfectHash */" triehash --multi-byte=0 -C - -H - ++ ++# Check that split files work ++testsuccess triehash -C test.c -H test.h --multi-byte=0 ++testfileequal "#include \"test.h\" ++ enum PerfectKey PerfectHash(const char *string, size_t length) ++{ ++ switch (length) { ++ default: ++ return Unknown; ++ } ++}" test.c ++testfileequal "#ifndef TRIE_HASH_PerfectHash ++#define TRIE_HASH_PerfectHash ++#include ++#include ++enum PerfectKey { ++ Unknown = -1, ++}; ++ enum PerfectKey PerfectHash(const char *string, size_t length); ++#endif /* TRIE_HASH_PerfectHash */" test.h ++ ++ ++# Check the C code generator ++testsuccess triehash -C test.c -H test.c /dev/stdin ++testfileequal "#ifndef TRIE_HASH_PerfectHash ++#define TRIE_HASH_PerfectHash ++#include ++#include ++enum PerfectKey { ++ VeryLongWord = 43, ++ Word = 42, ++ Word___0 = 0, ++ Label = 44, ++ Unknown = -9, ++}; ++static enum PerfectKey PerfectHash(const char *string, size_t length); ++#ifdef __GNUC__ ++typedef uint16_t __attribute__((aligned (1))) triehash_uu16; ++typedef char static_assert16[__alignof__(triehash_uu16) == 1 ? 1 : -1]; ++typedef uint32_t __attribute__((aligned (1))) triehash_uu32; ++typedef char static_assert32[__alignof__(triehash_uu32) == 1 ? 1 : -1]; ++typedef uint64_t __attribute__((aligned (1))) triehash_uu64; ++typedef char static_assert64[__alignof__(triehash_uu64) == 1 ? 1 : -1]; ++#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ++#define onechar(c, s, l) (((uint64_t)(c)) << (s)) ++#else ++#define onechar(c, s, l) (((uint64_t)(c)) << (l-8-s)) ++#endif ++#if (!defined(__ARM_ARCH) || defined(__ARM_FEATURE_UNALIGNED)) && !defined(TRIE_HASH_NO_MULTI_BYTE) ++#define TRIE_HASH_MULTI_BYTE ++#endif ++#endif /*GNUC */ ++#ifdef TRIE_HASH_MULTI_BYTE ++static enum PerfectKey PerfectHash4(const char *string) ++{ ++ switch(*((triehash_uu32*) &string[0])) { ++ case 0| onechar('W', 0, 32)| onechar('o', 8, 32)| onechar('r', 16, 32)| onechar('d', 24, 32): ++ return Word; ++ } ++ return Unknown; ++} ++static enum PerfectKey PerfectHash5(const char *string) ++{ ++ switch(*((triehash_uu32*) &string[0])) { ++ case 0| onechar('W', 0, 32)| onechar('o', 8, 32)| onechar('r', 16, 32)| onechar('d', 24, 32): ++ switch(string[4]) { ++ case 0| onechar('2', 0, 8): ++ return Label; ++ } ++ } ++ return Unknown; ++} ++static enum PerfectKey PerfectHash7(const char *string) ++{ ++ switch(*((triehash_uu32*) &string[0])) { ++ case 0| onechar('W', 0, 32)| onechar('o', 8, 32)| onechar('r', 16, 32)| onechar('d', 24, 32): ++ switch(string[4]) { ++ case 0| onechar('-', 0, 8): ++ switch(string[5]) { ++ case 0| onechar('_', 0, 8): ++ switch(string[6]) { ++ case 0| onechar('0', 0, 8): ++ return Word___0; ++ } ++ } ++ } ++ } ++ return Unknown; ++} ++static enum PerfectKey PerfectHash12(const char *string) ++{ ++ switch(*((triehash_uu64*) &string[0])) { ++ case 0| onechar('V', 0, 64)| onechar('e', 8, 64)| onechar('r', 16, 64)| onechar('y', 24, 64)| onechar('L', 32, 64)| onechar('o', 40, 64)| onechar('n', 48, 64)| onechar('g', 56, 64): ++ switch(*((triehash_uu32*) &string[8])) { ++ case 0| onechar('W', 0, 32)| onechar('o', 8, 32)| onechar('r', 16, 32)| onechar('d', 24, 32): ++ return VeryLongWord; ++ } ++ } ++ return Unknown; ++} ++#else ++static enum PerfectKey PerfectHash4(const char *string) ++{ ++ switch(string[0]) { ++ case 'W': ++ switch(string[1]) { ++ case 'o': ++ switch(string[2]) { ++ case 'r': ++ switch(string[3]) { ++ case 'd': ++ return Word; ++ } ++ } ++ } ++ } ++ return Unknown; ++} ++static enum PerfectKey PerfectHash5(const char *string) ++{ ++ switch(string[0]) { ++ case 'W': ++ switch(string[1]) { ++ case 'o': ++ switch(string[2]) { ++ case 'r': ++ switch(string[3]) { ++ case 'd': ++ switch(string[4]) { ++ case '2': ++ return Label; ++ } ++ } ++ } ++ } ++ } ++ return Unknown; ++} ++static enum PerfectKey PerfectHash7(const char *string) ++{ ++ switch(string[0]) { ++ case 'W': ++ switch(string[1]) { ++ case 'o': ++ switch(string[2]) { ++ case 'r': ++ switch(string[3]) { ++ case 'd': ++ switch(string[4]) { ++ case '-': ++ switch(string[5]) { ++ case '_': ++ switch(string[6]) { ++ case '0': ++ return Word___0; ++ } ++ } ++ } ++ } ++ } ++ } ++ } ++ return Unknown; ++} ++static enum PerfectKey PerfectHash12(const char *string) ++{ ++ switch(string[0]) { ++ case 'V': ++ switch(string[1]) { ++ case 'e': ++ switch(string[2]) { ++ case 'r': ++ switch(string[3]) { ++ case 'y': ++ switch(string[4]) { ++ case 'L': ++ switch(string[5]) { ++ case 'o': ++ switch(string[6]) { ++ case 'n': ++ switch(string[7]) { ++ case 'g': ++ switch(string[8]) { ++ case 'W': ++ switch(string[9]) { ++ case 'o': ++ switch(string[10]) { ++ case 'r': ++ switch(string[11]) { ++ case 'd': ++ return VeryLongWord; ++ } ++ } ++ } ++ } ++ } ++ } ++ } ++ } ++ } ++ } ++ } ++ } ++ return Unknown; ++} ++#endif /* TRIE_HASH_MULTI_BYTE */ ++static enum PerfectKey PerfectHash(const char *string, size_t length) ++{ ++ switch (length) { ++ case 4: ++ return PerfectHash4(string); ++ case 5: ++ return PerfectHash5(string); ++ case 7: ++ return PerfectHash7(string); ++ case 12: ++ return PerfectHash12(string); ++ default: ++ return Unknown; ++ } ++} ++#endif /* TRIE_HASH_PerfectHash */" test.c diff --cc triehash/tests/test-case-insensitive index 000000000,000000000..25ab2dd78 new file mode 100755 --- /dev/null +++ b/triehash/tests/test-case-insensitive @@@ -1,0 -1,0 +1,109 @@@ ++#!/bin/sh ++. $(dirname $(readlink -f $0))/framework.sh ++ ++WORDS="Halllo\nH-lllo\nHalll1" ++ ++# Case-insensitive test ++testsuccessequal "#include \"/dev/null\" ++#ifdef __GNUC__ ++typedef uint16_t __attribute__((aligned (1))) triehash_uu16; ++typedef char static_assert16[__alignof__(triehash_uu16) == 1 ? 1 : -1]; ++typedef uint32_t __attribute__((aligned (1))) triehash_uu32; ++typedef char static_assert32[__alignof__(triehash_uu32) == 1 ? 1 : -1]; ++typedef uint64_t __attribute__((aligned (1))) triehash_uu64; ++typedef char static_assert64[__alignof__(triehash_uu64) == 1 ? 1 : -1]; ++#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ++#define onechar(c, s, l) (((uint64_t)(c)) << (s)) ++#else ++#define onechar(c, s, l) (((uint64_t)(c)) << (l-8-s)) ++#endif ++#if (!defined(__ARM_ARCH) || defined(__ARM_FEATURE_UNALIGNED)) && !defined(TRIE_HASH_NO_MULTI_BYTE) ++#define TRIE_HASH_MULTI_BYTE ++#endif ++#endif /*GNUC */ ++#ifdef TRIE_HASH_MULTI_BYTE ++static enum PerfectKey PerfectHash6(const char *string) ++{ ++ switch(string[0] | 0x20) { ++ case 0| onechar('h', 0, 8): ++ switch(string[1]) { ++ case 0| onechar('-', 0, 8): ++ switch(*((triehash_uu32*) &string[2]) | 0x20202020) { ++ case 0| onechar('l', 0, 32)| onechar('l', 8, 32)| onechar('l', 16, 32)| onechar('o', 24, 32): ++ return H_lllo; ++ } ++ break; ++ case 0| onechar('a', 0, 8): ++ case 0| onechar('A', 0, 8): ++ switch(*((triehash_uu16*) &string[2]) | 0x2020) { ++ case 0| onechar('l', 0, 16)| onechar('l', 8, 16): ++ switch(string[4] | 0x20) { ++ case 0| onechar('l', 0, 8): ++ switch(string[5]) { ++ case 0| onechar('1', 0, 8): ++ return Halll1; ++ break; ++ case 0| onechar('o', 0, 8): ++ case 0| onechar('O', 0, 8): ++ return Halllo; ++ } ++ } ++ } ++ } ++ } ++ return Unknown; ++} ++#else ++static enum PerfectKey PerfectHash6(const char *string) ++{ ++ switch(string[0] | 0x20) { ++ case 'h': ++ switch(string[1]) { ++ case '-': ++ switch(string[2] | 0x20) { ++ case 'l': ++ switch(string[3] | 0x20) { ++ case 'l': ++ switch(string[4] | 0x20) { ++ case 'l': ++ switch(string[5] | 0x20) { ++ case 'o': ++ return H_lllo; ++ } ++ } ++ } ++ } ++ break; ++ case 'a': ++ case 'A': ++ switch(string[2] | 0x20) { ++ case 'l': ++ switch(string[3] | 0x20) { ++ case 'l': ++ switch(string[4] | 0x20) { ++ case 'l': ++ switch(string[5]) { ++ case '1': ++ return Halll1; ++ break; ++ case 'o': ++ case 'O': ++ return Halllo; ++ } ++ } ++ } ++ } ++ } ++ } ++ return Unknown; ++} ++#endif /* TRIE_HASH_MULTI_BYTE */ ++ enum PerfectKey PerfectHash(const char *string, size_t length) ++{ ++ switch (length) { ++ case 6: ++ return PerfectHash6(string); ++ default: ++ return Unknown; ++ } ++}" triehash --multi-byte=3210 --ignore-case -H /dev/null /dev/stdin diff --cc triehash/tests/test-enum-include-name-flags index 000000000,000000000..33bd97c0f new file mode 100755 --- /dev/null +++ b/triehash/tests/test-enum-include-name-flags @@@ -1,0 -1,0 +1,129 @@@ ++#!/bin/sh ++. $(dirname $(readlink -f $0))/framework.sh ++ ++# Need a short word, we really just need to check if the labels work ++WORDS=w ++ ++testsuccessequal "\ ++#ifndef TRIE_HASH_PerfectHash ++#define TRIE_HASH_PerfectHash ++#include ++#include ++#include ++enum PerfectKey { ++ w = 0, ++ Unknown = -1, ++}; ++ enum PerfectKey PerfectHash(const char *string, size_t length); ++#endif /* TRIE_HASH_PerfectHash */" triehash --multi-byte=0 -C /dev/null --include="" /dev/stdin ++ ++# Check for --enum-class support ++testsuccessequal "\ ++#ifndef TRIE_HASH_PerfectHash ++#define TRIE_HASH_PerfectHash ++#include ++#include ++enum class PerfectKey { ++ w = 0, ++ Unknown = -1, ++}; ++static enum PerfectKey PerfectHash(const char *string, size_t length); ++static enum PerfectKey PerfectHash1(const char *string) ++{ ++ switch(string[0]) { ++ case 'w': ++ return PerfectKey::w; ++ } ++ return PerfectKey::Unknown; ++} ++static enum PerfectKey PerfectHash(const char *string, size_t length) ++{ ++ switch (length) { ++ case 1: ++ return PerfectHash1(string); ++ default: ++ return PerfectKey::Unknown; ++ } ++} ++#endif /* TRIE_HASH_PerfectHash */" triehash --multi-byte=0 --enum-class /dev/stdin ++ ++# Check for --enum-name support ++testsuccessequal "\ ++#ifndef TRIE_HASH_PerfectHash ++#define TRIE_HASH_PerfectHash ++#include ++#include ++enum Foo { ++ Unknown = -1, ++}; ++static enum Foo PerfectHash(const char *string, size_t length); ++static enum Foo PerfectHash(const char *string, size_t length) ++{ ++ switch (length) { ++ default: ++ return Unknown; ++ } ++} ++#endif /* TRIE_HASH_PerfectHash */\ ++" triehash --multi-byte=0 --enum-name="Foo" ++ ++# Check for --enum-class support ++testsuccessequal "\ ++#ifndef TRIE_HASH_PerfectHash ++#define TRIE_HASH_PerfectHash ++#include ++#include ++enum class Foo::Bar { ++ Unknown = -1, ++}; ++static enum Foo::Bar PerfectHash(const char *string, size_t length); ++static enum Foo::Bar PerfectHash(const char *string, size_t length) ++{ ++ switch (length) { ++ default: ++ return Foo::Bar::Unknown; ++ } ++} ++#endif /* TRIE_HASH_PerfectHash */\ ++" triehash --multi-byte=0 --enum-class --enum-name="Foo::Bar" ++ ++# Check for --function-name support ++testsuccessequal "\ ++#ifndef TRIE_HASH_NonSense ++#define TRIE_HASH_NonSense ++#include ++#include ++enum PerfectKey { ++ Unknown = -1, ++}; ++static enum PerfectKey NonSense(const char *string, size_t length); ++static enum PerfectKey NonSense(const char *string, size_t length) ++{ ++ switch (length) { ++ default: ++ return Unknown; ++ } ++} ++#endif /* TRIE_HASH_NonSense */\ ++" triehash --multi-byte=0 --function-name="NonSense" ++ ++# Check for --counter-name support ++testsuccessequal "\ ++#ifndef TRIE_HASH_PerfectHash ++#define TRIE_HASH_PerfectHash ++#include ++#include ++enum { MyCounter = 0 }; ++enum PerfectKey { ++ Unknown = -1, ++}; ++static enum PerfectKey PerfectHash(const char *string, size_t length); ++static enum PerfectKey PerfectHash(const char *string, size_t length) ++{ ++ switch (length) { ++ default: ++ return Unknown; ++ } ++} ++#endif /* TRIE_HASH_PerfectHash */\ ++" triehash --multi-byte=0 --counter-name="MyCounter" diff --cc triehash/tests/test-multi-byte-level index 000000000,000000000..ddfb8cd1b new file mode 100755 --- /dev/null +++ b/triehash/tests/test-multi-byte-level @@@ -1,0 -1,0 +1,427 @@@ ++#!/bin/sh ++. $(dirname $(readlink -f $0))/framework.sh ++ ++# Check that building a single-byte trie works ++testsuccessequal "\ ++┌────────────────────────────────────────────────────┐ ++│ Initial trie │ ++└────────────────────────────────────────────────────┘ ++ ++├── V ++│ ├── e ++│ │ ├── r ++│ │ │ ├── y ++│ │ │ │ ├── L ++│ │ │ │ │ ├── o ++│ │ │ │ │ │ ├── n ++│ │ │ │ │ │ │ ├── g ++│ │ │ │ │ │ │ │ ├── W ++│ │ │ │ │ │ │ │ │ ├── o ++│ │ │ │ │ │ │ │ │ │ ├── r ++│ │ │ │ │ │ │ │ │ │ │ ├── d → VeryLongWord ++├── W ++│ ├── o ++│ │ ├── r ++│ │ │ ├── d → Word ++│ │ │ │ ├── - ++│ │ │ │ │ ├── _ ++│ │ │ │ │ │ ├── 0 → Word-_0 ++│ │ │ │ ├── 2 → Label ++┌────────────────────────────────────────────────────┐ ++│ Rebuilt trie │ ++└────────────────────────────────────────────────────┘ ++ ++├── V ++│ ├── e ++│ │ ├── r ++│ │ │ ├── y ++│ │ │ │ ├── L ++│ │ │ │ │ ├── o ++│ │ │ │ │ │ ├── n ++│ │ │ │ │ │ │ ├── g ++│ │ │ │ │ │ │ │ ├── W ++│ │ │ │ │ │ │ │ │ ├── o ++│ │ │ │ │ │ │ │ │ │ ├── r ++│ │ │ │ │ │ │ │ │ │ │ ├── d → VeryLongWord ++├── W ++│ ├── o ++│ │ ├── r ++│ │ │ ├── d → Word ++│ │ │ │ ├── - ++│ │ │ │ │ ├── _ ++│ │ │ │ │ │ ├── 0 → Word-_0 ++│ │ │ │ ├── 2 → Label ++┌────────────────────────────────────────────────────┐ ++│ Trie for words of length 4 │ ++└────────────────────────────────────────────────────┘ ++ ++├── W ++│ ├── o ++│ │ ├── r ++│ │ │ ├── d → Word ++┌────────────────────────────────────────────────────┐ ++│ Trie for words of length 5 │ ++└────────────────────────────────────────────────────┘ ++ ++├── W ++│ ├── o ++│ │ ├── r ++│ │ │ ├── d ++│ │ │ │ ├── 2 → Label ++┌────────────────────────────────────────────────────┐ ++│ Trie for words of length 7 │ ++└────────────────────────────────────────────────────┘ ++ ++├── W ++│ ├── o ++│ │ ├── r ++│ │ │ ├── d ++│ │ │ │ ├── - ++│ │ │ │ │ ├── _ ++│ │ │ │ │ │ ├── 0 → Word-_0 ++┌────────────────────────────────────────────────────┐ ++│ Trie for words of length 12 │ ++└────────────────────────────────────────────────────┘ ++ ++├── V ++│ ├── e ++│ │ ├── r ++│ │ │ ├── y ++│ │ │ │ ├── L ++│ │ │ │ │ ├── o ++│ │ │ │ │ │ ├── n ++│ │ │ │ │ │ │ ├── g ++│ │ │ │ │ │ │ │ ├── W ++│ │ │ │ │ │ │ │ │ ├── o ++│ │ │ │ │ │ │ │ │ │ ├── r ++│ │ │ │ │ │ │ │ │ │ │ ├── d → VeryLongWord" triehash --multi-byte=0 -l tree /dev/stdin ++ ++# Two byte optimization ++testsuccessequal "\ ++┌────────────────────────────────────────────────────┐ ++│ Initial trie │ ++└────────────────────────────────────────────────────┘ ++ ++├── Ve ++│ ├── ry ++│ │ ├── Lo ++│ │ │ ├── ng ++│ │ │ │ ├── Wo ++│ │ │ │ │ ├── rd → VeryLongWord ++├── Wo ++│ ├── rd → Word ++│ │ ├── -_ ++│ │ │ ├── 0 → Word-_0 ++│ │ ├── 2 → Label ++┌────────────────────────────────────────────────────┐ ++│ Rebuilt trie │ ++└────────────────────────────────────────────────────┘ ++ ++├── Ve ++│ ├── ry ++│ │ ├── Lo ++│ │ │ ├── ng ++│ │ │ │ ├── Wo ++│ │ │ │ │ ├── rd → VeryLongWord ++├── Wo ++│ ├── rd → Word ++│ │ ├── - ++│ │ │ ├── _0 → Word-_0 ++│ │ ├── 2 → Label ++┌────────────────────────────────────────────────────┐ ++│ Trie for words of length 4 │ ++└────────────────────────────────────────────────────┘ ++ ++├── Wo ++│ ├── rd → Word ++┌────────────────────────────────────────────────────┐ ++│ Trie for words of length 5 │ ++└────────────────────────────────────────────────────┘ ++ ++├── Wo ++│ ├── rd ++│ │ ├── 2 → Label ++┌────────────────────────────────────────────────────┐ ++│ Trie for words of length 7 │ ++└────────────────────────────────────────────────────┘ ++ ++├── Wo ++│ ├── rd ++│ │ ├── -_ ++│ │ │ ├── 0 → Word-_0 ++┌────────────────────────────────────────────────────┐ ++│ Trie for words of length 12 │ ++└────────────────────────────────────────────────────┘ ++ ++├── Ve ++│ ├── ry ++│ │ ├── Lo ++│ │ │ ├── ng ++│ │ │ │ ├── Wo ++│ │ │ │ │ ├── rd → VeryLongWord" triehash --multi-byte=1 -l tree /dev/stdin ++# Four byte optimization ++testsuccessequal "\ ++┌────────────────────────────────────────────────────┐ ++│ Initial trie │ ++└────────────────────────────────────────────────────┘ ++ ++├── Very ++│ ├── Long ++│ │ ├── Word → VeryLongWord ++├── Word → Word ++│ ├── - ++│ │ ├── _ ++│ │ │ ├── 0 → Word-_0 ++│ ├── 2 → Label ++┌────────────────────────────────────────────────────┐ ++│ Rebuilt trie │ ++└────────────────────────────────────────────────────┘ ++ ++├── Very ++│ ├── Long ++│ │ ├── Word → VeryLongWord ++├── Word → Word ++│ ├── - ++│ │ ├── _ ++│ │ │ ├── 0 → Word-_0 ++│ ├── 2 → Label ++┌────────────────────────────────────────────────────┐ ++│ Trie for words of length 4 │ ++└────────────────────────────────────────────────────┘ ++ ++├── Word → Word ++┌────────────────────────────────────────────────────┐ ++│ Trie for words of length 5 │ ++└────────────────────────────────────────────────────┘ ++ ++├── Word ++│ ├── 2 → Label ++┌────────────────────────────────────────────────────┐ ++│ Trie for words of length 7 │ ++└────────────────────────────────────────────────────┘ ++ ++├── Word ++│ ├── - ++│ │ ├── _ ++│ │ │ ├── 0 → Word-_0 ++┌────────────────────────────────────────────────────┐ ++│ Trie for words of length 12 │ ++└────────────────────────────────────────────────────┘ ++ ++├── Very ++│ ├── Long ++│ │ ├── Word → VeryLongWord" triehash --multi-byte=2 -l tree /dev/stdin ++# Eigh byte optimization ++testsuccessequal "\ ++┌────────────────────────────────────────────────────┐ ++│ Initial trie │ ++└────────────────────────────────────────────────────┘ ++ ++├── VeryLong ++│ ├── W ++│ │ ├── o ++│ │ │ ├── r ++│ │ │ │ ├── d → VeryLongWord ++├── W ++│ ├── o ++│ │ ├── r ++│ │ │ ├── d → Word ++│ │ │ │ ├── - ++│ │ │ │ │ ├── _ ++│ │ │ │ │ │ ├── 0 → Word-_0 ++│ │ │ │ ├── 2 → Label ++┌────────────────────────────────────────────────────┐ ++│ Rebuilt trie │ ++└────────────────────────────────────────────────────┘ ++ ++├── V ++│ ├── eryLongW ++│ │ ├── o ++│ │ │ ├── r ++│ │ │ │ ├── d → VeryLongWord ++├── W ++│ ├── o ++│ │ ├── r ++│ │ │ ├── d → Word ++│ │ │ │ ├── - ++│ │ │ │ │ ├── _ ++│ │ │ │ │ │ ├── 0 → Word-_0 ++│ │ │ │ ├── 2 → Label ++┌────────────────────────────────────────────────────┐ ++│ Trie for words of length 4 │ ++└────────────────────────────────────────────────────┘ ++ ++├── W ++│ ├── o ++│ │ ├── r ++│ │ │ ├── d → Word ++┌────────────────────────────────────────────────────┐ ++│ Trie for words of length 5 │ ++└────────────────────────────────────────────────────┘ ++ ++├── W ++│ ├── o ++│ │ ├── r ++│ │ │ ├── d ++│ │ │ │ ├── 2 → Label ++┌────────────────────────────────────────────────────┐ ++│ Trie for words of length 7 │ ++└────────────────────────────────────────────────────┘ ++ ++├── W ++│ ├── o ++│ │ ├── r ++│ │ │ ├── d ++│ │ │ │ ├── - ++│ │ │ │ │ ├── _ ++│ │ │ │ │ │ ├── 0 → Word-_0 ++┌────────────────────────────────────────────────────┐ ++│ Trie for words of length 12 │ ++└────────────────────────────────────────────────────┘ ++ ++├── VeryLong ++│ ├── W ++│ │ ├── o ++│ │ │ ├── r ++│ │ │ │ ├── d → VeryLongWord" triehash --multi-byte=3 -l tree /dev/stdin ++ ++ ++# Check that building a multi-byte trie works ++testsuccessequal "\ ++┌────────────────────────────────────────────────────┐ ++│ Initial trie │ ++└────────────────────────────────────────────────────┘ ++ ++├── VeryLong ++│ ├── Word → VeryLongWord ++├── Word → Word ++│ ├── - ++│ │ ├── _ ++│ │ │ ├── 0 → Word-_0 ++│ ├── 2 → Label ++┌────────────────────────────────────────────────────┐ ++│ Rebuilt trie │ ++└────────────────────────────────────────────────────┘ ++ ++├── Very ++│ ├── LongWord → VeryLongWord ++├── Word → Word ++│ ├── - ++│ │ ├── _ ++│ │ │ ├── 0 → Word-_0 ++│ ├── 2 → Label ++┌────────────────────────────────────────────────────┐ ++│ Trie for words of length 4 │ ++└────────────────────────────────────────────────────┘ ++ ++├── Word → Word ++┌────────────────────────────────────────────────────┐ ++│ Trie for words of length 5 │ ++└────────────────────────────────────────────────────┘ ++ ++├── Word ++│ ├── 2 → Label ++┌────────────────────────────────────────────────────┐ ++│ Trie for words of length 7 │ ++└────────────────────────────────────────────────────┘ ++ ++├── Word ++│ ├── - ++│ │ ├── _ ++│ │ │ ├── 0 → Word-_0 ++┌────────────────────────────────────────────────────┐ ++│ Trie for words of length 12 │ ++└────────────────────────────────────────────────────┘ ++ ++├── VeryLong ++│ ├── Word → VeryLongWord" triehash -l tree /dev/stdin ++ ++ ++###### CHANGE THE WORDS FOR THE FOLLOWING TESTS ####### ++WORDS="Word" ++ ++# Check that we are generating the proper multi-byte and fallback sessions ++testsuccessequal "#include \"/dev/null\" ++#ifdef __GNUC__ ++typedef uint16_t __attribute__((aligned (1))) triehash_uu16; ++typedef char static_assert16[__alignof__(triehash_uu16) == 1 ? 1 : -1]; ++typedef uint32_t __attribute__((aligned (1))) triehash_uu32; ++typedef char static_assert32[__alignof__(triehash_uu32) == 1 ? 1 : -1]; ++typedef uint64_t __attribute__((aligned (1))) triehash_uu64; ++typedef char static_assert64[__alignof__(triehash_uu64) == 1 ? 1 : -1]; ++#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ++#define onechar(c, s, l) (((uint64_t)(c)) << (s)) ++#else ++#define onechar(c, s, l) (((uint64_t)(c)) << (l-8-s)) ++#endif ++#if (!defined(__ARM_ARCH) || defined(__ARM_FEATURE_UNALIGNED)) && !defined(TRIE_HASH_NO_MULTI_BYTE) ++#define TRIE_HASH_MULTI_BYTE ++#endif ++#endif /*GNUC */ ++#ifdef TRIE_HASH_MULTI_BYTE ++static enum PerfectKey PerfectHash4(const char *string) ++{ ++ switch(*((triehash_uu32*) &string[0])) { ++ case 0| onechar('W', 0, 32)| onechar('o', 8, 32)| onechar('r', 16, 32)| onechar('d', 24, 32): ++ return Word; ++ } ++ return Unknown; ++} ++#else ++static enum PerfectKey PerfectHash4(const char *string) ++{ ++ switch(string[0]) { ++ case 'W': ++ switch(string[1]) { ++ case 'o': ++ switch(string[2]) { ++ case 'r': ++ switch(string[3]) { ++ case 'd': ++ return Word; ++ } ++ } ++ } ++ } ++ return Unknown; ++} ++#endif /* TRIE_HASH_MULTI_BYTE */ ++ enum PerfectKey PerfectHash(const char *string, size_t length) ++{ ++ switch (length) { ++ case 4: ++ return PerfectHash4(string); ++ default: ++ return Unknown; ++ } ++}" triehash -H /dev/null /dev/stdin ++ ++ ++# Check that we are generating no multi-byte session ++testsuccessequal "#include \"/dev/null\" ++static enum PerfectKey PerfectHash4(const char *string) ++{ ++ switch(string[0]) { ++ case 'W': ++ switch(string[1]) { ++ case 'o': ++ switch(string[2]) { ++ case 'r': ++ switch(string[3]) { ++ case 'd': ++ return Word; ++ } ++ } ++ } ++ } ++ return Unknown; ++} ++ enum PerfectKey PerfectHash(const char *string, size_t length) ++{ ++ switch (length) { ++ case 4: ++ return PerfectHash4(string); ++ default: ++ return Unknown; ++ } ++}" triehash --multi-byte=0 -H /dev/null /dev/stdin diff --cc triehash/triehash.pl index 000000000,000000000..c722cc62d new file mode 100755 --- /dev/null +++ b/triehash/triehash.pl @@@ -1,0 -1,0 +1,660 @@@ ++#!/usr/bin/perl -w ++# ++# Copyright (C) 2016 Julian Andres Klode ++# ++# Permission is hereby granted, free of charge, to any person obtaining a copy ++# of this software and associated documentation files (the "Software"), to deal ++# in the Software without restriction, including without limitation the rights ++# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++# copies of the Software, and to permit persons to whom the Software is ++# furnished to do so, subject to the following conditions: ++# ++# The above copyright notice and this permission notice shall be included in ++# all copies or substantial portions of the Software. ++# ++# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++# THE SOFTWARE. ++ ++ ++=head1 NAME ++ ++triehash - Generate a perfect hash function derived from a trie. ++ ++=cut ++ ++use strict; ++use warnings; ++use Getopt::Long; ++ ++=head1 SYNOPSIS ++ ++B [S>] [S>] ++ ++=head1 DESCRIPTION ++ ++triehash takes a list of words in input file and generates a function and ++an enumeration to describe the word ++ ++=head1 INPUT FILE FORMAT ++ ++The file consists of multiple lines of the form: ++ ++ [label ~ ] word [= value] ++ ++This maps word to value, and generates an enumeration with entries of the form: ++ ++ label = value ++ ++If I