234 lines
7.6 KiB
Java
234 lines
7.6 KiB
Java
// Copyright 2011 Google Inc. All Rights Reserved.
|
|
|
|
package com.google.common.hash;
|
|
|
|
import static com.google.common.base.Charsets.ISO_8859_1;
|
|
import static com.google.common.base.Charsets.UTF_8;
|
|
import static com.google.common.truth.Truth.assertThat;
|
|
|
|
import com.google.common.base.Strings;
|
|
import com.google.common.collect.ImmutableSortedMap;
|
|
import com.google.common.collect.Ordering;
|
|
import com.google.common.primitives.UnsignedLong;
|
|
import java.util.Arrays;
|
|
import junit.framework.TestCase;
|
|
|
|
/**
|
|
* Unit test for Fingerprint2011.
|
|
*
|
|
* @author kylemaddison@google.com (Kyle Maddison)
|
|
*/
|
|
public class Fingerprint2011Test extends TestCase {
|
|
|
|
// Length of the sample string to produce
|
|
private static final int MAX_BYTES = 1000;
|
|
|
|
// Map from sample string lengths to the fingerprint
|
|
private static final ImmutableSortedMap<Integer, Long> LENGTH_FINGERPRINTS =
|
|
new ImmutableSortedMap.Builder<Integer, Long>(Ordering.natural())
|
|
.put(1000, 0x433109b33e13e6edL)
|
|
.put(800, 0x5f2f123bfc815f81L)
|
|
.put(640, 0x6396fc6a67293cf4L)
|
|
.put(512, 0x45c01b4934ddbbbeL)
|
|
.put(409, 0xfcd19b617551db45L)
|
|
.put(327, 0x4eee69e12854871eL)
|
|
.put(261, 0xab753446a3bbd532L)
|
|
.put(208, 0x54242fe06a291c3fL)
|
|
.put(166, 0x4f7acff7703a635bL)
|
|
.put(132, 0xa784bd0a1f22cc7fL)
|
|
.put(105, 0xf19118e187456638L)
|
|
.put(84, 0x3e2e58f9196abfe5L)
|
|
.put(67, 0xd38ae3dec0107aeaL)
|
|
.put(53, 0xea3033885868e10eL)
|
|
.put(42, 0x1394a146d0d7e04bL)
|
|
.put(33, 0x9962499315d2e8daL)
|
|
.put(26, 0x0849f5cfa85489b5L)
|
|
.put(20, 0x83b395ff19bf2171L)
|
|
.put(16, 0x9d33dd141bd55d9aL)
|
|
.put(12, 0x196248eb0b02466aL)
|
|
.put(9, 0x1cf73a50ff120336L)
|
|
.put(7, 0xb451c339457dbf51L)
|
|
.put(5, 0x681982c5e7b74064L)
|
|
.put(4, 0xc5ce47450ca6c021L)
|
|
.put(3, 0x9fcc3c3fde4d5ff7L)
|
|
.put(2, 0x090966a836e5fa4bL)
|
|
.put(1, 0x8199675ecaa6fe64L)
|
|
.put(0, 0x23ad7c904aa665e3L)
|
|
.build();
|
|
private static final HashFunction HASH_FN = Hashing.fingerprint2011();
|
|
|
|
// If this test fails, all bets are off
|
|
public void testReallySimpleFingerprints() {
|
|
assertEquals(8473225671271759044L, fingerprint("test".getBytes(UTF_8)));
|
|
// 32 characters long
|
|
assertEquals(7345148637025587076L, fingerprint(Strings.repeat("test", 8).getBytes(UTF_8)));
|
|
// 256 characters long
|
|
assertEquals(4904844928629814570L, fingerprint(Strings.repeat("test", 64).getBytes(UTF_8)));
|
|
}
|
|
|
|
public void testStringsConsistency() {
|
|
for (String s : Arrays.asList("", "some", "test", "strings", "to", "try")) {
|
|
assertEquals(HASH_FN.newHasher().putUnencodedChars(s).hash(), HASH_FN.hashUnencodedChars(s));
|
|
}
|
|
}
|
|
|
|
public void testUtf8() {
|
|
char[] charsA = new char[128];
|
|
char[] charsB = new char[128];
|
|
|
|
for (int i = 0; i < charsA.length; i++) {
|
|
if (i < 100) {
|
|
charsA[i] = 'a';
|
|
charsB[i] = 'a';
|
|
} else {
|
|
// Both two-byte characters, but must be different
|
|
charsA[i] = (char) (0x0180 + i);
|
|
charsB[i] = (char) (0x0280 + i);
|
|
}
|
|
}
|
|
|
|
String stringA = new String(charsA);
|
|
String stringB = new String(charsB);
|
|
assertThat(stringA).isNotEqualTo(stringB);
|
|
assertThat(HASH_FN.hashUnencodedChars(stringA))
|
|
.isNotEqualTo(HASH_FN.hashUnencodedChars(stringB));
|
|
assertThat(fingerprint(stringA.getBytes(UTF_8)))
|
|
.isNotEqualTo(fingerprint(stringB.getBytes(UTF_8)));
|
|
|
|
// ISO 8859-1 only has 0-255 (ubyte) representation so throws away UTF-8 characters
|
|
// greater than 127 (ie with their top bit set).
|
|
// Don't attempt to do this in real code.
|
|
assertEquals(
|
|
fingerprint(stringA.getBytes(ISO_8859_1)), fingerprint(stringB.getBytes(ISO_8859_1)));
|
|
}
|
|
|
|
public void testMumurHash64() {
|
|
byte[] bytes = "test".getBytes(UTF_8);
|
|
assertEquals(
|
|
1618900948208871284L, Fingerprint2011.murmurHash64WithSeed(bytes, 0, bytes.length, 1));
|
|
|
|
bytes = "test test test".getBytes(UTF_8);
|
|
assertEquals(
|
|
UnsignedLong.valueOf("12313169684067793560").longValue(),
|
|
Fingerprint2011.murmurHash64WithSeed(bytes, 0, bytes.length, 1));
|
|
}
|
|
|
|
public void testPutNonChars() {
|
|
Hasher hasher = HASH_FN.newHasher();
|
|
// Expected data is 0x0100010100000000
|
|
hasher
|
|
.putBoolean(true)
|
|
.putBoolean(true)
|
|
.putBoolean(false)
|
|
.putBoolean(true)
|
|
.putBoolean(false)
|
|
.putBoolean(false)
|
|
.putBoolean(false)
|
|
.putBoolean(false);
|
|
final long hashCode = hasher.hash().asLong();
|
|
|
|
hasher = HASH_FN.newHasher();
|
|
hasher
|
|
.putByte((byte) 0x01)
|
|
.putByte((byte) 0x01)
|
|
.putByte((byte) 0x00)
|
|
.putByte((byte) 0x01)
|
|
.putByte((byte) 0x00)
|
|
.putByte((byte) 0x00)
|
|
.putByte((byte) 0x00)
|
|
.putByte((byte) 0x00);
|
|
assertEquals(hashCode, hasher.hash().asLong());
|
|
|
|
hasher = HASH_FN.newHasher();
|
|
hasher
|
|
.putChar((char) 0x0101)
|
|
.putChar((char) 0x0100)
|
|
.putChar((char) 0x0000)
|
|
.putChar((char) 0x0000);
|
|
assertEquals(hashCode, hasher.hash().asLong());
|
|
|
|
hasher = HASH_FN.newHasher();
|
|
hasher.putBytes(new byte[] {0x01, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00});
|
|
assertEquals(hashCode, hasher.hash().asLong());
|
|
|
|
hasher = HASH_FN.newHasher();
|
|
hasher.putLong(0x0000000001000101L);
|
|
assertEquals(hashCode, hasher.hash().asLong());
|
|
|
|
hasher = HASH_FN.newHasher();
|
|
hasher
|
|
.putShort((short) 0x0101)
|
|
.putShort((short) 0x0100)
|
|
.putShort((short) 0x0000)
|
|
.putShort((short) 0x0000);
|
|
assertEquals(hashCode, hasher.hash().asLong());
|
|
}
|
|
|
|
public void testHashFloatIsStable() {
|
|
// This is about the best we can do for floating-point
|
|
Hasher hasher = HASH_FN.newHasher();
|
|
hasher.putFloat(0x01000101f).putFloat(0f);
|
|
assertEquals(0x96a4f8cc6ecbf16L, hasher.hash().asLong());
|
|
|
|
hasher = HASH_FN.newHasher();
|
|
hasher.putDouble(0x0000000001000101d);
|
|
assertEquals(0xcf54171253fdc198L, hasher.hash().asLong());
|
|
}
|
|
|
|
/** Convenience method to compute a fingerprint on a full bytes array. */
|
|
private static long fingerprint(byte[] bytes) {
|
|
return fingerprint(bytes, bytes.length);
|
|
}
|
|
|
|
/** Convenience method to compute a fingerprint on a subset of a byte array. */
|
|
private static long fingerprint(byte[] bytes, int length) {
|
|
return HASH_FN.hashBytes(bytes, 0, length).asLong();
|
|
}
|
|
|
|
/**
|
|
* Tests that the Java port of Fingerprint2011 provides the same results on buffers up to 800
|
|
* bytes long as the original implementation in C++. See http://cl/106539598
|
|
*/
|
|
public void testMultipleLengths() {
|
|
int iterations = 800;
|
|
byte[] buf = new byte[iterations * 4];
|
|
int bufLen = 0;
|
|
long h = 0;
|
|
for (int i = 0; i < iterations; ++i) {
|
|
h ^= fingerprint(buf, i);
|
|
h = remix(h);
|
|
buf[bufLen++] = getChar(h);
|
|
|
|
h ^= fingerprint(buf, i * i % bufLen);
|
|
h = remix(h);
|
|
buf[bufLen++] = getChar(h);
|
|
|
|
h ^= fingerprint(buf, i * i * i % bufLen);
|
|
h = remix(h);
|
|
buf[bufLen++] = getChar(h);
|
|
|
|
h ^= fingerprint(buf, bufLen);
|
|
h = remix(h);
|
|
buf[bufLen++] = getChar(h);
|
|
|
|
int x0 = buf[bufLen - 1] & 0xff;
|
|
int x1 = buf[bufLen - 2] & 0xff;
|
|
int x2 = buf[bufLen - 3] & 0xff;
|
|
int x3 = buf[bufLen / 2] & 0xff;
|
|
buf[((x0 << 16) + (x1 << 8) + x2) % bufLen] ^= x3;
|
|
buf[((x1 << 16) + (x2 << 8) + x3) % bufLen] ^= i % 256;
|
|
}
|
|
assertEquals(0xeaa3b1c985261632L, h);
|
|
}
|
|
|
|
private static long remix(long h) {
|
|
h ^= h >>> 41;
|
|
h *= 949921979;
|
|
return h;
|
|
}
|
|
|
|
private static byte getChar(long h) {
|
|
return (byte) ('a' + ((h & 0xfffff) % 26));
|
|
}
|
|
}
|