// Copyright 2011 Google Inc. All Rights Reserved. package com.google.common.hash; import static com.google.common.base.Charsets.ISO_8859_1; import static com.google.common.base.Charsets.UTF_8; import static com.google.common.truth.Truth.assertThat; import com.google.common.base.Strings; import com.google.common.collect.ImmutableSortedMap; import com.google.common.collect.Ordering; import com.google.common.primitives.UnsignedLong; import java.util.Arrays; import junit.framework.TestCase; /** * Unit test for Fingerprint2011. * * @author kylemaddison@google.com (Kyle Maddison) */ public class Fingerprint2011Test extends TestCase { // Length of the sample string to produce private static final int MAX_BYTES = 1000; // Map from sample string lengths to the fingerprint private static final ImmutableSortedMap LENGTH_FINGERPRINTS = new ImmutableSortedMap.Builder(Ordering.natural()) .put(1000, 0x433109b33e13e6edL) .put(800, 0x5f2f123bfc815f81L) .put(640, 0x6396fc6a67293cf4L) .put(512, 0x45c01b4934ddbbbeL) .put(409, 0xfcd19b617551db45L) .put(327, 0x4eee69e12854871eL) .put(261, 0xab753446a3bbd532L) .put(208, 0x54242fe06a291c3fL) .put(166, 0x4f7acff7703a635bL) .put(132, 0xa784bd0a1f22cc7fL) .put(105, 0xf19118e187456638L) .put(84, 0x3e2e58f9196abfe5L) .put(67, 0xd38ae3dec0107aeaL) .put(53, 0xea3033885868e10eL) .put(42, 0x1394a146d0d7e04bL) .put(33, 0x9962499315d2e8daL) .put(26, 0x0849f5cfa85489b5L) .put(20, 0x83b395ff19bf2171L) .put(16, 0x9d33dd141bd55d9aL) .put(12, 0x196248eb0b02466aL) .put(9, 0x1cf73a50ff120336L) .put(7, 0xb451c339457dbf51L) .put(5, 0x681982c5e7b74064L) .put(4, 0xc5ce47450ca6c021L) .put(3, 0x9fcc3c3fde4d5ff7L) .put(2, 0x090966a836e5fa4bL) .put(1, 0x8199675ecaa6fe64L) .put(0, 0x23ad7c904aa665e3L) .build(); private static final HashFunction HASH_FN = Hashing.fingerprint2011(); // If this test fails, all bets are off public void testReallySimpleFingerprints() { assertEquals(8473225671271759044L, fingerprint("test".getBytes(UTF_8))); // 32 characters long assertEquals(7345148637025587076L, fingerprint(Strings.repeat("test", 8).getBytes(UTF_8))); // 256 characters long assertEquals(4904844928629814570L, fingerprint(Strings.repeat("test", 64).getBytes(UTF_8))); } public void testStringsConsistency() { for (String s : Arrays.asList("", "some", "test", "strings", "to", "try")) { assertEquals(HASH_FN.newHasher().putUnencodedChars(s).hash(), HASH_FN.hashUnencodedChars(s)); } } public void testUtf8() { char[] charsA = new char[128]; char[] charsB = new char[128]; for (int i = 0; i < charsA.length; i++) { if (i < 100) { charsA[i] = 'a'; charsB[i] = 'a'; } else { // Both two-byte characters, but must be different charsA[i] = (char) (0x0180 + i); charsB[i] = (char) (0x0280 + i); } } String stringA = new String(charsA); String stringB = new String(charsB); assertThat(stringA).isNotEqualTo(stringB); assertThat(HASH_FN.hashUnencodedChars(stringA)) .isNotEqualTo(HASH_FN.hashUnencodedChars(stringB)); assertThat(fingerprint(stringA.getBytes(UTF_8))) .isNotEqualTo(fingerprint(stringB.getBytes(UTF_8))); // ISO 8859-1 only has 0-255 (ubyte) representation so throws away UTF-8 characters // greater than 127 (ie with their top bit set). // Don't attempt to do this in real code. assertEquals( fingerprint(stringA.getBytes(ISO_8859_1)), fingerprint(stringB.getBytes(ISO_8859_1))); } public void testMumurHash64() { byte[] bytes = "test".getBytes(UTF_8); assertEquals( 1618900948208871284L, Fingerprint2011.murmurHash64WithSeed(bytes, 0, bytes.length, 1)); bytes = "test test test".getBytes(UTF_8); assertEquals( UnsignedLong.valueOf("12313169684067793560").longValue(), Fingerprint2011.murmurHash64WithSeed(bytes, 0, bytes.length, 1)); } public void testPutNonChars() { Hasher hasher = HASH_FN.newHasher(); // Expected data is 0x0100010100000000 hasher .putBoolean(true) .putBoolean(true) .putBoolean(false) .putBoolean(true) .putBoolean(false) .putBoolean(false) .putBoolean(false) .putBoolean(false); final long hashCode = hasher.hash().asLong(); hasher = HASH_FN.newHasher(); hasher .putByte((byte) 0x01) .putByte((byte) 0x01) .putByte((byte) 0x00) .putByte((byte) 0x01) .putByte((byte) 0x00) .putByte((byte) 0x00) .putByte((byte) 0x00) .putByte((byte) 0x00); assertEquals(hashCode, hasher.hash().asLong()); hasher = HASH_FN.newHasher(); hasher .putChar((char) 0x0101) .putChar((char) 0x0100) .putChar((char) 0x0000) .putChar((char) 0x0000); assertEquals(hashCode, hasher.hash().asLong()); hasher = HASH_FN.newHasher(); hasher.putBytes(new byte[] {0x01, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00}); assertEquals(hashCode, hasher.hash().asLong()); hasher = HASH_FN.newHasher(); hasher.putLong(0x0000000001000101L); assertEquals(hashCode, hasher.hash().asLong()); hasher = HASH_FN.newHasher(); hasher .putShort((short) 0x0101) .putShort((short) 0x0100) .putShort((short) 0x0000) .putShort((short) 0x0000); assertEquals(hashCode, hasher.hash().asLong()); } public void testHashFloatIsStable() { // This is about the best we can do for floating-point Hasher hasher = HASH_FN.newHasher(); hasher.putFloat(0x01000101f).putFloat(0f); assertEquals(0x96a4f8cc6ecbf16L, hasher.hash().asLong()); hasher = HASH_FN.newHasher(); hasher.putDouble(0x0000000001000101d); assertEquals(0xcf54171253fdc198L, hasher.hash().asLong()); } /** Convenience method to compute a fingerprint on a full bytes array. */ private static long fingerprint(byte[] bytes) { return fingerprint(bytes, bytes.length); } /** Convenience method to compute a fingerprint on a subset of a byte array. */ private static long fingerprint(byte[] bytes, int length) { return HASH_FN.hashBytes(bytes, 0, length).asLong(); } /** * Tests that the Java port of Fingerprint2011 provides the same results on buffers up to 800 * bytes long as the original implementation in C++. See http://cl/106539598 */ public void testMultipleLengths() { int iterations = 800; byte[] buf = new byte[iterations * 4]; int bufLen = 0; long h = 0; for (int i = 0; i < iterations; ++i) { h ^= fingerprint(buf, i); h = remix(h); buf[bufLen++] = getChar(h); h ^= fingerprint(buf, i * i % bufLen); h = remix(h); buf[bufLen++] = getChar(h); h ^= fingerprint(buf, i * i * i % bufLen); h = remix(h); buf[bufLen++] = getChar(h); h ^= fingerprint(buf, bufLen); h = remix(h); buf[bufLen++] = getChar(h); int x0 = buf[bufLen - 1] & 0xff; int x1 = buf[bufLen - 2] & 0xff; int x2 = buf[bufLen - 3] & 0xff; int x3 = buf[bufLen / 2] & 0xff; buf[((x0 << 16) + (x1 << 8) + x2) % bufLen] ^= x3; buf[((x1 << 16) + (x2 << 8) + x3) % bufLen] ^= i % 256; } assertEquals(0xeaa3b1c985261632L, h); } private static long remix(long h) { h ^= h >>> 41; h *= 949921979; return h; } private static byte getChar(long h) { return (byte) ('a' + ((h & 0xfffff) % 26)); } }