MurmurHash Tips

发布时间:2019-09-28 08:38:47 | 编辑:auto | 阅读(1974)

    简介

    MurmurHash 是一种非加密型哈希函数,适用于一般的哈希检索操作。与其它流行的哈希函数相比,对于规律性较强的 key,MurmurHash 的随机分布特征表现更良好。

    家族成员

    MurmurHash1, MurmurHash2, MurmurHash3

    计算 mmh3 十六进制字符串

    Python3

    >>> import binascii
    >>> import mmh3
    >>> binascii.b2a_hex(mmh3.hash_bytes('CN305183362S')).decode('utf8')
    'a4fb17cba6d455e4812ad28989780cbc'    # 32个字符,128 bit
    >>> hex(mmh3.hash128('CN305183362S'))
    '0xbc0c788989d22a81e455d4a6cb17fba4'

    Java

    • 借用 BigInteger
    import java.math.BigInteger;
    import org.apache.commons.codec.digest.MurmurHash3;
    import org.apache.commons.lang.ArrayUtils;
    public class AppTester {
        public static void main(String[] args) {
            final byte[] origin = "CN305183362S".getBytes();
            long[] vec = MurmurHash3.hash128(origin, 0, origin.length, 0);
            // 将 long 转换为 BigInteger
            BigInteger bigInteger0 = BigInteger.valueOf(vec[0]);
            BigInteger bigInteger1 = BigInteger.valueOf(vec[1]);
            // 将 BigInteger 转换为 byte[]
            byte[] array0 = bigInteger0.toByteArray();
            byte[] array1 = bigInteger1.toByteArray();
            // 反转 byte[](大小端转换)
            ArrayUtils.reverse(array0);
            ArrayUtils.reverse(array1);
            // 将 byte[] 转换为无符号整数,并转为十六进制字符串
            String part0 = (new BigInteger(1, array0)).toString(16);
            String part1 = (new BigInteger(1, array1)).toString(16);
            System.out.println(part0 + part1); // a4fb17cba6d455e4812ad28989780cbc
        }
    }
    import java.nio.ByteBuffer;
    import org.apache.commons.codec.digest.MurmurHash3;
    import org.apache.commons.lang.ArrayUtils;
    public class AppTester {
        public static void main(String[] args) {
            final byte[] origin = "CN305183362S".getBytes();
            long[] vec = MurmurHash3.hash128(origin, 0, origin.length, 0);
            ByteBuffer buf0 = ByteBuffer.allocate(8); 
            ByteBuffer buf1 = ByteBuffer.allocate(8);         
            buf0.putLong(0, vec[0]);
            buf1.putLong(0, vec[1]);        
            byte[] array0 = buf0.putLong(0, vec[0]).array();
            byte[] array1 = buf1.putLong(0, vec[1]).array();        
            ArrayUtils.reverse(array0);    // 反转 byte[](大小端转换)
            ArrayUtils.reverse(array1);    // 反转 byte[](大小端转换)
            buf0.put(array0, 0, array0.length).flip();
            buf1.put(array1, 0, array1.length).flip();        
            String part0 = String.format("%x", buf0.getLong());
            String part1 = String.format("%x", buf1.getLong());
            System.out.println(part0 + part1);    // a4fb17cba6d455e4812ad28989780cbc
        }
    }
    • 浅显易懂版,不用处理字节
    import org.apache.commons.codec.digest.MurmurHash3;
    /**
     * Prints the 128-bit MurmurHash3 digest of a sample key as a fixed-width,
     * 32-character lowercase hex string, reversing the byte order of each
     * 64-bit half purely by rearranging two-character hex pairs.
     */
    public class AppTester {
        public static void main(String[] args) {
            // Encode explicitly instead of relying on the platform default charset.
            final byte[] origin = "CN305183362S".getBytes(java.nio.charset.StandardCharsets.UTF_8);
            long[] vec = MurmurHash3.hash128(origin, 0, origin.length, 0);
            // Zero-pad to exactly 16 hex digits: the substring offsets below assume
            // a full-width string, and plain %x drops leading zeros.
            String part0 = String.format("%016x", vec[0]);
            String part1 = String.format("%016x", vec[1]);
            StringBuilder line = new StringBuilder(32);
            // Reverse the first half byte by byte (endianness swap).
            for (int i = 0; i < 8; ++i) {
                line.append(part0, 14 - 2 * i, 16 - 2 * i);
            }
            // Reverse the second half byte by byte (endianness swap).
            for (int i = 0; i < 8; ++i) {
                line.append(part1, 14 - 2 * i, 16 - 2 * i);
            }
            System.out.println(line);    // a4fb17cba6d455e4812ad28989780cbc
        }
    }

    Node.js

    const murmurHash3 = require("murmurhash3js");

    // Returns the eight two-character hex pairs that start at `start` in `hex`,
    // in reverse order (byte-order swap of one 64-bit half).
    function reverseHexBytes(hex, start) {
        let swapped = "";
        for (let pos = start + 14; pos >= start; pos -= 2) {
            swapped += hex.slice(pos, pos + 2);
        }
        return swapped;
    }

    const digest = murmurHash3.x64.hash128("CN305183362S");
    console.log(digest);        // e455d4a6cb17fba4bc0c788989d22a81
    // Swap the byte order of the first half, then of the second half.
    const reordered = reverseHexBytes(digest, 0) + reverseHexBytes(digest, 16);
    console.log(reordered);    // a4fb17cba6d455e4812ad28989780cbc
    本文出处 walker snapshot

关键字