CharsRefIntHashMap并不比HashMap<String, Integer>快

我模仿lucene的BytesRef写了一个CharsRefIntHashMap,实测效果并不如HashMap<String, Integer>。代码如下:

package com.dp.arts.lucenex.utils;

import org.apache.lucene.util.CharsRef;

/**
 * A counting map from character sequences to {@code int} values.
 *
 * <p>Every operation is offered in two flavours: one taking a Lucene
 * {@link CharsRef} and one taking the equivalent raw
 * {@code (char[] arr, int offset, int length)} slice.
 */
public interface CharsRefIntMap {

    /**
     * Callback handed to {@link CharsRefIntMap#forEach(CharsRefIntEntryAccessor)};
     * it receives one stored entry per invocation.
     */
    abstract class CharsRefIntEntryAccessor {

        /**
         * Receives a single entry.
         *
         * @param arr    array holding the key characters
         * @param offset index of the first key character in {@code arr}
         * @param length number of key characters
         * @param value  value currently associated with the key
         */
        public abstract void access(char[] arr, int offset, int length, int value);
    }

    /** Increments the counter of {@code key}; the overload without {@code add} is implemented with a step of 1 in CharsRefIntHashMap. */
    void incKey(CharsRef key);

    /** Adds {@code add} to the counter of {@code key}. */
    void incKey(CharsRef key, int add);

    /** Slice variant of {@link #incKey(CharsRef)}. */
    void incKey(char[] arr, int offset, int length);

    /** Slice variant of {@link #incKey(CharsRef, int)}. */
    void incKey(char[] arr, int offset, int length, int add);

    /** Returns the value stored for {@code key}; CharsRefIntHashMap returns 0 when the key is absent. */
    int get(CharsRef key);

    /** Returns the value stored for {@code key}, or {@code no_entry_value} when the key is absent. */
    int get(CharsRef key, int no_entry_value);

    /** Slice variant of {@link #get(CharsRef)}. */
    int get(char[] arr, int offset, int length);

    /** Slice variant of {@link #get(CharsRef, int)}. */
    int get(char[] arr, int offset, int length, int no_entry_value);

    /** Returns the number of entries. */
    int size();

    /** Passes every stored entry to the given accessor. */
    void forEach(CharsRefIntEntryAccessor accesor);
}

package com.dp.arts.lucenex.utils;

import java.util.Arrays;

import org.apache.lucene.util.CharsRef;

import com.dp.arts.lucenex.utils.CharsRefIntMap.CharsRefIntEntryAccessor;

/**
 * Open-addressing hash map from character slices to {@code int} counters,
 * modelled on Lucene's {@code BytesRefHash}.
 *
 * <p>Entries live in dense parallel arrays ({@code arrs}, {@code offsets},
 * {@code lengths}, {@code values}) indexed by insertion order; the hash table
 * itself ({@code ords}) only stores the index of the entry occupying each slot.
 *
 * <p>Keys are stored by reference: the map keeps the caller's {@code char[]}
 * and does not copy it, so the slice must not be modified after insertion.
 * Entries can never be removed.
 */
public class CharsRefIntHashMap implements CharsRefIntMap {

    public static final int DEFAULT_CAPACITY = 16;

    /** Marker stored in {@code ords} for an unused hash slot. */
    private static final int EMPTY = -1;

    // Dense per-entry storage; indices [0, count) are in use.
    private char[][] arrs;
    private int[] offsets;
    private int[] lengths;
    private int[] values;

    // Hash slot -> entry index, or EMPTY.
    private int[] ords;

    private int hashSize;
    private int halfHashSize;
    private int hashMask;
    private int count;

    /** Creates a map with {@link #DEFAULT_CAPACITY} hash slots. */
    public CharsRefIntHashMap() {
        this(DEFAULT_CAPACITY);
    }

    /**
     * Creates a map with the given number of hash slots.
     *
     * @param capacity initial table size; must be a positive power of two
     *                 because slots are selected with a bit mask
     * @throws IllegalArgumentException if {@code capacity} is not a positive power of two
     */
    public CharsRefIntHashMap(int capacity) {
        // A plain 'assert' is disabled by default, which let invalid sizes through silently.
        if (capacity <= 0 || (capacity & (capacity - 1)) != 0) {
            throw new IllegalArgumentException(
                    "capacity must be a positive power of two: " + capacity);
        }
        arrs = new char[capacity][];
        offsets = new int[capacity];
        lengths = new int[capacity];
        values = new int[capacity];
        ords = new int[capacity];
        Arrays.fill(ords, EMPTY);
        hashSize = capacity;
        halfHashSize = capacity >>> 1;
        hashMask = capacity - 1;
    }

    @Override
    public void incKey(CharsRef key) {
        addTo(key.chars, key.offset, key.length, 1);
    }

    @Override
    public void incKey(CharsRef key, int add) {
        addTo(key.chars, key.offset, key.length, add);
    }

    @Override
    public void incKey(char[] arr, int offset, int length) {
        addTo(arr, offset, length, 1);
    }

    @Override
    public void incKey(char[] arr, int offset, int length, int add) {
        addTo(arr, offset, length, add);
    }

    /** Adds {@code add} to the key's counter, creating the entry (with value {@code add}) if absent. */
    private void addTo(char[] arr, int offset, int length, int add) {
        final int pos = findSlot(arr, offset, length, charsHashCode(arr, offset, length));
        final int e = ords[pos];
        if (e != EMPTY) {
            values[e] += add;
            return;
        }
        // New entry: append to the dense arrays and point the slot at it.
        arrs[count] = arr;
        offsets[count] = offset;
        lengths[count] = length;
        values[count] = add;
        ords[pos] = count;
        ++count;
        // '>=' (not '==') so that a table created with capacity 1, whose
        // halfHashSize is 0, still grows instead of filling its only slot.
        if (count >= halfHashSize) {
            rehash(hashSize << 1);
        }
    }

    /**
     * Locates the hash slot for a key: either the slot already holding the key
     * or the first empty slot on its probe sequence.
     *
     * <p>The probe step is computed once per lookup and is always odd. With a
     * power-of-two table a fixed odd step visits every slot, so the loop
     * terminates as long as one slot is empty. Recomputing the step on every
     * iteration does not give that guarantee.
     */
    private int findSlot(char[] arr, int offset, int length, int code) {
        int pos = code & hashMask;
        int e = ords[pos];
        if (e != EMPTY && !charsEquals(arrs[e], offsets[e], lengths[e], arr, offset, length)) {
            final int inc = ((code >> 8) + code) | 1;
            do {
                code += inc;
                pos = code & hashMask;
                e = ords[pos];
            } while (e != EMPTY
                    && !charsEquals(arrs[e], offsets[e], lengths[e], arr, offset, length));
        }
        return pos;
    }

    /** Grows every array to {@code newSize} and re-inserts all entries into a fresh slot table. */
    private void rehash(int newSize) {
        final char[][] newArrs = new char[newSize][];
        final int[] newOffsets = new int[newSize];
        final int[] newLengths = new int[newSize];
        final int[] newValues = new int[newSize];
        // Only the first 'count' positions of the dense arrays are populated.
        System.arraycopy(arrs, 0, newArrs, 0, count);
        System.arraycopy(offsets, 0, newOffsets, 0, count);
        System.arraycopy(lengths, 0, newLengths, 0, count);
        System.arraycopy(values, 0, newValues, 0, count);

        final int[] newOrds = new int[newSize];
        Arrays.fill(newOrds, EMPTY);
        final int newHashMask = newSize - 1;

        // Entry indices are stable; only their slot assignment changes.
        for (int e = 0; e < count; ++e) {
            int code = charsHashCode(newArrs[e], newOffsets[e], newLengths[e]);
            int pos = code & newHashMask;
            if (newOrds[pos] != EMPTY) {
                // Same fixed-step probe sequence as findSlot.
                final int inc = ((code >> 8) + code) | 1;
                do {
                    code += inc;
                    pos = code & newHashMask;
                } while (newOrds[pos] != EMPTY);
            }
            newOrds[pos] = e;
        }

        ords = newOrds;
        arrs = newArrs;
        offsets = newOffsets;
        lengths = newLengths;
        values = newValues;
        hashSize = newSize;
        halfHashSize = newSize >> 1;
        hashMask = newHashMask;
    }

    /**
     * Computes a base-31 polynomial hash over {@code chars[offset .. offset + length)}.
     *
     * @return the hash code; 0 for an empty slice
     */
    public int charsHashCode(char[] chars, int offset, int length) {
        final int prime = 31;
        int result = 0;
        final int end = offset + length;
        for (int i = offset; i < end; i++) {
            result = prime * result + chars[i];
        }
        return result;
    }

    /**
     * Compares two character slices.
     *
     * @return {@code true} if both slices have the same length and identical characters
     */
    public boolean charsEquals(char[] lhsArr, int lhsOffset, int lhsLength,
                               char[] rhsArr, int rhsOffset, int rhsLength) {
        if (lhsLength != rhsLength) {
            return false;
        }
        final int end = lhsOffset + lhsLength;
        for (int l = lhsOffset, r = rhsOffset; l < end; l++, r++) {
            if (lhsArr[l] != rhsArr[r]) {
                return false;
            }
        }
        return true;
    }

    @Override
    public int get(CharsRef key) {
        return get(key.chars, key.offset, key.length, 0);
    }

    @Override
    public int get(CharsRef key, int no_entry_key) {
        return get(key.chars, key.offset, key.length, no_entry_key);
    }

    @Override
    public int get(char[] arr, int offset, int length) {
        return get(arr, offset, length, 0);
    }

    /**
     * Looks up the value stored for the given slice.
     *
     * @return the stored value, or {@code no_entry_key} if the key is absent
     */
    @Override
    public int get(char[] arr, int offset, int length, int no_entry_key) {
        final int e = ords[findSlot(arr, offset, length, charsHashCode(arr, offset, length))];
        return e == EMPTY ? no_entry_key : values[e];
    }

    /**
     * Passes every entry to {@code accessor}. Entries are visited in insertion
     * order by walking the dense arrays directly rather than scanning hash slots.
     */
    @Override
    public void forEach(CharsRefIntEntryAccessor accessor) {
        final int n = count;
        for (int i = 0; i < n; ++i) {
            accessor.access(arrs[i], offsets[i], lengths[i], values[i]);
        }
    }

    @Override
    public int size() {
        return count;
    }

    // for test only.
    public int hashSize() {
        return hashSize;
    }
}

package com.dp.arts.lucenex.utils;

import java.util.HashMap;

import java.util.Random;

import
org.apache.lucene.util.CharsRef;

/**
 * Micro-benchmark that counts occurrences of random strings, first with
 * {@link CharsRefIntHashMap} and then with {@code HashMap<String, Integer>},
 * printing the elapsed time of each to standard error.
 */
public class CharsRefIntHashMapBenchmark {

    /** Number of distinct candidate strings. */
    private static final int KEY_COUNT = 10000;

    /** Number of increments performed per round. */
    private static final int MAX = 100000;

    /** Number of times the whole increment sequence is replayed. */
    private static final int ROUNDS = 100;

    private static final Random randGen = new Random();

    private static final char[] numbersAndLetters =
            ("0123456789abcdefghijklmnopqrstuvwxyz"
                    + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ").toCharArray();

    /**
     * Builds a random string drawn from {@code numbersAndLetters}.
     *
     * @param length number of characters to generate
     * @return the random string, or {@code null} if {@code length < 1}
     */
    private static final String randomString(int length) {
        if (length < 1) {
            return null;
        }
        char[] randBuffer = new char[length];
        for (int i = 0; i < randBuffer.length; i++) {
            // Bound by the real alphabet size; the former literal 71 was one
            // short of the 72 characters and never selected the last one.
            randBuffer[i] = numbersAndLetters[randGen.nextInt(numbersAndLetters.length)];
        }
        return new String(randBuffer);
    }

    public static void main(String[] args) {
        // Candidate keys, 1 to 10 characters each.
        String[] strs = new String[KEY_COUNT];
        for (int i = 0; i < KEY_COUNT; ++i) {
            strs[i] = randomString(randGen.nextInt(10) + 1);
        }

        // Random sequence of key indices; both maps replay the same sequence.
        int[] values = new int[MAX];
        for (int i = 0; i < MAX; ++i) {
            values[i] = randGen.nextInt(KEY_COUNT);
        }

        // Pre-extract the (array, offset, length) slice of every key so the
        // extraction is not part of the timed section.
        // NOTE(review): StringMisc is defined outside this file; presumably it
        // exposes the character data of a String - confirm.
        char[][] arrs = new char[MAX][];
        int[] offsets = new int[MAX];
        int[] counts = new int[MAX];
        for (int i = 0; i < MAX; ++i) {
            String s = strs[values[i]];
            arrs[i] = StringMisc.toCharArray(s);
            offsets[i] = StringMisc.getOffset(s);
            counts[i] = StringMisc.getCount(s);
        }

        // nanoTime is monotonic, unlike wall-clock currentTimeMillis.
        long start = System.nanoTime();
        CharsRefIntHashMap map = new CharsRefIntHashMap();
        for (int j = 0; j < ROUNDS; ++j) {
            for (int i = 0; i < MAX; ++i) {
                map.incKey(arrs[i], offsets[i], counts[i]);
            }
        }
        System.err.println("CharsRefIntHashMap time elapsed: "
                + ((System.nanoTime() - start) / 1000000L) + "ms.");

        start = System.nanoTime();
        HashMap<String, Integer> oldMap = new HashMap<String, Integer>();
        for (int j = 0; j < ROUNDS; ++j) {
            for (int i = 0; i < MAX; ++i) {
                String s = strs[values[i]];
                Integer v = oldMap.get(s);
                // Integer is immutable: 'v += 1' only rebinds the local, so the
                // incremented count has to be written back with put(). Without
                // this the HashMap side never counted and did less work than
                // the map it is compared against.
                oldMap.put(s, v == null ? 1 : v + 1);
            }
        }
        System.err.println("Origin string map time elapsed: "
                + ((System.nanoTime() - start) / 1000000L) + "ms.");
    }
}

因此这样写的优点只是内存少用一些,性能应该更差:rehash时需要拷贝更多数据,对每个数据的访问都需要通过下标。实际情况下,CharsRef所需要的内存是24字节。如果使用trove的TObjectIntHashMap,插入速度相当,查询速度是jdk hashmap的三倍。

时间: 2024-11-09 13:13:14

CharsRefIntHashMap并不比HashMap<String, Integer>快的相关文章

CharsRefIntHashMap并不比HashMap<String, Integer>快

我模仿lucene的BytesRef写了一个CharsRefIntHashMap,实测效果并不如HashMap<String, Integer>.代码如下: package com.dp.arts.lucenex.utils; import org.apache.lucene.util.CharsRef; public interface CharsRefIntMap { public static abstract class CharsRefIntEntryAccessor { publi

string integer == equals 转

java中的数据类型,可分为两类: 1.基本数据类型,也称原始数据类型.byte,short,char,int,long,float,double,boolean 他们之间的比较,应用双等号(==),比较的是他们的值. 2.复合数据类型(类) 当他们用(==)进行比较的时候,比较的是他们在内存中的存放地址,所以,除非是同一个new出来的对象,他们的比较后的结果为true,否则比较后结果为false. JAVA当中所有的类都是继承于Object这个基类的,在Object中的基类中定义了一个equa

String,Integer,int类型之间的相互转换

String, Integer, int 三种类型之间可以两两进行转换 1. 基本数据类型到包装数据类型的转换 int -> Integer (两种方法) Integer it1 = new Integer(int a); //封装的基本原理 Integer it2 = Integer.valueOf(int a); int -> String String s2=10+""; 2.  包装数据类型到基本数据类型的转换 String -> int int i4=Int

List<Map<String, Integer>> 同key的value全部累加合并

public static void main(String[] args){ List<Map<String,Object>> list1 = new ArrayList<Map<String,Object>>(); Map<String,Object> map1 = new HashMap<String,Object>(); Map<String,Object> map2 = new HashMap<String

String,Integer,int类型之间转换总结

今天学习了封装的知识理念,关于自动装箱和拆箱等手段来改变数据类型感到十分有意思,同时之间的相互转换也值得我们去牢记,当然,在工作中熟能生巧,只是为了梳理一下知识点,供大家参考: 自java7以后已经实现了自动装箱和自动拆箱,int和Integer之间的转换已经可以实现自动 下面是实际的操作代码 1 public class Integer04{ 2 public static void main(String[] args){ 3 //int--->Integer 4 Integer i1=In

Delphi out 参数 string Integer

http://www.delphibasics.co.uk/RTL.asp?Name=Out http://stackoverflow.com/questions/14507310/whats-the-difference-between-var-and-out-parameters A var parameter will be passed by reference, and that's it. An out parameter is also passed by reference, b

java-笔记-string ,Integer,Math,正则表达式

做了几个习题,以前没怎么用过的方法这几次用得比较多. String 字符串的方法 1.截取   str.substring(beginIndex,endIndex)  substring(beginIndex) 2.indexOf(),定位目标str在字符串中的索引位置: Integer的方法 1.Integer.valueOf(xxx)   把其他类型转换为int型 正则表达式 1.判断目标是否满足正则表达式   xxx.matches(regex) e.g xxx是一个数字 xxx.matc

Jan 14 - Integer to Roman; String; Integer; Math;

public class Solution { public String intToRoman(int num) { int[] digits = new int[4]; int i = 0; String s = ""; digits[i] = num%10; while((num = num/10) > 0){ digits[++i] = num%10; } for(int j = 0; j <= i; j++){ if(j == 0){ switch (digits

HashMap为什么比数组查询快

通常数组不直接保存值,而是通过保存值的list.然后对list中的"值"使用equals方法比较,这部分查询速度自然慢.但是如果有好的散列函数,数组的每个位置就只有较少的"值".因此,不是查询所有的list,而是快速跳到数组的某个位置,只对很少的额元素进行比较,这就是HashMap会如此之快的原因. 这里解释一下数组的"键",数组并不保存键本身,而是通过"键"对象生成一个数字,将其作为数组的下标索引.这个数字就是散列码,有定义