11import java .util .function .ToIntFunction ;
22
3+ /**
4+ * A special (singly) linked list node class for hash tables.
5+ */
36class HashTableNode <K , V > {
47 K key ;
58 V value ;
@@ -12,24 +15,90 @@ class HashTableNode<K, V> {
1215 }
1316}
1417
18+ /**
19+ * An implementation of the hash table data structure that uses chaining to
20+ * resolve collisions.
21+ *
22+ * <p>
23+ * The implementation uses a static array whose individual elements we call
24+ * "buckets". Each bucket contains a linked list of <code>HashTableNode</code>.
25+ * Each node stores a key-value pair. The bucket is <code>null</code> if no
26+ * key-value pair whose key hashes to the bucket's index has been added.
27+ */
1528@ SuppressWarnings ("unchecked" )
1629public class HashTable <K , V > {
30+ /**
31+ * The buckets of this hash table, each containing a singly linked list for
32+ * storing data, or <code>null</code>.
33+ */
1734 Object [] buckets ;
18- float loadFactor = 0.75f ; // The maximum load factor
35+ /**
36+ * The maximum load factor allowed for this hash table.
37+ *
38+ * <p>
39+ * If putting a new key would cause the hash table to exceed this max amount,
40+ * the table will double its size.
41+ *
42+ * <p>
43+ * Good load factors are between 0.5 and 2.0 where the hash table would not
44+ * become too sparse or too dense. This implementation defaults to 0.75.
45+ *
46+ * @see #put(Object, Object)
47+ */
48+ float loadFactor = 0.75f ;
49+ /**
50+ * The hash function used to calculate the index for an incoming key-value
51+ * pair.
52+ *
53+ * <p>
54+ * Hash function takes only one parameter, usually the number of buckets (also
55+ * known as capacity), and always returns an integer.
56+ */
1957 ToIntFunction <Integer > hashFunction = k -> k % this .capacity ; // h(k)
58+ /**
59+ * The number of key-value pairs that are stored by this hash table.
60+ */
2061 int length ;
62+ /**
63+ * The number of buckets in this hash table.
64+ *
65+ * <p>
66+ * While this is usually the same as <code>this.buckets.length</code>, this
67+ * variable was introduced so the hashing function could refer to the new
68+ * capacity instead of the current capacity.
69+ *
70+ * @see #resize(int)
71+ */
2172 int capacity ;
2273
/**
 * Creates an empty hash table that starts out with 16 buckets. The load
 * factor keeps its default value.
 */
public HashTable() {
    this.capacity = 16;
    this.buckets = new Object[this.capacity];
    this.length = 0;
}
2781
/**
 * Initializes a new, empty hash table with the specified number of buckets
 * and the default load factor of 0.75.
 *
 * @param initialCapacity the initial number of buckets; must be positive
 *
 * @throws IllegalArgumentException if the initial capacity is 0 or negative
 */
public HashTable(int initialCapacity) {
    // A table without buckets can never work: the default hash function
    // divides by the capacity, and doubling a capacity of 0 never grows it.
    if (initialCapacity <= 0) {
        throw new IllegalArgumentException("Illegal initial capacity: " + initialCapacity);
    }

    this.capacity = initialCapacity;
    this.buckets = new Object[initialCapacity];
    this.length = 0;
}
3292
93+ /**
94+ * Initializes a new hash table with the specified initial capacity and load
95+ * factor.
96+ *
97+ * @param initialCapacity
98+ * @param loadFactor
99+ *
100+ * @throws IllegalArgumentException if the load factor is or is below 0.
101+ */
33102 public HashTable (int initialCapacity , float loadFactor ) {
34103 if (loadFactor <= 0 )
35104 throw new IllegalArgumentException ("Illegal load factor: " + loadFactor );
@@ -39,24 +108,67 @@ public HashTable(int initialCapacity, float loadFactor) {
39108 this .length = 0 ;
40109 }
41110
111+ /**
112+ * Returns the number of key-value pairs in this hash table.
113+ *
114+ * @return the number of key-value pairs in this hash table
115+ */
42116 public int getLength () {
43117 return this .length ;
44118 }
45119
120+ /**
121+ * Returns the number of buckets in this hash table.
122+ *
123+ * @return the number of buckets in this hash table
124+ */
46125 public int getCapacity () {
47126 return this .capacity ;
48127 }
49128
129+ /**
130+ * Returns the active hash function of this hash table.
131+ *
132+ * @return the active hash function of this hash table
133+ */
50134 public ToIntFunction <Integer > getHashFunction () {
51135 return this .hashFunction ;
52136 }
53137
138+ /**
139+ * Sets the hash function for this hash table.
140+ *
141+ * @param hashFunction
142+ */
54143 public void setHashFunction (ToIntFunction <Integer > hashFunction ) {
55144 this .hashFunction = hashFunction ;
56145 }
57146
147+ /**
148+ * Puts a <code>HashTableNode</code> into the specified array of buckets.
149+ *
150+ * <p>
151+ * A new node will be created if the key does not already exist among the
152+ * buckets. Otherwise assigns the <code>value</code> field of the existing
153+ * node to the specified value.
154+ *
155+ * <p>
156+ * This function is created for internal use and is shared by <code>put</code>
157+ * and <code>resize</code>. In particular, it helps <code>resize</code> avoid
158+ * using O(n) space.
159+ *
160+ * @param newNode
161+ * @param buckets
162+ * @return <code>true</code> if a new node was created, <code>false</code> if
163+ * an existing node was overridden.
164+ *
165+ * @see HashTableNode
166+ */
58167 private boolean put (HashTableNode <K , V > newNode , Object [] buckets ) {
168+ // Since we are adding to the end of the corresponding linked list, make
169+ // sure .next is null.
59170 newNode .next = null ;
171+ // The index which is the h(k) or the return value of the hash function.
60172 int index = this .hashFunction .applyAsInt (newNode .key .hashCode ());
61173
62174 if (buckets [index ] == null ) {
@@ -67,11 +179,12 @@ private boolean put(HashTableNode<K, V> newNode, Object[] buckets) {
67179 HashTableNode <K , V > node = (HashTableNode <K , V >) buckets [index ];
68180
69181 while (true ) {
70- if (node .key .equals (newNode .key )) {
182+ if (node .key .equals (newNode .key )) { // A node with the same key exists
71183 node .value = newNode .value ;
72184 return false ;
73185 }
74186
187+ // Otherwise move to the end of the linked list
75188 if (node .next != null )
76189 node = node .next ;
77190 else
@@ -83,39 +196,75 @@ private boolean put(HashTableNode<K, V> newNode, Object[] buckets) {
83196 }
84197 }
85198
199+ /**
200+ * Puts a new key-value pair into this hash table.
201+ *
202+ * <p>
203+ * If a node with the same key already exists, overrides the
204+ * <code>value</code> field of that node only. Otherwise create a new
205+ * <code>HashTableNode</code>.
206+ *
207+ * <p>
208+ * If adding would cause the hash table to exceed the maximum load factor,
209+ * double the capacity before adding.
210+ *
211+ * @param key
212+ * @param value
213+ *
214+ * @see #put(HashTableNode, Object[])
215+ */
86216 public void put (K key , V value ) {
217+ // Double in size if the load factor would be exceeded.
87218 if ((float ) (this .length + 1 ) / this .capacity > this .loadFactor )
88219 this .expand ();
89220
221+ // Increase the length only if a node was created (as oppossed to overridden)
90222 if (this .put (new HashTableNode <K , V >(key , value , null ), this .buckets ))
91223 this .length ++;
92224 }
93225
94- public boolean remove (K key ) {
226+ /**
227+ * Removes a key-value pair from this hash table, if it exists.
228+ *
229+ * @param key
230+ * @return the value that was matched with the specified key in the hash table
231+ * or <code>null</code> if the key was not found in the table.
232+ */
233+ public V remove (K key ) {
95234 int index = this .hashFunction .applyAsInt (key .hashCode ());
96235 HashTableNode <K , V > node = (HashTableNode <K , V >) this .buckets [index ];
97236
98237 if (node == null )
99- return false ;
238+ return null ;
100239
101240 if (node .key .equals (key )) {
102241 this .buckets [index ] = node .next ;
103- return true ;
242+
243+ this .length --;
244+ return node .value ;
104245 }
105246
106247 while (node .next != null ) {
107248 if (node .next .key .equals (key )) {
108249 node .next = node .next .next ;
109- return true ;
250+
251+ this .length --;
252+ return node .next .value ;
110253 }
111254
112255 node = node .next ;
113256 }
114257
115- // TODO: this.length--;
116- return false ;
258+ return null ;
117259 }
118260
261+ /**
262+ * Retrieves the value in this table that corresponds to the specified key.
263+ *
264+ * @param key
265+ * @return the value in this table that matches with the key, or <code>null
266+ * </code> if such a value does not exist in the table.
267+ */
119268 public V get (K key ) {
120269 int index = this .hashFunction .applyAsInt (key .hashCode ());
121270 HashTableNode <K , V > node = (HashTableNode <K , V >) this .buckets [index ];
@@ -130,6 +279,16 @@ public V get(K key) {
130279 return null ;
131280 }
132281
282+ /**
283+ * Resizes this hash table to a new capacity, copying all existing key-value
284+ * pairs to a new array.
285+ *
286+ * <p>
287+ * Copying is done by creating new references to the existing nodes instead
288+ * of creating new nodes to maintain a O(1) space usage.
289+ *
290+ * @param newCapacity
291+ */
133292 private void resize (int newCapacity ) {
134293 Object [] newBuckets = new Object [this .capacity = newCapacity ];
135294
@@ -148,15 +307,47 @@ private void resize(int newCapacity) {
148307 this .buckets = newBuckets ;
149308 }
150309
310+ /**
311+ * Doubles the capacity, or number of buckets.
312+ *
313+ * <p>
314+ * Used internally only for the <code>put</code> method.
315+ */
151316 private void expand () {
152317 this .resize (this .capacity * 2 );
153318 }
154319
320+ /**
321+ * Resizes
322+ *
323+ * @param minimumLoadFactor
324+ */
155325 public void shrink (float minimumLoadFactor ) {
156326 this .resize ((int ) Math .ceil (this .length / minimumLoadFactor ));
157327 }
158328
329+ /**
330+ * Returns the clustering measure of this hash table, which tells us how (not)
331+ * random the key distribution is.
332+ *
333+ * <p>
334+ * The meanings of the clustering is as follows:
335+ * <ul>
336+ * <li>around 1.0: the hash function is uniform;</li>
337+ * <li>greater than 1.0: clustering slows down the performance by a factor
338+ * of the clustering measure C;</li>
339+ * <li>less than 1.0: the hash function is spreading elements out more
340+ * evenly than a random hash function would;</li>
341+ * <li>0.0: the hash function is perfect and every key-value pair is in
342+ * its own bucket.</li>
343+ * </ul>
344+ *
345+ * @return the clustering of this hash table
346+ */
159347 public float measureClustering () {
348+ // First, calculate the sum of the squares of the number of elements in each
349+ // bucket.
350+
160351 int squaresSum = 0 ;
161352
162353 for (Object bucket : this .buckets ) {
@@ -174,6 +365,9 @@ public float measureClustering() {
174365 int m = this .capacity ;
175366 float n = this .length ;
176367
368+ // C = (m / (n − 1))((∑_i(x_i^2) / n) − 1) where x_i is the number of
369+ // elements in bucket i.
370+
177371 return (m / (n - 1 )) * (squaresSum / n - 1 );
178372 }
179373}
0 commit comments