Skip to content

Commit 887f683

Browse files
committed
HashTable: Fix .remove() and add JavaDoc
The remove method did not modify the length.
1 parent ce0187e commit 887f683

1 file changed

Lines changed: 202 additions & 8 deletions

File tree

src/main/lists/HashTable.java

Lines changed: 202 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
import java.util.function.ToIntFunction;
22

3+
/**
4+
* A special (singly) linked list node class for hash tables.
5+
*/
36
class HashTableNode<K, V> {
47
K key;
58
V value;
@@ -12,24 +15,90 @@ class HashTableNode<K, V> {
1215
}
1316
}
1417

18+
/**
19+
* An implementation of the hash table data structure that uses chaining to
20+
* resolve collisions.
21+
*
22+
* <p>
23+
* The implementation uses a static array whose individual elements we call
24+
* "buckets". Each bucket contains a linked list of <code>HashTableNode</code>.
25+
* Each node stores a key-value pair. The bucket is <code>null</code> if no
26+
* key-value pair whose key hashes to the bucket's index has been added.
27+
*/
1528
@SuppressWarnings("unchecked")
1629
public class HashTable<K, V> {
30+
/**
31+
* The buckets of this hash table, each containing a singly linked list for
32+
* storing data, or <code>null</code>.
33+
*/
1734
Object[] buckets;
18-
float loadFactor = 0.75f; // The maximum load factor
35+
/**
36+
* The maximum load factor allowed for this hash table.
37+
*
38+
* <p>
39+
* If putting a new key would cause the hash table to exceed this max amount,
40+
* the table will double its size.
41+
*
42+
* <p>
43+
* Good load factors are between 0.5 and 2.0 where the hash table would not
44+
* become too sparse or too dense. This implementation defaults to 0.75.
45+
*
46+
* @see #put(Object, Object)
47+
*/
48+
float loadFactor = 0.75f;
49+
/**
50+
* The hash function used to calculate the index for an incoming key-value
51+
* pair.
52+
*
53+
* <p>
54+
* The hash function takes only one parameter, the hash code of the key, and
55+
* always returns an integer: the index of the bucket for that key.
56+
*/
1957
ToIntFunction<Integer> hashFunction = k -> Math.floorMod(k, this.capacity); // h(k); floorMod keeps negative hash codes inside [0, capacity)
58+
/**
59+
* The number of key-value pairs that are stored by this hash table.
60+
*/
2061
int length;
62+
/**
63+
* The number of buckets in this hash table.
64+
*
65+
* <p>
66+
* While this is usually the same as <code>this.buckets.length</code>, this
67+
* variable was introduced so the hashing function could refer to the new
68+
* capacity instead of the current capacity.
69+
*
70+
* @see #resize(int)
71+
*/
2172
int capacity;
2273

74+
/**
75+
* Initializes a new hash table with 16 buckets and a load factor of 0.75.
76+
*/
2377
public HashTable() {
2478
this.buckets = new Object[this.capacity = 16];
2579
this.length = 0;
2680
}
2781

82+
/**
83+
* Initializes a new hash table with the specified initial capacity and a load
84+
* factor of 0.75.
85+
*
86+
* @param initialCapacity
87+
*/
2888
public HashTable(int initialCapacity) {
2989
this.buckets = new Object[this.capacity = initialCapacity];
3090
this.length = 0;
3191
}
3292

93+
/**
94+
* Initializes a new hash table with the specified initial capacity and load
95+
* factor.
96+
*
97+
* @param initialCapacity
98+
* @param loadFactor
99+
*
100+
* @throws IllegalArgumentException if the load factor is less than or equal to 0.
101+
*/
33102
public HashTable(int initialCapacity, float loadFactor) {
34103
if (loadFactor <= 0)
35104
throw new IllegalArgumentException("Illegal load factor: " + loadFactor);
@@ -39,24 +108,67 @@ public HashTable(int initialCapacity, float loadFactor) {
39108
this.length = 0;
40109
}
41110

111+
/**
112+
* Returns the number of key-value pairs in this hash table.
113+
*
114+
* @return the number of key-value pairs in this hash table
115+
*/
42116
public int getLength() {
43117
return this.length;
44118
}
45119

120+
/**
121+
* Returns the number of buckets in this hash table.
122+
*
123+
* @return the number of buckets in this hash table
124+
*/
46125
public int getCapacity() {
47126
return this.capacity;
48127
}
49128

129+
/**
130+
* Returns the active hash function of this hash table.
131+
*
132+
* @return the active hash function of this hash table
133+
*/
50134
public ToIntFunction<Integer> getHashFunction() {
51135
return this.hashFunction;
52136
}
53137

138+
/**
139+
* Sets the hash function for this hash table.
140+
*
141+
* @param hashFunction
142+
*/
54143
public void setHashFunction(ToIntFunction<Integer> hashFunction) {
55144
this.hashFunction = hashFunction;
56145
}
57146

147+
/**
148+
* Puts a <code>HashTableNode</code> into the specified array of buckets.
149+
*
150+
* <p>
151+
* A new node will be created if the key does not already exist among the
152+
* buckets. Otherwise assigns the <code>value</code> field of the existing
153+
* node to the specified value.
154+
*
155+
* <p>
156+
* This function is created for internal use and is shared by <code>put</code>
157+
* and <code>resize</code>. In particular, it helps <code>resize</code> avoid
158+
* using O(n) space.
159+
*
160+
* @param newNode
161+
* @param buckets
162+
* @return <code>true</code> if a new node was created, <code>false</code> if
163+
* an existing node was overridden.
164+
*
165+
* @see HashTableNode
166+
*/
58167
private boolean put(HashTableNode<K, V> newNode, Object[] buckets) {
168+
// Since we are adding to the end of the corresponding linked list, make
169+
// sure .next is null.
59170
newNode.next = null;
171+
// The index which is the h(k) or the return value of the hash function.
60172
int index = this.hashFunction.applyAsInt(newNode.key.hashCode());
61173

62174
if (buckets[index] == null) {
@@ -67,11 +179,12 @@ private boolean put(HashTableNode<K, V> newNode, Object[] buckets) {
67179
HashTableNode<K, V> node = (HashTableNode<K, V>) buckets[index];
68180

69181
while (true) {
70-
if (node.key.equals(newNode.key)) {
182+
if (node.key.equals(newNode.key)) { // A node with the same key exists
71183
node.value = newNode.value;
72184
return false;
73185
}
74186

187+
// Otherwise move to the end of the linked list
75188
if (node.next != null)
76189
node = node.next;
77190
else
@@ -83,39 +196,75 @@ private boolean put(HashTableNode<K, V> newNode, Object[] buckets) {
83196
}
84197
}
85198

199+
/**
200+
* Puts a new key-value pair into this hash table.
201+
*
202+
* <p>
203+
* If a node with the same key already exists, overrides the
204+
* <code>value</code> field of that node only. Otherwise create a new
205+
* <code>HashTableNode</code>.
206+
*
207+
* <p>
208+
* If adding would cause the hash table to exceed the maximum load factor,
209+
* double the capacity before adding.
210+
*
211+
* @param key
212+
* @param value
213+
*
214+
* @see #put(HashTableNode, Object[])
215+
*/
86216
public void put(K key, V value) {
217+
// Double in size if the load factor would be exceeded.
87218
if ((float) (this.length + 1) / this.capacity > this.loadFactor)
88219
this.expand();
89220

221+
// Increase the length only if a node was created (as opposed to overridden)
90222
if (this.put(new HashTableNode<K, V>(key, value, null), this.buckets))
91223
this.length++;
92224
}
93225

94-
public boolean remove(K key) {
226+
/**
227+
* Removes a key-value pair from this hash table, if it exists.
228+
*
229+
* @param key
230+
* @return the value that was matched with the specified key in the hash table
231+
* or <code>null</code> if the key was not found in the table.
232+
*/
233+
public V remove(K key) {
95234
int index = this.hashFunction.applyAsInt(key.hashCode());
96235
HashTableNode<K, V> node = (HashTableNode<K, V>) this.buckets[index];
97236

98237
if (node == null)
99-
return false;
238+
return null;
100239

101240
if (node.key.equals(key)) {
102241
this.buckets[index] = node.next;
103-
return true;
242+
243+
this.length--;
244+
return node.value;
104245
}
105246

106247
while (node.next != null) {
107248
if (node.next.key.equals(key)) {
108249
node.next = node.next.next;
109-
return true;
250+
251+
this.length--;
252+
return node.next.value;
110253
}
111254

112255
node = node.next;
113256
}
114257

115-
// TODO: this.length--;
116-
return false;
258+
return null;
117259
}
118260

261+
/**
262+
* Retrieves the value in this table that corresponds to the specified key.
263+
*
264+
* @param key
265+
* @return the value in this table that matches with the key, or <code>null
266+
* </code> if such a value does not exist in the table.
267+
*/
119268
public V get(K key) {
120269
int index = this.hashFunction.applyAsInt(key.hashCode());
121270
HashTableNode<K, V> node = (HashTableNode<K, V>) this.buckets[index];
@@ -130,6 +279,16 @@ public V get(K key) {
130279
return null;
131280
}
132281

282+
/**
283+
* Resizes this hash table to a new capacity, copying all existing key-value
284+
* pairs to a new array.
285+
*
286+
* <p>
287+
* Copying is done by creating new references to the existing nodes instead
288+
* of creating new nodes to maintain an O(1) space usage.
289+
*
290+
* @param newCapacity
291+
*/
133292
private void resize(int newCapacity) {
134293
Object[] newBuckets = new Object[this.capacity = newCapacity];
135294

@@ -148,15 +307,47 @@ private void resize(int newCapacity) {
148307
this.buckets = newBuckets;
149308
}
150309

310+
/**
311+
* Doubles the capacity, or number of buckets.
312+
*
313+
* <p>
314+
* Used internally only for the <code>put</code> method.
315+
*/
151316
private void expand() {
152317
this.resize(this.capacity * 2);
153318
}
154319

320+
/**
321+
* Resizes
322+
*
323+
* @param minimumLoadFactor
324+
*/
155325
public void shrink(float minimumLoadFactor) {
156326
this.resize((int) Math.ceil(this.length / minimumLoadFactor));
157327
}
158328

329+
/**
330+
* Returns the clustering measure of this hash table, which tells us how (not)
331+
* random the key distribution is.
332+
*
333+
* <p>
334+
* The meaning of the clustering measure is as follows:
335+
* <ul>
336+
* <li>around 1.0: the hash function is uniform;</li>
337+
* <li>greater than 1.0: clustering slows down the performance by a factor
338+
* of the clustering measure C;</li>
339+
* <li>less than 1.0: the hash function is spreading elements out more
340+
* evenly than a random hash function would;</li>
341+
* <li>0.0: the hash function is perfect and every key-value pair is in
342+
* its own bucket.</li>
343+
* </ul>
344+
*
345+
* @return the clustering of this hash table
346+
*/
159347
public float measureClustering() {
348+
// First, calculate the sum of the squares of the number of elements in each
349+
// bucket.
350+
160351
int squaresSum = 0;
161352

162353
for (Object bucket: this.buckets) {
@@ -174,6 +365,9 @@ public float measureClustering() {
174365
int m = this.capacity;
175366
float n = this.length;
176367

368+
// C = (m / (n − 1))((∑_i(x_i^2) / n) − 1) where x_i is the number of
369+
// elements in bucket i.
370+
177371
return (m / (n - 1)) * (squaresSum / n - 1);
178372
}
179373
}

0 commit comments

Comments
 (0)