LeetCode 705: Design HashSet

link

List of LinkedLists

We can use a fixed length list of LinkedList‘s. A key is put into the LinkedList or bucket at index hash(key).

Say len(buckets) = n and there are k keys. The cost of operations depend on the load-factor, \texttt{load-factor} = \frac{k}{n}. And the load-factor depends on how evenly the hash distributes the keys across different LinkedList‘s.

OperationTimeSpace
__init__\mathcal{O}(n)\mathcal{O}(n)
add\mathcal{O}(\texttt{load-factor})\mathcal{O}(1)
remove\mathcal{O}(\texttt{load-factor})\mathcal{O}(1)
contains\mathcal{O}(\texttt{load-factor})\mathcal{O}(1)
class ListNode:
    def __init__(self, key):
        self.key = key
        self.prev, self.next = None, None

class LinkedList:
    def __init__(self):
        self.head, self.tail = None, None
    
    def append(self, key):
        node = ListNode(key)

        if not self.tail:
            self.tail = node
            self.head = node
            return

        node.prev = self.tail
        self.tail.next = node
        self.tail = node

    def __find_key(self, key):
        t = self.head
        while t:
            if t.key == key:
                return t
            t = t.next

    def remove(self, key):
        node = self.__find_key(key)
        if not node:
            return

        prev_node, next_node = node.prev, node.next
        if not prev_node:
            # node is head
            self.head = self.head.next
            if self.head:
                self.head.prev = None
            else:
                self.tail = None
            node.prev, node.next = None, None
            return

        if not next_node:
            # node is tail
            self.tail = self.tail.prev
            self.tail.next = None
            node.prev, node.next = None, None
            return

        prev_node.next = next_node
        next_node.prev = prev_node
        node.prev, node.next = None, None

    def __contains__(self, key):
        node = self.__find_key(key)
        return node is not None

class MyHashSet:

    def __init__(self):
        self.buckets = [LinkedList() for _ in range(1023)]

    def __hash(self, key):
        return key % len(self.buckets)

    def add(self, key: int) -> None:
        if self.contains(key):
            return

        h = self.__hash(key)
        self.buckets[h].append(key)

    def remove(self, key: int) -> None:
        h = self.__hash(key)
        self.buckets[h].remove(key)

    def contains(self, key: int) -> bool:
        h = self.__hash(key)
        return key in self.buckets[h]


# Your MyHashSet object will be instantiated and called as such:
# obj = MyHashSet()
# obj.add(key)
# obj.remove(key)
# param_3 = obj.contains(key)

To keep the \texttt{load-factor} about 0.5, we can double or halve when key count is too high or too low.

class ListNode:
    def __init__(self, key):
        self.key = key
        self.prev, self.next = None, None

class LinkedList:
    def __init__(self):
        self.head, self.tail = None, None
    
    def append(self, key):
        node = ListNode(key)

        if not self.tail:
            self.tail = node
            self.head = node
            return

        node.prev = self.tail
        self.tail.next = node
        self.tail = node

    def __find_key(self, key):
        t = self.head
        while t:
            if t.key == key:
                return t
            t = t.next

    def remove(self, key):
        node = self.__find_key(key)
        if not node:
            return False

        prev_node, next_node = node.prev, node.next
        if not prev_node:
            # node is head
            self.head = self.head.next
            if self.head:
                self.head.prev = None
            else:
                self.tail = None
            node.prev, node.next = None, None
            return True

        if not next_node:
            # node is tail
            self.tail = self.tail.prev
            self.tail.next = None
            node.prev, node.next = None, None
            return True

        prev_node.next = next_node
        next_node.prev = prev_node
        node.prev, node.next = None, None
        return True

    def __contains__(self, key):
        node = self.__find_key(key)
        return node is not None

    def __iter__(self):
        t = self.head
        while t:
            yield t.key
            t = t.next

class MyHashSet:

    def __init__(self):
        self.key_count = 0
        self.buckets = [LinkedList() for _ in range(1023)]

    def __resize(self, new_size):
        new_buckets = [LinkedList() for _ in range(new_size)]
        for bucket in self.buckets:
            for key in bucket:
                h = key % new_size
                new_buckets[h].append(key)
        self.buckets = new_buckets

    def __hash(self, key):
        return key % len(self.buckets)

    def add(self, key: int) -> None:
        if self.contains(key):
            return

        if self.key_count > len(self.buckets) // 2:
            self.__resize(2 * len(self.buckets))

        h = self.__hash(key)
        self.buckets[h].append(key)
        self.key_count += 1

    def remove(self, key: int) -> None:
        h = self.__hash(key)
        was_removed = self.buckets[h].remove(key)
        self.key_count -= (1 if was_removed else 0)

        if self.key_count < len(self.buckets) // 8:
            self.__resize(len(self.buckets) // 2)

    def contains(self, key: int) -> bool:
        h = self.__hash(key)
        return key in self.buckets[h]


# Your MyHashSet object will be instantiated and called as such:
# obj = MyHashSet()
# obj.add(key)
# obj.remove(key)
# param_3 = obj.contains(key)

Leave a comment