简体   繁体   English

Python中的AVL树性能

[英]AVL tree performance in Python

I put together an AVL Tree with deletion and allowance for duplicate keys. 我将删除和允许重复密钥的AVL树放在一起。 Its based on a couple examples I found online (and which are listed in the comments of the code). 它基于我在网上找到的几个示例(并在代码注释中列出)。

I wanted to compare insertion performance versus the standard Python list. 我想将插入性能与标准Python列表进行比较。 I wrote a function that generates an arbitrary amount of random ints and inserts them into the avl tree. 我编写了一个函数,该函数生成任意数量的随机整数并将其插入到avl树中。 I wrote the same function again for inserting into the 0 index of the standard Python list. 我再次编写了相同的函数以将其插入标准Python列表的0索引中。 Then I used a timeit decorator to measure how long each function took to execute. 然后,我使用timeit装饰器来测量每个函数执行所需的时间。

With 100,000 random integers the time was 7693ms for the AVL Tree and 3906ms for the List. 对于100,000个随机整数,AVL树的时间为7693ms,列表的时间为3906ms。 With 500,000 random integers the AVL Tree took 46894ms and the List took 136665ms. 带有500,000个随机整数的AVL树花费了46894毫秒,列表花费了136665毫秒。 It makes sense that the AVL Tree would win out asymptotically, but is there anything I can do to improve the AVL Tree when dealing with smaller numbers of inserts? AVL树会渐近取胜是有道理的,但是当处理较少数量的插入片段时,我可以做些什么来改进AVL树吗?

Here is my code, sorry if its sloppy: 这是我的代码,如果它草率的话,对不起:

#!/usr/bin/env python
import time
from random import randint
from typing import Optional

"""
Class based AVL balanced binary search tree.
Based on designs from:
https://interactivepython.org/runestone/static/pythonds/Trees/AVLTreeImplementation.html
http://www.geeksforgeeks.org/avl-tree-set-2-deletion/

A tree constists of a single AVL_Tree object and
many Node objects.

What distinguises AVL_Tree from a plain Binary Search Tree is
it's self balancing property. Whenever a node is inserted or
deleted, the balance factors of the affected nodes are checked
and Nodes are rotated to maintain balance in the tree. This
ensures O(logN) insertion, deletion, and search performance.

"""

class Node:
    def __init__(self, key, left=None, right=None, parent=None, payload=None):
        self.key = key
        self.left = left
        self.right= right
        self.parent = parent
        self.height = 1
        if payload:
            self.payload = payload
        else:
            self.payload = self.key
        self.count = 1


class AVL_Tree:
    def __init__(self):
        self.root = None

    def height(self, node: Node) -> int:
        if node == None:
            return 0
        return node.height

    def right_rotate(self, y: Node) -> None:
        x = y.left
        y.left = x.right
        if x.right != None:
            x.right.parent = y
        x.parent = y.parent
        if self.root == y:
            self.root = x
        else:
            if y.parent.left == y:
                y.parent.left = x
            else:
                y.parent.right = x
        x.right = y
        y.parent = x

        y.height = max(self.height(y.left), self.height(y.right)) + 1
        x.height = max(self.height(x.left), self.height(x.right)) + 1

    def left_rotate(self, x: Node) -> None:
        y = x.right
        x.right = y.left

        if y.left != None:
           y.left.parent = x
        y.parent = x.parent

        if self.root == x:
            self.root = y
        else:
            if x.parent.left == x:
               x.parent.left = y
            else:
                x.parent.right = y
        y.left = x
        x.parent = y

        x.height = max(self.height(x.left), self.height(x.right)) + 1
        y.height = max(self.height(y.left), self.height(y.right)) + 1

    def get_balance(self, node: Node) -> int:
        if node == None:
            return 0
        return self.height(node.left) - self.height(node.right)

    def insert(self, key: int, insertion_point=None, payload=None) -> None:
        node = Node(key)
        if payload != None:
            node.payload = payload
        # If the tree is empty then assign new node to root
        if self.root == None:
            self.root = node
            return

        if insertion_point == None:
            insertion_point = self.root

        if key == insertion_point.key:
            insertion_point.count += 1
        elif key < insertion_point.key:
            if insertion_point.left:
                self.insert(key, insertion_point.left, payload)
            else:
                insertion_point.left = node
                node.parent = insertion_point
        elif key > insertion_point.key:
            if insertion_point.right:
                self.insert(key, insertion_point.right, payload)
            else:
                insertion_point.right = node
                node.parent = insertion_point
        else:
            return

        insertion_point.height = 1 + max(self.height(insertion_point.left), self.height(insertion_point.right))
        balance = self.get_balance(insertion_point)

        if balance > 1 and key < insertion_point.left.key:
            # Left Left
            self.right_rotate(insertion_point)
        elif balance < -1 and key > insertion_point.right.key:
            # Right Right
            self.left_rotate(insertion_point)
        elif balance > 1 and key > insertion_point.left.key:
            # Left Right
            self.left_rotate(insertion_point.left)
            self.right_rotate(insertion_point)
        elif balance < -1 and key < insertion_point.right.key:
            # Right Left
            self.right_rotate(insertion_point.right)
            self.left_rotate(insertion_point)

    def get(self, key: int) -> Optional[Node]:
        if self.root:
            node = self._get(key,self.root)
            if node:
                return node
            else:
                return None
        else:
            return None

    def _get(self, key: int, currentNode: Node) -> Optional[Node]:
        if not currentNode
            return None
        elif currentNode.key == key:
            return currentNode
        elif key < currentNode.key:
            return self._get(key, currentNode.left)
        else:
            return self._get(key,currentNode.right)

    def __getitem__(self,key: int):
        """ Overloads [] getter to use get() """
        return self.get(key)

    def __contains__(self,key):
        if self.get(key):
            return True
        else:
            return False

    def min_value(self, key: int) -> int:
        """ Return the lowest value key in subtree with root 'node' """
        sub_tree_root = self.get(key)
        while sub_tree_root.left != None:
            sub_tree_root = sub_tree_root.left
        return sub_tree_root.key

    def delete(self, key: int, starting_node: Node = None) -> None:
        """
        When removing a node there are three cases:
            1. The node has no children:
                Delete pointer in parent node and
                delete node object.
            2. The node has one child:
                Promote the child to take node's place
                then delete node object.
            3. The node has two children:
                Search tree for a node that can replace
                the node and preserve the binary structure
                This will be the next largest node in
                the tree and will never have two children.
                This means it can be removed and swapped
                in using the first two cases.
        """
        if self.root == None:
            return
        if starting_node == None:
            starting_node = self.root

        # key < starting_node so we recurse left
        if key < starting_node.key:
            self.delete(key, starting_node.left)
        # key > starting_node so we recurse right
        elif key > starting_node.key:
            self.delete(key, starting_node.right)
        # starting_node is key and we can begin the deletion process.
        else:
            if starting_node.count > 1:
                starting_node.count -= 1
            # starting_node is a leaf
            elif starting_node.left == None and starting_node.right == None:
                if starting_node == starting_node.parent.left:
                    starting_node.parent.left = None
                else:
                    starting_node.parent.right = None
            # starting_node has both children
            elif starting_node.left != None and starting_node.right != None:
                succ = self.get(self.min_value(starting_node.right.key))
                starting_node.key = succ.key
                starting_node.payload = succ.payload
                # succ is a leaf 
                # (succ cannot have a left child because it is the min)
                if succ.right == None:
                    # succ is a left child
                    if succ.parent.left == succ:
                        succ.parent.left = None
                    # succ is a right child
                    else:
                        succ.parent.right = None
                # succ has a right child
                else:
                    # succ is a left child
                    if succ.parent.left == succ:
                        succ.parent.left = succ.right
                        succ.right.parent = succ.parent
                    # succ is a right child
                    else:
                        succ.parent.right = succ.right
                        succ.right.parent = succ.parent
            # starting_node has one child
            else:
                if starting_node == self.root:
                    # Child is left
                    if starting_node.left != None:
                        starting_node.left.parent = None
                        self.root = starting_node.left
                    # Child is right
                    else:
                        starting_node.right.parent = None
                        self.root = starting_node.right
                # starting_node is left child:
                elif starting_node.parent.left == starting_node:
                    # Child is left
                    if starting_node.left != None:
                        starting_node.left.parent = starting_node.parent
                        starting_node.parent.left = starting_node.left
                    # Child is right
                    else:
                        starting_node.right.parent = starting_node.parent
                        starting_node.parent.left = starting_node.right
                # starting_node is right child
                else:
                    # Child is left
                    if starting_node.left != None:
                        starting_node.left.parent = starting_node.parent
                        starting_node.parent.right = starting_node.left
                    else:
                        starting_node.right.parent = starting_node.parent
                        starting_node.parent.right = starting_node.right

        # Update height of starting_node
        starting_node.height = max(self.height(starting_node.left), self.height(starting_node.right)) + 1

        # Get balance factor
        balance = self.get_balance(starting_node)
        # Use balance factor to rotate

        # Left Left
        if balance > 1 and self.get_balance(starting_node.left) >= 0:
            self.right_rotate(starting_node)
        # Left Right
        if balance > 1 and self.get_balance(starting_node.left) < 0:
            self.left_rotate(starting_node.left)
            self.right_rotate(starting_node)
        # Right Right
        if balance < -1 and self.get_balance(starting_node.right) <= 0:
            self.left_rotate(starting_node)
        # Right Left
        if balance < -1 and self.get_balance(starting_node.right) > 0:
            self.right_rotate(starting_node.right)
            self.left_rotate(starting_node)

    def __delitem__(self,key):
        self.delete(key)


def traverse(rootnode: Node) -> None:
    thislevel = [rootnode]
    while thislevel:
        nextlevel = list()
        row_string = ""
        for n in thislevel:
            if n.parent != None:
                if n.parent.left == n:
                    relation = "L"
                elif n.parent.right == n:
                    relation = "R"
            else:
                relation = "ro"
            row_string += str(n.key) + str((relation, n.payload)) + " "
            if n.left: nextlevel.append(n.left)
            if n.right: nextlevel.append(n.right)
        print(row_string)
        thislevel = nextlevel


def timeit(method):
    def timed(*args, **kw):
        ts = time.time()
        result = method(*args, **kw)
        te = time.time()
        if 'log_time' in kw:
            name = kw.get('log_name', method.__name__.upper())
            kw['log_time'][name] = int((te - ts) * 1000)
        else:
            print( '%r  %2.2f ms' % \
                  (method.__name__, (te - ts) * 1000))
        return result
    return timed


@timeit
def avl_inserter(items):
    tree = AVL_Tree()
    for _ in range(1, items):
        tree.insert(randint(1,items))
    return None

@timeit
def list_inserter(items):
    l = []
    for _ in range(1, items):
        l.insert(0, randint(1,items))
    return None

if __name__ == '__main__':
    avl_inserter(100000)
    list_inserter(100000)

After a lot of experimentation I replaced the recursive insert method with an iterative version. 经过大量的实验,我用迭代版本替换了递归插入方法。 I made a number of smaller tweaks based on the output of cProfile and got the runtime down to ~4100ms with 100,000 inserts. 我根据cPro​​file的输出进行了一些较小的调整,并通过100,000次插入将运行时间降低至约4100ms。

My iterative insert() uses a while loop to find the path to insertion point then a for loop to walk back up the path and do the necessary rotations. 我的迭代insert()使用while循环找到插入点的路径,然后使用for循环返回路径并进行必要的旋转。 I would think this would be inherently slower then a recursive insertion and would take at best 2*logn time for insertion. 我认为这本质上比递归插入要慢,并且插入最多需要2个登录时间。 Can anyone explain what was causing my recursive version to be so slow? 谁能解释造成我的递归版本太慢的原因?

#!/usr/bin/env python
"""
Class based AVL balanced binary search tree.
Based on designs from:
https://interactivepython.org/runestone/static/pythonds/Trees/AVLTreeImplementation.html
http://www.geeksforgeeks.org/avl-tree-set-2-deletion/

A tree constists of a single AVL_Tree object and
many Node objects.

What distinguises AVL_Tree from a plain Binary Search Tree is
it's self balancing property. Whenever a node is inserted or
deleted, the balance factors of the affected nodes are checked
and Nodes are rotated to maintain balance in the tree. This
ensures O(logN) insertion, deletion, and search performance.
"""

import time
import random
from typing import Optional

class Node:
    def __init__(self, key, left=None, right=None, parent=None, payload=None):
        self.key = key
        self.left = left
        self.right = right
        self.parent = parent
        self.height = 1
        if payload:
            self.payload = payload
        else:
            self.payload = self.key
        self.count = 1


class AvlTree:
    def __init__(self):
        self.root = None

    def right_rotate(self, current_node: Node) -> None:
        """ Performs a right rotation for balancing the tree """
        left_child = current_node.left
        current_node.left = left_child.right
        if left_child.right != None:
            left_child.right.parent = current_node
        left_child.parent = current_node.parent
        if self.root == current_node:
            self.root = left_child
        else:
            if current_node.parent.left == current_node:
                current_node.parent.left = left_child
            else:
                current_node.parent.right = left_child
        left_child.right = current_node
        current_node.parent = left_child

        current_node.height = max(
            current_node.left.height if current_node.left else 0,
            current_node.right.height if current_node.right else 0) + 1
        left_child.height = max(
            left_child.left.height if left_child.left else 0,
            left_child.right.height if left_child.right else 0) + 1

    def left_rotate(self, current_node: Node) -> None:
        """ Performs a left rotation for balancing the tree """
        right_child = current_node.right
        current_node.right = right_child.left

        if right_child.left != None:
            right_child.left.parent = current_node
        right_child.parent = current_node.parent

        if self.root == current_node:
            self.root = right_child
        else:
            if current_node.parent.left == current_node:
                current_node.parent.left = right_child
            else:
                current_node.parent.right = right_child
        right_child.left = current_node
        current_node.parent = right_child

        current_node.height = max(
            current_node.left.height if current_node.left else 0,
            current_node.right.height if current_node.right else 0) + 1
        right_child.height = max(
            right_child.left.height if right_child.left else 0,
            right_child.right.height if right_child.right else 0) + 1

    def get_balance(self, node: Node) -> int:
        """ Returns balance factor for a node """
        if node is None:
            return 0
        return (node.left.height if node.left else 0) - (node.right.height if node.right else 0)

    def rotate_manager(self, node: Node, inserted_key: int, balance: int) -> None:

        if balance > 1 and inserted_key < node.left.key:
            # Left Left
            self.right_rotate(node)
        elif balance < -1 and inserted_key > node.right.key:
            # Right Right
            self.left_rotate(node)
        elif balance > 1 and inserted_key > node.left.key:
            # Left Right
            self.left_rotate(node.left)
            self.right_rotate(node)
        elif balance < -1 and inserted_key < node.right.key:
            # Right Left
            self.right_rotate(node.right)
            self.left_rotate(node)

    def insert(self, key: int, insertion_point=None, payload=None) -> None:
        """ Insert new node into the tree """
        node = Node(key)
        if payload is not None:
            node.payload = payload
        # If the tree is empty then assign new node to root
        if self.root is None:
            self.root = node
            return

        if insertion_point is None:
            insertion_point = self.root

        search_queue = [insertion_point]
        index = 0
        while search_queue:
            if key < search_queue[index].key:
                if search_queue[index].left:
                    search_queue.append(search_queue[index].left)
                    index += 1
                else:
                    search_queue[index].left = node
                    node.parent = search_queue[index]
                    break
            elif key > search_queue[index].key:
                if search_queue[index].right:
                    search_queue.append(search_queue[index].right)
                    index += 1
                else:
                    search_queue[index].right = node
                    node.parent = search_queue[index]
                    break

        for n in reversed(search_queue):
            n.height = max(
                n.left.height if n.left else 0,
                n.right.height if n.right else 0) + 1
            balance = self.get_balance(n)
            if balance > 1 or balance < -1:
                self.rotate_manager(n, key, balance)


    def get(self, key: int) -> Optional[Node]:
        """ Returns a node with key if found in tree """
        if self.root:
            node = self._get(key, self.root)
            if node:
                return node
            return None
        return None

    def _get(self, key: int, current_node: Node) -> Optional[Node]:
        """ Recursive search method called by get() """
        if not current_node:
            return None
        elif current_node.key == key:
            return current_node
        elif key < current_node.key:
            return self._get(key, current_node.left)
        return self._get(key, current_node.right)

    def __getitem__(self, key: int):
        """ Overloads [] getter to use get() """
        return self.get(key)

    def __contains__(self, key):
        return bool(self.get(key))

    def min_value(self, key: int) -> int:
        """ Return the lowest value key in subtree with root 'node' """
        sub_tree_root = self.get(key)
        while sub_tree_root.left != None:
            sub_tree_root = sub_tree_root.left
        return sub_tree_root.key

    def delete(self, key: int, starting_node: Node = None) -> None:
        """
        When removing a node there are three cases:
            1. The node has no children:
                Delete pointer in parent node and
                delete node object.
            2. The node has one child:
                Promote the child to take node's place
                then delete node object.
            3. The node has two children:
                Search tree for a node that can replace
                the node and preserve the binary structure
                This will be the next largest node in
                the tree and will never have two children.
                This means it can be removed and swapped
                in using the first two cases.
        """
        if self.root is None:
            return
        if starting_node is None:
            starting_node = self.root

        # key < starting_node so we recurse left
        if key < starting_node.key:
            self.delete(key, starting_node.left)
        # key > starting_node so we recurse right
        elif key > starting_node.key:
            self.delete(key, starting_node.right)
        # starting_node is key and we can begin the deletion process.
        else:
            if starting_node.count > 1:
                starting_node.count -= 1
            # starting_node is a leaf
            elif starting_node.left is None and starting_node.right is None:
                if starting_node == starting_node.parent.left:
                    starting_node.parent.left = None
                else:
                    starting_node.parent.right = None
            # starting_node has both children
            elif starting_node.left != None and starting_node.right != None:
                succ = self.get(self.min_value(starting_node.right.key))
                starting_node.key = succ.key
                starting_node.payload = succ.payload
                # succ is a leaf
                # (succ cannot have a left child because it is the min)
                if succ.right is None:
                    # succ is a left child
                    if succ.parent.left == succ:
                        succ.parent.left = None
                    # succ is a right child
                    else:
                        succ.parent.right = None
                # succ has a right child
                else:
                    # succ is a left child
                    if succ.parent.left == succ:
                        succ.parent.left = succ.right
                        succ.right.parent = succ.parent
                    # succ is a right child
                    else:
                        succ.parent.right = succ.right
                        succ.right.parent = succ.parent
            # starting_node has one child
            else:
                if starting_node == self.root:
                    # Child is left
                    if starting_node.left != None:
                        starting_node.left.parent = None
                        self.root = starting_node.left
                    # Child is right
                    else:
                        starting_node.right.parent = None
                        self.root = starting_node.right
                # starting_node is left child:
                elif starting_node.parent.left == starting_node:
                    # Child is left
                    if starting_node.left != None:
                        starting_node.left.parent = starting_node.parent
                        starting_node.parent.left = starting_node.left
                    # Child is right
                    else:
                        starting_node.right.parent = starting_node.parent
                        starting_node.parent.left = starting_node.right
                # starting_node is right child
                else:
                    # Child is left
                    if starting_node.left != None:
                        starting_node.left.parent = starting_node.parent
                        starting_node.parent.right = starting_node.left
                    else:
                        starting_node.right.parent = starting_node.parent
                        starting_node.parent.right = starting_node.right

        # Update height of starting_node
        starting_node.height = max(
            self.height(starting_node.left),
            self.height(starting_node.right)) + 1

        # Get balance factor
        balance = self.get_balance(starting_node)
        # Use balance factor to rotate

        # Left Left
        if balance > 1 and self.get_balance(starting_node.left) >= 0:
            print('L L')
            self.right_rotate(starting_node)
        # Left Right
        if balance > 1 and self.get_balance(starting_node.left) < 0:
            print('L R')
            self.left_rotate(starting_node.left)
            self.right_rotate(starting_node)
        # Right Right
        if balance < -1 and self.get_balance(starting_node.right) <= 0:
            print('R R')
            self.left_rotate(starting_node)
        # Right Left
        if balance < -1 and self.get_balance(starting_node.right) > 0:
            print('R L')
            self.right_rotate(starting_node.right)
            self.left_rotate(starting_node)

    def __delitem__(self, key):
        self.delete(key)


def traverse(rootnode: Node) -> None:
    """ Prints a map of the tree starting at rootnode """
    thislevel = [rootnode]
    while thislevel:
        nextlevel = list()
        row_string = ""
        for node in thislevel:
            if node.parent != None:
                if node.parent.left == node:
                    relation = "L"
                elif node.parent.right == node:
                    relation = "R"
            else:
                relation = "ro"
            row_string += str(node.key) + str((relation, node.height)) + " "
            if node.left:
                nextlevel.append(node.left)
            if node.right:
                nextlevel.append(node.right)
        print(row_string)
        thislevel = nextlevel


def timeit(method):
    """ timeit decorator """
    def timed(*args, **kw):
        """ inner timeit function """
        ts = time.time()
        result = method(*args, **kw)
        te = time.time()
        if 'log_time' in kw:
            name = kw.get('log_name', method.__name__.upper())
            kw['log_time'][name] = int((te - ts) * 1000)
        else:
            print('%r  %2.2f ms' % \
                  (method.__name__, (te - ts) * 1000))
        return result
    return timed


@timeit
def avl_inserter(items):
    """ Tree insertion speed test """
    samples = random.sample(range(1, 1000000), items)
    sample_tree = AvlTree()
    for sample in samples:
        sample_tree.insert(sample)
    return None

@timeit
def list_inserter(items):
    """ List Insertion speed test """
    samples = random.sample(range(1, 1000000), items)
    sample_list = []
    for sample in samples:
        sample_list.insert(0, sample)
    return None

if __name__ == '__main__':
    avl_inserter(100000)
    #list_inserter(500000)
    #tree = AvlTree()
    #tree.insert(10, payload=5)
    #tree.insert(15, payload=3)
    #tree.insert(11, payload=4)
    #tree.insert(20)
    #tree.insert(17)
    #tree.insert(25)
    #tree.insert(18)
    #traverse(tree.root)

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM