[英]AVL tree performance in Python
I put together an AVL Tree with deletion and allowance for duplicate keys. 我将删除和允许重复密钥的AVL树放在一起。 Its based on a couple examples I found online (and which are listed in the comments of the code).
它基于我在网上找到的几个示例(并在代码注释中列出)。
I wanted to compare insertion performance versus the standard Python list. 我想将插入性能与标准Python列表进行比较。 I wrote a function that generates an arbitrary amount of random ints and inserts them into the avl tree.
我编写了一个函数,该函数生成任意数量的随机整数并将其插入到avl树中。 I wrote the same function again for inserting into the 0 index of the standard Python list.
我再次编写了相同的函数以将其插入标准Python列表的0索引中。 Then I used a timeit decorator to measure how long each function took to execute.
然后,我使用timeit装饰器来测量每个函数执行所需的时间。
With 100,000 random integers the time was 7693ms for the AVL Tree and 3906ms for the List. 对于100,000个随机整数,AVL树的时间为7693ms,列表的时间为3906ms。 With 500,000 random integers the AVL Tree took 46894ms and the List took 136665ms.
带有500,000个随机整数的AVL树花费了46894毫秒,列表花费了136665毫秒。 It makes sense that the AVL Tree would win out asymptotically, but is there anything I can do to improve the AVL Tree when dealing with smaller numbers of inserts?
AVL树会渐近取胜是有道理的,但是当处理较少数量的插入片段时,我可以做些什么来改进AVL树吗?
Here is my code, sorry if its sloppy: 这是我的代码,如果它草率的话,对不起:
#!/usr/bin/env python
import time
from random import randint
from typing import Optional
"""
Class based AVL balanced binary search tree.
Based on designs from:
https://interactivepython.org/runestone/static/pythonds/Trees/AVLTreeImplementation.html
http://www.geeksforgeeks.org/avl-tree-set-2-deletion/
A tree constists of a single AVL_Tree object and
many Node objects.
What distinguises AVL_Tree from a plain Binary Search Tree is
it's self balancing property. Whenever a node is inserted or
deleted, the balance factors of the affected nodes are checked
and Nodes are rotated to maintain balance in the tree. This
ensures O(logN) insertion, deletion, and search performance.
"""
class Node:
def __init__(self, key, left=None, right=None, parent=None, payload=None):
self.key = key
self.left = left
self.right= right
self.parent = parent
self.height = 1
if payload:
self.payload = payload
else:
self.payload = self.key
self.count = 1
class AVL_Tree:
def __init__(self):
self.root = None
def height(self, node: Node) -> int:
if node == None:
return 0
return node.height
def right_rotate(self, y: Node) -> None:
x = y.left
y.left = x.right
if x.right != None:
x.right.parent = y
x.parent = y.parent
if self.root == y:
self.root = x
else:
if y.parent.left == y:
y.parent.left = x
else:
y.parent.right = x
x.right = y
y.parent = x
y.height = max(self.height(y.left), self.height(y.right)) + 1
x.height = max(self.height(x.left), self.height(x.right)) + 1
def left_rotate(self, x: Node) -> None:
y = x.right
x.right = y.left
if y.left != None:
y.left.parent = x
y.parent = x.parent
if self.root == x:
self.root = y
else:
if x.parent.left == x:
x.parent.left = y
else:
x.parent.right = y
y.left = x
x.parent = y
x.height = max(self.height(x.left), self.height(x.right)) + 1
y.height = max(self.height(y.left), self.height(y.right)) + 1
def get_balance(self, node: Node) -> int:
if node == None:
return 0
return self.height(node.left) - self.height(node.right)
def insert(self, key: int, insertion_point=None, payload=None) -> None:
node = Node(key)
if payload != None:
node.payload = payload
# If the tree is empty then assign new node to root
if self.root == None:
self.root = node
return
if insertion_point == None:
insertion_point = self.root
if key == insertion_point.key:
insertion_point.count += 1
elif key < insertion_point.key:
if insertion_point.left:
self.insert(key, insertion_point.left, payload)
else:
insertion_point.left = node
node.parent = insertion_point
elif key > insertion_point.key:
if insertion_point.right:
self.insert(key, insertion_point.right, payload)
else:
insertion_point.right = node
node.parent = insertion_point
else:
return
insertion_point.height = 1 + max(self.height(insertion_point.left), self.height(insertion_point.right))
balance = self.get_balance(insertion_point)
if balance > 1 and key < insertion_point.left.key:
# Left Left
self.right_rotate(insertion_point)
elif balance < -1 and key > insertion_point.right.key:
# Right Right
self.left_rotate(insertion_point)
elif balance > 1 and key > insertion_point.left.key:
# Left Right
self.left_rotate(insertion_point.left)
self.right_rotate(insertion_point)
elif balance < -1 and key < insertion_point.right.key:
# Right Left
self.right_rotate(insertion_point.right)
self.left_rotate(insertion_point)
def get(self, key: int) -> Optional[Node]:
if self.root:
node = self._get(key,self.root)
if node:
return node
else:
return None
else:
return None
def _get(self, key: int, currentNode: Node) -> Optional[Node]:
if not currentNode
return None
elif currentNode.key == key:
return currentNode
elif key < currentNode.key:
return self._get(key, currentNode.left)
else:
return self._get(key,currentNode.right)
def __getitem__(self,key: int):
""" Overloads [] getter to use get() """
return self.get(key)
def __contains__(self,key):
if self.get(key):
return True
else:
return False
def min_value(self, key: int) -> int:
""" Return the lowest value key in subtree with root 'node' """
sub_tree_root = self.get(key)
while sub_tree_root.left != None:
sub_tree_root = sub_tree_root.left
return sub_tree_root.key
def delete(self, key: int, starting_node: Node = None) -> None:
"""
When removing a node there are three cases:
1. The node has no children:
Delete pointer in parent node and
delete node object.
2. The node has one child:
Promote the child to take node's place
then delete node object.
3. The node has two children:
Search tree for a node that can replace
the node and preserve the binary structure
This will be the next largest node in
the tree and will never have two children.
This means it can be removed and swapped
in using the first two cases.
"""
if self.root == None:
return
if starting_node == None:
starting_node = self.root
# key < starting_node so we recurse left
if key < starting_node.key:
self.delete(key, starting_node.left)
# key > starting_node so we recurse right
elif key > starting_node.key:
self.delete(key, starting_node.right)
# starting_node is key and we can begin the deletion process.
else:
if starting_node.count > 1:
starting_node.count -= 1
# starting_node is a leaf
elif starting_node.left == None and starting_node.right == None:
if starting_node == starting_node.parent.left:
starting_node.parent.left = None
else:
starting_node.parent.right = None
# starting_node has both children
elif starting_node.left != None and starting_node.right != None:
succ = self.get(self.min_value(starting_node.right.key))
starting_node.key = succ.key
starting_node.payload = succ.payload
# succ is a leaf
# (succ cannot have a left child because it is the min)
if succ.right == None:
# succ is a left child
if succ.parent.left == succ:
succ.parent.left = None
# succ is a right child
else:
succ.parent.right = None
# succ has a right child
else:
# succ is a left child
if succ.parent.left == succ:
succ.parent.left = succ.right
succ.right.parent = succ.parent
# succ is a right child
else:
succ.parent.right = succ.right
succ.right.parent = succ.parent
# starting_node has one child
else:
if starting_node == self.root:
# Child is left
if starting_node.left != None:
starting_node.left.parent = None
self.root = starting_node.left
# Child is right
else:
starting_node.right.parent = None
self.root = starting_node.right
# starting_node is left child:
elif starting_node.parent.left == starting_node:
# Child is left
if starting_node.left != None:
starting_node.left.parent = starting_node.parent
starting_node.parent.left = starting_node.left
# Child is right
else:
starting_node.right.parent = starting_node.parent
starting_node.parent.left = starting_node.right
# starting_node is right child
else:
# Child is left
if starting_node.left != None:
starting_node.left.parent = starting_node.parent
starting_node.parent.right = starting_node.left
else:
starting_node.right.parent = starting_node.parent
starting_node.parent.right = starting_node.right
# Update height of starting_node
starting_node.height = max(self.height(starting_node.left), self.height(starting_node.right)) + 1
# Get balance factor
balance = self.get_balance(starting_node)
# Use balance factor to rotate
# Left Left
if balance > 1 and self.get_balance(starting_node.left) >= 0:
self.right_rotate(starting_node)
# Left Right
if balance > 1 and self.get_balance(starting_node.left) < 0:
self.left_rotate(starting_node.left)
self.right_rotate(starting_node)
# Right Right
if balance < -1 and self.get_balance(starting_node.right) <= 0:
self.left_rotate(starting_node)
# Right Left
if balance < -1 and self.get_balance(starting_node.right) > 0:
self.right_rotate(starting_node.right)
self.left_rotate(starting_node)
def __delitem__(self,key):
self.delete(key)
def traverse(rootnode: Node) -> None:
thislevel = [rootnode]
while thislevel:
nextlevel = list()
row_string = ""
for n in thislevel:
if n.parent != None:
if n.parent.left == n:
relation = "L"
elif n.parent.right == n:
relation = "R"
else:
relation = "ro"
row_string += str(n.key) + str((relation, n.payload)) + " "
if n.left: nextlevel.append(n.left)
if n.right: nextlevel.append(n.right)
print(row_string)
thislevel = nextlevel
def timeit(method):
def timed(*args, **kw):
ts = time.time()
result = method(*args, **kw)
te = time.time()
if 'log_time' in kw:
name = kw.get('log_name', method.__name__.upper())
kw['log_time'][name] = int((te - ts) * 1000)
else:
print( '%r %2.2f ms' % \
(method.__name__, (te - ts) * 1000))
return result
return timed
@timeit
def avl_inserter(items):
tree = AVL_Tree()
for _ in range(1, items):
tree.insert(randint(1,items))
return None
@timeit
def list_inserter(items):
l = []
for _ in range(1, items):
l.insert(0, randint(1,items))
return None
if __name__ == '__main__':
avl_inserter(100000)
list_inserter(100000)
After a lot of experimentation I replaced the recursive insert method with an iterative version. 经过大量的实验,我用迭代版本替换了递归插入方法。 I made a number of smaller tweaks based on the output of cProfile and got the runtime down to ~4100ms with 100,000 inserts.
我根据cProfile的输出进行了一些较小的调整,并通过100,000次插入将运行时间降低至约4100ms。
My iterative insert() uses a while loop to find the path to insertion point then a for loop to walk back up the path and do the necessary rotations. 我的迭代insert()使用while循环找到插入点的路径,然后使用for循环返回路径并进行必要的旋转。 I would think this would be inherently slower then a recursive insertion and would take at best 2*logn time for insertion.
我认为这本质上比递归插入要慢,并且插入最多需要2个登录时间。 Can anyone explain what was causing my recursive version to be so slow?
谁能解释造成我的递归版本太慢的原因?
#!/usr/bin/env python
"""
Class based AVL balanced binary search tree.
Based on designs from:
https://interactivepython.org/runestone/static/pythonds/Trees/AVLTreeImplementation.html
http://www.geeksforgeeks.org/avl-tree-set-2-deletion/
A tree constists of a single AVL_Tree object and
many Node objects.
What distinguises AVL_Tree from a plain Binary Search Tree is
it's self balancing property. Whenever a node is inserted or
deleted, the balance factors of the affected nodes are checked
and Nodes are rotated to maintain balance in the tree. This
ensures O(logN) insertion, deletion, and search performance.
"""
import time
import random
from typing import Optional
class Node:
def __init__(self, key, left=None, right=None, parent=None, payload=None):
self.key = key
self.left = left
self.right = right
self.parent = parent
self.height = 1
if payload:
self.payload = payload
else:
self.payload = self.key
self.count = 1
class AvlTree:
def __init__(self):
self.root = None
def right_rotate(self, current_node: Node) -> None:
""" Performs a right rotation for balancing the tree """
left_child = current_node.left
current_node.left = left_child.right
if left_child.right != None:
left_child.right.parent = current_node
left_child.parent = current_node.parent
if self.root == current_node:
self.root = left_child
else:
if current_node.parent.left == current_node:
current_node.parent.left = left_child
else:
current_node.parent.right = left_child
left_child.right = current_node
current_node.parent = left_child
current_node.height = max(
current_node.left.height if current_node.left else 0,
current_node.right.height if current_node.right else 0) + 1
left_child.height = max(
left_child.left.height if left_child.left else 0,
left_child.right.height if left_child.right else 0) + 1
def left_rotate(self, current_node: Node) -> None:
""" Performs a left rotation for balancing the tree """
right_child = current_node.right
current_node.right = right_child.left
if right_child.left != None:
right_child.left.parent = current_node
right_child.parent = current_node.parent
if self.root == current_node:
self.root = right_child
else:
if current_node.parent.left == current_node:
current_node.parent.left = right_child
else:
current_node.parent.right = right_child
right_child.left = current_node
current_node.parent = right_child
current_node.height = max(
current_node.left.height if current_node.left else 0,
current_node.right.height if current_node.right else 0) + 1
right_child.height = max(
right_child.left.height if right_child.left else 0,
right_child.right.height if right_child.right else 0) + 1
def get_balance(self, node: Node) -> int:
""" Returns balance factor for a node """
if node is None:
return 0
return (node.left.height if node.left else 0) - (node.right.height if node.right else 0)
def rotate_manager(self, node: Node, inserted_key: int, balance: int) -> None:
if balance > 1 and inserted_key < node.left.key:
# Left Left
self.right_rotate(node)
elif balance < -1 and inserted_key > node.right.key:
# Right Right
self.left_rotate(node)
elif balance > 1 and inserted_key > node.left.key:
# Left Right
self.left_rotate(node.left)
self.right_rotate(node)
elif balance < -1 and inserted_key < node.right.key:
# Right Left
self.right_rotate(node.right)
self.left_rotate(node)
def insert(self, key: int, insertion_point=None, payload=None) -> None:
""" Insert new node into the tree """
node = Node(key)
if payload is not None:
node.payload = payload
# If the tree is empty then assign new node to root
if self.root is None:
self.root = node
return
if insertion_point is None:
insertion_point = self.root
search_queue = [insertion_point]
index = 0
while search_queue:
if key < search_queue[index].key:
if search_queue[index].left:
search_queue.append(search_queue[index].left)
index += 1
else:
search_queue[index].left = node
node.parent = search_queue[index]
break
elif key > search_queue[index].key:
if search_queue[index].right:
search_queue.append(search_queue[index].right)
index += 1
else:
search_queue[index].right = node
node.parent = search_queue[index]
break
for n in reversed(search_queue):
n.height = max(
n.left.height if n.left else 0,
n.right.height if n.right else 0) + 1
balance = self.get_balance(n)
if balance > 1 or balance < -1:
self.rotate_manager(n, key, balance)
def get(self, key: int) -> Optional[Node]:
""" Returns a node with key if found in tree """
if self.root:
node = self._get(key, self.root)
if node:
return node
return None
return None
def _get(self, key: int, current_node: Node) -> Optional[Node]:
""" Recursive search method called by get() """
if not current_node:
return None
elif current_node.key == key:
return current_node
elif key < current_node.key:
return self._get(key, current_node.left)
return self._get(key, current_node.right)
def __getitem__(self, key: int):
""" Overloads [] getter to use get() """
return self.get(key)
def __contains__(self, key):
return bool(self.get(key))
def min_value(self, key: int) -> int:
""" Return the lowest value key in subtree with root 'node' """
sub_tree_root = self.get(key)
while sub_tree_root.left != None:
sub_tree_root = sub_tree_root.left
return sub_tree_root.key
def delete(self, key: int, starting_node: Node = None) -> None:
"""
When removing a node there are three cases:
1. The node has no children:
Delete pointer in parent node and
delete node object.
2. The node has one child:
Promote the child to take node's place
then delete node object.
3. The node has two children:
Search tree for a node that can replace
the node and preserve the binary structure
This will be the next largest node in
the tree and will never have two children.
This means it can be removed and swapped
in using the first two cases.
"""
if self.root is None:
return
if starting_node is None:
starting_node = self.root
# key < starting_node so we recurse left
if key < starting_node.key:
self.delete(key, starting_node.left)
# key > starting_node so we recurse right
elif key > starting_node.key:
self.delete(key, starting_node.right)
# starting_node is key and we can begin the deletion process.
else:
if starting_node.count > 1:
starting_node.count -= 1
# starting_node is a leaf
elif starting_node.left is None and starting_node.right is None:
if starting_node == starting_node.parent.left:
starting_node.parent.left = None
else:
starting_node.parent.right = None
# starting_node has both children
elif starting_node.left != None and starting_node.right != None:
succ = self.get(self.min_value(starting_node.right.key))
starting_node.key = succ.key
starting_node.payload = succ.payload
# succ is a leaf
# (succ cannot have a left child because it is the min)
if succ.right is None:
# succ is a left child
if succ.parent.left == succ:
succ.parent.left = None
# succ is a right child
else:
succ.parent.right = None
# succ has a right child
else:
# succ is a left child
if succ.parent.left == succ:
succ.parent.left = succ.right
succ.right.parent = succ.parent
# succ is a right child
else:
succ.parent.right = succ.right
succ.right.parent = succ.parent
# starting_node has one child
else:
if starting_node == self.root:
# Child is left
if starting_node.left != None:
starting_node.left.parent = None
self.root = starting_node.left
# Child is right
else:
starting_node.right.parent = None
self.root = starting_node.right
# starting_node is left child:
elif starting_node.parent.left == starting_node:
# Child is left
if starting_node.left != None:
starting_node.left.parent = starting_node.parent
starting_node.parent.left = starting_node.left
# Child is right
else:
starting_node.right.parent = starting_node.parent
starting_node.parent.left = starting_node.right
# starting_node is right child
else:
# Child is left
if starting_node.left != None:
starting_node.left.parent = starting_node.parent
starting_node.parent.right = starting_node.left
else:
starting_node.right.parent = starting_node.parent
starting_node.parent.right = starting_node.right
# Update height of starting_node
starting_node.height = max(
self.height(starting_node.left),
self.height(starting_node.right)) + 1
# Get balance factor
balance = self.get_balance(starting_node)
# Use balance factor to rotate
# Left Left
if balance > 1 and self.get_balance(starting_node.left) >= 0:
print('L L')
self.right_rotate(starting_node)
# Left Right
if balance > 1 and self.get_balance(starting_node.left) < 0:
print('L R')
self.left_rotate(starting_node.left)
self.right_rotate(starting_node)
# Right Right
if balance < -1 and self.get_balance(starting_node.right) <= 0:
print('R R')
self.left_rotate(starting_node)
# Right Left
if balance < -1 and self.get_balance(starting_node.right) > 0:
print('R L')
self.right_rotate(starting_node.right)
self.left_rotate(starting_node)
def __delitem__(self, key):
self.delete(key)
def traverse(rootnode: Node) -> None:
""" Prints a map of the tree starting at rootnode """
thislevel = [rootnode]
while thislevel:
nextlevel = list()
row_string = ""
for node in thislevel:
if node.parent != None:
if node.parent.left == node:
relation = "L"
elif node.parent.right == node:
relation = "R"
else:
relation = "ro"
row_string += str(node.key) + str((relation, node.height)) + " "
if node.left:
nextlevel.append(node.left)
if node.right:
nextlevel.append(node.right)
print(row_string)
thislevel = nextlevel
def timeit(method):
""" timeit decorator """
def timed(*args, **kw):
""" inner timeit function """
ts = time.time()
result = method(*args, **kw)
te = time.time()
if 'log_time' in kw:
name = kw.get('log_name', method.__name__.upper())
kw['log_time'][name] = int((te - ts) * 1000)
else:
print('%r %2.2f ms' % \
(method.__name__, (te - ts) * 1000))
return result
return timed
@timeit
def avl_inserter(items):
""" Tree insertion speed test """
samples = random.sample(range(1, 1000000), items)
sample_tree = AvlTree()
for sample in samples:
sample_tree.insert(sample)
return None
@timeit
def list_inserter(items):
""" List Insertion speed test """
samples = random.sample(range(1, 1000000), items)
sample_list = []
for sample in samples:
sample_list.insert(0, sample)
return None
if __name__ == '__main__':
avl_inserter(100000)
#list_inserter(500000)
#tree = AvlTree()
#tree.insert(10, payload=5)
#tree.insert(15, payload=3)
#tree.insert(11, payload=4)
#tree.insert(20)
#tree.insert(17)
#tree.insert(25)
#tree.insert(18)
#traverse(tree.root)
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.