[英]Python “RuntimeError: maximum recursion depth exceeded” in depth-first search
I'm trying to implement the depth-first search (DFS) algorithm for directed graphs as described in Cormen et al., Introduction to Algorithms (3rd ed.) . 我正在尝试为有向图实现深度优先搜索(DFS)算法,如Cormen等人, 算法简介(第3版)中所述 。 Here is my implementation so far: 到目前为止,这是我的实现:
import pytest
from collections import OrderedDict
import copy
class Node(object):
def __init__(self, color='white', parent=None, d=None, f=None):
self.color = color
self.parent = parent
self.d = d # Discovery time
self.f = f # Finishing time
class Graph(object):
def __init__(self, edges, node_indices=None):
self.edges = edges
self.nodes = self.initialize_nodes(node_indices )
self.adj = self.initialize_adjacency_list()
def initialize_nodes(self, node_indices=None):
if node_indices is None:
node_indices = sorted(list(set(node for edge in self.edges for node in edge)))
return OrderedDict([(node_index, Node()) for node_index in node_indices])
def initialize_adjacency_list(self):
A = {node: [] for node in self.nodes}
for edge in self.edges:
u, v = edge
A[u].append(v)
return A
def dfs(self):
self.time = 0
for u, node in self.nodes.items():
if node.color == 'white':
self.dfs_visit(u)
def dfs_visit(self, u):
self.time += 1
self.nodes[u].d = self.time
self.nodes[u].color = 'gray'
for v in self.adj[u]:
if self.nodes[v].color == 'white':
self.nodes[v].parent = u
self.dfs_visit(v)
self.nodes[u].color = 'black'
self.time += 1
self.nodes[u].f = self.time
@staticmethod
def transpose(edges):
return [(v,u) for (u,v) in edges]
def strongly_connected_components(self):
self.dfs()
finishing_times = {u: node.f for u, node in self.nodes.items()}
self.__init__(self.transpose(self.edges))
node_indices = sorted(finishing_times, key=finishing_times.get, reverse=True)
self.nodes = self.initialize_nodes(node_indices)
self.dfs()
return self.trees()
def trees(self):
_trees = []
nodes = copy.deepcopy(self.nodes)
while nodes:
for u, node in nodes.items():
if node.parent is None:
_trees.append([u])
nodes.pop(u)
else:
for tree in _trees:
if node.parent in tree:
tree.append(u)
nodes.pop(u)
return _trees
To test that it works, I've taken an example from Figure 22.9 of the book: 为了测试它是否有效,我从本书的图22.9中举了一个例子:
After renaming the nodes a to h 1
to 8
, respectively, I ran the following test: 将节点a分别重命名为h 1
到8
后,我运行了以下测试:
def test_strongly_connected_components():
edges = [(1,2), (5,1), (2,5), (5,6), (2,6), (6,7), (7,6), (2,3), (3,7), (3,4), (4,3), (4,8), (7,8), (8,8)]
graph = Graph(edges)
assert graph.strongly_connected_components() == [[1, 5, 2], [3, 4], [6, 7], [8]]
if __name__ == "__main__":
pytest.main([__file__+"::test_strongly_connected_components", "-s"])
This test passes, confirming the gray-shaded SCCs in the figure. 该测试通过,确认图中的灰色阴影SCC。
For the 'real' exercise, however, I need to use an input file, SCC.txt , which contains 875,714 lines representing edges (as a head-tail pair of integers), and output the size of the five largest SCCs. 但是,对于“实际”练习,我需要使用输入文件SCC.txt ,其中包含875,714行代表边缘的行(作为首尾整数对),并输出五个最大的SCC的大小。 To this end I tried the following test: 为此,我尝试了以下测试:
@pytest.fixture
def edges():
with open('SCC.txt') as f:
return [tuple(map(int, line.split())) for line in f.read().splitlines()]
def test_SCC_on_full_graph(edges):
graph = Graph(edges)
SCCs = graph.strongly_connected_components()
print([map(len, SCCs)].sort(reverse=True)) # Read off the size of the largest SCCs
if __name__ == "__main__":
pytest.main([__file__+"::test_SCC_on_full_graph", "-s"])
However, I run into a RuntimeError: maximum recursion depth exceeded in cmp
: 但是,我遇到了RuntimeError: maximum recursion depth exceeded in cmp
:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <scc.Graph object at 0x103253690>, u = 209099
def dfs_visit(self, u):
self.time += 1
self.nodes[u].d = self.time
self.nodes[u].color = 'gray'
for v in self.adj[u]:
> if self.nodes[v].color == 'white':
E RuntimeError: maximum recursion depth exceeded in cmp
scc.py:53: RuntimeError
========================== 1 failed in 21.79 seconds ===========================
I've read about increasing sys.setrecursionlimit , but this doesn't seem to be a recommended practice. 我已经读过有关增加sys.setrecursionlimit的信息 ,但这似乎不是推荐的做法。 Other than than I'm not sure how I could improve the code as it fairly literally implements the pseudocode given in the book. 除此之外,我不确定如何改进代码,因为它确实实现了书中给出的伪代码。 Any ideas on how I can overcome this error? 关于如何克服此错误的任何想法?
The DFS has to be logically DFS, but programmatically you can try a work around. DFS在逻辑上必须是DFS,但是可以通过编程方式尝试解决。
writing the DFS in such a way that you can retry it from one of the top functions, if it reaches a near the recursion limit. 如果DFS达到了递归极限附近,则可以这样编写DFS,使您可以从顶级功能之一中重试它。
Try to use multiprocessing. 尝试使用多处理。
PS: Is it possible that an infinite recursion is occurring for the larger dataset? PS:较大的数据集是否可能发生无限递归? logical error which comes forth when using a larger dataset. 使用较大的数据集时出现逻辑错误。 If you have datasets of incremental sizes, you could also identify the limit of the algorithm's implementation in python. 如果您拥有增量大小的数据集,则还可以在python中确定算法实现的限制。
I managed to solve the problem using the threading
library with an increased stack_size
and recursion limit. 我设法使用增加了stack_size
和递归限制的threading
库解决了这个问题。 Here is the code of the solution: 这是解决方案的代码:
import sys
import pytest
from collections import OrderedDict
import copy
import threading
class Node(object):
def __init__(self, color='white', parent=None, d=None, f=None):
self.color = color
self.parent = parent
self.d = d # Discovery time
self.f = f # Finishing time
class Graph(object):
def __init__(self, edges, node_indices=None):
self.edges = edges
self.nodes = self.initialize_nodes(node_indices )
self.adj = self.initialize_adjacency_list()
self.trees = dict()
def initialize_nodes(self, node_indices=None):
if node_indices is None:
node_indices = sorted(list(set(node for edge in self.edges for node in edge)))
return OrderedDict([(node_index, Node()) for node_index in node_indices])
def initialize_adjacency_list(self):
A = {node: [] for node in self.nodes}
for edge in self.edges:
u, v = edge
A[u].append(v)
return A
def dfs(self):
self.time = 0
for u, node in self.nodes.items():
if node.color == 'white':
self.dfs_visit(u, root=u)
def dfs_visit(self, u, root=None):
if u == root:
self.trees[root] = set()
self.time += 1
self.nodes[u].d = self.time
self.nodes[u].color = 'gray'
for v in self.adj[u]:
if self.nodes[v].color == 'white':
self.nodes[v].parent = u
self.trees[root].add(v)
self.dfs_visit(v, root=root)
self.nodes[u].color = 'black'
self.time += 1
self.nodes[u].f = self.time
@staticmethod
def transpose(edges):
return [(v,u) for (u,v) in edges]
def strongly_connected_components(self):
self.dfs()
finishing_times = {u: node.f for u, node in self.nodes.items()}
self.__init__(self.transpose(self.edges))
node_indices = sorted(finishing_times, key=finishing_times.get, reverse=True)
self.nodes = self.initialize_nodes(node_indices)
self.dfs()
trees = copy.deepcopy(self.trees)
for k, v in trees.items():
v.add(k)
return trees.values()
@pytest.fixture
def edges():
with open('SCC.txt') as f:
return [tuple(map(int, line.split())) for line in f.read().splitlines()]
def SCC_on_full_graph():
E = edges()
graph = Graph(E)
SCCs = graph.strongly_connected_components()
SCC_sizes = sorted(list(map(len, SCCs)), reverse=True)
print(SCC_sizes[:5]) # Read off the size of the 5 largest SCCs
if __name__ == "__main__":
threading.stack_size(67108864)
sys.setrecursionlimit(2**20)
thread = threading.Thread(target=SCC_on_full_graph)
thread.start()
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.