[英]Ping hosts in pandas dataframes
我有一个 dataframe 有 15,000 个主机 IP 地址(IP v4 和 IP v6)我正在尝试检查这些主机中的哪些正在运行。
我有以下我写的代码
def ping_host(hostname):
# hostname = "10.0.0.10 #example
print(hostname)
if hostname != hostname:
return "No Hostname"
response = os.system("ping -c 1 " + hostname + " > /dev/null 2>&1")
# and then check the response...
if response == 0:
print(hostname, 'is up!')
return "Up"
else:
print(hostname, 'is down!')
return "Down"
df['Host_Status'] = df['IP Addresses'].apply(ping_host)
这需要永远完成。 有没有更好/更快的方法来做到这一点?
我确实尝试过-
df['Host_Status'] = df['IP Addresses'].swifter.apply(ping_host)
但即使这样也没有提高速度。
编辑 1-
我使用 32/64/256 线程使用多线程运行了 5 个小时(我认为在任何给定点只有 32 个线程在工作),但在脚本结束时
from multiprocessing.pool import ThreadPool
def ping_host(hostname):
# hostname = "10.0.0.10 #example
print(hostname)
if hostname != hostname:
return "No Hostname"
response = os.system("ping -c 1 " + hostname + " > /dev/null 2>&1")
# and then check the response...
if response == 0:
print(hostname, 'is up!')
return "Up"
else:
print(hostname, 'is down!')
return "Down"
# you can fiddle with pool size
with ThreadPool(processes=32) as t:
df['Host_Status'] = t.map(ping_host, df['IP Addresses'])
df.to_excel('output.xlsx')
当我尝试导出它时,出现以下错误 -
Exception in thread Thread-1:
Traceback (most recent call last):
File "/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python3.7/threading.py", line 926, in _bootstrap_inner
self.run()
File "/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python3.7/threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_comm/pydev_transport.py", line 64, in _read_forever
self._read_and_dispatch_next_frame()
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_comm/pydev_transport.py", line 37, in _read_and_dispatch_next_frame
direction, frame = self._read_frame()
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_comm/pydev_transport.py", line 45, in _read_frame
buff = readall(self._socket.recv, 4)
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_comm/pydev_io.py", line 110, in readall
chunk = read_fn(sz - have)
TimeoutError: [Errno 60] Operation timed out
Traceback (most recent call last):
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_bundle/pydev_code_executor.py", line 112, in add_exec
self.finish_exec(more)
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_bundle/pydev_console_utils.py", line 210, in finish_exec
return server.notifyFinished(more)
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_comm/pydev_transport.py", line 226, in _req
return super(TSyncClient, self)._req(_api, *args, **kwargs)
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/third_party/thriftpy/_shaded_thriftpy/thrift.py", line 160, in _req
return self._recv(_api)
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/third_party/thriftpy/_shaded_thriftpy/thrift.py", line 172, in _recv
fname, mtype, rseqid = self._iprot.read_message_begin()
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/third_party/thriftpy/_shaded_thriftpy/protocol/binary.py", line 372, in read_message_begin
self.trans, strict=self.strict_read)
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/third_party/thriftpy/_shaded_thriftpy/protocol/binary.py", line 164, in read_message_begin
sz = unpack_i32(inbuf.read(4))
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/third_party/thriftpy/_shaded_thriftpy/transport/__init__.py", line 32, in read
return readall(self._read, sz)
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/third_party/thriftpy/_shaded_thriftpy/transport/__init__.py", line 20, in readall
"End of file reading from transport")
_shaded_thriftpy.transport.TTransportException: TTransportException(type=4, message='End of file reading from transport')
尝试 2 Asyncio-
在阅读了 dataframe 之后,我尝试了 -
async def async_ping(host):
proc = await asyncio.create_subprocess_shell(
f"/sbin/ping -c 1 {host} > /dev/null 2>&1",
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
)
status = await proc.wait()
if status == 0:
return 'Alive'
else:
return 'Timeout'
async def async_main(hosts):
tasks1 = deque()
for host in hosts:
tasks1.append(asyncio.create_task(async_ping(host)))
return (t1 for t1 in await asyncio.gather(*tasks1))
start = time.perf_counter()
loop = asyncio.get_event_loop()
asyncio.set_event_loop(loop)
resp = loop.run_until_complete(async_main(df['IP Addresses'].to_list()))
loop.close()
finish = time.perf_counter()
df['Status'] = list(resp)
print(df)
print(f'Runtime: {round(finish-start,4)} seconds')
我遇到了阻塞错误-
Traceback (most recent call last):
File "~path/venv/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3331, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-6-2646fe9bd357>", line 26, in <module>
resp = loop.run_until_complete(async_main(df['IP Addresses'].to_list()))
File "/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python3.7/asyncio/base_events.py", line 587, in run_until_complete
return future.result()
File "<ipython-input-6-2646fe9bd357>", line 20, in async_main
return (t1 for t1 in await asyncio.gather(*tasks1))
File "<ipython-input-6-2646fe9bd357>", line 5, in async_ping
stderr=asyncio.subprocess.PIPE
File "/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python3.7/asyncio/subprocess.py", line 202, in create_subprocess_shell
stderr=stderr, **kwds)
File "/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python3.7/asyncio/base_events.py", line 1514, in subprocess_shell
protocol, cmd, True, stdin, stdout, stderr, bufsize, **kwargs)
File "/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python3.7/asyncio/unix_events.py", line 190, in _make_subprocess_transport
**kwargs)
File "/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python3.7/asyncio/base_subprocess.py", line 37, in __init__
stderr=stderr, bufsize=bufsize, **kwargs)
File "/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python3.7/asyncio/unix_events.py", line 775, in _start
universal_newlines=False, bufsize=bufsize, **kwargs)
File "/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python3.7/subprocess.py", line 800, in __init__
restore_signals, start_new_session)
File "/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python3.7/subprocess.py", line 1482, in _execute_child
restore_signals, start_new_session, preexec_fn)
BlockingIOError: [Errno 35] Resource temporarily unavailable
当我看到这个问题时,我想尝试制作一些使用asyncio
模块同时运行 ping 的东西。 下面的脚本在大约 7 秒内运行我的测试 IP 地址。 当我同步运行相同的测试 IP 地址列表时,大约需要 127 秒。 我使用python
版本3.8.2
和Windows 10 OS
。 也许它会为你工作。
import asyncio
import time
from collections import deque
import pandas as pd
async def async_ping(host, semaphore):
async with semaphore:
for _ in range(5):
proc = await asyncio.create_subprocess_shell(
f'C:\\Windows\\System32\\ping {host} -n 1 -w 1 -l 1',
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
)
status = await proc.wait()
if status == 0:
return 'Alive'
return 'Timeout'
async def async_main(hosts, limit):
semaphore = asyncio.Semaphore(limit)
tasks1 = deque()
for host in hosts:
tasks1.append(asyncio.create_task(
async_ping(host, semaphore))
)
return (t1 for t1 in await asyncio.gather(*tasks1))
host_df = pd.read_csv('ping_ip_dest.csv')
# set concurrent task limit
limit = 256
start = time.perf_counter()
loop = asyncio.get_event_loop()
asyncio.set_event_loop(loop)
resp = loop.run_until_complete(async_main(host_df['IP'].to_list(), limit))
loop.close()
finish = time.perf_counter()
host_df['Status'] = list(resp)
print(host_df)
print(f'Runtime: {round(finish-start,4)} seconds')
OUTPUT:
0 N. Virginia (Virginia, USA) 23.235.60.92 Alive
1 Dallas (Texas, USA) 69.162.81.155 Alive
2 Denver (Colorado, USA) 192.199.248.75 Alive
3 Miami (Florida, USA) 162.254.206.227 Alive
4 Minneapolis (Minnesota, USA) 207.250.234.100 Alive
5 Montreal (Canada) 184.107.126.165 Alive
6 New York (New York, USA) 206.71.50.230 Alive
7 San Francisco (California, USA) 65.49.22.66 Alive
8 Seattle (Washington, USA) 23.81.0.59 Alive
9 Washington DC (Virginia, USA) 207.228.238.7 Alive
10 Buenos Aires (Argentina) 131.255.7.26 Alive
11 Amsterdam (Netherlands) 95.142.107.181 Alive
12 Copenhagen (Denmark) 185.206.224.67 Alive
13 Frankfurt (Germany) 195.201.213.247 Alive
14 London (United Kingdom) 5.152.197.179 Alive
15 Madrid (Spain) 195.12.50.155 Alive
16 Paris (France) 51.158.22.211 Alive
17 Warsaw (Poland) 46.248.187.100 Alive
18 Johannesburg (South Africa) 197.221.23.194 Alive
19 Beijing (China) 47.94.129.116 Alive
20 Hong Kong (China) 103.1.14.238 Alive
21 Mumbai (India) 103.120.178.71 Alive
22 Shanghai (China) 106.14.156.213 Alive
23 Tokyo (Japan) 110.50.243.6 Alive
24 Brisbane 223.252.19.130 Alive
25 Sydney 101.0.86.43 Alive
26 Tel-Aviv (Israel) 185.229.226.83 Alive
27 Test 47.94.129.115 Timeout
Runtime: 3.1945 seconds
您可以使用线程池来做到这一点。
将参数更新为 1 秒后超时 ping、块大小 1、更多线程和跳过中间 shell - 适用于我的本地计算机上的少数 IP 地址,总共复制到 14000 个。 等待时间和块大小可能是最重要的。 默认情况下,ping 在 10 秒后超时,线程池在返回主线程之前运行 14000/threadcount ping。 考虑到在测试此代码时遇到超时,更新应该是一个很好的改进。
from multiprocessing.pool import ThreadPool
import subprocess as subp
def ping_host(hostname):
# hostname = "10.0.0.10 #example
print(hostname)
if hostname != hostname:
return "No Hostname"
response = subp.run(["ping","-c", "1", "-W", "1", hostname], stdout=subp.DEVNULL,
stderr=subp.DEVNULL).returncode
# and then check the response...
if response == 0:
print(hostname, 'is up!')
return "Up"
else:
print(hostname, 'is down!')
return "Down"
# you can fiddle with pool size
with ThreadPool(processes=128) as t:
df['Host_Status'] = t.map(ping_host, df['IP Addresses'],
chunksize=1)
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.