[英]Ping hosts in pandas dataframes
我有一個 dataframe 有 15,000 個主機 IP 地址(IP v4 和 IP v6)我正在嘗試檢查這些主機中的哪些正在運行。
我有以下我寫的代碼
def ping_host(hostname):
# hostname = "10.0.0.10 #example
print(hostname)
if hostname != hostname:
return "No Hostname"
response = os.system("ping -c 1 " + hostname + " > /dev/null 2>&1")
# and then check the response...
if response == 0:
print(hostname, 'is up!')
return "Up"
else:
print(hostname, 'is down!')
return "Down"
df['Host_Status'] = df['IP Addresses'].apply(ping_host)
這需要永遠完成。 有沒有更好/更快的方法來做到這一點?
我確實嘗試過-
df['Host_Status'] = df['IP Addresses'].swifter.apply(ping_host)
但即使這樣也沒有提高速度。
編輯 1-
我使用 32/64/256 線程使用多線程運行了 5 個小時(我認為在任何給定點只有 32 個線程在工作),但在腳本結束時
from multiprocessing.pool import ThreadPool
def ping_host(hostname):
# hostname = "10.0.0.10 #example
print(hostname)
if hostname != hostname:
return "No Hostname"
response = os.system("ping -c 1 " + hostname + " > /dev/null 2>&1")
# and then check the response...
if response == 0:
print(hostname, 'is up!')
return "Up"
else:
print(hostname, 'is down!')
return "Down"
# you can fiddle with pool size
with ThreadPool(processes=32) as t:
df['Host_Status'] = t.map(ping_host, df['IP Addresses'])
df.to_excel('output.xlsx')
當我嘗試導出它時,出現以下錯誤 -
Exception in thread Thread-1:
Traceback (most recent call last):
File "/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python3.7/threading.py", line 926, in _bootstrap_inner
self.run()
File "/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python3.7/threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_comm/pydev_transport.py", line 64, in _read_forever
self._read_and_dispatch_next_frame()
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_comm/pydev_transport.py", line 37, in _read_and_dispatch_next_frame
direction, frame = self._read_frame()
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_comm/pydev_transport.py", line 45, in _read_frame
buff = readall(self._socket.recv, 4)
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_comm/pydev_io.py", line 110, in readall
chunk = read_fn(sz - have)
TimeoutError: [Errno 60] Operation timed out
Traceback (most recent call last):
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_bundle/pydev_code_executor.py", line 112, in add_exec
self.finish_exec(more)
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_bundle/pydev_console_utils.py", line 210, in finish_exec
return server.notifyFinished(more)
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_comm/pydev_transport.py", line 226, in _req
return super(TSyncClient, self)._req(_api, *args, **kwargs)
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/third_party/thriftpy/_shaded_thriftpy/thrift.py", line 160, in _req
return self._recv(_api)
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/third_party/thriftpy/_shaded_thriftpy/thrift.py", line 172, in _recv
fname, mtype, rseqid = self._iprot.read_message_begin()
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/third_party/thriftpy/_shaded_thriftpy/protocol/binary.py", line 372, in read_message_begin
self.trans, strict=self.strict_read)
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/third_party/thriftpy/_shaded_thriftpy/protocol/binary.py", line 164, in read_message_begin
sz = unpack_i32(inbuf.read(4))
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/third_party/thriftpy/_shaded_thriftpy/transport/__init__.py", line 32, in read
return readall(self._read, sz)
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/third_party/thriftpy/_shaded_thriftpy/transport/__init__.py", line 20, in readall
"End of file reading from transport")
_shaded_thriftpy.transport.TTransportException: TTransportException(type=4, message='End of file reading from transport')
嘗試 2 Asyncio-
在閱讀了 dataframe 之后,我嘗試了 -
async def async_ping(host):
proc = await asyncio.create_subprocess_shell(
f"/sbin/ping -c 1 {host} > /dev/null 2>&1",
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
)
status = await proc.wait()
if status == 0:
return 'Alive'
else:
return 'Timeout'
async def async_main(hosts):
tasks1 = deque()
for host in hosts:
tasks1.append(asyncio.create_task(async_ping(host)))
return (t1 for t1 in await asyncio.gather(*tasks1))
start = time.perf_counter()
loop = asyncio.get_event_loop()
asyncio.set_event_loop(loop)
resp = loop.run_until_complete(async_main(df['IP Addresses'].to_list()))
loop.close()
finish = time.perf_counter()
df['Status'] = list(resp)
print(df)
print(f'Runtime: {round(finish-start,4)} seconds')
我遇到了阻塞錯誤-
Traceback (most recent call last):
File "~path/venv/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3331, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-6-2646fe9bd357>", line 26, in <module>
resp = loop.run_until_complete(async_main(df['IP Addresses'].to_list()))
File "/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python3.7/asyncio/base_events.py", line 587, in run_until_complete
return future.result()
File "<ipython-input-6-2646fe9bd357>", line 20, in async_main
return (t1 for t1 in await asyncio.gather(*tasks1))
File "<ipython-input-6-2646fe9bd357>", line 5, in async_ping
stderr=asyncio.subprocess.PIPE
File "/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python3.7/asyncio/subprocess.py", line 202, in create_subprocess_shell
stderr=stderr, **kwds)
File "/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python3.7/asyncio/base_events.py", line 1514, in subprocess_shell
protocol, cmd, True, stdin, stdout, stderr, bufsize, **kwargs)
File "/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python3.7/asyncio/unix_events.py", line 190, in _make_subprocess_transport
**kwargs)
File "/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python3.7/asyncio/base_subprocess.py", line 37, in __init__
stderr=stderr, bufsize=bufsize, **kwargs)
File "/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python3.7/asyncio/unix_events.py", line 775, in _start
universal_newlines=False, bufsize=bufsize, **kwargs)
File "/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python3.7/subprocess.py", line 800, in __init__
restore_signals, start_new_session)
File "/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python3.7/subprocess.py", line 1482, in _execute_child
restore_signals, start_new_session, preexec_fn)
BlockingIOError: [Errno 35] Resource temporarily unavailable
當我看到這個問題時,我想嘗試制作一些使用asyncio
模塊同時運行 ping 的東西。 下面的腳本在大約 7 秒內運行我的測試 IP 地址。 當我同步運行相同的測試 IP 地址列表時,大約需要 127 秒。 我使用python
版本3.8.2
和Windows 10 OS
。 也許它會為你工作。
import asyncio
import time
from collections import deque
import pandas as pd
async def async_ping(host, semaphore):
async with semaphore:
for _ in range(5):
proc = await asyncio.create_subprocess_shell(
f'C:\\Windows\\System32\\ping {host} -n 1 -w 1 -l 1',
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
)
status = await proc.wait()
if status == 0:
return 'Alive'
return 'Timeout'
async def async_main(hosts, limit):
semaphore = asyncio.Semaphore(limit)
tasks1 = deque()
for host in hosts:
tasks1.append(asyncio.create_task(
async_ping(host, semaphore))
)
return (t1 for t1 in await asyncio.gather(*tasks1))
host_df = pd.read_csv('ping_ip_dest.csv')
# set concurrent task limit
limit = 256
start = time.perf_counter()
loop = asyncio.get_event_loop()
asyncio.set_event_loop(loop)
resp = loop.run_until_complete(async_main(host_df['IP'].to_list(), limit))
loop.close()
finish = time.perf_counter()
host_df['Status'] = list(resp)
print(host_df)
print(f'Runtime: {round(finish-start,4)} seconds')
OUTPUT:
0 N. Virginia (Virginia, USA) 23.235.60.92 Alive
1 Dallas (Texas, USA) 69.162.81.155 Alive
2 Denver (Colorado, USA) 192.199.248.75 Alive
3 Miami (Florida, USA) 162.254.206.227 Alive
4 Minneapolis (Minnesota, USA) 207.250.234.100 Alive
5 Montreal (Canada) 184.107.126.165 Alive
6 New York (New York, USA) 206.71.50.230 Alive
7 San Francisco (California, USA) 65.49.22.66 Alive
8 Seattle (Washington, USA) 23.81.0.59 Alive
9 Washington DC (Virginia, USA) 207.228.238.7 Alive
10 Buenos Aires (Argentina) 131.255.7.26 Alive
11 Amsterdam (Netherlands) 95.142.107.181 Alive
12 Copenhagen (Denmark) 185.206.224.67 Alive
13 Frankfurt (Germany) 195.201.213.247 Alive
14 London (United Kingdom) 5.152.197.179 Alive
15 Madrid (Spain) 195.12.50.155 Alive
16 Paris (France) 51.158.22.211 Alive
17 Warsaw (Poland) 46.248.187.100 Alive
18 Johannesburg (South Africa) 197.221.23.194 Alive
19 Beijing (China) 47.94.129.116 Alive
20 Hong Kong (China) 103.1.14.238 Alive
21 Mumbai (India) 103.120.178.71 Alive
22 Shanghai (China) 106.14.156.213 Alive
23 Tokyo (Japan) 110.50.243.6 Alive
24 Brisbane 223.252.19.130 Alive
25 Sydney 101.0.86.43 Alive
26 Tel-Aviv (Israel) 185.229.226.83 Alive
27 Test 47.94.129.115 Timeout
Runtime: 3.1945 seconds
您可以使用線程池來做到這一點。
將參數更新為 1 秒后超時 ping、塊大小 1、更多線程和跳過中間 shell - 適用於我的本地計算機上的少數 IP 地址,總共復制到 14000 個。 等待時間和塊大小可能是最重要的。 默認情況下,ping 在 10 秒后超時,線程池在返回主線程之前運行 14000/threadcount ping。 考慮到在測試此代碼時遇到超時,更新應該是一個很好的改進。
from multiprocessing.pool import ThreadPool
import subprocess as subp
def ping_host(hostname):
# hostname = "10.0.0.10 #example
print(hostname)
if hostname != hostname:
return "No Hostname"
response = subp.run(["ping","-c", "1", "-W", "1", hostname], stdout=subp.DEVNULL,
stderr=subp.DEVNULL).returncode
# and then check the response...
if response == 0:
print(hostname, 'is up!')
return "Up"
else:
print(hostname, 'is down!')
return "Down"
# you can fiddle with pool size
with ThreadPool(processes=128) as t:
df['Host_Status'] = t.map(ping_host, df['IP Addresses'],
chunksize=1)
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.