簡體   English   中英

消費 Watchdog 隊列事件的 Python 並行線程

[英]Python parallel thread that consume Watchdog queue events

我有這段代碼,每次外部程序 (TCPdump) 在我的目錄中創建一個 *.pcap 文件時,它應該將一個事件放入隊列中。 我的問題是:盡管 process() 函數中的打印語句有輸出,我得到的隊列卻總是空的。

我究竟做錯了什么? 隊列是否正確定義並在兩個類之間共享?

編輯 - - - - - - - - -
我想我明白為什么會得到一個空隊列了:我認為這是因為我打印的是在 Handler 類填充之前就已初始化的隊列。 我修改了我的代碼並創建了兩個應該使用同一個隊列的進程,但現在執行卡在 queue.put(),而且線程 ReadPcapFiles() 停止運行。

這里更新的代碼:

import time
import pyshark
import concurrent.futures
import threading
import logging
from queue import Queue
from multiprocessing import Process
from watchdog.observers import Observer, api
from watchdog.events import PatternMatchingEventHandler

class Handler(PatternMatchingEventHandler):
    """Event handler that pushes the path of each created capture file onto a queue."""

    # Glob patterns for the capture files this handler is interested in.
    patterns = ["*.pcap", "*.pcapng"]

    def __init__(self, queue):
        """Remember the queue that created-file paths are pushed onto."""
        super().__init__()
        self.queue = queue

    def on_created(self, event):
        """Hand every creation event over to :meth:`process`."""
        self.process(event)

    def process(self, event):
        """Enqueue ``event.src_path``, then log the queue size and print its contents."""
        shared_queue = self.queue
        shared_queue.put(event.src_path)
        logging.info(f"Storing message: {shared_queue.qsize()}")
        print("Producer queue: ", list(shared_queue.queue))


def StartWatcher(watchdogq, event, path='C:\\...'):
    """Watch ``path`` for new capture files until ``event`` is set.

    Args:
        watchdogq: queue that the handler pushes created-file paths onto.
        event: stop flag (``threading.Event``); the watcher shuts down once
            it is set.
        path: directory to watch (non-recursive). Defaults to the placeholder
            path used by the original script.
    """
    handler = Handler(watchdogq)
    observer = Observer()
    # Schedule and start exactly once: an observer is a thread and cannot be
    # started a second time.
    observer.schedule(handler, path, recursive=False)
    print("About to start observer")
    observer.start()
    try:
        # Block until the caller asks us to stop, so this function can return.
        event.wait()
    finally:
        # Always shut the observer thread down, even if waiting was interrupted.
        observer.stop()
        observer.join()


def ReadPcapFiles(consumerq, event, timeout=0.5):
    """Print queued paths until ``event`` is set and the queue has been drained.

    Args:
        consumerq: queue the created-file paths are read from.
        event: stop flag (``threading.Event``); the loop ends once it is set
            and no items are left in the queue.
        timeout: seconds to wait for an item before re-checking ``event``.
    """
    # Imported locally because the module only imports ``Queue`` from ``queue``.
    from queue import Empty

    while not event.is_set() or not consumerq.empty():
        try:
            # A bounded wait lets the loop re-check the stop flag; a bare
            # get() would block forever on an empty queue.
            item = consumerq.get(timeout=timeout)
        except Empty:
            continue
        print("Consumer queue: ", item)
        # NOTE: parsing the capture (e.g. pyshark.FileCapture(item) and
        # counting packets) is the intended next step; not implemented here.

if __name__ == '__main__':
    # Named log_format so the builtin format() is not shadowed.
    log_format = "%(asctime)s: %(message)s"
    logging.basicConfig(format=log_format, level=logging.INFO, datefmt="%H:%M:%S")
    logging.getLogger().setLevel(logging.DEBUG)

    queue = Queue()
    event = threading.Event()
    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
        futures = [
            executor.submit(StartWatcher, queue, event),
            executor.submit(ReadPcapFiles, queue, event),
        ]
        try:
            # Keep producer and consumer running until Ctrl-C or until one of
            # them finishes/crashes; setting the event after a fraction of a
            # second would stop them before any file could be created.
            while not any(future.done() for future in futures):
                time.sleep(1)
        except KeyboardInterrupt:
            pass
        finally:
            logging.info("Main: about to set event")
            event.set()

    # Leaving the ``with`` block waits for both workers; report failures that
    # would otherwise stay hidden inside the futures.
    for future in futures:
        error = future.exception()
        if error is not None:
            logging.error("Worker failed: %s", error)

舊代碼:

import time
from queue import Queue
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler

class Handler(PatternMatchingEventHandler):
    """Event handler that reports each created capture file and queues its path."""

    # Glob patterns for the capture files this handler is interested in.
    patterns = ["*.pcap", "*.pcapng"]

    def __init__(self, queue):
        """Keep a reference to the queue that receives created-file paths."""
        super().__init__()
        self.queue = queue

    def on_created(self, event):
        """Route creation events to :meth:`process`."""
        self.process(event)

    def process(self, event):
        """Print the event type and path, then enqueue the path."""
        print(f'event type: {event.event_type}  path : {event.src_path}')
        target_queue = self.queue
        target_queue.put(event.src_path)

class Watcher():
    """Watch a directory tree and print the path of each file the handler queues."""

    def __init__(self, path):
        """Create the queue, observer and handler for watching ``path``."""
        self.queue = Queue()
        self.observer = Observer()
        self.handler = Handler(self.queue)
        self.path = path

    def start(self):
        """Start the observer and print queued paths until interrupted.

        Blocks the calling thread. The observer is always stopped and joined
        on the way out, including on Ctrl-C.
        """
        # Imported locally because the module only imports ``Queue`` from ``queue``.
        from queue import Empty

        self.observer.schedule(self.handler, self.path, recursive=True)
        self.observer.start()
        try:
            while True:
                try:
                    # Bounded wait so the loop stays responsive to Ctrl-C.
                    created_path = self.queue.get(timeout=1)
                except Empty:
                    continue
                # Print the item that was just dequeued; printing the queue
                # after get() would omit it and usually show an empty list.
                print(created_path)
        except KeyboardInterrupt:
            pass
        except Exception as error:
            print("Error: " + str(error))
        finally:
            self.observer.stop()
            self.observer.join()

if __name__ == '__main__':
    # Watch the placeholder capture directory; start() blocks the main thread.
    Watcher('C:\\...').start()

這對我有用(主要想法來自這個答案,謝謝!)。但請注意,我認為這只是一種變通方法,因此如果有人有更好的解決方案,或者可以更好地解釋 Python 中出現此類行為的原因,請毫不猶豫地回答!

我的猜測是我有兩個主要問題:
- 我正在另一個線程中啟動 Watchdog 進程(這以某種方式阻塞了我的隊列消耗線程)。
- Python 線程並不能真正並行工作,因此必須啟動一個獨立的進程。

這是我的代碼:

import time
import pyshark
import threading
import logging
import os
from queue import Queue
from multiprocessing import Process, Pool
from watchdog.observers import Observer, api
from watchdog.events import PatternMatchingEventHandler
from concurrent.futures import ThreadPoolExecutor

class Handler(PatternMatchingEventHandler):
    """Event handler that queues a created capture file once its size stops changing."""

    # Glob patterns for the capture files this handler is interested in.
    patterns = ["*.pcap", "*.pcapng"]

    def __init__(self, queue):
        """Keep a reference to the queue that receives created-file paths."""
        super().__init__()
        self.queue = queue

    def process(self, event):
        """Enqueue ``event.src_path``, then log the queue size and print its contents."""
        self.queue.put(event.src_path)
        logging.info(f"Storing message: {self.queue.qsize()}")
        print("Producer queue: ", list(self.queue.queue))

    def on_created(self, event):
        """Wait until the new file stops growing, then queue it.

        The size is sampled once per second; the file is treated as complete
        when two consecutive samples are equal. If the file disappears or
        cannot be stat'ed while waiting, the event is skipped instead of
        letting the error escape into the caller.
        """
        last_size = -1
        try:
            while True:
                current_size = os.path.getsize(event.src_path)
                if current_size == last_size:
                    break
                last_size = current_size
                time.sleep(1)
        except OSError as error:
            logging.warning(f"Skipping {event.src_path}: {error}")
            return

        self.process(event)

def ConsumeQueue(consumerq):
    """Forward every queued path to ``ReadPcapFiles`` in a worker process.

    Runs forever; intended to be used as the target of a daemon thread.

    Args:
        consumerq: queue of created-file paths to process.
    """
    # One pool for the lifetime of the consumer: creating a new Pool per item
    # spawns a fresh set of worker processes each time and never releases them.
    pool = Pool()
    try:
        while True:
            # get() blocks until an item is available, so no polling is needed.
            created_file = consumerq.get()
            pool.apply_async(
                ReadPcapFiles,
                (created_file, ),
                # Surface worker failures instead of dropping them silently.
                error_callback=lambda exc: logging.error(
                    "ReadPcapFiles failed: %s", exc),
            )
    finally:
        pool.terminate()

def ReadPcapFiles(get_event):
    """Count the packets and sum the packet lengths of one capture file.

    Args:
        get_event: path of the capture file to read.

    Prints the packet count and the summed ``packet.length`` values.
    """
    createdFile = get_event
    print(f"This is my event in ReadPacapFile {createdFile}")

    countPacket = 0
    bandwidth = 0
    pcapfile = pyshark.FileCapture(createdFile)
    try:
        for packet in pcapfile:
            countPacket += 1
            bandwidth += int(packet.length)
    finally:
        # Release the capture's underlying resources even if parsing fails.
        pcapfile.close()
    print(f"Packet nr {countPacket}")
    # NOTE(review): the value printed here is the total of packet lengths for
    # the whole file, not a per-second rate, despite the label.
    print(f"Byte per second {bandwidth}")


if __name__ == '__main__':

    # Named log_format so the builtin format() is not shadowed.
    log_format = "%(asctime)s: %(message)s"
    logging.basicConfig(format=log_format, level=logging.INFO, datefmt="%H:%M:%S")
    logging.getLogger().setLevel(logging.DEBUG)

    queue = Queue()
    path = 'C:\\...'

    # Daemon thread: it is abandoned automatically when the main thread exits.
    worker = threading.Thread(target=ConsumeQueue, args=(queue, ), daemon=True)
    print("About to start worker")
    worker.start()

    event_handler = Handler(queue)
    observer = Observer()
    observer.schedule(event_handler, path, recursive=False)
    print("About to start observer")
    observer.start()

    try:
        # Keep the main thread alive while the observer thread does the work.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop; it is not an Exception subclass,
        # so it needs its own clause.
        pass
    except Exception as error:
        print("Error: " + str(error))
    finally:
        # Stop the observer on every exit path before waiting for it.
        observer.stop()
        observer.join()

有一個優秀的庫,它支持對隊列中的項目進行並發訪問。 該隊列還是持久化的[基於文件和基於數據庫],因此即使程序崩潰,您仍然可以從崩潰的位置繼續消費事件。

持久隊列

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM