
OpenCV / Python: multi-threading for live facial recognition

I am using OpenCV and dlib to perform facial recognition with landmarks, live from a webcam stream. The language is Python. It works fine on my MacBook laptop, but I need it to run 24/7 from a desktop computer. The computer is a PC with an Intel® Core™2 Quad CPU Q6600 @ 2.40GHz, 32-bit, running Debian Jessie. The drop in performance is drastic: there is a 10-second lag due to processing!

I therefore looked into multi-threading to gain performance:

  1. I first tried the sample code from OpenCV, and the result was great: all four cores hit 100%, and the performance was much better.
  2. I then replaced the frame-processing code with my own, and it did not improve performance at all: only one core hit 100% while the others stayed very low. I even think it got worse with multi-threading on.

I got the facial landmark code from the dlib sample code. I know it could probably be optimized, but I want to understand why I am not able to use the full power of my (old) computer with multi-threading.

I will put my code below, thanks a lot for reading :)

from __future__ import print_function

import numpy as np
import cv2
import dlib

from multiprocessing.pool import ThreadPool
from collections import deque

from common import clock, draw_str, StatValue
import video


class DummyTask:
    def __init__(self, data):
        self.data = data
    def ready(self):
        return True
    def get(self):
        return self.data


if __name__ == '__main__':
    import sys

    print(__doc__)

    try:
        fn = sys.argv[1]
    except:
        fn = 0
    cap = video.create_capture(fn)

    # Face detector
    detector = dlib.get_frontal_face_detector()
    # Landmarks shape predictor
    predictor = dlib.shape_predictor("landmarks/shape_predictor_68_face_landmarks.dat")

    # This is where the facial detection takes place
    def process_frame(frame, t0, detector, predictor):
        # some intensive computation...
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        clahe_image = clahe.apply(gray)

        detections = detector(clahe_image, 1)
        for k, d in enumerate(detections):
            shape = predictor(clahe_image, d)
            for i in range(1, 68):  # There are 68 landmark points on each face
                cv2.circle(frame, (shape.part(i).x, shape.part(i).y), 1, (0, 0, 255), thickness=2)
        return frame, t0

    threadn = cv2.getNumberOfCPUs()
    pool = ThreadPool(processes=threadn)
    pending = deque()

    threaded_mode = True

    latency = StatValue()
    frame_interval = StatValue()
    last_frame_time = clock()
    while True:
        while len(pending) > 0 and pending[0].ready():
            res, t0 = pending.popleft().get()
            latency.update(clock() - t0)
            draw_str(res, (20, 20), "threaded: " + str(threaded_mode))
            draw_str(res, (20, 40), "latency: %.1f ms" % (latency.value * 1000))
            draw_str(res, (20, 60), "frame interval: %.1f ms" % (frame_interval.value * 1000))
            cv2.imshow('threaded video', res)
        if len(pending) < threadn:
            ret, frame = cap.read()
            t = clock()
            frame_interval.update(t - last_frame_time)
            last_frame_time = t
            if threaded_mode:
                task = pool.apply_async(process_frame, (frame.copy(), t, detector, predictor))
            else:
                task = DummyTask(process_frame(frame, t, detector, predictor))
            pending.append(task)
        ch = cv2.waitKey(1)
        if ch == ord(' '):
            threaded_mode = not threaded_mode
        if ch == 27:
            break
    cv2.destroyAllWindows()

The performance problem turned out to be due to a bad compilation of dlib. Do not use pip install dlib, which for some reason runs very, very slowly compared to a properly compiled build. I went from almost 10 seconds of lag to about 2 seconds this way. So in the end I did not need multi-threading/processing, but I am still working on it to increase the speed. Thanks for the help :)
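For reference, a hedged sketch of what a from-source build looks like (the --yes instruction-set switch is the historically documented form and may differ across dlib versions; only enable instruction sets your CPU actually has, and a Q6600 has SSE2 but no SSE4 or AVX):

git clone https://github.com/davisking/dlib.git
cd dlib
# Build with CPU-specific optimizations; the flag name is illustrative
python setup.py install --yes USE_SSE2_INSTRUCTIONS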

I tried a simplified approach like the one P.Ro mentions in his answer, with processes writing to an output queue, but somehow the queue got locked most of the time because all the processes wrote to it at the same time (just my guess). I probably did something wrong.

In the end I ended up using pipes.

The code is nasty, but if I were the me from a few hours ago, I would still be happy to find an example that actually runs without effort.

from multiprocessing import Process, Queue, Manager, Pipe
import multiprocessing
import face_recognition as fik
import cv2
import time


video_input = 0

obama_image = fik.load_image_file("obama.png")
obama_face_encoding = fik.face_encodings(obama_image)[0]



quality = 0.7


def f(id, fi, fl):
    # Worker process: pull frames from the shared input queue, detect face
    # locations, and send each face crop back through this worker's pipe end.
    import face_recognition as fok

    while True:
        small_frame = fi.get()
        print("running thread"+str(id))
        face_locations = fok.face_locations(small_frame)

        if(len(face_locations)>0):
            print(face_locations)
            for (top7, right7, bottom7, left7) in face_locations:

                small_frame_c = small_frame[top7:bottom7, left7:right7]
                fl.send(small_frame_c)

fps_var = 0
if __name__ == '__main__':
        multiprocessing.set_start_method('spawn')


        with Manager() as manager:

            video_capture = cv2.VideoCapture(video_input)

            fi = Queue(maxsize=14)

            threads = 8
            proc = []

            parent_p = []
            thread_p = []
            # procids = range(0,threads)
            for t in range(0,threads):
                p_t,c_t = Pipe()
                parent_p.append(p_t)
                thread_p.append(c_t)
                print(t)
                proc.append(Process(target=f, args=(t,fi,thread_p[t])))
                proc[t].start()


            useframe = False

            frame_id = 0
            while True:
                # Grab a single frame of video
                ret, frame = video_capture.read()
                effheight, effwidth = frame.shape[:2]
                if effwidth < 20:
                    break
                # Resize the frame to a fixed width for faster face detection
                xxx = 930
                yyy = 10 / 16  # height-to-width ratio of the resized frame
                small_frame = cv2.resize(frame, (xxx, int(xxx*yyy)))
                if frame_id%2 == 0:
                    if not fi.full():


                        fi.put(small_frame)

                        print(frame_id)

                        cv2.imshow('Video', small_frame)


                        print("FPS: ", int(1.0 / (time.time() - fps_var)))
                        fps_var = time.time()


                #GET ALL DETECTIONS
                for t in range(0,threads):
                    if parent_p[t].poll():
                        small_frame_c = parent_p[t].recv()
                        cv2.imshow('recc', small_frame_c)
                        height34, width34 = small_frame_c.shape[:2]
                        if(width34<20):
                            print("face 2 small")
                            print(width34)
                            break
                        face_encodings_cam = fik.face_encodings(small_frame_c,[(0, width34, height34, 0)])

                        match = fik.compare_faces([obama_face_encoding], face_encodings_cam[0])
                        name = "Unknown"

                        if match[0]:
                            name = "Barack"

                        print(name)
                        break

                frame_id += 1

                # Hit 'q' on the keyboard to quit!
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

I do not have much experience with using ThreadPool, but I always just use Process, as shown below. You should be able to easily edit this code to fit your needs. I wrote this with your implementation in mind.

This code grabs the number of cores and starts that many worker processes, which all run the desired function in parallel. They all share one queue of frames for input and all put results onto the same output queue for the main process to grab and display. Each queue has a maximum size, in this case 5. This ensures that despite the CPU time processing takes, the output always stays relatively live.

import numpy as np
import cv2

from multiprocessing import Process, Queue
import time

#from common import clock, draw_str, StatValue
#import video


class Canny_Process(Process):

    def __init__(self, frame_queue, output_queue):
        Process.__init__(self)
        self.frame_queue = frame_queue
        self.output_queue = output_queue
        self.stop = False
        # Initialize your face detectors here

    def get_frame(self):
        if not self.frame_queue.empty():
            return True, self.frame_queue.get()
        else:
            return False, None

    def stopProcess(self):
        self.stop = True

    def canny_frame(self, frame):
        # some intensive computation...
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        edges = cv2.Canny(gray, 50, 100)

        # To simulate CPU time
        #############################
        for i in range(1000000):
            x = 546 * 546
            res = x / (i + 1)
        #############################
        'REPLACE WITH FACE DETECT CODE HERE'

        if self.output_queue.full():
            self.output_queue.get_nowait()
        self.output_queue.put(edges)

    def run(self):
        while not self.stop:
            ret, frame = self.get_frame()
            if ret:
                self.canny_frame(frame)


if __name__ == '__main__':

    frame_sum = 0
    init_time = time.time()

    def put_frame(frame):
        if Input_Queue.full():
            Input_Queue.get_nowait()
        Input_Queue.put(frame)

    def cap_read(cv2_cap):
        ret, frame = cv2_cap.read()
        if ret:
            put_frame(frame)

    cap = cv2.VideoCapture(0)

    threadn = cv2.getNumberOfCPUs()
    threaded_mode = True

    process_list = []
    Input_Queue = Queue(maxsize=5)
    Output_Queue = Queue(maxsize=5)

    for x in range(threadn - 1):
        canny_process = Canny_Process(frame_queue=Input_Queue, output_queue=Output_Queue)
        canny_process.daemon = True
        canny_process.start()
        process_list.append(canny_process)

    ch = cv2.waitKey(1)
    cv2.namedWindow('Threaded Video', cv2.WINDOW_NORMAL)
    while True:
        cap_read(cap)

        if not Output_Queue.empty():
            result = Output_Queue.get()
            cv2.imshow('Threaded Video', result)
            ch = cv2.waitKey(5)

        if ch == ord(' '):
            threaded_mode = not threaded_mode
        if ch == 27:
            break
    cv2.destroyAllWindows()

This should do the trick; just swap my Canny function for your face detection code. I wrote this based on your code and compared the two: this is significantly faster. I am using multiprocessing.Process here. In Python, processes are truly parallel while threads are not, because of the GIL. I use two queues to send data back and forth between the main process and the workers. Queues are both thread- and process-safe.
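The point about the GIL is easy to verify. The sketch below is my own illustration (not from the answer): it maps the same CPU-bound, pure-Python function over a thread pool and a process pool, and only the process pool scales across cores. Note that if a library holds the GIL during its native calls, as some dlib builds apparently do, threads gain nothing, which would match the single-core behavior seen in the question.

from multiprocessing.pool import ThreadPool
from multiprocessing import Pool
import time

def burn(n):
    # Pure-Python loop: holds the GIL for its whole duration
    total = 0
    for i in range(n):
        total += i * i
    return total

if __name__ == '__main__':
    jobs = [2000000] * 4
    for label, make_pool in (('threads', ThreadPool), ('processes', Pool)):
        t0 = time.time()
        with make_pool(4) as pool:
            pool.map(burn, jobs)
        print(label, '%.2f s' % (time.time() - t0))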

You can use this, multithreaded:

from imutils.video import VideoStream

# Initialize the threaded video stream: frame grabbing runs in its own thread.
# Note: the resolution/framerate arguments only take effect with the PiCamera
# backend; for a plain cv2.VideoCapture source they are ignored.
frameSize = (640, 480)  # example value; the original snippet leaves this undefined
videostream = "rtsp://192.168.x.y/user=admin=xxxxxxx_channel=vvvv=1.sdp?params"
vs = VideoStream(src=videostream, resolution=frameSize,
                 framerate=32).start()

frame = vs.read()
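A possible continuation (my sketch, not part of the answer): VideoStream.read() is non-blocking and returns the most recently grabbed frame, so the main loop simply polls it.

import cv2

while True:
    frame = vs.read()  # latest frame from the background thread
    if frame is None:
        continue
    cv2.imshow('Frame', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

vs.stop()  # stop the frame-grabbing thread
cv2.destroyAllWindows()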

