
OpenCV / Python: multi-threading for live facial recognition

I am using OpenCv and Dlib to perform facial recognition with landmarks, live from the webcam stream. The language is Python. It works fine on my MacBook laptop, but I need it to run from a desktop computer 24/7. The computer is a PC with an Intel® Core™2 Quad CPU Q6600 @ 2.40GHz, 32-bit, running Debian Jessie. The drop in performance is drastic: there is a 10-second delay due to processing!

So I looked into multi-threading to gain performance:

  1. I first tried the OpenCV sample code, and the results were great: all four cores hit 100%, and performance was much better.
  2. I then replaced the frame-processing code with my own code, and it didn't improve performance at all; only one core hit 100%, and the others stayed very low. I even think it got worse with multi-threading on.

I got the facial landmark code from the dlib sample code. I know it could probably be optimized, but I want to understand why I am unable to use my (old) computer's full power with multi-threading.

I'll put my code below, thanks a lot for reading :)

from __future__ import print_function

import numpy as np
import cv2
import dlib

from multiprocessing.pool import ThreadPool
from collections import deque

from common import clock, draw_str, StatValue
import video

class DummyTask:
    def __init__(self, data):
        self.data = data
    def ready(self):
        return True
    def get(self):
        return self.data

if __name__ == '__main__':
    import sys

    print(__doc__)

    try:
        fn = sys.argv[1]
    except:
        fn = 0
    cap = video.create_capture(fn)

    # Face detector
    detector = dlib.get_frontal_face_detector()

    # Landmarks shape predictor
    predictor = dlib.shape_predictor("landmarks/shape_predictor_68_face_landmarks.dat")

    # This is where the facial detection takes place
    def process_frame(frame, t0, detector, predictor):
        # some intensive computation...
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        clahe_image = clahe.apply(gray)

        detections = detector(clahe_image, 1)
        for k, d in enumerate(detections):
            shape = predictor(clahe_image, d)
            for i in range(1, 68):  # There are 68 landmark points on each face
                cv2.circle(frame, (shape.part(i).x, shape.part(i).y), 1, (0, 0, 255), thickness=2)
        return frame, t0

    threadn = cv2.getNumberOfCPUs()
    pool = ThreadPool(processes=threadn)
    pending = deque()

    threaded_mode = True

    latency = StatValue()
    frame_interval = StatValue()
    last_frame_time = clock()
    while True:
        while len(pending) > 0 and pending[0].ready():
            res, t0 = pending.popleft().get()
            latency.update(clock() - t0)
            draw_str(res, (20, 20), "threaded: " + str(threaded_mode))
            draw_str(res, (20, 40), "latency: %.1f ms" % (latency.value * 1000))
            draw_str(res, (20, 60), "frame interval: %.1f ms" % (frame_interval.value * 1000))
            cv2.imshow('threaded video', res)
        if len(pending) < threadn:
            ret, frame = cap.read()
            t = clock()
            frame_interval.update(t - last_frame_time)
            last_frame_time = t
            if threaded_mode:
                task = pool.apply_async(process_frame, (frame.copy(), t, detector, predictor))
            else:
                task = DummyTask(process_frame(frame, t, detector, predictor))
            pending.append(task)
        ch = cv2.waitKey(1)
        if ch == ord(' '):
            threaded_mode = not threaded_mode
        if ch == 27:
            break
    cv2.destroyAllWindows()

The performance problem was due to a bad compilation of dlib. Do not use pip install dlib; for some reason it runs very, very slowly compared to a proper compilation from source. This way I went from almost 10 seconds of lag down to about 2 seconds. So in the end I didn't need multi-threading/processing, but I'm still working on increasing the speed. Thanks for the help :)
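If you want to check whether the dlib build (rather than the threading) is the bottleneck, here is a minimal timing sketch, assuming a local test image test.jpg containing a face: a single detector call on a still image involves no threading at all, so if it alone takes seconds, the build is the culprit.

import time

import cv2
import dlib

detector = dlib.get_frontal_face_detector()
# Load a hypothetical local test image and convert it to grayscale
gray = cv2.cvtColor(cv2.imread("test.jpg"), cv2.COLOR_BGR2GRAY)

t0 = time.time()
detections = detector(gray, 1)  # one detection pass, no threading involved
print("single detection: %.2f s, %d face(s)" % (time.time() - t0, len(detections)))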

I tried a simplified approach like the one P.Ro mentions in his answer, with processes writing to an output queue, but somehow the queue got locked most of the time because all the processes wrote to it at the same time (just my guess). I probably did something wrong.

In the end I ended up using pipes.

The code is nasty, but if I were myself from a few hours ago, I would still be happy to find an example that actually runs without effort.

from multiprocessing import Process, Queue, Manager, Pipe
import multiprocessing
import face_recognition as fik
import cv2
import time


video_input = 0

obama_image = fik.load_image_file("obama.png")
obama_face_encoding = fik.face_encodings(obama_image)[0]



quality = 0.7


# Worker: pull frames from the shared input queue, detect faces, and send
# each cropped face back to the main process over this worker's pipe.
def f(id, fi, fl):
    import face_recognition as fok

    while True:
        small_frame = fi.get()
        print("running thread"+str(id))
        face_locations = fok.face_locations(small_frame)

        if(len(face_locations)>0):
            print(face_locations)
            for (top7, right7, bottom7, left7) in face_locations:

                small_frame_c = small_frame[top7:bottom7, left7:right7]
                fl.send(small_frame_c)

fps_var = 0
if __name__ == '__main__':
        multiprocessing.set_start_method('spawn')


        # global megaman
        with Manager() as manager:

            video_capture = cv2.VideoCapture(video_input)

            fi = Queue(maxsize=14)  # shared input queue of frames, bounded to limit latency

            threads = 8
            proc = []

            parent_p = []  # main-process ends of the pipes
            thread_p = []  # worker ends of the pipes
            # procids = range(0,threads)
            for t in range(0,threads):
                p_t,c_t = Pipe()
                parent_p.append(p_t)
                thread_p.append(c_t)
                print(t)
                proc.append(Process(target=f, args=(t,fi,thread_p[t])))
                proc[t].start()


            useframe = False

            frame_id = 0
            while True:
                # Grab a single frame of video
                ret, frame = video_capture.read()
                if not ret:
                    break
                effheight, effwidth = frame.shape[:2]
                if effwidth < 20:
                    break
                # Resize the frame to a fixed width for faster face recognition processing
                xxx = 930
                yyy = 10/16 #0.4234375
                small_frame = cv2.resize(frame, (xxx, int(xxx*yyy)))
                if frame_id%2 == 0:
                    if not fi.full():


                        fi.put(small_frame)

                        print(frame_id)

                        cv2.imshow('Video', small_frame)


                        print("FPS: ", int(1.0 / (time.time() - fps_var)))
                        fps_var = time.time()


                #GET ALL DETECTIONS
                for t in range(0,threads):
                    if parent_p[t].poll():
                        small_frame_c = parent_p[t].recv()
                        cv2.imshow('recc', small_frame_c)
                        height34, width34 = small_frame_c.shape[:2]
                        # print fsizeee
                        if(width34<20):
                            print("face 2 small")
                            print(width34)
                            break
                        face_encodings_cam = fik.face_encodings(small_frame_c,[(0, width34, height34, 0)])

                        match = fik.compare_faces([obama_face_encoding], face_encodings_cam[0])
                        name = "Unknown"

                        if match[0]:
                            name = "Barack"

                        print(name)
                        break

                frame_id += 1

                # Hit 'q' on the keyboard to quit!
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

I don't have much experience with ThreadPool, but I always just use Process as shown below. You should be able to easily edit this code to fit your needs. I wrote this with your implementation in mind.

This code will get the number of cores and start that many worker processes, which all implement the desired function in parallel. They all share one queue of frames for input and all put results into the same output queue for the main process to get and show. Each queue has a maximum size, in this case 5. This ensures that despite the CPU time processing takes, the output always stays relatively live.
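The key detail is the drop-oldest put: when a bounded queue is full, the stale entry is discarded so the freshest frame always gets in and latency stays bounded. A small sketch of the pattern (the helper name put_latest is mine; the full listing below inlines the same logic):

from multiprocessing import Queue
from queue import Empty

def put_latest(q, item):
    # Discard the oldest entry when full so latency stays bounded.
    if q.full():
        try:
            q.get_nowait()
        except Empty:
            pass  # another process drained it first
    q.put(item)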

import numpy as np
import cv2
from multiprocessing import Process, Queue
import time
#from common import clock, draw_str, StatValue
#import video

class Canny_Process(Process):

    def __init__(self, frame_queue, output_queue):
        Process.__init__(self)
        self.frame_queue = frame_queue
        self.output_queue = output_queue
        self.stop = False
        # Initialize your face detectors here

    def get_frame(self):
        if not self.frame_queue.empty():
            return True, self.frame_queue.get()
        else:
            return False, None

    def stopProcess(self):
        self.stop = True

    def canny_frame(self, frame):
        # some intensive computation...
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        edges = cv2.Canny(gray, 50, 100)

        # To simulate CPU time
        #############################
        for i in range(1000000):
            x = 546 * 546
            res = x / (i + 1)
        #############################
        'REPLACE WITH FACE DETECT CODE HERE'

        # Drop the oldest result if the output queue is full, then publish
        if self.output_queue.full():
            self.output_queue.get_nowait()
        self.output_queue.put(edges)

    def run(self):
        while not self.stop:
            ret, frame = self.get_frame()
            if ret:
                self.canny_frame(frame)

if __name__ == '__main__':

    frame_sum = 0
    init_time = time.time()

    def put_frame(frame):
        if Input_Queue.full():
            Input_Queue.get_nowait()
        Input_Queue.put(frame)

    def cap_read(cv2_cap):
        ret, frame = cv2_cap.read()
        if ret:
            put_frame(frame)

    cap = cv2.VideoCapture(0)

    threadn = cv2.getNumberOfCPUs()
    threaded_mode = True

    process_list = []
    Input_Queue = Queue(maxsize=5)
    Output_Queue = Queue(maxsize=5)

    # Leave one core for the main process; each worker shares both queues
    for x in range(threadn - 1):
        canny_process = Canny_Process(frame_queue=Input_Queue, output_queue=Output_Queue)
        canny_process.daemon = True
        canny_process.start()
        process_list.append(canny_process)

    ch = cv2.waitKey(1)
    cv2.namedWindow('Threaded Video', cv2.WINDOW_NORMAL)
    while True:
        cap_read(cap)

        if not Output_Queue.empty():
            result = Output_Queue.get()
            cv2.imshow('Threaded Video', result)
            ch = cv2.waitKey(5)

        if ch == ord(' '):
            threaded_mode = not threaded_mode
        if ch == 27:
            break
    cv2.destroyAllWindows()

This should do the trick; just swap my Canny function for your face detection code. I wrote this based on your code and compared the two: this is significantly faster. I am using multiprocessing.Process here. In Python, processes are truly parallel, while threads are not quite, because of the GIL. I use two queues to send data back and forth between the main process and the workers. Queues are both thread- and process-safe.
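To see the GIL effect directly, here is a small self-contained sketch (the burn function is illustrative, not from the answer above) that pushes the same CPU-bound work through a thread pool and then a process pool; on a multi-core machine the process pool finishes substantially faster because the threads serialize on the GIL:

import time
from multiprocessing import Pool
from multiprocessing.pool import ThreadPool

def burn(n):
    # Pure-Python CPU-bound work that never releases the GIL
    s = 0
    for i in range(n):
        s += i * i
    return s

if __name__ == '__main__':
    jobs = [5000000] * 4
    for label, PoolCls in (("threads", ThreadPool), ("processes", Pool)):
        t0 = time.time()
        with PoolCls(4) as pool:
            pool.map(burn, jobs)
        print("%s: %.2f s" % (label, time.time() - t0))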

You can use this, multithreaded:

from imutils.video import VideoStream

# Initialize the multithreaded video stream.
frameSize = (1280, 720)  # example value; the original snippet left frameSize undefined
videostream = "rtsp://192.168.x.y/user=admin=xxxxxxx_channel=vvvv=1.sdp?params"
vs = VideoStream(src=videostream, resolution=frameSize,
                 framerate=32).start()

frame = vs.read()
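For completeness, a minimal sketch of the surrounding capture loop, assuming a local webcam (src=0) instead of the RTSP stream; imutils runs the frame grabbing in a background thread, so read() returns the most recent frame without blocking on I/O:

import cv2
from imutils.video import VideoStream

vs = VideoStream(src=0).start()
while True:
    frame = vs.read()  # latest frame, or None if nothing has been grabbed
    if frame is None:
        break
    cv2.imshow("Frame", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
vs.stop()
cv2.destroyAllWindows()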
