Opening a camera in Python and using deep learning for real-time object detection

  • 2020-11-30 08:28:59
  • OfStack

Recently I wanted to do real-time object detection, which requires opening the camera from Python. I only have two driver-free UVC cameras of average performance. It took me a while to get a camera open in Python, mainly because neither of my cameras could be opened with cv2's VideoCapture. This reminded me that OpenCV originally could not open the camera on Android phones either (the Camera module was adopted later). It seems that OpenCV's camera compatibility is still not perfect.

I tried several methods: v4l2, v4l2_capture and simpleCV. None of them could open the camera. In the end I used pygame to implement camera capture. Here I share the actual implementation code directly (python3.6, cv2, opencv3.3, ubuntu16.04). The commented-out parts in the middle of the code are my attempts to open the camera with the methods above; maybe one of them will work for you.

import time
import pygame
import cv2
import numpy as np
def surface_to_string(surface):
    """Serialize a pygame surface into its raw pixel data in 'RGB' format."""
    pixel_format = 'RGB'
    raw_pixels = pygame.image.tostring(surface, pixel_format)
    return raw_pixels
def pygame_to_cvimage(surface):
    """Convert a pygame surface into image arrays usable with OpenCV.

    The frame dimensions are read from the surface itself, so any capture
    resolution works (not only 640x480).

    Args:
        surface: pygame surface holding the frame to convert.

    Returns:
        A tuple ``(image_np, frame)``. ``image_np`` is a writable
        ``(height, width, 3)`` uint8 array in RGB channel order; ``frame``
        is the same picture with the channels swapped to BGR, the order
        OpenCV display functions expect.
    """
    # pygame reports (width, height); numpy images are indexed (row, col).
    width, height = surface.get_size()
    image_string = surface_to_string(surface)
    # np.fromstring is deprecated for binary input. np.frombuffer returns a
    # read-only view over the bytes, so copy to keep the result writable.
    image_np = np.frombuffer(image_string, np.uint8).reshape(height, width, 3).copy()
    # The surface was serialized as RGB; swap to BGR for OpenCV.
    frame = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
    return image_np, frame
# pygame.camera is a submodule that a plain `import pygame` does not load.
import pygame.camera

# Open the first V4L2 device at 640x480 and start streaming from it.
pygame.camera.init()
cam = pygame.camera.Camera("/dev/video0", [640, 480])
cam.start()
time.sleep(0.1)  # brief pause so the device can deliver its first frame
screen = pygame.display.set_mode([640, 480])
try:
    while True:
        image = cam.get_image()
        cv_image, frame = pygame_to_cvimage(image)
        # Show the raw capture in the pygame window ...
        screen.fill([0, 0, 0])
        screen.blit(image, (0, 0))
        pygame.display.update()
        # ... and the converted frame in an OpenCV window.
        cv2.imshow('frame', frame)
        key = cv2.waitKey(1)
        if key & 0xFF == ord('q'):
            break
        # Optional snapshot of the current frame:
        # pygame.image.save(image, "pygame1.jpg")
finally:
    # Release the device and close the OpenCV window even on an error.
    cam.stop()
    cv2.destroyAllWindows()

One point in the code above deserves attention: the conversion between a pygame image and an OpenCV image (pygame_to_cvimage). Some examples implement it with cv.CreateImageHeader and SetData, but note that these two functions no longer exist in OpenCV 3+. Therefore, numpy is used for the implementation instead.

As for object detection, there are many implementations available on the Internet, such as MobileNet. I am not going to go into the details here, because that is not the focus of this post; I will just post the code, which I have run and which works.

import argparse
import fcntl
import select
import time

import cv2
import image
import imutils
import numpy as np
import pygame
import pygame.camera
import v4l2
import v4l2capture
from imutils.video import FPS
def surface_to_string(surface):
    """Return the raw 'RGB'-formatted pixel data of a pygame surface."""
    rgb_data = pygame.image.tostring(surface, 'RGB')
    return rgb_data
def pygame_to_cvimage(surface):
    """Convert a pygame surface into a BGR image array for OpenCV.

    The frame dimensions are read from the surface itself, so any capture
    resolution works (not only 640x480).

    Args:
        surface: pygame surface holding the frame to convert.

    Returns:
        A ``(height, width, 3)`` uint8 array in BGR channel order.
    """
    # pygame reports (width, height); numpy images are indexed (row, col).
    width, height = surface.get_size()
    image_string = surface_to_string(surface)
    # np.fromstring is deprecated for binary input; frombuffer is the
    # supported replacement. The read-only view is fine here because
    # cvtColor allocates a new output array.
    image_np = np.frombuffer(image_string, np.uint8).reshape(height, width, 3)
    # The surface was serialized as RGB; swap to BGR for OpenCV.
    frame = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
    return frame
# Command-line options: paths to the Caffe network definition and weights,
# plus the minimum confidence a detection needs in order to be drawn.
ap = argparse.ArgumentParser()
ap.add_argument("-p", "--prototxt", required=True, help="path to caffe deploy prototxt file")
ap.add_argument("-m", "--model", required=True, help="path to caffe pretrained model")
ap.add_argument("-c", "--confidence", type=float, default=0.2, help="minimum probability to filter weak detection")
args = vars(ap.parse_args())

# Class labels indexed by the class id reported by the network, and one
# random drawing colour per class.
CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
           "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))

print("[INFO] loading model...")
net = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"])
print("[INFO] starting video stream ...")

# Disabled capture back-ends that were tried before settling on pygame.
###### opencv ########
#vs = VideoStream(src=1).start()
#camera = cv2.VideoCapture(0)
#if not camera.isOpened():
# print("camera is not open")
###### v4l2 ########
#vd = open('/dev/video0', 'r')
#cp = v4l2.v4l2_capability()
#fcntl.ioctl(vd, v4l2.VIDIOC_QUERYCAP, cp)
##### v4l2_capture
#video = v4l2capture.Video_device("/dev/video0")
#size_x, size_y = video.set_format(640, 480, fourcc= 'MJPEG')
##### pygame ####
pygame.camera.init()
cam = pygame.camera.Camera("/dev/video0", [640, 480])
cam.start()
fps = FPS().start()
try:
    while True:
        # Disabled frame-grab variants for the back-ends above.
        # NOTE(review): parts of these comments were lost in the published
        # listing and are reconstructed here - confirm before re-enabling.
        # opencv:
        #grabbed, frame = camera.read()
        #if not grabbed:
        # print("camera is not opened")
        # break
        #frame = imutils.resize(frame, width=400)
        # v4l2capture:
        #select.select((video,), (), ())
        #frame = video.read_and_queue()
        #npfs = np.frombuffer(frame, dtype=np.uint8)
        #frame = cv2.imdecode(npfs, cv2.IMREAD_COLOR)
        image = cam.get_image()
        frame = pygame_to_cvimage(image)
        frame = imutils.resize(frame, width=640)
        (h, w) = frame.shape[:2]
        blob = cv2.dnn.blobFromImage(frame, 0.00783, (640, 480), 127.5)
        # The blob must be set as the network input before the forward pass.
        net.setInput(blob)
        detections = net.forward()
        # Each detection row is read as
        # [_, class_id, confidence, x1, y1, x2, y2] with relative coordinates.
        for i in range(detections.shape[2]):
            confidence = detections[0, 0, i, 2]
            if confidence > args["confidence"]:
                idx = int(detections[0, 0, i, 1])
                # Scale the relative box to the size of the displayed frame.
                box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
                (startX, startY, endX, endY) = box.astype("int")
                label = "{}:{:.2f}%".format(CLASSES[idx], confidence*100)
                cv2.rectangle(frame, (startX, startY), (endX, endY), COLORS[idx], 2)
                # Put the label above the box unless that would leave the frame.
                y = startY - 15 if startY - 15 > 15 else startY + 15
                cv2.putText(frame, label, (startX, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2)
        cv2.imshow("Frame", frame)
        key = cv2.waitKey(1) & 0xFF
        if key == ord("q"):
            break
        # Count the processed frame for the FPS estimate.
        fps.update()
finally:
    # Stop the timer and release the camera and window even on an error.
    fps.stop()
    cam.stop()
    cv2.destroyAllWindows()
print("[INFO] elapsed time :{:.2f}".format(fps.elapsed()))
print("[INFO] approx. FPS :{:.2f}".format(fps.fps()))

The above implementation needs two files, which together make up a trained model implemented in Caffe. I uploaded them directly (the files are named MobileNetSSD_deploy.caffemodel and MobileNetSSD_deploy.prototxt; they can also be found by searching on Google).

Related articles: