加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
yolov5.py 10.22 KB
一键复制 编辑 原始数据 按行查看 历史
KX 提交于 2023-02-12 16:34 . f
import cv2
import dlib
import numpy as np
import torch
from imutils import face_utils
from scipy.spatial import distance as dist
from models.experimental import attempt_load
from utils.general import non_max_suppression, scale_coords
from utils.torch_utils import select_device
#video_file = 'input.mp4'
#audio_file = 'audio.mp3'
#output_file = 'output.mp4'
#output2_file = 'C:/Users/admin/Desktop/output.mp4'
#cap = cv2.VideoCapture(video_file)
#ffmpeg_path = 'G:/ffmpeg-master-latest-win64-gpl/bin/ffmpeg.exe'
# Codec for the saved video: 'mp4v' (not XVID) is an MPEG-4 encoder,
# and the matching output file extension is .mp4
#fourcc = cv2.VideoWriter_fourcc(*'mp4v')
# 设置视频帧频
#fps = cap.get(cv2.CAP_PROP_FPS)
# 设置视频大小
#size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
# VideoWriter方法是cv2库提供的保存视频方法
# 按照设置的格式来out输出
#out = cv2.VideoWriter(output_file, fourcc, fps, size)
# ---------------------------------------------------------------------------
# Face / landmark detection (dlib)
# ---------------------------------------------------------------------------
# Frontal face detector shipped with dlib.
face_detector = dlib.get_frontal_face_detector()
# Facial landmark predictor loaded from a local model file.
landmark_predictor = dlib.shape_predictor("G:\\test\\shape_predictor_68_face_landmarks.dat")
# Slice bounds of each facial region inside the landmark array returned by
# face_utils.shape_to_np().
(lStart, lEnd) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"]
(rStart, rEnd) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"]
(mStart, mEnd) = face_utils.FACIAL_LANDMARKS_IDXS["mouth"]

# ---------------------------------------------------------------------------
# Blink / yawn thresholds and running state
# ---------------------------------------------------------------------------
# An eye aspect ratio below this value counts as a closed eye.
EYE_AR_THRESH = 0.2
# A mouth aspect ratio above this value counts as an open (yawning) mouth.
MAR_THRESH = 0.75
# Minimum number of consecutive closed-eye / open-mouth frames that make up
# one blink / one yawn.
EYE_AR_CONSEC_FRAMES = 1
MOUTH_AR_CONSEC_FRAMES = 1
# Consecutive-frame counters (COUNTER: eyes, mCOUNTER: mouth) and the totals
# of blinks (TOTAL) and yawns (mTOTAL) seen in the current statistics window.
COUNTER = 0
mCOUNTER = 0
TOTAL = 0
mTOTAL = 0
# Duration of the last blink (COUNTER / FPS) and the last measured frame rate.
dura = 0.0
FPS = 0.0
# Tick count at which the current statistics window started.
start_time = cv2.getTickCount()

# ---------------------------------------------------------------------------
# YOLOv5 models
# ---------------------------------------------------------------------------
weights = 'yolov5s.pt'
weights1 = 'best.pt'
# NOTE(review): shadows the builtin `input` and is not read anywhere in the
# visible script; kept so the module's names are unchanged.
input = 'bus.jpg'
w = str(weights[0] if isinstance(weights, list) else weights)
w1 = str(weights1[0] if isinstance(weights1, list) else weights1)
# Use GPU 0 when CUDA is available, otherwise fall back to the CPU instead of
# failing at start-up. The device is passed as a string.
device = select_device('0' if torch.cuda.is_available() else 'cpu')
model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device)  # load the model
# The TorchScript check must look at this model's own path (w1); the original
# tested `w`, so the second loader was chosen from the first model's file name.
model1 = torch.jit.load(w1) if 'torchscript' in w1 else attempt_load(weights1, map_location=device)  # load the model
height, width = 640, 640  # network input size

# Start the webcam (device index 0).
cap = cv2.VideoCapture(0)
# Last measured eye / mouth aspect ratios; defaults used until a face is seen.
ear = 0.0
mar = 0.0
def eye_aspect_ratio(eye):
    """Return the eye aspect ratio (EAR) for one eye.

    ``eye`` is an indexable sequence of at least six (x, y) landmark
    coordinates. The ratio is the sum of the two vertical landmark
    distances (points 1-5 and 2-4) divided by twice the horizontal
    distance (points 0-3).
    """
    # Sum of the two vertical distances across the eye.
    vertical_total = dist.euclidean(eye[1], eye[5]) + dist.euclidean(eye[2], eye[4])
    # Horizontal distance between landmarks 0 and 3.
    horizontal = dist.euclidean(eye[0], eye[3])
    return vertical_total / (2.0 * horizontal)
def mouth_aspect_ratio(mouth):
    """Return the mouth aspect ratio (MAR) for one face.

    ``mouth`` is a NumPy array of (x, y) mouth landmarks, indexed relative
    to the start of the mouth slice (index 0 is landmark 49 in the 1-based
    numbering used by the comments below). The ratio is the sum of two
    vertical lip distances divided by twice the mouth width.

    The original code indexed ``mouth[9]`` and ``mouth[7]``, which are
    landmarks 58 and 56, not the 59 and 57 named in its own comments, so it
    measured diagonals rather than vertical openings. The indices now match
    the documented pairs.
    NOTE(review): MAR values change slightly as a result; re-check the yawn
    threshold against real footage.
    """
    A = np.linalg.norm(mouth[2] - mouth[10])  # 51, 59
    B = np.linalg.norm(mouth[4] - mouth[8])  # 53, 57
    C = np.linalg.norm(mouth[0] - mouth[6])  # 49, 55
    return (A + B) / (2.0 * C)
# ---------------------------------------------------------------------------
# Main capture loop.
#
# For every webcam frame:
#   1. detect faces and landmarks with dlib and update blink / yawn counters,
#   2. run both YOLOv5 models on a 640x640 copy of the frame,
#   3. draw the HUD, the warnings and the detection boxes, then show the frame.
#
# NOTE(review): the source this was recovered from had lost its indentation.
# The nesting below is a reconstruction: blink / yawn counters are updated
# only inside the per-face loop (so frames without a face never count as
# "eyes closed"), while the HUD text is drawn once per frame.
# ---------------------------------------------------------------------------

# Length of the statistics window after which blink / yawn totals restart.
# NOTE(review): the original compared raw ticks against 600000000, which is
# 60 s only at a 10 MHz tick frequency and a different duration on other
# platforms -- confirm that 60 s is the intended window.
STATS_WINDOW_SECONDS = 60.0
TICK_FREQUENCY = cv2.getTickFrequency()

# class id of the first model -> (box label, warning text, warning position)
_OBJECT_WARNINGS = {
    39: ('bottle', "drinking!!!", (0, 200)),
    67: ('phone', "using phone!!!", (250, 450)),
}


def _draw_detection(frame, xyxy, label, label_color):
    """Draw a green box for one detection with its label just above it.

    ``xyxy`` holds the four box corners (x1, y1, x2, y2) as CPU tensor
    scalars. Returns the frame that was drawn on.
    """
    x1, y1, x2, y2 = (int(v.numpy()) for v in xyxy)
    frame = cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
    cv2.putText(frame, label, org=(x1, y1 - 10),
                fontFace=1, fontScale=1.5, thickness=2, color=label_color)
    return frame


try:
    while True:
        t1 = cv2.getTickCount()
        ret, img0 = cap.read()  # read the input
        if not ret:
            break
        gray = cv2.cvtColor(img0, cv2.COLOR_BGR2GRAY)
        rects = face_detector(gray)

        # ---- build the network input -------------------------------------
        img = cv2.resize(img0, (width, height))  # cv2 expects (width, height)
        img = img / 255.
        img = img[:, :, ::-1].transpose((2, 0, 1))  # reverse channels, HWC -> CHW
        img = np.expand_dims(img, axis=0)  # extend the dimension to [1,3,640,640]
        img = torch.from_numpy(img.copy())  # copy: the reversed view has negative strides
        img = img.to(torch.float32).to(device)  # float64 -> float32, move to device

        # ---- inference ---------------------------------------------------
        # No gradients are needed. The original passed the argparse action
        # name 'store_true' for both flags, which is simply a truthy string;
        # plain inference is requested explicitly instead.
        with torch.no_grad():
            pred = model(img, augment=False, visualize=False)[0]
            pred1 = model1(img, augment=False, visualize=False)[0]
        pred = non_max_suppression(pred, 0.25, 0.45, None, False, max_det=1000)  # NMS: Non Max Suppression
        pred1 = non_max_suppression(pred1, 0.25, 0.45, None, False, max_det=1000)  # NMS: Non Max Suppression

        # A fresh random colour per frame, used for the detection labels.
        color = (np.random.randint(255), np.random.randint(255), np.random.randint(255))
        label_color = (color[1], color[2], color[0])

        if len(rects) == 0:
            cv2.putText(img0, "No Detect!!!", (0, 400), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 2)

        # ---- per-face landmarks, blink and yawn tracking -----------------
        for rect in rects:
            # Landmarks for this face, as a dlib shape and as a NumPy array.
            shape = landmark_predictor(gray, rect)
            shape1 = face_utils.shape_to_np(shape)

            leftEye = shape1[lStart:lEnd]
            rightEye = shape1[rStart:rEnd]
            mouth = shape1[mStart:mEnd]

            # Average the eye aspect ratio of both eyes.
            leftEAR = eye_aspect_ratio(leftEye)
            rightEAR = eye_aspect_ratio(rightEye)
            ear = (leftEAR + rightEAR) / 2.0
            mar = mouth_aspect_ratio(mouth)

            # Outline both eyes (green) and the mouth (blue) with convex hulls.
            leftEyeHull = cv2.convexHull(leftEye)
            rightEyeHull = cv2.convexHull(rightEye)
            mouseHull = cv2.convexHull(mouth)
            cv2.drawContours(img0, [leftEyeHull], -1, (0, 255, 0), 1)
            cv2.drawContours(img0, [rightEyeHull], -1, (0, 255, 0), 1)
            cv2.drawContours(img0, [mouseHull], -1, (255, 0, 0), 1)

            # Face bounding box and landmarks 27..35 (presumably the nose
            # region -- confirm against the landmark model).
            cv2.rectangle(img0, (rect.left(), rect.top()), (rect.right(), rect.bottom()), (255, 0, 0), 4)
            for i in range(27, 36):
                x = shape.part(i).x
                y = shape.part(i).y
                cv2.circle(img0, (x, y), 1, (0, 0, 255), -1)

            # Blink: count consecutive frames below the EAR threshold; when
            # the eye re-opens after enough frames, record one blink.
            if ear < EYE_AR_THRESH:
                COUNTER += 1
            else:
                if COUNTER >= EYE_AR_CONSEC_FRAMES:
                    # FPS is 0.0 until the first frame has been timed.
                    if FPS > 0:
                        dura = COUNTER / FPS
                    TOTAL += 1
                COUNTER = 0

            # Yawn: same scheme with the mouth aspect ratio (threshold
            # MAR_THRESH, at least MOUTH_AR_CONSEC_FRAMES frames).
            # NOTE(review): "Yawning!" is drawn at the same position as the
            # "Yawning: N" HUD entry below, so the two overlap.
            if mar > MAR_THRESH:
                mCOUNTER += 1
                cv2.putText(img0, "Yawning!", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
            else:
                if mCOUNTER >= MOUTH_AR_CONSEC_FRAMES:
                    mTOTAL += 1
                mCOUNTER = 0

        # ---- HUD ---------------------------------------------------------
        cv2.putText(img0, "Blinks: {}".format(TOTAL), (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        cv2.putText(img0, "EAR: {:.2f}".format(ear), (200, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        cv2.putText(img0, "dura: {:.2f}".format(dura), (400, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        # mouth
        cv2.putText(img0, "Yawning: {}".format(mTOTAL), (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        cv2.putText(img0, "mCOUNTER: {}".format(mCOUNTER), (200, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        cv2.putText(img0, "MAR: {:.2f}".format(mar), (400, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        # Fatigue warning once enough blinks or yawns fall in one window.
        if TOTAL >= 20 or mTOTAL >= 3:
            cv2.putText(img0, "SLEEP!!!", (400, 200), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 2)

        # ---- timing ------------------------------------------------------
        # Frame rate of the work done so far in this iteration (drawing of
        # detections, imshow and waitKey below are not included).
        t2 = cv2.getTickCount()
        spendTime = (t2 - t1) / TICK_FREQUENCY
        if spendTime > 0:
            FPS = 1 / spendTime
        t3 = cv2.getTickCount() - start_time
        print(t3)
        # Restart the statistics window once it has run its full length.
        if t3 / TICK_FREQUENCY >= STATS_WINDOW_SECONDS:
            start_time = cv2.getTickCount()
            TOTAL = 0
            mTOTAL = 0

        # ---- first model: bottle (39) and phone (67) ---------------------
        for det in pred:
            if not len(det):
                continue
            det = det.cpu()
            # Map boxes from the 640x640 network input back to the frame.
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()
            # xyxy: box corners, conf: confidence, cls: class index
            for *xyxy, conf, cls in reversed(det):
                warning = _OBJECT_WARNINGS.get(int(cls))
                if warning is not None and conf > 0.5:
                    label, message, position = warning
                    img0 = _draw_detection(img0, xyxy, label, label_color)
                    cv2.putText(img0, message, position, cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 2)
                # print the prediction information
                print('{},{},{}'.format(xyxy, conf.numpy(), cls))

        # ---- second model: class 0 is reported as smoking ----------------
        for det1 in pred1:
            if not len(det1):
                continue
            det1 = det1.cpu()
            det1[:, :4] = scale_coords(img.shape[2:], det1[:, :4], img0.shape).round()
            for *xyxy, conf1, cls1 in reversed(det1):
                print(conf1)
                if int(cls1) == 0 and conf1 > 0.7:
                    img0 = _draw_detection(img0, xyxy, 'smoking', label_color)
                    cv2.putText(img0, "smoking!!!", (300, 400), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 2)

        cv2.imshow("test", img0)
        #out.write(img0)
        # Quit when "q" is pressed.
        if cv2.waitKey(10) & 0xFF == ord("q"):
            break
finally:
    # Always free the camera and close the window, even if a frame failed.
    cap.release()
    cv2.destroyAllWindows()
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化