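# fatigue-driving-prediction/yolov5.py (branch: master)
# NOTE: repository-page navigation text ("Branches (1)", "Manage") was captured
# above the code when this file was exported; it is kept here only as a comment.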

import cv2
import dlib
import numpy as np
import torch
from imutils import face_utils
from scipy.spatial import distance as dist
from models.experimental import attempt_load
from utils.general import non_max_suppression, scale_coords
from utils.torch_utils import select_device

#video_file = 'input.mp4'
#audio_file = 'audio.mp3'
#output_file = 'output.mp4'
#output2_file = 'C:/Users/admin/Desktop/output.mp4'
#cap = cv2.VideoCapture(video_file)
#ffmpeg_path = 'G:/ffmpeg-master-latest-win64-gpl/bin/ffmpeg.exe'
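# Choose the codec for the saved video (the original note said "xvid", but the
# FourCC used below is 'mp4v').
# 'mp4v' is an MPEG-4 codec; the output file uses the .mp4 extension.
#fourcc = cv2.VideoWriter_fourcc(*'mp4v')
# Frame rate of the output video (taken from the input capture)
#fps = cap.get(cv2.CAP_PROP_FPS)
# Frame size of the output video (taken from the input capture)
#size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
# cv2.VideoWriter is OpenCV's video-saving API;
# `out` writes frames using the codec, frame rate and size configured above.
#out = cv2.VideoWriter(output_file, fourcc, fps, size)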


# Load the pre-trained face detector
face_detector = dlib.get_frontal_face_detector()

# Load the pre-trained facial landmark predictor and look up the index ranges
# of the eye and mouth landmarks inside its output
landmark_predictor = dlib.shape_predictor("G:\\test\\shape_predictor_68_face_landmarks.dat")
(lStart, lEnd) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"]
(rStart, rEnd) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"]
(mStart, mEnd) = face_utils.FACIAL_LANDMARKS_IDXS["mouth"]


# Detection thresholds: an eye aspect ratio below EYE_AR_THRESH counts as a
# closed eye and a mouth aspect ratio above MAR_THRESH counts as an open mouth.
# The *_CONSEC_FRAMES values are the minimum number of consecutive frames that
# must satisfy the condition before a blink / yawn is counted.
EYE_AR_THRESH = 0.2
MAR_THRESH = 0.75
EYE_AR_CONSEC_FRAMES = 1
MOUTH_AR_CONSEC_FRAMES = 1

# Frame counters for the current eye-closed / mouth-open run, the running
# totals of blinks and yawns, the duration of the last blink in seconds and
# the most recent frames-per-second estimate
COUNTER = 0
mCOUNTER = 0
TOTAL = 0
mTOTAL = 0
dura = 0.0
FPS = 0.0
# Tick count marking the start of the current blink/yawn counting window
start_time = cv2.getTickCount()

weights = 'yolov5s.pt'
weights1 = 'best.pt'
# NOTE(review): not read anywhere in this file and shadows the builtin
# `input`; kept only so the module's globals stay unchanged.
input = 'bus.jpg'

# Accept either a single path or a list of paths and use the first entry
w = str(weights[0] if isinstance(weights, list) else weights)

w1 = str(weights1[0] if isinstance(weights1, list) else weights1)

device = select_device(0)
# TorchScript files go through torch.jit.load, everything else through
# attempt_load.
model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device)  # load the model
# Each model's loader is chosen from its own path: the second model used to
# test `w`, so its loader was selected by the first model's file name.
model1 = torch.jit.load(w1) if 'torchscript' in w1 else attempt_load(weights1, map_location=device)  # load the model
height, width = 640, 640  # network input size
cap = cv2.VideoCapture(0)  # open capture device 0 (the webcam)
# Last computed eye / mouth aspect ratios; shown on screen even when no face
# is found in the current frame
ear = 0.0
mar = 0.0

def eye_aspect_ratio(eye):
    """Return the eye aspect ratio for a sequence of six eye landmarks.

    The ratio is the sum of the two vertical landmark distances
    (points 1-5 and 2-4) divided by twice the horizontal distance
    (points 0-3).

    Args:
        eye: indexable collection of at least six (x, y) points.

    Returns:
        The aspect ratio as a float.
    """
    # horizontal extent of the eye
    horizontal = dist.euclidean(eye[0], eye[3])
    # the two vertical openings, accumulated in the same order as before
    vertical_total = sum(
        dist.euclidean(eye[upper], eye[lower]) for upper, lower in ((1, 5), (2, 4))
    )
    return vertical_total / (2.0 * horizontal)


def mouth_aspect_ratio(mouth):
    """Return the mouth aspect ratio for an array of mouth landmarks.

    The ratio is the sum of two vertical lip distances (indices 2-9 and
    4-7 of ``mouth``) divided by twice the horizontal distance between
    indices 0 and 6.

    Args:
        mouth: NumPy array of (x, y) landmark rows; rows are subtracted
            element-wise, so plain tuples will not work.

    Returns:
        The aspect ratio as a float.
    """
    # NOTE(review): the previous inline comments labelled the vertical pairs
    # as landmarks 51/59 and 53/57, which does not obviously match indices
    # (2, 9) and (4, 7) -- confirm the intended pairs before changing them,
    # because MAR_THRESH was presumably tuned against this formula.
    span = np.linalg.norm(mouth[0] - mouth[6])
    openings = [np.linalg.norm(mouth[a] - mouth[b]) for a, b in ((2, 9), (4, 7))]
    return (openings[0] + openings[1]) / (2.0 * span)


# Class ids acted on below: 39 and 67 are looked for in the output of `model`,
# 0 in the output of `model1`.
BOTTLE_CLASS_ID = 39
PHONE_CLASS_ID = 67
SMOKING_CLASS_ID = 0
# Length of the window after which the blink / yawn totals are reset.
# NOTE(review): the previous code compared raw tick counts against 600000000,
# which is 60 s at a 10 MHz tick rate but a different duration on other
# clocks; 60 s is assumed to be the intent -- confirm.
STATS_WINDOW_SECONDS = 60.0


def _to_network_input(frame):
    """Turn a BGR frame into a 1x3xHxW float32 tensor scaled to [0, 1]."""
    # cv2.resize takes (width, height); the frame is stretched, not letterboxed
    resized = cv2.resize(frame, (width, height))
    scaled = resized / 255.
    chw = scaled[:, :, ::-1].transpose((2, 0, 1))  # reverse channels, HWC -> CHW
    batch = np.expand_dims(chw, axis=0)  # add the batch dimension
    return torch.from_numpy(batch.copy()).to(torch.float32).to(device)


def _run_detector(net, batch):
    """Run ``net`` on ``batch`` and return its NMS-filtered detections."""
    # The forward pass uses the model's defaults. The previous call passed the
    # truthy strings augment='store_true' / visualize='store_true', which
    # switch on test-time augmentation and feature-map visualisation.
    with torch.no_grad():  # inference only, no autograd bookkeeping needed
        raw = net(batch)[0]
    return non_max_suppression(raw, 0.25, 0.45, None, False, max_det=1000)


def _boxes_to_frame(boxes, frame_shape):
    """Map xyxy boxes from network-input pixels back onto the original frame.

    The frame is stretched to ``width`` x ``height`` without padding, so each
    axis is scaled independently. ``scale_coords`` assumes a letterboxed
    input and misplaces boxes on non-square frames.
    """
    frame_h, frame_w = frame_shape[:2]
    scale = boxes.new_tensor([frame_w / width, frame_h / height] * 2)
    upper = boxes.new_tensor([frame_w, frame_h] * 2)
    # clip to the frame and round to whole pixels
    return torch.min(boxes * scale, upper).clamp(min=0).round()


def _draw_detection(frame, box, label, label_colour, banner, banner_org):
    """Draw a detection box with its label plus a warning banner on ``frame``."""
    x1, y1, x2, y2 = (int(v) for v in box)
    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
    cv2.putText(frame, label, org=(x1, y1 - 10),
                fontFace=1, fontScale=1.5, thickness=2, color=label_colour)
    cv2.putText(frame, banner, banner_org, cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 2)


try:
    while True:
        t1 = cv2.getTickCount()
        ret, img0 = cap.read()  # read the next frame
        if not ret:
            break
        gray = cv2.cvtColor(img0, cv2.COLOR_BGR2GRAY)
        rects = face_detector(gray)

        img = _to_network_input(img0)
        pred = _run_detector(model, img)
        pred1 = _run_detector(model1, img)

        # one random label colour per frame
        color = (np.random.randint(255), np.random.randint(255), np.random.randint(255))
        label_colour = (color[1], color[2], color[0])

        if len(rects) == 0:
            cv2.putText(img0, "No Detect!!!", (0, 400), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 2)
        for rect in rects:
            # determine the facial landmarks for the face region, then
            # convert them to a NumPy array of (x, y) coordinates
            shape = landmark_predictor(gray, rect)
            shape1 = face_utils.shape_to_np(shape)
            # extract the eye and mouth landmarks and compute their ratios;
            # the eye aspect ratio is averaged over both eyes
            leftEye = shape1[lStart:lEnd]
            rightEye = shape1[rStart:rEnd]
            mouth = shape1[mStart:mEnd]
            ear = (eye_aspect_ratio(leftEye) + eye_aspect_ratio(rightEye)) / 2.0
            mar = mouth_aspect_ratio(mouth)

            # outline both eyes and the mouth with their convex hulls
            for points, hull_colour in ((leftEye, (0, 255, 0)),
                                        (rightEye, (0, 255, 0)),
                                        (mouth, (255, 0, 0))):
                cv2.drawContours(img0, [cv2.convexHull(points)], -1, hull_colour, 1)
            cv2.rectangle(img0, (rect.left(), rect.top()), (rect.right(), rect.bottom()), (255, 0, 0), 4)
            # mark landmarks 27-35 with small dots
            for idx in range(27, 36):
                part = shape.part(idx)
                cv2.circle(img0, (part.x, part.y), 1, (0, 0, 255), -1)

            # eye below the threshold: extend the current closed-eye run
            if ear < EYE_AR_THRESH:
                COUNTER += 1
            else:
                # eye open again: if the closed run was long enough, count a
                # blink and record how long it lasted
                if COUNTER >= EYE_AR_CONSEC_FRAMES:
                    # FPS is still 0.0 until the first frame has been timed
                    if FPS > 0:
                        dura = COUNTER / FPS
                    TOTAL += 1
                COUNTER = 0

            # same scheme for yawns, driven by the mouth aspect ratio
            if mar > MAR_THRESH:
                mCOUNTER += 1
                # drawn one row below the "Yawning: N" counter so the two
                # texts no longer overlap
                cv2.putText(img0, "Yawning!", (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
            else:
                # mouth closed again: count a yawn if the open run was at
                # least MOUTH_AR_CONSEC_FRAMES frames long
                if mCOUNTER >= MOUTH_AR_CONSEC_FRAMES:
                    mTOTAL += 1
                mCOUNTER = 0

        # status overlay: eye statistics on the first row, mouth on the second
        cv2.putText(img0, "Blinks: {}".format(TOTAL), (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        cv2.putText(img0, "EAR: {:.2f}".format(ear), (200, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        cv2.putText(img0, "dura: {:.2f}".format(dura), (400, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        cv2.putText(img0, "Yawning: {}".format(mTOTAL), (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        cv2.putText(img0, "mCOUNTER: {}".format(mCOUNTER), (200, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        cv2.putText(img0, "MAR: {:.2f}".format(mar), (400, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        if TOTAL >= 20 or mTOTAL >= 3:
            cv2.putText(img0, "SLEEP!!!", (400, 200), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 2)

        # update the frame-rate estimate from this frame's processing time
        t2 = cv2.getTickCount()
        spendTime = (t2 - t1) / cv2.getTickFrequency()
        if spendTime > 0:
            FPS = 1 / spendTime

        # restart the counting window once it has run for its full length;
        # ticks are converted to seconds so this does not depend on the tick rate
        elapsed = (cv2.getTickCount() - start_time) / cv2.getTickFrequency()
        if elapsed >= STATS_WINDOW_SECONDS:
            start_time = cv2.getTickCount()
            TOTAL = 0
            mTOTAL = 0

        # detections of the first model: bottles and phones
        for det in pred:
            if not len(det):
                continue
            det = det.cpu()
            det[:, :4] = _boxes_to_frame(det[:, :4], img0.shape)
            # xyxy: box corners, conf: confidence, cls: class index
            for *xyxy, conf, cls in reversed(det):
                class_id = int(cls)
                if class_id == BOTTLE_CLASS_ID and conf > 0.5:
                    _draw_detection(img0, xyxy, 'bottle', label_colour, "drinking!!!", (0, 200))
                if class_id == PHONE_CLASS_ID and conf > 0.5:
                    _draw_detection(img0, xyxy, 'phone', label_colour, "using phone!!!", (250, 450))

                # print the prediction information
                print('{},{},{}'.format(xyxy, conf.numpy(), cls))

        # detections of the second model: smoking
        for det1 in pred1:
            if not len(det1):
                continue
            det1 = det1.cpu()
            det1[:, :4] = _boxes_to_frame(det1[:, :4], img0.shape)
            for *xyxy, conf1, cls1 in reversed(det1):
                if int(cls1) == SMOKING_CLASS_ID and conf1 > 0.7:
                    _draw_detection(img0, xyxy, 'smoking', label_colour, "smoking!!!", (300, 400))

        cv2.imshow("test", img0)
        #out.write(img0)
        if cv2.waitKey(10) & 0xFF == ord("q"):
            break
finally:
    # always free the camera and close the window, even if a frame raised
    cap.release()
    cv2.destroyAllWindows()