subject: computer vision (machine learning - artificial intelligence)
Computer Vision (Machine Learning - Artificial Intelligence) Assignment !!!! SOLVE ONLY Task 3 and Task 4 Data description and grading scheme The release data directory (available at https://drive.google.com/drive/folders/1KoFGNLpRiYahdWf7BqCXCAzTh9u8-NoR?usp=sharing) contains three directories: train, test and evaluation. The directories train and test have the same structure, although the test data will be made available after the deadline. The train directory contains data organized in four subdirectories corresponding to the four Tasks that you need to solve. The subdirectories are: John_pepprs_407 SUMMARY: !!! SUBJECTS of computer vision that are covered through this assignment: Object Recognition o object classification o object detection o part based models o bovw models Video Understanding o object tracking o background subtraction o motion descriptors o optical flow A useful lab will be the Jupyter notebook provided at the previous link (the one with the training set). # -*- coding: utf-8 -*- """Laborator7-solution.ipynb Automatically generated by Colaboratory. 
Original file is located at https://colab.research.google.com/drive/120Rz0GylJBeIp8TA157PsfYhpm8rHljv ## Computer Vision - lab 7 ### Read, Write and Display a video using OpenCV """ import numpy as np import cv2 as cv import os import glob import matplotlib.pyplot as plt from numpy.random import uniform """### Reading and displaying a video""" base_folder = "data" path_video1 = os.path.join(base_folder, "3.mp4") # Open the video cap = cv.VideoCapture(path_video1) if cap.isOpened() == False: print("Error opening video stream or file") frame_width = int(cap.get(cv.CAP_PROP_FRAME_WIDTH)) print('frame_width = ' + str(frame_width)) frame_height = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT)) print('frame_height = ' + str(frame_height)) length = int(cap.get(cv.CAP_PROP_FRAME_COUNT)) print("total number of frames = " + str(length)) fps = int(cap.get(cv.CAP_PROP_FPS)) print("frames per second = " + str(fps)) cap.release() # Display the video def display_video(video_path: str, max_number_of_frame_to_run: int = None) -> None: """ This function display the video of the screen frame by frame. :param video_path: Path to the video :param max_number_of_frame_to_run: Set how many frame to be displayed. If None all frames will be displayed. """ current_frame = 0 # Open the video cap = cv.VideoCapture(path_video1) while cap.isOpened(): ret, frame = cap.read() # Read the frame if ret is True: current_frame = current_frame + 1 cv.imshow("Frame", frame) if max_number_of_frame_to_run is not None and current_frame > max_number_of_frame_to_run: break if cv.waitKey(25) & 0xFF == ord('q'): break else: break # after playing the video, release the video capture cap.release() # close all the frames cv.destroyAllWindows() display_video(video_path=os.path.join(base_folder, "3.mp4"), max_number_of_frame_to_run=750) """### Writing a video""" def read_frames(video_path): """ This function takes the video path and returns the a list of frames. 
:param video_path: Path to the video """ frames = [] cap = cv.VideoCapture(video_path) if cap.isOpened() == False: raise Exception("Error opening video stream or file") return frames while cap.isOpened(): ret, frame = cap.read() # Read the frame if ret is True: frames.append(frame) else: break cap.release() return frames # We are going to write the same video at 1 fps, first we need to read the frames. frames = read_frames(os.path.join(base_folder, "3.mp4")) # here we have the extensions and the fourcc for each of it video_extension_and_fourcc_dict = {'avi': cv.VideoWriter_fourcc('M', 'J', 'P', 'G'), 'mp4': 0x7634706d} # We need to create a VideoWriter object. # First, we should specify the output file name with its format (eg: 1_fps_1.mp4). # We should specify the FourCC code and the number of frames per second (FPS). # Lastly, the frame size should be passed (width, height). video_output_name = "3_fps_1.mp4" output_video = cv.VideoWriter(video_output_name, video_extension_and_fourcc_dict["mp4"], 1, (frames[0].shape[1], frames[0].shape[0])) num_frames = len(frames) # We know that the first video has 30 fps. for i in range(0, num_frames, 30): output_video.write(frames[i]) # writing the frame # don't forget to release the video writer output_video.release() """# Video analysis of a snooker footage In this lab we will have a look on the last year's second project (all the details are here http://tinyurl.com/CV-2020-Project2). The goal of the project was to develop an automatic system for video analysis of snooker footages. The system should be able to detect the snooker table and the balls on the table, track the balls, infer when a ball is potted into a pocket. We will analyze the Task 3 presented in the project, the one about tracking the cue ball (white ball) and another ball. 
The initial bounding boxes of the two balls are provided for the first frame (they follow the format [xmin ymin xmax ymax], where (xmin, ymin) is the top left corner and (xmax, ymax) is the bottom right corner of the initial bounding-box). In a video, we consider that the algorithm correctly tracks a ball if in more (greater or equal) than 80% of the frames the algorithm correctly localizes the ball to be tracked. We consider that the algorithm correctly localizes the ball to be tracked in a specific frame if the value of the IOU (intersection over union) between the window provided by your algorithm and the ground-truth window is more than 20%. ## Tracking using template matching """ def select_roi(frame): """ Select the roi from the image. :param frame :return roi, x, y, w, h """ x, y, w, h = cv.selectROI(frame) track_window = (x, y, w, h) roi = frame[y: y + h, x: x + w] annotated_frame = cv.rectangle(frame, (x, y), (x + w, y + h), 255, 2) cv.imshow('First frame initialization', annotated_frame) cv.waitKey(2000) cv.destroyAllWindows() return roi, x, y, w, h def find_ball_using_template_matching(frame, roi_gray, old_bbox): """ :param frame: Current frame :param roi_gray: the previous detected region :param old_bbox: the previous detected bbox corresponding to roi_gray [x, y, w, h]. 
:return new_x, new_y """ frame_gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY) x, y, w, h = old_bbox # define the searching region for template matching # a rectangular region 4h * 4w center = (y + h//2, x + h//2) y_min = np.max((0, center[0] - (2*h))) y_max = np.min((frame.shape[0], center[0] + (2*h))) x_min = np.max((0, center[1] - (2*w))) x_max = np.min((frame.shape[1], center[1] + (2*w))) # display the searching region mask1 = np.int8(np.zeros(frame_gray.shape)) mask1[y_min: y_max, x_min: x_max] = 255 frame_gray_mask = cv.bitwise_and(frame_gray,frame_gray,mask=mask1) cv.imshow('frame gray mask', frame_gray_mask) cv.waitKey(500) # function cv.matchTemplate works only on grayscale templates res = cv.matchTemplate(frame_gray_mask, roi_gray, cv.TM_CCOEFF_NORMED) min_val, max_val, min_loc, max_loc = cv.minMaxLoc(res) new_y = max_loc[1] new_x = max_loc[0] return new_x, new_y def track_ball_using_template_matching(video_path) -> list: """ This function track the ball (which is initialized using select ROI) using template matching: Template matching: https://docs.opencv.org/master/df/dfb/group__imgproc__object.html#ga586ebfb0a7fb604b35a23d85391329be :param video_path: The path to the video. :return bounding boxes where the ball was found. """ bboxes = [] cap = cv.VideoCapture(video_path) ret, first_frame = cap.read() # Read the first frame roi, x, y, w, h = select_roi(first_frame) roi_gray = cv.cvtColor(roi, cv.COLOR_BGR2GRAY) frame_idx = 0 while cap.isOpened(): frame_idx += 1 ret, frame = cap.read() if ret is True: