From 124ddbfb3b2593f50a72d418f7396a4cd8d6413e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=BCmin=20K=C3=B6yk=C4=B1ran?=
Date: Tue, 1 Mar 2022 00:43:18 +0300
Subject: [PATCH] * Using basic-auth credentials, the stream is received frame
 by frame from the specified IP address and port. * The received frames are
 processed through several image-handling stages with third-party libraries
 such as Pillow and OpenCV. * Frames read from the stream are passed through
 the action recognition algorithm with predict_frames(). * The action
 recognition output and the frames read from the IP camera are displayed in
 an on-screen widget.

---
 scenarios/action_recognition/00_webcam.ipynb | 294 +++++++++++++++++--
 1 file changed, 266 insertions(+), 28 deletions(-)

diff --git a/scenarios/action_recognition/00_webcam.ipynb b/scenarios/action_recognition/00_webcam.ipynb
index c6c548f4f..c57706305 100644
--- a/scenarios/action_recognition/00_webcam.ipynb
+++ b/scenarios/action_recognition/00_webcam.ipynb
@@ -54,15 +54,14 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-     "3.7.6 (default, Jan 8 2020, 19:59:22) \n",
-     "[GCC 7.3.0] \n",
+     "3.8.5 (default, Sep 3 2020, 21:29:08) [MSC v.1916 64 bit (AMD64)] \n",
      "\n",
-     "PyTorch 1.2.0 \n",
+     "PyTorch 1.7.1+cu110 \n",
      "\n",
-     "Torch-vision 0.4.0a0 \n",
+     "Torch-vision 0.8.2+cu110 \n",
      "\n",
      "Available devices:\n",
-     "0: Tesla K80\n"
+     "0: NVIDIA GeForce GTX 1650\n"
     ]
    }
   ],
@@ -72,22 +71,27 @@
    "from collections import deque #\n",
    "import io\n",
    "import requests\n",
+   "import urllib.request\n",
    "import os\n",
    "from time import sleep, time\n",
    "from threading import Thread\n",
    "from IPython.display import Video\n",
    "\n",
    "# Third party tools\n",
+   "import cv2\n",
    "import decord #\n",
    "import IPython.display #\n",
+   "from IPython.display import display\n",
    "from ipywebrtc import CameraStream, ImageRecorder\n",
    "from ipywidgets import HBox, HTML, Layout, VBox, Widget, Label\n",
+   "import ipywidgets as widgets\n",
    "import numpy as np\n",
    "from PIL import Image\n",
    "import torch\n",
    "import torch.cuda as cuda\n",
    "import torch.nn as nn\n",
    "from torchvision.transforms import Compose\n",
+   "from matplotlib import cm\n",
    "\n",
    "# utils_cv\n",
    "sys.path.append(\"../../\")\n",
@@ -187,7 +191,7 @@
     "name": "stderr",
     "output_type": "stream",
     "text": [
-     "Using cache found in /home/jiata/.cache/torch/hub/moabitcoin_ig65m-pytorch_master\n"
+     "Using cache found in C:\\Users\\mumin/.cache\\torch\\hub\\moabitcoin_ig65m-pytorch_master\n"
     ]
    }
   ],
@@ -212,7 +216,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 20,
+  "execution_count": 5,
   "metadata": {
    "nbpresent": {
     "id": "eb1dfefe-dfe4-46e4-8482-9b2bc1d05b89"
@@ -234,7 +238,7 @@
     " 'auctioning']"
    ]
   },
-  "execution_count": 20,
+  "execution_count": 5,
   "metadata": {},
   "output_type": "execute_result"
  }
 ],
@@ -252,7 +256,7 @@
   }
  },
  "source": [
-  "Among them, we will use 50 classes that we are interested in (i.e. the actions make sense to demonstrate in front of the webcam) and ignore other classes by filtering out from the model outputs. This will help us reduce the noise during prediction."
+  "Among them, we will use 50 classes that we are interested in (i.e. actions that make sense to demonstrate in front of a webcam or IP camera) and ignore the other classes by filtering them out of the model outputs. This helps reduce noise during prediction."
  ]
 },
 {
@@ -352,15 +356,24 @@
    "For this example, we'll use the following video:"
   ]
  },
+ {
+  "cell_type": "markdown",
+  "metadata": {},
+  "source": [
+   "Download the video to our data folder."
+  ]
+ },
 {
  "cell_type": "code",
  "execution_count": 7,
- "metadata": {},
+ "metadata": {
+  "scrolled": false
+ },
 "outputs": [
  {
   "data": {
    "text/html": [
-    "
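
Note on the approach above: the streaming loop that the commit message
describes can be sketched roughly as follows. This is a minimal illustration,
not the notebook's exact code -- the camera URL, credentials, and clip length
are placeholder assumptions, and predict_frames() appears only as a commented
stand-in because its exact signature is not part of this patch excerpt.

    import cv2
    import ipywidgets as widgets
    from IPython.display import display

    # Placeholder basic-auth credentials and camera address (assumptions).
    CAMERA_URL = "http://user:password@192.168.1.64:8080/video"

    cap = cv2.VideoCapture(CAMERA_URL)  # OpenCV accepts credential-embedded URLs
    frame_widget = widgets.Image(format="jpeg")  # on-screen display widget
    display(frame_widget)

    clip = []       # rolling buffer of frames fed to the model
    CLIP_LEN = 8    # assumed clip length; the notebook may use another value

    while cap.isOpened():
        ok, frame = cap.read()  # each frame arrives as a BGR numpy array
        if not ok:
            break
        clip.append(frame)
        if len(clip) == CLIP_LEN:
            # scores = model.predict_frames(clip)  # notebook helper (stand-in)
            clip = []
        ok, jpeg = cv2.imencode(".jpg", frame)  # re-encode for the widget
        if ok:
            frame_widget.value = jpeg.tobytes()

    cap.release()

Running this loop on a background thread (the notebook already imports Thread
from threading) keeps the Jupyter kernel responsive while frames stream in.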