Collect data and try different approaches

Click here to download the code. Contact xiyan.zhang@student.keystoneacademy.cn for the password (the code contains private images used for testing).

  • Data

    Data is the ‘soul’ of the product: its quality directly affects the accuracy of the model. For this task, I need labeled images of people performing different movements. Collecting all the data myself would be challenging, but fortunately I discovered a relevant dataset online. Although I won’t be building the dataset from scratch, using an existing one is still a helpful and wise choice. link to the dataset

  • Code

    • version 1

    An approach with a CNN.

    Here we treat a video as a group of images: we split a video into 30 frames and apply the model to each frame, producing 30 outputs. We then take the output that appears most often as the prediction for the whole video (see the sketch below).
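
    This majority vote is easy to express with collections.Counter; a minimal sketch, assuming frame_predictions already holds one predicted class index per frame:

      from collections import Counter

      def majority_vote(frame_predictions):
          # Count how often each predicted class index appears across the
          # frames and return the most frequent one as the video-level label
          return Counter(frame_predictions).most_common(1)[0][0]

      print(majority_vote([2, 2, 1, 2, 4, 2]))  # six per-frame predictions -> 2

    The full implementation below starts by loading the data: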

      import os
      import cv2
      import numpy as np
      import pandas as pd
      from tensorflow.keras.models import Sequential
      from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
      from PIL import Image
      import matplotlib.pyplot as plt
      import seaborn as sns
      import random
    
      #load data doc
      dataset_path = "/kaggle/input/gesture-recognition"
      train_csv = pd.read_csv('/kaggle/input/train-doc/train_new.csv')
      train_csv.head()
    
      import matplotlib.image as mpimg
    
      # Load the image
      image = mpimg.imread("/kaggle/input/gesture-recognition/train/WIN_20180907_15_45_40_Pro_Left Swipe_new_Left Swipe_new/WIN_20180907_15_45_40_Pro_00045.png")
    
      # Display the image
      plt.imshow(image)
      plt.axis('on')
    
      # Show the plot
      plt.show()
    

    (Image: sample_face, the sample frame displayed above.)

      #size of the image
      pic_size = 120
      images = np.empty((19860, pic_size, pic_size,1), dtype=np.uint8)
      label = []
      text = []
      train_size = train_csv.shape
      i = 0
      for n in range(train_size[0]):
          subPath = train_csv.loc[n,'File']  # name of the subfolder, read from train_csv
          folder_path = '/kaggle/input/gesture-recognition/train/' + subPath  # path of the subfolder
            
          #read images
          for root, dirs, files in os.walk(folder_path):
              for file in files:
                  image_path = os.path.join(root, file)
                  image = cv2.imread(image_path)
                  image = cv2.resize(image, (pic_size, pic_size))
                  image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
                  image = image.reshape((pic_size, pic_size, 1))
                  images[i] = image
                
                  label.append(train_csv.loc[n,'Inde'])
                  text.append(train_csv.loc[n,'Name'])
                  i+=1
      labels = np.array(label)
      print(labels.shape)
      print(images.shape)
      # Shuffle images and labels with the same seed so both arrays
      # receive the same permutation and stay aligned.
      seed = 25
      np.random.seed(seed)
      np.random.shuffle(images)

      np.random.seed(seed)
      np.random.shuffle(labels)
    

    Convert the images into arrays, store them in a single NumPy array, and shuffle images and labels in unison
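
    The same resize/grayscale/reshape steps are needed again at inference time, so a shared helper would reduce the chance of a train/test preprocessing mismatch. A minimal sketch (the name preprocess is mine; here the 0–1 scaling is folded in, whereas the code above normalizes later in one batch):

      import cv2

      def preprocess(image, pic_size=120):
          # Resize, convert BGR to grayscale, add a channel axis,
          # and scale pixel values to [0, 1]
          image = cv2.resize(image, (pic_size, pic_size))
          image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
          return image.reshape((pic_size, pic_size, 1)) / 255.0

    Next, take a subset of the images and shuffle it: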

      from sklearn.model_selection import train_test_split
    
      # Take a 10,000-image subset, then shuffle X and y with the same
      # seed so they receive the same permutation and stay aligned.
      X = images[:10000]
      y = labels[:10000]
      seed = 43
      np.random.seed(seed)
      np.random.shuffle(X)

      np.random.seed(seed)
      np.random.shuffle(y)
    

    Shuffle a 10,000-image subset of the data set
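
    Re-seeding before each shuffle works, but a single shared index permutation makes the intent clearer and avoids depending on the seeding trick. A minimal sketch using the X and y defined above:

      import numpy as np

      perm = np.random.permutation(len(X))  # one random order shared by both arrays
      X, y = X[perm], y[perm]

    Next, normalize the pixels, split into training and validation sets, and build the CNN: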

      import tensorflow as tf
      from tensorflow import keras
    
      # Assuming you have your data and labels loaded as X and y respectively
      num_classes = 5
    
      # Normalize the pixel values between 0 and 1
      X = X / 255.0
    
      # Split the data into training and validation sets
      train_ratio = 0.8  # 80% of the data for training, adjust as needed
      split_index = int(train_ratio * len(X))
    
      X_train, X_val = X[:split_index], X[split_index:]
      y_train, y_val = y[:split_index], y[split_index:]
      # Build a small CNN: one conv/pool block followed by a dense classifier
      # (Sequential and the layers were already imported above)
    
      model = Sequential()
      model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(pic_size, pic_size, 1)))
      model.add(MaxPooling2D((2, 2)))
      model.add(Flatten())
      model.add(Dense(64, activation='relu'))
      model.add(Dense(5, activation='softmax'))
    
      # Compile the model
      model.compile(optimizer='adam',
                    loss='sparse_categorical_crossentropy',
                    metrics=['accuracy'])
    
      # Train the model
      model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data = (X_val, y_val))
    

    Train the model
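
    As an aside, train_test_split was imported earlier but never called; it can replace both the manual shuffling and the manual split. A minimal sketch:

      from sklearn.model_selection import train_test_split

      # shuffle=True is the default, so the manual seeded shuffles become unnecessary
      X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=43)

    To test the trained model on random samples: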

      te = int(input())  # number of random test samples
      numToText = {0:'Left_Swipe_new',1:'Right_Swipe_new',2:'Stop_new',3:'Thumbs_Down_new',4:'Thumbs_Up_new'}
      correct = 0
      for i in range(te):
          rand = random.randint(0, 19859)  # valid indices are 0..19859
          test_image = images[rand].reshape((1, pic_size, pic_size, 1)) / 255.0
          prediction = model.predict(test_image)
          print("prediction", np.argmax(prediction), numToText[np.argmax(prediction)])
          plt.imshow(images[rand].squeeze(), cmap='gray')
          print("actual", labels[rand])
          if np.argmax(prediction) == labels[rand]:
              correct += 1
      print(f"out of {te} predictions, {correct} were correct. The accuracy was {100*(correct/te)}%")
    

    Something went wrong here: error_version1

    • version 2

      Try to use the model

        def pose_rec(image_path):
            numToText = {0:'Left_Swipe_new',1:'Right_Swipe_new',2:'Stop_new',3:'Thumbs_Down_new',4:'Thumbs_Up_new'}
            image = cv2.imread(image_path)
            image = cv2.resize(image, (120, 120))
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            image = image.reshape((120, 120, 1))
            # Note: this predicts on `test_image`, a leftover global from the
            # earlier test loop, instead of the image prepared above.
            prediction = model.predict(test_image)
            plt.imshow(image.squeeze(), cmap='gray')
            #print("prediction",np.argmax(prediction),numToText[np.argmax(prediction)])
            return int(np.argmax(prediction))
      

      Function that uses the model

      # Open the video file
      video_path = '/kaggle/input/train-doc/6.mp4'  # Replace with your video file path
      video = cv2.VideoCapture(video_path)
    
      # Check if the video file is successfully opened
      if not video.isOpened():
          print("Error opening video file")
          exit()
    
      # Create a directory to store the extracted frames
      output_dir = 'frames_output'
      os.makedirs(output_dir, exist_ok=True)
    
      # Initialize frame counter
      frame_count = 0
      while True:
          # Read the next frame from the video
          ret, frame = video.read()

          # Check if there is a valid frame
          if not ret:
              break

          # Save the frame as an image file
          frame_filename = f"{output_dir}/frame_{frame_count:04d}.jpg"
          cv2.imwrite(frame_filename, frame)

          # Increment frame counter
          frame_count += 1

      # Release the video file
      video.release()

      print(f"Frames extracted: {frame_count}")
    

    Split the video into frames
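
    The write-up above mentions splitting a video into 30 frames, while this code saves every frame. A minimal sketch of sampling 30 evenly spaced frames instead, assuming the clip is re-opened with cv2.VideoCapture(video_path):

      import numpy as np

      video = cv2.VideoCapture(video_path)  # re-open the clip
      total = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
      # 30 evenly spaced frame indices across the whole clip
      for idx in np.linspace(0, total - 1, num=30, dtype=int):
          video.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
          ret, frame = video.read()
          if ret:
              cv2.imwrite(f"{output_dir}/frame_{int(idx):04d}.jpg", frame)
      video.release()

    Now classify every extracted frame and take the majority vote: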

      import os
      import cv2
      from collections import Counter

      numToText = {0:'Left_Swipe_new',1:'Right_Swipe_new',2:'Stop_new',3:'Thumbs_Down_new',4:'Thumbs_Up_new'}
      # Folder path containing the images (replace with the actual path)
      folder_path = '/kaggle/working/frames_output'
      # Get a list of all image files in the folder
      image_files = [f for f in os.listdir(folder_path) if f.endswith(('.jpg', '.jpeg', '.png'))]
    
      result = []
      # Iterate over each image file 
      for image_file in image_files: 
          # Read the image using OpenCV 
          image_path = os.path.join(folder_path, image_file) 
          result.append(pose_rec(image_path))
        
    
      # The most frequent per-frame prediction is the video-level result
      counter = Counter(result)
      most_common_value = counter.most_common(1)[0][0]
      print(most_common_value)
    

    Use the model
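
    Putting the pieces together, the whole version-2 pipeline can be wrapped in one helper (classify_video is my name, not in the original code); a sketch assuming the pose_rec function and imports above:

      def classify_video(video_path, output_dir='frames_output'):
          # Extract every frame, classify each one with pose_rec,
          # and majority-vote over the per-frame results
          os.makedirs(output_dir, exist_ok=True)
          video = cv2.VideoCapture(video_path)
          results = []
          frame_count = 0
          while True:
              ret, frame = video.read()
              if not ret:
                  break
              frame_path = f"{output_dir}/frame_{frame_count:04d}.jpg"
              cv2.imwrite(frame_path, frame)
              results.append(pose_rec(frame_path))
              frame_count += 1
          video.release()
          return Counter(results).most_common(1)[0][0]

      print(classify_video('/kaggle/input/train-doc/6.mp4'))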

    It seems that something went wrong with either the model or the function that applies it, as my test doesn’t work well.

    • version 3

      See whether the problem occurs in how the model is used

        def pose_rec(t_path):
            numToText = {0:'Left_Swipe_new',1:'Right_Swipe_new',2:'Stop_new',3:'Thumbs_Down_new',4:'Thumbs_Up_new'}
            image = cv2.imread(t_path)
            image = cv2.resize(image, (pic_size, pic_size))
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            image = image.reshape((pic_size, pic_size, 1))
            # This time the input is batched and normalized before prediction
            test_image = image.reshape((1, pic_size, pic_size, 1)) / 255.0
            prediction = model.predict(test_image)
            pred = np.argmax(prediction)
            print(pred)
            category = numToText[pred]
            plt.imshow(image.squeeze(), cmap='gray')
            return category
        pose_rec('/kaggle/input/train-doc/2023-8-17 6.39.jpg')
      
        numToText = {0:'Left_Swipe_new',1:'Right_Swipe_new',2:'Stop_new',3:'Thumbs_Down_new',4:'Thumbs_Up_new'}
        # Folder path containing the images (replace with the actual path)
        folder_path = '/kaggle/working/frames_output'
        # Get a list of all image files in the folder
        image_files = [f for f in os.listdir(folder_path) if f.endswith(('.jpg', '.jpeg', '.png'))]

        LS = 0
        RS = 0
        ST = 0
        TD = 0
        TU = 0
        # Iterate over each image file
        for image_file in image_files:
            image_path = os.path.join(folder_path, image_file)
            # pose_rec now returns a category name, so classify each frame
            # once and compare against the names rather than the indices
            category = pose_rec(image_path)
            if category == 'Left_Swipe_new':
                LS += 1
            elif category == 'Right_Swipe_new':
                RS += 1
            elif category == 'Stop_new':
                ST += 1
            elif category == 'Thumbs_Down_new':
                TD += 1
            elif category == 'Thumbs_Up_new':
                TU += 1
            else:
                print(f'something went wrong, the outcome is: {category}')
      

      It turns out the problem is the model.