Классификатор поз йоги

Описание

Модель машинного обучения, которая относит позу йоги к одной из пяти самых известных асан: «собака мордой вниз», поза доски, поза дерева, поза богини и поза воина-2. Для извлечения признаков используется MediaPipe BlazePose.

Набор данных

Набор данных составлен из двух источников:

https://www.kaggle.com/niharika41298/yoga-poses-dataset
https://sites.google.com/view/yoga-82/home
Объединенный набор данных содержит более 900 различных изображений (дети, женщины и мужчины) для пяти различных асан йоги.

Предварительная обработка изображений

Масштабирование изображений

Сначала изображения масштабируются с сохранением пропорций (большая сторона приводится к 480 пикселям), чтобы уменьшить объем вычислений.

# Target side lengths in pixels used by resize_2D_array: landscape images are
# scaled to DESIRED_WIDTH wide, all other images to DESIRED_HEIGHT high.
DESIRED_HEIGHT = 480
DESIRED_WIDTH = 480
def resize_2D_array(overall_images):
    """Resize every image so that its longer side matches the target size.

    The aspect ratio is preserved: images with ``height < width`` are scaled
    to ``DESIRED_WIDTH`` pixels wide, all other images to ``DESIRED_HEIGHT``
    pixels high. The shape of each resized image is printed.

    Args:
        overall_images: Iterable of iterables of images (one inner group per
            sub-directory). Each image must expose ``.shape`` and be accepted
            by ``cv2.resize``.

    Returns:
        A new list of lists with the same nesting, holding the resized images.
    """
    resized_groups = []
    for group in overall_images:
        resized_group = []
        for img in group:
            h, w = img.shape[:2]
            # cv2.resize expects the target size as (width, height). The
            # computed side is clamped to 1 so that a very elongated image
            # cannot floor to an invalid dimension of 0.
            if h < w:
                target = (DESIRED_WIDTH,
                          max(1, math.floor(h / (w / DESIRED_WIDTH))))
            else:
                target = (max(1, math.floor(w / (h / DESIRED_HEIGHT))),
                          DESIRED_HEIGHT)
            img = cv2.resize(img, target)
            print(img.shape)
            resized_group.append(img)
        resized_groups.append(resized_group)
    return resized_groups

Регулировка яркости

Гамма-коррекция — это нелинейная корректировка значений отдельных пикселей. При нормализации изображения над отдельными пикселями выполняются линейные операции, тогда как гамма-коррекция выполняет над пикселями исходного изображения нелинейную операцию и может привести к искажению насыщенности изображения.

#for brightness improvement
def gammaCorrection(src, gamma):
    """Apply gamma correction to an image through a 256-entry lookup table.

    Every 8-bit level ``v`` is mapped to ``((v / 255) ** (1 / gamma)) * 255``
    and the table is stored as ``uint8``.

    Args:
        src: Image handed to ``cv2.LUT``.
        gamma: Gamma value; its reciprocal is used as the exponent.

    Returns:
        The result of ``cv2.LUT(src, table)``.
    """
    exponent = 1 / gamma
    lookup = np.array(
        [((level / 255) ** exponent) * 255 for level in range(256)],
        np.uint8,
    )
    return cv2.LUT(src, lookup)
def isbright(image, dim=10):
    """Return the mean relative lightness of an image.

    Despite its name, this function returns a number, not a boolean; callers
    compare the result against their own thresholds.

    The image is shrunk to ``dim`` x ``dim``, converted from BGR to LAB, and
    the L channel is divided by its own maximum before averaging.

    NOTE(review): because L is normalised by the thumbnail's own maximum, the
    score is relative to the brightest pixel rather than absolute - confirm
    this is intended.

    Args:
        image: Image accepted by ``cv2.resize`` and ``cv2.COLOR_BGR2LAB``.
        dim: Side length of the thumbnail used for the estimate.

    Returns:
        Mean of the normalised L channel, or ``0.0`` when the thumbnail is
        completely black.
    """
    thumbnail = cv2.resize(image, (dim, dim))
    lightness = cv2.split(cv2.cvtColor(thumbnail, cv2.COLOR_BGR2LAB))[0]
    peak = np.max(lightness)
    if peak == 0:
        # An all-black thumbnail would otherwise produce 0/0 == NaN.
        return 0.0
    return np.mean(lightness / peak)
def changeBrightness(image, dark_threshold=0.5, bright_threshold=0.85,
                     dark_gamma=1, bright_gamma=0.75):
    """Gamma-correct an image whose brightness score is out of range.

    Args:
        image: Image passed to ``isbright`` and ``gammaCorrection``.
        dark_threshold: Scores below this value use ``dark_gamma``.
        bright_threshold: Scores above this value use ``bright_gamma``.
        dark_gamma: Gamma applied to dark images.
        bright_gamma: Gamma applied to bright images.

    Returns:
        The gamma-corrected image, or ``image`` itself when the score lies
        within ``[dark_threshold, bright_threshold]``.
    """
    # Score the image once instead of once per comparison.
    brightness = isbright(image)
    if brightness < dark_threshold:
        # NOTE(review): with the default dark_gamma of 1 the lookup table in
        # gammaCorrection is essentially the identity, so dark images are
        # left practically unchanged - presumably a value > 1 was intended;
        # confirm before changing the default.
        return gammaCorrection(image, dark_gamma)
    if brightness > bright_threshold:
        return gammaCorrection(image, bright_gamma)
    return image

def improve_brightness(overall_images):
    """Apply ``changeBrightness`` to every image of a nested collection.

    Args:
        overall_images: Iterable of iterables of images (one inner group per
            sub-directory).

    Returns:
        A new list of lists with the same nesting, holding the processed
        images.
    """
    return [
        [changeBrightness(img) for img in group]
        for group in overall_images
    ]

Регулировка контрастности

Выравнивание гистограммы регулирует контраст изображения по его гистограмме.
Для повышения контрастности оно расширяет диапазон интенсивности изображения.
Это позволяет областям изображения с более низким контрастом получить более высокий контраст.

# import the necessary library
from skimage.exposure import is_low_contrast
def _equalize_channel(channel):
    """Equalize one 8-bit channel through its cumulative histogram."""
    hist, _ = np.histogram(channel.flatten(), 256, [0, 256])
    cdf = np.cumsum(hist)
    # The CDF is non-decreasing, so its first and last non-zero entries are
    # its minimum and maximum over the occupied intensity range.
    occupied = cdf[cdf > 0]
    if occupied.size == 0 or occupied[0] == occupied[-1]:
        # Empty or constant channel: the CDF range is zero, so there is
        # nothing to stretch; return it unchanged instead of dividing by 0.
        return channel.copy()
    cdf_min, cdf_max = occupied[0], occupied[-1]
    scaled = (cdf - cdf_min) * 255 / (cdf_max - cdf_min)
    # Levels below the darkest occupied intensity (CDF == 0) map to 0.
    lut = np.where(cdf > 0, scaled, 0).astype('uint8')
    return lut[channel]


def histogram_equalization(img_in):
    """Equalize the histogram of each channel of a 3-channel 8-bit image.

    Each channel is processed independently: its cumulative histogram is
    rescaled to 0..255 and used as a lookup table for the channel's pixels.
    A channel that holds a single intensity is returned unchanged.

    Args:
        img_in: ``uint8`` array of shape ``(height, width, 3)``.

    Returns:
        A new ``uint8`` array of the same shape with equalized channels, in
        the same channel order as the input.

    Raises:
        ValueError: If ``img_in`` does not have exactly three channels.
    """
    if img_in.ndim != 3 or img_in.shape[2] != 3:
        raise ValueError(
            "histogram_equalization expects an image of shape (H, W, 3)"
        )
    equalized = [_equalize_channel(img_in[:, :, c]) for c in range(3)]
    return np.stack(equalized, axis=-1)
    
def improve_contrast(overall_images):
    """Equalize the histogram of every low-contrast image in a collection.

    An image is passed through ``histogram_equalization`` only when
    ``is_low_contrast`` reports it as low contrast; other images are kept
    as they are.

    Args:
        overall_images: Iterable of iterables of images (one inner group per
            sub-directory).

    Returns:
        A new list of lists with the same nesting, holding the processed
        images.
    """
    enhanced_groups = []
    for group in overall_images:
        enhanced_group = []
        for img in group:
            if is_low_contrast(img, fraction_threshold=0.05,
                               lower_percentile=1, upper_percentile=99,
                               method='linear'):
                img = histogram_equalization(img)
            enhanced_group.append(img)
        enhanced_groups.append(enhanced_group)
    return enhanced_groups

# Run the contrast-improvement step over the nested image list.
# NOTE(review): `newarray` is not defined in this section - presumably it is
# the output of the earlier preprocessing steps; confirm.
newarray = improve_contrast(newarray)

Повышение резкости изображений

Оператор Лапласа — это детектор границ, вычисляющий вторые производные изображения.
Он позволяет определить, вызвано ли изменение значений соседних пикселей границей или плавным переходом. Ядра фильтра Лапласа обычно содержат отрицательные значения, расположенные крестом с центром в середине массива. Угловые элементы равны нулю или положительны. Центральное значение может быть как отрицательным, так и положительным.

def sharpenimage(image):
    """Sharpen an image whose Laplacian variance is below 100.

    Args:
        image: Image accepted by ``cv2.Laplacian`` and ``cv2.filter2D``.

    Returns:
        The image filtered with a 3x3 sharpening kernel when the variance of
        its Laplacian is below 100; otherwise ``image`` itself.
    """
    # The variance of the Laplacian response serves as the sharpness score.
    sharpness = cv2.Laplacian(image, cv2.CV_64F).var()
    if not (sharpness < 100):
        return image
    sharpen_kernel = np.array([
        [0, -1, 0],
        [-1, 5, -1],
        [0, -1, 0],
    ])
    return cv2.filter2D(src=image, ddepth=-1, kernel=sharpen_kernel)
def improve_sharpening(overall_images):
    """Apply ``sharpenimage`` to every image of a nested collection.

    Args:
        overall_images: Iterable of iterables of images (one inner group per
            sub-directory).

    Returns:
        A new list of lists with the same nesting, holding the processed
        images.
    """
    return [
        [sharpenimage(img) for img in group]
        for group in overall_images
    ]

Сегментация тела

Функция сегментации MediaPipe используется для размытия фона изображения.
Маска имеет ту же ширину и высоту, что и входное изображение, и содержит значения в диапазоне [0.0, 1.0], где 1.0 и 0.0 обозначают пиксель «человека» и пиксель «фона» соответственно.

Ориентиры позы

MediaPipe Pose используется для извлечения из изображения 3D-координат 33 суставов.
x и y: координаты ориентира, нормализованные к [0.0, 1.0] по ширине и высоте изображения соответственно.
z: глубина ориентира; за начало отсчета принята глубина в средней точке между бедрами, и чем меньше значение, тем ближе ориентир находится к камере.

Вычисление угла

Ключевые углы в суставах (колено, локоть, плечо, лодыжка) рассчитываются по извлеченным точкам и помечаются названием соответствующей асаны. Угол в суставе определяется по формуле:

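$\text{угол} = \operatorname{degrees}\bigl(\operatorname{atan2}(y_3 - y_2,\; x_3 - x_2) - \operatorname{atan2}(y_1 - y_2,\; x_1 - x_2)\bigr)$; если результат отрицателен, к нему прибавляется $360^\circ$.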

def calculateAngle(landmark1, landmark2, landmark3):
    """Return the angle at ``landmark2`` between the other two landmarks.

    Only the x and y components are used; the third component of each
    landmark is ignored.

    Args:
        landmark1: ``(x, y, z)`` triple of the first point.
        landmark2: ``(x, y, z)`` triple of the vertex.
        landmark3: ``(x, y, z)`` triple of the third point.

    Returns:
        The angle in degrees, shifted by 360 when negative so that the
        result is never below zero.
    """
    (ax, ay, _), (vx, vy, _), (cx, cy, _) = landmark1, landmark2, landmark3

    # Direction of each arm as seen from the vertex.
    toward_third = math.atan2(cy - vy, cx - vx)
    toward_first = math.atan2(ay - vy, ax - vx)
    degrees = math.degrees(toward_third - toward_first)

    return degrees + 360 if degrees < 0 else degrees

Результаты модели машинного обучения:

Обучайте и тестируйте алгоритмы машинного обучения (случайный лес, SVC, дерево решений, KNN, Adaboost, RFC) с использованием сгенерированного фрейма данных (csv), чтобы определить, какая модель лучше всего подходит.