# This project needs a Python version compatible with all of the libraries used below; it was developed and tested on Python 3.8.2.
# Install the required packages with:
# pip install mediapipe
# pip install opencv-python
# pip install pyautogui
# pip install numpy
# pip install comtypes
# pip install pycaw
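# Note: pycaw and comtypes wrap the Windows Core Audio API, so the system
# volume control in this script works on Windows only.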
import cv2
import mediapipe as mp
import numpy as np
import time
import math
import pyautogui
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume
# Initialize video capture
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
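# The CAP_PROP_* calls are requests; the camera driver may fall back to the
# nearest resolution it actually supports.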
# Initialize mediapipe hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(min_detection_confidence=0.8, min_tracking_confidence=0.8, max_num_hands=2)
mp_drawing = mp.solutions.drawing_utils
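# The 0.8 confidence thresholds trade responsiveness for fewer false
# detections; max_num_hands=2 is needed for the two-hand volume gesture below.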
# Get screen size and initialize cursor control variables
screen_width, screen_height = pyautogui.size()
prev_x, prev_y = 0, 0
# Initialize audio utilities
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))
volRange = volume.GetVolumeRange()
minVol, maxVol = volRange[0], volRange[1]
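# GetVolumeRange() returns (min dB, max dB, step); SetMasterVolumeLevel()
# expects a value in that dB range, not a 0-100 percentage.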
last_volume_change = 0
last_action_time = 0
def hand_landmarks(image_rgb, draw_image):
    # Run mediapipe on the RGB image, but draw the detected landmarks on the
    # BGR frame that is actually displayed (drawing on the RGB copy would
    # never show up in the output window).
    results = hands.process(image_rgb)
    landmark_list = []
    if results.multi_hand_landmarks:
        for hand_lms in results.multi_hand_landmarks:
            mp_drawing.draw_landmarks(draw_image, hand_lms, mp_hands.HAND_CONNECTIONS)
            landmark_list.append([(int(lm.x * draw_image.shape[1]), int(lm.y * draw_image.shape[0]))
                                  for lm in hand_lms.landmark])
    return landmark_list
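# Each entry in the returned list is one detected hand: 21 (x, y) pixel
# coordinates in mediapipe's hand-landmark numbering (0 = wrist, 4 = thumb
# tip, 8 = index tip, 12 = middle tip, 16 = ring tip, 20 = pinky tip).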
def perform_actions(finger_status, current_time):
    # finger_status is [thumb, index, middle, ring, pinky] with 1 = extended.
    # Actions are throttled to at most one every 0.2 seconds.
    global last_action_time
    if current_time - last_action_time < 0.2:
        return
    action_performed = True
    if all(finger_status):
        pyautogui.press('space')    # open palm
    elif finger_status == [0, 1, 0, 0, 1]:
        pyautogui.press('right')
    elif finger_status == [0, 1, 1, 0, 1]:
        pyautogui.press('left')
    elif finger_status == [0, 1, 1, 1, 1]:
        pyautogui.press('up')
    elif finger_status == [0, 1, 1, 1, 0]:
        pyautogui.press('down')
    elif finger_status == [0, 0, 1, 0, 0]:
        pyautogui.screenshot('screenshot.png')
    else:
        action_performed = False
    if action_performed:
        last_action_time = current_time
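# pyautogui.press() sends the keystroke to whichever window has focus, so
# these gestures drive the foreground application (e.g. space toggles
# play/pause in most video players).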
def get_finger_status(landmarks):
    tip_ids = [4, 8, 12, 16, 20]  # thumb, index, middle, ring, pinky tips
    # Thumb: tip x past the adjacent joint; other fingers: tip y above the PIP joint.
    thumb = int(landmarks[tip_ids[0]][0] > landmarks[tip_ids[0] - 1][0])
    return [thumb] + [int(landmarks[t][1] < landmarks[t - 2][1]) for t in tip_ids[1:]]
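# Example: an open palm facing the camera reads [1, 1, 1, 1, 1]; a fist reads
# [0, 0, 0, 0, 0]. The thumb's x-axis test is orientation-dependent, so left
# and right hands may report the thumb differently.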
def get_average_finger_distance(landmarks):
    # Average the pixel distance between the two hands' pinky tips (20) and
    # ring-finger tips (16); returns None unless two hands are detected.
    if len(landmarks) >= 2:
        return (math.hypot(*(np.array(landmarks[0][20]) - np.array(landmarks[1][20]))) +
                math.hypot(*(np.array(landmarks[0][16]) - np.array(landmarks[1][16])))) / 2
    return None
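# Because this distance needs both hands in frame, the volume gesture cannot
# fire accidentally during one-handed cursor or key gestures.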
# Create a named window
cv2.namedWindow("Hand Gesture Control")
running = True
while running:
    ret, frame = cap.read()
    if not ret:
        break
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    landmarks = hand_landmarks(frame_rgb, frame)
    current_time = time.time()
    if landmarks:
        distance = get_average_finger_distance(landmarks)
        if distance is not None and (current_time - last_volume_change) > 0.1:
            # Map the two-hand separation (roughly 50-300 px) onto the
            # endpoint's dB volume range, throttled to one update per 0.1 s.
            vol = np.interp(distance, [50, 300], [minVol, maxVol])
            volPercentage = np.interp(vol, [minVol, maxVol], [0, 100])
            volume.SetMasterVolumeLevel(vol, None)
            last_volume_change = current_time
            volBar = int(np.interp(volPercentage, [0, 100], [720, 150]))
            cv2.rectangle(frame, (50, 150), (85, 720), (0, 255, 0), 3)
            cv2.rectangle(frame, (50, volBar), (85, 720), (0, 255, 0), cv2.FILLED)
            cv2.putText(frame, f'{int(volPercentage)}%', (40, 140), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 3)
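        # np.interp clamps outside its input range, so hand separations under
        # 50 px pin the volume at minVol and over 300 px at maxVol.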
        if landmarks[0]:
            finger_status = get_finger_status(landmarks[0])
            perform_actions(finger_status, current_time)
            # Index up, middle down: move the cursor. The 75-px margins map a
            # sub-region of the 1280x720 frame onto the full screen, and the
            # /8 smoothing damps jitter in the landmark positions.
            if finger_status[1] == 1 and finger_status[2] == 0:
                x1, y1 = landmarks[0][8]  # index fingertip
                x3 = np.interp(x1, (75, 1205), (0, screen_width))
                y3 = np.interp(y1, (75, 645), (0, screen_height))
                curr_x = prev_x + (x3 - prev_x) / 8
                curr_y = prev_y + (y3 - prev_y) / 8
                pyautogui.moveTo(screen_width - curr_x, curr_y)  # mirror x for the webcam view
                prev_x, prev_y = curr_x, curr_y
            # Index down, thumb out: left click, debounced by a 0.3 s sleep.
            if finger_status[1] == 0 and finger_status[0] == 1:
                pyautogui.click()
                time.sleep(0.3)
    cv2.imshow("Hand Gesture Control", frame)
    # Press the 'q' key to quit
    key = cv2.waitKey(1) & 0xFF
    if key == ord('q'):
        running = False
# Clean up
cap.release()
cv2.destroyAllWindows()
print("Program terminated.")