upd

anatol-grabowski · Aug 16, 2023 · f9c625d · f9c625d
1 parent f077588
commit f9c625d
Show file tree

Hide file tree

Showing 8 changed files with 162 additions and 87 deletions.
diff --git a/capture.py b/capture.py
@@ -3,7 +3,6 @@
 import re
 import cv2
 import mediapipe as mp
-import pyautogui
 from pynput import keyboard, mouse
 import pynput
 import uuid
@@ -25,6 +24,7 @@
 import keyboard
 from modules.webcam import list_webcams, cams_init, cams_capture, cam_init
 from modules.dataset import Dataset
+from modules.interpolate_points import interpolate_points
 
 
 def draw_cursors(frame, cursor, cursors):
@@ -35,9 +35,9 @@ def draw_cursors(frame, cursor, cursors):
             col = (255, 0, 0) if k != len(cursors) - 1 else (0, 0, 255)
             cv2.circle(frame, cursor_to_pixelxy(cur, imsize).astype(int), 2, col, -1)
 
-    xy = cursor_to_pixelxy(pixelxy_to_cursor(np.array(pyautogui.position()), monsize), imsize)
+    xy = cursor_to_pixelxy(pixelxy_to_cursor(np.array(mouse_controller.position), monsize), imsize)
     color_capt = (0, 0, 0)
-    cv2.circle(frame, xy.astype(int), 4, (255, 255, 255) if not is_capturing else color_capt)
+    cv2.circle(frame, xy.astype(int), 4, (255, 255, 255) if not is_capture else color_capt)
     cv2.circle(frame, xy.astype(int), 3, (0, 255, 0))
 
 
@@ -46,14 +46,18 @@ def render(frame, cursor, cursors, faces):
     frame = cv2.flip(frame, 1)
     imsize = np.array([frame.shape[1], frame.shape[0]])
     if faces is not None:
-        left_blink, right_blink = detect_blink(faces[0])
+        left_blink, right_blink = detect_blink(faces[0], 0.3)
         cv2.putText(frame, f"{'L' if left_blink else ' '} {'R' if right_blink else ''}",
                     (100, 100), fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, color=(0, 255, 0), thickness=2)
     draw_cursors(frame, cursor, cursors)
 
-    for xy, loss in path:
-        intensity = int(np.clip(loss * 4 * 255, 20, 255))
-        cv2.circle(frame, cursor_to_pixelxy(xy, imsize).astype(int), 2, (0, 0, 255, intensity))
+    for xy in points[i % len(points): i % len(points) + 80]:
+        framexy = cursor_to_pixelxy(pixelxy_to_cursor(xy, monsize), imsize)
+        cv2.circle(frame, np.array(framexy).astype(int), 1, (0, 255, 0))
+
+    for xy, loss in path[-50:]:
+        intensity = int(np.clip(loss / 0.02 * 255, 0, 255))
+        cv2.circle(frame, cursor_to_pixelxy(xy, imsize).astype(int), 2, (0, 0, intensity))
 
     cv2.namedWindow('Fullscreen Image', cv2.WINDOW_NORMAL)
     cv2.setWindowProperty('Fullscreen Image', cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
@@ -67,29 +71,36 @@ def render(frame, cursor, cursors, faces):
 
 def on_press(key):
     global cam, i, pos, dirpath, monxy, should_exit
+    global is_capture, is_automove
+    # if type(key) == pynput.keyboard.KeyCode and key.vk == ralt_vk:
+    #     frames = {}
+    #     x, y = np.array(pyautogui.position()) - monxy
+    #     t0 = time.time()
+    #     time.sleep(0.1)
+    #     for j in range(3):
+    #         ret, frame = cam.read()
+    #         filename = f'{dirpath}/{i+1}-{j+1} [{x} {y}] {int(time.time() * 1000)}.jpeg'
+    #         frames[filename] = frame
+    #     dt = time.time() - t0
+    #     print(f'{dt*1000:.0f}')
+    #     i += 1
+    #     pyautogui.moveTo(*points[i % len(points)])
+    #     for filename, frame in frames.items():
+    #         os.makedirs(dirpath, exist_ok=True)
+    #         cv2.imwrite(filename, frame)
+    #         print('save', filename)
+
     if type(key) == pynput.keyboard.KeyCode and key.vk == ralt_vk:
-        frames = {}
-        x, y = np.array(pyautogui.position()) - monxy
-        t0 = time.time()
-        time.sleep(0.1)
-        for j in range(3):
-            ret, frame = cam.read()
-            filename = f'{dirpath}/{i+1}-{j+1} [{x} {y}] {int(time.time() * 1000)}.jpeg'
-            frames[filename] = frame
-        dt = time.time() - t0
-        print(f'{dt*1000:.0f}')
-        i += 1
-        pyautogui.moveTo(*points[i % len(points)])
-        for filename, frame in frames.items():
-            os.makedirs(dirpath, exist_ok=True)
-            cv2.imwrite(filename, frame)
-            print('save', filename)
+        is_capture = True
+        is_automove = True
 
     if type(key) == pynput.keyboard.KeyCode and key.vk == fn_vk:
         if len(dataset.datapoints) != 0:
             datapoint = dataset.datapoints.pop()
             path.pop()
-            pyautogui.moveTo(*cursor_to_pixelxy(datapoint['position'], monsize))
+            i -= 1
+            mouse_controller.position = cursor_to_pixelxy(
+                datapoint['position'], monsize)  # doesn't work from kb_listener thread?
             print('rm', len(dataset.datapoints))
 
     if key == pynput.keyboard.Key.esc:
@@ -101,33 +112,38 @@ def on_press(key):
             frame = cv2.flip(frame, 1)
             imsize = np.array([frame.shape[1], frame.shape[0]])
             for xy, loss in path:
-                intensity = int(np.clip(loss * 4 * 255, 20, 255))
-                cv2.circle(frame, cursor_to_pixelxy(xy, imsize).astype(int), 2, (0, 0, 255, intensity))
-            im_filepath = dataset_filepath.replace('.pickle', '.jpeg')
+                intensity = int(np.clip(loss / 0.02 * 255, 0, 255))
+                cv2.circle(frame, cursor_to_pixelxy(xy, imsize).astype(int), 2, (0, 0, intensity))
+            mean_loss = np.array([l for xy, l in path]).mean()
+            im_filepath = dataset_filepath.replace('.pickle', f' {mean_loss:.3f}.jpeg')
             cv2.imwrite(im_filepath, frame)
         should_exit = True
 
 
 def on_release(key):
-    if type(key) == pynput.keyboard.KeyCode and (key.vk == ralt_vk or key.vk == fn_vk):
-        print(f'release')
+    global is_capture, is_automove
+    if type(key) == pynput.keyboard.KeyCode and key.vk == ralt_vk:
+        is_capture = False
+        is_automove = False
 
 
 kb_listener = pynput.keyboard.Listener(on_press=on_press, on_release=on_release)
 kb_listener.start()
 
 
-is_capturing = False
+is_capture = False
+is_automove = False
 
 
 def on_click(x, y, button, pressed):
-    global is_capturing
+    global is_capture
     if button == pynput.mouse.Button.left:
-        is_capturing = pressed
+        is_capture = pressed
 
 
 mouse_listener = pynput.mouse.Listener(on_click=on_click)
-mouse_listener.start()
+# mouse_listener.start()
+mouse_controller = pynput.mouse.Controller()
 
 
 def capture(face, xy, frame, t0):
@@ -145,8 +161,7 @@ def capture(face, xy, frame, t0):
 
 save_photos = True
 
-camname = 'intg'
-camname = 'brio'
+camname = sys.argv[1]
 cam = cam_init(camname)
 if camname == 'intg':
     monname = 'eDP-1'  # 'eDP-1' (integrated) or 'DP-3' (Dell)
@@ -156,28 +171,36 @@ def capture(face, xy, frame, t0):
 monsize = np.array([mon.width, mon.height])
 monxy = np.array([mon.x, mon.y])
 
-pyautogui.FAILSAFE = False
 edge_offset = 5
-steps = np.array([8, 5])
+steps = np.array([14, 8])
 edge = np.array([edge_offset, edge_offset, monsize[0]-edge_offset, monsize[1]-edge_offset])
 points = spiral(*edge, *steps)
 dstep = np.array([edge[2] - edge[0], edge[3] - edge[1]]) / (steps + 1)
 randomness = 1
 r = np.random.randint(-dstep/2, dstep/2, size=[len(points), 2]) * randomness
 points = (points + r).clip([0, 0], [monsize[0] - 3, monsize[1] - 4])
+points = interpolate_points(points, 7)
+fl = np.random.randint(0, 4)
+if fl == 1:
+    points = np.array(points) * [-1, 1] + [monsize[0], 0]
+if fl == 2:
+    points = np.array(points) * [1, -1] + [0, monsize[1]]
+    points = np.array(points) * [-1, 1] + [monsize[0], 0]
+if fl == 3:
+    points = np.array(points) * [1, -1] + [0, monsize[1]]
 points += monxy
 print(points.max(axis=0))
 
 i = 0
 # if len(sys.argv) >= 2:
 #     int(sys.argv[1])
-pyautogui.moveTo(*points[i % len(points)], 0.2, pyautogui.easeInOutQuad)
+mouse_controller.position = points[i % len(points)]
 
 iso_date = datetime.datetime.now().isoformat().split('.')[0].replace(':', '_')
 dirpath = f'./data/photos/{iso_date}-{camname}'
 dataset = Dataset()
 
-mpaths = sys.argv[1:]
+mpaths = sys.argv[2:]
 models = [GazePredictor.load_from_file(p) for p in mpaths]
 # scores = np.array([float(re.match(r'.* (0.\d+) .*', p)[1]) for p in mpaths])
 scores = np.arange(len(models))
@@ -187,28 +210,39 @@ def capture(face, xy, frame, t0):
 
 path = []
 
-while True:
-    if should_exit:
-        kb_listener.stop()
-        mouse_listener.stop()
-        cv2.destroyAllWindows()
-        sys.exit()
 
+while not should_exit:
     t0 = time.time()
-    ret, frame = cam.read()
-    dt = time.time() - t0
-    # print(dt)
+    xy_true = np.array(mouse_controller.position) - monxy
+    cur_true = pixelxy_to_cursor(xy_true, monsize)
 
+    ret, frame = cam.read()
     cursor, cursors, faces = predict_cursor(frame, models)
     if cursor is not None:
         cursor = cursor[0]
         cursors = cursors[:, 0]
 
-    if is_capturing:
-        xy = np.array(pyautogui.position()) - monxy
-        capture(faces[0], xy, frame, t0)
-        real_cur = pixelxy_to_cursor(xy, monsize)
-        loss = ((cursor - real_cur) ** 2).mean() if cursor is not None else 0.3
-        path.append([real_cur, loss])
+    loss = ((cursor - cur_true) ** 2).mean() if cursor is not None else 1
+    if faces is not None:
+        face = faces[0]
+        # print(loss)
+        if is_capture:
+            left_blink, right_blink = detect_blink(face, 0.25)
+            if not (left_blink and right_blink):
+                capture(face, xy_true, frame, t0)
+                path.append([cur_true, loss])
 
     render(frame, cursor, cursors, faces)
+
+    if is_automove:
+        mouse_controller.position = points[i % len(points)]
+    dt = time.time() - t0
+    # print(dt)
+
+mean_loss = np.array([l for xy, l in path]).mean()
+print(f'{mean_loss=}')
+
+kb_listener.stop()
+mouse_listener.stop()
+cv2.destroyAllWindows()
+sys.exit()
diff --git a/experiments/spiral.py → experiments/draw_spiral.py b/experiments/spiral.py → experiments/draw_spiral.py
@@ -8,6 +8,7 @@
 from pathlib import Path
 sys.path.append(str(Path(__file__).parent.parent))
 from modules.spiral import spiral  # noqa
+from modules.interpolate_points import interpolate_points  # noqa
 
 # Example usage
 edge_offset = 10
@@ -18,6 +19,8 @@
 dstep = np.array([edge[2] - edge[0], edge[3] - edge[1]]) / (steps + 1)
 r = np.random.randint(-dstep/2, dstep/2, size=[len(points), 2])
 points = (points + r).clip([0, 0], monsize - 2)
+points = points * [-1, 1] + [monsize[0], 0]
+points = np.array(interpolate_points(points, 50))
 print(points.max(axis=0))
 # for x, y in points:
 #     pyautogui.moveTo(x, y)

diff --git a/experiments/mouse_move.py b/experiments/mouse_move.py
@@ -24,15 +24,16 @@ def calc_step(src, dst, max_offset=3):
     return step
 
 
-monsize = (2560, 1440)
-while True:
-    target = np.random.randint(low=[0, 0], high=monsize, size=(2))
+def main():
+    monsize = (2560, 1440)
     while True:
-        is_in_target = (target == np.array(pyautogui.position())).all()
-        if is_in_target:
-            break
-        step = calc_step(np.array(pyautogui.position()), target)
-        pyautogui.moveTo(*step)
-        time.sleep(1 / 120)  # Pause to simulate 60 steps per second
-
-move_smoothly(1000, 1000)
+        target = np.random.randint(low=[0, 0], high=monsize, size=(2))
+        while True:
+            is_in_target = (target == np.array(pyautogui.position())).all()
+            if is_in_target:
+                break
+            step = calc_step(np.array(pyautogui.position()), target)
+            pyautogui.moveTo(*step)
+            time.sleep(1 / 120)  # Pause so the cursor moves at no more than about 120 steps per second
+
+    move_smoothly(1000, 1000)  # NOTE(review): unreachable - the outer `while True` above never exits
diff --git a/main.py b/main.py
@@ -70,7 +70,7 @@ def render(frame, cursor, cursors, faces):
 pyautogui.FAILSAFE = False
 
 
-mpaths = sys.argv[1:]
+mpaths = sys.argv[2:]
 models = [GazePredictor.load_from_file(p) for p in mpaths]
 # scores = np.array([float(re.match(r'.* (0.\d+) .*', p)[1]) for p in mpaths])
 scores = np.arange(len(models))
@@ -84,11 +84,12 @@ def render(frame, cursor, cursors, faces):
 # normalized_weights = [weight / sum_weights for weight in weights]
 # print("Normalized Weights:", normalized_weights)
 
-numavg = 3
+numavg = 6
 avgs = np.zeros(shape=(numavg, 2))
 
 camname = 'intg'
 camname = 'brio'
+camname = sys.argv[1]
 cam = cam_init(camname)
 if camname == 'intg':
     monname = 'eDP-1'  # 'eDP-1' (integrated) or 'DP-3' (Dell)
@@ -98,9 +99,11 @@ def render(frame, cursor, cursors, faces):
 monsize = np.array([mon.width, mon.height])
 monxy = np.array([mon.x, mon.y])
 
+mouse_controller = pynput.mouse.Controller()
+
 
 def main():
-    global avgs, numavg
+    global avgs, numavg, i
     print('hello')
 
     while True:
@@ -109,17 +112,17 @@ def main():
     # for filepath in photo_paths:
     #     frame = cv2.imread(filepath)
 
-        cursor, cursors, faces = predict_cursor(frame, models)
-        if cursor is not None:
-            cursor = cursor.reshape(2)
-            cursors = cursors.reshape(-1, 2)
-            avgs = np.roll(avgs, -1, axis=0)
-            avgs[-1] = cursor
-            avg = avgs.mean(axis=0)
-            print(avg)
-            xy = cursor_to_pixelxy(avg, monsize) + monxy
-            print(xy)
-            # pyautogui.moveTo(*xy, 0.0, pyautogui.easeInOutQuad)
+        for i in range(1):
+            cursor, cursors, faces = predict_cursor(frame, models)
+            if cursor is not None:
+                cursor = cursor.reshape(2)
+                cursors = cursors.reshape(-1, 2)
+                avgs = np.roll(avgs, -1, axis=0)
+                avgs[-1] = cursor
+
+        avg = avgs.mean(axis=0)
+        xy = cursor_to_pixelxy(avg, monsize) + monxy
+        mouse_controller.position = xy
 
         render(frame, cursor, cursors, faces)
         # input()

diff --git a/modules/detect_blink.py b/modules/detect_blink.py
@@ -1,8 +1,7 @@
 import numpy as np
 
 
-def detect_blink(face):
-    blink_threshold = 0.25
+def detect_blink(face, blink_threshold=0.25):
     left_h = np.linalg.norm(face[386] - face[374])
     left_w = np.linalg.norm(face[362] - face[263])
     left_blink = left_h < blink_threshold * left_w

diff --git a/modules/draw_landmarks.py b/modules/draw_landmarks.py
@@ -16,9 +16,10 @@ def draw_landmarks(img, faces):
         (r_cx, r_cy), r_radius = cv2.minEnclosingCircle(points[RIGHT_IRIS])
         center_left = np.array([l_cx, l_cy], dtype=np.int32)
         center_right = np.array([r_cx, r_cy], dtype=np.int32)
-        cv2.circle(img, center_left, int(l_radius), (0, 255, 0), 1, cv2.LINE_AA)
-        cv2.circle(img, center_right, int(r_radius), (0, 255, 0), 1, cv2.LINE_AA)
+        # cv2.circle(img, center_left, int(l_radius), (0, 255, 0), 1, cv2.LINE_AA)
+        # cv2.circle(img, center_right, int(r_radius), (0, 255, 0), 1, cv2.LINE_AA)
 
-        for x, y in points[train_indices]:
-            cv2.circle(img, (x, y), 1, (255, 255, 0), -1)
+        # for x, y in points[train_indices]:
+        #     cv2.circle(img, (x, y), 1, (255, 255, 0), -1)
+        cv2.circle(img, center_left, 1, (0, 255, 0), 1)
         cv2.circle(img, center_right, 1, (0, 255, 0), 1)