Commit c5a36c7: "upd"
anatol-grabowski committed Aug 14, 2023 (1 parent: 5bc378a)

Showing 8 changed files with 64 additions and 190 deletions.
9 changes: 6 additions & 3 deletions capture.py
@@ -18,7 +18,7 @@
from modules.gaze_predictor import GazePredictor, train_indices
from modules.mediapipe_detect_faces import mediapipe_detect_faces
from modules.draw_landmarks import draw_landmarks
from modules.predict_cursor import predict_cursor, cursor_to_pixelxy
from modules.predict_cursor import predict_cursor, cursor_to_pixelxy, pixelxy_to_cursor
from modules.webcam import list_webcams
from modules.detect_blink import detect_blink
from modules.get_paths import get_paths
@@ -30,6 +30,8 @@ def draw_cursors(frame, cursor, cursors):
cv2.circle(frame, cursor_to_pixelxy(cursor, imsize).astype(int), 4, (255, 0, 0), -1)
for cur in cursors:
cv2.circle(frame, cursor_to_pixelxy(cur, imsize).astype(int), 2, (255, 0, 0), -1)
xy = cursor_to_pixelxy(pixelxy_to_cursor(np.array(pyautogui.position()), monsize), imsize)
cv2.circle(frame, xy.astype(int), 3, (0, 255, 0))


def render(frame, cursor, cursors, faces):
@@ -85,7 +87,7 @@ def on_press(key):
edge = np.array([edge_offset, edge_offset, monsize[0]-edge_offset, monsize[1]-edge_offset])
points = spiral(*edge, *steps)
dstep = np.array([edge[2] - edge[0], edge[3] - edge[1]]) / (steps + 1)
randomness = 0
randomness = 1
r = np.random.randint(-dstep/2, dstep/2, size=[len(points), 2]) * randomness
points = (points + r).clip([0, 0], [monsize[0] - 3, monsize[1] - 4])
points += monxy
@@ -99,7 +101,8 @@ def on_press(key):

mpaths = sys.argv[2:]
models = [GazePredictor.load_from_file(p) for p in mpaths]
scores = np.array([float(re.match(r'.* (0.\d+) .*', p)[1]) for p in mpaths])
# scores = np.array([float(re.match(r'.* (0.\d+) .*', p)[1]) for p in mpaths])
scores = np.arange(len(models))
models = {model: 1 for model, score in zip(models, scores)}

while True:
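Note: the new lines in draw_cursors overlay the actual OS cursor (from pyautogui.position()) on the webcam preview by mapping monitor pixels into the normalized cursor space and back into frame pixels. A minimal sketch of that round trip, not from this commit, with hypothetical stand-ins for the modules.predict_cursor helpers (the real ones may use a different normalization):

import numpy as np

def pixelxy_to_cursor(xy, size):
    # assumed convention: normalize pixel coordinates to [0, 1]
    return np.asarray(xy) / np.asarray(size)

def cursor_to_pixelxy(cursor, size):
    # inverse mapping back to pixel coordinates of `size`
    return np.asarray(cursor) * np.asarray(size)

monsize = np.array([1920, 1080])  # monitor resolution (example)
imsize = np.array([640, 480])     # webcam frame resolution (example)
mouse = np.array([960, 540])      # stand-in for pyautogui.position()

xy = cursor_to_pixelxy(pixelxy_to_cursor(mouse, monsize), imsize)
print(xy)  # [320. 240.] -> where the green circle lands on the frame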
16 changes: 11 additions & 5 deletions experiments/draw_eval_interactive.py
@@ -41,7 +41,8 @@
# '/home/anatoly/_tot/proj/ml/eye_controlled_mouse/data/2023-08-08T15:57:06.820873-continuous-ok/brio *.jpeg',
# '/home/anatoly/_tot/proj/ml/eye_controlled_mouse/data/2023-08-08T16:33:38.163179-3-ok/brio *-1 *.jpeg',
# '/home/anatoly/_tot/proj/ml/eye_controlled_mouse/data/2023-08-09T15:37:18.761700-first-spiral-ok/brio *-1 *.jpeg',
'/home/anatoly/_tot/proj/ml/eye_controlled_mouse/data/*/*-1 *.jpeg',
# '/home/anatoly/_tot/proj/ml/eye_controlled_mouse/data/*/*-1 *.jpeg',
'/home/anatoly/_tot/proj/ml/eye_controlled_mouse/data/*/*.jpeg',
]
photo_paths = get_paths(photo_globs)

@@ -81,7 +82,8 @@ def on_press(key):

mpaths = sys.argv[1:]
models = [GazePredictor.load_from_file(p) for p in mpaths]
scores = np.array([float(re.match(r'.* (0.\d+) .*', p)[1]) for p in mpaths])
# scores = np.array([float(re.match(r'.* (0.\d+) .*', p)[1]) for p in mpaths])
scores = np.arange(len(models))
models = {model: 1 for model, score in zip(models, scores)}
print(f'{scores=}')
# min_perf = scores.min()
@@ -134,9 +136,13 @@ def draw_eval():
cur, curs = y.numpy(), ys.numpy()
xy = pixelxy_to_cursor(get_xy_from_filename(filepath), monsize)
points.append([xy, cur, 2, (255, 0, 0)])
for cr in curs:
points.append([xy, cr, 1, (255, 0, 0)])
points[-1][-1] = (0, 0, 255)
for k, cr in enumerate(curs):
c = (255, 0, 0)
if k == len(curs) - 1:
c = (0, 255, 255)
if k == len(curs) - 2:
c = (0, 0, 200)
points.append([xy, cr, 1, c])
bs[b] = points

frame = cv2.imread(photo_paths[i])
5 changes: 4 additions & 1 deletion main.py
@@ -72,7 +72,8 @@ def render(frame, cursor, cursors, faces):

mpaths = sys.argv[1:]
models = [GazePredictor.load_from_file(p) for p in mpaths]
scores = np.array([float(re.match(r'.* (0.\d+) .*', p)[1]) for p in mpaths])
# scores = np.array([float(re.match(r'.* (0.\d+) .*', p)[1]) for p in mpaths])
scores = np.arange(len(models))
models = {model: 1 for model, score in zip(models, scores)}
print(f'{scores=}')
# min_perf = scores.min()
@@ -87,11 +88,13 @@ def render(frame, cursor, cursors, faces):
avgs = np.zeros(shape=(numavg, 2))

monname = 'eDP-1' # 'eDP-1' (integrated) or 'DP-3' (Dell)
# monname = 'DP-3'
mon = next((mon for mon in get_monitors() if mon.name == monname))
monsize = np.array([mon.width, mon.height])
monxy = np.array([mon.x, mon.y])

camname = 'intg'
# camname = 'brio'
cam = cam_init(camname)


2 changes: 1 addition & 1 deletion modules/detect_blink.py
@@ -2,7 +2,7 @@


def detect_blink(face):
blink_threshold = 0.35
blink_threshold = 0.25
left_h = np.linalg.norm(face[386] - face[374])
left_w = np.linalg.norm(face[362] - face[263])
left_blink = left_h < blink_threshold * left_w
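Note: detect_blink flags a blink when the lid-to-lid distance falls below a fraction of the eye width; dropping the threshold from 0.35 to 0.25 means the eye must be closer to fully shut before the frame counts as a blink. A standalone sketch of the same ratio test for the left eye, not from this commit, assuming face is the MediaPipe FaceMesh landmark array used in the repo:

import numpy as np

def detect_left_blink(face, blink_threshold=0.25):
    # landmark indices as in modules/detect_blink.py
    left_h = np.linalg.norm(face[386] - face[374])  # upper lid to lower lid
    left_w = np.linalg.norm(face[362] - face[263])  # inner corner to outer corner
    return left_h < blink_threshold * left_w        # blink when the eye's aspect ratio is small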
201 changes: 24 additions & 177 deletions modules/gaze_predictor.py
@@ -1,82 +1,22 @@
import torch
import torch.nn as nn
import pickle
import torch
from sklearn.preprocessing import MinMaxScaler, StandardScaler

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_filepath = './data/model.pickle'


class GazePredictor(nn.Module):
def __init__(self, input_size, output_size):
super(GazePredictor, self).__init__()
self.fc1 = nn.Linear(input_size, 128)
self.relu = nn.ReLU()
self.hidden1 = nn.Linear(128, 16)
self.relu2 = nn.ReLU()
self.fc2 = nn.Linear(16, output_size)

def forward(self, x):
x = self.fc1(x)
x = self.relu(x)
x = self.hidden1(x)
x = self.relu2(x)
x = self.fc2(x)
return x

def save_to_file(self, filepath=model_filepath):
with open(filepath, 'wb') as model_file:
pickle.dump(self, model_file)
print('saved model to file', filepath)

def load_from_file(filepath=model_filepath):
with open(filepath, 'rb') as file:
model = pickle.load(file)
return model

# get top inputs:
# inputs = X_train_tensor.clone().detach().requires_grad_(True)
# optimizer.zero_grad()
# outputs = model(inputs)
# loss = criterion(outputs, y_train_tensor)
# loss.backward()
# grads = inputs.grad.cpu().detach().numpy()
# print(grads.shape)
# scores = grads.mean(axis=0)
# print(scores.shape)
# # print(scores)
# indices = (np.argsort(scores)[::-1] / 2).astype(dtype=np.int)
# print(indices.tolist())


# 74
train_indices = [
21, 54, 67, 109, 338, 297, 284, 251, # forehead
108, 337, # forehead lower
143, 156, 70, 63, 105, 66, 107, # brow right outer
336, 296, 334, 293, 300, 383, 372, # brow left outer
124, 46, 53, 52, 65, 55, 193, # brow right middle
285, 295, 282, 283, 276, 353, 417, # brow left middle
226, 247, 246, 221, # around right eye
446, 467, 466, 441, # around left eye
189, 173, 133, 243, 245, 233, # right z
413, 398, 362, 463, 465, 153, # left z
58, 172, 136, # right cheek
288, 397, 365, # left cheek
468, 469, 470, 471, 472, # right iris
473, 474, 475, 476, 477, # left iris
]

# 84
train_indices = [
21, 54, 103, 67, 109, 10, 338, 297, 332, 284, 251, # forehead
108, 151, 337, # forehead lower
143, 156, 70, 63, 105, 66, 107, # brow right outer
336, 296, 334, 293, 300, 383, 372, # brow left outer
124, 46, 53, 52, 65, 55, 193, # brow right middle
285, 295, 282, 283, 276, 353, 417, # brow left middle
226, 247, 246, 221, # around right eye, add 130?
446, 467, 466, 441, # around left eye, add 359?
226, 247, 246, 221, # around right eye
446, 467, 466, 441, # around left eye
189, 190, 173, 133, 243, 244, 245, 233, # right z
413, 414, 398, 362, 463, 464, 465, 153, # left z
58, 172, 136, 150, # right cheek
@@ -85,131 +25,23 @@ def load_from_file(filepath=model_filepath):
473, 474, 475, 476, 477, # left iris
]

train_indices = [
21, 54, 103, 67, 109, 10, 338, 297, 332, 284, 251, # forehead
108, 151, 337, # forehead lower
# 143, 156, 70, 63, 105, 66, 107, # brow right outer
# 336, 296, 334, 293, 300, 383, 372, # brow left outer
# 124, 46, 53, 52, 65, 55, 193, # brow right middle
# 285, 295, 282, 283, 276, 353, 417, # brow left middle
# 226, 247, 246, 221, # around right eye
# 446, 467, 466, 441, # around left eye
# 189, 190, 173, 133, 243, 244, 245, 233, # right z
# 413, 414, 398, 362, 463, 464, 465, 153, # left z
58, 172, 136, 150, # right cheek
288, 397, 365, 379, # left cheek
468, 469, 470, 471, 472, # right iris
473, 474, 475, 476, 477, # left iris
]


model_arch = [512, 128, 32]


class GazePredictor(nn.Module):
def __init__(self, input_size, output_size):
super(GazePredictor, self).__init__()
self.input_size = input_size
self.output_size = output_size
self.fc1 = nn.Linear(input_size, model_arch[0])
self.relu = nn.ReLU()
self.hidden1 = nn.Linear(model_arch[0], model_arch[1])
self.relu2 = nn.ReLU()
self.hidden2 = nn.Linear(model_arch[1], model_arch[2])
self.relu3 = nn.ReLU()
self.fc2 = nn.Linear(model_arch[-1], output_size)

def forward(self, x):
x = self.fc1(x)
x = self.relu(x)
x = self.hidden1(x)
x = self.relu2(x)
x = self.hidden2(x)
x = self.relu3(x)
x = self.fc2(x)
return x

def save_to_file(self, filepath=model_filepath):
self.to(torch.device('cpu'))
with open(filepath, 'wb') as model_file:
pickle.dump(self, model_file)
print('saved model to file', filepath)
self.to(device)

def load_from_file(filepath=model_filepath):
with open(filepath, 'rb') as file:
model = pickle.load(file)
return model

def model_name(self):
layers = [self.input_size, *model_arch, self.output_size]
return "-".join([str(l) for l in layers])


model_arch = [512, 256, 128, 32]


class GazePredictor(nn.Module):
def __init__(self, input_size, output_size):
super(GazePredictor, self).__init__()
self.input_size = input_size
self.output_size = output_size
self.fc1 = nn.Linear(input_size, model_arch[0])
self.relu = nn.ReLU()
self.hidden1 = nn.Linear(model_arch[0], model_arch[1])
self.relu2 = nn.ReLU()
self.hidden2 = nn.Linear(model_arch[1], model_arch[2])
self.relu3 = nn.ReLU()
if len(model_arch) > 3:
self.hidden3 = nn.Linear(model_arch[2], model_arch[3])
self.relu4 = nn.ReLU()
self.fc2 = nn.Linear(model_arch[-1], output_size)

def forward(self, x):
x = self.fc1(x)
x = self.relu(x)
x = self.hidden1(x)
x = self.relu2(x)
x = self.hidden2(x)
x = self.relu3(x)
if len(model_arch) > 3:
x = self.hidden3(x)
x = self.relu4(x)
x = self.fc2(x)
return x

def save_to_file(self, filepath=model_filepath):
self.to(torch.device('cpu'))
with open(filepath, 'wb') as model_file:
pickle.dump(self, model_file)
print('saved model to file', filepath)
self.to(device)

def load_from_file(filepath=model_filepath):
with open(filepath, 'rb') as file:
model = pickle.load(file)
return model

def model_name(self):
layers = [self.input_size, *model_arch, self.output_size]
return f'{"-".join([str(l) for l in layers])}'


class GazePredictor(nn.Module):
def __init__(self, arch):
super(GazePredictor, self).__init__()
self.scaler = StandardScaler()
self.arch = arch
self.fc1 = nn.Linear(*arch[0:1+1])
self.relu = nn.ReLU()
self.relu = nn.Sigmoid()
if len(arch) > 3:
self.hidden1 = nn.Linear(*arch[1:2+1])
self.relu2 = nn.ReLU()
self.relu2 = nn.Sigmoid()
if len(arch) > 4:
self.hidden2 = nn.Linear(*arch[2:3+1])
self.relu3 = nn.ReLU()
self.relu3 = nn.Sigmoid()
if len(arch) > 5:
self.hidden3 = nn.Linear(*arch[3:4+1])
self.relu4 = nn.ReLU()
self.relu4 = nn.Sigmoid()
self.fc2 = nn.Linear(*arch[-2:])

def forward(self, x):
@@ -241,3 +73,18 @@ def load_from_file(filepath=model_filepath):

def model_name(self):
return f'{"-".join([str(l) for l in self.arch])}'


# get top inputs:
# inputs = X_train_tensor.clone().detach().requires_grad_(True)
# optimizer.zero_grad()
# outputs = model(inputs)
# loss = criterion(outputs, y_train_tensor)
# loss.backward()
# grads = inputs.grad.cpu().detach().numpy()
# print(grads.shape)
# scores = grads.mean(axis=0)
# print(scores.shape)
# # print(scores)
# indices = (np.argsort(scores)[::-1] / 2).astype(dtype=np.int)
# print(indices.tolist())
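Note: after this cleanup only the arch-parameterized GazePredictor remains: arch lists the layer widths from input to output, the hidden activations are Sigmoid, and a StandardScaler is stored on the model so inference can reuse the training-time normalization. A usage sketch, not from this commit; the feature count (136) and the 2-D cursor output are illustrative, not taken from the repo:

import numpy as np
import torch
from modules.gaze_predictor import GazePredictor  # repo module, path assumed

model = GazePredictor(arch=[136, 512, 128, 32, 2])      # [input, hidden..., output]

X_train = np.random.rand(256, 136).astype(np.float32)   # fake landmark features
y_train = np.random.rand(256, 2).astype(np.float32)     # fake normalized cursor targets

# Fit the scaler once on the training features; predict() in
# modules/predict_cursor.py reapplies it at inference time (see below).
X_scaled = model.scaler.fit_transform(X_train)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = torch.nn.MSELoss()
for _ in range(10):                                      # toy training loop
    optimizer.zero_grad()
    loss = loss_fn(model(torch.tensor(X_scaled, dtype=torch.float32)),
                   torch.tensor(y_train))
    loss.backward()
    optimizer.step()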
3 changes: 3 additions & 0 deletions modules/predict_cursor.py
@@ -37,6 +37,9 @@
def predict(model, X):
''' return torch tensor '''
model.eval()
X = model.scaler.transform(X)
X = torch.tensor(X, dtype=torch.float32)

with torch.no_grad():
y = model(X)
return y
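Note: with this change predict() applies model.scaler.transform before building the tensor, so callers pass raw landmark features and the model sees the same feature distribution it was trained on. A minimal calling sketch, not from this commit; sizes are illustrative and the scaler is assumed to have been fitted during training:

import numpy as np
from modules.gaze_predictor import GazePredictor   # repo modules, paths assumed
from modules.predict_cursor import predict

model = GazePredictor(arch=[136, 512, 128, 32, 2])
model.scaler.fit(np.random.rand(64, 136))   # normally fitted on the real training set

X = np.random.rand(1, 136)                  # one raw (unscaled) feature vector
y = predict(model, X)                       # scaling + forward pass happen inside predict()
print(y.shape)                              # torch.Size([1, 2]) -> predicted cursor (x, y)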
11 changes: 8 additions & 3 deletions prepare.py
@@ -23,7 +23,7 @@
# '/home/anatoly/_tot/proj/ml/eye_controlled_mouse/data/2023-08-08T16:33:38.163179-3-ok/brio *-1 *.jpeg',
'/home/anatoly/_tot/proj/ml/eye_controlled_mouse/data/*/*.jpeg',
]
photo_paths = get_paths(photo_globs)
photo_paths = get_paths(photo_globs)[::-1] # reverse for tqdm to work better


face_mesh = mp.solutions.face_mesh.FaceMesh(
@@ -37,15 +37,20 @@
mon = next((mon for mon in get_monitors() if mon.name == monname))
monsize = np.array([mon.width, mon.height])
dataset = Dataset()
ds = Dataset.load()

num_blinks = 0
for filepath in tqdm(photo_paths):
xy = np.array(get_xy_from_filename(filepath))
cur = pixelxy_to_cursor(xy, monsize)
datapoint = next((dp for dp in ds.datapoints if dp['label'] == filepath), None)
if datapoint is not None:
dataset.add_datapoint(filepath, datapoint['face'], cur)
continue
img = cv2.imread(filepath)
imsize = np.array([img.shape[1], img.shape[0]])
rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
faces = mediapipe_detect_faces(face_mesh, rgb)
xy = np.array(get_xy_from_filename(filepath))
cur = pixelxy_to_cursor(xy, monsize)

# print(filepath, cur, faces is not None)
if faces is not None:
7 changes: 7 additions & 0 deletions scripts/update_kaggle_dataset.py
@@ -0,0 +1,7 @@
import kaggle
import sys


dataset_id = 'grabantot/webcam-mouse'
updated_dataset_file = sys.argv[1]
kaggle.api.dataset_create_version(dataset_id, folder=updated_dataset_file)
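Note: the new script pushes a local dataset folder to Kaggle as a new version of the grabantot/webcam-mouse dataset. Presumed invocation, assuming Kaggle API credentials are configured in ~/.kaggle/kaggle.json and the argument is the folder to upload (the path here is an example): python scripts/update_kaggle_dataset.py ./data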
