suitedaces · danlkv · Dec 5, 2024 · Dec 5, 2024
diff --git a/src/computer.py b/src/computer.py
@@ -1,8 +1,11 @@
-import pyautogui
+from wsl import pyautogui_client
 from PIL import Image
 import io
 import base64
 import time
+from wsl import screenshot as sc
+
+pyautogui = pyautogui_client.PyAutoGUIClient()
 
 class ComputerControl:
     def __init__(self):
@@ -45,7 +48,8 @@ def perform_action(self, action):
             raise ValueError(f"Unsupported action: {action_type}")
 
     def take_screenshot(self):
-        screenshot = pyautogui.screenshot()
+        #screenshot = pyautogui.screenshot()
+        screenshot = sc.screenshot()
         ai_screenshot = self.resize_for_ai(screenshot)
         buffered = io.BytesIO()
         ai_screenshot.save(buffered, format="PNG")

diff --git a/wsl/README.md b/wsl/README.md
@@ -0,0 +1,40 @@
+# Running in WSL
+
+
+## Start the pyautogui server in Windows
+
+1. Install Python in Windows
+2. Install Flask, flask-cors and pyautogui (`pip install flask flask-cors pyautogui`)
+3. Run `pyautogui_server.py` in Windows
+
+
+You'll need to change the screen resolution to a scale of 1280x800,
+or change the resolution in `computer.py` to match your screen ratio.
+
+## Test the screenshot function
+
+The `screenshot.py` script takes a screenshot of the
+screen and saves it in the Windows "Pictures/Screenshots" folder.
+
+## Test moving the pointer
+
+The `test_move_pointer.py` script moves the pointer to the
+center of the screen.
+
+## Run the agent
+
+You will need to get the IP address of your Windows machine. Usually it is
+`192.168.x.x`. You will also need to get an API key from the Anthropic website.
+Note that you should not change `/etc/resolv.conf` in WSL, as it will break the
+network connection.
+
+
+In this directory, run the following command:
+
+```
+export PYAUTOGUI_SERVER_ADDRESS=192.168.x.x:5000
+export ANTHROPIC_API_KEY=your_api_key
+python ../run.py
+```
+
+
diff --git a/wsl/pyautogui_client.py b/wsl/pyautogui_client.py
@@ -0,0 +1,56 @@
+import requests
+import os
+
+ADDRESS = os.getenv("PYAUTOGUI_SERVER_ADDRESS", "http://localhost:5000")
+
+
+class PyAutoGUIClient:
+    def __init__(self, base_url=None):
+        if base_url is None:
+            # check if the protocol is set
+            base_url = ADDRESS
+            if ADDRESS.startswith("http://") or ADDRESS.startswith("https://"):
+                pass
+            else:
+                # add the protocol http://
+                base_url = f"http://{ADDRESS}"
+        self.base_url = base_url
+
+    def size(self):
+        response = requests.get(f"{self.base_url}/screen/size")
+        # convert string to int
+        response_json = response.json()
+        print(f"Screen size: {response_json}")
+        return int(response_json['width']), int(response_json['height'])
+
+    def position(self):
+        response = requests.get(f"{self.base_url}/mouse/position")
+        print(f"Mouse position: {response.json()}")
+        return response.json()
+
+    def moveTo(self, x, y, duration=0):
+        response = requests.post(f"{self.base_url}/mouse/move", json={"x": x, "y": y, "duration": duration})
+        print(f"Moving mouse to: {x}, {y}")
+        return response.json()
+
+    def click(self, x=None, y=None, button='left'):
+        payload = {"button": button}
+        if x is not None and y is not None:
+            payload.update({"x": x, "y": y})
+        print(f"Clicking at: {x}, {y}")
+        response = requests.post(f"{self.base_url}/mouse/click", json=payload)
+        return response.json()
+
+    def write(self, text, interval=0):
+        print(f"Writing text: {text}")
+        response = requests.post(f"{self.base_url}/keyboard/write", json={"text": text, "interval": interval})
+        return response.json()
+
+    def press(self, key):
+        print(f"Pressing key: {key}")
+        response = requests.post(f"{self.base_url}/keyboard/press", json={"key": key})
+        return response.json()
+
+    def screenshot(self):
+        response = requests.get(f"{self.base_url}/screen/screenshot")
+        return response.json()
diff --git a/wsl/pyautogui_server.py b/wsl/pyautogui_server.py
@@ -0,0 +1,72 @@
+from flask import Flask, request, jsonify
+import pyautogui
+from pyautogui import FailSafeException
+
+app = Flask(__name__)
+app.config['CORS_HEADERS'] = 'Content-Type'
+
+from flask_cors import CORS
+CORS(app, resources={r'/*': {'origins': '*'}})
+
+@app.route('/mouse/move', methods=['POST'])
+def mouse_move():
+    data = request.get_json()
+    print(f"Moving mouse to: {data['x']}, {data['y']}")
+    try:
+        pyautogui.moveTo(data['x'], data['y'], duration=data.get('duration', 0))
+    except FailSafeException:
+        print("Mouse moved to a corner, fail-safe guard detected.")
+    return jsonify({'status': 'success'})
+
+@app.route('/mouse/click', methods=['POST'])
+def mouse_click():
+    data = request.get_json()
+    data['x'] = data.get('x', None)
+    data['y'] = data.get('y', None)
+    print(f"Clicking at: {data['x']}, {data['y']}")
+    pyautogui.click(data.get('x'), data.get('y'), button=data.get('button', 'left'))
+    return jsonify({'status': 'success'})
+
+@app.route('/keyboard/write', methods=['POST'])
+def keyboard_write():
+    data = request.get_json()
+    print(f"Writing text: {data['text']}")
+    pyautogui.write(data['text'], interval=data.get('interval', 0))
+    return jsonify({'status': 'success'})
+
+@app.route('/keyboard/press', methods=['POST'])
+def keyboard_press():
+    data = request.get_json()
+    key = data['key']
+    if key.lower() == 'super_l':
+        key = 'winleft'
+    # If shortcut divided by +
+    if '+' in key:
+        keys = key.split('+')
+        print(f"Pressing keys: {keys}")
+        pyautogui.hotkey(*keys)
+    else:
+        print(f"Pressing key: {key}")
+        pyautogui.press(key)
+    return jsonify({'status': 'success'})
+
+@app.route('/screen/screenshot', methods=['GET'])
+def screenshot():
+    screenshot = pyautogui.screenshot()
+    screenshot.save('screenshot.png')
+    return jsonify({'status': 'success', 'file': 'screenshot.png'})
+
+@app.route('/mouse/position', methods=['GET'])
+def mouse_position():
+    x, y = pyautogui.position()
+    print(f"Mouse position: {x}, {y}")
+    return jsonify({'x': x, 'y': y})
+
+@app.route('/screen/size', methods=['GET'])
+def screen_size():
+    width, height = pyautogui.size()
+    print(f"Screen size: {width}, {height}")
+    return jsonify({'width': width, 'height': height})
+
+if __name__ == '__main__':
+    app.run(debug=True, host='0.0.0.0')
diff --git a/wsl/screenshot.py b/wsl/screenshot.py
@@ -0,0 +1,62 @@
+import os
+from PIL import Image
+
+def screenshot(which='primary'):
+    os.system("""
+        powershell.exe \"
+        Add-Type -AssemblyName System.Windows.Forms,System.Drawing
+
+    \\$screens = [Windows.Forms.Screen]::AllScreens
+
+    # Iterate through each screen
+    foreach (\\$screen in \\$screens) {
+        Write-Host "Monitor Name: " \\$screen.DeviceName
+        Write-Host "Bounds: " \\$screen.Bounds
+        Write-Host "Working Area: " \\$screen.WorkingArea
+        Write-Host "Primary: " \\$(\\$screen.Primary)
+        Write-Host "Bounds Top: " \\$screen.Bounds.Top
+        Write-Host "Bounds Left: " \\$screen.Bounds.Left
+        Write-Host "Bounds Right: " \\$screen.Bounds.Right
+        Write-Host "Bounds Bottom: " \\$screen.Bounds.Bottom
+        Write-Host "-----------------------------"
+
+        \\$screenshot_dir = \\$env:USERPROFILE + \\\"\\Pictures\\Screenshots\\\"
+        if (\\$screen.Primary) {
+            Write-Host "Primary Monitor"
+            \\$filename = \\$screenshot_dir + \\\"\\screenshot_primary.png\\\"
+
+        } else {
+            Write-Host "Secondary Monitor"
+            \\$filename = \\$screenshot_dir + \\\"\\screenshot_secondary.png\\\"
+        }
+        \\$top    = (\\$screen.Bounds.Top    | Measure-Object -Minimum).Minimum
+        \\$left   = (\\$screen.Bounds.Left   | Measure-Object -Minimum).Minimum
+        \\$right  = (\\$screen.Bounds.Right  | Measure-Object -Maximum).Maximum
+        \\$bottom = (\\$screen.Bounds.Bottom | Measure-Object -Maximum).Maximum
+
+        \\$bounds   = [Drawing.Rectangle]::FromLTRB(\\$left, \\$top, \\$right, \\$bottom)
+        \\$bmp      = New-Object System.Drawing.Bitmap ([int]\\$bounds.width), ([int]\\$bounds.height)
+        \\$graphics = [Drawing.Graphics]::FromImage(\\$bmp)
+
+        \\$graphics.CopyFromScreen(\\$bounds.Location, [Drawing.Point]::Empty, \\$bounds.size)
+
+        Write-Host \\$filename
+        \\$bmp.Save(\\$filename, [Drawing.Imaging.ImageFormat]::Png)
+
+
+        \\$graphics.Dispose()
+        \\$bmp.Dispose()
+    }
+    \"
+    """)
+    username = "Alhazen"
+    file_path = "/mnt/c/Users/" + username + "/Pictures/Screenshots/"
+    if which == 'primary':
+        filename = file_path + "screenshot_primary.png"
+    else:
+        filename = file_path + "screenshot_secondary.png"
+    im = Image.open(filename)
+    return im
+
+if __name__ == "__main__":
+    screenshot()
diff --git a/wsl/test_move_pointer.py b/wsl/test_move_pointer.py
@@ -0,0 +1,17 @@
+from pyautogui_client import PyAutoGUIClient
+
+# Create a client instance
+client = PyAutoGUIClient()
+
+# Get screen size from server
+size_data = client.get_screen_size()
+
+# Calculate center
+center_x = size_data['width'] // 2
+center_y = size_data['height'] // 2
+
+# Send request to move mouse to center
+response = client.move_mouse(center_x, center_y, duration=1.0)
+
+# Print response
+print(response)
diff --git a/wsl/test_pyautogui_mouse.py b/wsl/test_pyautogui_mouse.py
@@ -0,0 +1,7 @@
+import pyautogui 
+import time 
+
+print(pyautogui.size())
+while True: 
+    print (pyautogui.position())
+    time.sleep(1)
diff --git a/wsl/test_pyqt.py b/wsl/test_pyqt.py
@@ -0,0 +1,10 @@
+import sys
+from PyQt6 import QtWidgets
+
+app = QtWidgets.QApplication(sys.argv)
+windows = QtWidgets.QWidget()
+
+windows.resize(500,500)
+windows.move(100,100)
+windows.show()
+sys.exit(app.exec())