Skip to content

add wsl support #8

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions src/computer.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import pyautogui
from wsl import pyautogui_client
from PIL import Image
import io
import base64
import time
from wsl import screenshot as sc

# Rebind the module-level name `pyautogui` to an HTTP client instance, so that
# later `pyautogui.<method>(...)` calls in this module are sent to the
# pyautogui server through PyAutoGUIClient rather than to the local library.
pyautogui = pyautogui_client.PyAutoGUIClient()

class ComputerControl:
def __init__(self):
Expand Down Expand Up @@ -45,7 +48,8 @@ def perform_action(self, action):
raise ValueError(f"Unsupported action: {action_type}")

def take_screenshot(self):
screenshot = pyautogui.screenshot()
#screenshot = pyautogui.screenshot()
screenshot = sc.screenshot()
ai_screenshot = self.resize_for_ai(screenshot)
buffered = io.BytesIO()
ai_screenshot.save(buffered, format="PNG")
Expand Down
40 changes: 40 additions & 0 deletions wsl/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Running in WSL


## Start pyautogui server in windows

1. Install python in windows
2. Install Flask, flask-cors and pyautogui (`pip install flask flask-cors pyautogui`)
3. Run pyautogui_server.py in windows


You'll need to change screen resolution to a scale of 1280x800,
or change the resolution in `computer.py` to match your screen ratio.

## Test the screenshot function

The `screenshot.py` script will take a screenshot of the
screen and save it in the Windows "Pictures/Screenshots" folder.

## Test moving the pointer

The `test_move_pointer.py` script will move the pointer to the
center of the screen.

## Run the agent

You will need to get the IP address of your Windows machine. Usually it is
`192.168.x.x`. You will also need to get an API key from the Anthropic website.
Note that you should not change `/etc/resolv.conf` in WSL, as it will break the
network connection.


In this directory, run the following command:

```
export PYAUTOGUI_SERVER_ADDRESS=192.168.x.x:5000
export ANTHROPIC_API_KEY=your_api_key
python ../run.py
```


56 changes: 56 additions & 0 deletions wsl/pyautogui_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import requests
import os

# Default server address, taken from the PYAUTOGUI_SERVER_ADDRESS environment
# variable; falls back to "http://localhost:5000" when the variable is unset.
ADDRESS = os.getenv("PYAUTOGUI_SERVER_ADDRESS", "http://localhost:5000")


class PyAutoGUIClient:
    """HTTP client that forwards pyautogui-style calls to a remote server.

    Method names follow pyautogui (``size``, ``position``, ``moveTo``,
    ``click``, ``write``, ``press``, ``screenshot``).  Each call issues one
    HTTP request against ``base_url`` and returns data parsed from the JSON
    reply.  Note that, apart from ``size``, the return values are the
    server's JSON payloads (dicts), not pyautogui's native return types.
    """

    def __init__(self, base_url=None, timeout=30):
        """Create a client.

        Args:
            base_url: Server address.  When ``None`` the module-level
                ``ADDRESS`` is used.  ``http://`` is prepended if the
                address has no ``http://``/``https://`` scheme, and a
                trailing ``/`` is removed.
            timeout: Seconds to wait for the server before giving up, or
                ``None`` to wait indefinitely.
        """
        if base_url is None:
            base_url = ADDRESS
        # Apply the scheme check to explicit arguments as well as to the
        # environment default, so "host:port" works in both cases.
        if not base_url.startswith(("http://", "https://")):
            base_url = f"http://{base_url}"
        # Paths below start with "/", so drop a trailing slash to avoid "//".
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout

    def _timeout_for(self, extra_wait=0):
        """Return the request timeout, extended by time the action itself takes."""
        if self.timeout is None:
            return None
        return self.timeout + extra_wait

    def _get(self, path):
        """GET ``path`` and return the decoded JSON body.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        response = requests.get(f"{self.base_url}{path}", timeout=self._timeout_for())
        response.raise_for_status()
        return response.json()

    def _post(self, path, payload, extra_wait=0):
        """POST ``payload`` as JSON to ``path`` and return the decoded JSON body.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        response = requests.post(
            f"{self.base_url}{path}",
            json=payload,
            timeout=self._timeout_for(extra_wait),
        )
        response.raise_for_status()
        return response.json()

    def size(self):
        """Return the remote screen size as a ``(width, height)`` tuple of ints."""
        response_json = self._get("/screen/size")
        print(f"Screen size: {response_json}")
        # convert string to int
        return int(response_json['width']), int(response_json['height'])

    def position(self):
        """Return the server's JSON reply describing the mouse position."""
        # Decode the body once and reuse it for both the log line and the result.
        position_json = self._get("/mouse/position")
        print(f"Mouse position: {position_json}")
        return position_json

    def moveTo(self, x, y, duration=0):
        """Ask the server to move the mouse to ``(x, y)`` over ``duration``."""
        # The server only answers once the move has finished, so allow for it.
        result = self._post(
            "/mouse/move",
            {"x": x, "y": y, "duration": duration},
            extra_wait=duration,
        )
        print(f"Moving mouse to: {x}, {y}")
        return result

    def click(self, x=None, y=None, button='left'):
        """Ask the server to click ``button``; coordinates are sent only if both are given."""
        payload = {"button": button}
        if x is not None and y is not None:
            payload.update({"x": x, "y": y})
        print(f"Clicking at: {x}, {y}")
        return self._post("/mouse/click", payload)

    def write(self, text, interval=0):
        """Ask the server to type ``text`` with ``interval`` between keys."""
        print(f"Writing text: {text}")
        # Typing takes roughly interval * len(text); extend the timeout to match.
        return self._post(
            "/keyboard/write",
            {"text": text, "interval": interval},
            extra_wait=interval * len(text),
        )

    def press(self, key):
        """Ask the server to press ``key``."""
        print(f"Pressing key: {key}")
        return self._post("/keyboard/press", {"key": key})

    def screenshot(self):
        """Ask the server to take a screenshot and return its JSON reply."""
        return self._get("/screen/screenshot")
72 changes: 72 additions & 0 deletions wsl/pyautogui_server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
from flask import Flask, request, jsonify
import pyautogui
from pyautogui import FailSafeException

# Flask application object; the route handlers in this file register on it.
app = Flask(__name__)
app.config['CORS_HEADERS'] = 'Content-Type'

from flask_cors import CORS
# Enable CORS on every route ('/*') for every origin ('*').
# NOTE(review): a wildcard origin presumably lets any web page opened in a
# browser on a machine that can reach this server call these endpoints —
# confirm this is intended, since the endpoints control mouse and keyboard.
CORS(app, resources={r'/*': {'origins': '*'}})

@app.route('/mouse/move', methods=['POST'])
def mouse_move():
    """Move the pointer to the ``x``/``y`` given in the JSON request body.

    An optional ``duration`` (default 0) is forwarded to ``pyautogui.moveTo``.

    Returns:
        JSON ``{'status': 'success'}`` after a completed move, or
        ``{'status': 'failsafe'}`` when pyautogui's fail-safe aborted it.
    """
    data = request.get_json()
    print(f"Moving mouse to: {data['x']}, {data['y']}")
    try:
        pyautogui.moveTo(data['x'], data['y'], duration=data.get('duration', 0))
    except FailSafeException:
        print("Mouse moved to a corner, fail-safe guard detected.")
        # The move did not happen, so do not report it as a success.
        return jsonify({'status': 'failsafe'})
    return jsonify({'status': 'success'})

@app.route('/mouse/click', methods=['POST'])
def mouse_click():
    """Click a mouse button, optionally at the ``x``/``y`` in the JSON body.

    ``x`` and ``y`` default to ``None`` and ``button`` defaults to ``'left'``;
    all three are forwarded to ``pyautogui.click``.

    Returns:
        JSON ``{'status': 'success'}`` after the click, or
        ``{'status': 'failsafe'}`` when pyautogui's fail-safe aborted it.
    """
    data = request.get_json()
    x = data.get('x')
    y = data.get('y')
    print(f"Clicking at: {x}, {y}")
    try:
        pyautogui.click(x, y, button=data.get('button', 'left'))
    except FailSafeException:
        # Handle the fail-safe the same way the /mouse/move endpoint does
        # instead of letting it surface as an unhandled server error.
        print("Mouse moved to a corner, fail-safe guard detected.")
        return jsonify({'status': 'failsafe'})
    return jsonify({'status': 'success'})

@app.route('/keyboard/write', methods=['POST'])
def keyboard_write():
    """Type the ``text`` from the JSON request body via ``pyautogui.write``.

    The optional ``interval`` field (default 0) is passed through unchanged.
    Always replies with JSON ``{'status': 'success'}``.
    """
    payload = request.get_json()
    text = payload['text']
    print(f"Writing text: {text}")
    delay = payload.get('interval', 0)
    pyautogui.write(text, interval=delay)
    return jsonify({'status': 'success'})

# Key names a caller may send that pyautogui spells differently.
_KEY_ALIASES = {'super_l': 'winleft'}


def _normalize_key(name):
    """Return the pyautogui name for ``name``, applying known aliases."""
    return _KEY_ALIASES.get(name.lower(), name)


@app.route('/keyboard/press', methods=['POST'])
def keyboard_press():
    """Press the key, or key combination, named by ``key`` in the JSON body.

    A value such as ``"ctrl+c"`` is split on ``+`` and sent as a hotkey; any
    other value is pressed as a single key.  Aliases (``super_l`` ->
    ``winleft``) are applied to single keys and to each part of a combination.
    Always replies with JSON ``{'status': 'success'}``.
    """
    data = request.get_json()
    key = data['key']
    parts = key.split('+')
    # Only treat '+' as a separator when it separates non-empty key names;
    # otherwise (e.g. the literal "+" key) fall through to a single press.
    if len(parts) > 1 and all(parts):
        keys = [_normalize_key(part) for part in parts]
        print(f"Pressing keys: {keys}")
        pyautogui.hotkey(*keys)
    else:
        key = _normalize_key(key)
        print(f"Pressing key: {key}")
        pyautogui.press(key)
    return jsonify({'status': 'success'})

@app.route('/screen/screenshot', methods=['GET'])
def screenshot():
    """Capture the screen with pyautogui and save it as ``screenshot.png``.

    The file is written relative to the server's working directory; the JSON
    reply carries the status and that file name.
    """
    output_name = 'screenshot.png'
    image = pyautogui.screenshot()
    image.save(output_name)
    return jsonify({'status': 'success', 'file': output_name})

@app.route('/mouse/position', methods=['GET'])
def mouse_position():
    """Reply with the current pointer coordinates as JSON ``{'x': ..., 'y': ...}``."""
    pointer_x, pointer_y = pyautogui.position()
    print(f"Mouse position: {pointer_x}, {pointer_y}")
    coordinates = {'x': pointer_x, 'y': pointer_y}
    return jsonify(coordinates)

@app.route('/screen/size', methods=['GET'])
def screen_size():
    """Reply with the screen dimensions as JSON ``{'width': ..., 'height': ...}``."""
    screen_w, screen_h = pyautogui.size()
    print(f"Screen size: {screen_w}, {screen_h}")
    dimensions = {'width': screen_w, 'height': screen_h}
    return jsonify(dimensions)

if __name__ == '__main__':
    # host='0.0.0.0' makes the server listen on all interfaces, so it is
    # reachable from other hosts.  Flask's debug mode enables an interactive
    # debugger that can run arbitrary code, so it must stay off while the
    # server is reachable over the network.
    app.run(debug=False, host='0.0.0.0')
62 changes: 62 additions & 0 deletions wsl/screenshot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import os
from PIL import Image

def _windows_screenshots_dir():
    """Return the WSL path of the current Windows user's Pictures/Screenshots folder."""
    import subprocess

    # Ask Windows for the profile directory instead of hard-coding a user name.
    windows_profile = subprocess.run(
        ["powershell.exe", "-NoProfile", "-Command", "$env:USERPROFILE"],
        capture_output=True,
        text=True,
        check=True,
    ).stdout.strip()
    # Translate the Windows path (e.g. C:\Users\name) into its /mnt/... form.
    wsl_profile = subprocess.run(
        ["wslpath", "-u", windows_profile],
        capture_output=True,
        text=True,
        check=True,
    ).stdout.strip()
    return wsl_profile + "/Pictures/Screenshots/"


def screenshot(which='primary'):
    """Capture the Windows screens through PowerShell and load one capture.

    The PowerShell script iterates over all screens and saves each as a PNG in
    ``%USERPROFILE%\\Pictures\\Screenshots``: the primary screen as
    ``screenshot_primary.png`` and every other screen as
    ``screenshot_secondary.png`` (so with several secondary screens the last
    one written wins).

    Args:
        which: ``'primary'`` to load the primary-screen capture; any other
            value loads the secondary-screen capture.

    Returns:
        The selected capture opened as a ``PIL.Image.Image``.

    Raises:
        subprocess.CalledProcessError: If the Windows profile directory
            cannot be determined or converted to a WSL path.
    """
    os.system("""
powershell.exe \"
Add-Type -AssemblyName System.Windows.Forms,System.Drawing

\\$screens = [Windows.Forms.Screen]::AllScreens

# Iterate through each screen
foreach (\\$screen in \\$screens) {
Write-Host "Monitor Name: " \\$screen.DeviceName
Write-Host "Bounds: " \\$screen.Bounds
Write-Host "Working Area: " \\$screen.WorkingArea
Write-Host "Primary: " \\$(\\$screen.Primary)
Write-Host "Bounds Top: " \\$screen.Bounds.Top
Write-Host "Bounds Left: " \\$screen.Bounds.Left
Write-Host "Bounds Right: " \\$screen.Bounds.Right
Write-Host "Bounds Bottom: " \\$screen.Bounds.Bottom
Write-Host "-----------------------------"

\\$screenshot_dir = \\$env:USERPROFILE + \\\"\\Pictures\\Screenshots\\\"
if (\\$screen.Primary) {
Write-Host "Primary Monitor"
\\$filename = \\$screenshot_dir + \\\"\\screenshot_primary.png\\\"

} else {
Write-Host "Secondary Monitor"
\\$filename = \\$screenshot_dir + \\\"\\screenshot_secondary.png\\\"
}
\\$top = (\\$screen.Bounds.Top | Measure-Object -Minimum).Minimum
\\$left = (\\$screen.Bounds.Left | Measure-Object -Minimum).Minimum
\\$right = (\\$screen.Bounds.Right | Measure-Object -Maximum).Maximum
\\$bottom = (\\$screen.Bounds.Bottom | Measure-Object -Maximum).Maximum

\\$bounds = [Drawing.Rectangle]::FromLTRB(\\$left, \\$top, \\$right, \\$bottom)
\\$bmp = New-Object System.Drawing.Bitmap ([int]\\$bounds.width), ([int]\\$bounds.height)
\\$graphics = [Drawing.Graphics]::FromImage(\\$bmp)

\\$graphics.CopyFromScreen(\\$bounds.Location, [Drawing.Point]::Empty, \\$bounds.size)

Write-Host \\$filename
\\$bmp.Save(\\$filename, [Drawing.Imaging.ImageFormat]::Png)


\\$graphics.Dispose()
\\$bmp.Dispose()
}
\"
""")
    # Read from the same profile folder the PowerShell script wrote to.
    file_path = _windows_screenshots_dir()
    if which == 'primary':
        filename = file_path + "screenshot_primary.png"
    else:
        filename = file_path + "screenshot_secondary.png"
    im = Image.open(filename)
    return im

# When run as a script, take the screenshots once; the returned image is
# not used.
if __name__ == "__main__":
    screenshot()
17 changes: 17 additions & 0 deletions wsl/test_move_pointer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from pyautogui_client import PyAutoGUIClient

# Create a client instance
client = PyAutoGUIClient()

# Get screen size from server; size() returns a (width, height) tuple of ints
width, height = client.size()

# Calculate center
center_x = width // 2
center_y = height // 2

# Send request to move mouse to center
response = client.moveTo(center_x, center_y, duration=1.0)

# Print response
print(response)
7 changes: 7 additions & 0 deletions wsl/test_pyautogui_mouse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import pyautogui
import time

# Print the screen size once, then report the pointer position every second
# until the script is interrupted.
screen_size = pyautogui.size()
print(screen_size)
while True:
    pointer = pyautogui.position()
    print(pointer)
    time.sleep(1)
10 changes: 10 additions & 0 deletions wsl/test_pyqt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import sys
from PyQt6 import QtWidgets

# Smoke test: show an empty 500x500 widget at screen position (100, 100) and
# exit with the status returned by the Qt event loop.
application = QtWidgets.QApplication(sys.argv)

window = QtWidgets.QWidget()
window.resize(500, 500)
window.move(100, 100)
window.show()

exit_code = application.exec()
sys.exit(exit_code)