|
| 1 | +#!/usr/bin/env python |
| 2 | +# -*- coding: utf-8 -*- |
| 3 | +""" |
| 4 | + simple-http-proxy ( https://github.com/WengChaoxi/simple-http-proxy ) |
| 5 | + ~ ~ ~ ~ ~ ~ |
| 6 | + 一个简单的http代理 |
| 7 | + |
| 8 | + :copyright: (c) 2021 by WengChaoxi. |
| 9 | + :license: MIT, see LICENSE for more details. |
| 10 | +""" |
| 11 | +from __future__ import print_function |
| 12 | + |
| 13 | +import socket |
| 14 | +import select |
| 15 | +import time |
| 16 | + |
def debug(tag, msg):
    """Print one log line of the form ``[tag] msg`` to standard output.

    tag -- short category label shown in square brackets (e.g. 'info').
    msg -- the message text; any object is rendered with ``%s``.
    """
    line = '[%s] %s' % (tag, msg)
    print(line)
| 19 | + |
class HttpRequestPacket(object):
    """Parsed view of one raw HTTP request.

    Example input::

        GET http://test.wengcx.top/index.html HTTP/1.1\\r\\n
        Host: test.wengcx.top\\r\\n
        Proxy-Connection: keep-alive\\r\\n
        \\r\\n

    Attributes (bytes unless noted):
        req_line   -- the request line without its trailing CRLF
        method     -- first field of the request line
        req_uri    -- second field of the request line
        version    -- third field of the request line
        req_header -- raw header block between the request line and the
                      blank line that ends the head
        headers    -- dict of header name -> value, names spelled as received
        host       -- value of the Host header (name matched
                      case-insensitively), or None when absent
        req_data   -- everything after the blank line (b'' if there is none)

    Raises:
        ValueError -- the request line does not consist of exactly three
                      whitespace-separated fields.
    """

    def __init__(self, data):
        self.__parse(data)

    def __parse(self, data):
        """Split *data* into request line, header fields and payload."""
        # partition() never returns -1-style sentinels, so a request whose
        # head terminator has not arrived yet is not silently truncated.
        head, _, self.req_data = data.partition(b'\r\n\r\n')
        self.req_line, _, self.req_header = head.partition(b'\r\n')

        # Request-Line = method SP request-target SP HTTP-version
        fields = self.req_line.split()
        if len(fields) != 3:
            raise ValueError('malformed request line: %r' % (self.req_line,))
        self.method, self.req_uri, self.version = fields

        # Header fields. Split on the first colon only: values may contain
        # ": " themselves and the space after the colon is optional.
        self.headers = {}
        self.host = None
        for line in self.req_header.split(b'\r\n'):
            name, colon, value = line.partition(b':')
            if not colon:
                # Empty header block or a line that is not "name: value".
                continue
            name = name.strip()
            value = value.strip()
            self.headers[name] = value
            # Header names are case-insensitive; clients may send "host".
            if name.lower() == b'host':
                self.host = value
| 51 | + |
class SimpleHttpProxy(object):
    """A minimal forwarding HTTP proxy.

    client <=> proxy <=> server

    Plain HTTP requests are rewritten from absolute-form to origin-form and
    forwarded; CONNECT requests (used for HTTPS) open a raw TCP tunnel.
    Each accepted client is served on its own thread.
    """

    _CONNECT_TIMEOUT = 5        # seconds; also stays on the upstream socket
    _SELECT_TIMEOUT = 2         # seconds between relay-loop wakeups
    _MAX_HEAD_SIZE = 64 * 1024  # stop buffering a request head beyond this

    def __init__(self, host='0.0.0.0', port=8080, listen=10, bufsize=8, delay=1):
        """Create, bind and start listening on the proxy socket.

        host    -- listen address; the default 0.0.0.0 means any local IPv4
                   address.
        port    -- listen port, default 8080.
        listen  -- backlog passed to ``socket.listen``, default 10.
        bufsize -- receive buffer size in KB, default 8.
        delay   -- pause between relay rounds in milliseconds, default 1.
        """
        self.socket_recv_bufsize = bufsize * 1024
        self.delay = delay / 1000.0

        self.socket_proxy = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            # SO_REUSEADDR lets the port be bound again right after a restart.
            self.socket_proxy.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            self.socket_proxy.bind((host, port))
            self.socket_proxy.listen(listen)
        except Exception:
            # Do not leave a half-configured socket open if setup fails.
            self.socket_proxy.close()
            raise

        debug('info', 'bind=%s:%s' % (host, port))
        debug('info', 'listen=%s' % listen)
        debug('info', 'bufsize=%skb, delay=%sms' % (bufsize, delay))

    def __del__(self):
        # __init__ may have failed before the attribute was assigned.
        sock = getattr(self, 'socket_proxy', None)
        if sock is not None:
            sock.close()

    @staticmethod
    def _split_host_port(authority, default_port=80):
        """Split ``host[:port]`` (bytes) into ``(host, int_port)``.

        Bracketed IPv6 literals such as ``[::1]:8080`` are supported; the
        brackets are removed from the returned host. Raises ValueError for
        an unterminated bracket or a non-numeric port.
        """
        if authority.startswith(b'['):
            end = authority.find(b']')
            if end == -1:
                raise ValueError('malformed IPv6 authority: %r' % (authority,))
            host = authority[1:end]
            rest = authority[end + 1:]
            port = rest[1:] if rest.startswith(b':') else b''
        else:
            host, _, port = authority.partition(b':')
        return host, (int(port) if port else default_port)

    @staticmethod
    def _split_absolute_uri(req_uri):
        """Split an absolute-form target into ``(authority, origin_form)``.

        ``http://a.com/x?y`` -> ``(b'a.com', b'/x?y')``. A target that is
        not absolute-form is returned unchanged as ``(b'', req_uri)``.
        """
        scheme_end = req_uri.find(b'://')
        # A "://" that appears after a '/' or '?' belongs to the path or
        # query of an origin-form target, not to a scheme.
        if (scheme_end <= 0 or b'/' in req_uri[:scheme_end]
                or b'?' in req_uri[:scheme_end]):
            return b'', req_uri
        rest = req_uri[scheme_end + 3:]
        cut = len(rest)
        for delimiter in (b'/', b'?'):
            pos = rest.find(delimiter)
            if pos != -1:
                cut = min(cut, pos)
        authority, tail = rest[:cut], rest[cut:]
        if not tail.startswith(b'/'):
            tail = b'/' + tail  # "http://a.com" and "http://a.com?x" need a path
        return authority, tail

    @classmethod
    def _to_origin_form(cls, req_data, req_uri):
        """Return *req_data* with its request target rewritten to origin-form.

        Only the request line is touched, so header values and bodies that
        happen to contain the same URL are forwarded unmodified.
        """
        authority, origin = cls._split_absolute_uri(req_uri)
        if not authority:
            return req_data
        line_end = req_data.find(b'\r\n')
        if line_end == -1:
            line_end = len(req_data)
        request_line = req_data[:line_end].replace(req_uri, origin, 1)
        return request_line + req_data[line_end:]

    def _recv_request(self, socket_client):
        """Read from the client until the request head is complete.

        Returns whatever was received (possibly including the start of the
        body); b'' means the client closed without sending anything.
        Reading stops early at EOF or once _MAX_HEAD_SIZE bytes are buffered.
        """
        data = socket_client.recv(self.socket_recv_bufsize)
        while data and b'\r\n\r\n' not in data and len(data) < self._MAX_HEAD_SIZE:
            chunk = socket_client.recv(self.socket_recv_bufsize)
            if not chunk:
                break
            data += chunk
        return data

    def __connect(self, host, port):
        """Resolve *host* and open a TCP connection to it.

        host -- target host name or address (bytes or str).
        port -- target port (anything ``int()`` accepts).
        Returns the connected socket; its timeout stays at _CONNECT_TIMEOUT.
        """
        if isinstance(host, bytes):
            host = host.decode('ascii')
        # create_connection tries every resolved address and closes the
        # failed attempts itself, so nothing leaks when connecting fails.
        return socket.create_connection((host, int(port)),
                                        timeout=self._CONNECT_TIMEOUT)

    def __proxy(self, socket_client):
        """Serve one client connection from first request to close.

        socket_client -- socket between the proxy and the client. It is
        always closed before this method returns.
        """
        socket_server = None
        try:
            req_data = self._recv_request(socket_client)
            if not req_data:
                return

            http_packet = HttpRequestPacket(req_data)

            if http_packet.method == b'CONNECT':
                # HTTPS: the request target itself is "host:port".
                server_host, server_port = self._split_host_port(
                    http_packet.req_uri, 443)
                socket_server = self.__connect(server_host, server_port)
                success_msg = (http_packet.version
                               + b' 200 Connection Established\r\n'
                               + b'Connection: close\r\n\r\n')
                socket_client.sendall(success_msg)
                if http_packet.req_data:
                    # Bytes the client pipelined right behind the CONNECT head.
                    socket_server.sendall(http_packet.req_data)
            else:
                # Any other method is plain HTTP and is forwarded as-is.
                authority = (http_packet.host
                             or self._split_absolute_uri(http_packet.req_uri)[0])
                if not authority:
                    raise ValueError('request names no target host')
                server_host, server_port = self._split_host_port(authority, 80)
                socket_server = self.__connect(server_host, server_port)
                socket_server.sendall(
                    self._to_origin_form(req_data, http_packet.req_uri))

            # Relay the rest of the conversation in both directions.
            self.__nonblocking(socket_client, socket_server)
        finally:
            # close() is idempotent, so closing again after the relay loop
            # has already done so is harmless.
            for sock in (socket_client, socket_server):
                if sock is not None:
                    sock.close()

    def __nonblocking(self, socket_client, socket_server):
        """Relay data between client and server until either side closes.

        socket_client -- socket between the proxy and the client.
        socket_server -- socket between the proxy and the server.
        Both sockets are closed before this method returns.
        """
        watched = [socket_client, socket_server]
        relaying = True
        try:
            while relaying:
                # select(rlist, wlist, xlist, timeout): wait until a socket
                # is readable or in an exceptional state, or the timeout
                # elapses (in which case all returned lists are empty).
                readable, _, errored = select.select(
                    watched, [], watched, self._SELECT_TIMEOUT)
                if errored:
                    break
                for source in readable:
                    data = source.recv(self.socket_recv_bufsize)
                    if not data:
                        # Peer closed. Finish this round so data already
                        # readable from the other side is still delivered.
                        relaying = False
                        continue
                    if source is socket_client:
                        socket_server.sendall(data)   # client -> server
                    else:
                        socket_client.sendall(data)   # client <- server
                time.sleep(self.delay)  # small pause to keep CPU usage down
        except (socket.error, select.error, ValueError):
            # Resets, timeouts and already-closed sockets simply end the
            # relay; this is deliberate best-effort behaviour.
            pass
        finally:
            socket_client.close()
            socket_server.close()

    def client_socket_accept(self):
        """Block until a client connects and return its socket."""
        socket_client, _ = self.socket_proxy.accept()
        return socket_client

    def handle_client_request(self, socket_client):
        """Serve one client; failures are logged instead of propagated."""
        try:
            self.__proxy(socket_client)
        except Exception as e:
            debug('error', 'request failed: %r' % (e,))

    def start(self):
        """Accept clients forever, one daemon thread each, until Ctrl-C."""
        import threading
        while True:
            try:
                worker = threading.Thread(
                    target=self.handle_client_request,
                    args=(self.client_socket_accept(),))
                # Daemon threads do not keep the process alive on shutdown.
                worker.daemon = True
                worker.start()
            except KeyboardInterrupt:
                break
| 218 | + |
if __name__ == '__main__':
    import getopt
    import sys

    # Defaults; each can be overridden from the command line:
    #   -h/--host, -p/--port, -l/--listen, -b/--bufsize (KB), -d/--delay (ms)
    host, port, listen, bufsize, delay = '0.0.0.0', 8080, 10, 8, 1

    try:
        opts, _ = getopt.getopt(
            sys.argv[1:], 'h:p:l:b:d:',
            ['host=', 'port=', 'listen=', 'bufsize=', 'delay='])
        for opt, arg in opts:
            if opt in ('-h', '--host'):
                host = arg
            elif opt in ('-p', '--port'):
                port = int(arg)
            elif opt in ('-l', '--listen'):
                listen = int(arg)
            elif opt in ('-b', '--bufsize'):
                bufsize = int(arg)
            elif opt in ('-d', '--delay'):
                delay = float(arg)
    except (getopt.GetoptError, ValueError):
        # Unknown option, missing argument, or a non-numeric value.
        debug('error', 'read the readme.md first!')
        sys.exit(1)  # non-zero status so callers can detect the usage error

    # Start the proxy; blocks until interrupted.
    SimpleHttpProxy(host, port, listen, bufsize, delay).start()
0 commit comments