# httpProxy.py

# -*- coding: utf-8 -*-
import sys
import os
import socket
import ssl
import select
import httplib
import urlparse
import threading
import gzip
import zlib
import time
import json
import re
from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
from SocketServer import ThreadingMixIn
from cStringIO import StringIO
from subprocess import Popen, PIPE
from HTMLParser import HTMLParser

#filename = 'proxyOutput-%s' % datetime.now().strftime('%Y-%m-%d')
f1 = open('mainData', 'w')
f = open('extraData', 'w')


def with_color(c, s):
    return "\x1b[%dm%s\x1b[0m" % (c, s)
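
# --- Optional helper (added sketch, not part of the original flow) ---
# connect_intercept() below expects ca.key, ca.crt, cert.key and the certs/
# directory to exist; otherwise do_CONNECT falls back to blind relaying.
# This is one way those files could be produced with the openssl CLI.
# Hedged sketch: the key size, validity period and "/CN=proxy2 CA" subject
# are assumptions, not values required by this script.
def generate_ca_material(cakey='ca.key', cacert='ca.crt', certkey='cert.key', certdir='certs/'):
    if not os.path.isdir(certdir):
        os.makedirs(certdir)
    if not os.path.isfile(cakey):
        # private key for the signing CA
        Popen(["openssl", "genrsa", "-out", cakey, "2048"]).communicate()
    if not os.path.isfile(cacert):
        # self-signed CA certificate (this is what clients need to trust)
        Popen(["openssl", "req", "-new", "-x509", "-days", "3650",
               "-key", cakey, "-subj", "/CN=proxy2 CA", "-out", cacert]).communicate()
    if not os.path.isfile(certkey):
        # shared private key reused for every generated host certificate
        Popen(["openssl", "genrsa", "-out", certkey, "2048"]).communicate()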
class ThreadingHTTPServer(ThreadingMixIn, HTTPServer):
    address_family = socket.AF_INET6
    daemon_threads = True

    def handle_error(self, request, client_address):
        # suppress socket/ssl related errors
        cls, e = sys.exc_info()[:2]
        if cls is socket.error or cls is ssl.SSLError:
            pass
        else:
            return HTTPServer.handle_error(self, request, client_address)
class ProxyRequestHandler(BaseHTTPRequestHandler):
    cakey = 'ca.key'
    cacert = 'ca.crt'
    certkey = 'cert.key'
    certdir = 'certs/'
    timeout = 5
    lock = threading.Lock()

    def __init__(self, *args, **kwargs):
        self.tls = threading.local()
        self.tls.conns = {}
        BaseHTTPRequestHandler.__init__(self, *args, **kwargs)

    def log_error(self, format, *args):
        # suppress "Request timed out: timeout('timed out',)"
        if isinstance(args[0], socket.timeout):
            return
        self.log_message(format, *args)
    def do_CONNECT(self):
        # Intercept TLS only if the CA material is available, otherwise relay blindly.
        if os.path.isfile(self.cakey) and os.path.isfile(self.cacert) and os.path.isfile(self.certkey) and os.path.isdir(self.certdir):
            self.connect_intercept()
        else:
            self.connect_relay()

    def connect_intercept(self):
        # Sign (and cache) a certificate for the requested host, then wrap the
        # client connection in TLS so the decrypted request is handled by do_GET.
        hostname = self.path.split(':')[0]
        certpath = "%s/%s.crt" % (self.certdir.rstrip('/'), hostname)

        with self.lock:
            if not os.path.isfile(certpath):
                epoch = "%d" % (time.time() * 1000)
                p1 = Popen(["openssl", "req", "-new", "-key", self.certkey, "-subj", "/CN=%s" % hostname], stdout=PIPE)
                p2 = Popen(["openssl", "x509", "-req", "-days", "3650", "-CA", self.cacert, "-CAkey", self.cakey, "-set_serial", epoch, "-out", certpath], stdin=p1.stdout, stderr=PIPE)
                p2.communicate()

        self.wfile.write("%s %d %s\r\n" % (self.protocol_version, 200, 'Connection Established'))
        self.end_headers()

        self.connection = ssl.wrap_socket(self.connection, keyfile=self.certkey, certfile=certpath, server_side=True)
        self.rfile = self.connection.makefile("rb", self.rbufsize)
        self.wfile = self.connection.makefile("wb", self.wbufsize)

        conntype = self.headers.get('Proxy-Connection', '')
        if conntype.lower() == 'close':
            self.close_connection = 1
        elif (conntype.lower() == 'keep-alive' and self.protocol_version >= "HTTP/1.1"):
            self.close_connection = 0
    def connect_relay(self):
        # Plain CONNECT tunnel: copy bytes in both directions without decrypting.
        address = self.path.split(':', 1)
        address[1] = int(address[1]) or 443
        try:
            s = socket.create_connection(address, timeout=self.timeout)
        except Exception as e:
            self.send_error(502)
            return
        self.send_response(200, 'Connection Established')
        self.end_headers()

        conns = [self.connection, s]
        self.close_connection = 0
        while not self.close_connection:
            rlist, wlist, xlist = select.select(conns, [], conns, self.timeout)
            if xlist or not rlist:
                break
            for r in rlist:
                other = conns[1] if r is conns[0] else conns[0]
                data = r.recv(8192)
                if not data:
                    self.close_connection = 1
                    break
                other.sendall(data)
    def do_GET(self):
        if self.path == 'http://proxy2.test/':
            self.send_cacert()
            return

        ######## request ########
        req = self
        content_length = int(req.headers.get('Content-Length', 0))
        req_body = self.rfile.read(content_length) if content_length else None

        if req.path[0] == '/':
            if isinstance(self.connection, ssl.SSLSocket):
                req.path = "https://%s%s" % (req.headers['Host'], req.path)
            else:
                req.path = "http://%s%s" % (req.headers['Host'], req.path)

        ######## request host name ########
        print ("Working on host: %s \n" % (req.headers['Host']))

        # The request hook may replace the request body before it is forwarded.
        req_body_modified = self.request_handler(req, req_body)
        if req_body_modified is not None:
            req_body = req_body_modified
            req.headers['Content-length'] = str(len(req_body))

        u = urlparse.urlsplit(req.path)
        scheme, netloc, path = u.scheme, u.netloc, (u.path + '?' + u.query if u.query else u.path)
        assert scheme in ('http', 'https')
        if netloc:
            req.headers['Host'] = netloc
        req_headers = self.filter_headers(req.headers)

        try:
            # Reuse one upstream connection per (scheme, host) for this thread.
            origin = (scheme, netloc)
            if origin not in self.tls.conns:
                if scheme == 'https':
                    self.tls.conns[origin] = httplib.HTTPSConnection(netloc, timeout=self.timeout)
                else:
                    self.tls.conns[origin] = httplib.HTTPConnection(netloc, timeout=self.timeout)
            conn = self.tls.conns[origin]
            conn.request(self.command, path, req_body, dict(req_headers))
            ######## response ########
            res = conn.getresponse()
            res_body = res.read()
        except Exception as e:
            if origin in self.tls.conns:
                del self.tls.conns[origin]
            self.send_error(502)
            return

        version_table = {10: 'HTTP/1.0', 11: 'HTTP/1.1'}
        setattr(res, 'headers', res.msg)
        setattr(res, 'response_version', version_table[res.version])

        content_encoding = res.headers.get('Content-Encoding', 'identity')
        res_body_plain = self.decode_content_body(res_body, content_encoding)

        # The response hook may replace the decoded body; re-encode and fix the length.
        res_body_modified = self.response_handler(req, req_body, res, res_body_plain)
        if res_body_modified is not None:
            res_body_plain = res_body_modified
            res_body = self.encode_content_body(res_body_plain, content_encoding)
            res.headers['Content-Length'] = str(len(res_body))

        res_headers = self.filter_headers(res.headers)

        self.wfile.write("%s %d %s\r\n" % (self.protocol_version, res.status, res.reason))
        for line in res_headers.headers:
            self.wfile.write(line)
        self.end_headers()
        self.wfile.write(res_body)
        self.wfile.flush()

        with self.lock:
            self.save_handler(req, req_body, res, res_body_plain)

    do_HEAD = do_GET
    do_POST = do_GET
    do_OPTIONS = do_GET
    def filter_headers(self, headers):
        # http://tools.ietf.org/html/rfc2616#section-13.5.1
        hop_by_hop = ('connection', 'keep-alive', 'proxy-authenticate', 'proxy-authorization', 'te', 'trailers', 'transfer-encoding', 'upgrade')
        for k in hop_by_hop:
            del headers[k]
        return headers

    def encode_content_body(self, text, encoding):
        if encoding == 'identity':
            data = text
        elif encoding in ('gzip', 'x-gzip'):
            io = StringIO()
            with gzip.GzipFile(fileobj=io, mode='wb') as f:
                f.write(text)
            data = io.getvalue()
        elif encoding == 'deflate':
            data = zlib.compress(text)
        else:
            raise Exception("Unknown Content-Encoding: %s" % encoding)
        return data

    def decode_content_body(self, data, encoding):
        if encoding == 'identity':
            text = data
        elif encoding in ('gzip', 'x-gzip'):
            io = StringIO(data)
            with gzip.GzipFile(fileobj=io) as f:
                text = f.read()
        elif encoding == 'deflate':
            try:
                text = zlib.decompress(data)
            except zlib.error:
                text = zlib.decompress(data, -zlib.MAX_WBITS)
        else:
            raise Exception("Unknown Content-Encoding: %s" % encoding)
        return text

    def send_cacert(self):
        with open(self.cacert, 'rb') as f:
            data = f.read()
        self.wfile.write("%s %d %s\r\n" % (self.protocol_version, 200, 'OK'))
        self.send_header('Content-Type', 'application/x-x509-ca-cert')
        self.send_header('Content-Length', len(data))
        self.send_header('Connection', 'close')
        self.end_headers()
        self.wfile.write(data)
    def print_info(self, req, req_body, res, res_body):
        def parse_qsl(s):
            return '\n'.join("%-20s %s" % (k, v) for k, v in urlparse.parse_qsl(s, keep_blank_values=True))

        # One-line summaries of the exchange go to 'mainData'.
        # .get() avoids a KeyError when the request carries no Referer header.
        request_data = "%s\t%s\t%s\t%s" % (req.headers.get('Referer', ''), req.headers['Host'], req.command, req.path)
        f1.write('Request:\t')
        f1.write(str(request_data))
        f1.write('\n')

        response_data = "%s\t%s" % (res.headers.get('content-length'), res.headers.get('content-type'))
        f1.write('Response:\t')
        f1.write(str(response_data))
        f1.write('\n')

        # Full headers, query strings, cookies and bodies go to 'extraData'.
        req_header_text = "%s %s %s\n%s" % (req.command, req.path, req.request_version, req.headers)
        res_header_text = "%s %d %s\n%s" % (res.response_version, res.status, res.reason, res.headers)

        print ("\n+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n")

        f.write(str(req_header_text))

        u = urlparse.urlsplit(req.path)
        if u.query:
            query_text = parse_qsl(u.query)
            f.write('==== QUERY PARAMETERS ====\n')
            f.write(str(query_text))
            f.write('\n')

        cookie = req.headers.get('Cookie', '')
        if cookie:
            cookie = parse_qsl(re.sub(r';\s*', '&', cookie))
            f.write('==== COOKIE ====\n')
            f.write(str(cookie))
            f.write('\n')

        auth = req.headers.get('Authorization', '')
        if auth.lower().startswith('basic'):
            token = auth.split()[1].decode('base64')
            f.write('==== BASIC AUTH ====\n')
            f.write(str(token))
            f.write('\n')

        if req_body is not None:
            req_body_text = None
            content_type = req.headers.get('Content-Type', '')

            if content_type.startswith('application/x-www-form-urlencoded'):
                req_body_text = parse_qsl(req_body)
            elif content_type.startswith('application/json'):
                try:
                    json_obj = json.loads(req_body)
                    json_str = json.dumps(json_obj, indent=2)
                    if json_str.count('\n') < 50:
                        req_body_text = json_str
                    else:
                        lines = json_str.splitlines()
                        req_body_text = "%s\n(%d lines)" % ('\n'.join(lines[:50]), len(lines))
                except ValueError:
                    req_body_text = req_body
            elif len(req_body) < 1024:
                req_body_text = req_body

            if req_body_text:
                f.write('==== REQUEST BODY ====\n')
                f.write(str(req_body_text))
                f.write('\n')

        print with_color(36, res_header_text)
        f.write(str(res_header_text))

        cookies = res.headers.getheaders('Set-Cookie')
        if cookies:
            cookies = '\n'.join(cookies)
            f.write('==== SET-COOKIE ====\n')
            f.write(str(cookies))
            f.write('\n')

        if res_body is not None:
            res_body_text = None
            content_type = res.headers.get('Content-Type', '')

            if content_type.startswith('application/json'):
                try:
                    json_obj = json.loads(res_body)
                    json_str = json.dumps(json_obj, indent=2)
                    if json_str.count('\n') < 50:
                        res_body_text = json_str
                    else:
                        lines = json_str.splitlines()
                        res_body_text = "%s\n(%d lines)" % ('\n'.join(lines[:50]), len(lines))
                except ValueError:
                    res_body_text = res_body
            elif content_type.startswith('text/html'):
                m = re.search(r'<title[^>]*>\s*([^<]+?)\s*</title>', res_body, re.I)
                if m:
                    h = HTMLParser()
                    f.write('==== HTML TITLE ====\n')
                    # encode back to UTF-8 so non-ASCII titles do not raise on write
                    f.write(h.unescape(m.group(1).decode('utf-8')).encode('utf-8'))
                    f.write('\n')
            elif content_type.startswith('text/') and len(res_body) < 1024:
                res_body_text = res_body

            if res_body_text:
                f.write('==== RESPONSE BODY ====\n')
                f.write(str(res_body_text))
                f.write('\n')
    def request_handler(self, req, req_body):
        # Hook: return a replacement request body, or None to leave it unchanged.
        pass

    def response_handler(self, req, req_body, res, res_body):
        # Hook: return a replacement (decoded) response body, or None to leave it unchanged.
        pass

    def save_handler(self, req, req_body, res, res_body):
        # Hook: called once per exchange (under the class lock) to log the traffic.
        self.print_info(req, req_body, res, res_body)
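
# --- Example subclass (added sketch, not part of the original script) ---
# Minimal illustration of the request_handler()/response_handler() hooks
# defined above.  The class name, the header being dropped and the marker
# string are assumptions made for demonstration only; pass the class to
# test(HandlerClass=ExampleInterceptor) to try it.
class ExampleInterceptor(ProxyRequestHandler):

    def request_handler(self, req, req_body):
        # Strip a caching header so the origin always returns a full response.
        if req.headers.get('If-Modified-Since'):
            del req.headers['If-Modified-Since']
        return None  # keep the request body unchanged

    def response_handler(self, req, req_body, res, res_body):
        # Tag small plain-text responses so intercepted traffic is easy to spot;
        # do_GET() re-encodes the body and fixes Content-Length afterwards.
        if res.headers.get('Content-Type', '').startswith('text/plain'):
            return res_body + '\n[intercepted by httpProxy]'
        return None  # keep the response body unchanged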

def test(HandlerClass=ProxyRequestHandler, ServerClass=ThreadingHTTPServer, protocol="HTTP/1.1"):
    if sys.argv[1:]:
        port = int(sys.argv[1])
    else:
        port = 8080
    server_address = ('', port)

    HandlerClass.protocol_version = protocol
    httpd = ServerClass(server_address, HandlerClass)

    sa = httpd.socket.getsockname()
    print "Serving HTTP Proxy on", sa[0], "port", sa[1], "..."
    httpd.serve_forever()
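
# --- Quick client-side check (added sketch, not part of the original script) ---
# Run from a second interpreter while the proxy is serving.  Assumes the
# default port 8080 and that 'localhost' reaches the listener (the server
# binds an IPv6 socket, which on dual-stack hosts usually also accepts IPv4).
# The target URL is an arbitrary example.
def smoke_check(url='http://example.com/', proxy='http://localhost:8080'):
    import urllib2
    opener = urllib2.build_opener(urllib2.ProxyHandler({'http': proxy}))
    return opener.open(url, timeout=10).read()[:200]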

if __name__ == '__main__':
    test()