# main.py (2.6 KB)
  1. # -*-coding:utf-8-*-
  2. import time
  3. from selenium import webdriver
  4. # from seleniumwire import webdriver
  5. from selenium.webdriver.chrome.options import Options
  6. # 通过Keys模拟键盘
  7. from selenium.webdriver.common.keys import Keys
  8. from flask import Flask, jsonify, render_template
  9. from flask import request as flask_request
  10. g_browser_type = 1;
  11. app = Flask(__name__);
  12. @app.route('/proxy', methods=['POST'])
  13. def proxy():
  14. if not flask_request.is_json:
  15. return jsonify({'code': 100, 'message': '请使用JSON数据格式'}), 400;
  16. data = flask_request.get_json();
  17. if 'url' not in data:
  18. return jsonify({'code': 101, 'message': '缺少参数url'}), 200;
  19. url = data['url'];
  20. headers = None;
  21. timeout = 10;
  22. needs = [];
  23. user_agent = flask_request.headers.get('User-Agent');
  24. if 'needs' in data:
  25. needs = data['needs'];
  26. if 'headers' in data:
  27. headers = data['headers'];
  28. if 'timeout' in data:
  29. headers = data['timeout'];
  30. if g_browser_type == 1:
  31. executable_path = r'./chrome-headless-shell-win64/chrome-headless-shell.exe';
  32. options = Options();
  33. options.add_argument("--headless"); # 如果你想在无头模式下运行
  34. if user_agent:
  35. options.add_argument('user-agent=' + user_agent)
  36. options.binary_location = executable_path;
  37. # 加载策略为'eager',等待页面的所有资源完全加载完成,包括图片等
  38. options.page_load_strategy = 'eager'; # 或者'normal'或者'none'
  39. driver = webdriver.Chrome(options=options);
  40. else:
  41. executable_path = r'./phantomjs-2.1.1-windows/bin/phantomjs.exe';
  42. driver = webdriver.PhantomJS(executable_path=executable_path);
  43. # driver.implicitly_wait(timeout);
  44. driver.get(url);
  45. final_url = None;
  46. rsp_headers = None;
  47. content = None;
  48. try:
  49. # driver_wait = WebDriverWait(driver,timeout);
  50. # element = driver_wait.until(lambda x: x.find_element_by_xpath(Xpath));
  51. # 获取最终重定向后的URL
  52. final_url = driver.current_url;
  53. content = driver.page_source;
  54. # 获取HTTP头部信息
  55. rsp_headers = None;
  56. # for one_req in driver.requests:
  57. # if one_req.response:
  58. # print(
  59. # one_req.url,
  60. # one_req.response.status_code,
  61. # one_req.response.headers
  62. # );
  63. except Exception as e:
  64. print(e);
  65. finally:
  66. driver.close();
  67. driver.quit();
  68. result = {
  69. 'url': final_url,
  70. 'headers': None,
  71. 'content': None,
  72. };
  73. if 'headers' in needs:
  74. result['headers'] = rsp_headers;
  75. else:
  76. del result['headers'];
  77. if 'content' in needs:
  78. result['content'] = content;
  79. else:
  80. del result['content'];
  81. return jsonify({'code': 200, 'data': result,'message': '成功'}), 200;
  82. def main():
  83. app.config['DEBUG'] = True;
  84. app.run(host='0.0.0.0', port=5000);
  85. if __name__ == '__main__':
  86. main();