websocket--hook

代码不全,大致思路
原理:
浏览器(客户端):在浏览器中注入一段JS代码,与服务端建立连接。调用浏览器中的js方法,把返回的数据发送给服务端
node启动js代码,监听某端口(服务端):服务端把参数(python发过来的)发送给浏览器客户端处理,并接收处理结果,再把接收到的结果返回给python处理
python(调用者):把参数发送给node,接收node传回来的数据
优点:
1.对于js混淆加密较深的,可以采用此方法。
2.不用扣js加密代码,直接调用浏览器环境
缺点:
1.如果有selenium检测,要想使用此方法,必须先绕过selenium检测,否则只能使用真机进行js注入
2.需要node环境,写一个websocket服务端和客户端
3.速度没有直接破解js快

服务端--WebSocketServer.js

let iconv = require('iconv-lite')
const ws = require("nodejs-websocket");

console.log("开始建立连接...");

/**
 * Relay server: every text message received from a registered connection is
 * forwarded to all other open connections.
 *
 * Protocol visible in this script:
 *   1. A peer connects and sends the literal text "Browser" or "Python" to
 *      register itself.
 *   2. Any later text message from that peer is sent to every other
 *      connection currently held by the server.
 */
const server = ws.createServer(function (conn) {
    // Role name -> connection key. Declared per connection, so it only ever
    // records the registration made on this connection.
    const cached = {};

    conn.on("text", function (msg) {
        if (!msg) return;

        const key = conn.key;

        // First message from a browser or Python peer: remember it and stop.
        if (msg === "Browser" || msg === "Python") {
            cached[msg] = key;
            return;
        }

        // Ignore payloads from connections that never registered.
        if (!Object.values(cached).includes(key)) return;

        // Forward the payload to everyone except the sender.
        server.connections
            .filter(function (peer) {
                return peer.key !== key;
            })
            .forEach(function (peer) {
                peer.send(msg);
            });
    });

    conn.on("close", function (code, reason) {
        // Nothing to clean up: `cached` is scoped to this connection.
    });

    conn.on("error", function (err) {
        // Keep the process alive on socket errors, but show what went wrong.
        console.log("异常关闭", err);
    });
}).listen(10512);

console.log("WebSocket建立完毕");

客户端注入JS代码

// Open the connection as soon as this snippet runs; the createSocket function
// declaration further down is hoisted, so calling it here is valid.
createSocket();

/**
 * Connect the page to the local relay server and answer its requests.
 *
 * The socket is stored on `window.ws`. After the connection opens, the text
 * "Browser" is sent to register this peer. Every later message is treated as
 * a URL: it is fetched with an asynchronous GET request and the JSON response
 * body is re-serialized and sent back over the socket.
 * When the socket closes, a new connection attempt is scheduled after 60 s.
 */
function createSocket() {
    const socket = new WebSocket('ws://127.0.0.1:10512/');
    window.ws = socket;

    socket.onopen = function () {
        console.log("连接服务器成功");
        socket.send("Browser");
    };

    socket.onclose = function () {
        console.log("服务器关闭");
        setTimeout(createSocket, 60000);
    };

    socket.onerror = function () {
        console.log("连接出错");
    };

    socket.onmessage = function (e) {
        // NOTE(review): `glb` is not defined in this snippet; presumably it is
        // a global provided by the target page - confirm. Fall back to the
        // standard constructor when it is absent instead of throwing.
        const XhrCtor =
            (typeof glb !== 'undefined' && glb.XMLHttpRequest) || XMLHttpRequest;
        const xhr = new XhrCtor();

        xhr.onreadystatechange = function () {
            if (xhr.readyState !== 4) return;

            if (xhr.status !== 200) {
                // Log instead of alert(): a modal dialog would block the page
                // until someone dismisses it.
                console.error("Problem retrieving XML data", xhr.status);
                return;
            }

            let result;
            try {
                // Parse + stringify so only valid, compact JSON is forwarded.
                result = JSON.stringify(JSON.parse(xhr.responseText));
            } catch (err) {
                console.error("Response is not valid JSON", err);
                return;
            }

            // Reply on the socket that delivered the request.
            socket.send(result);
        };

        xhr.open('GET', e.data, true);
        xhr.send(null);
    };
}

python开端口

# -*- coding: utf-8 -*-
from sanic import Sanic
from sanic.response import json
import os
import urllib3

from toutiao2_文件方式.get_data import get_data
from toutiao2_文件方式.get_user_id import get_user

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

app = Sanic(__name__)


@app.route("/get_user_id", methods=["GET"])
def get_user_id_route(request):
    """Handle GET /get_user_id?media_id=...

    Passes ``media_id`` to ``get_user_id`` and always answers with a JSON
    response, so the handler never returns ``None``.
    """
    media_id = request.args.get("media_id")
    if media_id is None:
        return json({"status": "error", "message": "missing media_id"}, status=400)
    try:
        result = get_user_id(media_id)
    except Exception as e:
        # Report the failure to the caller instead of silently swallowing it.
        return json({"status": "error", "message": str(e)}, status=500)
    return json({"status": "ok", "result": result})


@app.route("/get_data", methods=["GET"])
def get_data_route(request):
    """Handle GET /get_data?user_id=...&offset=...

    Passes both query parameters to ``get_res`` and always answers with a
    JSON response.
    """
    user_id = request.args.get("user_id")
    offset = request.args.get("offset")
    if user_id is None or offset is None:
        return json({"status": "error", "message": "missing user_id or offset"}, status=400)
    try:
        result = get_res(user_id, offset)
    except Exception as e:
        return json({"status": "error", "message": str(e)}, status=500)
    return json({"status": "ok", "result": result})


def get_user_id(media_id):
    """Delegate to the imported ``get_user`` and return whatever it returns."""
    html = get_user(media_id)
    return html


def get_res(user_id, offset):
    """Delegate to the imported ``get_data`` and return whatever it returns."""
    html = get_data(user_id, offset)
    return html


if __name__ == "__main__":
    app.run(host="127.0.0.1", port=4007)

get_data.py 文件方式

# -*- coding: utf-8 -*-
import time
from ws4py.client.threadedclient import WebSocketClient
import _locale

# Replaces the private locale lookup so it always reports zh_CN / utf8.
# NOTE(review): presumably an encoding workaround for the host console - confirm.
_locale._getdefaultlocale = (lambda *args: ['zh_CN', 'utf8'])
import urllib3

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


class CG_Client(WebSocketClient):
    """WebSocket client that registers as "Python" and stores one reply on disk."""

    def opened(self):
        # Announce this peer to the relay server.
        self.max_cursor = 0
        self.send("Python")

    def closed(self, code, reason=None):
        # print("Closed down:", code, reason)
        pass

    def received_message(self, resp):
        # Persist the first reply, then close this connection.
        data = resp.data.decode("utf-8")
        write_data(data)
        # `ws` is a local of get_data(), not a module global, so close via self.
        self.close()


def write_data(data):
    """Overwrite ./data.txt with ``data`` (UTF-8)."""
    with open('./data.txt', 'w', encoding='utf-8') as f:
        f.write(data)


def get_data(user_id, offset):
    """Send the feed request path for ``user_id`` / ``offset`` over the WebSocket.

    Blocks in ``run_forever()`` until the connection ends; the reply itself is
    written to ./data.txt by ``CG_Client.received_message``. Returns ``None``.
    """
    ws = CG_Client('ws://127.0.0.1:10512/')
    ws.connect()
    try:
        real_arg = f"/api/feed_backflow/profile_share/v1/?category=profile_article&visited_uid={user_id}&stream_api_version=82&request_source=1&offset={offset}&user_id={user_id}&appId=1286&appType=mobile_detail_web&isAndroid=true&isIOS=false&isMobile=true&cookie_enabled=true&screen_width=288&screen_height=511&browser_language=zh-CN&browser_platform=MacIntel&browser_name=firefox&browser_version=85.0.4183.83&browser_online=true&timezone_name=Asia%2FShanghai"
        # Short pause before the request; presumably lets the "Python"
        # registration from opened() go out first - TODO confirm.
        time.sleep(0.1)
        ws.send(real_arg)
        ws.run_forever()
    except KeyboardInterrupt:
        print('异常关闭')
        ws.close()

get_user_id.py 文件方式

# -*- coding: utf-8 -*-
import time
from ws4py.client.threadedclient import WebSocketClient
import _locale
_locale._getdefaultlocale = (lambda *args: ['zh_CN', 'utf8'])
import io
import sys
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) # sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
# media_id = sys.argv[1].split(',', 1)[0]  # sys.argv--> [get_attention.py,user_id,cursor]


class CG_Client(WebSocketClient):
    """WebSocket client that registers as "Python" and stores one reply on disk."""

    def opened(self):
        # Announce this peer to the relay server.
        self.max_cursor = 0
        self.send("Python")

    def closed(self, code, reason=None):
        # print("Closed down:", code, reason)
        pass

    def received_message(self, resp):
        # Persist the first reply, then close this connection.
        data = resp.data.decode("utf-8")
        write_user(data)
        # `ws` is a local of get_user(), not a module global, so close via self.
        self.close()


def write_user(data):
    """Overwrite ./user.txt with ``data`` (UTF-8)."""
    with open('./user.txt', 'w', encoding='utf-8') as f:
        f.write(data)


def get_user(media_id):
    """Send the profile request path for ``media_id`` over the WebSocket.

    Blocks in ``run_forever()`` until the connection ends; the reply itself is
    written to ./user.txt by ``CG_Client.received_message``. Returns ``None``.
    """
    ws = CG_Client('ws://127.0.0.1:10512/')
    ws.connect()
    try:
        real_arg = f"/user/profile/homepage/share/v7/?media_id={media_id}&request_source=1&appId=1286&appType=mobile_detail_web&isAndroid=true&isIOS=false&isMobile=true&cookie_enabled=true&screen_width=393&screen_height=882&browser_language=zh-CN&browser_platform=MacIntel&browser_name=Chrome&browser_version=85.0.4183.83&browser_online=true&timezone_name=Asia%2FShanghai"
        # Short pause before the request; presumably lets the "Python"
        # registration from opened() go out first - TODO confirm.
        time.sleep(0.1)
        ws.send(real_arg)
        ws.run_forever()
    except KeyboardInterrupt:
        print('异常关闭')
        ws.close()

get_data.py 终端方式

# -*- coding: utf-8 -*-
import time
from ws4py.client.threadedclient import WebSocketClient
import _locale

# Replaces the private locale lookup so it always reports zh_CN / utf8.
# NOTE(review): presumably an encoding workaround for the host console - confirm.
_locale._getdefaultlocale = (lambda *args: ['zh_CN', 'utf8'])
import io
import sys
import urllib3

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Re-wrap stdout so the printed reply is always encoded as UTF-8.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')

if len(sys.argv) < 3:
    sys.exit("usage: get_data.py <user_id> <offset>")

# sys.argv --> [script, user_id, offset]; only the text before the first
# comma of the first argument is used.
user_id = sys.argv[1].split(',', 1)[0]
offset = str(sys.argv[2])


class CG_Client(WebSocketClient):
    """WebSocket client that registers as "Python" and prints one reply."""

    def opened(self):
        print("连接成功")
        self.max_cursor = 0
        self.send("Python")

    def closed(self, code, reason=None):
        print("Closed down:", code, reason)

    def received_message(self, resp):
        # Print the first reply, then close this connection.
        data = resp.data.decode("utf-8")
        print(data)
        self.close()


# Created outside the try block so the except branch can always reference it.
ws = CG_Client('ws://127.0.0.1:10512/')
try:
    ws.connect()
    real_arg = f"/api/feed_backflow/profile_share/v1/?category=profile_article&visited_uid={user_id}&stream_api_version=82&request_source=1&offset={offset}&user_id={user_id}&appId=1286&appType=mobile_detail_web&isAndroid=true&isIOS=false&isMobile=true&cookie_enabled=true&screen_width=288&screen_height=511&browser_language=zh-CN&browser_platform=MacIntel&browser_name=firefox&browser_version=85.0.4183.83&browser_online=true&timezone_name=Asia%2FShanghai"
    # Short pause before the request; presumably lets the "Python"
    # registration from opened() go out first - TODO confirm.
    time.sleep(0.1)
    ws.send(real_arg)
    ws.run_forever()
except KeyboardInterrupt:
    ws.close()

get_user_id.py 终端方式

# -*- coding: utf-8 -*-
import time
from ws4py.client.threadedclient import WebSocketClient
import _locale
_locale._getdefaultlocale = (lambda *args: ['zh_CN', 'utf8'])
import io
import sys
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Re-wrap stdout so the printed reply is always encoded as UTF-8.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')

if len(sys.argv) < 2:
    sys.exit("usage: get_user_id.py <media_id>")

# sys.argv --> [script, media_id]; only the text before the first comma of
# the first argument is used.
media_id = sys.argv[1].split(',', 1)[0]


class CG_Client(WebSocketClient):
    """WebSocket client that registers as "Python" and prints one reply."""

    def opened(self):
        print("连接成功")
        self.max_cursor = 0
        self.send("Python")

    def closed(self, code, reason=None):
        print("Closed down:", code, reason)

    def received_message(self, resp):
        # Print the first reply, then close this connection.
        data = resp.data.decode("utf-8")
        # data = resp.data.decode("gbk")
        print(data)
        self.close()


# Created outside the try block so the except branch can always reference it.
ws = CG_Client('ws://127.0.0.1:10512/')
try:
    ws.connect()
    real_arg = f"/user/profile/homepage/share/v7/?media_id={media_id}&request_source=1&appId=1286&appType=mobile_detail_web&isAndroid=true&isIOS=false&isMobile=true&cookie_enabled=true&screen_width=393&screen_height=882&browser_language=zh-CN&browser_platform=MacIntel&browser_name=Chrome&browser_version=85.0.4183.83&browser_online=true&timezone_name=Asia%2FShanghai"
    # Short pause before the request; presumably lets the "Python"
    # registration from opened() go out first - TODO confirm.
    time.sleep(0.1)
    ws.send(real_arg)
    ws.run_forever()
except KeyboardInterrupt:
    ws.close()

爬虫调用者

import time

import requests
import json
import urllib3

from toutiao2_文件方式.get_user_id import get_user, CG_Client

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


def open_user():
    """Load and return the JSON document stored in ./user.txt."""
    with open('./user.txt', 'r', encoding='utf-8') as f:
        return json.loads(f.read())


def open_data():
    """Load and return the JSON document stored in ./data.txt."""
    with open('./data.txt', 'r', encoding='utf-8') as f:
        return json.loads(f.read())


# Exchange a media_id for a user_id.
def start_ocean_toutiao_user_id(media_id):
    """Ask the local HTTP service to resolve ``media_id``, then read the result file.

    Returns the ``user_id`` found in ./user.txt when the stored ``media_id``
    matches the requested one, otherwise prints a warning and returns ``None``.
    """
    data = {
        'media_id': media_id,
    }
    requests.get('http://127.0.0.1:4007/get_user_id', params=data, timeout=3)
    time.sleep(2)
    response = open_user()
    # Tolerate a missing or null "data" object instead of raising AttributeError.
    payload = response.get('data') or {}
    res_media_id = payload.get('media_id')
    if res_media_id is not None and int(res_media_id) == int(media_id):
        return payload.get('user_id')
    print('media不对应,请检查')
    return None


# Fetch the data through the WebSocket relay.
def start_ocean_toutiao_data(user_id, offset):
    """Ask the local HTTP service for the feed of ``user_id`` at ``offset``.

    Returns the parsed content of ./data.txt, or ``None`` when ``user_id`` is
    ``None``.
    """
    if user_id is None:
        print('没有获取到user_id,请检查原因。可能消息堆积错误')
        return None
    data = {
        'user_id': user_id,
        'offset': offset
    }
    requests.get('http://127.0.0.1:4007/get_data', params=data, timeout=3)
    response = open_data()
    return response


def get_response(media_id, offset):
    """Resolve ``media_id`` to a user id, fetch its data, print both, return the data."""
    user_id = start_ocean_toutiao_user_id(media_id)
    print(user_id)
    data = start_ocean_toutiao_data(user_id, offset)
    print(data)
    return data


if __name__ == '__main__':
    offset = 1587744000
    # media_id = 6860767764
    media_id = 6989633739
    # get_response() already resolves the user id, fetches the data and prints
    # both, so a single call covers the whole flow.
    get_response(media_id, offset)

最新文章

  1. ES6 - Note2:解构赋值
  2. 获取下拉框的文本值和value值
  3. Invoke() 方法是 Unity3D 的一种委托机制
  4. 用命令提示符压缩文件,解压缩文件(不需要客户端安装7zip)
  5. [NOIP2010] 普及组
  6. 【转】linux之mkfs/mke2fs格式化
  7. kindle
  8. 有限状态机FSM(自动售报机Verilog实现)
  9. linux case 语句
  10. Android NOTE
  11. GetCursorPos/WindowFromPoint/SendMessage
  12. MVC4加载zTree树小控件
  13. 【装逼利器效率软件】一张图问你想不想用Launchy
  14. SpringBoot系列: SpringBoot Web项目中使用Shiro
  15. 【XSY2668】排列统计 DP
  16. 操作系统学习笔记(三) windows内存管理
  17. 【转】comparable Interface
  18. adb调试android设备 说的比较清楚的一篇文章
  19. CGJ02、BD09、西安80、北京54、CGCS2000常用坐标系详解
  20. ubuntu14.04禁止触摸板和恢复触摸板

热门文章

  1. 数据隐私和GDPR
  2. First-Spike-Based Visual Categorization Using Reward-Modulated STDP
  3. asp.net报表结构学习记录
  4. ClickHouse和他的朋友们(9)MySQL实时复制与实现
  5. 网站被K或者降权后应该如何恢复
  6. 贪心思想之区间贪心 关联洛谷P1803
  7. 【Go语言入门系列】(八)Go语言是不是面向对象语言?
  8. Android开发之java代码工具类。判断当前网络是否连接并请求下载图片
  9. Android开发之封装log打印日志的工具类,实用logutils详细代码
  10. Unity3D如何有效地组织代码?