首页 > temp > python入门教程 >
-
通过websocket抓取斗鱼弹幕和礼物消息
1.斗鱼弹幕协议
到斗鱼官方开放平台看斗鱼通讯协议,网址“https://open.douyu.com/source/api/63”,登录后可查看
所以根据斗鱼协议做编码函数:
def msg_encode(msg):
    """Serialize one Douyu protocol message into a binary packet.

    Packet layout (all integers little-endian), in the order assembled below:
    4-byte length, the same 4-byte length repeated, 2-byte message type,
    1-byte encryption field, 1-byte reserved field, then the UTF-8 encoded
    body terminated by a NUL byte.

    Args:
        msg: Message text in Douyu's serialized form, e.g. ``'type@=mrkl/'``.

    Returns:
        The complete packet as ``bytes``.
    """
    # The body is NUL-terminated and UTF-8 encoded.
    body = (msg + '\0').encode('utf-8')
    # The length field counts BYTES (encoded body + 8 header bytes), so it
    # must be derived from the encoded body: a non-ASCII character occupies
    # more than one byte and len(msg) would under-report the size.
    length_bytes = (len(body) + 8).to_bytes(4, byteorder='little')
    # 689 is the message type used for client -> server packets.
    msg_type = 689
    type_bytes = msg_type.to_bytes(2, byteorder='little')
    # Encryption field and reserved field: one zero byte each.
    zero_byte = (0).to_bytes(1, byteorder='little')
    # Header = length + length + type + encryption + reserved.
    header = length_bytes + length_bytes + type_bytes + zero_byte + zero_byte
    return header + body
然后根据斗鱼协议做解码函数:
def msg_decode(msg_bytes):
    """Split a received byte stream into the message bodies it contains.

    One WebSocket frame may carry several Douyu packets back to back. Each
    packet starts with a 4-byte little-endian length (repeated once), and
    that length does not include the first 4-byte length field itself.

    Args:
        msg_bytes: Raw bytes received from the danmu server.

    Returns:
        A list of decoded message bodies (``str``), one per packet, with the
        header and the trailing NUL byte removed. Bytes that are not valid
        UTF-8 are dropped.
    """
    messages = []
    cursor = 0
    total = len(msg_bytes)
    while cursor < total:
        # The length field is four bytes wide; read all four of them
        # (slicing to cursor + 3 would silently drop the high byte).
        content_length = int.from_bytes(msg_bytes[cursor:cursor + 4], byteorder='little')
        # Skip the first length field (4 bytes) and the remaining 8 header
        # bytes; the body ends one byte early to drop the NUL terminator.
        body_start = cursor + 4 + 8
        body_end = cursor + 4 + content_length - 1
        messages.append(msg_bytes[body_start:body_end].decode(encoding='utf-8', errors='ignore'))
        # Advance to the next packet in the same buffer.
        cursor += 4 + content_length
    return messages
解码后的消息需要反序列化:
def msg_format(msg_str):
    """Deserialize a Douyu message body into a dict.

    A body is a sequence of ``key@=value/`` items. Inside keys and values
    the protocol escapes ``/`` as ``@S`` and ``@`` as ``@A``.

    Args:
        msg_str: One decoded message body, e.g. ``'type@=chatmsg/nn@=bob/'``.

    Returns:
        A dict mapping each key to its value with one level of escaping
        removed. Items that contain no ``@=`` separator are skipped, so the
        result is always a dict (possibly empty).
    """
    msg_dict = {}
    for item in msg_str.split('/'):
        # Split on the raw separator BEFORE unescaping: an escaped '@A='
        # inside a value would otherwise turn into a second '@=' and
        # truncate the value.
        key, sep, value = item.partition('@=')
        if not sep:
            # Empty trailing piece after the final '/', or a malformed item.
            continue
        msg_dict[_douyu_unescape(key)] = _douyu_unescape(value)
    return msg_dict


def _douyu_unescape(text):
    """Undo one level of Douyu escaping ('@S' -> '/', then '@A' -> '@')."""
    # '@S' must be handled first: an escaped '@' followed by a literal 'S'
    # is encoded as '@AS', which must decode to '@S', not to '/'.
    return text.replace('@S', '/').replace('@A', '@')
2.抓包分析
建议通过chrome开发者工具抓包,或者使用fiddler抓包,网页的东西,直接用chrome更方便
为什么不直接使用斗鱼开放平台提供的api呢?因为这些api是面向注册开发者的,都需要申请后才能使用,普通用户只能通过抓包分析通用接口
分析:
wsproxy.douyu.com中交互消息可以获得多个弹幕服务器地址,我们直接取一个就行,不需要在程序中实现实时分析
然后分析danmuproxy.douyu.com连接中详细交互信息,实现与服务器交互
匿名登录时弹幕中礼物消息可能默认被屏蔽,需在登录时提交开启指令,或登录后提交开启指令
弹幕消息类型与指令非常丰富,这里举例简单的消息和指令,详细可以继续分析
结果如下:
登录:type@=loginreq/roomid@=74751/dfl@=/username@=visitor8243989/uid@=1132317461/ver@=20190610/aver@=218101901/ct@=0/
##匿名分配的username与uid可以从wsproxy.douyu.com中交互消息获取,也可直接自定义提交就行
退出登录:type@=logout/
心跳消息:type@=mrkl/
加入组消息:type@=joingroup/rid@=74751/gid@=-9999/ #gid默认为1,此处改为-9999,即海量弹幕模式
屏蔽礼物消息:type@=dmfbdreq/dfl@=sn@AA=105@ASss@AA=1@AS@Ssn@AA=106@ASss@AA=1@AS@Ssn@AA=107@ASss@AA=1@AS@Ssn@AA=108@ASss@AA=1@AS@Ssn@AA=110@ASss@AA=1@AS@Ssn@AA=901@ASss@AA=1@AS@S/
开启礼物消息:type@=dmfbdreq/dfl@=sn@AA=105@ASss@AA=0@AS@Ssn@AA=106@ASss@AA=0@AS@Ssn@AA=107@ASss@AA=0@AS@Ssn@AA=108@ASss@AA=0@AS@Ssn@AA=110@ASss@AA=0@AS@Ssn@AA=901@ASss@AA=0@AS@S/
##开启与屏蔽礼物可以详细控制,可以逐项尝试分析
注:登录消息中有个字段/dfl@=,此处直接加入屏蔽礼物或开启礼物指令中相应dfl@=后面的内容,即可在登录时控制礼物屏蔽或开启
3.礼物消息处理
获取斗鱼礼物类型,需抓包分析来源网址
url1 = 'https://webconf.douyucdn.cn/resource/common/gift/flash/gift_effect.json'
url2 = 'https://webconf.douyucdn.cn/resource/common/prop_gift_list/prop_gift_config.json'
将获取到的礼物json处理一下,合并且去除不需要信息,做成字典:
def _strip_jsonp(text, callback='DYConfigCallback('):
    """Return the JSON payload of a ``DYConfigCallback(...);`` JSONP response."""
    text = text.strip()
    if not text.startswith(callback):
        # No wrapper present: assume the text is already plain JSON.
        return text
    text = text[len(callback):].rstrip().rstrip(';').rstrip()
    if text.endswith(')'):
        text = text[:-1]
    return text


def get_gift_dict():
    """Download Douyu's gift tables and merge them into one lookup dict.

    Two JSONP resources are fetched; the first contributes the entries
    under ``data.flashConfig`` and the second the entries under ``data``.
    Entries from the second resource overwrite same-id entries from the
    first.

    Returns:
        A dict mapping gift id to gift name.

    Raises:
        requests.RequestException: if a download fails or times out.
        ValueError: if a response body is not valid JSON after unwrapping.
        KeyError: if the expected keys are missing from the JSON.
    """
    effect_url = 'https://webconf.douyucdn.cn/resource/common/gift/flash/gift_effect.json'
    prop_url = 'https://webconf.douyucdn.cn/resource/common/prop_gift_list/prop_gift_config.json'
    # A timeout keeps an unresponsive CDN from blocking the caller forever.
    effect_text = requests.get(effect_url, timeout=10).text
    prop_text = requests.get(prop_url, timeout=10).text
    effect_gifts = json.loads(_strip_jsonp(effect_text))['data']['flashConfig']
    prop_gifts = json.loads(_strip_jsonp(prop_text))['data']
    gift_json = {}
    for gift in effect_gifts:
        gift_json[gift] = effect_gifts[gift]['name']
    for gift in prop_gifts:
        gift_json[gift] = prop_gifts[gift]['name']
    return gift_json
4.websocket连接
python环境先安装websocket包 pip3 install websocket-client
引入websocket包后,使用websocket.WebSocketApp建立websocket连接客户端,创建时在 on_open、on_error、on_message、on_close 4个参数中传入相应事件发生时需要处理的方法
顾名思义,4个参数很好理解了
websocket.WebSocketApp(url, on_open=on_open, on_error=on_error,on_message=on_message, on_close=on_close)
5.效果与源码
实际效果展示:
源码(演示代码,异常判断,数据校验就不做了):
__author__ = 'admin'
import json
import threading
import time

import requests
import websocket


class DyDanmu:
    """Anonymous Douyu danmu (chat) and gift listener over a WebSocket.

    On connect the client logs in, joins the room's message group and starts
    a heartbeat thread. Chat messages (``chatmsg``) and gift messages
    (``dgb``) are printed to stdout.
    """

    def __init__(self, roomid, url):
        """Prepare the gift table and the WebSocket client (does not connect).

        Args:
            roomid: Douyu room id, interpolated into the login/join messages.
            url: WebSocket URL of a danmu server.
        """
        self.gift_dict = self.get_gift_dict()
        self.gift_dict_keys = self.gift_dict.keys()
        self.room_id = roomid
        self.client = websocket.WebSocketApp(url, on_open=self.on_open, on_error=self.on_error,
                                             on_message=self.on_message, on_close=self.on_close)
        # Set by stop() (and on close) so the heartbeat loop can end.
        self._stopped = threading.Event()
        # Daemon thread: it must not keep the process alive on exit.
        self.heartbeat_thread = threading.Thread(target=self.heartbeat, daemon=True)

    def start(self):
        """Connect and block, dispatching events until the socket closes."""
        self.client.run_forever()

    def stop(self):
        """Log out and close the connection."""
        self._stopped.set()
        try:
            self.logout()
        finally:
            # Close even if the logout packet could not be sent.
            self.client.close()

    # The callbacks below accept *args because websocket-client releases
    # differ in what they pass: some call bound-method callbacks without the
    # WebSocketApp object, current ones always pass it first (and pass extra
    # close details to on_close). The payload of interest is always last.

    def on_open(self, *args):
        """Log in, join the room group and start the heartbeat."""
        self.login()
        self.join_group()
        self.heartbeat_thread.start()

    def on_error(self, *args):
        """Print the reported error."""
        error = args[-1] if args else None
        print(error)

    def on_close(self, *args):
        """Stop the heartbeat loop and report that the connection closed."""
        self._stopped.set()
        print('close')

    def send_msg(self, msg):
        """Encode ``msg`` as a Douyu packet and send it."""
        msg_bytes = self.msg_encode(msg)
        # The packet header is raw binary (not valid UTF-8), so it is sent
        # as a binary frame rather than with the default text opcode.
        self.client.send(msg_bytes, opcode=websocket.ABNF.OPCODE_BINARY)

    def on_message(self, *args):
        """Decode a received frame and print chat and gift messages."""
        msg = args[-1]
        for msg_str in self.msg_decode(msg):
            msg_dict = self.msg_format(msg_str)
            # .get() throughout: not every server message carries these keys.
            msg_type = msg_dict.get('type')
            if msg_type == 'chatmsg':
                print(msg_dict.get('nn', '') + ':' + msg_dict.get('txt', ''))
            elif msg_type == 'dgb':
                sender = msg_dict.get('nn', '')
                count = msg_dict.get('gfcnt', '')
                gift_id = msg_dict.get('gfid', '')
                if gift_id in self.gift_dict:
                    print(sender + '\t送出\t' + count + '\t个\t' + self.gift_dict[gift_id])
                else:
                    print(sender + '\t送出\t' + count + '\t个\t' + gift_id + '\t未知礼物')

    def login(self):
        """Send the anonymous login request for this room."""
        # The dfl field carries the gift-filter settings (ss=0 enables gift
        # messages). NOTE(review): unlike the dfl value in close_gift(), this
        # one ends without a final '@AS@S'; kept verbatim — confirm against
        # a fresh capture.
        login_msg = 'type@=loginreq/roomid@=%s/' \
                    'dfl@=sn@AA=105@ASss@AA=0@AS@Ssn@AA=106@ASss@AA=0@AS@Ssn@AA=107@ASss@AA=0@AS@Ssn@AA=108@ASss@AA=0@AS@Ssn@AA=110@ASss@AA=0@AS@Ssn@AA=901@ASss@AA=0/' \
                    'username@=%s/uid@=%s/ltkid@=/biz@=/stk@=/devid@=8d8c22ce6093e6a7264f99da00021501/ct@=0/pt@=2/cvr@=0/tvr@=7/apd@=/rt@=1605498503/vk@=0afb8a90c2cb545e8459d60c760dc08b/' \
                    'ver@=20190610/aver@=218101901/dmbt@=chrome/dmbv@=78/' % (
                        self.room_id, 'visitor4444086', '1178849206'
                    )
        self.send_msg(login_msg)

    def logout(self):
        """Send the logout message."""
        logout_msg = 'type@=logout/'
        self.send_msg(logout_msg)

    def join_group(self):
        """Join the room's message group (gid -9999)."""
        join_group_msg = 'type@=joingroup/rid@=%s/gid@=-9999/' % (self.room_id)
        self.send_msg(join_group_msg)

    def close_gift(self):
        """Ask the server to stop pushing gift messages."""
        close_gift_msg = 'type@=dmfbdreq/dfl@=sn@AA=105@ASss@AA=1@AS@Ssn@AA=106@ASss@AA=1@AS@Ssn@AA=107@ASss@AA=1@AS@Ssn@AA=108@ASss@AA=1@AS@Ssn@AA=110@ASss@AA=1@AS@Ssn@AA=901@ASss@AA=1@AS@S/'
        self.send_msg(close_gift_msg)

    def heartbeat(self):
        """Send a keep-alive packet every 45 seconds until stopped."""
        while not self._stopped.is_set():
            try:
                self.send_msg('type@=mrkl/')
            except (websocket.WebSocketException, OSError) as exc:
                # The socket is gone; end the thread quietly instead of
                # dying with an unhandled traceback.
                print(exc)
                break
            print('发送心跳')
            # Wakes up early when stop()/on_close() sets the event.
            self._stopped.wait(45)

    def msg_encode(self, msg):
        """Serialize one message into a Douyu binary packet.

        Layout (little-endian): 4-byte length, the same length repeated,
        2-byte message type, 1-byte encryption field, 1-byte reserved field,
        then the NUL-terminated UTF-8 body.
        """
        body = (msg + '\0').encode('utf-8')
        # The length counts bytes, so it comes from the encoded body;
        # len(msg) would be wrong for non-ASCII text.
        length_bytes = (len(body) + 8).to_bytes(4, byteorder='little')
        # 689 is the message type used for client -> server packets.
        msg_type = 689
        type_bytes = msg_type.to_bytes(2, byteorder='little')
        # Encryption field and reserved field: one zero byte each.
        zero_byte = (0).to_bytes(1, byteorder='little')
        header = length_bytes + length_bytes + type_bytes + zero_byte + zero_byte
        return header + body

    def msg_decode(self, msg_bytes):
        """Split received bytes into a list of decoded message bodies.

        The buffer may contain several packets back to back; each packet's
        length field does not include the first 4-byte length field itself.
        """
        messages = []
        cursor = 0
        total = len(msg_bytes)
        while cursor < total:
            # Read all four bytes of the length field.
            content_length = int.from_bytes(msg_bytes[cursor:cursor + 4], byteorder='little')
            # Skip the 4 + 8 header bytes; stop one byte early to drop the
            # NUL terminator.
            body_start = cursor + 4 + 8
            body_end = cursor + 4 + content_length - 1
            messages.append(msg_bytes[body_start:body_end].decode(encoding='utf-8', errors='ignore'))
            cursor += 4 + content_length
        return messages

    def msg_format(self, msg_str):
        """Deserialize a ``key@=value/`` message body into a dict.

        Items without an ``@=`` separator are skipped, so a dict is always
        returned (possibly empty).
        """
        msg_dict = {}
        for item in msg_str.split('/'):
            # Split on the raw separator before unescaping, so an escaped
            # '@A=' inside a value cannot be mistaken for a separator.
            key, sep, value = item.partition('@=')
            if not sep:
                continue
            msg_dict[self._unescape(key)] = self._unescape(value)
        return msg_dict

    @staticmethod
    def _unescape(text):
        """Undo one level of Douyu escaping ('@S' -> '/', then '@A' -> '@')."""
        # '@S' first: '@AS' encodes a literal '@S' and must not become '/'.
        return text.replace('@S', '/').replace('@A', '@')

    @staticmethod
    def _strip_jsonp(text, callback='DYConfigCallback('):
        """Return the JSON payload of a ``DYConfigCallback(...);`` response."""
        text = text.strip()
        if not text.startswith(callback):
            # No wrapper present: assume the text is already plain JSON.
            return text
        text = text[len(callback):].rstrip().rstrip(';').rstrip()
        if text.endswith(')'):
            text = text[:-1]
        return text

    def get_gift_dict(self):
        """Download Douyu's gift tables and return a gift id -> name dict.

        Entries from the second resource overwrite same-id entries from the
        first. Download or parse failures propagate to the caller.
        """
        effect_url = 'https://webconf.douyucdn.cn/resource/common/gift/flash/gift_effect.json'
        prop_url = 'https://webconf.douyucdn.cn/resource/common/prop_gift_list/prop_gift_config.json'
        # A timeout keeps an unresponsive CDN from blocking __init__ forever.
        effect_text = requests.get(effect_url, timeout=10).text
        prop_text = requests.get(prop_url, timeout=10).text
        effect_gifts = json.loads(self._strip_jsonp(effect_text))['data']['flashConfig']
        prop_gifts = json.loads(self._strip_jsonp(prop_text))['data']
        gift_json = {}
        for gift in effect_gifts:
            gift_json[gift] = effect_gifts[gift]['name']
        for gift in prop_gifts:
            gift_json[gift] = prop_gifts[gift]['name']
        return gift_json


if __name__ == '__main__':
    roomid = '74751'
    url = 'wss://danmuproxy.douyu.com:8506/'
    dy = DyDanmu(roomid, url)
    dy.start()
出处:https://www.cnblogs.com/aadd123/p/14009467.html