import requests
import time
import json
import threading

# Scrapes job postings from https://careers.tencent.com/search.html

# Millisecond timestamp string sent as the `timestamp` query parameter
# of every API request made by this script.
timestamp = '%d' % (time.time() * 1000)

# Seconds to wait on a request before giving up. Without a timeout a
# stalled connection would block its worker thread (and join()) forever.
REQUEST_TIMEOUT = 10

# All worker threads append to the same output file; this lock keeps each
# JSON line intact instead of letting concurrent writes interleave.
_write_lock = threading.Lock()


def parse_url(json_url):
    """Fetch one page of postings and append each posting to the output file.

    For every entry under ``Data.Posts`` in the JSON returned by
    ``json_url``, a second request is made to the ``ByPostId`` endpoint to
    read the posting's ``Requirement`` field. The combined record is
    printed and appended as one JSON line to ``腾讯招聘.json``.

    Args:
        json_url: URL of the listing query API for a single page.

    Raises:
        requests.RequestException: if a request fails or times out.
        KeyError: if a posting or its detail response lacks an expected key.
    """
    res = requests.get(json_url, timeout=REQUEST_TIMEOUT).json()
    # Treat a missing or null `Data` / `Posts` as "no postings on this page"
    # rather than crashing the thread while iterating over None.
    posts = (res.get('Data') or {}).get('Posts') or []
    for post in posts:
        # Job title
        title = post['RecruitPostName']
        # Responsibilities
        resbity = post['Responsibility']
        # Posting ID (named post_id to avoid shadowing the builtin `id`)
        post_id = post['PostId']
        # Public link to the posting
        posi_url = 'https://careers.tencent.com/jobdesc.html?postId={}'.format(post_id)
        # Detail endpoint for this posting, looked up by its ID
        id_url = 'https://careers.tencent.com/tencentcareer/api/post/ByPostId?timestamp={}&postId={}&language=zh-cn'.format(
            timestamp, post_id)
        res_ment = requests.get(id_url, timeout=REQUEST_TIMEOUT).json()
        # Job requirements
        rement = res_ment['Data']['Requirement']
        # Last update time of the posting
        posi_time = post['LastUpdateTime']
        item = {
            '职位': title,
            '职责': resbity,
            '要求': rement,
            '链接': posi_url,
            '时间': posi_time
        }
        print('正在写入 → ', item)
        line = json.dumps(item, ensure_ascii=False) + '\n'
        with _write_lock:
            with open('腾讯招聘.json', 'a', encoding='utf-8') as f:
                f.write(line)


# Number of listing pages to fetch
num = 10
t_list = []
for count in range(1, num + 1):
    print('加载第{}页数据'.format(count))
    # Listing API URL for this page
    json_url = 'https://careers.tencent.com/tencentcareer/api/post/Query?timestamp={}&countryId=&cityId=&bgIds=&productId=&categoryId=&parentCategoryId=&attrId=&keyword=&pageIndex={}&pageSize=10&language=zh-cn&area=cn'.format(
        timestamp, count)
    t = threading.Thread(target=parse_url, args=(json_url,))
    t_list.append(t)

# Start one thread per page, then wait for all of them to finish.
for t in t_list:
    t.start()
for t in t_list:
    t.join()