Common Python Notes (2)

# (the beginning of this snippet was cut off in the source)
前10']] # display index and data grouped by key  dfcod
a1.index = a1.index.droplevel()   # drop one level of a MultiIndex's names

# filter a series against a list
df0[df0['id'].isin([3,4])]    # keep rows whose 'id' is in the list
df0[~df0['id'].isin([3,4])]   # same, negated

# sort a series by a custom list order
df['words'] = df['words'].astype('category')   # must convert to category dtype first
df['words'].cat.reorder_categories([1,2,3], inplace=True)   # when the list matches the existing categories exactly
df['words'].cat.set_categories([1,2,3], inplace=True)       # when the list has more entries
df['words'].cat.set_categories([1,2,3], inplace=True)       # when the list has fewer entries
df.sort_values('words', inplace=True)

# read/write MySQL with pandas + SQLAlchemy
from sqlalchemy import create_engine
mysq = create_engine('mysql+pymysql://root:mysql.123@localhost/abdata?charset=utf8')
df.to_sql('coun', mysq, if_exists='append', index=False)     # append to the table
df.to_sql('counts', mysq, if_exists='replace', index=False)  # drop the table and rewrite it
df = pd.read_sql_query('select * from cod1', mysq)           # query a table

# read/write MySQL with pymysql
import pymysql
conn = pymysql.connect('127.0.0.1', 'root', 'mysql.123', 'data', charset='utf8')
cur = conn.cursor()
sql1 = "SELECT * from (SELECT * from data1 ORDER BY id DESC LIMIT %s ) aa order by id" % sum   # 'sum' is a row-count variable from the original script (it shadows the builtin)
cur.execute(sql1)    # prefer cur.execute(query, (n,)) placeholders to avoid SQL injection
c1 = cur.fetchall()  # read
conn.commit()        # commit writes
cur.close()
conn.close()
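The category trick above is the part that trips people up; a minimal runnable sketch with toy data (the `order` list is made up):

import pandas as pd

df = pd.DataFrame({'words': ['b', 'c', 'a', 'c']})
order = ['c', 'a', 'b']                               # desired order (hypothetical)
df['words'] = df['words'].astype('category')
df['words'] = df['words'].cat.set_categories(order)   # values not in 'order' become NaN; the assignment form also works on pandas 2.x, where inplace=True was removed
df = df.sort_values('words')                          # rows now follow 'order'
print(df)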

 

DataFrame style settings

import matplotlib.pyplot as plt
from matplotlib import colors

def show(v):   # color cell text by sign
    col = 'black' if v > 0 else 'green'
    return 'color:%s' % col

def background_gradient(s, m, M, cmap='PuBu', low=0, high=0.8):
    # shade a whole subset against one global min/max instead of per-column
    rng = M - m
    norm = colors.Normalize(m - (rng * low), M + (rng * high))
    normed = norm(s.values)
    c = [colors.rgb2hex(x) for x in plt.cm.get_cmap(cmap)(normed)]
    return ['background-color: %s' % color for color in c]   # assumed CSS string; the original return line was HTML-garbled

def highlight_max(s, m):
    is_max = s == m
    return ['background-color: yellow' if v else '' for v in is_max]   # same assumption as above


# tabs / accdeteil are report DataFrames and set1 / set2 lists of column labels, defined earlier in the original script
tabs.style.applymap(show).background_gradient(cmap='Reds',axis = 1,low = 0,high = 1,subset = set1).\
apply(background_gradient,cmap='Purples',m=tabs[set2].min().min(),M=tabs[set2].max().max(),low=0,high=1,subset = set2).\
apply(highlight_max,m=tabs[set2].max().max()).background_gradient(cmap='Wistia',axis = 1,subset=['总金额'])

accdeteil.style.applymap(show).background_gradient(cmap='Reds',axis = 1,low = 0,high = 1).\
background_gradient(cmap='Reds',axis = 1,low = 0,high = 1 ,subset=set2).\
background_gradient(cmap='Purples',axis = 1,low = 0,high = 1,subset = pd.IndexSlice['前10',:'9']).\
background_gradient(cmap='Purples',axis = 1,low = 0,high = 1,subset = pd.IndexSlice['前20',:'9']).\
background_gradient(cmap='Purples',axis = 1,low = 0,high = 1,subset = pd.IndexSlice['前05','1_':]).\
background_gradient(cmap='Purples',axis = 1,low = 0,high = 1,subset = pd.IndexSlice['前15','1_':]).\
background_gradient(cmap='GnBu',axis = 0,low = 0,high = 1 ,subset=['SH_']).\
apply(highlight_max,m=tabs[set2].max().max())


# see also
https://blog.csdn.net/xiaodongxiexie/article/details/71202279

# colormap styles
https://matplotlib.org/tutorials/colors/colormaps.html
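A quick way to sanity-check the helpers above is on toy data (the `demo` frame and output file name are made up); styles only render as HTML, so write the result out:

import numpy as np
import pandas as pd

demo = pd.DataFrame(np.random.randn(4, 3), columns=['a', 'b', 'c'])
styled = (demo.style
          .applymap(show)                                   # per-cell text color
          .apply(background_gradient, m=demo.min().min(),
                 M=demo.max().max(), cmap='Purples', low=0, high=1)
          .apply(highlight_max, m=demo.max().max()))
with open('demo_style.html', 'w') as f:
    f.write(styled.render())                                # render() in older pandas; to_html() from 1.3 on
print('wrote demo_style.html')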

 

Plotting with pandas

import matplotlib.pyplot as plt

ax1 = df1[['策略净值','指数净值']].plot(figsize=(15,8))  # line chart straight from the DataFrame
ax1.axhline(y=1, ls=":", c="r")        # mark the y=1 baseline
ax1.legend(loc='upper right')          # pin the legend position
plt.title('策略简单回测%s' % x, size=15)   # 'x' names the backtest, set earlier
plt.xlabel('')

for i in range(len(df1)):
    # mark entries (flat today, long tomorrow) in red and exits (long yesterday, flat today) in green
    if df1['当天仓位'][i]==0 and df1['当天仓位'].shift(-1)[i]==1:
        plt.annotate('', xy=(df1.index[i], df1.策略净值[i]),
                     xytext=(df1.index[i], df1.策略净值[i]-0.05),  # offset added so the arrow actually renders
                     arrowprops=dict(facecolor='r', shrink=0.05))
    if df1['当天仓位'][i]==0 and df1['当天仓位'].shift(1)[i]==1:
        plt.annotate('', xy=(df1.index[i], df1.策略净值[i]),
                     xytext=(df1.index[i], df1.策略净值[i]+0.05),
                     arrowprops=dict(facecolor='g', shrink=0.1))

bbox = dict(boxstyle="round", fc="w", ec="0.5", alpha=0.9)  # text-box style
t = f'累计收益率:策略{TA1}%,指数{TA2}%;\n年化收益率:策略{AR1}%,指数{AR2}%;' + \
    f'\n最大回撤:  策略{MD1}%,指数{MD2}%;\n策略alpha: {round(alpha,2)},策略beta:{round(beta,2)}; \n夏普比率:  {S}'
plt.text(df1.index[0], df1['指数净值'].min(), t, size=13, bbox=bbox)  # stats box at a fixed spot; TA1/AR1/MD1 etc. are computed earlier (see the sketch after this block)
ax = plt.gca()   # clean up the frame
ax.spines['right'].set_color('none')
ax.spines['top'].set_color('none')
plt.show()
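The stats referenced in the text box (TA1, AR1, MD1, ...) come from earlier in the original script; a hedged sketch of how such numbers can be derived from a net-value series (252 trading days per year assumed):

import pandas as pd

def perf_stats(nav: pd.Series, periods_per_year: int = 252):
    """Cumulative return, annualized return and max drawdown (all in %) from a net-value series."""
    total_ret = nav.iloc[-1] / nav.iloc[0] - 1
    ann_ret = (1 + total_ret) ** (periods_per_year / len(nav)) - 1
    drawdown = nav / nav.cummax() - 1              # distance from the running peak
    return (round(total_ret * 100, 2),
            round(ann_ret * 100, 2),
            round(drawdown.min() * 100, 2))

# e.g. TA1, AR1, MD1 = perf_stats(df1['策略净值'])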

 

Web scraping

from bs4 import BeautifulSoup
import requests
headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'
    }

htm = requests.get(url=url, headers=headers, timeout=30, stream=False).text   # 'url' is supplied by the caller
soup = BeautifulSoup(htm, 'html.parser')
txt = soup.find_all('div', class_='lax-s')
#txt = soup.find('div', class_='qi').children
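# hedged follow-up: pull the plain text out of each matched node
# ('lax-s' is whatever class the target page actually uses)
for node in txt:
    print(node.get_text(strip=True))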


# etree-based scraping; original article: https://mp.weixin.qq.com/s/c2Sg_LVTjOokePY2lxCGSA
import requests
import pandas as pd
from pprint import pprint
from lxml import etree
import time
import warnings
warnings.filterwarnings("ignore")

for i in range(1,15):
    print("scraping page " + str(i))
    url = "https://search.51job.com/list/000000,000000,0000,00,9,99,%25E6%2595%25B0%25E6%258D%25AE,2,"+str(i)+'.html?'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'
    }
    web = requests.get(url, headers=headers)
    web.encoding = "gbk"
    dom = etree.HTML(web.text)
    #print(etree.tostring(dom, encoding="utf-8", pretty_print=True).decode("utf-8"))  # dump the whole html (the dom object cannot be printed directly)
    # 1. job title
    job_name = dom.xpath('//div[@class="dw_table"]/div[@class="el"]//p/span/a[@target="_blank"]/@title')
    # 2. company name
    company_name = dom.xpath('//div[@class="dw_table"]/div[@class="el"]/span[@class="t2"]/a[@target="_blank"]/@title')
    # 3. work location
    address = dom.xpath('//div[@class="dw_table"]/div[@class="el"]/span[@class="t3"]/text()')
    # 4. salary: this column has blanks, so grab the nodes first to keep the frame aligned
    salary_mid = dom.xpath('//div[@class="dw_table"]/div[@class="el"]/span[@class="t4"]')
    salary = [s.text for s in salary_mid]  # a None still occupies a slot, keeping lengths equal
    # 5. posting date
    release_time = dom.xpath('//div[@class="dw_table"]/div[@class="el"]/span[@class="t5"]/text()')
    #----------------------------------------------------------------------------------------------#
    # the fields below live on each job's detail page, so collect the detail urls first
    # 6. detail-page urls
    deep_url = dom.xpath('//div[@class="dw_table"]/div[@class="el"]//p/span/a[@target="_blank"]/@href')
    RandomAll = []
    JobDescribe = []
    CompanyType = []
    CompanySize = []
    Industry = []
    for j in range(len(deep_url)):   # renamed from 'i' so it no longer shadows the page counter
        web_test = requests.get(deep_url[j], headers=headers)
        web_test.encoding = "gbk"
        dom_test = etree.HTML(web_test.text)
        # 7. experience and education, scraped as one field to be cleaned later (hence 'random_all')
        random_all = dom_test.xpath('//div[@class="tHeader tHjob"]//div[@class="cn"]/p[@class="msg ltype"]/text()')
        # 8. job description
        job_describe = dom_test.xpath('//div[@class="tBorderTop_box"]//div[@class="bmsg job_msg inbox"]/p/text()')
        # 9. company type
        company_type = dom_test.xpath('//div[@class="tCompany_sidebar"]//div[@class="com_tag"]/p[1]/@title')
        # 10. company size (headcount)
        company_size = dom_test.xpath('//div[@class="tCompany_sidebar"]//div[@class="com_tag"]/p[2]/@title')
        # 11. industry
        industry = dom_test.xpath('//div[@class="tCompany_sidebar"]//div[@class="com_tag"]/p[3]/@title')
        # stash everything in the per-page lists
        RandomAll.append(random_all)
        JobDescribe.append(job_describe)
        CompanyType.append(company_type)
        CompanySize.append(company_size)
        Industry.append(industry)
        # sleep a little to stay under the anti-crawler radar
        time.sleep(1)
    # saving once per page (instead of once at the very end) limits what a late failure can lose
    df = pd.DataFrame()
    df["岗位名称"] = job_name
    df["公司名称"] = company_name
    df["工作地点"] = address
    df["工资"] = salary
    df["发布日期"] = release_time
    df["经验、学历"] = RandomAll
    df["公司类型"] = CompanyType
    df["公司规模"] = CompanySize
    df["所属行业"] = Industry
    df["岗位描述"] = JobDescribe
    # the write can occasionally fail, so wrap it in a try/except
    try:
        df.to_csv("job_info.csv", mode="a+", header=None, index=None, encoding="gbk")
    except:
        print("failed to write this page")
    time.sleep(1)
print("done")
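Since the CSV is appended with header=None, reading it back needs the column names passed explicitly; a small sketch using the column list from the code above:

import pandas as pd

cols = ["岗位名称", "公司名称", "工作地点", "工资", "发布日期",
        "经验、学历", "公司类型", "公司规模", "所属行业", "岗位描述"]
jobs = pd.read_csv("job_info.csv", names=cols, encoding="gbk")
print(jobs.shape)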

 

OCR image recognition

# Requires: tesseract-ocr (on the system PATH), the chi_sim.traineddata language pack, and pytesseract 0.2.4

from PIL import Image
import pytesseract, os

png = r'D:\123\111.png'
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files (x86)\Tesseract-OCR\tesseract.exe'
img = Image.open(png)
tim = os.stat(png).st_mtime   # file modification time
img1 = img.size               # (width, height)
aa = pytesseract.image_to_string(img, lang='chi_sim')   # recognize simplified Chinese
print(img1, tim)
print(aa)
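Recognition on noisy screenshots often improves with simple preprocessing; a hedged sketch (the threshold of 150 is just a starting guess to tune per image):

from PIL import Image
import pytesseract

img = Image.open(r'D:\123\111.png').convert('L')    # grayscale
img = img.point(lambda p: 255 if p > 150 else 0)    # hard threshold
img = img.resize((img.width * 2, img.height * 2))   # upscaling can help small fonts
print(pytesseract.image_to_string(img, lang='chi_sim'))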

 

WebDriver automation testing

# Requires: chromedriver v69 and the matching Chrome 69 build (ChromeSetup_64_69.exe)

import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

# 'fs' (an open log file) and 's' (a dict of picks, e.g. {'key': ..., 'A': [...], 'T': ...})
# are defined earlier in the original script
try:
    driver = webdriver.Chrome()
    driver.get("http://user/login")
    time.sleep(1)

    driver.find_element_by_id('username').send_keys('123123')
    driver.find_element_by_id('password').send_keys('123123')
    driver.find_element_by_id('login').click()
    time.sleep(2)

    driver.find_element_by_xpath('//*[@id="header"]/div[7]/div/div[1]/ul/li[4]/a').click()
    time.sleep(2)
    driver.find_elements_by_class_name('content')[2].click()
    time.sleep(2)

    s1 = driver.find_element_by_class_name('i1').text
    s2 = s1[3:6]
    s3 = driver.find_element_by_id('pre-kanjiang').text
    s4 = driver.find_element_by_xpath('//*[@id="money"]/strong').text
    s5 = driver.find_element_by_xpath('//*[@id="money"]/em').text
    print('key=', s2, 'time=', s3, s5 + '=', s4)
    fs.write('key=' + s2 + '\n' + 'time=' + s3 + '\n' + s5 + '=' + s4 + '\n')
    time.sleep(2)

    if int(s2) == int(s.get('key')):
        elements = driver.find_elements_by_class_name('code')

        if 'A' in s.keys():
            data_values = s.get('A')
            for i in data_values:
                a_button_index = int(i) - 1
                elements[a_button_index].click()
                print('a_button_index = ', a_button_index)
                fs.write('a_button_index = ' + str(a_button_index) + '\n')
        if 'B' in s.keys():
            data_values = s.get('B')
            for j in data_values:
                b_button_index = int(j) + 9
                elements[b_button_index].click()
                print('b_button_index = ', b_button_index)
                fs.write('b_button_index = ' + str(b_button_index) + '\n')
        if 'C' in s.keys():
            data_values = s.get('C')
            for k in data_values:
                c_button_index = int(k) + 19
                elements[c_button_index].click()
                print('c_button_index = ', c_button_index)
                fs.write('c_button_index = ' + str(c_button_index) + '\n')

        time.sleep(1)
        driver.find_elements_by_name('danwei')[1].click()
        driver.find_element_by_class_name('txt').clear()
        driver.find_element_by_class_name('txt').send_keys(int(s.get('T')) * 1)
        driver.find_element_by_class_name('tztj-hover').click()
        time.sleep(2)
        driver.find_element_by_class_name('tz-true-hover').click()

        time.sleep(2)
        driver.find_element_by_xpath("/html/body/div[2]/div[3]/div/button[1]").send_keys(Keys.ENTER)
        time.sleep(2)
        driver.quit()

except Exception as e:
    print(e)
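The fixed time.sleep() calls are slow and brittle; Selenium's WebDriverWait polls until a condition holds instead. A sketch of the same login step with explicit waits (reusing the `driver` from above):

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

wait = WebDriverWait(driver, 10)   # poll for up to 10 s
user = wait.until(EC.presence_of_element_located((By.ID, 'username')))
user.send_keys('123123')
wait.until(EC.element_to_be_clickable((By.ID, 'login'))).click()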

 

C/S (desktop) client automation testing

import os,sys,time
import pywinauto
import pywinauto.clipboard
import pywinauto.application
import win32clipboard as wincb
import win32con


def winmax():   # maximize the window; the body is truncated in the source, so a minimal pywinauto completion is assumed here
    app = pywinauto.application.Application().connect(title_re='.*')   # hypothetical: match the real window title
    app.top_window().maximize()
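The clipboard imports above suggest the original went on to read text out of the client through the clipboard; a hedged sketch of that pattern (the target window and key sequence are assumptions):

def copy_all_text(app):
    win = app.top_window()
    win.type_keys('^a^c')                    # Ctrl+A then Ctrl+C in the focused control
    time.sleep(0.5)                          # give the clipboard a moment
    return pywinauto.clipboard.GetData()     # clipboard contents as text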



  
