from lxml import etree html = "需要解析的网页" html = etree.HTML(html) #获取所有的href属性 url_list = html.xpath("//*/@href") for url in url_list: print(url)
from lxml import etree html = "需要解析的网页" html = etree.HTML(html) #获取所有的href属性 url_list = html.xpath("//*/@href") for url in url_list: print(url)