VB.net 2010 视频教程 VB.net 2010 视频教程 python基础视频教程
SQL Server 2008 视频教程 c#入门经典教程 Visual Basic从入门到精通视频教程
当前位置:
首页 > temp > python入门教程 >
  • 如何用html写书

原理:利用xml.etree.ElementTree对html进行解析和写入。注意ElementTree是XML解析器,因此html文件必须是格式良好的XML(所有标签都要正确闭合),否则解析会报错。

复制代码
# encoding=utf-8
import xml.etree.ElementTree as ET  # parses the HTML/XML file and writes it back

# Base name (without the ".html" suffix) of the book file, asked once at
# start-up; circle() parses and rewrites f'{name}.html'.
name = input('type in file name:')


def _load_book():
    """Parse ``{name}.html`` and return ``(root, link_div, body_div)``.

    ``link_div`` is the ``<div id="link">`` table of contents and
    ``body_div`` the ``<div id="body">`` content area.  The paths used here
    only match *direct* children of the root element, so both divs must sit
    directly under the document root.
    """
    root = ET.parse(f'{name}.html').getroot()
    return root, root.find('div[@id="link"]'), root.find('div[@id="body"]')


def _save_book(root):
    """Write the modified tree rooted at *root* back to ``{name}.html``."""
    # The encoding must be given explicitly: ElementTree defaults to
    # US-ASCII, which would not keep the Chinese text as readable characters.
    ET.ElementTree(root).write(f'{name}.html', encoding='utf-8')


def _append_heading(link_parent, body_parent, tag, anchor, heading):
    """Add one heading to both the table of contents and the body.

    Under *link_parent* this creates ``<div><TAG><a href="#anchor">heading
    </a></TAG></div>``; under *body_parent* it creates ``<div id="anchor">``
    holding ``<TAG>heading</TAG>`` followed by a "back to top" link that
    points at ``#link``.  *tag* is ``'h1'`` or ``'h2'``.
    """
    toc_entry = ET.SubElement(link_parent, 'div')
    toc_link = ET.SubElement(ET.SubElement(toc_entry, tag), 'a')
    toc_link.text = heading
    toc_link.set('href', f'#{anchor}')

    section = ET.SubElement(body_parent, 'div')
    section.set('id', anchor)
    ET.SubElement(section, tag).text = heading
    back_to_top = ET.SubElement(section, 'a')
    back_to_top.text = '回到顶部'
    back_to_top.set('href', '#link')


def _ask_index(count):
    """Show the menu hint and prompt until the answer is an int in 0..count.

    Returns ``0`` when the user wants to create a new entry at this level,
    otherwise the 1-based index of an existing entry.  Non-numeric or
    out-of-range answers are rejected and asked again instead of raising
    ``ValueError``/``IndexError`` (or silently selecting the wrong entry
    through a negative index).
    """
    print('在此处创建输入0\n选择章节输入序号')
    while True:
        try:
            choice = int(input('输入:'))
        except ValueError:
            choice = -1  # force the "invalid" branch below
        if 0 <= choice <= count:
            return choice
        print(f'无效输入,请输入0到{count}之间的数字')


def _create_h1():
    """Keep adding h1 chapters until the user types ``again`` or ``exit``.

    Returns ``True`` if the user asked to go back to the start of the
    program (``again``) and ``False`` if the user asked to quit (``exit``).
    """
    while True:
        # Re-parse on every round so the numbering reflects the saved file.
        root, link_div, body_div = _load_book()
        number = len(link_div.findall('div')) + 1
        print('输入again回到程序开始处,输入exit退出程序')
        title = input('输入章节名:')
        if title == 'again':
            return True
        if title == 'exit':
            print('program terminated!')
            return False
        page = input('页数:')
        _append_heading(link_div, body_div, 'h1',
                        f'{number}', f'{number}、{title}(p{page})')
        _save_book(root)


def _create_h2(chapter):
    """Keep adding h2 sections to the 1-based h1 *chapter*.

    Returns ``True`` for ``again`` and ``False`` for ``exit``.
    """
    while True:
        root, link_div, body_div = _load_book()
        link_chapter = link_div.findall('div')[chapter - 1]
        body_chapter = body_div.findall('div')[chapter - 1]
        number = len(link_chapter.findall('div')) + 1
        print('输入again回到程序开始处,输入exit退出程序')
        title = input('输入章节名:')
        if title == 'again':
            return True
        if title == 'exit':
            print('program terminated!')
            return False
        page = input('页数:')
        _append_heading(link_chapter, body_chapter, 'h2',
                        f'{chapter}_{number}',
                        f'{chapter}.{number}、{title}(p{page})')
        _save_book(root)


def _create_statement(chapter, section):
    """Keep appending ``<p>`` paragraphs to the chosen h2 section.

    *chapter* and *section* are the 1-based positions of the h1 div and of
    the h2 div inside it, both counted in the body area.  Returns ``True``
    for ``again`` and ``False`` for ``exit``.
    """
    while True:
        root, _, body_div = _load_book()
        target = body_div.findall('div')[chapter - 1].findall('div')[section - 1]
        print('输入again回到程序开始处,输入exit退出程序')
        text = input('输入内容:')
        if text == 'again':
            return True
        if text == 'exit':
            print('program terminated!')
            return False
        page = input('页数:')
        ET.SubElement(target, 'p').text = f'{text}(p{page})'
        _save_book(root)


def circle():
    """Run the interactive editor for the book stored in ``{name}.html``.

    Each round lists the existing h1 chapters and asks for a choice:
    ``0`` starts adding new chapters; a chapter number lists that chapter's
    h2 sections and asks again, where ``0`` starts adding sections and a
    section number lists its paragraphs and starts adding paragraphs.
    Typing ``again`` at any content prompt restarts from the chapter list;
    typing ``exit`` ends the function.

    The session is driven by loops rather than by functions calling each
    other, so the number of entries added in one run is not limited by the
    interpreter's recursion limit.  Reads the module-level ``name``.
    """
    while True:
        _, link_div, body_div = _load_book()
        chapters = link_div.findall('div')
        for chapter_div in chapters:  # list every h1 title
            print(chapter_div.find('h1').find('a').text)

        a = _ask_index(len(chapters))
        if a == 0:
            restart = _create_h1()
        else:
            sections = chapters[a - 1].findall('div')
            for section_div in sections:  # list every h2 title of the chapter
                print(section_div.find('h2').find('a').text)

            b = _ask_index(len(sections))
            if b == 0:
                restart = _create_h2(a)
            else:
                chosen = body_div.findall('div')[a - 1].findall('div')[b - 1]
                for paragraph in chosen.findall('p'):  # show existing content
                    print(paragraph.text)
                restart = _create_statement(a, b)

        if not restart:
            return


circle()  # start the interactive session; it keeps prompting until the user types exit
复制代码

注意:千万要仔细检查。本人就是因为打错了某个变量名却没有发现,耽搁了很久……总之现在已经顺利解决了。


出处:https://www.cnblogs.com/daxiangcai/p/16188213.html


相关教程