python-docx操作Word文件

发布时间:2019-05-30 20:55:28编辑:auto阅读(2455)

    基础操作

    from docx import Document
    from docx.shared import Inches
    
    # Start from an empty document.
    document = Document()

    # Headings take a level with 0 <= level <= 9: level 0 is "Title",
    # level 1 (also used when the level is omitted) is "Heading 1".
    document.add_heading('Document Title', level=0)
    # add_paragraph accepts text='' and style=None.
    p = document.add_paragraph('A plain paragraph having some ')
    # add_run accepts text=None and style=None; the run it returns has
    # the bold and italic attributes used below.
    p.add_run('bold').bold = True
    p.add_run(' and some ')
    p.add_run('italic.').italic = True

    document.add_heading('Heading, level 1', level=1)
    document.add_paragraph('Intense quote', style='Intense Quote')

    document.add_paragraph('first item in unordered list', style='List Bullet')
    document.add_paragraph('first item in ordered list', style='List Number')
    # Insert a picture, 1.25 inches wide.
    document.add_picture('monty-truth.png', width=Inches(1.25))

    # Build a table: one header row, then one row per record.
    records = (
        (3, '101', 'Spam'),
        (7, '422', 'Eggs'),
        (4, '631', 'Spam, spam, eggs, and spam'),
    )

    table = document.add_table(rows=1, cols=3)
    hdr_cells = table.rows[0].cells
    for cell, caption in zip(hdr_cells, ('Qty', 'Id', 'Desc')):
        cell.text = caption
    for quantity, ident, description in records:
        row_cells = table.add_row().cells
        # The quantity is an int, so it is converted before being stored as cell text.
        for cell, value in zip(row_cells, (str(quantity), ident, description)):
            cell.text = value

    document.add_page_break()

    对象关系

    1556184806969

    调用document.add_paragraph()之后,传入的文本内容默认会放入该paragraph的第一个run中。

    添加样式

    中文字体微软雅黑,西文字体Times New Roman

    import docx
    from docx.enum.text import WD_ALIGN_PARAGRAPH
    from docx.oxml.ns import qn
    from docx.shared import Cm, Pt
    
    # The imports for this section bring in the docx package itself, so the
    # constructor is reached through it rather than via a bare Document name.
    document = docx.Document()
    # Look up the style named 'Normal' so that its font can be changed
    style = document.styles['Normal']
    # Typeface for Western (Latin) text
    style.font.name = 'Times New Roman'
    # Typeface for East Asian (Chinese) text, written to the w:eastAsia attribute.
    # NOTE(review): presumably relies on the font.name assignment above having
    # created the rPr/rFonts elements — keep this line after it; confirm.
    style.element.rPr.rFonts.set(qn('w:eastAsia'), '微软雅黑')

    首行缩进

    # Get the paragraph formatting attached to the style
    paragraph_format = style.paragraph_format
    # Indent the first line by 0.74 cm, which the author equates to two characters
    paragraph_format.first_line_indent = Cm(0.74)

    单独设置标题样式

    # Create a level-0 heading with no text yet
    title_ = document.add_heading(level=0)
    # Centre the heading
    title_.alignment = WD_ALIGN_PARAGRAPH.CENTER
    # Add the heading text as a run so it can be formatted on its own.
    # NOTE(review): `title` is not defined anywhere in this snippet —
    # presumably a string supplied by the surrounding code; confirm.
    title_run = title_.add_run(title)
    # Font size of the heading text
    title_run.font.size = Pt(14)
    # Typeface for Western (Latin) text in the heading
    title_run.font.name = 'Times New Roman'
    # Typeface for East Asian (Chinese) text in the heading
    title_run.element.rPr.rFonts.set(qn('w:eastAsia'), '微软雅黑')

    设置超链接

    def add_hyperlink(paragraph, url, text, color, underline):
        """
        Append a hyperlink to the end of a paragraph.

        :param paragraph: The paragraph that receives the hyperlink.
        :param url: A string containing the target url.
        :param text: The text displayed for the url.
        :param color: Value written to a ``w:color`` element on the link's run
            (for example ``'FF8822'``); ``None`` adds no colour element.
        :param underline: When falsy, a ``w:u`` element with value ``'none'``
            is added to the run; when truthy no underline element is written.
        :return: The newly created ``w:hyperlink`` element.
        """
        # Register the url as an external relationship of the paragraph's part;
        # the returned relationship id is what the w:hyperlink element points at.
        rel_id = paragraph.part.relate_to(
            url, docx.opc.constants.RELATIONSHIP_TYPE.HYPERLINK, is_external=True
        )

        make_element = docx.oxml.shared.OxmlElement
        qualified = docx.oxml.shared.qn

        link = make_element('w:hyperlink')
        link.set(qualified('r:id'), rel_id)

        # Run properties: optional colour, optional "no underline" override.
        run_props = make_element('w:rPr')
        if color is not None:
            color_el = make_element('w:color')
            color_el.set(qualified('w:val'), color)
            run_props.append(color_el)
        if not underline:
            underline_el = make_element('w:u')
            underline_el.set(qualified('w:val'), 'none')
            run_props.append(underline_el)

        # Assemble run -> hyperlink -> paragraph; the properties are attached
        # to the run before its text is set, as in the original ordering.
        run = make_element('w:r')
        run.append(run_props)
        run.text = text
        link.append(run)

        paragraph._p.append(link)
        return link
    
    # Build a fresh document with one empty paragraph to hold the links
    document = docx.Document()
    p = document.add_paragraph()
    
    # Add a hyperlink with color=None and underline=True, so add_hyperlink
    # writes neither a w:color nor a w:u element for it
    hyperlink = add_hyperlink(p, 'http://www.google.com', 'Google', None, True)
    
    # Add a hyperlink with a custom color and no underline
    hyperlink = add_hyperlink(p, 'http://www.google.com', 'Google', 'FF8822', False)
    
    # Write the document to demo.docx
    document.save('demo.docx')

    上面的函数是把整段文本直接作为一个超链接添加;而日常使用时,超链接往往只是段落中的某个关键词,或以<a>标签的形式出现,这时可以利用paragraph和run这两个对象的关系来处理。

    比如有文本内容如下,将其中的<a>标签换为超链接:

    """I am trying to add an hyperlink in a MS Word document using docx module for <a href="python.org">Python</a>. Just do it."""

    # Decide whether a text fragment looks like a link
    def is_text_link(text):
        """Return True when *text* contains any of the known URL markers.

        The check is a plain substring heuristic, not URL validation.

        :param text: The string to inspect.
        :return: True if at least one marker occurs in *text*, else False.
        """
        markers = ('http', '://', 'www.', '.com', '.org', '.cn', '.xyz', '.htm')
        # Every marker must be tried; returning False from inside the loop
        # would stop after the first marker and ignore the rest.
        return any(marker in text for marker in markers)
    
    # Add a paragraph, turning <a href="...">label</a> markup into hyperlinks
    def add_text_link(document, text):
        """Append a paragraph built from *text*, converting ``<a>`` tags to links.

        The text is split on the ``<a href="``, ``">`` and ``</a>`` delimiters.
        A segment that ``is_text_link`` accepts and that is followed by another
        segment is treated as a url, with the following segment as its label;
        the pair is inserted with ``add_hyperlink``. Every other segment is
        added to the paragraph as a plain run.

        :param document: The document that receives the new paragraph.
        :param text: Source text, optionally containing ``<a href="...">`` tags.
        """
        import re  # local import: no import of re is visible in this file

        paragraph = document.add_paragraph()
        # Split the text on the pieces of the <a> tag
        segments = re.split(r'<a href="|">|</a>', text)
        label_consumed = False
        for index, segment in enumerate(segments):
            # The segment right after a url is its label and has already been
            # written as the hyperlink text. It is skipped by position rather
            # than by comparing values, so an identical plain segment later in
            # the text is kept and a link-looking label is not re-read as a url.
            if label_consumed:
                label_consumed = False
                continue
            if is_text_link(segment) and index + 1 < len(segments):
                add_hyperlink(paragraph, segment, segments[index + 1], None, True)
                label_consumed = True
            else:
                # Plain text. This includes a link-looking final segment that
                # has no label after it: it is kept as text, not dropped.
                paragraph.add_run(segment)

    参考文档

    1. https://python-docx.readthedocs.io/en/latest/index.html
    2. https://github.com/python-openxml/python-docx/issues/74
    3. http://www.warmeng.com/2018/12/02/auto_report/

关键字

上一篇: Python2.7在Windows下CM

下一篇: 闭包函数