使用python-docx读取doc,docx

发布时间:2019-09-24 08:20:59编辑:auto阅读(1442)

    API:    http://python-docx.readthedocs.io/en/latest/#api-documentation

    将doc转为docx:

            from win32com import client as wc

            word = wc.Dispatch("Word.Application")

            doc = word.Documents.Open(路径+名称.doc)

            doc.SaveAs(路径+名称.docx, 12)   # 12 表示保存为 docx 格式(wdFormatXMLDocument)

            doc.Close()

            word.Quit()

    读取段落:

            import docx

            docStr = docx.Document(docName)   # 打开文档

            for paragraph in docStr.paragraphs:

                    parStr = paragraph.text

                    --》paragraph.style.name == 'Heading 1'  一级标题   

                    --》paragraph.paragraph_format.alignment == 1  居中显示

                    --》paragraph.style.next_paragraph_style.paragraph_format.alignment == 1  下一段居中显示

                    --》paragraph.style.font.color

    读取表格:

            numTables = docStr.tables

            for table in numTables:

                    #行列个数

                    row_count = len(table.rows)

                    col_count = len(table.columns)

                    for i in range(row_count):

                            row = table.rows[i].cells

                            # 第 i 行、第 j 列单元格的内容:row[j].text

               或者:

                        row_count = len(table.rows)
                        col_count = len(table.columns)
                        for i in range(row_count):
                                for j in range(col_count):
                                        print(table.cell(i,j).text)

     

     

关键字