import aspose.words as aw
import pdfkit
from docx import Document
from docx.oxml.ns import qn


def remove_mark_from_doc(doc) -> None:
    """Strip the marks left by the Aspose library from a python-docx document.

    For every section, drawings/pictures are removed from the header and
    paragraphs containing the Aspose evaluation notice are cleared from the
    footer. Finally the first body paragraph of the document is deleted.

    Args:
        doc: A document object exposing ``sections`` and ``paragraphs`` the
            way ``docx.Document`` does. It is modified in place.
    """

    def remove_images_from_element(element) -> None:
        """Remove embedded images (``w:drawing`` / ``w:pict``) from every run."""
        image_tags = (qn('w:drawing'), qn('w:pict'))
        for paragraph in element.paragraphs:
            for run in paragraph.runs:
                for tag in image_tags:
                    # findall() returns a list, so removing while looping is safe.
                    for node in run._element.findall(tag):
                        node.getparent().remove(node)

    def remove_text_from_element(element, text_to_remove=None) -> None:
        """Clear every paragraph containing the Aspose mark (used on footers)."""
        if text_to_remove is None:
            text_to_remove = "Evaluation Only. Created with Aspose.Words. Copyright 2003-2024 Aspose Pty Ltd."
        for paragraph in element.paragraphs:
            if text_to_remove in paragraph.text:
                # Empty the paragraph that carries the notice.
                paragraph.clear()

    for section in doc.sections:
        remove_images_from_element(section.header)
        remove_text_from_element(section.footer)

    # Delete the first body paragraph.
    # NOTE(review): this is unconditional - presumably the first paragraph is
    # the evaluation banner inserted by Aspose; confirm against real output.
    body_paragraphs = doc.paragraphs
    if body_paragraphs:
        first_element = body_paragraphs[0]._element
        first_element.getparent().remove(first_element)


def html2docx_aspose(html_path, docx_path) -> None:
    """Convert an HTML file to DOCX with Aspose.Words, then strip its marks.

    The file is first written by Aspose, then reopened with python-docx,
    cleaned by ``remove_mark_from_doc`` and saved again to the same path.

    Args:
        html_path: Path of the HTML file to convert.
        docx_path: Path of the DOCX file to write (overwritten in the
            second pass).
    """
    aspose_doc = aw.Document(html_path)
    aspose_doc.save(docx_path, aw.SaveFormat.DOCX)

    # Second pass: reopen the generated file and remove the Aspose marks.
    cleaned_doc = Document(docx_path)
    remove_mark_from_doc(cleaned_doc)
    cleaned_doc.save(docx_path)
    print("word报告生成成功!")


def html2pdf_pdfkit(html_path, pdf_path) -> None:
    """Convert an HTML file to PDF using pdfkit.

    Args:
        html_path: Path of the HTML file to convert.
        pdf_path: Path of the PDF file to write.
    """
    # Options are handed to pdfkit as-is; entries whose value is None are
    # value-less switches.
    options = {
        'page-size': 'Letter',
        'margin-top': '0.35in',
        'margin-right': '0.75in',
        'margin-bottom': '0.75in',
        'margin-left': '0.75in',
        'encoding': "UTF-8",
        'no-outline': None,
        'enable-local-file-access': None,
    }
    pdfkit.from_file(html_path, pdf_path, options=options)
    print("pdf报告生成成功!")


# ---------------------------------------------------------------------------
# Alternative approaches
# ---------------------------------------------------------------------------
def html2docx_pypandoc(html_path, docx_path) -> None:
    """Convert an HTML file to DOCX using pypandoc.

    Args:
        html_path: Path of the HTML file to convert.
        docx_path: Path of the DOCX file to write.
    """
    # Imported lazily so pypandoc is only required when this fallback is used.
    import pypandoc

    pypandoc.convert_file(html_path, 'docx', outputfile=docx_path)
    print("Word报告生成成功!")


def html2docx_spire(html_path, docx_path) -> None:
    """Convert an HTML file to DOCX using Spire.Doc.

    Args:
        html_path: Path of the HTML file to convert.
        docx_path: Path of the DOCX file to write.
    """
    # Imported lazily; this Document intentionally shadows the python-docx
    # Document inside this function only.
    from spire.doc import Document, FileFormat, XHTMLValidationType

    document = Document()
    try:
        document.LoadFromFile(html_path, FileFormat.Html, XHTMLValidationType.none)
        document.SaveToFile(docx_path, FileFormat.Docx2016)
    finally:
        # Always close the document, even when loading or saving fails.
        document.Close()