ReportGeneration/process/inference.py

93 lines
2.7 KiB
Python
Raw Normal View History

import aspose.words as aw
import pdfkit
from docx import Document
from docx.oxml.ns import qn
# from docx.oxml import OxmlElement
def remove_mark_from_doc(doc):
    """Strip the Aspose.Words evaluation marks from a document, in place.

    For every section of ``doc`` the embedded pictures are removed from the
    header and the footer paragraphs carrying the evaluation notice are
    cleared. Afterwards the first body paragraph is removed.

    Args:
        doc: Document object exposing ``sections`` and ``paragraphs``
            (the caller in this file passes a python-docx ``Document``).

    Returns:
        None. ``doc`` is modified in place.
    """
    # Only the year-independent prefix of the notice is matched, so the
    # paragraph is still recognized when the copyright range that follows
    # is not "2003-2024".
    evaluation_notice = "Evaluation Only. Created with Aspose.Words."

    def remove_images_from_element(element):
        """Remove embedded pictures from every run of ``element``."""
        for paragraph in element.paragraphs:
            for run in paragraph.runs:
                # Drawings first, then legacy picts, each detached from
                # its own parent node.
                for tag in ('w:drawing', 'w:pict'):
                    for node in run._element.findall(qn(tag)):
                        node.getparent().remove(node)

    def remove_text_from_element(element, text_to_remove=None):
        """Clear every paragraph of ``element`` containing ``text_to_remove``.

        When ``text_to_remove`` is omitted, the Aspose evaluation notice
        is used.
        """
        if text_to_remove is None:
            text_to_remove = evaluation_notice
        for paragraph in element.paragraphs:
            if text_to_remove in paragraph.text:
                # Empty the paragraph's content; the paragraph itself stays.
                paragraph.clear()

    for section in doc.sections:
        remove_images_from_element(section.header)
        remove_text_from_element(section.footer)

    # Drop the first body paragraph.
    # NOTE(review): this is unconditional - presumably the first paragraph is
    # always the evaluation notice inserted by Aspose; confirm, otherwise real
    # content is deleted.
    body_paragraphs = doc.paragraphs
    if body_paragraphs:
        first_element = body_paragraphs[0]._element
        first_element.getparent().remove(first_element)
def html2docx_aspose(html_path, docx_path):
    """Convert an HTML file to DOCX with Aspose.Words and strip its marks.

    The file is first written by Aspose.Words, then reopened with
    python-docx, passed through ``remove_mark_from_doc`` and saved again to
    the same path. A success message is printed at the end.

    Args:
        html_path: Path of the HTML file to convert.
        docx_path: Path of the DOCX file to write (overwritten twice).
    """
    # Pass 1: Aspose.Words performs the actual HTML -> DOCX conversion.
    aw.Document(html_path).save(docx_path, aw.SaveFormat.DOCX)

    # Pass 2: python-docx reopens the result so the marks can be removed.
    word_document = Document(docx_path)
    remove_mark_from_doc(word_document)
    word_document.save(docx_path)

    print("word报告生成成功")
def html2pdf_pdfkit(html_path, pdf_path):
    """Render an HTML file to PDF through pdfkit.

    Uses Letter page size, fixed margins, UTF-8 encoding, no outline and
    local file access enabled, then prints a success message.

    Args:
        html_path: Path of the HTML file to render.
        pdf_path: Path of the PDF file to write.
    """
    page_margins = {
        'margin-top': '0.35in',
        'margin-right': '0.75in',
        'margin-bottom': '0.75in',
        'margin-left': '0.75in',
    }
    render_options = {
        'page-size': 'Letter',
        **page_margins,
        'encoding': "UTF-8",
        # Options mapped to None take no value.
        'no-outline': None,
        'enable-local-file-access': None,
    }

    pdfkit.from_file(html_path, pdf_path, options=render_options)
    print("pdf报告生成成功")
############################### Alternative approaches
def html2docx_pypandoc(html_path, docx_path):
    """Convert an HTML file to DOCX using pypandoc.

    Prints a success message once the conversion call returns.

    Args:
        html_path: Path of the HTML file to convert.
        docx_path: Path of the DOCX file to write.
    """
    # Imported inside the function, so pypandoc is only required when this
    # converter is actually called.
    from pypandoc import convert_file

    convert_file(html_path, 'docx', outputfile=docx_path)
    print("Word报告生成成功")
def html2docx_spire(html_path, docx_path):
    """Convert an HTML file to DOCX using Spire.Doc.

    Args:
        html_path: Path of the HTML file to load.
        docx_path: Path of the DOCX file to write (Docx2016 format).

    Raises:
        Whatever ``LoadFromFile`` / ``SaveToFile`` raise; the document is
        closed before the exception propagates.
    """
    # Local import: inside this function ``Document`` is Spire's class and
    # shadows the python-docx ``Document`` imported at module level.
    from spire.doc import FileFormat, XHTMLValidationType, Document

    document = Document()
    try:
        document.LoadFromFile(html_path, FileFormat.Html, XHTMLValidationType.none)
        document.SaveToFile(docx_path, FileFormat.Docx2016)
    finally:
        # Always release the document, even when loading or saving fails.
        document.Close()