aspose-words生成文档,与html格式对应
This commit is contained in:
parent
95c7522a1e
commit
52a6d03899
|
|
@ -30,7 +30,7 @@
|
||||||
}
|
}
|
||||||
nav a {
|
nav a {
|
||||||
float: left;
|
float: left;
|
||||||
display: block;
|
/* display: block; */
|
||||||
color: white;
|
color: white;
|
||||||
text-align: center;
|
text-align: center;
|
||||||
padding: 14px 20px;
|
padding: 14px 20px;
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@
|
||||||
<h2>章节划分</h2>
|
<h2>章节划分</h2>
|
||||||
{% for section in sections %}
|
{% for section in sections %}
|
||||||
<div class="section">
|
<div class="section">
|
||||||
<h2>{{ section.title }}</h2>
|
<h3>{{ section.title }}</h3>
|
||||||
<p>{{ section.content }}</p>
|
<p>{{ section.content }}</p>
|
||||||
</div>
|
</div>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
|
@ -74,6 +74,7 @@
|
||||||
<h2>图片展示</h2>
|
<h2>图片展示</h2>
|
||||||
{% for image in images %}
|
{% for image in images %}
|
||||||
<!-- <div class="image-container"> -->
|
<!-- <div class="image-container"> -->
|
||||||
|
<!-- <img src="{{ image.src }}" alt="{{ image.alt }}"> -->
|
||||||
<div style="text-align: center; margin-bottom: 10px;">
|
<div style="text-align: center; margin-bottom: 10px;">
|
||||||
<img src="{{ image.src }}" alt="{{ image.alt }}" style="width: 200px; height: auto;">
|
<img src="{{ image.src }}" alt="{{ image.alt }}" style="width: 200px; height: auto;">
|
||||||
<p>{{ image.caption }}</p>
|
<p>{{ image.caption }}</p>
|
||||||
|
|
|
||||||
Binary file not shown.
|
|
@ -30,7 +30,7 @@
|
||||||
}
|
}
|
||||||
nav a {
|
nav a {
|
||||||
float: left;
|
float: left;
|
||||||
display: block;
|
/* display: block; */
|
||||||
color: white;
|
color: white;
|
||||||
text-align: center;
|
text-align: center;
|
||||||
padding: 14px 20px;
|
padding: 14px 20px;
|
||||||
|
|
@ -124,12 +124,12 @@
|
||||||
<h2>章节划分</h2>
|
<h2>章节划分</h2>
|
||||||
|
|
||||||
<div class="section">
|
<div class="section">
|
||||||
<h2>章节一</h2>
|
<h3>章节一</h3>
|
||||||
<p>内容一</p>
|
<p>内容一</p>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="section">
|
<div class="section">
|
||||||
<h2>章节二</h2>
|
<h3>章节二</h3>
|
||||||
<p>内容二</p>
|
<p>内容二</p>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|
@ -213,12 +213,14 @@
|
||||||
<h2>图片展示</h2>
|
<h2>图片展示</h2>
|
||||||
|
|
||||||
<!-- <div class="image-container"> -->
|
<!-- <div class="image-container"> -->
|
||||||
|
<!-- <img src="/home/dengjinlai/ReportGeneration/ReportGeneration/input/image.png" alt="图片1"> -->
|
||||||
<div style="text-align: center; margin-bottom: 10px;">
|
<div style="text-align: center; margin-bottom: 10px;">
|
||||||
<img src="/home/dengjinlai/ReportGeneration/ReportGeneration/input/image.png" alt="图片1" style="width: 200px; height: auto;">
|
<img src="/home/dengjinlai/ReportGeneration/ReportGeneration/input/image.png" alt="图片1" style="width: 200px; height: auto;">
|
||||||
<p>这是一张示例图片1</p>
|
<p>这是一张示例图片1</p>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- <div class="image-container"> -->
|
<!-- <div class="image-container"> -->
|
||||||
|
<!-- <img src="/home/dengjinlai/ReportGeneration/ReportGeneration/input/image.png" alt="图片2"> -->
|
||||||
<div style="text-align: center; margin-bottom: 10px;">
|
<div style="text-align: center; margin-bottom: 10px;">
|
||||||
<img src="/home/dengjinlai/ReportGeneration/ReportGeneration/input/image.png" alt="图片2" style="width: 200px; height: auto;">
|
<img src="/home/dengjinlai/ReportGeneration/ReportGeneration/input/image.png" alt="图片2" style="width: 200px; height: auto;">
|
||||||
<p>这是一张示例图片2</p>
|
<p>这是一张示例图片2</p>
|
||||||
|
|
|
||||||
Binary file not shown.
|
|
@ -0,0 +1,93 @@
|
||||||
|
import aspose.words as aw
|
||||||
|
import pdfkit
|
||||||
|
from docx import Document
|
||||||
|
from docx.oxml.ns import qn
|
||||||
|
# from docx.oxml import OxmlElement
|
||||||
|
|
||||||
|
|
||||||
|
def remove_mark_from_doc(doc):
    """Strip the Aspose.Words evaluation marks from a document, in place.

    Three things are removed:

    * every ``w:drawing`` / ``w:pict`` child of each run in each section
      header (embedded images);
    * the content of every footer paragraph that contains the evaluation
      notice text;
    * the first body paragraph, unconditionally.

    Args:
        doc: Document object exposing ``sections`` (each with ``header`` and
            ``footer``) and ``paragraphs``. Callers in this file pass a
            python-docx ``Document``.

    Returns:
        None. ``doc`` is mutated; saving it is left to the caller.
    """

    def remove_images_from_element(element):
        """Remove embedded images from every run of ``element``."""
        for paragraph in element.paragraphs:
            for run in paragraph.runs:
                # Both image encodings are handled the same way; drawings
                # are processed before legacy picts.
                for tag in ('w:drawing', 'w:pict'):
                    for node in run._element.findall(qn(tag)):
                        node.getparent().remove(node)

    def remove_text_from_element(element, text_to_remove=None):
        """Clear every paragraph of ``element`` containing the notice text."""
        if text_to_remove is None:
            # Default: the evaluation notice placed in the footer.
            # NOTE(review): the literal embeds a year range, so it may stop
            # matching with a different aspose-words release; confirm.
            text_to_remove = "Evaluation Only. Created with Aspose.Words. Copyright 2003-2024 Aspose Pty Ltd."

        for paragraph in element.paragraphs:
            if text_to_remove in paragraph.text:
                # Clears the whole paragraph, not just the matched text.
                paragraph.clear()

    for section in doc.sections:
        remove_images_from_element(section.header)
        remove_text_from_element(section.footer)

    # Delete the first body paragraph.
    # NOTE(review): presumably the evaluation banner inserted at the top of
    # the document; it is removed without inspecting its text, so confirm.
    if doc.paragraphs:
        first_element = doc.paragraphs[0]._element
        first_element.getparent().remove(first_element)
|
||||||
|
|
||||||
|
|
||||||
|
def html2docx_aspose(html_path, docx_path):
    """Convert an HTML file to DOCX with Aspose.Words, then strip its marks.

    Args:
        html_path: Path of the HTML file to convert.
        docx_path: Path the DOCX is written to. It is written twice: first
            by Aspose.Words, then overwritten after the cleanup pass.
    """
    # Pass 1: let Aspose.Words perform the HTML -> DOCX conversion.
    aspose_document = aw.Document(html_path)
    aspose_document.save(docx_path, aw.SaveFormat.DOCX)

    # Pass 2: reopen the result, remove the Aspose marks and save it back
    # to the same path.
    cleaned_document = Document(docx_path)
    remove_mark_from_doc(cleaned_document)
    cleaned_document.save(docx_path)

    print("word报告生成成功!")
|
||||||
|
|
||||||
|
def html2pdf_pdfkit(html_path, pdf_path):
    """Convert the HTML file at ``html_path`` into a PDF at ``pdf_path``.

    The conversion is delegated to ``pdfkit.from_file`` with a fixed set of
    page options (Letter page, 0.35in top margin, 0.75in other margins,
    UTF-8 encoding).
    """
    side_margin = '0.75in'
    wkhtmltopdf_options = {
        'page-size': 'Letter',
        'margin-top': '0.35in',
        'margin-right': side_margin,
        'margin-bottom': side_margin,
        'margin-left': side_margin,
        'encoding': "UTF-8",
        # A value of None marks a switch that takes no argument
        # (pdfkit option convention).
        'no-outline': None,
        'enable-local-file-access': None,
    }

    pdfkit.from_file(html_path, pdf_path, options=wkhtmltopdf_options)
    print("pdf报告生成成功!")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
###############################备选方案
|
||||||
|
|
||||||
|
def html2docx_pypandoc(html_path, docx_path):
    """Alternative converter: turn an HTML file into DOCX through pypandoc.

    Args:
        html_path: Path of the HTML file to convert.
        docx_path: Path the generated DOCX is written to.
    """
    # Imported lazily so pypandoc is only required when this fallback runs.
    from pypandoc import convert_file

    convert_file(html_path, 'docx', outputfile=docx_path)
    print("Word报告生成成功!")
|
||||||
|
|
||||||
|
|
||||||
|
def html2docx_spire(html_path, docx_path):
    """Alternative converter: turn an HTML file into DOCX through Spire.Doc.

    Args:
        html_path: Path of the HTML file to load.
        docx_path: Path the document is saved to, in ``Docx2016`` format.

    The Spire document is closed even if loading or saving raises; the
    exception itself still propagates to the caller.
    """
    # Imported lazily so Spire.Doc is only required when this fallback runs.
    from spire.doc import Document, FileFormat, XHTMLValidationType

    document = Document()
    try:
        # XHTMLValidationType.none: load without XHTML validation.
        document.LoadFromFile(html_path, FileFormat.Html, XHTMLValidationType.none)
        document.SaveToFile(docx_path, FileFormat.Docx2016)
    finally:
        # Always release the document, including on failure.
        document.Close()
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
from jinja2 import Environment, FileSystemLoader
|
from jinja2 import Environment, FileSystemLoader
|
||||||
import pdfkit
|
|
||||||
import config
|
import config
|
||||||
import pypandoc
|
from process.inference import html2docx_aspose, html2pdf_pdfkit
|
||||||
|
|
||||||
|
|
||||||
def render_html(template, output_file_path):
|
def render_html(template, output_file_path):
|
||||||
|
|
@ -46,42 +45,15 @@ def render_html(template, output_file_path):
|
||||||
f.write(report_html)
|
f.write(report_html)
|
||||||
|
|
||||||
|
|
||||||
def html2pdf(html_path, pdf_path):
|
|
||||||
# 将HTML文件转换为PDF
|
|
||||||
options = {
|
|
||||||
'page-size': 'Letter',
|
|
||||||
'margin-top': '0.35in',
|
|
||||||
'margin-right': '0.75in',
|
|
||||||
'margin-bottom': '0.75in',
|
|
||||||
'margin-left': '0.75in',
|
|
||||||
'encoding': "UTF-8",
|
|
||||||
'no-outline': None,
|
|
||||||
'enable-local-file-access': None
|
|
||||||
}
|
|
||||||
|
|
||||||
pdfkit.from_file(html_path, pdf_path, options=options)
|
|
||||||
print("pdf报告生成成功!")
|
|
||||||
|
|
||||||
def html2docx(html_path, docx_path):
|
|
||||||
# 将HTML文件转换为WORD
|
|
||||||
pypandoc.convert_file(html_path, 'docx', outputfile=docx_path)
|
|
||||||
print("Word报告生成成功!")
|
|
||||||
|
|
||||||
def html2docx_spire(html_path, docx_path):
|
|
||||||
from spire.doc import FileFormat,XHTMLValidationType,Document
|
|
||||||
# from spire.doc.common import *
|
|
||||||
document = Document()
|
|
||||||
document.LoadFromFile(html_path, FileFormat.Html, XHTMLValidationType.none)
|
|
||||||
document.SaveToFile(docx_path, FileFormat.Docx2016)
|
|
||||||
document.Close()
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
env = Environment(loader=FileSystemLoader(config.template_dir))
|
env = Environment(loader=FileSystemLoader(config.template_dir))
|
||||||
template = env.get_template("report_template.html")
|
template = env.get_template("report_template.html")
|
||||||
|
|
||||||
render_html(template, config.output_html_path)
|
render_html(template, config.output_html_path)
|
||||||
html2pdf(config.output_html_path, config.output_pdf_path)
|
html2pdf_pdfkit(config.output_html_path, config.output_pdf_path)
|
||||||
html2docx(config.output_html_path, config.output_docx_path)
|
html2docx_aspose(config.output_html_path, config.output_docx_path)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
||||||
|
|
@ -5,5 +5,17 @@ pandas==2.2.2
|
||||||
opencv-python==4.10.0.84
|
opencv-python==4.10.0.84
|
||||||
openpyxl==3.1.4
|
openpyxl==3.1.4
|
||||||
Jinja2==3.1.4
|
Jinja2==3.1.4
|
||||||
|
aspose-words==24.7.0
|
||||||
|
|
||||||
#Spire.Doc==12.7.1
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Spire.Doc==12.7.1
|
||||||
|
|
||||||
|
# html2docx==1.6.0
|
||||||
|
|
||||||
|
# sudo dpkg -i libssl1.0.0_1.0.2g-1ubuntu4.20_amd64.deb
|
||||||
|
# wget http://security.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.0.0_1.0.2g-1ubuntu4.20_amd64.deb
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,12 @@
|
||||||
|
from html2docx import html2docx
|
||||||
|
|
||||||
|
# Load the previously rendered report as text.
with open("output/generated_report.html") as report_file:
    html = report_file.read()

# html2docx() returns an io.BytesIO() object. The HTML must be valid.
buf = html2docx(html, title="My Document")

# Write the in-memory DOCX bytes out to disk.
with open("my.docx", "wb") as docx_file:
    docx_file.write(buf.getvalue())
|
||||||
|
|
||||||
|
|
||||||
Binary file not shown.
Loading…
Reference in New Issue