环境

python -m venv venv #建立虚拟环境
venv\Scripts\activate #激活虚拟环境(Windows;Linux/macOS 请使用 source venv/bin/activate)
pip install tabula-py
pip install pandas
#安装 Java(JDK 或 JRE,8 及以上版本),并将其 bin 目录添加到 PATH 环境变量;tabula-py 依赖 Java 来运行 tabula-java
pip install tabula-py
pip install pandas openpyxl
# pdf_to_excel.py
import tabula
import pandas as pd
# Convert every table found in a PDF into a single Excel worksheet.
#
# Requires a Java runtime on PATH (used by tabula) and openpyxl (used by
# pandas to write .xlsx files).

# Path of the PDF file to read tables from.
pdf_path = "E:/Users/93914/Desktop/123.pdf"
# Extract the tables from all pages; the result is treated below as a list
# of DataFrames, one per detected table.
tables = tabula.read_pdf(pdf_path, pages="all", multiple_tables=True)
# An empty result means no table was detected in the PDF.
if tables:
    # Stack all extracted tables into one DataFrame, renumbering the rows
    # so the indices of the individual tables do not collide.
    df = pd.concat(tables, ignore_index=True)
    # Destination path of the Excel workbook.
    excel_path = "E:/Users/93914/Desktop/ddddd.xlsx"
    # Write the combined table to Excel without the DataFrame index column.
    df.to_excel(excel_path, index=False)
    print(f"成功将 {pdf_path} 转换为 {excel_path}")
else:
    print("PDF中未找到表格。")