-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpdf2word.html
118 lines (106 loc) · 4.74 KB
/
pdf2word.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
<!doctype html>
<html lang="zh-CN">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="description" content="">
<title>pdf2word</title>
<!-- Third-party libraries loaded from CDNs: PDF.js (PDF rendering), docx (Word generation), FileSaver (download) -->
<script src="https://unpkg.com/pdfjs-dist/build/pdf.js"></script>
<script src="https://unpkg.com/docx@7.1.0/build/index.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/FileSaver.js/1.3.8/FileSaver.js"></script>
</head>
<body class="d-flex flex-column min-vh-100">
<input type="file" id="pdfFileInput" accept=".pdf" multiple>
<button onclick="handleFileSelection()">选择PDF文件</button>
<!-- <button onclick="start()">start</button> -->
<script>
// Grab the PDF.js library object from the window property 'pdfjs-dist/build/pdf'.
// NOTE(review): presumably this key is registered by the pdf.js <script> in <head>;
// confirm it still exists for the (unpinned) pdfjs-dist version served by the CDN.
var pdfjsLib = window['pdfjs-dist/build/pdf'];
/**
 * Read a File/Blob completely into memory.
 *
 * @param {Blob} file - The file picked by the user.
 * @returns {Promise<ArrayBuffer>} Resolves with the raw file contents; rejects
 *   with the reader's error if the read fails.
 */
function fileToArrayBuffer(file) {
  return new Promise(function (resolve, reject) {
    const reader = new FileReader();
    reader.onload = function (event) {
      // The reader's result is already an ArrayBuffer; hand it over as-is.
      resolve(event.target.result);
    };
    reader.onerror = function (event) {
      reject(event.target.error);
    };
    reader.readAsArrayBuffer(file);
  });
}
/**
 * Click handler for the "choose PDF" button: opens the file picker of the
 * #pdfFileInput element and converts whatever PDFs the user selects.
 */
function handleFileSelection() {
  const fileInput = document.getElementById('pdfFileInput');
  // Assign the handler instead of calling addEventListener: this function runs
  // on every button click, and stacking a new listener each time would make a
  // single selection be processed once per previous click.
  fileInput.onchange = function (e) {
    // Snapshot the selection so that resetting the input on a later click
    // cannot empty the list while files are still being converted.
    const files = Array.from(e.target.files);
    if (files.length > 0) {
      // Convert the selected PDF files; surface failures instead of leaving
      // an unhandled promise rejection.
      processPDFFiles(files).catch(function (error) {
        console.error('PDF conversion failed', error);
      });
    }
  };
  // Clear the previous selection so that picking the same file again still
  // fires a change event.
  fileInput.value = '';
  fileInput.click();
}
/**
 * Convert the given PDF files one after another: each file is read into
 * memory and handed to start() before the next file is touched.
 *
 * @param {Iterable<File>} files - The files to convert, in order.
 * @returns {Promise<void>} Settles once every file has been processed.
 */
async function processPDFFiles(files) {
  for (const pdfFile of files) {
    const contents = await fileToArrayBuffer(pdfFile);
    await start(contents, pdfFile.name);
  }
}
/**
 * Render one PDF page onto a canvas and wrap the resulting PNG in a docx
 * section definition.
 *
 * @param {object} page - A PDF.js page proxy (as returned by pdf.getPage).
 * @returns {Promise<{children: object[]}>} Section options holding a single
 *   paragraph with the page image.
 */
async function renderPageToSection(page) {
  // Create a canvas sized exactly like the page at scale 1.
  const canvas = document.createElement('canvas');
  const context = canvas.getContext('2d');
  const viewport = page.getViewport({ scale: 1 });
  canvas.width = viewport.width;
  canvas.height = viewport.height;

  // Paint the page into the canvas.
  await page.render({
    canvasContext: context,
    viewport: viewport,
  }).promise;

  // Show the rendered page in the document as well.
  document.body.append(canvas);

  // Export the canvas as a PNG data URL and embed it as an image run.
  const imageData = canvas.toDataURL('image/png');
  const image = new docx.Paragraph({
    children: [new docx.ImageRun({
      data: imageData,
      transformation: {
        width: viewport.width,
        height: viewport.height,
      },
    })],
  });
  return { children: [image] };
}

/**
 * Convert one PDF into a Word document made of one page image per section,
 * then trigger a download named `${fileName}.docx`.
 *
 * @param {ArrayBuffer} file - Raw PDF data passed to pdfjsLib.getDocument.
 * @param {string} fileName - Name of the source file; used for the download name.
 * @returns {Promise<object>} Resolves with the generated docx Document after
 *   the download has been triggered. Rejects if loading, rendering or
 *   packing fails (previously such failures left the promise pending forever).
 */
async function start(file, fileName) {
  // Load the PDF.
  const loadingTask = pdfjsLib.getDocument(file);
  try {
    const pdf = await loadingTask.promise;

    // Render every page in parallel. Promise.all keeps the results in page
    // order, regardless of which page finishes rendering first.
    const pageRenders = [];
    for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
      pageRenders.push(pdf.getPage(pageNumber).then(renderPageToSection));
    }
    const sections = await Promise.all(pageRenders);

    // Sections are declared up front through the constructor, which is the
    // documented docx 7 way of building a document.
    const doc = new docx.Document({ sections: sections });

    // Pack and save the Word document before reporting completion.
    const blob = await docx.Packer.toBlob(doc);
    saveAs(blob, `${fileName}.docx`);
    return doc;
  } finally {
    // Release the PDF.js resources held for this document.
    await loadingTask.destroy();
  }
}
</script>
</body>
</html>