使用 JS 提取 PDF 指定范围页面
How to Extract Pages from a PDF and Render Them with JavaScript
在线 Demo:Pdf Modification
代码
HTML
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Pdf Modification</title>
  <link rel="stylesheet" href="style.css">
</head>
<body>
  <div class="inputarea">
    <!-- aria-label gives each otherwise unlabeled control an accessible name. -->
    <input type="file" accept=".pdf" name="pdfinput" id="pdfinput" aria-label="PDF file">
    <input type="text" id="rangeSelector" placeholder="eg. 1-3" aria-label="Page range">
    <!-- Explicit type so the button never acts as a submit button. -->
    <button id="extractBtn" type="button">Extract</button>
  </div>

  <!-- A title lets assistive technology announce what the frame contains. -->
  <iframe id="pdfFrame" title="PDF preview"></iframe>

  <!-- These two libraries provide the PDFLib and saveAs globals used by script.js. -->
  <script src="https://unpkg.com/pdf-lib@1.17.1/dist/pdf-lib.min.js"></script>
  <script src="https://cdn.jsdelivr.net/npm/file-saver@2.0.5/dist/FileSaver.min.js"></script>
  <script src="./script.js" type="module"></script>
</body>
</html>
JS
// Look up the four page controls by element id, in declaration order.
const [input, pdfFrame, rangeSelector, extractBtn] = [
  "pdfinput",
  "pdfFrame",
  "rangeSelector",
  "extractBtn",
].map((elementId) => document.getElementById(elementId));

// Bytes of the most recently loaded PDF; stays undefined until a file is read.
let pdfArrayBuffer;
7
/**
 * Read a file into memory, wrapping the callback-based FileReader API in a
 * Promise so callers can `await` it.
 *
 * @param {Blob} file - File (or Blob) to read, e.g. an entry of `input.files`.
 * @returns {Promise<ArrayBuffer>} Resolves with the reader's result (the file's
 *   bytes). Rejects with `reader.error` when the read fails.
 */
function readAsyncFile(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => {
      resolve(reader.result);
    };
    // Reject with the underlying error instead of the ProgressEvent, so that
    // callers receive something with a name and message they can report.
    reader.onerror = () => {
      reject(reader.error);
    };
    reader.readAsArrayBuffer(file);
  });
}
19
// Object URL currently displayed in the iframe, kept so it can be released
// when the next document replaces it.
let activePdfUrl = null;

/**
 * Show PDF bytes in the preview iframe.
 *
 * The bytes are wrapped in a Blob and exposed to the iframe through an object
 * URL. The URL created by the previous call is revoked once it has been
 * replaced, so repeated renders do not keep every earlier PDF alive.
 *
 * @param {ArrayBuffer} arrayBuffer - Raw bytes of the PDF to display.
 * @returns {void}
 */
function renderPdf(arrayBuffer) {
  const pdfBlob = new Blob([new Uint8Array(arrayBuffer)], {
    type: "application/pdf",
  });
  const staleUrl = activePdfUrl;
  activePdfUrl = URL.createObjectURL(pdfBlob);
  pdfFrame.src = activePdfUrl;

  // An object URL keeps its Blob reachable until it is revoked; release the
  // old one only after the iframe has been pointed at the new document.
  if (staleUrl !== null) {
    URL.revokeObjectURL(staleUrl);
  }
}
28
/**
 * Expand an inclusive, 1-based page range into zero-based page indices.
 *
 * @param {number} start - First page number (1-based).
 * @param {number} end - Last page number (1-based, inclusive).
 * @returns {number[]} The indices `start - 1` through `end - 1`; empty when
 *   `end` is smaller than `start` or either bound is NaN.
 */
function range(start, end) {
  const indices = [];
  for (let page = start; page <= end; page += 1) {
    indices.push(page - 1);
  }
  return indices;
}
34
// When the user picks a file, load its bytes into memory and preview it.
input.addEventListener("change", async (e) => {
  const [file] = e.target.files;
  // Nothing to do when the selection is empty.
  if (file === undefined) {
    return;
  }

  try {
    pdfArrayBuffer = await readAsyncFile(file);
    renderPdf(pdfArrayBuffer);
  } catch (err) {
    // An event listener's returned promise is not observed by anyone, so a
    // failed read must be reported here or it becomes an unhandled rejection.
    console.error("Failed to read the selected PDF:", err);
  }
});
43
/**
 * Parse the text of the range field into a validated, 1-based page range.
 *
 * Accepts "first-last" (e.g. "2-5") or a single page number (e.g. "3"), with
 * optional surrounding whitespace.
 *
 * @param {string} rawRange - Text typed by the user.
 * @param {number} pageCount - Number of pages in the source document.
 * @returns {[number, number] | null} `[first, last]`, or `null` when the text
 *   is malformed, reversed, or falls outside `1..pageCount`.
 */
function parsePageRange(rawRange, pageCount) {
  const match = /^\s*(\d+)\s*(?:-\s*(\d+)\s*)?$/.exec(rawRange);
  if (match === null) {
    return null;
  }
  const first = Number(match[1]);
  // A lone page number means a one-page range.
  const last = match[2] === undefined ? first : Number(match[2]);
  if (first < 1 || last < first || last > pageCount) {
    return null;
  }
  return [first, last];
}

// On click, copy the requested pages into a new PDF and offer it for download
// as "extracted.pdf".
extractBtn.addEventListener("click", async () => {
  // pdfArrayBuffer stays unset until a file has been read successfully.
  if (pdfArrayBuffer == null) {
    alert("Please choose a PDF file first.");
    return;
  }

  try {
    const pdfSrcDoc = await PDFLib.PDFDocument.load(pdfArrayBuffer);
    const pageCount = pdfSrcDoc.getPageCount();
    const pageRange = parsePageRange(rangeSelector.value, pageCount);
    if (pageRange === null) {
      alert(
        `Please enter a page or page range between 1 and ${pageCount}, e.g. 1-3.`,
      );
      return;
    }

    const [firstPage, lastPage] = pageRange;
    const pdfNewDoc = await PDFLib.PDFDocument.create();
    // range() turns the 1-based page numbers into the zero-based indices
    // passed to copyPages.
    const pages = await pdfNewDoc.copyPages(
      pdfSrcDoc,
      range(firstPage, lastPage),
    );
    pages.forEach((page) => pdfNewDoc.addPage(page));
    const newPdf = await pdfNewDoc.save();
    saveAs(new Blob([newPdf], { type: "application/pdf" }), "extracted.pdf");
  } catch (err) {
    // Report the failure here; otherwise it becomes an unhandled rejection and
    // the user sees nothing happen.
    console.error("Failed to extract pages:", err);
    alert("Could not extract pages from this PDF.");
  }
});
CSS
/* Reset default spacing and size every box by its border edge. */
*,
*::before,
*::after {
  box-sizing: border-box;
  margin: 0;
  padding: 0;
}

/* Horizontally centered row holding the file picker, range field and button. */
.inputarea {
  position: relative;
  display: flex;
  justify-content: center;
  width: 100%;
  margin: 5rem 0 3rem;
}

#rangeSelector {
  margin-left: 2rem;
}

/* Borderless preview frame, centered horizontally and shifted slightly below
   the vertical middle of its containing block. */
#pdfFrame {
  position: absolute;
  top: 50%;
  left: 50%;
  transform: translate(-50%, -40%);
  width: 50%;
  height: 80vh;
  border-style: none;
  border-spacing: 0;
}

button {
  margin: 0 0 0 0.5rem;
}
笔记
<iframe>
frameborder 属性已被弃用,用 CSS 属性 border 替代;
cellspacing 属性(属于 <table>,而非 <iframe>)已被弃用,用 CSS 属性 border-spacing 替代;border-spacing 只对表格元素生效,对 <iframe> 没有效果。
<input type="file">
通过 accept 属性指定可接受的文件类型,格式如 accept=".pdf"。
<script src="" type="module">
type="module" 让浏览器把该脚本当作 ES 模块加载:可以使用 import / export 语法,默认启用严格模式并延迟执行,顶层变量也不会泄漏到全局作用域。
/**
 * Read a file into memory, wrapping the callback-based FileReader API in a
 * Promise so callers can `await` it.
 *
 * @param {Blob} file - File (or Blob) to read.
 * @returns {Promise<ArrayBuffer>} Resolves with the reader's result (the file's
 *   bytes). Rejects with `reader.error` when the read fails.
 */
function readAsyncFile(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => {
      resolve(reader.result);
    };
    // Reject with the underlying error instead of the ProgressEvent, so that
    // callers receive something with a name and message they can report.
    reader.onerror = () => {
      reject(reader.error);
    };
    reader.readAsArrayBuffer(file);
  });
}
该函数使用了期约(Promise),它是 JS 异步编程的基础。