How to Extract Pages from a PDF and Render Them with JavaScript
在线 Demo:Pdf Modification
代码
HTML
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Pdf Modification</title>
    <link rel="stylesheet" href="style.css">
  </head>
  <body>
    <!-- Controls: pick a PDF, type a page range, then trigger the extraction. -->
    <div class="inputarea">
      <!-- aria-label gives each control an accessible name; there are no visible <label> elements. -->
      <input type="file" accept=".pdf" name="pdfinput" id="pdfinput" aria-label="PDF file">
      <input type="text" id="rangeSelector" placeholder="eg. 1-3" aria-label="Page range">
      <button id="extractBtn" type="button">Extract</button>
    </div>
    <!-- Preview of the selected PDF; the title lets assistive technology announce the frame. -->
    <iframe id="pdfFrame" title="PDF preview"></iframe>
    <!-- pdf-lib exposes the PDFLib global and FileSaver exposes saveAs; script.js uses both. -->
    <script src="https://unpkg.com/pdf-lib@1.17.1/dist/pdf-lib.min.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/file-saver@2.0.5/dist/FileSaver.min.js"></script>
    <script src="./script.js" type="module"></script>
  </body>
</html>
JS
// Look up every element the script needs in one pass; the id order matches
// the order of the destructured names.
const [input, pdfFrame, rangeSelector, extractBtn] = [
  "pdfinput",
  "pdfFrame",
  "rangeSelector",
  "extractBtn",
].map((elementId) => document.getElementById(elementId));

// Bytes of the most recently read PDF; stays undefined until a file is picked.
let pdfArrayBuffer;
/**
 * Read a file into memory, adapting the callback-based FileReader to a
 * Promise so callers can use async/await.
 *
 * @param {Blob} file - The file (or blob) to read.
 * @returns {Promise<ArrayBuffer>} Resolves with the reader's result once the
 *   read finishes; rejects with the reader's `error` if the read fails.
 */
function readAsyncFile(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => resolve(reader.result);
    // Reject with the underlying error object rather than the ProgressEvent
    // passed to the handler, so callers get a message they can log or show.
    reader.onerror = () => reject(reader.error);
    reader.readAsArrayBuffer(file);
  });
}
/**
 * Render a PDF in the preview iframe.
 *
 * The bytes are wrapped in a Blob and exposed through an object URL that is
 * assigned to `pdfFrame.src`. The object URL created by the previous call (if
 * any) is revoked so the earlier blob can be released.
 *
 * @param {ArrayBuffer} arrayBuffer - Raw bytes of the PDF to display.
 * @returns {void}
 */
function renderPdf(arrayBuffer) {
  const previousUrl = pdfFrame.src;
  const pdfBlob = new Blob([new Uint8Array(arrayBuffer)], {
    type: "application/pdf",
  });
  pdfFrame.src = URL.createObjectURL(pdfBlob);
  // Only revoke URLs that look like object URLs; the initial src is not one.
  if (previousUrl.startsWith("blob:")) {
    URL.revokeObjectURL(previousUrl);
  }
}
/**
 * Expand an inclusive page range into a list of indices, each one less than
 * the corresponding page number (page 1 becomes index 0).
 *
 * @param {number} start - First page number of the range.
 * @param {number} end - Last page number of the range (inclusive).
 * @returns {number[]} The indices for `start..end`; empty when the range
 *   contains no pages.
 */
function range(start, end) {
  // Truncate like an array length would, so fractional or NaN spans produce
  // the same number of entries as before.
  const count = Math.trunc(end - start + 1);
  const indices = [];
  for (let offset = 0; offset < count; offset += 1) {
    indices.push(start + offset - 1);
  }
  return indices;
}
// When the user picks a file, read it into memory and show it in the preview.
input.addEventListener("change", async ({ target }) => {
  const pickedFiles = target.files;
  // Nothing selected: leave the current document as it is.
  if (pickedFiles.length === 0) {
    return;
  }
  pdfArrayBuffer = await readAsyncFile(pickedFiles[0]);
  renderPdf(pdfArrayBuffer);
});
/**
 * Parse the text of the range field into a validated page selection.
 *
 * Accepts either a single page ("3") or an inclusive range ("2-5"), with
 * optional whitespace around the numbers and the dash. Page numbers are
 * 1-based.
 *
 * @param {string} rawRange - Text typed into the range field.
 * @param {number} pageCount - Number of pages in the source document.
 * @returns {{ start: number, end: number } | null} The selection, or null when
 *   the text is malformed or falls outside 1..pageCount.
 */
function parsePageRange(rawRange, pageCount) {
  const match = /^\s*(\d+)\s*(?:-\s*(\d+)\s*)?$/.exec(rawRange);
  if (match === null) {
    return null;
  }
  const start = Number(match[1]);
  // A single page number selects just that page.
  const end = match[2] === undefined ? start : Number(match[2]);
  if (start < 1 || end < start || end > pageCount) {
    return null;
  }
  return { start, end };
}

// Start extraction: copy the requested pages into a new document and offer it
// as a download named "extracted.pdf".
extractBtn.addEventListener("click", async () => {
  // No file has been read yet, so there is nothing to extract from.
  if (!pdfArrayBuffer) {
    alert("Please choose a PDF file first.");
    return;
  }
  try {
    const pdfSrcDoc = await PDFLib.PDFDocument.load(pdfArrayBuffer);
    const pageCount = pdfSrcDoc.getPageCount();
    const selection = parsePageRange(rangeSelector.value, pageCount);
    if (selection === null) {
      alert(`Enter a page or a range between 1 and ${pageCount}, eg. 1-3.`);
      return;
    }
    const pdfNewDoc = await PDFLib.PDFDocument.create();
    // range() turns the 1-based selection into the 0-based indices that
    // copyPages expects.
    const pages = await pdfNewDoc.copyPages(
      pdfSrcDoc,
      range(selection.start, selection.end),
    );
    pages.forEach((page) => pdfNewDoc.addPage(page));
    const newPdf = await pdfNewDoc.save();
    saveAs(new Blob([newPdf], { type: "application/pdf" }), "extracted.pdf");
  } catch (err) {
    // Loading, copying or saving failed; report it instead of leaving an
    // unhandled rejection.
    console.error(err);
    alert("Could not extract pages from this PDF.");
  }
});
CSS
/* Reset: remove default spacing and size every box by its border edge. */
*, *::before, *::after {
  box-sizing: border-box;
  padding: 0;
  margin: 0;
}

/* Full-width flex row that centres the file picker, range field and button. */
.inputarea {
  position: relative;
  display: flex;
  justify-content: center;
  width: 100%;
  margin: 5rem 0 3rem;
}

/* Gap between the file picker and the range field. */
#rangeSelector {
  margin-left: 2rem;
}

/*
 * Preview frame: its top-left corner is placed at the middle of the
 * containing block, then pulled back by half its width and 40% of its height.
 */
#pdfFrame {
  position: absolute;
  top: 50%;
  left: 50%;
  transform: translate(-50%, -40%);
  width: 50%;
  height: 80vh;
  border-style: none;
  border-spacing: 0;
}

/* Small gap between the range field and the button. */
button {
  margin: 0 0 0 0.5rem;
}
笔记
<iframe>
<iframe> 的 frameborder 属性已被弃用，用 CSS 属性 border（例如 border-style: none）替代；
cellspacing 是 <table> 的属性（同样已被弃用，用 CSS 属性 border-spacing 替代），<iframe> 并没有这个属性，因此 border-spacing 对 <iframe> 不起作用。
<input type="file">
通过 accept 属性指定可接受的文件类型，例如 accept=".pdf"。
<script src="" type="module">
type="module" 表示把脚本作为 ES 模块加载：模块代码默认处于严格模式，拥有独立的模块作用域，并且会延迟到文档解析完成后才执行。
/**
 * Read a file into memory, adapting the callback-based FileReader to a
 * Promise so callers can use async/await.
 *
 * @param {Blob} file - The file (or blob) to read.
 * @returns {Promise<ArrayBuffer>} Resolves with the reader's result once the
 *   read finishes; rejects with the reader's `error` if the read fails.
 */
function readAsyncFile(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => resolve(reader.result);
    // Reject with the underlying error object rather than the ProgressEvent
    // passed to the handler, so callers get a message they can log or show.
    reader.onerror = () => reject(reader.error);
    reader.readAsArrayBuffer(file);
  });
}
该函数用期约（Promise）封装了基于回调的 FileReader，因此调用方可以用 await 等待读取结果；期约是 JS 异步编程的基础。