使用 JS 提取 PDF 指定范围的页面

How to Extract Pages from a PDF and Render Them with JavaScript

在线 Demo:Pdf Modification

代码

HTML

<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Pdf Modification</title>
  <link rel="stylesheet" href="style.css">
</head>
<body>
  <!-- Controls: file picker, page-range text box and the extract button -->
  <div class="inputarea">
    <input type="file" accept=".pdf" name="pdfinput" id="pdfinput">
    <input type="text" id="rangeSelector" placeholder="eg. 1-3">
    <button id="extractBtn">Extract</button>
  </div>

  <!-- Preview of the selected PDF -->
  <iframe id="pdfFrame"></iframe>

  <!-- Third-party libraries used by script.js: pdf-lib (global PDFLib) and FileSaver.js (global saveAs) -->
  <script src="https://unpkg.com/pdf-lib@1.17.1/dist/pdf-lib.min.js"></script>
  <script src="https://cdn.jsdelivr.net/npm/file-saver@2.0.5/dist/FileSaver.min.js"></script>
  <script src="./script.js" type="module"></script>
</body>
</html>

JS

// Cached references to the page elements used by the handlers below.
const input = document.getElementById("pdfinput"); // file picker
const pdfFrame = document.getElementById("pdfFrame"); // iframe that previews the PDF
const rangeSelector = document.getElementById("rangeSelector"); // text box holding the page range
const extractBtn = document.getElementById("extractBtn"); // button that starts the extraction

// Contents of the most recently read PDF file; stays undefined until a file has been read.
let pdfArrayBuffer;

// Read our file in async/await fashion
/**
 * Read a file into memory, wrapping the callback-based FileReader in a
 * Promise so callers can simply `await` the result.
 *
 * @param {Blob} file - The File/Blob to read.
 * @returns {Promise<ArrayBuffer>} Resolves with the file contents.
 *   Rejects with `reader.error` when the read fails.
 */
function readAsyncFile(file) {
	return new Promise((resolve, reject) => {
		const reader = new FileReader();
		reader.onload = () => {
			resolve(reader.result);
		};
		// Reject with the reader's error object instead of the raw event, so
		// callers that catch the rejection get something with a message.
		reader.onerror = () => {
			reject(reader.error);
		};
		reader.readAsArrayBuffer(file);
	});
}

// Render the pdf in an Iframe
/**
 * Show PDF bytes in the preview iframe through a `blob:` object URL.
 *
 * @param {ArrayBuffer} arrayBuffer - Raw bytes of the PDF to display.
 */
function renderPdf(arrayBuffer) {
	const previousUrl = pdfFrame.src;
	const tempBlob = new Blob([arrayBuffer], { type: "application/pdf" });
	pdfFrame.src = URL.createObjectURL(tempBlob);
	// An object URL keeps its Blob alive until it is revoked; release the one
	// this function created for the previous preview so repeated file picks
	// do not accumulate PDFs in memory.
	if (previousUrl.startsWith("blob:")) {
		URL.revokeObjectURL(previousUrl);
	}
}

// Select page range
/**
 * Expand an inclusive range of page numbers into a list where every entry
 * is the page number minus one, e.g. range(1, 3) -> [0, 1, 2].
 *
 * @param {number} start - First page number of the range.
 * @param {number} end - Last page number of the range (inclusive).
 * @returns {number[]} One entry per page; empty when `end` is below
 *   `start` or either bound is NaN.
 */
function range(start, end) {
	const indices = [];
	for (let page = start; page <= end; page += 1) {
		indices.push(page - 1);
	}
	return indices;
}

// Get file from filePicker
// Reads the chosen file into pdfArrayBuffer and shows it in the preview iframe.
input.addEventListener("change", async (e) => {
	const file = e.target.files[0];
	// Nothing selected: keep whatever PDF is currently loaded.
	if (file === undefined) {
		return;
	}
	try {
		pdfArrayBuffer = await readAsyncFile(file);
		renderPdf(pdfArrayBuffer);
	} catch (err) {
		// Nothing awaits an event listener, so a failed read would otherwise
		// surface only as an unhandled promise rejection.
		console.error("Failed to read the selected PDF:", err);
	}
});

// Start extraction

/**
 * Parse the text of the range box into 1-based, inclusive page numbers.
 * Accepts a single page ("3") or a range ("1-3"); spaces around the
 * numbers are ignored.
 *
 * @param {string} rawRange - Text typed by the user.
 * @returns {[number, number] | null} `[first, last]`, or null when the text
 *   is malformed, the first page is below 1, or the range is reversed.
 */
function parsePageRange(rawRange) {
	const match = /^\s*(\d+)\s*(?:-\s*(\d+)\s*)?$/.exec(rawRange);
	if (match === null) {
		return null;
	}
	const first = Number(match[1]);
	// A lone number selects exactly that page.
	const last = match[2] === undefined ? first : Number(match[2]);
	return first >= 1 && last >= first ? [first, last] : null;
}

// On click: copy the requested pages of the loaded PDF into a new document
// and offer it for download as "extracted.pdf".
extractBtn.addEventListener("click", async () => {
	if (pdfArrayBuffer === undefined) {
		alert("Please choose a PDF file first.");
		return;
	}

	const pageRange = parsePageRange(rangeSelector.value);
	if (pageRange === null) {
		alert('Please enter a page number or range such as "2" or "1-3".');
		return;
	}
	const [firstPage, lastPage] = pageRange;

	try {
		const pdfSrcDoc = await PDFLib.PDFDocument.load(pdfArrayBuffer);
		const pageCount = pdfSrcDoc.getPageCount();
		// Check the upper bound before building the index list so an oversized
		// range is reported to the user instead of failing inside copyPages.
		if (lastPage > pageCount) {
			alert(`The document only has ${pageCount} page(s).`);
			return;
		}

		const pdfNewDoc = await PDFLib.PDFDocument.create();
		const pages = await pdfNewDoc.copyPages(
			pdfSrcDoc,
			range(firstPage, lastPage),
		);
		pages.forEach((page) => pdfNewDoc.addPage(page));
		const newPdf = await pdfNewDoc.save();
		saveAs(new Blob([newPdf], { type: "application/pdf" }), "extracted.pdf");
	} catch (err) {
		// Nothing awaits an event listener, so report failures here rather
		// than leaving an unhandled promise rejection.
		console.error("Failed to extract pages:", err);
		alert("Could not extract pages from this PDF.");
	}
});

CSS

/* Reset: remove margin and padding from every element and its ::before/::after
   pseudo-elements, and size them all with border-box. */
*,
*::before,
*::after {
  margin: 0;
  padding: 0;
  box-sizing: border-box;
}

/* Full-width flex row whose children are centered horizontally, with 5rem of
   space above and 3rem below. */
.inputarea {
  position: relative;
  display: flex;
  justify-content: center;
  width: 100%;
  margin: 5rem 0 3rem;
}

/* Gap to the left of the page-range text box. */
#rangeSelector {
  margin-left: 2rem;
}

/* PDF preview frame: borderless, half the available width and 80vh tall,
   absolutely positioned. */
#pdfFrame {
  border-style: none;
  /* NOTE(review): border-spacing is normally a table property; confirm it has any effect on an iframe. */
  border-spacing: 0;
  width: 50%;
  height: 80vh;
  position: absolute;
  /* top/left put the frame's top-left corner at the middle of its containing
     block; the transform then pulls it back by 50% of its own width and 40%
     of its own height, so it sits centered horizontally and slightly low. */
  top: 50%;
  left: 50%;
  transform: translate(-50%, -40%);
}

/* Small gap to the left of every button; the other margins are zero. */
button {
  margin: 0 0 0 0.5rem;
}

笔记

<iframe>

frameborder 属性已被弃用,用 CSS 属性 border 替代;

cellspacing 属性(它属于 <table>,而非 <iframe>)已被弃用,用 CSS 属性 border-spacing 替代。

<input type="file">

通过 accept 属性,指定可接受的文件类型,格式 accept=".pdf"

<script src="" type="module">

type="module" 表示将脚本作为 ES 模块加载:模块默认处于严格模式、延迟(defer)执行,并且可以使用 import / export 语法。

/**
 * Read a file into memory, wrapping the callback-based FileReader in a
 * Promise so callers can simply `await` the result.
 *
 * @param {Blob} file - The File/Blob to read.
 * @returns {Promise<ArrayBuffer>} Resolves with the file contents.
 *   Rejects with `reader.error` when the read fails.
 */
function readAsyncFile(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => {
      resolve(reader.result);
    };
    // Reject with the reader's error object instead of the raw event, so
    // callers that catch the rejection get something with a message.
    reader.onerror = () => {
      reject(reader.error);
    };
    reader.readAsArrayBuffer(file);
  });
}

该函数用期约(Promise)封装了基于回调的 FileReader,使调用方可以通过 async/await 读取文件;Promise 是 JS 异步编程的基础。

欢迎通过「邮件」或者点击「这里」告诉我你的想法
Welcome to tell me your thoughts via "email" or click "here"