I unfortunately can't upload the PDF because I don't have authorization to do so in my massive bureaucracy.
Web browser: Chrome 58.0.3029.110 (64-bit)
Windows 10
PDF.js version 1.8.188 (although main fails as well)
Background: I am using the library to extract the contents of form fields (via page.getAnnotations()), and NOT to display the document in the browser. The gist of the problem is that a call to page.getAnnotations() fails when there is a form field (in my case, a signature block) that can't be serialized. It throws an exception deep in util.js and abandons the whole call to page.getAnnotations(), as shown below:
:8888/src/shared/util.js:1301
Uncaught (in promise) DataCloneError: Failed to execute 'postMessage' on 'DedicatedWorkerGlobalScope': function NetworkManager_getXhr() {
return new XMLHttpRequest();
} could not be cloned.
I tried catching the exception in my code by wrapping the call to page.getAnnotations() in a try/catch, but that doesn't seem to work. Commenting out that block of code does work.
If someone could tell me how to simply ignore problematic fields/annotations, that would be a wonderful solution.
'use strict';
// In production, the bundled pdf.js shall be used instead of SystemJS.
Promise.all([SystemJS.import('pdfjs/display/api'),
SystemJS.import('pdfjs/display/global')])
.then(function (modules) {
var api = modules[0], global = modules[1];
// In production, change this to point to the built `pdf.worker.js` file.
global.PDFJS.verbosity=global.PDFJS.VERBOSITY_LEVELS.infos;
global.PDFJS.workerSrc = '../../src/worker_loader.js';
// Fetch the PDF document from the URL using promises.
api.getDocument('myDocument.pdf').then(function (pdf) {
// Fetch the page.
var numPages = pdf.numPages;
var myResult="";
/**
 * Appends the markup produced for one page to the output container.
 *
 * @param {number} pageNumber - Number of the page the markup belongs to
 *   (accepted for the caller's convenience; not used in the body).
 * @param {string} text - Markup appended to the element with id "myFormDiv".
 */
function myCallback(pageNumber, text) {
  var outputContainer = document.getElementById("myFormDiv");
  outputContainer.innerHTML = outputContainer.innerHTML + text;
}
for (var pageNum = 1; pageNum<=numPages; pageNum++) {
function doit(currentPageNumber) {
//alert("processing page "+currentPageNumber);
var response="<h1>Page "+currentPageNumber+"</h1>\n";
pdf.getPage(currentPageNumber).then(function (page) {
/**
 * Escapes the characters that are significant in HTML so a value can be
 * placed inside markup without being interpreted as tags or attributes.
 *
 * @param {*} unsafe - Value to escape.
 * @returns {string} The escaped text, or an empty string when `unsafe` is
 *   not a string (for example an undefined annotation property).
 */
function escapeHtml(unsafe) {
  if (typeof unsafe !== 'string') { return ""; }
  // "&" must be replaced first; otherwise the ampersands introduced by the
  // later replacements would themselves be escaped a second time.
  return unsafe
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#039;");
}
//attempt to read the data!!!!
// Read the page's annotations (form fields) and append one line per field
// to this page's markup. The intent is stated explicitly: calling
// getAnnotations() without it failed with a DataCloneError when the page
// held a field that could not be serialized (e.g. a signature block).
page.getAnnotations({ intent: 'display' }).then(function (items) {
  console.log(items); // Dump this into the console to see what properties are available for each field
  items.forEach(function (item) {
    response += "<div>" + escapeHtml(item.alternativeText) + ":" + escapeHtml(item.fieldValue) + "</div>\n";
  });
  myCallback(currentPageNumber, response);
}).catch(function (reason) {
  // Without a handler a failure only surfaces as "Uncaught (in promise)".
  console.error("Failed to read annotations for page " + currentPageNumber, reason);
}); //end getAnnotations()
}); //end getPage()
return response;
} //end doit()
doit(pageNum);
}//end iterating pages
}); //end getDocument()
});
Problem solved by adding the following line: I just had to state my intent when calling getAnnotations().
var parameters = { intent: 'display' };
page.getAnnotations(parameters).then(function(items) {
Most helpful comment
Problem solved with the following added line... I just had to state my intention.