Parses PDF files to extract text and images.
Low-level usage:
use com\adobe\pdf\PdfReader;
use util\cmd\Console;
use io\streams\FileInputStream;
$reader= new PdfReader(new FileInputStream($argv[1]));
// Create objects lookup table while streaming: every object's dictionary is
// stored under the hash code of its id, so that references can be resolved
// afterwards via `$objects[$reference->hashCode()]`.
$objects= $trailer= [];
foreach ($reader->objects() as $kind => $value) {
  if ('object' === $kind) {
    $objects[$value['id']->hashCode()]= $value['dict'];
  } else if ('trailer' === $kind) {
    // Array union: entries collected from an earlier trailer are kept,
    // only keys not seen so far are added.
    $trailer+= $value;
  }
}
Console::writeLine('Trailer: ', $trailer);
// Optional meta information like author and creation date
if ($info= ($trailer['Info'] ?? null)) {
  Console::writeLine('Info: ', $objects[$info->hashCode()]);
}
// Root catalogue and pages enumeration. The trailer only references the
// catalogue (`Root`); the page tree is referenced by the catalogue's own
// `Pages` entry, not by the trailer, so it is resolved via `$root`.
// NOTE(review): assumes the catalogue dictionary is array-accessible in the
// same way as the trailer - confirm against PdfReader's object format.
$root= $objects[$trailer['Root']->hashCode()];
Console::writeLine('Root: ', $root);
Console::writeLine('Pages: ', $objects[$root['Pages']->hashCode()]);