SetaPDF-Core - Detect colors in PDF files
With the use of the Core system it is possible to walk through a PDF document at its lowest level and analyse its internal structure. In this demo we will show you how to collect information about the colors and color spaces used in a PDF document with PHP.
The demo makes use of two individual classes:
StreamProcessor.php
PHP
<?php
use setasign\SetaPDF2\Core\Canvas\Canvas;
use setasign\SetaPDF2\Core\ColorSpace\ColorSpace;
use setasign\SetaPDF2\Core\ColorSpace\DeviceN;
use setasign\SetaPDF2\Core\ColorSpace\IccBased;
use setasign\SetaPDF2\Core\ColorSpace\Indexed;
use setasign\SetaPDF2\Core\ColorSpace\Separation;
use setasign\SetaPDF2\Core\DataStructure\Color\AbstractColor;
use setasign\SetaPDF2\Core\DataStructure\Color\Cmyk;
use setasign\SetaPDF2\Core\DataStructure\Color\Gray;
use setasign\SetaPDF2\Core\DataStructure\Color\Rgb;
use setasign\SetaPDF2\Core\Parser\Content;
use setasign\SetaPDF2\Core\Resource\ResourceInterface;
use setasign\SetaPDF2\Core\TransparencyGroup;
use setasign\SetaPDF2\Core\Type\IndirectReference\Exception as IndirectReferenceException;
use setasign\SetaPDF2\Core\Type\PdfDictionary;
use setasign\SetaPDF2\Core\Type\PdfIndirectReference;
use setasign\SetaPDF2\Core\Type\PdfStream;
use setasign\SetaPDF2\Core\XObject\Form;
use setasign\SetaPDF2\Core\XObject\Image;
use setasign\SetaPDF2\Core\XObject\XObject;
/**
* Class StreamProcessor
*
* This class offers the callback methods that are registered with the content stream parser
*/
class StreamProcessor
{
/**
* @var \ColorInspector
*/
protected $_colorInspector;
/**
* @var Canvas
*/
protected $_canvas;
/**
* @var Content
*/
protected $_parser;
/**
 * Creates a processor for a single canvas.
 *
 * @param Canvas $canvas The canvas whose content stream and resources are inspected
 * @param \ColorInspector $colorInspector The collector that receives every color definition found
 */
public function __construct(Canvas $canvas, \ColorInspector $colorInspector)
{
    $this->_colorInspector = $colorInspector;
    $this->_canvas = $canvas;
}
/**
 * Callback for the device color operators (G, g, RG, rg, K, k).
 *
 * The operands are turned into a color object. Depending on the concrete
 * type of that object the matching device color space name is reported to
 * the color inspector. A color object of any other type is ignored.
 *
 * @param array $args The operands of the operator (the color components)
 * @param string $operator The operator that triggered this callback
 */
public function _color(array $args, $operator)
{
    $color = AbstractColor::createByComponents($args);

    if ($color instanceof Rgb) {
        $family = 'DeviceRGB';
    } elseif ($color instanceof Gray) {
        $family = 'DeviceGray';
    } elseif ($color instanceof Cmyk) {
        $family = 'DeviceCMYK';
    } else {
        // Not one of the three device color types - nothing to report
        return;
    }

    $this->_colorInspector->addFoundColor(
        $family,
        $color,
        'Standard color operator (' . $operator . ') in content stream.'
    );
}
/**
 * Callback for the color space operators (CS, cs).
 *
 * The operand is the name of a color space. If the canvas resources hold a
 * color space entry with that name, the entry is used as the definition;
 * otherwise the operand itself is used. The resulting color space is then
 * reported, including the color spaces it depends on.
 *
 * @param array $args The operands of the operator; the first one is the color space name
 * @param string $operator The operator that triggered this callback
 */
public function _colorSpace(array $args, $operator)
{
    $definition = $args[0];

    $resources = $this->_canvas->getResources(true, false, ResourceInterface::TYPE_COLOR_SPACE);
    if ($resources) {
        $name = $definition->getValue();
        if ($resources->offsetExists($name)) {
            // A named resource takes the place of the plain name operand
            $definition = $resources->getValue($name);
        }
    }

    $this->_resolveColorSpace(
        ColorSpace::createByDefinition($definition),
        'Color space operator (' . $operator . ') in content stream.'
    );
}
/**
 * Reports a color space and, recursively, the color spaces it is built on.
 *
 * The given color space is always reported first. After that:
 * - Separation and DeviceN: the alternate color space is resolved.
 * - Indexed: the base color space is resolved.
 * - ICC based: the alternate color space of the profile stream (if any) is
 *   resolved and the color space signature read from the ICC profile is
 *   reported as an additional entry.
 *
 * @param ColorSpace $colorSpace The color space to report
 * @param string $info A human readable note describing where the color space was found
 */
protected function _resolveColorSpace(ColorSpace $colorSpace, $info)
{
    $inspector = $this->_colorInspector;
    $inspector->addFoundColor($colorSpace->getFamily(), $colorSpace, $info);

    // The type checks are done in the same order as before: the first match wins.
    if ($colorSpace instanceof Separation) {
        $this->_resolveColorSpace(
            $colorSpace->getAlternateColorSpace(),
            'Alternate color space for Separation color space.'
        );
        return;
    }

    if ($colorSpace instanceof DeviceN) {
        $this->_resolveColorSpace(
            $colorSpace->getAlternateColorSpace(),
            'Alternate color space for DeviceN color space.'
        );
        return;
    }

    if ($colorSpace instanceof Indexed) {
        $this->_resolveColorSpace(
            $colorSpace->getBase(),
            'Base color space for Indexed color space.'
        );
        return;
    }

    if (!($colorSpace instanceof IccBased)) {
        return;
    }

    $profileStream = $colorSpace->getIccProfileStream();

    $alternate = $profileStream->getAlternate();
    if ($alternate) {
        $this->_resolveColorSpace($alternate, 'Alternate color space for ICC profile color space.');
    }

    /* The color space signature is part of the ICC profile itself.
     * See ICC.1:2010 - Table 19 (ICC1v43_2010-12.pdf)
     */
    $signature = $profileStream->getParser()->getColorSpace();
    $inspector->addFoundColor(
        trim($signature),
        $profileStream,
        'Color space signature extracted from ICC profile.'
    );
}
/**
 * Callback for the XObject paint operator (Do).
 *
 * The named XObject is looked up in the XObject resources of the canvas:
 * - Image: the color space of the image is reported (including the color
 *   spaces it depends on). Image masks are skipped.
 * - Form: the color space of its transparency group (if any) is reported
 *   and the content stream of the form is processed recursively by a new
 *   processor instance.
 *
 * A form that is already being processed further up the call chain is
 * skipped, so that forms which (directly or indirectly) paint themselves
 * cannot cause unbounded recursion. Painting the same form several times
 * one after another is still processed each time.
 *
 * Missing operands, missing resources and references that cannot be
 * resolved are ignored silently.
 *
 * @param array $args The operands of the operator; the first one is the XObject name
 */
public function _paintXObject($args)
{
    // Forms currently being processed. A static local is shared by all
    // processor instances, so nested processors see the same stack.
    static $activeForms = [];

    // A malformed content stream may use the operator without an operand
    if (!isset($args[0])) {
        return;
    }

    $name = $args[0]->getValue();
    $xObjects = $this->_canvas->getResources(true, false, ResourceInterface::TYPE_X_OBJECT);
    // Same falsy check as used for the color space resources
    if (!$xObjects) {
        return;
    }

    $xObjectIndirectObject = $xObjects->getValue($name);
    if (!($xObjectIndirectObject instanceof PdfIndirectReference)) {
        return;
    }

    try {
        $xObject = XObject::get($xObjectIndirectObject);
    } catch (IndirectReferenceException $e) {
        // Treat an unresolvable reference like the shading callback does: ignore it
        return;
    }

    if ($xObject instanceof Image) {
        $dict = $xObject->getIndirectObject()->ensure()->getValue();
        if ($dict->offsetExists('ImageMask') && $dict->getValue('ImageMask')->ensure()->getValue() == true) {
            // Image masks carry no color space of their own
            return;
        }

        $colorSpace = $xObject->getColorSpace();
        // NOTE(review): what getColorSpace() returns for an image without a
        // color space entry is not visible here. Anything that is not a
        // ColorSpace instance is skipped instead of failing the type check
        // of _resolveColorSpace().
        if (!($colorSpace instanceof ColorSpace)) {
            return;
        }

        $this->_resolveColorSpace($colorSpace, 'Color space of an image used in a content stream.');
        return;
    }

    if (!($xObject instanceof Form)) {
        return;
    }

    // The object is kept alive by $xObject while it is on the stack, so its
    // hash cannot be reused by another object in the meantime.
    $formKey = spl_object_hash($xObject->getIndirectObject());
    if (isset($activeForms[$formKey])) {
        // Cyclic reference: this form is already being processed
        return;
    }

    $activeForms[$formKey] = true;
    try {
        /* Get the colorspace from the transparency group */
        $group = $xObject->getGroup();
        if ($group instanceof TransparencyGroup) {
            $colorSpace = $group->getColorSpace(true);
            if ($colorSpace !== null) {
                $info = 'Color space from Transparency Group of XObject.';
                $this->_resolveColorSpace(ColorSpace::createByDefinition($colorSpace), $info);
            }
        }

        /* We got a Form XObject - start recursively processing */
        $streamProcessor = new self($xObject->getCanvas(), $this->_colorInspector);
        $streamProcessor->process();
    } finally {
        // Always release the guard, even if processing throws
        unset($activeForms[$formKey]);
    }
}
/**
 * Callback for the inline image data operator (ID).
 *
 * The operands are the key/value pairs of the inline image dictionary. The
 * color space entry is read from the key "CS" or, if that key does not
 * exist, from "ColorSpace". The abbreviated names G, RGB, CMYK and I are
 * expanded to their full names before the color space is reported. Inline
 * images without a color space entry are ignored.
 *
 * @param array $args Alternating keys and values of the inline image dictionary
 */
public function _startInlineImageData($args)
{
    $entries = new PdfDictionary();
    $total = count($args);
    for ($pos = 0; $pos < $total; $pos += 2) {
        $entries[$args[$pos]] = $args[$pos + 1];
    }

    if ($entries->offsetExists('CS')) {
        $definition = $entries->getValue('CS');
    } else {
        $definition = $entries->getValue('ColorSpace');
    }

    if ($definition === null) {
        return;
    }

    $name = $definition->getValue();

    // Expand abbreviated names. The loose comparison is intentional: it
    // mirrors the comparison semantics of a switch statement.
    if ($name == 'G') {
        $name = 'DeviceGray';
    } elseif ($name == 'RGB') {
        $name = 'DeviceRGB';
    } elseif ($name == 'CMYK') {
        $name = 'DeviceCMYK';
    } elseif ($name == 'I') {
        $name = 'Indexed';
    }

    $this->_colorInspector->addFoundColor(
        $name,
        ColorSpace::createByDefinition($name),
        'Color space of an inline image in content stream.'
    );
}
/**
 * Callback for the shading operator (sh).
 *
 * The named shading is looked up in the shading resources of the canvas and
 * the color space given in its "ColorSpace" entry is reported, including
 * the color spaces it depends on.
 *
 * The resource entry may be an indirect reference or a direct object: the
 * PDF specification allows dictionary based shadings to be stored directly
 * in the resource dictionary, so both forms are resolved through ensure().
 *
 * Missing operands, missing resources, unresolvable references, values that
 * are neither a dictionary nor a stream, and shadings without a
 * "ColorSpace" entry are ignored silently.
 *
 * @param array $args The operands of the operator; the first one is the shading name
 */
public function _paintShapeAndColourShading($args)
{
    // A malformed content stream may use the operator without an operand
    if (!isset($args[0])) {
        return;
    }

    $name = $args[0]->getValue();
    $shadings = $this->_canvas->getResources(true, false, ResourceInterface::TYPE_SHADING);
    // Same falsy check as used for the color space resources
    if (!$shadings) {
        return;
    }

    $shadingEntry = $shadings->getValue($name);
    if ($shadingEntry === null) {
        return;
    }

    try {
        // Resolves an indirect reference; a direct object is used as it is
        $shading = $shadingEntry->ensure();
    } catch (IndirectReferenceException $e) {
        return;
    }

    if ($shading instanceof PdfStream) {
        // Stream based shadings keep their entries in the stream dictionary
        $shading = $shading->getValue();
    }

    if (!($shading instanceof PdfDictionary)) {
        return;
    }

    $colorSpaceValue = $shading->getValue('ColorSpace');
    if ($colorSpaceValue === null) {
        return;
    }

    $colorSpace = ColorSpace::createByDefinition($colorSpaceValue);
    $info = 'Paint shading operator in content stream.';
    $this->_resolveColorSpace($colorSpace, $info);
}
/**
 * Parses the content stream of the canvas and dispatches the operators of
 * interest to the callback methods of this class.
 *
 * If the stream cannot be unfiltered, nothing is processed.
 */
public function process()
{
    try {
        $contents = $this->_canvas->getStream();
    } catch (\setasign\SetaPDF2\Core\Filter\Exception $e) {
        // if a stream cannot be unfiltered, we ignore it
        return;
    }

    $parser = new Content($contents);
    $this->_parser = $parser;

    /* Operator(s) => callback method, registered in this order:
     * - color space operators, e.g. "/DeviceRGB CS" (set DeviceRGB colour space)
     * - default (device) color operators
     * - draw operator for XObjects
     * - inline image data
     * - shading operator
     */
    $callbacks = [
        [['CS', 'cs'], '_colorSpace'],
        [['G', 'g', 'RG', 'rg', 'K', 'k'], '_color'],
        ['Do', '_paintXObject'],
        ['ID', '_startInlineImageData'],
        ['sh', '_paintShapeAndColourShading'],
    ];

    foreach ($callbacks as $callback) {
        $parser->registerOperator($callback[0], [$this, $callback[1]]);
    }

    $parser->process();
}
}
ColorInspector.php
PHP
<?php
use setasign\SetaPDF2\Core\Document;
use setasign\SetaPDF2\Core\Type\PdfDictionary;
use setasign\SetaPDF2\Core\Type\PdfStream;
/**
* Class ColorInspector
*/
class ColorInspector
{
/**
* @var Document
*/
protected $_document;
/**
* All found color definitions
*
* @var array
*/
protected $_colors = [];
/**
* Information about the currently processed "location"
*
* @var string
*/
protected $_currentLocation;
/**
 * Creates an inspector for the given document.
 *
 * The document is only stored here; nothing is read from it until
 * getColors() is called.
 *
 * @param Document $document The document whose pages are inspected
 */
public function __construct(Document $document)
{
$this->_document = $document;
}
/**
 * Get all used colors
 *
 * Walks through the pages of the document and processes the content stream
 * of every page and, optionally, the appearance streams of the annotations
 * on that page. Every call starts with an empty result list, so calling
 * this method repeatedly does not accumulate duplicate entries.
 *
 * Each entry of the result is an array with the keys "colorSpace", "data",
 * "info" and "location" (see addFoundColor()).
 *
 * @param bool $processAnnotations Set to false to ignore color definitions in annotation appearance streams
 * @param null|int $maxPages The maximum of pages to process (null = all pages)
 * @return array
 */
public function getColors($processAnnotations = true, $maxPages = null)
{
    // Results of a previous run must not leak into this one
    $this->_colors = [];

    $pages = $this->_document->getCatalog()->getPages();
    $pageCount = $pages->count();
    $maxPages = $maxPages === null ? $pageCount : min($maxPages, $pageCount);

    for ($pageNo = 1; $pageNo <= $maxPages; $pageNo++) {
        $this->_currentLocation = 'Page ' . $pageNo;

        $page = $pages->getPage($pageNo);
        $canvas = $page->getCanvas();
        $streamProcessor = new \StreamProcessor($canvas, $this);
        $streamProcessor->process();

        if (!$processAnnotations) {
            continue;
        }

        $annotations = $page->getAnnotations();
        foreach ($annotations->getAll() as $annotation) {
            $dict = $annotation->getDictionary();

            $ap = $dict->getValue('AP');
            if (null === $ap) {
                continue;
            }

            // The appearance dictionary may be referenced indirectly -
            // resolve it before iterating over its entries.
            $ap = $ap->ensure();
            if (!($ap instanceof PdfDictionary)) {
                continue;
            }

            // Do not fail on a (malformed) annotation without a subtype
            $subtype = $dict->getValue('Subtype');
            $subtypeName = $subtype === null ? 'unknown' : $subtype->ensure()->getValue();
            $this->_currentLocation = 'Annotation (' . $subtypeName . ') on Page ' . $pageNo;

            foreach ($ap as $type => $value) {
                $object = $value->ensure();

                if ($object instanceof PdfStream) {
                    // The entry is the appearance stream itself
                    $streamProcessor = new \StreamProcessor($annotation->getAppearance($type)->getCanvas(), $this);
                    $streamProcessor->process();
                    continue;
                }

                if (!($object instanceof PdfDictionary)) {
                    continue;
                }

                // The entry is a sub dictionary holding several appearance streams
                foreach ($object as $subType => $subValue) {
                    $subObject = $subValue->ensure();
                    if ($subObject instanceof PdfStream) {
                        $streamProcessor = new \StreamProcessor(
                            $annotation->getAppearance($type, $subType)->getCanvas(),
                            $this
                        );
                        $streamProcessor->process();
                    }
                }
            }
        }
    }

    return $this->_colors;
}
/**
 * Registers a found color definition.
 *
 * The entry is appended to the internal result list together with the
 * location that is currently being processed.
 *
 * @param string $colorSpace The name of the color space
 * @param mixed $data The object the color space was detected from (optional)
 * @param string|null $info A human readable note describing where it was found (optional)
 */
public function addFoundColor($colorSpace, $data = null, $info = null)
{
    $entry = [];
    $entry['colorSpace'] = $colorSpace;
    $entry['data'] = $data;
    $entry['info'] = $info;
    $entry['location'] = $this->_currentLocation;

    $this->_colors[] = $entry;
}
}
