aneamal/func.php
<?php
/* Copyright 2010-2024 Martin Janecke <martin@aneamal.org>
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/
// declare (strict_types = 1); // only during development
namespace prlbr\aneamal;
/* Constants
*/
// version of this file
const vfunc = '31';

// metadata types; every type occupies a bit of its own
const META_PLAIN   = 1 << 0; // plain variable without special function
const META_SPECIAL = 1 << 1; // recognized or reserved metadata name
const META_CUSTOM  = 1 << 2; // custom mark

// URI types; every type occupies a bit of its own
const URI_REMOTE = 1 << 0; // absolute URI: http://example.com/
const URI_DATA   = 1 << 1; // data URI: data:text/plain;base64,Oik=
const URI_LOCAL  = 1 << 2; // local path: /style/me.css or dog.nml
const URI_PAGE   = 1 << 3; // relative to current page: #section-2 or ?v=27
/* Classes
*/
// Marker exception type without members of its own; the places which throw or
// catch it are outside this part of the file.
class CardinalityException extends \Exception
{
}
// Thrown by the Preview classes when an image cannot be loaded, has no positive
// size or cannot be saved.
class PreviewException extends \Exception
{
}
// Marker exception type without members of its own; the places which throw or
// catch it are outside this part of the file.
class ModuleMessage extends \Exception
{
}
/* This class is used to read the width and height of visual media in a Matroska
* or WebM container file. WebM is basically a subset of Matroska which is
* implemented in EBML, hence all three specifications are needed/helpful to
* understand how WebM is composed:
* EBML specification: https://datatracker.ietf.org/doc/html/rfc8794
* Matroska specification: https://www.matroska.org/technical/basics.html
* WebM specification: https://www.webmproject.org/docs/container/
* NOTE: It may seem like there is an excessive number of file operations like
* fread, fgetc. However, PHP reads a chunk of usually 8KB at once and buffers
* the result internally even if only a few bytes were requested. Further
* operations within the range of the chunk are served from the RAM and do not
* access the hard disk. Therefore it would not be more efficient to read longer
* chunks in our code and do string operations instead of the file operations.
*/
class Matroska {

    // EBML Element IDs needed to find Matroska/WebM video dimensions. Indentation
    // reflects the structure of the video file.
    const ID = [
        'EBML' => 0x1A45DFA3,                       // mandatory first bytes
            'DocType' => 0x4282,                    // mandatory, see DocTypes
        'Segment' => 0x18538067,
            'SeekHead' => 0x114D9B74,               // multiple optional
                'Seek' => 0x4DBB,
                    'SeekID' => 0x53AB,
                    'SeekPosition' => 0x53AC,
            'Tracks' => 0x1654AE6B,
                'TrackEntry' => 0xAE,
                    'TrackType' => 0x83,            // mandatory, 1=video 3=complex
                    'Video' => 0xE0,
                        'PixelWidth' => 0xB0,       // mandatory, not 0
                        'PixelHeight' => 0xBA,      // mandatory, not 0
                        'PixelCropBottom' => 0x54AA, // default 0
                        'PixelCropTop' => 0x54BB,   // default 0
                        'PixelCropLeft' => 0x54CC,  // default 0
                        'PixelCropRight' => 0x54DD, // default 0
                        'DisplayWidth' => 0x54B0,   // optional
                        'DisplayHeight' => 0x54BA,  // optional
                        'DisplayUnit' => 0x54B2,    // optional, must be 0
    ];

    // supported DocType strings
    const DocTypes = [
        'webm',
        'matroska',
    ];

    public int $width = 0;      // display width of the video in pixels
    public int $height = 0;     // display height of the video in pixels
    public string $type = '';   // registered DocType like "webm" or "matroska"
    public string $error = '';  // string representation of a thrown Exception

    private $f;                 // file resource identifying an opened video file
    private int $filesize = 0;  // its filesize in bytes

    /* The constructor opens the video file given as $filename, tries to find width
     * and height in it and closes the file. Anything which is unexpected or cannot
     * be handled in the file by this class will throw an Exception. They are
     * caught here and stored as string in $this->error.
     * The file is closed in the finally block and only if it was really opened:
     * fclose throws a TypeError for anything that is not a resource, which the @
     * operator cannot suppress.
     * NOTE: There are no other public methods. Access the object's properties to
     * read the information which the constructor found.
     */
    public function __construct (
        string $filename
    ) // returns nothing
    {
        try {
            if ($this->filesize = filesize ($filename)):
                if ($this->f = fopen ($filename, 'rb')):
                    $this->seek_segment ();
                    if (in_array ($this->type, self::DocTypes, true)):
                        $this->seek_tracks ();
                        $this->seek_video ();
                        $this->find_dimensions ();
                    endif;
                endif;
            endif;
        } catch (\Throwable $e) {
            $this->error = (string) $e;
        } finally {
            if (is_resource ($this->f)):
                fclose ($this->f);
            endif;
        }
    }

    /* Reads exactly $length bytes from the current position in the file and
     * returns them. An Exception with the message "Failed to read $what" is
     * thrown, if the read fails or if fewer bytes than requested are available,
     * e.g. because the file is truncated. Without the length check a short read
     * would silently be interpreted as a different value.
     */
    private function read_bytes (
        int $length,
        string $what
    ): string
    {
        $bytes = fread ($this->f, $length);
        if ($bytes === false or strlen ($bytes) !== $length):
            throw new \Exception ("Failed to read $what");
        endif;
        return $bytes;
    }

    /* Returns a string which it reads from the current position in the file and
     * which is $width bytes long. If the $width is zero, the $default value is
     * returned, which defaults to an empty string. The pointer in the file
     * resource points behind the string at the end.
     * The $width comes from the file itself and is therefore checked against the
     * number of bytes that remain in the file before anything is read; this
     * prevents an attempt to allocate an absurd amount of memory.
     * NOTE: The EBML specification restricts its type String to bytes from the
     * range 0x20 to 0x7E which represent US-ASCII characters and a 0x00 byte for
     * termination. This function does not check whether the string adheres to the
     * range, but it terminates the string at a 0x00 byte.
     */
    private function read_string (
        int $width,
        string $default = ''
    ): string
    {
        if ($width === 0):
            return $default;
        endif;
        if ($width > $this->filesize - ftell ($this->f)):
            throw new \Exception ('STRING exceeds filesize');
        endif;
        $bytes = $this->read_bytes ($width, 'STRING');
        $zero = strpos ($bytes, "\x00");
        return $zero === false? $bytes: substr ($bytes, 0, $zero);
    }

    /* An EBML Element consists of Element ID, Element Data Size and Element Data.
     * This function returns an array of the format [Element ID, Element Data Size]
     * whose two items are integers. They are read from the current position in the
     * file. The Element ID keeps its VINT_MARKER, the Element Data Size does not.
     */
    private function read_tag (
    ): array
    {
        return [$this->read_vint (false), $this->read_vint (true)];
    }

    /* Returns an unsigned integer (as defined in the EBML specification) which it
     * reads from the current position in the file and which is $width bytes long.
     * If the $width is 0, the $default value is returned, which defaults to 0
     * itself. The pointer in the file resource points behind the unsigned integer
     * at the end. An Exception is thrown, if the integer is wider than PHP's int,
     * does not fit into PHP's signed int or cannot be read completely.
     */
    private function read_uint (
        int $width, // in bytes, must be in the range 0 to 8
        int $default = 0
    ): int
    {
        if ($width === 0):
            return $default;
        elseif ($width > PHP_INT_SIZE):
            throw new \Exception ('UINT exceeds PHP_INT_SIZE');
        endif;
        $bytes = $this->read_bytes ($width, 'UINT');
        // Transform the binary data (handled by PHP as string) into an int. The
        // binary representation of $bytes and $integer should actually be the
        // same then, except that PHP recognizes the latter as int. hexdec returns
        // a float for values beyond PHP_INT_MAX.
        $integer = hexdec (bin2hex ($bytes));
        if (is_float ($integer)):
            throw new \Exception ('PHPs signed INT too small for this UINT');
        endif;
        return $integer;
    }

    /* Reads a variable width integer (VINT) from the current pointer position of
     * the file. A VINT consists of VINT_WIDTH, VINT_MARKER and VINT_DATA.
     * VINT_WIDTH determines the width in bytes of the VINT and encodes that width
     * in the number of 0-bits before VINT_MARKER plus 1. VINT_MARKER is a single
     * 1-bit. The rest of the VINT is VINT_DATA, a binary big-endian integer.
     * If $onlydata is true, just that number in VINT_DATA is returned. If
     * $onlydata is false, the whole VINT is interpreted as a binary number.
     * This method is limited to VINTs of width 8, since this is the maximum valid
     * size in Matroska and WebM, determined by the bigger value of EBMLMaxIDLength
     * (must be 4) and EBMLMaxSizeLength (must be from the range 1-8). It is also
     * what PHP can handle as integer on 64-bit systems. EBML allows greater widths
     * though.
     */
    private function read_vint (
        bool $onlydata = true
    ): int
    {
        $byte = fgetc ($this->f);
        if ($byte === false):
            throw new \Exception ('Failed to read VINT_WIDTH');
        endif;
        $n = ord ($byte);
        // Determine the number of initial 0-bits in the first byte $n, plus one.
        // Mind the following equivalence of hexadecimal and binary:
        // 0: 0000, 1: 0001, 2: 0010, 3: 0011, 4: 0100, 5: 0101, 6: 0110, 7: 0111
        // 8: 1000, 9: 1001, A: 1010, B: 1011, C: 1100, D: 1101, E: 1110, F: 1111
        if ($n & 0xF0):
            if ($n & 0xC0):
                $width = $n & 0x80? 1: 2;
            else:
                $width = $n & 0x20? 3: 4;
            endif;
        elseif ($n & 0x0F):
            if ($n & 0x0C):
                $width = $n & 0x08? 5: 6;
            else:
                $width = $n & 0x02? 7: 8;
            endif;
        else:
            throw new \Exception ('VINT exceeds 8 bytes');
        endif;
        if ($width > PHP_INT_SIZE):
            throw new \Exception ('VINT exceeds PHP_INT_SIZE');
        endif;
        // Remove the VINT_MARKER, if only the VINT_DATA is desired:
        if ($onlydata):
            $n ^= 1 << (8 - $width);
        endif;
        // Read the part of VINT_DATA that was not already in the first byte:
        if ($width > 1):
            $bytes = $this->read_bytes ($width - 1, 'VINT_DATA size');
            foreach (str_split ($bytes) as $byte):
                $n <<= 8;
                $n |= ord ($byte);
            endforeach;
        endif;
        return $n;
    }

    /* Calculates the width and height of a WebM video. The file pointer is
     * supposed to point to a Video element inside the file at the beginning.
     * The result is stored in $this->width and $this->height; an Exception is
     * thrown, if no positive width and height can be determined.
     */
    private function find_dimensions (
    ): void
    {
        // The Video element is our start.
        [$id, $size] = $this->read_tag ();
        if ($id !== self::ID['Video']):
            throw new \Exception ('Expected Video');
        endif;
        $afterVideo = $size + ftell ($this->f);
        if ($this->filesize < $afterVideo):
            throw new \Exception ('Video beyond filesize');
        endif;
        // Fixed default values for optional tags:
        $PixelCropBottom =
        $PixelCropTop =
        $PixelCropLeft =
        $PixelCropRight =
        $DisplayUnit = 0;
        // The following tags are optional and have conditional default values
        // which we compute later; 0 is an invalid value to check against.
        $DisplayWidth =
        $DisplayHeight = 0;
        // The following tags are mandatory and must be > 0; use 0 as an invalid
        // value to check later whether they have been set correctly.
        $PixelWidth =
        $PixelHeight = 0;
        // Search through the elements in Video:
        while (ftell ($this->f) < $afterVideo):
            [$id, $size] = $this->read_tag ();
            if ($id === self::ID['PixelWidth']):
                $PixelWidth = $this->read_uint ($size);
            elseif ($id === self::ID['PixelHeight']):
                $PixelHeight = $this->read_uint ($size);
            elseif ($id === self::ID['PixelCropBottom']):
                $PixelCropBottom = $this->read_uint ($size, 0);
            elseif ($id === self::ID['PixelCropTop']):
                $PixelCropTop = $this->read_uint ($size, 0);
            elseif ($id === self::ID['PixelCropLeft']):
                $PixelCropLeft = $this->read_uint ($size, 0);
            elseif ($id === self::ID['PixelCropRight']):
                $PixelCropRight = $this->read_uint ($size, 0);
            elseif ($id === self::ID['DisplayWidth']):
                $DisplayWidth = $this->read_uint ($size);
            elseif ($id === self::ID['DisplayHeight']):
                $DisplayHeight = $this->read_uint ($size);
            elseif ($id === self::ID['DisplayUnit']):
                $DisplayUnit = $this->read_uint ($size, 0);
            else:
                fseek ($this->f, $size, SEEK_CUR); // skip
            endif;
        endwhile;
        // PixelWidth and PixelHeight are required unsigned integers > 0
        if ($PixelWidth < 1 || $PixelHeight < 1):
            throw new \Exception ("PixelWidth: $PixelWidth; PixelHeight: $PixelHeight");
        endif;
        // Calculate width and height from the pixel dimension of the track; this
        // calculation yields the default values for DisplayWidth and DisplayHeight.
        $width = $PixelWidth - $PixelCropLeft - $PixelCropRight;
        $height = $PixelHeight - $PixelCropTop - $PixelCropBottom;
        // Switch to DisplayWidth and DisplayHeight where they were provided and
        // DisplayUnit has the only value WebM supports, i.e. 0, which means pixels.
        // Each of the two has its own default, so a file may provide just one.
        if ($DisplayUnit === 0):
            if ($DisplayWidth > 0):
                $width = $DisplayWidth;
            endif;
            if ($DisplayHeight > 0):
                $height = $DisplayHeight;
            endif;
        endif;
        // Only set dimensions, if both width and height are available, non-zero.
        if ($width > 0 and $height > 0):
            $this->width = $width;
            $this->height = $height;
        else:
            throw new \Exception ("Calculated width: $width; height: $height");
        endif;
    }

    /* Reads the EBML Header of the file and saves the EBML DocType, e.g. "webm".
     * The file pointer is supposed to point at the start of the Header (usually
     * the first byte of the file) at the beginning. The file pointer points at
     * the first byte after the Header afterwards, expected to be a Segment.
     */
    private function seek_segment (
    ): void
    {
        // The EBML element is mandatory at the start.
        [$id, $size] = $this->read_tag ();
        if ($id !== self::ID['EBML']):
            throw new \Exception ('Expected EBML Header');
        endif;
        // Set $beyond to the position of the first byte after the EBML Header.
        $beyond = ftell ($this->f) + $size;
        if ($this->filesize < $beyond):
            throw new \Exception ('EBML Header exceeds filesize');
        endif;
        // Search through the elements in the EBML Header to find the DocType. If it
        // is found, set the file pointer to the byte after the header, expected to
        // be a Segment, and return.
        while (ftell ($this->f) < $beyond):
            [$id, $size] = $this->read_tag ();
            if ($id === self::ID['DocType']):
                $this->type = $this->read_string ($size);
                fseek ($this->f, $beyond, SEEK_SET);
                return;
            else:
                fseek ($this->f, $size, SEEK_CUR);
            endif;
        endwhile;
        // Since the DocType is mandatory, we should never reach this part.
        throw new \Exception ('EBML DocType missing');
    }

    /* Expects the file pointer to point at the (first) EBML Segment element at the
     * beginning and makes it point to the Tracks element. The Tracks element is
     * either found directly while seeking through the file or via a reference in
     * a SeekHead element.
     * A Segment may declare an unknown size, which EBML signals by setting all
     * bits of VINT_DATA in the Element Data Size to one; this is usual for
     * streams that were written live. Such a Segment is treated as extending to
     * the end of the file.
     */
    private function seek_tracks (
    ): void
    {
        // The Segment element is our start. ID and size are read separately
        // here, because the width of the size field is needed to recognize the
        // reserved "unknown size" value.
        $id = $this->read_vint (false);
        $sizeStart = ftell ($this->f);
        $size = $this->read_vint (true);
        // $innerSegment is a reference point for SeekPositions in the SeekHead
        $innerSegment = ftell ($this->f);
        if ($id !== self::ID['Segment']):
            throw new \Exception ('Expected Segment');
        endif;
        // $afterSegment and $this->filesize are expected to be equal, if the file
        // contains a single EBML Document or is the last EBML Document.
        $sizeWidth = $innerSegment - $sizeStart;
        if ($size === (1 << (7 * $sizeWidth)) - 1):
            $afterSegment = $this->filesize; // unknown size
        else:
            $afterSegment = $innerSegment + $size;
            if ($this->filesize < $afterSegment):
                throw new \Exception ('Segment beyond filesize');
            endif;
        endif;
        // Search through the elements in the Segment to find the Tracks element
        // directly or via a SeekHead Element
        while ($start = ftell ($this->f) and $start < $afterSegment):
            [$id, $size] = $this->read_tag ();
            if ($id === self::ID['Tracks']):
                fseek ($this->f, $start, SEEK_SET);
                return;
            elseif ($id === self::ID['SeekHead']):
                $afterSeekHead = ftell ($this->f) + $size;
                // Search through the elements in the SeekHead
                while (ftell ($this->f) < $afterSeekHead):
                    [$id, $size] = $this->read_tag ();
                    // Seek Elements are expected here, but there can also be global
                    // Elements like a Void element
                    if ($id === self::ID['Seek']):
                        $afterSeek = ftell ($this->f) + $size;
                        $SeekID = $SeekPosition = 0;
                        // Search through the elements in Seek; there are exactly
                        // two mandatory and no specific optional Elements expected,
                        // but there could also be global Elements such as Void
                        while (ftell ($this->f) < $afterSeek):
                            [$id, $size] = $this->read_tag ();
                            if ($id === self::ID['SeekID']):
                                // The Matroska spec defines the type of the SeekID
                                // as binary; we handle it as unsigned integer
                                $SeekID = $this->read_uint ($size);
                                // skip this Seek, if it doesn't reference Tracks
                                if ($SeekID !== self::ID['Tracks']):
                                    fseek ($this->f, $afterSeek, SEEK_SET);
                                    break;
                                endif;
                            elseif ($id === self::ID['SeekPosition']):
                                $SeekPosition = $this->read_uint ($size);
                            else:
                                fseek ($this->f, $size, SEEK_CUR); // skip Element Data
                            endif;
                        endwhile;
                        if ($SeekID === self::ID['Tracks'] and $SeekPosition > 0):
                            fseek ($this->f, $innerSegment + $SeekPosition, SEEK_SET);
                            return;
                        endif;
                    else:
                        fseek ($this->f, $size, SEEK_CUR); // skip Element Data
                    endif;
                endwhile;
            else:
                fseek ($this->f, $size, SEEK_CUR); // skip Element Data
            endif;
        endwhile;
        // Tracks are not mandatory; we can reach this point.
        throw new \Exception ('Tracks not found');
    }

    /* Expects the file pointer to point at a Tracks element and makes it point to
     * the first Video element inside it that belongs to a video or complex track.
     * If there is no such element, the method returns without an error and the
     * file pointer is left wherever the search ended.
     */
    private function seek_video (
    ): void
    {
        // The Tracks element is our start.
        [$id, $size] = $this->read_tag ();
        if ($id !== self::ID['Tracks']):
            throw new \Exception ('Expected Tracks');
        endif;
        $afterTracks = ftell ($this->f) + $size;
        if ($this->filesize < $afterTracks):
            throw new \Exception ('Tracks beyond filesize');
        endif;
        // Search through the elements in Tracks to find Video elements
        while (ftell ($this->f) < $afterTracks):
            [$id, $size] = $this->read_tag ();
            // The only valid element besides global elements here is TrackEntry
            if ($id === self::ID['TrackEntry']):
                $afterTrackEntry = ftell ($this->f) + $size;
                // $Video remembers the position of a Video element that was seen
                // before the TrackType of its TrackEntry was known.
                $TrackType = $Video = 0;
                // Search through the elements in the TrackEntry
                while ($start = ftell ($this->f) and $start < $afterTrackEntry):
                    [$id, $size] = $this->read_tag ();
                    if ($id === self::ID['TrackType']):
                        // TrackType 1 and 3 identify video and complex tracks,
                        // which are the only ones where we expect video dimensions
                        $TrackType = $this->read_uint ($size);
                        if ($TrackType !== 1 and $TrackType !== 3):
                            fseek ($this->f, $afterTrackEntry, SEEK_SET);
                            break;
                        elseif ($Video):
                            fseek ($this->f, $Video, SEEK_SET);
                            return;
                        endif;
                    elseif ($id === self::ID['Video']):
                        if ($TrackType):
                            fseek ($this->f, $start, SEEK_SET);
                            return;
                        else:
                            $Video = $start;
                        endif;
                    // We could check for the mandatory FlagEnabled tag here, but
                    // its specification - "Set to 1 if the track is usable. It is
                    // possible to turn a not usable track into a usable track using
                    // chapter codecs or control tracks." - does not give confidence
                    // of being reliable on its own. We assume for our purposes that
                    // a Video track is useful.
                    else:
                        fseek ($this->f, $size, SEEK_CUR); // skip Element Data
                    endif;
                endwhile;
            else:
                fseek ($this->f, $size, SEEK_CUR); // skip Element Data
            endif;
        endwhile;
    }

} // class Matroska
/* This class is used to read the width and height of visual media that
* corresponds to the ISO/IEC 14496-12 base media file format, which includes
* container formats such as MP4 and 3GP. It should be compatible with Apple's
* QuickTime File Format (.mov) as well; its specification can be found at:
* https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/
*/
class MP4 {

    // A few recognized box types; we skip types that we do not recognize.
    // Indentation reflects the structure of the video file.
    const TYPE = [
        'ftyp' => 0x66747970,           // File Type Box
        'moov' => 0x6D6F6F76,           // Movie Box, exactly one mandatory in file
            'trak' => 0x7472616B,       // Track Box, at least one mandatory
                'tkhd' => 0x746B6864,   // Track Header Box, exactly one mandatory
    ];

    public int $width = 0;      // display width of the video in pixels
    public int $height = 0;     // display height of the video in pixels
    public string $error = '';  // string representation of a thrown Exception

    private $f;                 // file resource identifying an opened video file
    private int $filesize = 0;  // its filesize in bytes

    /* The constructor opens the video file given as $filename, tries to find width
     * and height in it and closes the file. Anything which is unexpected or cannot
     * be handled in the file by this class will throw an Exception. They are
     * caught here and stored as string in $this->error.
     * The file is closed in the finally block and only if it was really opened:
     * fclose throws a TypeError for anything that is not a resource, which the @
     * operator cannot suppress.
     * NOTE: Access the object's properties to read the information which the
     * constructor found. The file is closed when the constructor returns, so the
     * reading methods cannot be used afterwards.
     */
    public function __construct (
        string $filename
    ) // returns nothing
    {
        try {
            if ($this->filesize = filesize ($filename)):
                if ($this->f = fopen ($filename, 'rb')):
                    $this->find_dimensions ();
                endif;
            endif;
        } catch (\Throwable $e) {
            $this->error = (string) $e;
        } finally {
            if (is_resource ($this->f)):
                fclose ($this->f);
            endif;
        }
    }

    /* Seeks through the video file to find width and height. A file consists of
     * boxes. Width and height may be found in a 'tkhd' box which is expected in a
     * 'trak' box which is expected in a 'moov' box. Unknown boxes are skipped. The
     * file pointer should point at the file start at the beginning, but must at
     * least point at a box header.
     */
    private function find_dimensions (
    ): void
    {
        // end position of the trak box that was entered most recently
        $skiptrak = 0;
        while ($this->filesize > ($start = ftell ($this->f))):
            // Read and evaluate the box header.
            [$size, $type] = $this->read_header ();
            if ($this->filesize < ($beyond = $start + $size)):
                throw new \Exception ('Box extends beyond filesize');
            // Continue to read boxes inside a moov box.
            elseif ($type === self::TYPE['moov']):
                continue;
            // Continue to read boxes inside a trak box, but prepare to skip it.
            elseif ($type === self::TYPE['trak']):
                $skiptrak = $beyond;
                continue;
            // Parse the tkhd box. If width and height are found inside, we have
            // what we want; otherwise skip that trak.
            elseif ($type === self::TYPE['tkhd']):
                $this->parse_tkhd ($start, $size);
                if ($this->width and $this->height):
                    return;
                elseif ($skiptrak > $beyond):
                    fseek ($this->f, $skiptrak, SEEK_SET);
                endif;
            // Skip other boxes.
            else:
                fseek ($this->f, $beyond, SEEK_SET);
            endif;
        endwhile;
    }

    /* Reads width and height of a Track Header Box. The $start and the $length of
     * the Track Header Box in bytes within the file must be given. This function
     * does not check that they actually correspond to a Track Header Box. The file
     * pointer must point behind the box header initially and points behind the box
     * at the end. Returns true, if a positive width and height were found and
     * stored in $this->width and $this->height, and false otherwise.
     */
    private function parse_tkhd (
        int $start,
        int $length
    ): bool
    {
        // Width and height are encoded in the last 8 bytes of the Track Header.
        $dimensions = $start + $length - 8;
        // A box that cannot hold version, flags, width and height behind its
        // header is malformed; skip it instead of reading bytes outside of it.
        if ($dimensions < ftell ($this->f) + 4):
            fseek ($this->f, $start + $length, SEEK_SET);
            return false;
        endif;
        // Track Header Boxes are Full Boxes, i.e. they have an 8-bit version and
        // 24-bit flags after the header. We only need the least significant flags.
        fseek ($this->f, 3, SEEK_CUR);
        $flags = $this->read_uint (1);
        // The least significant flag bit signals whether the track is enabled.
        // Tracks that are not enabled are skipped.
        if (!($flags & 1)):
            fseek ($this->f, $start + $length, SEEK_SET);
            return false;
        endif;
        fseek ($this->f, $dimensions, SEEK_SET);
        // Width and height are fixed point 16-bit.16-bit numbers. Only read the
        // part before the decimal point, skip over the rest.
        $width = $this->read_uint (2);
        fseek ($this->f, 2, SEEK_CUR);
        $height = $this->read_uint (2);
        fseek ($this->f, 2, SEEK_CUR);
        // Set the dimensions, if both width and height are available.
        if ($width > 0 and $height > 0):
            $this->width = $width;
            $this->height = $height;
            return true;
        else:
            return false;
        endif;
    }

    /* Reads the header of a box (called atom in QuickTime) from the current
     * position in the file. The box header consists of the box size in bytes
     * (header included) and a type. Both values are returned as an integer array
     * of the format [size, type].
     * NOTE(review): this method was declared without a visibility keyword and is
     * therefore public; the keyword is only spelled out here. It looks like an
     * internal helper - confirm that nothing calls it before making it private.
     */
    public function read_header (
    ): array
    {
        // The box header starts with a 32-bit (= 4-byte) size field.
        $size = $this->read_uint (4);
        // A size value of 0 indicates that the box extends to the end of the file,
        // so the size is computed from filesize, current position and the 4 bytes
        // of the size field that was just read.
        if ($size === 0):
            $size = $this->filesize - ftell ($this->f) + 4;
        endif;
        // Then comes a 32-bit type field. There is a special value 0x75756964
        // ("uuid") that indicates an extended type defined in the 16 bytes
        // following this header. We only need standard boxes though.
        $type = $this->read_uint (4);
        // A size value 1 means that the actual size is in a 64-bit largesize field
        // after the type field, which is read now in that case.
        if ($size === 1):
            $size = $this->read_uint (8);
            $headersize = 16;
        else:
            $headersize = 8;
        endif;
        // Since the box header belongs to the box, the given box size must not
        // undercut the header's actual size.
        if ($size < $headersize):
            throw new \Exception ('Given box size below box header size');
        endif;
        return [$size, $type];
    }

    /* Reads an unsigned integer that is $width bytes wide from the current pointer
     * position in the file and returns the result. UINTs are stored as big-endian
     * binary data in the file. An Exception is thrown, if the integer is wider
     * than PHP's int, does not fit into PHP's signed int or cannot be read
     * completely, e.g. because the file is truncated.
     */
    private function read_uint (
        int $width = 4 // in bytes (i.e. 32 bits = 4 bytes; 64 bits = 8 bytes)
    ): int
    {
        if ($width > PHP_INT_SIZE):
            throw new \Exception ("UINT width ($width bytes) exceeds PHP_INT_SIZE");
        endif;
        $bytes = fread ($this->f, $width);
        // fread returns fewer bytes than requested at the end of the file; such
        // a short read must not be interpreted as a smaller number.
        if ($bytes === false or strlen ($bytes) !== $width):
            throw new \Exception ('Failed to read UINT');
        endif;
        // Transform the binary data (handled by PHP as string) into an int. The
        // binary representation of $bytes and $integer should actually be the
        // same then, except that PHP recognizes the latter as int. hexdec returns
        // a float for values beyond PHP_INT_MAX.
        $integer = hexdec (bin2hex ($bytes));
        if (is_float ($integer)):
            throw new \Exception ('PHPs signed INT too small for this UINT');
        endif;
        return $integer;
    }

} // class MP4
abstract class Preview {

    // the loaded image or computed preview; its concrete type is chosen by the
    // subclass
    protected object|null|false $image;

    /* The constructor is supposed to load an image from a $filename and compute
     * a preview image object according to the $specWidth and $specHeight settings.
     */
    abstract public function __construct (
        string $filename,
        int $specWidth = 0,
        int $specHeight = 0,
    );

    /* Computes how an original image of $fullWidth x $fullHeight pixels has to be
     * cropped and scaled to satisfy the settings $specWidth and $specHeight.
     * Each setting is read by its sign:
     *   > 0  exact size in pixels, which is always met
     *   < 0  maximum size in pixels; the result is smaller iff the original is
     *        smaller or the image ratio requires it
     *   = 0  no restriction
     * Iff the image ratio cannot be retained, a central crop of the original
     * image is used.
     *
     * The returned array is
     * [cropX, cropY, cropWidth, cropHeight, miniWidth, miniHeight] where
     *   crop: the part of the original image that is used in the preview
     *   mini: the size of the resized image
     * A \ValueError is thrown, if the original width or height is not positive.
     */
    protected static function coordinates (
        int $fullWidth,
        int $fullHeight,
        int $specWidth,
        int $specHeight,
    ): array
    {
        if ($fullWidth < 1 or $fullHeight < 1):
            throw new \ValueError ('Width and height of an image must be positive');
        endif;

        // Unless both dimensions are restricted and one of them is exact, the
        // whole original image is used.
        $cropWidth = $fullWidth;
        $cropHeight = $fullHeight;

        // Explanation of the cases in German:
        // https://prlbr.de/2016/vorschau-bilder-berechnen/
        if ($specWidth === 0 and $specHeight === 0):
            // no restriction at all
            $miniWidth = $fullWidth;
            $miniHeight = $fullHeight;
        elseif ($specHeight === 0):
            // only the width is restricted; the height follows the image ratio
            $miniWidth = $specWidth > 0? $specWidth: min ($fullWidth, -$specWidth);
            $miniHeight = self::roundp ($fullHeight * $miniWidth / $fullWidth);
        elseif ($specWidth === 0):
            // only the height is restricted; the width follows the image ratio
            $miniHeight = $specHeight > 0? $specHeight: min ($fullHeight, -$specHeight);
            $miniWidth = self::roundp ($fullWidth * $miniHeight / $fullHeight);
        elseif ($specWidth < 0 and $specHeight < 0):
            // two maxima: scale down by the stricter factor, never scale up
            $z = min (-$specWidth / $fullWidth, -$specHeight / $fullHeight, 1);
            $miniWidth = self::roundp ($fullWidth * $z);
            $miniHeight = self::roundp ($fullHeight * $z);
        else:
            // both restricted and at least one exact: the ratio may change
            if ($specWidth > 0 and $specHeight > 0):
                $miniWidth = $specWidth;
                $miniHeight = $specHeight;
            elseif ($specWidth > 0):
                $miniWidth = $specWidth;
                $miniHeight = min (self::roundp ($fullHeight * $miniWidth / $fullWidth), -$specHeight);
            else:
                $miniHeight = $specHeight;
                $miniWidth = min (self::roundp ($fullWidth * $miniHeight / $fullHeight), -$specWidth);
            endif;
            // largest part of the original that has the ratio of the preview
            $z = min ($fullWidth / $miniWidth, $fullHeight / $miniHeight);
            $cropWidth = self::roundp ($miniWidth * $z);
            $cropHeight = self::roundp ($miniHeight * $z);
        endif;

        // Centre the crop within the original image. The offsets are (0, 0)
        // whenever the whole image is used.
        return [
            ($fullWidth - $cropWidth) >> 1,
            ($fullHeight - $cropHeight) >> 1,
            $cropWidth,
            $cropHeight,
            $miniWidth,
            $miniHeight,
        ];
    }

    /* Rounds a $real number to the nearest integer, but never returns less
     * than 1.
     */
    private static function roundp (
        int|float $real
    ): int
    {
        $nearest = (int) round ($real);
        return $nearest < 1? 1: $nearest;
    }

    /* Save the image at the given $filename. The format to be used is defined by
     * the file extension. The $quality ranges from 0 to 100 where 0 means least
     * quality, tiny file size and 100 means best quality, big file size. A value
     * in the range 80 to 85 is good for the web. Returns true on success and false
     * for unsupported types.
     */
    abstract public function save (
        string $filename,
        int $quality = 100,
    ): bool;

    /* Return for a given image $type whether the class supports saving it. The
     * type should be an uppercase preferred file extension such as JPG or JXL.
     */
    abstract public static function supports (
        string $type,
    ): bool;

} // abstract class Preview
class PreviewGD extends Preview {

    /* The constructor computes a preview, whose size is constrained by $specWidth
     * and $specHeight, from an image at a given $filename. The preview is held in
     * a \GdImage object. A PreviewException is thrown, if the image cannot be
     * loaded or processed.
     */
    public function __construct (
        string $filename,
        int $specWidth = 0,
        int $specHeight = 0,
    )
    {
        // load original image file; a failed read must not be handed to GD,
        // since false is not a valid argument for imagecreatefromstring
        $data = @file_get_contents ($filename);
        $this->image = is_string ($data)? @imagecreatefromstring ($data): false;
        if (!$this->image):
            throw new PreviewException ('Could not load image');
        endif;

        // determine original image size and orientation
        $fullWidth = imagesx ($this->image);
        $fullHeight = imagesy ($this->image);
        $orientation = self::orientation ($filename);
        if ($fullWidth < 1 or $fullHeight < 1):
            throw new PreviewException ('Image width or height is not positive');
        endif;

        // We will transform images that are not saved upright into their natural
        // position after resizing. Transforming a smaller image is more efficient.
        // But turning them late requires swapping the spec'ed width and height.
        if (in_array ($orientation, [5, 6, 7, 8], true)):
            [$specHeight, $specWidth] = [$specWidth, $specHeight];
        endif;

        $this->resize (...self::coordinates ($fullWidth, $fullHeight, $specWidth, $specHeight));
        $this->upright ($orientation);
    }

    /* Returns the EXIF orientation tag of the image at $filename as integer, or 0
     * if the tag is not available. The exif extension is optional and independent
     * from GD; a call to a missing function would be a fatal Error which the @
     * operator cannot suppress, hence the check.
     */
    private static function orientation (
        string $filename,
    ): int
    {
        if (!function_exists ('exif_read_data')):
            return 0;
        endif;
        $exif = @exif_read_data ($filename, 'IFD0') ?: [];
        return intval ($exif['Orientation'] ?? 0);
    }

    /* Resize the image by copying (a crop) of the original image resampled to a
     * newly created white canvas with which we replace the original then. We use
     * the GD library only for JPEGs at this point and these do not support
     * transparency, hence the white background. Gamma correction is applied
     * because the resizing function assumes a linear color space while GD
     * apparently uses sRGB. The gamma correction is an approximation to the
     * correct colorspace conversion.
     */
    private function resize (
        int $cropX,
        int $cropY,
        int $cropWidth,
        int $cropHeight,
        int $miniWidth,
        int $miniHeight,
    ): void
    {
        imagegammacorrect ($this->image, 2.2, 1);
        $preview = imagecreatetruecolor ($miniWidth, $miniHeight);
        if ($preview === false):
            throw new PreviewException ('Could not create preview canvas');
        endif;
        imagefill ($preview, 0, 0, imagecolorallocate ($preview, 255, 255, 255));
        imagecopyresampled ($preview, $this->image, 0, 0, $cropX, $cropY, $miniWidth, $miniHeight, $cropWidth, $cropHeight);
        $this->image = $preview;
        imagegammacorrect ($this->image, 1, 2.2);
    }

    /* Replaces the image by a copy which is rotated anti-clockwise by $angle
     * degrees. A failed rotation is reported as PreviewException instead of
     * leaving false in $this->image for later GD calls to stumble over.
     */
    private function rotate (
        int $angle,
    ): void
    {
        $rotated = imagerotate ($this->image, $angle, 0);
        if ($rotated === false):
            throw new PreviewException ('Could not rotate image');
        endif;
        $this->image = $rotated;
    }

    /* Save the image at the given $filename. The format to be used is defined by
     * the file extension. The $quality ranges from 0 to 100 where 0 means least
     * quality, tiny file size and 100 means best quality, big file size. A value
     * in the range 80 to 85 is good for the web. Returns true on success and false
     * for unsupported types. A PreviewException is thrown, if a supported type
     * cannot be written.
     */
    public function save (
        string $filename,
        int $quality = 100,
    ): bool
    {
        $type = strtoupper (pathinfo ($filename, PATHINFO_EXTENSION));
        if (in_array ($type, ['JPEG', 'JPG'], true) and self::supports ($type)):
            // progressive JPEG
            imageinterlace ($this->image, true);
            if (@imagejpeg ($this->image, $filename, $quality)):
                return true;
            else:
                throw new PreviewException ('Failed to save preview');
            endif;
        else:
            return false;
        endif;
    }

    /* Return for a given image $type whether the class supports saving it. The
     * type should be an uppercase preferred file extension such as JPG or JXL.
     * The answer per type is computed once and cached for the rest of the request.
     */
    public static function supports (
        string $type,
    ): bool
    {
        static $supports = [];
        return $supports[$type] ??= function_exists ('imagetypes') && match ($type) {
            'JPG',
            'JPEG' => boolval (imagetypes () & IMG_JPG),
            default => false,
        };
    }

    /* Rotate and/or mirror the image so that it becomes oriented as displayed, if
     * the EXIF $orientation tag (see https://exiftool.org/TagNames/EXIF.html)
     * indicates that it has a different orientation. Other values, including 1
     * for an upright image and 0 for a missing tag, leave the image unchanged.
     */
    private function upright (
        int $orientation,
    ): void
    {
        switch ($orientation):
            case 2:
                imageflip ($this->image, IMG_FLIP_HORIZONTAL);
                break;
            case 3:
                imageflip ($this->image, IMG_FLIP_BOTH);
                break;
            case 4:
                imageflip ($this->image, IMG_FLIP_VERTICAL);
                break;
            case 5:
                imageflip ($this->image, IMG_FLIP_HORIZONTAL);
                $this->rotate (90); // anti-clockwise
                break;
            case 6:
                $this->rotate (270);
                break;
            case 7:
                imageflip ($this->image, IMG_FLIP_HORIZONTAL);
                $this->rotate (270);
                break;
            case 8:
                $this->rotate (90);
                break;
            default:
                break;
        endswitch;
    }

} // class PreviewGD
class PreviewImagick extends Preview {

	/* The constructor computes a preview, whose size is constrained by $specWidth
	 * and $specHeight, from an image at a given $filename. The preview is held in
	 * an \Imagick object. Throws a PreviewException, if the image can not be
	 * loaded or if its width or height is not positive.
	 */
	public function __construct (
		string $filename,
		int $specWidth = 0,
		int $specHeight = 0,
	)
	{
		// load original image file; keep the cause for debugging
		try {
			$this->image = new \Imagick ($filename);
		} catch (\ImagickException $e) {
			throw new PreviewException ('Could not load image', 0, $e);
		}

		// determine original image size and orientation
		$fullWidth = $this->image->getImageWidth ();
		$fullHeight = $this->image->getImageHeight ();
		$orientation = $this->image->getImageOrientation ();
		if ($fullWidth < 1 or $fullHeight < 1):
			throw new PreviewException ('Image width or height is not positive');
		endif;

		// We will transform images that are not saved upright into their natural
		// position after resizing. Transforming a smaller image is more efficient.
		// But turning them late requires swapping the spec'ed width and height.
		if (in_array ($orientation, [5, 6, 7, 8], true)):
			[$specHeight, $specWidth] = [$specWidth, $specHeight];
		endif;

		$this->resize (...self::coordinates ($fullWidth, $fullHeight, $specWidth, $specHeight));
		$this->upright ($orientation);
	}

	/* Resize the original image by cropping it to the aspect ratio of the desired
	 * preview geometry and then scaling it down or up. We use the linear RGB
	 * colorspace during resizing, because the resizing filter expects linear
	 * values, see https://imagemagick.org/Usage/resize/#resize_colorspace. Then we
	 * switch to sRGB that is standard on the web. Strip metadata.
	 */
	private function resize (
		int $cropX,
		int $cropY,
		int $cropWidth,
		int $cropHeight,
		int $miniWidth,
		int $miniHeight,
	): void
	{
		// Crop whenever the crop rectangle differs from the full image. Checking
		// the offsets alone would miss a rectangle that starts at 0/0 but is
		// smaller than the image; the scaling step below relies on the image
		// having exactly the crop dimensions.
		$needsCrop = $cropX !== 0
			|| $cropY !== 0
			|| $cropWidth !== $this->image->getImageWidth ()
			|| $cropHeight !== $this->image->getImageHeight ();
		if ($needsCrop):
			$this->image->cropImage ($cropWidth, $cropHeight, $cropX, $cropY);
		endif;

		if ($this->image->getImageColorspace () !== \Imagick::COLORSPACE_RGB):
			$this->image->transformImageColorspace (\Imagick::COLORSPACE_RGB);
		endif;
		if ($cropWidth !== $miniWidth or $cropHeight !== $miniHeight):
			$this->image->resizeImage ($miniWidth, $miniHeight, \Imagick::FILTER_CATROM, 1);
		endif;
		$this->image->transformImageColorspace (\Imagick::COLORSPACE_SRGB);
		$this->image->stripImage ();
	}

	/* Save the image at the given $filename. The format to be used is defined by
	 * the file extension. The $quality ranges from 0 to 100 where 0 means least
	 * quality, tiny file size and 100 means best quality, big file size. A value
	 * in the range 80 to 85 is good for the web. Returns true on success and false
	 * for unsupported types. Throws a PreviewException, if a supported type can
	 * not be written.
	 */
	public function save (
		string $filename,
		int $quality = 100,
	): bool
	{
		$type = strtoupper (pathinfo ($filename, PATHINFO_EXTENSION));
		if (!self::supports ($type)):
			return false;
		endif;

		// prepare image for saving
		switch ($type):
			case 'JPEG':
			case 'JPG':
				if ($this->image->getImageAlphaChannel ()):
					// JPGs do not support transparency, hence the white background.
					// Work on a clone so that the preview keeps its alpha channel.
					$image = clone $this->image;
					$image->setImageBackgroundColor ('#FFF');
					$image->setImageAlphaChannel (\Imagick::ALPHACHANNEL_REMOVE);
				else:
					$image = $this->image;
				endif;
				$image->setImageCompressionQuality ($quality); // apparently works for JPG
				$image->setImageFormat ('JPEG');
				$image->setInterlaceScheme (\Imagick::INTERLACE_JPEG);
				break;
			case 'JXL':
				$image = $this->image;
				$image->setCompressionQuality ($quality); // apparently works for JXL
				$image->setOption ('jxl:effort', '8');
				$image->setImageFormat ('JXL');
				// NOTE: Imagick does not support progressive encoding of JXL yet.
				break;
			default:
				return false;
		endswitch;

		// We try file_put_contents when Imagick::writeImage fails without throwing
		// an \ImagickException because that can happen on some systems according to
		// php.net comments -- maybe due to rights issues of ImageMagick vs. PHP?
		// Every failure, thrown or not, is reported as a PreviewException; a thrown
		// cause is attached as the previous exception.
		$success = false;
		$cause = NULL;
		try {
			$success = $image->writeImage ($filename) || @file_put_contents ($filename, $image);
		} catch (\Throwable $e) {
			$cause = $e;
		}
		if (!$success):
			throw new PreviewException ('Failed to save preview', 0, $cause);
		endif;
		return true;
	}

	/* Return for a given image $type whether the class supports saving it. The
	 * type should be an uppercase preferred file extension such as JPG or JXL.
	 * The answer is computed once per $type and remembered for later calls.
	 */
	public static function supports (
		string $type,
	): bool
	{
		static $supports = [];
		return $supports[$type] ??= class_exists ('\\Imagick') && match ($type) {
			'JPEG',
			'JPG' => !empty (\Imagick::queryFormats ('JPEG')),
			'JXL' => !empty (\Imagick::queryFormats ('JXL')),
			default => false,
		};
	}

	/* Rotate and/or mirror the image so that it becomes oriented as displayed, if
	 * the EXIF $orientation tag (see https://exiftool.org/TagNames/EXIF.html)
	 * indicates that it has a different orientation.
	 */
	private function upright (
		int $orientation,
	): void
	{
		switch ($orientation):
			case 2:
				$this->image->flopImage (); // mirror horizontal
				break;
			case 3:
				$this->image->rotateImage ('#FFF', 180);
				break;
			case 4:
				$this->image->flipImage (); // mirror vertical
				break;
			case 5:
				$this->image->flopImage ();
				$this->image->rotateImage ('#FFF', 270); // clockwise
				break;
			case 6:
				$this->image->rotateImage ('#FFF', 90);
				break;
			case 7:
				$this->image->flopImage ();
				$this->image->rotateImage ('#FFF', 90);
				break;
			case 8:
				$this->image->rotateImage ('#FFF', 270);
				break;
			default:
				break;
		endswitch;
	}

} // class PreviewImagick
/* Reads $alphabetic as a base-26 numeral whose digits are the letters A-Z with
 * the values 1-26 (a-z instead, if $is_lowercase is true) and returns its
 * value as a decimal number string. BCMath is used, so the result is not
 * limited to the integer range.
 */
function convert_alpha_number (
	string $alphabetic,
	bool $is_lowercase = false
): string
{
	// ord ('A') is 65 and ord ('a') is 97, so the first letter maps to 1.
	$base = $is_lowercase? 96: 64;
	return array_reduce (
		str_split ($alphabetic),
		fn (string $sum, string $letter): string
			=> bcadd (bcmul ($sum, '26'), (string) (ord ($letter) - $base)),
		'0'
	);
}
/* Formats a natural $number given as decimal string for use as list counter.
 * $type '1' returns the $number unchanged; 'A' and 'a' return the uppercase or
 * lowercase alphabetic base-26 representation (A, B, ..., Z, AA, AB, ...).
 * NULL is returned for any other $type and for numbers below 1 in the
 * alphabetic types.
 */
function convert_number_string (
	string $number,
	string $type
): string|null
{
	if ($type === '1'):
		return $number;
	endif;
	if ($type !== 'A' and $type !== 'a'):
		return NULL;
	endif;
	if (bccomp ($number, '0') <= 0):
		return NULL;
	endif;

	// https://drafts.csswg.org/css-counter-styles-3/#alphabetic-system
	// There is no zero digit, hence the subtraction of 1 before each step.
	$letters = '';
	while ($number !== '0'):
		$number = bcsub ($number, '1');
		$letters = chr (65 + (int) bcmod ($number, '26')) . $letters;
		$number = bcdiv ($number, '26', 0);
	endwhile;

	if ($type === 'a'):
		return strtolower ($letters);
	endif;
	return $letters;
}
/* Turns a $byte into an HTML decimal numeric character reference, if it is a
 * printable ASCII character other than the blank space, i.e. if its code lies
 * between 33 and 126. Any other $byte is returned as it is.
 */
function encode_printable_ascii (
	string $byte
): string
{
	$code = ord ($byte);
	if ($code <= 32 or $code >= 127):
		return $byte;
	endif;
	return '&#' . $code . ';';
}
/* Encodes characters with a special meaning in HTML and also the dollar sign,
 * which is used as mark for math formulas by Aneamal, in a $string as HTML
 * character references. The $string is expected to be UTF-8 encoded; characters
 * that do not match UTF-8 are substituted by a Unicode replacement character.
 * The encoded $string is returned.
 */
function encode_special_chars (
	string $string
): string
{
	$html = htmlspecialchars ($string, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
	// The dollar sign is replaced after htmlspecialchars, so that the ampersand
	// of its character reference is not encoded again. The reference is built
	// from two pieces to keep it from being decoded by tools processing this
	// source file.
	return str_replace ('$', '&#' . '36;', $html);
}
/* Splits a $string at commas into at most $limit items (the $limit works as in
 * PHP's explode function) and returns the items as array, each one stripped of
 * leading and trailing whitespace.
 */
function explode_comma_separated (
	string $string,
	int $limit = PHP_INT_MAX
): array
{
	$items = [];
	foreach (explode (',', $string, $limit) as $item):
		$items[] = trim ($item);
	endforeach;
	return $items;
}
/* Splits a $string at the first $delimiter and returns both parts cast to
 * integer as array [first, second]. The second integer is zero, if the
 * $delimiter does not occur in the $string.
 */
function explode_integer_pair (
	string $delimiter,
	string $string
): array
{
	$parts = explode ($delimiter, $string, 2);
	return [(int) $parts[0], (int) ($parts[1] ?? 0)];
}
/* Returns an array of substrings of $string formed by splitting $string at
 * $delimiter that is not protected with a backslash. The returned array will
 * contain a maximum of $limit elements with the last element containing the
 * rest of $string. Any $limit below 2 is treated as 1. The $delimiter may
 * consist of more than one byte.
 */
function explode_unslashed (
	string $delimiter,
	string $string,
	int $limit = PHP_INT_MAX
): array
{
	if ($limit < 2):
		return [$string];
	endif;

	// Continue searching behind the whole delimiter, not just behind its first
	// byte; otherwise the rest of a multi-byte delimiter would end up in the
	// next piece. Advance by at least one byte to guarantee progress.
	$step = max (1, strlen ($delimiter));
	$final = $limit - 1;
	$count = $offset = 0;
	$array = [];
	while ($count < $final and !is_null ($pos = strpos_unslashed ($string, $delimiter, $offset))):
		$array[$count++] = substr ($string, $offset, $pos - $offset);
		$offset = $pos + $step;
	endwhile;

	// the remainder, which may still contain delimiters if $limit was reached
	$array[] = substr ($string, $offset);
	return $array;
}
/* Returns a text that is composed from given $lines from the file referenced
 * by $filename. $lines is a string of comma-separated integers and ranges; a
 * range consists of two integers separated by a colon. Here's an example:
 * "1,6,9:11,-3". This example would compose a text from the 1st, 6th, 9th,
 * 10th, 11th line and the 3rd last line from the file. A range whose end lies
 * before its start yields the lines in reverse order. Lines that do not exist
 * are skipped. The function returns the whole text if the $lines parameter is
 * not provided, and NULL if the file can not be read.
 * NOTE: The file should exist, so instead of checking for existence first we
 * assume it does, suppress errors during reading and check the return value.
 */
function file_get_lines (
	string $filename,
	string|null $lines = NULL
): string|null
{
	// return whole file, if no line choice was made
	if ($lines === NULL):
		$text = @file_get_contents ($filename);
		return $text === false? NULL: $text;
	endif;

	// load lines of the file
	$text = @file ($filename, FILE_IGNORE_NEW_LINES);
	if (!is_array ($text)):
		return NULL;
	endif;
	$count = count ($text);

	$output = [];
	foreach (explode (',', $lines) as $item):
		if ($item === ''):
			continue;
		endif;
		$range = explode (':', $item, 2);
		if (isset ($range[1])):
			$start = get_line_index ((int) $range[0], $count);
			$until = get_line_index ((int) $range[1], $count);
			if ($until >= $start):
				// ascending range, clamped to the existing lines
				$start = max ($start, 0);
				$until = min ($until, $count - 1);
				for ($i = $start; $i <= $until; ++$i):
					$output[] = $text[$i];
				endfor;
			else:
				// descending range, clamped to the existing lines
				$start = min ($start, $count - 1);
				$until = max ($until, 0);
				for ($i = $start; $i >= $until; --$i):
					$output[] = $text[$i];
				endfor;
			endif;
		else:
			// single line
			$i = get_line_index ((int) $item, $count);
			if (isset ($text[$i])):
				$output[] = $text[$i];
			endif;
		endif;
	endforeach;
	return implode ("\n", $output);
}
/* Lists a $directory such as '/foo/bar/baz' together with all of its parent
 * directories up to the root, deepest first: ['/foo/bar/baz', '/foo/bar',
 * '/foo', '']. This is pure string manipulation: nothing is checked against
 * the file system, empty path components and '.' are dropped, but the parent
 * directory identifier '..' is not resolved.
 */
function get_directories (
	string $directory
): array
{
	$path = '';
	$stack = [''];
	foreach (explode ('/', trim ($directory, '/')) as $segment):
		if ($segment === '' or $segment === '.'):
			continue;
		endif;
		$path .= '/' . $segment;
		// deeper directories go to the front of the list
		array_unshift ($stack, $path);
	endforeach;
	return $stack;
}
/* Returns an HTML attribute 'loading' for HTML <img> and <iframe> elements.
 * A positive $lazy yields 'lazy' and a $lazy of 0 yields 'eager'. A negative
 * $lazy yields an empty string (which browsers normally interpret as eager
 * except in data-saving modes) for the first two such calls and 'lazy'
 * afterwards. A call with a positive $lazy ends that grace period at once.
 */
function get_loading_attribute (
	int $lazy = -1
): string
{
	static $countdown = 2; // remaining automatic calls that stay empty

	if ($lazy === 0):
		return " loading='eager'";
	endif;
	if ($lazy > 0):
		$countdown = 0;
		return " loading='lazy'";
	endif;
	if ($countdown > 0):
		--$countdown;
		return '';
	endif;
	return " loading='lazy'";
}
/* Translates a $line number of a file into the index of that line in an array
 * of a given $length as created by PHP's file function. Positive line numbers
 * count from the start of the file (line 1 is index 0), negative ones from its
 * end (line -1 is the last index). The returned index lies outside the index
 * range of the array, iff the line does not exist in the file.
 */
function get_line_index (
	int $line,
	int $length
): int
{
	if ($line < 0):
		return $line + $length;
	endif;
	return $line - 1;
}
/* Extracts and returns the value of the charset attribute from the $mediatype
 * of a data URI or NULL, if no charset is found. The syntax of $mediatype is
 * described among other places at
 * https://tools.ietf.org/html/rfc2397#section-3
 * https://tools.ietf.org/html/rfc2045#section-5.1
 * This function may not handle all possible media types correctly, for
 * instance those which use semicolons inside quoted parameter values, which
 * SHOULD not be used in data URIs, but can be used. It works for sensible URIs
 * that are not deliberately made more complicated than necessary. The function
 * should only be used to handle data URIs entered by authors to be parsed by
 * the Aneamal Translator, not for arbitrary data URIs and not to validate data
 * URIs passed to browsers.
 */
function get_mediatype_charset (
	string $mediatype
): string|null
{
	foreach (explode (';', $mediatype) as $parameter):
		// A parameter needs a non-empty name followed by an equals sign.
		$pair = explode ('=', $parameter, 2);
		if (!isset ($pair[1]) or $pair[0] === ''):
			continue;
		endif;
		// The name is compared case-insensitively after percent-decoding.
		if (strtolower (rawurldecode ($pair[0])) === 'charset'):
			return trim (rawurldecode ($pair[1]), '"');
		endif;
	endforeach;
	return NULL;
}
/* Classifies an Aneamal metadata $name and returns an integer code for its
 * type: 0 for an empty name, META_CUSTOM for names starting with an ampersand,
 * META_SPECIAL for names that start with a lowercase ASCII letter and continue
 * with lowercase ASCII letters, digits or hyphens only, and META_PLAIN for
 * everything else. Backslashes should not be removed from the $name before
 * running this function since they are significant to distinguish between
 * plain and special metadata names.
 */
function get_meta_type (
	string $name
): int
{
	return match (true) {
		$name === '' => 0,
		$name[0] === '&' => META_CUSTOM,
		is_made_of ($name, 'abcdefghijklmnopqrstuvwxyz', '-0123456789abcdefghijklmnopqrstuvwxyz') => META_SPECIAL,
		default => META_PLAIN,
	};
}
/* Returns a short string to be used as HTML attribute value, which consists of
 * the $prefix and a number that is unique within a given $realm, e.g. the name
 * of a HTML <input> element within its form. Numbers start at 1 in each realm.
 * Prefixes currently in use:
 * _f : form IDs
 * _m : math
 * _t : t-modules
 * _x : x-modules
 */
function get_unique (
	string $realm = '',
	string $prefix = '_'
): string
{
	static $count = []; // last number handed out per realm
	$count[$realm] = ($count[$realm] ?? 0) + 1;
	return $prefix . $count[$realm];
}
/* Classifies an $uri as one of a few types of absolute and relative URIs and
 * returns the integer code of that type: 0 for an empty string, URI_REMOTE for
 * protocol-relative URIs and URIs with a scheme other than data, URI_DATA for
 * data URIs, URI_PAGE for URIs starting with ? or #, URI_LOCAL otherwise.
 */
function get_uri_type (
	string $uri
): int
{
	if ($uri === ''):
		return 0;
	endif;
	if ($uri[0] === '/'):
		// two leading slashes mark a protocol-relative URI
		return ($uri[1] ?? '') === '/'? URI_REMOTE: URI_LOCAL;
	endif;
	if ($uri[0] === '?' or $uri[0] === '#'):
		return URI_PAGE;
	endif;
	$scheme = parse_url ($uri, PHP_URL_SCHEME);
	if (!$scheme):
		return URI_LOCAL;
	endif;
	return strtolower ($scheme) === 'data'? URI_DATA: URI_REMOTE;
}
/* Returns an array [width, height] which represents the pixel dimensions of
 * a given video identified by $filename. If the dimensions can not be
 * determined, NULL is returned instead. Currently, the only supported file
 * extensions are .mp4 and .webm corresponding to the video container formats.
 * The extension is matched case-insensitively.
 */
function get_video_dimensions (
	string $filename
): array|null
{
	// Without a dot there is no extension. Bail out instead of handing false
	// to substr, which is a TypeError when strict types are enabled.
	$dot = strrpos ($filename, '.');
	if ($dot === false):
		return NULL;
	endif;

	$video = match (strtolower (substr ($filename, $dot))) {
		'.mp4' => new MP4 ($filename), // would be similar enough to .3gp, .mov, .m4v
		'.webm' => new Matroska ($filename), // would be similar enough to .mkv
		default => NULL,
	};
	if ($video === NULL):
		return NULL;
	endif;

	$width = $video->width;
	$height = $video->height;
	return $width > 0 && $height > 0? [$width, $height]: NULL;
}
/* Merges consecutive string items of an $array into groups and returns the
 * array of merged strings. The $callback is called with every string and is
 * expected to return true, if that string starts a new group, and false
 * otherwise; the very first string always starts a group. The strings of a
 * group are glued together with $joint. Each group is stored under the key of
 * its first item.
 */
function joint (
	array $array,
	callable $callback,
	string $joint = "\n"
): array
{
	$groups = [];
	$head = NULL; // key of the group that is currently being filled
	foreach ($array as $key => $item):
		$starts = $callback ($item);
		if ($starts or $head === NULL):
			$head = $key;
			$groups[$head] = $item;
		else:
			$groups[$head] .= $joint . $item;
		endif;
	endforeach;
	return $groups;
}
/* Turns an $array of attribute name => attribute value pairs into a string of
 * HTML attributes that can be inserted into an opening HTML tag. A NULL value
 * produces a bare attribute name, an array value is written as token set and
 * any other value is encoded and wrapped in single quotes. Every attribute is
 * preceded by a space, so the returned string has a leading space unless the
 * $array is empty.
 */
function implode_html_attributes (
	array $array
): string
{
	$html = '';
	foreach ($array as $name => $value):
		$html .= match (true) {
			$value === NULL => " $name",
			is_array ($value) => " $name='" . implode_html_attribute_value ($value) . "'",
			default => " $name='" . encode_special_chars ($value) . "'",
		};
	endforeach;
	return $html;
}
/* Turns an $array of HTML attribute tokens into a space-separated token set
 * that can be used as a HTML attribute value, for example as value of the
 * HTML class attribute. Duplicates and empty strings are dropped, the
 * remaining tokens are sorted as strings and the result is HTML-encoded.
 */
function implode_html_attribute_value (
	array $array
): string
{
	$tokens = [];
	foreach (array_unique ($array) as $token):
		if ($token !== ''):
			$tokens[] = $token;
		endif;
	endforeach;
	sort ($tokens, SORT_STRING);
	return encode_special_chars (implode (' ', $tokens));
}
/* Includes the PHP file $extension, providing parameters in the $_ array, and
* forwards the included file's return value. NOTE: DO NOT CHANGE THE PARAMETER
* NAMES OF THIS FUNCTION. Scripts included via this function could rely on
* them. The purpose of this otherwise trivial function is to create a local
* variable scope for the included file: the included code sees exactly the two
* parameters $extension and $_ as local variables. For the same reason no
* further local variables must be introduced here.
*/
function include_module (
string $extension,
array $_ = []
): mixed // SHOULD be a Closure or a string, cast others to string
{
// The include expression evaluates to whatever the included file returns.
return include $extension;
}
/* Returns whether a given $string matches a simple pattern. With only $string
 * and $charlist_1 given, the result is true iff $string contains only
 * characters that are also in $charlist_1; an empty $string results in true.
 * With $charlist_2 given as well, the result is true iff the first byte of
 * $string is in $charlist_1 and all further characters of $string are in
 * $charlist_2; an empty $string results in false then.
 */
function is_made_of (
	string $string,
	string $charlist_1,
	string|null $charlist_2 = NULL
): bool
{
	if ($charlist_2 !== NULL):
		if ($string === '' or !str_contains ($charlist_1, $string[0])):
			return false;
		endif;
		// everything behind the first byte must stem from the second list
		return strspn ($string, $charlist_2, 1) === strlen ($string) - 1;
	endif;
	return strspn ($string, $charlist_1) === strlen ($string);
}
/* Returns whether a given $filename identifies an existing regular file, i.e.
 * not a directory, that is readable.
 */
function is_readable_file (
	string $filename
): bool
{
	if (!is_file ($filename)):
		return false;
	endif;
	return is_readable ($filename);
}
/* Returns whether the byte at position $pos in a $string (starting at 0 for
 * the first byte) is protected by a backslash, i.e. whether it is directly
 * preceded by an odd number of backslashes. $pos may equal the length of the
 * $string to ask about the position right behind its end. Positions below 1
 * or beyond the length of the $string are never slashed.
 */
function is_slashed (
	string $string,
	int $pos
): bool
{
	if ($pos < 1 or $pos > strlen ($string)):
		return false;
	endif;
	// count the uninterrupted run of backslashes that ends right before $pos
	$slashes = 0;
	for ($i = $pos - 1; $i >= 0 and $string[$i] === '\\'; --$i):
		++$slashes;
	endfor;
	return $slashes % 2 === 1;
}
/* Makes sure that a given $string is valid UTF-8 by replacing invalid byte
 * sequences with the Unicode replacement character U+FFFD, removes an initial
 * UTF-8 byte order mark and normalizes line breaks to \n. Returns the result.
 * NOTE: The mbstring substitute character is changed as a side effect, if the
 * $string is not valid UTF-8.
 */
function normalize_text (
	string $string,
): string
{
	// repair strings that do not match UTF-8
	$valid = mb_check_encoding ($string, 'UTF-8');
	if (!$valid):
		mb_substitute_character (0xFFFD);
		$string = mb_scrub ($string, 'UTF-8');
	endif;

	// drop the optional byte order mark (BOM)
	if (strncmp ($string, "\xEF\xBB\xBF", 3) === 0):
		$string = substr ($string, 3);
	endif;

	// Turn Windows-style (\r\n) and old Mac-style (\r) line breaks into
	// Unix-style ones; strtr tries the longer key first.
	return strtr ($string, ["\r\n" => "\n", "\r" => "\n"]);
}
/* Parses a file token which may be found at the beginning of Aneamal blocks and
 * looks like [type:clue] where the :clue part is optional and the
 * case-insensitive type is converted to small letters. If the first byte of the
 * type is "a", "x" or "t" and the second byte is "-", the type is further
 * separated into supertype-subtype.
 * The $token must be provided to the function WITHOUT surrounding square
 * brackets. Here are valid example tokens:
 * Just a type: d
 * Type with clue: i:Five foxes frolic freely.
 * Type with subtype: a-warning
 * Type with subtype and clue: t-excel:sheet 2
 * The function returns an array [(super)type, subtype, clue] where each item is
 * a string or NULL if missing.
 */
function parse_file_token (
	string $token = ''
): array
{
	if ($token === ''):
		return [NULL, NULL, NULL];
	endif;
	$initial = strtolower ($token[0]);
	$second = $token[1] ?? NULL;
	if ($second === NULL):
		return [$initial, NULL, NULL];
	endif;

	// position of the first colon at or behind the third byte, if any
	$colon = strpos ($token, ':', 2);
	$hasClue = $colon !== false;

	// supertype-subtype with optional clue
	if ($second === '-' and str_contains ('atx', $initial)):
		$subtype = $hasClue? substr ($token, 2, $colon - 2): substr ($token, 2);
		$clue = $hasClue? substr ($token, $colon + 1): NULL;
		return [$initial, mb_strtolower ($subtype, 'UTF-8'), $clue];
	endif;

	// one-letter type with clue
	if ($second === ':'):
		return [$initial, NULL, substr ($token, 2)];
	endif;

	// longer type with clue
	if ($hasClue):
		return [strtolower (substr ($token, 0, $colon)), NULL, substr ($token, $colon + 1)];
	endif;

	// longer type without clue
	return [strtolower ($token), NULL, NULL];
}
/* Prepares a barely formatted text $string for use as a HTML attribute value
 * and returns it. Barely formatted means that the only character with a special
 * meaning is the backslash: before a line break it removes that line break,
 * before any other character it takes away the special meaning of that
 * character. The backslashes are resolved first and the result is HTML-encoded
 * afterwards.
 */
function prepare_html_attribute (
	string $string
): string
{
	$plain = strip_slashed_breaks_and_slashes ($string);
	return encode_special_chars ($plain);
}
/* Encodes a UTF-8 $string so that it can be used as HTML id and URL fragment
 * and CSS selector. The result contains only lowercase letters from the ASCII
 * range, digits and the hyphen -.
 */
function prepare_html_id (
	string $string
): string
{
	// Normalization: Treat no-break space as space, remove soft hyphens and
	// turn letters to lowercase. More could be done, but we prefer simplicity.
	$string = str_replace ("\u{A0}", "\x20", $string);
	$string = str_replace ("\u{AD}", '', $string);
	$string = mb_strtolower ($string, 'UTF-8');

	// Handle the bytes (i.e. not Unicode characters) one by one: encode
	// non-ASCII bytes as two lowercase hexadecimal digits; keep ASCII letters
	// and digits; collapse runs of other ASCII bytes to a single hyphen.
	$id = '';
	for ($i = 0, $length = strlen ($string); $i < $length; ++$i):
		$byte = $string[$i];
		if (ord ($byte) > 0x7F):
			$id .= bin2hex ($byte);
		elseif (str_contains ('abcdefghijklmnopqrstuvwxyz0123456789', $byte)):
			$id .= $byte;
		elseif (!str_ends_with ($id, '-')):
			$id .= '-';
		endif;
	endfor;

	// Hyphens at either end carry no information.
	return trim ($id, '-');
}
/* Returns the value for a given $key from the memory (a file on the disk) that
* is specific to the Query String of the current request. If the $key can not
* be found, but a $callback can be called, then its return value is both added
* to the memory and returned. If the $key can not be found and no $callback is
* given, NULL is returned.
* The memory file is a list of records, one per line, each consisting of key
* and value separated by a tab. The file is read once, at the first call; later
* calls are served from a static in-memory copy.
* NOTE: Using APCu has been considered instead of reading/writing from the
* disk, but APCu is neither reliable nor secure in shared web hosting.
* NOTE(review): keys or values that contain a tab or line break would not
* survive the round trip through the file -- presumably callers never pass
* such data; confirm against the callers.
*/
function query_memory (
string $key,
callable|null $callback = NULL
): string|null
{
// in-memory copy of the records; NULL until the file has been loaded
static $cache = NULL;
// whether appending to the memory file should still be attempted
static $write = TRUE;
// path of the memory file, set together with $cache at the first call
static $filename = '';
// Load the memory from the disk at the first call. Usually a memory file
// exists, so we do not check for existence first, but suppress errors while
// trying to load it and check the return value. If neither it nor its
// directory exists yet, try to create the directory. QUERY_HASH is defined
// in main.php
if ($cache === NULL):
$cache = [];
$filename = __DIR__ . '/private/memory/' . QUERY_HASH . '.tsv';
if ($lines = @file ($filename, FILE_IGNORE_NEW_LINES)):
foreach ($lines as $line):
// split at the first tab only; lines without a tab are ignored
$record = explode ("\t", $line, 2);
if (isset ($record[1])):
$cache[$record[0]] = $record[1];
endif;
endforeach;
elseif (!is_dir ($dirname = dirname ($filename))):
// writing is pointless, if the directory can not be created
$write = @mkdir ($dirname, 0777, true);
endif;
endif;
// Return the requested value from the cache, if available.
if (isset ($cache[$key])):
return $cache[$key];
endif;
// Return NULL, if the value neither exists nor can be computed.
if ($callback === NULL):
return NULL;
endif;
// Compute the value; add it to the cache if possible; return it.
$cache[$key] = $value = $callback ();
if ($write):
// a failed write disables all further write attempts of this request
$write = (bool) @file_put_contents ($filename, "$key\t$value\n", FILE_APPEND);
endif;
return $value;
}
/* Splits an $uri into three components and returns them as array: everything
 * from scheme to path, the query and the fragment. The delimiters ? and # are
 * not part of the returned components. Query and fragment are NULL, if the
 * $uri does not contain them.
 */
function split_uri_tail (
	string $uri
): array
{
	// The fragment starts behind the first #, the query behind the first ?
	// before that. A missing delimiter leaves the component at NULL.
	[$uri, $fragment] = explode ('#', $uri, 2) + [1 => NULL];
	[$uri, $query] = explode ('?', $uri, 2) + [1 => NULL];
	return [$uri, $query, $fragment];
}
/* Returns whether $subject matches a $pattern. The $pattern is an array whose
 * keys name byte positions in $subject and whose values are strings that list
 * the bytes permitted at the corresponding position. The $subject matches, if
 * every named position exists and holds one of its permitted bytes.
 */
function str_match (
	string $subject,
	array $pattern
): bool
{
	foreach ($pattern as $n => $characters):
		if (isset ($subject[$n]) and str_contains ($characters, $subject[$n])):
			continue;
		endif;
		return false;
	endforeach;
	return true;
}
/* Strips the suffix .nml from an $url and returns the result. The suffix must
 * be the last part of the $url before the first ? or # character or, if there
 * is no such character, the end of the $url. Other URLs are returned unchanged.
 */
function strip_nml_suffix (
	string $url
): string
{
	// split the URL in front of its query/fragment
	$length = strcspn ($url, '?#');
	$path = substr ($url, 0, $length);
	$tail = substr ($url, $length);
	if (!str_ends_with ($path, '.nml')):
		return $url;
	endif;
	return substr ($path, 0, -4) . $tail;
}
/* Returns the portion of a given $path which is relative to the Aneamal root
 * directory, i.e. the parent of the directory that contains this file, or, if
 * the $path does not start with that directory, just its basename. This is
 * used to reduce the information published publicly in error messages.
 */
function strip_root (
	string $path
): string
{
	$root = dirname (__DIR__);
	return str_starts_with ($path, $root)
		? substr ($path, strlen ($root))
		: basename ($path);
}
/* Removes every line feed character U+000A that is slashed, together with the
 * backslash in front of it, from a $string and returns the result. All other
 * backslashes and line feeds are preserved.
 */
function strip_slashed_breaks (
	string $string
): string
{
	// nothing to do without a backslash directly in front of a line feed
	if (!str_contains ($string, "\\\n")):
		return $string;
	endif;

	$lines = explode ("\n", $string);
	$last = array_key_last ($lines);
	$result = '';
	foreach ($lines as $index => $line):
		if ($index === $last):
			// no line feed follows the final line
			$result .= $line;
		elseif (is_slashed ($line, strlen ($line))):
			// the line feed is slashed: drop it and the backslash
			$result .= substr ($line, 0, -1);
		else:
			$result .= $line . "\n";
		endif;
	endforeach;
	return $result;
}
/* Removes all backslashes from a $string that are not slashed themselves as
 * well as slashed line feed characters U+000A and returns the result. A
 * backslash at the very end of the $string is removed, too. This is
 * equivalent to: stripslashes (strip_slashed_breaks ($string))
 */
function strip_slashed_breaks_and_slashes (
	string $string
): string
{
	$result = '';
	$length = strlen ($string);
	$i = 0;
	while ($i < $length):
		$byte = $string[$i++];
		if ($byte === '\\'):
			// a trailing backslash protects nothing and is dropped
			if ($i === $length):
				break;
			endif;
			// the protected byte is kept unless it is a line feed
			$byte = $string[$i++];
			if ($byte === "\n"):
				continue;
			endif;
		endif;
		$result .= $byte;
	endwhile;
	return $result;
}
/* Works like PHP's strpos function but ignores finds that are slashed with a
* backslash or included within quotation marks or in a sub-context and returns
* NULL instead of false when nothing is found.
*
* $haystack: the string to search in
* $needle: the string to search for
* $masks: an array that defines mask marks where the keys provide the
* opening marks and the values provide the closing marks, e.g.
* ['"' => '"', "'" => "'"]; we do not look
* for the needle inside a quotation and we do not look for other
* quotation marks inside a quotation either
* $offset: initial search offset, 0 is the first byte in $haystack
* $context: a string such as ` which opens and closes a sub-string where we
* do not look for the needle, but where we do respect quotations
*
* returns: the first position of an unslashed, unmasked needle in haystack
* at/after the initial offset or NULL if none was found
*
* NULL is also returned, if a mask or context is opened in front of the needle
* but never closed.
*/
function strpos_unmasked (
string $haystack,
string $needle,
array $masks,
int $offset = 0,
string $context = ''
): int|null
{
$haystack_length = strlen ($haystack);
// build an array of opening quotation marks and add the context mark to it
$opening_marks = array_keys ($masks);
if ($context !== ''):
$opening_marks[] = $context;
endif;
// position of the needle candidate; -1 forces a search in the first round
$needle_pos = -1;
while (true):
// find occurrence of an unprotected needle or return NULL; the search is
// only repeated once the offset has moved beyond the previous candidate
if ($offset > $needle_pos and is_null ($needle_pos = strpos_unslashed ($haystack, $needle, $offset))):
return NULL;
endif;
// search haystack, starting at offset, for the first occurrence of an
// opening mark which is not protected by a backslash. The offset is
// then set to that position and the found mark is saved. In case of no
// find the new offset will be the haystack length.
$mark_pos = $haystack_length;
foreach ($opening_marks as $i => $candidate):
if (is_null ($new_pos = strpos_unslashed ($haystack, $candidate, $offset))):
unset ($opening_marks[$i]); // don't look for this again
elseif ($new_pos < $mark_pos):
$mark_pos = $new_pos;
$mark = $candidate;
endif;
endforeach;
// return needle pos if it's before/an unprotected opening mark
if ($needle_pos <= $mark_pos):
return $needle_pos;
endif;
// else find closing context/quotation mark or return NULL
if ($mark === $context):
// The end of a context is searched with this very function, but without
// a context argument, so that masks inside the context are respected.
if (is_null ($offset = strpos_unmasked ($haystack, $mark, $masks, $mark_pos + strlen ($mark)))):
return NULL;
endif;
// continue behind the closing context mark
$offset += strlen ($mark);
else:
// Inside a mask only the unslashed closing mark matters.
if (is_null ($offset = strpos_unslashed ($haystack, $masks[$mark], $mark_pos + strlen ($mark)))):
return NULL;
endif;
// continue behind the closing mask mark
$offset += strlen ($masks[$mark]);
endif;
endwhile;
}
/* Works like strpos_unmasked but additionally takes nesting levels into
* account. That means it finds the first needle that is neither slashed nor
* masked nor matched by an unslashed opening nesting mark before it.
*
* $haystack: the string to search in
* $needle: the string to search for, also a closing nesting mark
* $match: an opening nesting mark to match a subsequent needle
* $masks: an array that defines masking marks where the keys provide the
* opening marks and the values provide the closing marks, e.g.
* ['"' => '"', "'" => "'"]
* $offset: initial search offset, 0 is the first byte in $haystack
* $context: a string such as ` which opens and closes a sub-string where we
* do not look for the needle, but where we do respect masks
*
* returns: the first position of an unslashed, unmasked, unmatched needle in
* haystack at/after the initial offset or NULL if none was found
*/
function strpos_unmatched (
string $haystack,
string $needle,
string $match,
array $masks,
int $offset = 0,
string $context = ''
): int|null
{
// balance of needles found so far minus opening marks found so far
$unmatched = 0;
// position of the last opening mark looked at; it becomes NULL as soon as
// no further opening mark exists, which switches off the search for them
$open = -1;
$matchlen = strlen ($match);
while (true):
// find occurrence of an unmasked needle or return NULL
if (is_null ($pos = strpos_unmasked ($haystack, $needle, $masks, $offset, $context))):
return NULL;
endif;
++$unmatched;
// find the number of unmasked opening nesting marks before the needle
if (isset ($open)):
// The search position is advanced by $matchlen before every search,
// hence the start one mark length in front of the offset.
$open = $offset - $matchlen;
// count every opening mark that ends at or before the needle position
while (!is_null ($open = strpos_unmasked ($haystack, $match, $masks, $open += $matchlen, $context)) and $pos - $open >= $matchlen):
--$unmatched;
endwhile;
endif;
// return position of the needle iff the needle was unmatched
if ($unmatched > 0):
return $pos;
else:
// the needle closed an open nesting level; continue behind it
$offset = $pos + strlen ($needle);
endif;
endwhile;
}
/* A variant of PHP's strpos function which skips every find that is_slashed
 * reports as protected by a backslash and which signals "not found" with NULL
 * rather than false.
 *
 * $haystack: the string to search in
 * $needle: the string to search for
 * $offset: byte position in $haystack where the search starts, 0 being the
 *   first byte
 *
 * returns: the position of the first unslashed needle at or after $offset, or
 *   NULL if there is none
 */
function strpos_unslashed (
	string $haystack,
	string $needle,
	int $offset = 0
): int|null
{
	$start = $offset;
	while (true):
		$found = strpos ($haystack, $needle, $start);
		if ($found === false):
			return NULL;
		endif;
		// A find at the very first byte is accepted without a slash check,
		// since nothing can precede it.
		if ($found === 0 or !is_slashed ($haystack, $found)):
			return $found;
		endif;
		// the find was slashed: resume the search one byte further on
		$start = $found + 1;
	endwhile;
}
/* Works like strpos_unmasked except for finding the last instead of the first
 * occurrence. Look at strpos_unmasked for a detailed description.
 * This function must search forwards in the string until all occurrences have
 * been identified. Searching backwards from the end is not an option due to
 * the asymmetric nature of masks.
 *
 * $haystack, $needle, $masks and $context are handed to strpos_unmasked
 * unchanged on every call; $offset is the byte position where the first search
 * starts and is then advanced past each find.
 *
 * returns: the position of the last occurrence that strpos_unmasked reports at
 *   or after $offset, or NULL if it reports none
 */
function strrpos_unmasked (
	string $haystack,
	string $needle,
	array $masks,
	int $offset = 0,
	string $context = ''
): int|null
{
	$last = NULL;
	// $context must be passed as is. Writing `$context = ''` in the argument
	// list would be an assignment that discards the caller's context.
	while (!is_null ($pos = strpos_unmasked ($haystack, $needle, $masks, $offset, $context))):
		$last = $pos;
		$offset = $pos + 1; // continue right behind the start of this find
	endwhile;
	return $last;
}
/* Replaces every run of consecutive non-ASCII bytes in $string by a single
 * Unicode replacement character (U+FFFD, UTF-8 encoded) and leaves all bytes
 * from the ASCII range, i.e. byte values below 128, untouched.
 */
function substitute_nonascii (
	string $string
): string
{
	$length = strlen ($string);
	$output = '';
	$i = 0;
	while ($i < $length):
		$byte = $string[$i];
		if (ord ($byte) < 128):
			$output .= $byte;
			++$i;
			continue;
		endif;
		// one replacement character stands in for the whole non-ASCII run
		$output .= "\xEF\xBF\xBD";
		do {
			++$i;
		} while ($i < $length and ord ($string[$i]) >= 128);
	endwhile;
	return $output;
}
/* Returns a two-dimensional $array transposed, i.e. rows become columns and
 * columns become rows. Keys are NOT preserved, neither those of the outer array
 * nor those of its rows: the result and each of its rows are lists. In case of
 * an empty two-dimensional input $array, an empty one-dimensional array is
 * returned. The function is optimized for the default case of more than 1 input
 * row. Case 1 could be done quicker with a verbose foreach, but is atypical.
 */
function transpose_matrix (
	array $array
): array
{
	// Drop the outer keys first: string keys would otherwise be spread as
	// named arguments, which array_map rejects.
	$rows = array_values ($array);
	return match (count ($rows)) {
		0 => [],
		// array_map preserves keys when it gets exactly one array, hence the
		// single row is reindexed to keep the result a list.
		1 => array_map (fn ($x) => [$x], array_values ($rows[0])),
		default => array_map (null, ...$rows),
	};
}
// The End