aneamal/func.php

<?php

/* Copyright 2010-2024 Martin Janecke <martin@aneamal.org>
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
 */

// declare (strict_types = 1); // only during development

namespace prlbr\aneamal;


/* Constants
 */

// version
// NOTE(review): stored as a string, not an int; presumably compared against
// the versions of other Aneamal source files - confirm at the use sites.
const vfunc = '31';

// metadata types
// The values are distinct powers of two, so they can presumably be combined
// and tested as bit flags - TODO confirm against the callers.
const META_PLAIN = 1;   // plain variable without special function
const META_SPECIAL = 2; // recognized or reserved metadata name
const META_CUSTOM = 4;  // custom mark

// URI types
// Distinct powers of two as well; presumably usable as a bit mask of accepted
// URI kinds - TODO confirm against the callers.
const URI_REMOTE = 1; // absolute URI:  http://example.com/
const URI_DATA = 2;   // data URI:  data:text/plain;base64,Oik=
const URI_LOCAL = 4;  // local path:  /style/me.css  or  dog.nml
const URI_PAGE = 8;   // relative to current page:  #section-2  or  ?v=27


/* Classes
 */

// Marker exception types. None of them adds members to \Exception; they only
// exist so that callers can catch them selectively by class.

// NOTE(review): judging by the name, this signals a wrong number of items
// somewhere - the throwing code is not in this part of the file; confirm.
class CardinalityException extends \Exception {}
// Thrown when a preview image cannot be produced, e.g. by the PreviewGD
// constructor if the source image cannot be loaded or has no positive size.
class PreviewException extends \Exception {}
// NOTE(review): presumably used to hand a message from a module to the caller
// rather than to report a failure - confirm at the throw/catch sites.
class ModuleMessage extends \Exception {}

/* This class is used to read the width and height of visual media in a Matroska
 * or WebM container file. WebM is basically a subset of Matroska which is
 * implemented in EBML, hence all three specifications are needed/helpful to
 * understand how WebM is composed:
 *   EBML specification: https://datatracker.ietf.org/doc/html/rfc8794
 *   Matroska specification: https://www.matroska.org/technical/basics.html
 *   WebM specification: https://www.webmproject.org/docs/container/
 * NOTE: It may seem like there is an excessive number of file operations like
 * fread, ftell. However, PHP reads a chunk of usually 8KB at once and buffers
 * the result internally even if only a few bytes were requested. Further
 * operations within the range of the chunk are served from the RAM and do not
 * access the hard disk. Therefore it would not be more efficient to read longer
 * chunks in our code and do string operations instead of the file operations.
 */
class Matroska {

 // EBML Element IDs needed to find Matroska/WebM video dimensions. Indentation
 // reflects the structure of the video file.
 const ID = [
    'EBML' => 0x1A45DFA3, // mandatory first bytes
        'DocType' => 0x4282, // mandatory, see DocTypes
    'Segment' => 0x18538067,
        'SeekHead' => 0x114D9B74, // multiple optional
            'Seek' => 0x4DBB,
                'SeekID' => 0x53AB,
                'SeekPosition' => 0x53AC,
        'Tracks' => 0x1654AE6B,
            'TrackEntry' => 0xAE,
                'TrackType' => 0x83, // mandatory, 1=video 3=complex
                'Video' => 0xE0,
                    'PixelWidth' => 0xB0, // mandatory, not 0
                    'PixelHeight' => 0xBA, // mandatory, not 0
                    'PixelCropBottom' => 0x54AA, // default 0
                    'PixelCropTop' => 0x54BB, // default 0
                    'PixelCropLeft' => 0x54CC, // default 0
                    'PixelCropRight' => 0x54DD, // default 0
                    'DisplayWidth' => 0x54B0, // optional
                    'DisplayHeight' => 0x54BA, // optional
                    'DisplayUnit' => 0x54B2, // optional, must be 0
 ];

 // supported DocType strings
 const DocTypes = [
    'webm',
    'matroska',
 ];

 public int $width = 0; // display width of the video in pixels
 public int $height = 0; // display height of the video in pixels
 public string $type = ''; // registered DocType like "webm" or "matroska"
 public string $error = ''; // string representation of a thrown Exception

 private $f = null; // file resource identifying an opened video file
 private int $filesize = 0; // its filesize in bytes
 private int $vintwidth = 0; // width in bytes of the most recently read VINT


 /* The constructor opens the video file given as $filename, tries to find width
  * and height in it and closes the file. Anything which is unexpected or cannot
  * be handled in the file by this class will throw an Exception. They are
  * caught here and their string representation is stored in $this->error.
  * An empty or unreadable file leaves all properties at their defaults.
  * NOTE: There are no other public methods. Access the object's properties to
  * read the information which the constructor found.
  */
 public function __construct (
    string $filename
 )    // returns nothing
 {
    try {
        $filesize = filesize ($filename);
        $f = $filesize? fopen ($filename, 'rb'): false;
        if ($f):
            $this->filesize = $filesize;
            $this->f = $f;
            $this->seek_segment ();
            if (in_array ($this->type, self::DocTypes, true)):
                $this->seek_tracks ();
                $this->seek_video ();
                $this->find_dimensions ();
            endif;
        endif;
    } catch (\Throwable $e) {
        $this->error = (string) $e;
    } finally {
        // Only close what was actually opened: fclose throws a TypeError for
        // anything but a resource and @ would not suppress that.
        if (is_resource ($this->f)):
            fclose ($this->f);
        endif;
        $this->f = null;
    }
 }


 /* Returns exactly $width bytes read from the current position in the file and
  * moves the file pointer behind them. Throws an Exception which mentions
  * $what, if fewer bytes are available. The check against the remaining bytes
  * happens before reading, so that a bogus size in a broken file cannot make
  * PHP allocate a huge buffer.
  */
 private function read_bytes (
    int $width, // in bytes, must be positive
    string $what // name of the data type for the error message
 ):    string
 {
    $remaining = $this->filesize - (int) ftell ($this->f);
    if ($width < 1 or $width > $remaining):
        throw new \Exception ("Failed to read $what");
    endif;

    // fread returns a shorter string instead of false at the end of a file.
    $bytes = fread ($this->f, $width);
    if ($bytes === false or strlen ($bytes) !== $width):
        throw new \Exception ("Failed to read $what");
    endif;

    return $bytes;
 }


 /* Returns a string which it reads from the current position in the file and
  * which is $width bytes long. If the $width is zero, the $default value is
  * returned, which defaults to an empty string. The pointer in the file
  * resource points behind the string at the end.
  * NOTE: The EBML specification restricts its type String to bytes from the
  * range 0x20 to 0x7E which represent US-ASCII characters and a 0x00 byte for
  * termination. This function does not check whether the string adheres to the
  * range, but it terminates the string at a 0x00 byte.
  */
 private function read_string (
    int $width,
    string $default = ''
 ):    string
 {
    if ($width === 0):
        return $default;
    endif;

    $bytes = $this->read_bytes ($width, 'STRING');
    $zero = strpos ($bytes, "\x00");

    return $zero === false? $bytes: substr ($bytes, 0, $zero);
 }


 /* An EBML Element consists of Element ID, Element Data Size and Element Data.
  * This function returns an array of the format [Element ID, Element Data Size]
  * whose two items are integers. They are read from the current position in the
  * file. An Element Data Size whose VINT_DATA bits are all 1 marks an element
  * of unknown size (common for the Segment of live recordings); such an element
  * is reported as extending to the end of the file.
  */
 private function read_tag (
 ):    array
 {
    $id = $this->read_vint (false);
    $size = $this->read_vint (true);

    // A VINT of width w carries 7*w data bits.
    if ($size === (1 << (7 * $this->vintwidth)) - 1):
        $size = max (0, $this->filesize - (int) ftell ($this->f));
    endif;

    return [$id, $size];
 }


 /* Returns an unsigned integer (as defined in the EBML specification) which it
  * reads from the current position in the file and which is $width bytes long.
  * If the $width is 0, the $default value is returned, which defaults to 0
  * itself. The pointer in the file resource points behind the unsigned integer
  * at the end. Throws an Exception, if the value cannot be read completely or
  * does not fit into PHP's signed integer.
  */
 private function read_uint (
    int $width, // in bytes, must be in the range 0 to 8
    int $default = 0
 ):    int
 {
    if ($width === 0):
        return $default;
    elseif ($width > PHP_INT_SIZE):
        throw new \Exception ('UINT exceeds PHP_INT_SIZE');
    endif;

    // Transform the binary data (handled by PHP as string) into an int. The
    // binary representation of the bytes and $integer should actually be the
    // same then, except that PHP recognizes the latter as int.
    $integer = hexdec (bin2hex ($this->read_bytes ($width, 'UINT')));

    if (is_float ($integer)):
        throw new \Exception ('PHPs signed INT too small for this UINT');
    endif;

    return $integer;
 }


 /* Reads a variable width integer (VINT) from the current pointer position of
  * the file. A VINT consists of VINT_WIDTH, VINT_MARKER and VINT_DATA.
  * VINT_WIDTH determines the width in bytes of the VINT and encodes that width
  * in the number of 0-bits before VINT_MARKER plus 1. VINT_MARKER is a single
  * 1-bit. The rest of the VINT is VINT_DATA, a binary big-endian integer.
  * If $onlydata is true, just that number in VINT_DATA is returned. If
  * $onlydata is false, the whole VINT is interpreted as a binary number.
  * The width of the VINT is remembered in $this->vintwidth.
  * This method is limited to VINTs of width 8, since this is the maximum valid
  * size in Matroska and WebM, determined by the bigger value of EBMLMaxIDLength
  * (must be 4) and EBMLMaxSizeLength (must be from the range 1-8). It is also
  * what PHP can handle as integer on 64-bit systems. EBML allows greater widths
  * though.
  */
 private function read_vint (
    bool $onlydata = true
 ):    int
 {
    $n = ord ($this->read_bytes (1, 'VINT_WIDTH'));

    // Walk a single-bit mask from the most significant bit downwards until it
    // hits the VINT_MARKER. Every 0-bit passed on the way adds one byte to the
    // width. The mask runs out (becomes 0) if the first byte has no 1-bit.
    $width = 1;
    for ($marker = 0x80; $marker and !($n & $marker); $marker >>= 1):
        ++$width;
    endfor;

    if ($width > 8):
        throw new \Exception ('VINT exceeds 8 bytes');
    elseif ($width > PHP_INT_SIZE):
        throw new \Exception ('VINT exceeds PHP_INT_SIZE');
    endif;

    // Remove the VINT_MARKER, if only the VINT_DATA is desired:
    if ($onlydata):
        $n ^= $marker;
    endif;

    // Read the part of VINT_DATA that was not already in the first byte:
    if ($width > 1):
        $bytes = $this->read_bytes ($width - 1, 'VINT_DATA size');
        foreach (str_split ($bytes) as $byte):
            $n <<= 8;
            $n |= ord ($byte);
        endforeach;
    endif;

    $this->vintwidth = $width;

    return $n;
 }


 /* Calculates the width and height of a WebM video and stores them in
  * $this->width and $this->height. The file pointer is supposed to point to a
  * Video element inside the file at the beginning. Throws an Exception, if no
  * valid dimensions can be derived.
  */
 private function find_dimensions (
 ):    void
 {
    // The Video element is our start.
    [$id, $size] = $this->read_tag ();
    if ($id !== self::ID['Video']):
        throw new \Exception ('Expected Video');
    endif;

    $afterVideo = $size + (int) ftell ($this->f);
    if ($this->filesize < $afterVideo):
        throw new \Exception ('Video beyond filesize');
    endif;

    // The unsigned integer elements of Video which we evaluate. The crops and
    // DisplayUnit default to 0 per specification. DisplayWidth/DisplayHeight
    // have conditional defaults which are computed later and PixelWidth/
    // PixelHeight are mandatory; for these four 0 is an invalid value which
    // tells us later that they have not been set.
    $video = [
        'PixelWidth' => 0,
        'PixelHeight' => 0,
        'PixelCropBottom' => 0,
        'PixelCropTop' => 0,
        'PixelCropLeft' => 0,
        'PixelCropRight' => 0,
        'DisplayWidth' => 0,
        'DisplayHeight' => 0,
        'DisplayUnit' => 0,
    ];
    $names = array_flip (self::ID); // Element ID => element name

    // Search through the elements in Video:
    while (ftell ($this->f) < $afterVideo):
        [$id, $size] = $this->read_tag ();
        $name = $names[$id] ?? '';
        if (array_key_exists ($name, $video)):
            $video[$name] = $this->read_uint ($size, 0);
        else:
            fseek ($this->f, $size, SEEK_CUR); // skip
        endif;
    endwhile;

    // PixelWidth and PixelHeight are required unsigned integers > 0
    if ($video['PixelWidth'] < 1 || $video['PixelHeight'] < 1):
        throw new \Exception ("PixelWidth: {$video['PixelWidth']}; PixelHeight: {$video['PixelHeight']}");
    endif;

    // Calculate width and height from the pixel dimension of the track; this
    // calculation yields the default values for DisplayWidth and DisplayHeight.
    $width = $video['PixelWidth'] - $video['PixelCropLeft'] - $video['PixelCropRight'];
    $height = $video['PixelHeight'] - $video['PixelCropTop'] - $video['PixelCropBottom'];

    // Switch to DisplayWidth and DisplayHeight, if they were provided and
    // DisplayUnit has the only value WebM supports, i.e. 0, which means pixels.
    if ($video['DisplayWidth'] > 0 && $video['DisplayHeight'] > 0 && $video['DisplayUnit'] === 0):
        $this->width = $video['DisplayWidth'];
        $this->height = $video['DisplayHeight'];
    // Only set dimensions, if both width and height are available, non-zero.
    elseif ($width > 0 and $height > 0):
        $this->width = $width;
        $this->height = $height;
    else:
        throw new \Exception ("Calculated width: $width; height: $height");
    endif;
 }


 /* Reads the EBML Header of the file and saves the EBML DocType, e.g. "webm".
  * The file pointer is supposed to point at the start of the Header (usually
  * the first byte of the file) at the beginning. The file pointer points at
  * the first byte after the Header afterwards, expected to be a Segment.
  */
 private function seek_segment (
 ):    void
 {
    // The EBML element is mandatory at the start.
    [$id, $size] = $this->read_tag ();
    if ($id !== self::ID['EBML']):
        throw new \Exception ('Expected EBML Header');
    endif;

    // Set $beyond to the position of the first byte after the EBML Header.
    $beyond = (int) ftell ($this->f) + $size;
    if ($this->filesize < $beyond):
        throw new \Exception ('EBML Header exceeds filesize');
    endif;

    // Search through the elements in the EBML Header to find the DocType. If it
    // is found, set the file pointer to the byte after the header, expected to
    // be a Segment, and return.
    while (ftell ($this->f) < $beyond):
        [$id, $size] = $this->read_tag ();
        if ($id === self::ID['DocType']):
            $this->type = $this->read_string ($size);
            fseek ($this->f, $beyond, SEEK_SET);
            return;
        endif;
        fseek ($this->f, $size, SEEK_CUR); // skip Element Data
    endwhile;

    // Since the DocType is mandatory, we should never reach this part.
    throw new \Exception ('EBML DocType missing');
 }


 /* Searches a SeekHead element for a Seek entry that references the Tracks
  * element. The file pointer must point at the first byte of the SeekHead's
  * Element Data, whose length in bytes is given as $size. Returns the
  * SeekPosition of Tracks, which is relative to the start of the Segment's
  * data, or 0 if the SeekHead has no usable entry for Tracks. Positions which
  * are not smaller than $limit are not usable. The file pointer points behind
  * the SeekHead, if 0 is returned.
  */
 private function seekhead_tracks (
    int $size,
    int $limit
 ):    int
 {
    $afterSeekHead = (int) ftell ($this->f) + $size;

    while (ftell ($this->f) < $afterSeekHead):
        [$id, $size] = $this->read_tag ();

        // Seek Elements are expected here, but there can also be global
        // Elements like a Void element
        if ($id !== self::ID['Seek']):
            fseek ($this->f, $size, SEEK_CUR); // skip Element Data
            continue;
        endif;

        $afterSeek = (int) ftell ($this->f) + $size;
        $SeekID = $SeekPosition = 0;

        // Search through the elements in Seek; there are exactly two mandatory
        // and no specific optional Elements expected, but there could also be
        // global Elements such as Void
        while (ftell ($this->f) < $afterSeek):
            [$id, $size] = $this->read_tag ();
            if ($id === self::ID['SeekID']):
                // The Matroska spec defines the type of the SeekID as binary;
                // we handle it as unsigned integer
                $SeekID = $this->read_uint ($size);
                // skip this Seek, if it doesn't reference Tracks
                if ($SeekID !== self::ID['Tracks']):
                    fseek ($this->f, $afterSeek, SEEK_SET);
                    break;
                endif;
            elseif ($id === self::ID['SeekPosition']):
                $SeekPosition = $this->read_uint ($size);
            else:
                fseek ($this->f, $size, SEEK_CUR); // skip Element Data
            endif;
        endwhile;

        if ($SeekID === self::ID['Tracks'] and $SeekPosition > 0 and $SeekPosition < $limit):
            return $SeekPosition;
        endif;
    endwhile;

    return 0;
 }


 /* Expects the file pointer to point at the (first) EBML Segment element at the
  * beginning and makes it point to the Tracks element. The Tracks element is
  * either found directly while seeking through the file or via a reference in
  * a SeekHead element. Throws an Exception, if there is no Tracks element.
  */
 private function seek_tracks (
 ):    void
 {
    // The Segment element is our start.
    [$id, $size] = $this->read_tag ();
    if ($id !== self::ID['Segment']):
        throw new \Exception ('Expected Segment');
    endif;

    // $afterSegment and $this->filesize are expected to be equal, if the file
    // contains a single EBML Document or is the last EBML Document.
    // $innerSegment is a reference point for SeekPositions in the SeekHead
    $innerSegment = (int) ftell ($this->f);
    $afterSegment = $innerSegment + $size;
    if ($this->filesize < $afterSegment):
        throw new \Exception ('Segment beyond filesize');
    endif;

    // Search through the elements in the Segment to find the Tracks element
    // directly or via a SeekHead Element
    while ($start = ftell ($this->f) and $start < $afterSegment):
        [$id, $size] = $this->read_tag ();
        if ($id === self::ID['Tracks']):
            fseek ($this->f, $start, SEEK_SET);
            return;
        elseif ($id === self::ID['SeekHead']):
            $SeekPosition = $this->seekhead_tracks ($size, $afterSegment - $innerSegment);
            if ($SeekPosition > 0):
                fseek ($this->f, $innerSegment + $SeekPosition, SEEK_SET);
                return;
            endif;
        else:
            fseek ($this->f, $size, SEEK_CUR); // skip Element Data
        endif;
    endwhile;

    // Tracks are not mandatory; we can reach this point.
    throw new \Exception ('Tracks not found');
 }


 /* Expects the file pointer to point at a Tracks element and makes it point to
  * the first Video element inside it that belongs to a video or complex track.
  * Throws an Exception, if there is no such element.
  */
 private function seek_video (
 ):    void
 {
    // The Tracks element is our start.
    [$id, $size] = $this->read_tag ();
    if ($id !== self::ID['Tracks']):
        throw new \Exception ('Expected Tracks');
    endif;

    $afterTracks = (int) ftell ($this->f) + $size;
    if ($this->filesize < $afterTracks):
        throw new \Exception ('Tracks beyond filesize');
    endif;

    // Search through the elements in Tracks to find Video elements
    while (ftell ($this->f) < $afterTracks):
        [$id, $size] = $this->read_tag ();

        // The only valid element besides global elements here is TrackEntry
        if ($id !== self::ID['TrackEntry']):
            fseek ($this->f, $size, SEEK_CUR); // skip Element Data
            continue;
        endif;

        $afterTrackEntry = (int) ftell ($this->f) + $size;
        $TrackType = $Video = 0;

        // Search through the elements in the TrackEntry
        while ($start = ftell ($this->f) and $start < $afterTrackEntry):
            [$id, $size] = $this->read_tag ();
            if ($id === self::ID['TrackType']):
                // TrackType 1 and 3 identify video and complex tracks,
                // which are the only ones where we expect video dimensions
                $TrackType = $this->read_uint ($size);
                if ($TrackType !== 1 and $TrackType !== 3):
                    fseek ($this->f, $afterTrackEntry, SEEK_SET);
                    break;
                elseif ($Video):
                    fseek ($this->f, $Video, SEEK_SET);
                    return;
                endif;
            elseif ($id === self::ID['Video']):
                if ($TrackType):
                    fseek ($this->f, $start, SEEK_SET);
                    return;
                endif;
                // The TrackType is not known yet: remember where the Video
                // element starts and step over its data for now.
                $Video = $start;
                fseek ($this->f, $size, SEEK_CUR);
            // We could check for the mandatory FlagEnabled tag here, but
            // its specification - "Set to 1 if the track is usable. It is
            // possible to turn a not usable track into a usable track using
            // chapter codecs or control tracks." - does not give confidence
            // of being reliable on its own. We assume for our purposes that
            // a Video track is useful.
            else:
                fseek ($this->f, $size, SEEK_CUR); // skip Element Data
            endif;
        endwhile;
    endwhile;

    // Video tracks are not mandatory; we can reach this point.
    throw new \Exception ('Video track not found');
 }

} // class Matroska


/* This class is used to read the width and height of visual media that
 * corresponds to the ISO/IEC 14496-12 base media file format, which includes
 * container formats such as MP4 and 3GP. It should be compatible with Apple's
 * QuickTime File Format (.mov) as well; its specification can be found at:
 * https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/
 */
class MP4 {

 // A few recognized box types; we skip types that we do not recognize.
 // Indentation reflects the structure of the video file.
 const TYPE = [
    'ftyp' => 0x66747970, // File Type Box
    'moov' => 0x6D6F6F76, // Movie Box, exactly one mandatory in file
        'trak' => 0x7472616B, // Track Box, at least one mandatory
            'tkhd' => 0x746B6864, // Track Header Box, exactly one mandatory
 ];

 public int $width = 0; // display width of the video in pixels
 public int $height = 0; // display height of the video in pixels
 public string $error = ''; // string representation of a thrown Exception

 private $f = null; // file resource identifying an opened video file
 private int $filesize = 0; // its filesize in bytes


 /* The constructor opens the video file given as $filename, tries to find width
  * and height in it and closes the file. Anything which is unexpected or cannot
  * be handled in the file by this class will throw an Exception. They are
  * caught here and their string representation is stored in $this->error.
  * An empty or unreadable file leaves all properties at their defaults.
  * NOTE: There are no other public methods. Access the object's properties to
  * read the information which the constructor found.
  */
 public function __construct (
    string $filename
 )    // returns nothing
 {
    try {
        $filesize = filesize ($filename);
        $f = $filesize? fopen ($filename, 'rb'): false;
        if ($f):
            $this->filesize = $filesize;
            $this->f = $f;
            $this->find_dimensions ();
        endif;
    } catch (\Throwable $e) {
        $this->error = (string) $e;
    } finally {
        // Only close what was actually opened: fclose throws a TypeError for
        // anything but a resource and @ would not suppress that.
        if (is_resource ($this->f)):
            fclose ($this->f);
        endif;
        $this->f = null;
    }
 }


 /* Seeks through the video file to find width and height. A file consists of
  * boxes. Width and height may be found in a 'tkhd' box which is expected in a
  * 'trak' box which is expected in a 'moov' box. Unknown boxes are skipped. The
  * file pointer should point at the file start at the beginning, but must at
  * least point at a box header. Returns without having set width and height,
  * if the end of the file is reached first.
  */
 private function find_dimensions (
 ):    void
 {
    $skiptrak = 0; // position behind the trak box that was entered last
    while ($this->filesize > ($start = ftell ($this->f))):

        // Read and evaluate the box header.
        [$size, $type] = $this->read_header ();
        if ($this->filesize < ($beyond = $start + $size)):
            throw new \Exception ('Box extends beyond filesize');

        // Continue to read boxes inside a moov box.
        elseif ($type === self::TYPE['moov']):
            continue;

        // Continue to read boxes inside a trak box, but prepare to skip it.
        elseif ($type === self::TYPE['trak']):
            $skiptrak = $beyond;
            continue;

        // Parse the tkhd box. If width and height are found inside, we have
        // what we want; otherwise skip that trak.
        elseif ($type === self::TYPE['tkhd']):
            $this->parse_tkhd ($start, $size);
            if ($this->width and $this->height):
                return;
            elseif ($skiptrak > $beyond):
                fseek ($this->f, $skiptrak, SEEK_SET);
            endif;

        // Skip other boxes.
        else:
            fseek ($this->f, $beyond, SEEK_SET);
        endif;
    endwhile;
 }


 /* Reads width and height of a Track Header Box. The $start and the $length of
  * the Track Header Box in bytes within the file must be given. This function
  * does not check that they actually correspond to a Track Header Box. The file
  * pointer must point behind the box header initially and points behind the box
  * at the end. Returns true, if width and height were found and stored, false
  * otherwise. Throws an Exception, if the box is too short to hold them.
  */
 private function parse_tkhd (
    int $start,
    int $length
 ):    bool
 {
    // Track Header Boxes are Full Boxes, i.e. they have an 8-bit version and
    // 24-bit flags after the header. We only need the least significant flags.
    fseek ($this->f, 3, SEEK_CUR);
    $flags = $this->read_uint (1);

    // The least significant flag bit signals whether the track is enabled.
    // Tracks that are not enabled are skipped.
    if (!($flags & 1)):
        fseek ($this->f, $start + $length, SEEK_SET);
        return false;
    endif;

    // Width and height are encoded in the last 8 bytes of the Track Header.
    // In a box that is too short these 8 bytes would overlap the box header or
    // the version and flags which were just read and yield bogus dimensions.
    $dimensions = $start + $length - 8;
    if ($dimensions < ftell ($this->f)):
        throw new \Exception ('Track Header Box too short');
    endif;
    fseek ($this->f, $dimensions, SEEK_SET);

    // Width and height are fixed point 16-bit.16-bit numbers. Only read the
    // part before the decimal point, skip over the rest.
    $width = $this->read_uint (2);
    fseek ($this->f, 2, SEEK_CUR);
    $height = $this->read_uint (2);
    fseek ($this->f, 2, SEEK_CUR);

    // Set the dimensions, if both width and height are available.
    if ($width > 0 and $height > 0):
        $this->width = $width;
        $this->height = $height;
        return true;
    endif;

    return false;
 }


 /* Reads the header of a box (called atom in QuickTime) from the current
  * position in the file. The box header consists of the box size in bytes
  * (header included) and a type. Both values are returned as an integer array
  * of the format [size, type]. Throws an Exception, if the given size is
  * smaller than the header itself.
  * NOTE: This method is private like all methods besides the constructor; it
  * depends on the file which is only open while the constructor runs.
  */
 private function read_header (
 ):    array
 {
    // The box header starts with a 32-bit (= 4-byte) size field.
    $size = $this->read_uint (4);

    // A size value of 0 indicates that the box extends to the end of the file,
    // so the size is computed from filesize, current position and the 4 bytes
    // of the size field that was just read.
    if ($size === 0):
        $size = $this->filesize - ftell ($this->f) + 4;
    endif;

    // Then comes a 32-bit type field. There is a special value 0x75756964
    // ("uuid") that indicates an extended type defined in the 16 bytes
    // following this header. We only need standard boxes though.
    $type = $this->read_uint (4);

    // A size value 1 means that the actual size is in a 64-bit largesize field
    // after the type field, which is read now in that case.
    $headersize = 8;
    if ($size === 1):
        $size = $this->read_uint (8);
        $headersize = 16;
    endif;

    // Since the box header belongs to the box, the given box size must not
    // undercut the header's actual size.
    if ($size < $headersize):
        throw new \Exception ('Given box size below box header size');
    endif;

    return [$size, $type];
 }


 /* Reads an unsigned integer that is $width bytes wide from the current pointer
  * position in the file and returns the result. UINTs are stored as big-endian
  * binary data in the file. Throws an Exception, if the value cannot be read
  * completely or does not fit into PHP's signed integer.
  */
 private function read_uint (
    int $width = 4 // in bytes (i.e. 32 bits = 4 bytes; 64 bits = 8 bytes)
 ):    int
 {
    if ($width > PHP_INT_SIZE):
        throw new \Exception ("UINT width ($width bytes) exceeds PHP_INT_SIZE");
    elseif ($width < 1):
        throw new \Exception ('Failed to read UINT');
    endif;

    // fread returns a shorter string instead of false at the end of a file;
    // an incomplete UINT must not be mistaken for a value.
    $bytes = fread ($this->f, $width);
    if ($bytes === false or strlen ($bytes) !== $width):
        throw new \Exception ('Failed to read UINT');
    endif;

    // Transform the binary data (handled by PHP as string) into an int. The
    // binary representation of $bytes and $integer should actually be the same
    // then, except that PHP recognizes the latter as int.
    $integer = hexdec (bin2hex ($bytes));

    if (is_float ($integer)):
        throw new \Exception ('PHPs signed INT too small for this UINT');
    endif;

    return $integer;
 }

} // class MP4


abstract class Preview {

 // The preview image held by a concrete subclass; its exact type depends on
 // the image library which the subclass uses.
 protected object|null|false $image;

 /* Implementations load the image stored at $filename and derive a preview
  * image object from it whose size follows from $specWidth and $specHeight.
  * See coordinates for the meaning of these two settings.
  */
 abstract public function __construct (
    string $filename,
    int $specWidth = 0,
    int $specHeight = 0,
 );


 /* Computes how an original image of $fullWidth by $fullHeight pixels has to
  * be cropped and scaled to satisfy the settings $specWidth and $specHeight.
  * Each setting is read by its sign:
  *   positive: the preview has exactly that many pixels in this dimension
  *   negative: the absolute value is an upper limit; the preview stays below
  *             it iff the original is smaller or the image ratio demands it
  *   zero:     no restriction
  * Iff the image ratio cannot be kept, a centered crop of the original is
  * used. The returned array has the format
  *   [cropX, cropY, cropWidth, cropHeight, miniWidth, miniHeight]
  * where crop* describe the used part of the original image and mini* the
  * size of the preview. A \ValueError is thrown for an original image without
  * positive width and height.
  * Explanation in German: https://prlbr.de/2016/vorschau-bilder-berechnen/
  */
 protected static function coordinates (
    int $fullWidth,
    int $fullHeight,
    int $specWidth,
    int $specHeight,
 ):    array
 {
    if ($fullWidth < 1 or $fullHeight < 1):
        throw new \ValueError ('Width and height of an image must be positive');
    endif;

    // Without any restriction the preview equals the original image. Only a
    // combination with at least one exact and no unrestricted dimension can
    // break the image ratio and hence require a crop.
    $miniWidth = $fullWidth;
    $miniHeight = $fullHeight;
    $cropped = false;

    if ($specWidth < 0):
        if ($specHeight < 0):
            // two limits: shrink uniformly, never enlarge
            $scale = min (-$specWidth / $fullWidth, -$specHeight / $fullHeight, 1);
            $miniWidth = self::roundp ($fullWidth * $scale);
            $miniHeight = self::roundp ($fullHeight * $scale);
        elseif ($specHeight > 0):
            // exact height, limited width
            $miniHeight = $specHeight;
            $miniWidth = min (self::roundp ($fullWidth * $miniHeight / $fullHeight), -$specWidth);
            $cropped = true;
        else:
            // limited width, free height
            $miniWidth = min ($fullWidth, -$specWidth);
            $miniHeight = self::roundp ($fullHeight * $miniWidth / $fullWidth);
        endif;
    elseif ($specWidth > 0):
        $miniWidth = $specWidth;
        if ($specHeight < 0):
            // exact width, limited height
            $miniHeight = min (self::roundp ($fullHeight * $miniWidth / $fullWidth), -$specHeight);
            $cropped = true;
        elseif ($specHeight > 0):
            // exact width and height
            $miniHeight = $specHeight;
            $cropped = true;
        else:
            // exact width, free height
            $miniHeight = self::roundp ($fullHeight * $miniWidth / $fullWidth);
        endif;
    elseif ($specHeight !== 0):
        // free width; the height is either exact or limited
        $miniHeight = $specHeight > 0? $specHeight: min ($fullHeight, -$specHeight);
        $miniWidth = self::roundp ($fullWidth * $miniHeight / $fullHeight);
    endif;

    // The crop is the whole original image unless the ratio may differ; then
    // it is the largest part of the original that has the preview's ratio.
    $cropWidth = $fullWidth;
    $cropHeight = $fullHeight;
    if ($cropped):
        $z = min ($fullWidth / $miniWidth, $fullHeight / $miniHeight);
        $cropWidth = self::roundp ($miniWidth * $z);
        $cropHeight = self::roundp ($miniHeight * $z);
    endif;

    // Center the crop: half of the unused pixels remain on either side.
    return [
        ($fullWidth - $cropWidth) >> 1,
        ($fullHeight - $cropHeight) >> 1,
        $cropWidth,
        $cropHeight,
        $miniWidth,
        $miniHeight,
    ];
 }


 /* Rounds $real to the nearest integer, but never returns less than 1.
  */
 private static function roundp (
    int|float $real
 ):    int
 {
    $rounded = (int) round ($real);

    return $rounded < 1? 1: $rounded;
 }


 /* Implementations write the image to $filename in the format that the file
  * extension names. $quality is in the range 0 to 100: 0 stands for the least
  * quality and a tiny file, 100 for the best quality and a big file; 80 to 85
  * is a good choice for the web. The return value is true on success and
  * false for unsupported types.
  */
 abstract public function save (
    string $filename,
    int $quality = 100,
 ): bool;


 /* Implementations tell whether the class can save images of a given $type,
  * which should be an uppercase preferred file extension such as JPG or JXL.
  */
 abstract public static function supports (
    string $type,
 ): bool;

} // abstract class Preview


class PreviewGD extends Preview {

 /* The constructor computes a preview, whose size is constrained by $specWidth
  * and $specHeight, from an image at a given $filename. The preview is held in
  * a \GdImage object. A PreviewException is thrown, if the file can not be
  * loaded as image, if the image has no positive size or if it can not be
  * rotated into its upright position.
  */
 public function __construct (
    string $filename,
    int $specWidth = 0,
    int $specHeight = 0,
 )
 {
    // Load the original image file. The file content is checked before it is
    // handed to GD so that a failed read never passes false where a string is
    // required, and the image is only stored once it is known to be valid.
    $data = @file_get_contents ($filename);
    $image = is_string ($data) && $data !== ''? @imagecreatefromstring ($data): false;
    if (!$image):
        throw new PreviewException ('Could not load image');
    endif;
    $this->image = $image;

    // determine original image size
    $fullWidth = imagesx ($this->image);
    $fullHeight = imagesy ($this->image);
    if ($fullWidth < 1 or $fullHeight < 1):
        throw new PreviewException ('Image width or height is not positive');
    endif;

    // Determine the orientation. The exif extension is optional in PHP; calling
    // exif_read_data without it would be a fatal error that @ can not suppress,
    // so images are treated as upright (orientation 0) when it is missing.
    $exif = function_exists ('exif_read_data')? (@exif_read_data ($filename, 'IFD0') ?: []): [];
    $orientation = intval ($exif['Orientation'] ?? 0);

    // We will transform images that are not saved upright into their natural
    // position after resizing. Transforming a smaller image is more efficient.
    // But turning them late requires swapping the spec'ed width and height.
    if (in_array ($orientation, [5, 6, 7, 8], true)):
        [$specHeight, $specWidth] = [$specWidth, $specHeight];
    endif;

    $this->resize (...self::coordinates ($fullWidth, $fullHeight, $specWidth, $specHeight));
    $this->upright ($orientation);
 }


 /* Resize the image by copying (a crop) of the original image resampled to a
  * newly created white canvas with which we replace the original then. We use
  * the GD library only for JPEGs at this point and these do not support
  * transparency, hence the white background. Gamma correction is applied
  * because the resizing function assumes a linear color space while GD
  * apparently uses sRGB. The gamma correction is an approximation to the
  * correct colorspace conversion.
  * ($cropX, $cropY) is the top left corner and $cropWidth x $cropHeight the
  * size of the region copied from the original; $miniWidth x $miniHeight is
  * the size of the resulting preview.
  */
 private function resize (
    int $cropX,
    int $cropY,
    int $cropWidth,
    int $cropHeight,
    int $miniWidth,
    int $miniHeight,
 ):    void
 {
    // to linear values before resampling
    imagegammacorrect ($this->image, 2.2, 1);
    $preview = imagecreatetruecolor ($miniWidth, $miniHeight);
    imagefill ($preview, 0, 0, imagecolorallocate ($preview, 255, 255, 255));
    imagecopyresampled ($preview, $this->image, 0, 0, $cropX, $cropY, $miniWidth, $miniHeight, $cropWidth, $cropHeight);
    $this->image = $preview;
    // and back again on the preview
    imagegammacorrect ($this->image, 1, 2.2);
 }


 /* Save the image at the given $filename. The format to be used is defined by
  * the file extension. The $quality ranges from 0 to 100 where 0 means least
  * quality, tiny file size and 100 means best quality, big file size. A value
  * in the range 80 to 85 is good for the web. Returns true on success and false
  * for unsupported types. Throws a PreviewException, if a supported type can
  * not be written.
  */
 public function save (
    string $filename,
    int $quality = 100,
 ):    bool
 {
    $type = strtoupper (pathinfo ($filename, PATHINFO_EXTENSION));
    if (!in_array ($type, ['JPEG', 'JPG'], true) or !self::supports ($type)):
        return false;
    endif;

    // interlaced JPEGs are saved as progressive JPEGs
    imageinterlace ($this->image, true);
    if (!@imagejpeg ($this->image, $filename, $quality)):
        throw new PreviewException ('Failed to save preview');
    endif;
    return true;
 }


 /* Return for a given image $type whether the class supports saving it. The
  * type should be an uppercase preferred file extension such as JPG or JXL.
  * The answer is computed once per $type and remembered in a static array.
  */
 public static function supports (
    string $type,
 ):    bool
 {
    static $supports = [];
    return $supports[$type] ??= function_exists ('imagetypes') && match ($type) {
        'JPG',
        'JPEG'  => boolval (imagetypes () & IMG_JPG),
        default => false,
    };
 }


 /* Rotate and/or mirror the image so that it becomes oriented as displayed, if
  * the EXIF $orientation tag (see https://exiftool.org/TagNames/EXIF.html)
  * indicates that it has a different orientation. Throws a PreviewException,
  * if GD fails to rotate the image.
  */
 private function upright (
    int $orientation,
 ):    void
 {
    // Each entry holds the flip mode to apply first (or NULL for none) and the
    // anti-clockwise rotation in degrees to apply afterwards.
    [$flip, $angle] = match ($orientation) {
        2 => [IMG_FLIP_HORIZONTAL, 0],
        3 => [IMG_FLIP_BOTH, 0],
        4 => [IMG_FLIP_VERTICAL, 0],
        5 => [IMG_FLIP_HORIZONTAL, 90],
        6 => [NULL, 270],
        7 => [IMG_FLIP_HORIZONTAL, 270],
        8 => [NULL, 90],
        default => [NULL, 0],
    };

    if ($flip !== NULL):
        imageflip ($this->image, $flip);
    endif;

    if ($angle !== 0):
        // imagerotate returns false on failure; do not keep that as the image
        $rotated = imagerotate ($this->image, $angle, 0);
        if (!$rotated):
            throw new PreviewException ('Could not rotate image');
        endif;
        $this->image = $rotated;
    endif;
 }

} // class PreviewGD


class PreviewImagick extends Preview {

 /* Creates the preview of the image stored at $filename. $specWidth and
  * $specHeight constrain the size of the preview, which is kept in an
  * \Imagick object. A PreviewException is thrown, if the image can not be
  * loaded or has no positive size.
  */
 public function __construct (
    string $filename,
    int $specWidth = 0,
    int $specHeight = 0,
 )
 {
    // load original image file
    try {
        $this->image = new \Imagick ($filename);
    } catch (\ImagickException) {
        throw new PreviewException ('Could not load image');
    }

    // determine original image size and orientation
    $orientation = $this->image->getImageOrientation ();
    $fullWidth = $this->image->getImageWidth ();
    $fullHeight = $this->image->getImageHeight ();
    if ($fullWidth < 1 or $fullHeight < 1):
        throw new PreviewException ('Image width or height is not positive');
    endif;

    // Images that are stored lying on their side are turned upright only after
    // they have been resized, because turning a small image is cheaper. For
    // the resizing step the spec'ed width and height must be swapped then.
    if ($orientation >= 5 and $orientation <= 8):
        $swap = $specWidth;
        $specWidth = $specHeight;
        $specHeight = $swap;
    endif;

    $geometry = self::coordinates ($fullWidth, $fullHeight, $specWidth, $specHeight);
    $this->resize (...$geometry);
    $this->upright ($orientation);
 }


 /* Crops the original image to the aspect ratio of the desired preview and
  * scales the result to $miniWidth x $miniHeight. Scaling is done in the
  * linear RGB colorspace, because the resizing filter expects linear values,
  * see https://imagemagick.org/Usage/resize/#resize_colorspace. Afterwards the
  * image is converted to sRGB, the standard on the web, and its metadata is
  * stripped.
  */
 private function resize (
    int $cropX,
    int $cropY,
    int $cropWidth,
    int $cropHeight,
    int $miniWidth,
    int $miniHeight,
 ):    void
 {
    $image = $this->image;

    // cropping is only done when the crop does not start in the top left corner
    $needsCrop = $cropX !== 0 || $cropY !== 0;
    if ($needsCrop):
        $image->cropImage ($cropWidth, $cropHeight, $cropX, $cropY);
    endif;

    if ($image->getImageColorspace () !== \Imagick::COLORSPACE_RGB):
        $image->transformImageColorspace (\Imagick::COLORSPACE_RGB);
    endif;

    $needsScaling = $cropWidth !== $miniWidth || $cropHeight !== $miniHeight;
    if ($needsScaling):
        $image->resizeImage ($miniWidth, $miniHeight, \Imagick::FILTER_CATROM, 1);
    endif;

    $image->transformImageColorspace (\Imagick::COLORSPACE_SRGB);
    $image->stripImage ();
 }


 /* Writes the image to $filename in the format that is given by the file
  * extension. $quality ranges from 0 (least quality, tiny file) to 100 (best
  * quality, big file); 80 to 85 is a good range for the web. Returns true on
  * success and false for unsupported types. Throws a PreviewException, if a
  * supported type can not be written.
  */
 public function save (
    string $filename,
    int $quality = 100,
 ):    bool
 {
    $type = strtoupper (pathinfo ($filename, PATHINFO_EXTENSION));
    if (!self::supports ($type)):
        return false;
    endif;

    // prepare image for saving
    if ($type === 'JPEG' or $type === 'JPG'):
        $image = $this->image;
        if ($image->getImageAlphaChannel ()):
            // JPGs do not support transparency. A copy of the image is put on
            // a white background so that the original keeps its alpha channel.
            $image = clone $this->image;
            $image->setImageBackgroundColor ('#FFF');
            $image->setImageAlphaChannel (\Imagick::ALPHACHANNEL_REMOVE);
        endif;
        $image->setImageCompressionQuality ($quality); // apparently works for JPG
        $image->setImageFormat ('JPEG');
        $image->setInterlaceScheme (\Imagick::INTERLACE_JPEG);
    elseif ($type === 'JXL'):
        $image = $this->image;
        $image->setCompressionQuality ($quality); // apparently works for JXL
        $image->setOption ('jxl:effort', '8');
        $image->setImageFormat ('JXL');
        // NOTE: Imagick does not support progressive encoding of JXL yet.
    else:
        return false;
    endif;

    // Imagick::writeImage can fail without throwing an \ImagickException on
    // some systems according to php.net comments -- maybe due to rights issues
    // of ImageMagick vs. PHP? We try file_put_contents in that case. The
    // finally block decides about the outcome no matter how the try block ends.
    $success = false;
    try {
        if ($image->writeImage ($filename)):
            $success = true;
        else:
            $success = (bool) @file_put_contents ($filename, $image);
        endif;
    } catch (\ImagickException) {
        // $success is still false here
    } finally {
        if (!$success):
            throw new PreviewException ('Failed to save preview');
        endif;
        return true;
    }
 }


 /* Tells whether this class can save images of the given $type, which should
  * be an uppercase preferred file extension such as JPG or JXL. The answer is
  * computed once per $type and remembered in a static array.
  */
 public static function supports (
    string $type,
 ):    bool
 {
    static $supports = [];
    if (!isset ($supports[$type])):
        $format = match ($type) {
            'JPEG', 'JPG' => 'JPEG',
            'JXL'         => 'JXL',
            default       => NULL,
        };
        $supports[$type] = class_exists ('\\Imagick') && $format !== NULL && !empty (\Imagick::queryFormats ($format));
    endif;
    return $supports[$type];
 }


 /* Mirrors and/or rotates the image so that it is oriented as displayed, if
  * the EXIF $orientation tag (see https://exiftool.org/TagNames/EXIF.html)
  * says that it is stored in a different orientation.
  */
 private function upright (
    int $orientation,
 ):    void
 {
    // Each entry tells whether to mirror horizontally (flop), whether to mirror
    // vertically (flip) and by how many degrees to rotate clockwise afterwards.
    [$flop, $flip, $degrees] = match ($orientation) {
        2 => [true, false, 0],
        3 => [false, false, 180],
        4 => [false, true, 0],
        5 => [true, false, 270],
        6 => [false, false, 90],
        7 => [true, false, 90],
        8 => [false, false, 270],
        default => [false, false, 0],
    };

    if ($flop):
        $this->image->flopImage ();
    endif;
    if ($flip):
        $this->image->flipImage ();
    endif;
    if ($degrees !== 0):
        $this->image->rotateImage ('#FFF', $degrees);
    endif;
 }

} // class PreviewImagick


/* Converts an $alphabetic base-26 number into a decimal number string. The
 * $alphabetic is expected to use uppercase letters A-Z as digits except when
 * $is_lowercase is true. The letters A to Z stand for the values 1 to 26, so
 * that for example 'AA' is converted to '27'. An empty $alphabetic results in
 * '0'.
 */
function convert_alpha_number (
    string $alphabetic,
    bool $is_lowercase = false
):    string
{
    // An empty input has no digits. Returning early keeps the result at '0' on
    // all PHP versions: str_split ('') returns [''] before PHP 8.2, which would
    // be processed as a digit with a negative value below.
    if ($alphabetic === ''):
        return '0';
    endif;

    // difference between the byte value of a letter and its digit value
    $shift = $is_lowercase? 96: 64;

    $value = '0';
    foreach (str_split ($alphabetic) as $char):
        $value = bcadd (bcmul ($value, '26'), (string) (ord ($char) - $shift));
    endforeach;
    return $value;
}


/* Turns a natural $number, which is given as decimal string, into the form
 * requested by $type: '1' returns the $number unchanged, 'A' and 'a' return an
 * uppercase or lowercase alphabetic base-26 number as used by alphabetic list
 * counters. NULL is returned for any other $type and for numbers that are not
 * greater than zero.
 */
function convert_number_string (
    string $number,
    string $type
):    string|null
{
    // decimal counters need no conversion
    if ($type === '1'):
        return $number;
    endif;

    // Only alphabetic types are handled beyond this point and these have no
    // representation for zero or negative numbers.
    $is_upper = $type === 'A';
    if ((!$is_upper and $type !== 'a') or bccomp ($number, '0') < 1):
        return NULL;
    endif;

    // https://drafts.csswg.org/css-counter-styles-3/#alphabetic-system
    $letters = '';
    $rest = $number;
    while ($rest !== '0'):
        $rest = bcsub ($rest, '1');
        $letters = chr (65 + (int) bcmod ($rest, '26')) . $letters;
        $rest = bcdiv ($rest, '26', 0);
    endwhile;

    return $is_upper? $letters: strtolower ($letters);
}


/* Encodes a $byte as HTML decimal numeric character reference, provided that
 * it is a printable ASCII character other than the blank space, i.e. a byte in
 * the range 33 to 126. Any other $byte is returned as it is.
 */
function encode_printable_ascii (
    string $byte
):    string
{
    $code = ord ($byte);
    if ($code <= 32 or $code >= 127):
        return $byte;
    endif;
    return "&#$code;";
}


/* Returns a $string in which the characters with a special meaning in HTML
 * (&, <, >, double and single quotes) are encoded as HTML character
 * references. The dollar sign is encoded as well, because Aneamal uses it as
 * mark for math formulas. The $string is expected to be UTF-8 encoded; byte
 * sequences that are not valid UTF-8 are substituted by a Unicode replacement
 * character.
 */
function encode_special_chars (
    string $string
):    string
{
    $escaped = htmlspecialchars ($string, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    return strtr ($escaped, ['$' => '&#36;']);
}


/* Splits a $string at commas into at most $limit items and returns the items
 * as array with leading and trailing whitespace removed from each of them.
 */
function explode_comma_separated (
    string $string,
    int $limit = PHP_INT_MAX
):    array
{
    $items = [];
    foreach (explode (',', $string, $limit) as $item):
        $items[] = trim ($item);
    endforeach;
    return $items;
}


/* Splits a $string at the first occurrence of a $delimiter and returns both
 * parts converted to integers as array. The second integer is zero, if the
 * $delimiter does not occur in the $string.
 */
function explode_integer_pair (
    string $delimiter,
    string $string
):    array
{
    $parts = explode ($delimiter, $string, 2);
    return [(int) $parts[0], (int) ($parts[1] ?? 0)];
}


/* Returns an array of substrings of $string formed by splitting $string at
 * $delimiter that is not protected with a backslash. The returned array will
 * contain a maximum of $limit elements with the last element containing the
 * rest of $string. Any $limit below 2 is treated as 1, i.e. the whole $string
 * is returned as only element.
 */
function explode_unslashed (
    string $delimiter,
    string $string,
    int $limit = PHP_INT_MAX
):    array
{
    if ($limit < 2):
        return [$string];
    endif;

    // Skip the whole delimiter after each match instead of a single byte, so
    // that delimiters longer than one byte do not leak into the next piece.
    // NOTE(review): assumes strpos_unslashed returns the position of the first
    // byte of the match -- confirm against its definition.
    $skip = max (strlen ($delimiter), 1);

    $final = $limit - 1; // number of splits allowed before the rest is taken
    $count = $offset = 0;
    $array = [];
    while ($count < $final and !is_null ($pos = strpos_unslashed ($string, $delimiter, $offset))):
        $array[$count++] = substr ($string, $offset, $pos - $offset);
        $offset = $pos + $skip;
    endwhile;
    $array[] = substr ($string, $offset);
    return $array;
}


/* Returns a text that is composed from given $lines from the file referenced
 * by $filename. $lines is a string of comma-separated integers and ranges; a
 * range consists of two integers separated by a colon. Here's an example:
 * "1,6,9:11,-3". This example would compose a text from the 1st, 6th, 9th,
 * 10th, 11th line and the 3rd last line from the file. A range whose end lies
 * before its start yields the lines in reverse order. Lines that do not exist
 * in the file are skipped. The function returns the whole text if the $lines
 * parameter is not provided, and NULL if the file can not be read.
 * NOTE: The file should exist, so instead of checking for existence first we
 * assume it does, suppress errors during reading and check the return value.
 */
function file_get_lines (
    string $filename,
    string|null $lines = NULL
):    string|null
{
    // return whole file, if no line choice was made
    if ($lines === NULL):
        $text = @file_get_contents ($filename);
        return $text === false? NULL: $text;
    endif;

    // load lines of the file
    $text = @file ($filename, FILE_IGNORE_NEW_LINES);
    if (!is_array ($text)):
        return NULL;
    endif;
    $count = count ($text);
    $output = [];

    foreach (explode (',', $lines) as $item):
        if ($item === ''):
            continue;
        endif;

        $range = explode (':', $item, 2);
        if (isset ($range[1])):
            $start = get_line_index ((int) $range[0], $count);
            $until = get_line_index ((int) $range[1], $count);
            if ($until >= $start):
                // ascending range, clamped to the lines that exist
                $start = max ($start, 0);
                $until = min ($until, $count - 1);
                for ($i = $start; $i <= $until; ++$i):
                    $output[] = $text[$i];
                endfor;
            else:
                // descending range, clamped to the lines that exist
                $start = min ($start, $count - 1);
                $until = max ($until, 0);
                for ($i = $start; $i >= $until; --$i):
                    $output[] = $text[$i];
                endfor;
            endif;
        else:
            // single line; silently skipped when it is not in the file
            $i = get_line_index ((int) $item, $count);
            if (isset ($text[$i])):
                $output[] = $text[$i];
            endif;
        endif;
    endforeach;

    return implode ("\n", $output);
}


/* Returns an array that lists a given $directory followed by all of its parent
 * directories up to the root, which is represented by an empty string. For
 * example '/foo/bar/baz' results in ['/foo/bar/baz', '/foo/bar', '/foo', ''].
 * The function only manipulates strings: it does not check whether directories
 * exist. Empty path components and the component '.' are ignored, but the
 * parent directory identifier '..' is not resolved.
 */
function get_directories (
    string $directory
):    array
{
    $paths = [''];
    $current = '';
    // Leading and trailing slashes just produce empty segments, which are
    // skipped like any other empty segment.
    foreach (explode ('/', $directory) as $segment):
        if ($segment === '' or $segment === '.'):
            continue;
        endif;
        $current .= "/$segment";
        array_unshift ($paths, $current); // deepest directory comes first
    endforeach;
    return $paths;
}


/* Returns an HTML attribute 'loading' for HTML <img> and <iframe> elements,
 * including a leading space. Its value is 'lazy', if $lazy is positive, and
 * 'eager', if $lazy is 0. If $lazy is negative, the function decides itself:
 * it returns an empty string (which browsers normally interpret as eager
 * except in data-saving modes) for the first two such calls and the lazy
 * attribute afterwards. A call with positive $lazy ends that initial phase
 * immediately.
 */
function get_loading_attribute (
    int $lazy = -1
):    string
{
    // number of automatic calls that still get no attribute
    static $countdown = 2;

    if ($lazy === 0):
        return " loading='eager'";
    endif;

    if ($lazy > 0):
        $countdown = 0;
        return " loading='lazy'";
    endif;

    if ($countdown > 0):
        --$countdown;
        return '';
    endif;

    return " loading='lazy'";
}


/* Translates a $line number of a file into the corresponding index of an
 * array of a given $length that holds the lines of the file (as created with
 * PHP's file function). Positive line numbers count from the start of the
 * file beginning with 1, negative line numbers count from the end of the file.
 * The returned index lies outside the index range of the array, iff the line
 * does not exist in the file.
 */
function get_line_index (
    int $line,
    int $length
):    int
{
    if ($line < 0):
        return $line + $length;
    endif;
    return $line - 1;
}


/* Extracts and returns the value of the charset attribute from the $mediatype
 * of a data URI or NULL, if no charset is found. The syntax of $mediatype is
 * described among other places at
 *   https://tools.ietf.org/html/rfc2397#section-3
 *   https://tools.ietf.org/html/rfc2045#section-5.1
 * Whitespace around the parameter name and value, as in
 * "text/plain; charset=utf-8", is ignored and double quotes around the value
 * are removed.
 * This function may not actually handle all possible media types correctly,
 * for instance those which use semicolons inside quoted parameter values that
 * SHOULD not be used in data URIs, but can be used. It works for sensible URIs
 * that are not deliberately made more complicated than necessary. The function
 * should only be used to handle data URIs entered by authors to be parsed by
 * the Aneamal Translator, not for arbitrary data URIs and not to validate data
 * URIs passed to browsers.
 */
function get_mediatype_charset (
    string $mediatype
):    string|null
{
    foreach (explode (';', $mediatype) as $parameter):
        // only name=value pairs are of interest
        $pos = strpos ($parameter, '=');
        if ($pos === false):
            continue;
        endif;

        // Authors commonly put a space after the semicolon; trimming the name
        // makes sure that such a charset parameter is still recognized.
        $name = strtolower (trim (rawurldecode (substr ($parameter, 0, $pos))));
        if ($name === 'charset'):
            return trim (trim (rawurldecode (substr ($parameter, $pos + 1))), '"');
        endif;
    endforeach;
    return NULL;
}


/* Returns an integer code for the type of an Aneamal metadata $name: 0 for an
 * empty name, META_CUSTOM for names that start with an ampersand, META_SPECIAL
 * for names that start with a lowercase ASCII letter and continue with
 * lowercase ASCII letters, digits and hyphens only, META_PLAIN for everything
 * else. Backslashes should not be removed from the $name before running this
 * function since they are significant to distinguish between plain and special
 * metadata names.
 */
function get_meta_type (
    string $name
):    int
{
    return match (true) {
        $name === ''
            => 0,
        $name[0] === '&'
            => META_CUSTOM,
        is_made_of ($name, 'abcdefghijklmnopqrstuvwxyz', '-0123456789abcdefghijklmnopqrstuvwxyz')
            => META_SPECIAL,
        default
            => META_PLAIN,
    };
}


/* Returns a short string to be used as HTML attribute value. It consists of a
 * $prefix and a number that is unique within a given $realm, e.g. the name of
 * a HTML <input> element within its form. The numbers of each realm start at 1
 * and increase with every call.
 * Prefixes currently in use:
 *  _f : form IDs
 *  _m : math
 *  _t : t-modules
 *  _x : x-modules
 */
function get_unique (
    string $realm = '',
    string $prefix = '_'
):    string
{
    // last number handed out per realm
    static $count = [];
    $count[$realm] = ($count[$realm] ?? 0) + 1;
    return $prefix . $count[$realm];
}


/* Returns an integer code for the type of a given $uri: 0 for an empty string,
 * URI_REMOTE for URIs that start with two slashes or have a scheme other than
 * data, URI_DATA for URIs with the data scheme, URI_PAGE for URIs that start
 * with a question mark or number sign, and URI_LOCAL for everything else.
 */
function get_uri_type (
    string $uri
):    int
{
    if ($uri === ''):
        return 0;
    endif;

    $first = $uri[0];
    if ($first === '/'):
        // two leading slashes introduce an authority, one a local path
        return ($uri[1] ?? '') === '/'? URI_REMOTE: URI_LOCAL;
    endif;
    if ($first === '?' or $first === '#'):
        return URI_PAGE;
    endif;

    $scheme = parse_url ($uri, PHP_URL_SCHEME);
    if (!$scheme):
        return URI_LOCAL;
    endif;
    return strtolower ($scheme) === 'data'? URI_DATA: URI_REMOTE;
}


/* Returns an array [width, height] which represents the pixel dimensions of a
 * given video identified by $filename. If the dimensions can not be
 * determined, NULL is returned instead. Currently, the only supported file
 * extensions are .mp4 and .webm corresponding to the video container formats;
 * the extension is matched case-insensitively. Filenames without a dot or
 * with another extension yield NULL.
 */
function get_video_dimensions (
    string $filename
):    array|null
{
    // Without a dot there is no extension. Checking this explicitly avoids
    // passing false as offset to substr.
    $dot = strrpos ($filename, '.');
    if ($dot === false):
        return NULL;
    endif;

    // choose the reader for the container format
    $video = match (strtolower (substr ($filename, $dot))) {
        '.mp4'  => new MP4 ($filename), // would be similar enough to .3gp, .mov, .m4v
        '.webm' => new Matroska ($filename), // would be similar enough to .mkv
        default => NULL,
    };
    if ($video === NULL):
        return NULL;
    endif;

    $width = $video->width;
    $height = $video->height;
    return $width > 0 && $height > 0? [$width, $height]: NULL;
}


/* Partitions the string items of an $array into groups of consecutive items,
 * joins the strings of each group with $joint and returns the array of joined
 * strings. The $callback is called with each string and is expected to return
 * true, if the string starts a new group, and false otherwise; the very first
 * item always starts a group. Each joined string is stored under the key of
 * the first item of its group.
 */
function joint (
    array $array,
    callable $callback,
    string $joint = "\n"
):    array
{
    $groups = [];
    $current = NULL; // key of the group that is currently being filled
    foreach ($array as $key => $string):
        $starts_group = $callback ($string);
        if ($starts_group or $current === NULL):
            $current = $key;
            $groups[$key] = $string;
        else:
            $groups[$current] .= $joint . $string;
        endif;
    endforeach;
    return $groups;
}


/* Builds a string of HTML attributes that can be inserted into an opening HTML
 * tag from an $array of attribute name => attribute value pairs. A value NULL
 * produces an attribute without value, an array value is handled as set of
 * tokens and any other value is encoded for use in HTML. Every attribute is
 * preceded by a space, so the returned string has a leading space unless the
 * $array and hence the returned string are empty.
 */
function implode_html_attributes (
    array $array
):    string
{
    $attributes = [];
    foreach ($array as $name => $value):
        $attributes[] = match (true) {
            $value === NULL
                => " $name",
            is_array ($value)
                => " $name='" . implode_html_attribute_value ($value) . "'",
            default
                => " $name='" . encode_special_chars ($value) . "'",
        };
    endforeach;
    return implode ('', $attributes);
}


/* Turns an $array of HTML attribute tokens into a space-separated token set
 * that can be used as a HTML attribute value, for example as value of the HTML
 * class attribute. Duplicate and empty tokens are dropped, the remaining
 * tokens are sorted as strings and the result is encoded for use in HTML.
 */
function implode_html_attribute_value (
    array $array
):    string
{
    $tokens = [];
    foreach (array_unique ($array) as $token):
        if ($token !== ''):
            $tokens[] = $token;
        endif;
    endforeach;
    sort ($tokens, SORT_STRING);
    $value = implode (' ', $tokens);
    return encode_special_chars ($value);
}


/* Includes the PHP file whose path is given as $extension, providing
 * parameters in the $_ array, and forwards the included file's return value.
 * NOTE: DO NOT CHANGE THE PARAMETER NAMES OF THIS FUNCTION. Scripts included
 * via this function could rely on them. The purpose of this otherwise trivial
 * function is to create a local variable scope for the included file: the
 * included code sees the variables of this function, i.e. $extension and $_,
 * instead of the variables of the calling code. For the same reason no further
 * local variables should be introduced here.
 */
function include_module (
    string $extension,
    array $_ = []
):    mixed // SHOULD be a Closure or a string, cast others to string
{
    return include $extension;
}


/* Returns whether a given $string matches a simple pattern. The exact
 * behaviour depends on the number of provided parameters:
 * If only $string and $charlist_1 are given, the function returns true iff
 * $string contains only characters that are also in $charlist_1. An empty
 * $string results in true.
 * If $string, $charlist_1 and $charlist_2 are given, the function returns true
 * iff the first byte of $string is also in $charlist_1 and all further
 * characters of $string are also in $charlist_2. An empty $string results in
 * false.
 */
function is_made_of (
    string $string,
    string $charlist_1,
    string|null $charlist_2 = NULL
):    bool
{
    $length = strlen ($string);

    // one list for all characters
    if ($charlist_2 === NULL):
        return strspn ($string, $charlist_1) === $length;
    endif;

    // one list for the first byte, another one for the rest
    if ($length === 0 or !str_contains ($charlist_1, $string[0])):
        return false;
    endif;
    return strspn ($string, $charlist_2, 1) === $length - 1;
}


/* Returns true, if a given $filename identifies an existing regular file (not
 * a directory) which is readable, and false otherwise.
 */
function is_readable_file (
    string $filename
):    bool
{
    if (!is_file ($filename)):
        return false;
    endif;
    return is_readable ($filename);
}


/* Returns whether a byte whose position in a $string is given by $pos (starting
 * at 0 for the first byte) is preceded by a backslash that is not preceded by
 * a backslash itself. The byte is slashed, iff the number of backslashes that
 * directly precede it is odd. The result is false, if there is no byte in the
 * $string directly before $pos.
 */
function is_slashed (
    string $string,
    int $pos
):    bool
{
    if ($pos < 1 or $pos > strlen ($string)):
        return false;
    endif;

    // count the uninterrupted run of backslashes that ends right before $pos
    $backslashes = 0;
    for ($i = $pos - 1; $i >= 0 and $string[$i] === '\\'; --$i):
        ++$backslashes;
    endfor;

    return $backslashes % 2 === 1;
}


/* Returns a given $string as valid UTF-8 text without initial byte order mark
 * and with Unix-style line breaks: byte sequences that are not valid UTF-8 are
 * substituted by the Unicode replacement character U+FFFD, the optional
 * initial UTF-8 byte order mark is removed and the line breaks \r\n and \r are
 * normalized to \n.
 */
function normalize_text (
    string $string,
):    string
{
    // check whether the string matches UTF-8 and repair it otherwise
    if (!mb_check_encoding ($string, 'UTF-8')):
        mb_substitute_character (0xFFFD);
        $string = mb_scrub ($string, 'UTF-8');
    endif;

    // remove the optional byte order mark (BOM)
    if (strncmp ($string, "\xEF\xBB\xBF", 3) === 0):
        $string = substr ($string, 3);
    endif;

    // Replace Windows- and old Mac-style line breaks with Unix-style ones. The
    // longer sequence \r\n takes precedence over the single \r.
    return strtr ($string, ["\r\n" => "\n", "\r" => "\n"]);
}


/* Parses a file token which may be found at the beginning of Aneamal blocks and
 * looks like [type:clue] where the :clue part is optional and the
 * case-insensitive type is converted to small letters. If the first byte of the
 * type is "a", "x" or "t" and the second byte is "-", the type is further
 * separated into supertype-subtype.
 * The $token must be provided to the function WITHOUT surrounding square
 * brackets. Here are valid example tokens:
 *   Just a type:  d
 *   Type with clue:  i:Five foxes frolic freely.
 *   Type with subtype:  a-warning
 *   Type with subtype and clue:  t-excel:sheet 2
 * The function returns an array [(super)type, subtype, clue] where each item is
 * a string or NULL if missing.
 */
function parse_file_token (
    string $token = ''
):    array
{
    if ($token === ''):
        return [NULL, NULL, NULL];
    endif;

    // Separate the type from the clue at the first colon. The first byte always
    // belongs to the type, hence the search starts at the second byte.
    $colon = strpos ($token, ':', 1);
    if ($colon === false):
        $type = $token;
        $clue = NULL;
    else:
        $type = substr ($token, 0, $colon);
        $clue = substr ($token, $colon + 1);
    endif;

    // types with subtype
    $initial = strtolower ($token[0]);
    if (($token[1] ?? '') === '-' and in_array ($initial, ['a', 't', 'x'], true)):
        return [$initial, mb_strtolower (substr ($type, 2), 'UTF-8'), $clue];
    endif;

    // types without subtype
    return [strtolower ($type), NULL, $clue];
}


/* Prepares a barely formatted text $string for use as a HTML attribute value
 * and returns it. Barely formatted means that the only character with a special
 * meaning is the backslash. A backslash before a line break implies that the
 * line break shall be removed. A backslash before any other character implies
 * that the following character has no special meaning. The backslashes are
 * resolved first, then the result is encoded for use in HTML.
 */
function prepare_html_attribute (
    string $string
):    string
{
    $unslashed = strip_slashed_breaks_and_slashes ($string);
    return encode_special_chars ($unslashed);
}


/* Encodes a UTF-8 $string so that it can be used as HTML id and URL fragment
 * and CSS selector. The result contains only lowercase letters from the ASCII
 * range, digits and the hyphen -, and it neither starts nor ends with a
 * hyphen.
 */
function prepare_html_id (
    string $string
):    string
{
    // Normalization: Treat no-break space as space, remove soft hyphens and
    // turn letters to lowercase. More could be done, but we prefer simplicity.
    $spaced = str_replace (["\u{A0}", "\u{AD}"], ["\x20", ''], $string);
    $normalized = mb_strtolower ($spaced, 'UTF-8');

    // Handle the bytes (i.e. not Unicode characters) one by one.
    $id = '';
    $length = strlen ($normalized);
    for ($i = 0; $i < $length; ++$i):
        $byte = $normalized[$i];
        $code = ord ($byte);
        if (($code >= 0x61 and $code <= 0x7A) or ($code >= 0x30 and $code <= 0x39)):
            // ASCII letters a-z and digits 0-9 are kept
            $id .= $byte;
        elseif ($code > 0x7F):
            // non-ASCII bytes become two lowercase hexadecimal digits
            $id .= bin2hex ($byte);
        elseif (!str_ends_with ($id, '-')):
            // runs of other ASCII bytes collapse to a single hyphen
            $id .= '-';
        endif;
    endfor;

    // Remove trailing and leading hyphens and return the result.
    return trim ($id, '-');
}


/* Returns the value for a given $key from the memory (a file on the disk) that
 * is specific to the Query String of the current request. If the $key can not
 * be found, but a $callback is given, then its return value is both added to
 * the memory and returned. NULL is returned, if the $key can not be found and
 * no $callback is given. Writing to the disk is given up for the rest of the
 * request as soon as it fails once; values are still remembered in RAM then.
 * NOTE: Using APCu has been considered instead of reading/writing from the
 * disk, but APCu is neither reliable nor secure in shared web hosting.
 */
function query_memory (
    string $key,
    callable|null $callback = NULL
):    string|null
{
    static $cache = NULL;   // key => value records; NULL until first call
    static $write = TRUE;   // whether appending to the memory file is tried
    static $filename = '';  // path of the memory file

    // Load the memory from the disk at the first call. Usually a memory file
    // exists, so we do not check for existence first, but suppress errors while
    // trying to load it and check the return value. If neither it nor its
    // directory exists yet, try to create the directory. QUERY_HASH is defined
    // in main.php
    if ($cache === NULL):
        $cache = [];
        $filename = __DIR__ . '/private/memory/' . QUERY_HASH . '.tsv';
        $lines = @file ($filename, FILE_IGNORE_NEW_LINES);
        if ($lines):
            // each line is a record: key, tabulator, value
            foreach ($lines as $line):
                $fields = explode ("\t", $line, 2);
                if (count ($fields) === 2):
                    $cache[$fields[0]] = $fields[1];
                endif;
            endforeach;
        else:
            $dirname = dirname ($filename);
            if (!is_dir ($dirname)):
                $write = @mkdir ($dirname, 0777, true);
            endif;
        endif;
    endif;

    // Return the requested value from the cache, if available, or NULL, if the
    // value neither exists nor can be computed.
    if (isset ($cache[$key])):
        return $cache[$key];
    elseif ($callback === NULL):
        return NULL;
    endif;

    // Compute the value; add it to the cache if possible; return it.
    $value = $callback ();
    $cache[$key] = $value;
    if ($write):
        $write = (bool) @file_put_contents ($filename, "$key\t$value\n", FILE_APPEND);
    endif;
    return $value;
}


/* Splits an $uri into three components and returns them as array: everything
 * before the query and fragment (i.e. scheme to path) as string, the query
 * without its leading ? and the fragment without its leading #. The query or
 * fragment is NULL, if the $uri does not contain the respective delimiter, and
 * an empty string, if nothing follows the delimiter.
 */
function split_uri_tail (
    string $uri
):    array
{
    // The fragment begins after the first #. It is cut off first, so that a ?
    // inside the fragment is not mistaken for the start of a query.
    [$rest, $fragment] = array_pad (explode ('#', $uri, 2), 2, NULL);

    // The query begins after the first ? in what remains.
    [$head, $query] = array_pad (explode ('?', $rest, 2), 2, NULL);

    return [$head, $query, $fragment];
}


/* Checks selected bytes of $subject against sets of permitted bytes. Each key
 * of $pattern names the offset of a byte in $subject, the corresponding value
 * is a string listing the bytes that are acceptable at that offset. Returns
 * true, if every listed offset exists in $subject and holds one of its
 * acceptable bytes, and false otherwise. An empty $pattern always matches.
 */
function str_match (
    string $subject,
    array $pattern
):    bool
{
    foreach ($pattern as $position => $allowed):
        // This offset is fine, move on to the next one.
        if (isset ($subject[$position]) and str_contains ($allowed, $subject[$position])):
            continue;
        endif;
        // The byte is missing or not among the allowed ones.
        return false;
    endforeach;
    return true;
}


/* Returns the $url without the suffix .nml. Only a suffix at the very end of
 * the part that precedes the first ? or # character counts; whatever follows
 * from that ? or # on is kept as it is. An $url without such a suffix is
 * returned unchanged.
 */
function strip_nml_suffix (
    string $url
):    string
{
    // Cut the $url in two at the first ? or #; the tail may be empty.
    $cut = strcspn ($url, '?#');
    $head = substr ($url, 0, $cut);
    $tail = substr ($url, $cut);

    if (!str_ends_with ($head, '.nml')):
        return $url;
    endif;

    // Drop the four bytes of the suffix and reattach the tail.
    return substr ($head, 0, -4) . $tail;
}


/* Returns the portion of a given $path which is relative to the Aneamal root
 * directory or, if this is not possible, just the basename. This is used to
 * reduce the information published publicly in error messages.
 *
 * The Aneamal root directory is the parent of the directory that contains this
 * file. A $path is only regarded as being inside the root, if the root is
 * followed by a directory separator or by nothing at all. A sibling directory
 * whose name merely starts with the name of the root directory, for example
 * /var/www2 next to /var/www, is therefore reduced to the basename as well.
 */
function strip_root (
    string $path
):    string
{
    $aneamal_root = dirname (__DIR__);
    $root_length = strlen ($aneamal_root);

    if (str_starts_with ($path, $aneamal_root)):
        // A root such as / already ends with a separator. Otherwise the byte
        // right after the root must be a separator or the end of the $path.
        $root_is_closed = str_ends_with ($aneamal_root, '/') || str_ends_with ($aneamal_root, DIRECTORY_SEPARATOR);
        $boundary = substr ($path, $root_length, 1);
        if ($root_is_closed or in_array ($boundary, ['', '/', DIRECTORY_SEPARATOR], true)):
            return substr ($path, $root_length);
        endif;
    endif;

    return basename ($path);
}


/* Joins lines of a $string that are continued with a backslash: every slashed
 * line feed character U+000A is removed together with the backslash in front
 * of it. All other line feeds and backslashes stay untouched. Returns the
 * result.
 */
function strip_slashed_breaks (
    string $string
):    string
{
    // Without a backslash directly before a line feed there is nothing to do.
    if (!str_contains ($string, "\\\n")):
        return $string;
    endif;

    $pieces = explode ("\n", $string);
    $last = array_key_last ($pieces);
    $joined = '';
    foreach ($pieces as $index => $piece):
        if ($index === $last):
            // No line feed followed the final piece.
            $joined .= $piece;
        elseif (is_slashed ($piece, strlen ($piece))):
            // is_slashed is defined elsewhere; it is asked here whether the
            // line feed that followed this piece was slashed. If so, drop the
            // backslash at the end of the piece and do not restore the break.
            $joined .= substr ($piece, 0, -1);
        else:
            $joined .= $piece . "\n";
        endif;
    endforeach;
    return $joined;
}


/* Resolves the backslash notation of a $string in a single pass and returns
 * the result: a backslash followed by a line feed character U+000A removes
 * both, a backslash followed by any other byte leaves just that byte, and a
 * lone backslash at the very end is dropped. This is equivalent to:
 * stripslashes (strip_slashed_breaks ($string))
 */
function strip_slashed_breaks_and_slashes (
    string $string
):    string
{
    $result = '';
    $length = strlen ($string);
    $pos = 0;
    while ($pos < $length):
        $char = $string[$pos++];

        // Ordinary bytes are copied as they are.
        if ($char !== '\\'):
            $result .= $char;
            continue;
        endif;

        // A backslash at the end of the string has nothing to protect.
        if ($pos === $length):
            break;
        endif;

        // The byte after the backslash is consumed here and now, so it can not
        // act as a backslash itself. It is kept unless it is a line feed.
        $slashed = $string[$pos++];
        if ($slashed !== "\n"):
            $result .= $slashed;
        endif;
    endwhile;
    return $result;
}


/* Works like PHP's strpos function but ignores finds that are slashed with a
 * backslash or included within quotation marks or in a sub-context and returns
 * NULL instead of false when nothing is found.
 *
 * $haystack:  the string to search in
 * $needle:    the string to search for
 * $masks:     an array that defines mask marks where the keys provide the
 *             opening marks and the values provide the closing marks, e.g.
 *             ['"' => '"', "'" => "'"]; we do not look
 *             for the needle inside a quotation and we do not look for other
 *             quotation marks inside a quotation either
 * $offset:    initial search offset, 0 is the first byte in $haystack
 * $context:   a string such as ` which opens and closes a sub-string where we
 *             do not look for the needle, but where we do respect quotations;
 *             the empty string means that there is no such context
 *
 * returns:    the first position of an unslashed, unmasked needle in haystack
 *             at/after the initial offset or NULL if none was found; NULL is
 *             also returned when a quotation or context is opened before the
 *             needle but never closed
 */
function strpos_unmasked (
    string $haystack,
    string $needle,
    array $masks,
    int $offset = 0,
    string $context = ''
):    int|null
{
    $haystack_length = strlen ($haystack);

    // build an array of opening quotation marks and add the context mark to it
    $opening_marks = array_keys ($masks);
    if ($context !== ''):
        $opening_marks[] = $context;
    endif;

    // Each round of the loop either returns or skips one masked section by
    // moving the offset behind the closing mark of that section.
    $needle_pos = -1;
    while (true):
        // find occurrence of an unprotected needle or return NULL; a needle
        // found in an earlier round is reused as long as the offset has not
        // moved past it
        if ($offset > $needle_pos and is_null ($needle_pos = strpos_unslashed ($haystack, $needle, $offset))):
            return NULL;
        endif;

        // search haystack, starting at offset, for the first occurrence of an
        // opening mark which is not protected by a backslash. Its position is
        // stored in $mark_pos and the mark itself in $mark. In case of no find
        // $mark_pos stays at the haystack length and $mark is not set in this
        // round.
        $mark_pos = $haystack_length;
        foreach ($opening_marks as $i => $candidate):
            if (is_null ($new_pos = strpos_unslashed ($haystack, $candidate, $offset))):
                unset ($opening_marks[$i]); // don't look for this again
            elseif ($new_pos < $mark_pos):
                $mark_pos = $new_pos;
                $mark = $candidate;
            endif;
        endforeach;

        // return needle pos if it is before or at the first unprotected
        // opening mark; this is always the case when no mark was found
        if ($needle_pos <= $mark_pos):
            return $needle_pos;
        endif;

        // else find closing context/quotation mark or return NULL
        if ($mark === $context):
            // The end of a context is searched with this very function, but
            // without a context, so that a closing context mark inside a
            // quotation is not taken for the end of the context.
            if (is_null ($offset = strpos_unmasked ($haystack, $mark, $masks, $mark_pos + strlen ($mark)))):
                return NULL;
            endif;
            $offset += strlen ($mark);
        else:
            // Inside a quotation only slashes are respected, nothing else.
            if (is_null ($offset = strpos_unslashed ($haystack, $masks[$mark], $mark_pos + strlen ($mark)))):
                return NULL;
            endif;
            $offset += strlen ($masks[$mark]);
        endif;
    endwhile;
}


/* Works like strpos_unmasked but additionally takes nesting levels into
 * account. That means it finds the first needle that is neither slashed nor
 * masked nor matched by an unslashed opening nesting mark before it.
 *
 * $haystack:  the string to search in
 * $needle:    the string to search for, also a closing nesting mark
 * $match:     an opening nesting mark to match a subsequent needle
 * $masks:     an array that defines masking marks where the keys provide the
 *             opening marks and the values provide the closing marks, e.g.
 *             ['"' => '"', "'" => "'"]
 * $offset:    initial search offset, 0 is the first byte in $haystack
 * $context:   a string such as ` which opens and closes a sub-string where we
 *             do not look for the needle, but where we do respect masks
 *
 * returns:    the first position of an unslashed, unmasked, unmatched needle in
 *             haystack at/after the initial offset or NULL if none was found
 */
function strpos_unmatched (
    string $haystack,
    string $needle,
    string $match,
    array $masks,
    int $offset = 0,
    string $context = ''
):    int|null
{
    // $unmatched is the balance of needles found so far minus opening nesting
    // marks found before them. $open holds the position of the opening mark
    // found last; it turns NULL once the haystack has no further opening mark.
    $unmatched = 0;
    $open = -1;
    $matchlen = strlen ($match);
    while (true):
        // find occurrence of an unmasked needle or return NULL
        if (is_null ($pos = strpos_unmasked ($haystack, $needle, $masks, $offset, $context))):
            return NULL;
        endif;
        ++$unmatched;
        // find the number of unmasked opening nesting marks before the needle;
        // isset is false for NULL, so the search is skipped for good as soon
        // as an earlier round has run out of opening marks
        if (isset ($open)):
            // start so that the first search below begins exactly at $offset
            $open = $offset - $matchlen;
            // count only opening marks that end at or before the needle starts
            while (!is_null ($open = strpos_unmasked ($haystack, $match, $masks, $open += $matchlen, $context)) and $pos - $open >= $matchlen):
                --$unmatched;
            endwhile;
        endif;
        // return position of the needle iff the needle was unmatched; else
        // continue the search behind this needle
        if ($unmatched > 0):
            return $pos;
        else:
            $offset = $pos + strlen ($needle);
        endif;
    endwhile;
}


/* Searches like PHP's strpos function, but skips every find that is protected
 * by an unprotected backslash, and reports a failed search with NULL rather
 * than false.
 *
 * $haystack:  the string to search in
 * $needle:    the string to search for
 * $offset:    initial search offset, 0 is the first byte in $haystack
 *
 * returns:    the first position of an unslashed needle in haystack which is
 *             at/after the initial offset or NULL if there is none
 */
function strpos_unslashed (
    string $haystack,
    string $needle,
    int $offset = 0
):    int|null
{
    $found = strpos ($haystack, $needle, $offset);

    // Move on to the next find as long as the current one is slashed. A find
    // at position 0 has nothing in front of it and is accepted right away.
    while ($found !== false and $found !== 0 and is_slashed ($haystack, $found)):
        $found = strpos ($haystack, $needle, $found + 1);
    endwhile;

    if ($found === false):
        return NULL;
    endif;
    return $found;
}


/* Works like strpos_unmasked except for finding the last instead of the first
 * occurrence. Look at strpos_unmasked for a detailed description of the
 * parameters, all of which are passed on to it.
 * This function must search forwards in the string until all occurrences have
 * been identified. Searching backwards from the end is not an option due to
 * the asymmetric nature of masks.
 *
 * returns:    the last position of an unslashed, unmasked needle in haystack
 *             at/after the initial offset or NULL if none was found
 */
function strrpos_unmasked (
    string $haystack,
    string $needle,
    array $masks,
    int $offset = 0,
    string $context = ''
):    int|null
{
    $occurrence = NULL;
    // The $context is handed over as given by the caller. It must not be
    // written as $context = '' in the call, because that is an assignment
    // which would discard the caller's value and always search without context.
    while (!is_null ($pos = strpos_unmasked ($haystack, $needle, $masks, $offset, $context))):
        $occurrence = $pos;
        // continue right behind the start of this find
        $offset = $pos + 1;
    endwhile;
    return $occurrence;
}


/* Returns a $string in which all bytes from the ASCII range are preserved,
 * while every uninterrupted sequence of bytes outside that range is replaced
 * by one single Unicode replacement character U+FFFD, encoded in UTF-8.
 */
function substitute_nonascii (
    string $string
):    string
{
    // The pattern works on bytes, since it does not use the u modifier. The
    // possessive ++ takes a whole run of non-ASCII bytes as one match.
    return preg_replace ('/[\x80-\xFF]++/', "\xEF\xBF\xBD", $string);
}


/* Transposes a two-dimensional $array, i.e. rows become columns and columns
 * become rows, and returns the result. Keys are NOT preserved, neither those
 * of the rows nor those within a row: the result always is a list of lists.
 * Shorter rows are filled up with NULL, if the rows differ in length. In case
 * of an empty two-dimensional input $array, an empty one-dimensional array is
 * returned. The function is optimized for the default case of more than 1 input
 * row. Case 1 could be done quicker with a verbose foreach, but is atypical.
 */
function transpose_matrix (
    array $array
):    array
{
    // Reindex the rows: spreading an array with string keys into array_map
    // below would be an error or be taken for named arguments.
    $rows = array_values ($array);

    return match (count ($rows)) {
        0 => [],
        // array_map keeps the keys when it is given exactly one array, hence
        // the single row is reindexed to honour "keys are not preserved".
        1 => array_map (fn ($cell) => [$cell], array_values ($rows[0])),
        // With NULL as callback and several arrays, array_map zips them.
        default => array_map (null, ...$rows),
    };
}


// The End