/* Copyright 2010-2024 Martin Janecke <>
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
// declare (strict_types = 1); // only during development
namespace prlbr\aneamal;
const vhtml = '31';
require __DIR__ . '/func.php';
class nml2html {
/* Constants
// identifiers for different kinds of Aneamal files
private const aside = 'a';
private const embedded = 'e'; // code block with tag [a] above
private const footer = 'f';
private const header = 'h';
public const linked = 'l'; // file linked to with file tag like [a]->...
public const main = 'm'; // file requested by reader, loaded via main.php
private const quoted = 'q'; // quotation block
private const settings = 's'; // implied @meta.nml or declared as @meta: ->...
private const template = 't'; // /aneamal/a-.../index.nml referenced [a-...]
// identifiers for different kinds of metadata values
private const text = 0;
private const link = 1;
private const embd = 2;
// style for error messages and default URI for more error feedback
private const error_base_url = '';
private const error_style = 'background:#FA0;color:#000;padding:1ex 1em';
// the directory where to save preview images and their default size setting,
// see method Preview::coordinates in func.php for details
public const pixdir = '/aneamal/public/jpeg';
public const pixsize = '-640,-640';
// letters and digits, whitespace
private const alphanumeric = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';
private const space = "\t\n\x20";
// maximum number of file_aneamal calls to prevent infinite recursion
private const max_inclusions = 256;
// default maximum size of text file included with [a], [h], [t], [b] etc.
private const default_textcap = 262144; // 2^18 Byte = 256 KiB
// string markup whose content is not interpreted as containing Aneamal markup;
// the array uses the format 'left mark' => 'right mark'
private const masks = ['|' => '|', '$' => '$', '{' => '}', '->`' => '`', '->!`' => '`'];
// brackets create strings that can be nested
private const brackets = ['(' => ')', '[' => ']'];
// heading/section-break marks and their corresponding levels
private const sectioners = ['===' => 1, '---' => 2, '- -' => 3];
// expandable-section heading marks and their corresponding levels
private const expanders = ['+++' => 2, '+ +' => 3];
// alignment mark => [kind, HTML classes]; different kinds can be combined
private const aligners = [
':..' => ['align', '_align _left'],
'..:' => ['align', '_align _right'],
':.:' => ['align', '_align _justify'],
'.:.' => ['align', '_align _center'],
'::.' => ['align', '_align _mid _left'],
'.::' => ['align', '_align _mid _right'],
': :' => ['allot', '_columns'],
':::' => ['allot', '_row'],
':.' => ['float', '_float _port'],
'.:' => ['float', '_float _starboard'],
'::' => ['clear', '_clear'],
// predefined textual metadata, can be called in text as @version for example
private const default_textvars = [
'version' => vmain === vhtml && vhtml === vfunc? vmain: vmain . '/' . vhtml . '/' . vfunc,
// the character & followed by a letter or digit is a custom mark, i.e. it can
// be defined by the author; & followed by some non-alphanumeric characters is
// predefined as follows
private const default_markvars = [
'+' => "<span style='font-size:larger'>",
',' => "<span style='font-weight:normal'>",
'-' => "<span style='font-size:smaller'>",
';' => '</span>',
// the number of links accepted for a given file type in linked file statements;
// the number that a given x-module accepts varies and is checked individually
private const max_links_for_type = [
'a' => 1,
'b' => 1,
'd' => 1,
'h' => 1,
'i' => 2,
'j' => 2,
'p' => 1,
'q' => 1,
't' => 1,
'v' => 3,
'w' => 3,
'x' => 0xface, // many
// values that are recognized in a comma separated list for @fix and represent
// provisional fixes for browser issues etc.; assigned numbers shall be powers
// of two to be used as flags; the recognized 'inherit' fix is not listed here
private const recognized_fixes = [
'list-numbers' => 1,
'previews' => 2,
// metadata names for use with links as values which have no special function
// in Aneamal but are recognized by Aneamal and published as HTML <link>
private const recognized_html_links = [
// a selection from W3C and WHATWG standards
'canonical' => "<link rel='canonical' href='%s'>",
'icon' => "<link rel='icon' href='%s'>",
'license' => "<link rel='license' href='%s'>",
'next' => "<link rel='next' href='%s'>",
'prev' => "<link rel='prev' href='%s'>",
// further meta links
'atom' => "<link rel='alternate' type='application/atom+xml' href='%s'>",
'me' => "<link rel='me' href='%s'>",
'rss' => "<link rel='alternate' type='application/rss+xml' href='%s'>",
'shortlink' => "<link rel='shortlink' href='%s'>",
'up' => "<link rel='up' href='%s'>",
// metadata names for use with text values which have no special function in
// Aneamal but are recognized by Aneamal and published as HTML <meta>
private const recognized_html_metas = [
// a selection from W3C and WHATWG standards
'author' => "<meta name='author' content='%s'>",
'description' => "<meta name='description' content='%s'>",
'keywords' => "<meta name='keywords' content='%s'>",
// further meta names
'publisher' => "<meta name='publisher' content='%s'>",
'robots' => "<meta name='robots' content='%s'>",
'translator' => "<meta name='translator' content='%s'>",
'viewport' => "<meta name='viewport' content='%s'>",
// these metadata names may be declared more than once in the same file, while
// all others can only be declared once
private const metadata_multiples = [
// these metadata names are aliases
private const metadata_aliases = [
'classes' => 'class',
'language' => 'lang',
/* Properties
// holds error messages, gets cleared after metadata and each block is processed
private array $errors = [];
// base URL for more information on error messages
private string|null $errormore = NULL;
// error message from @meta.nml or a manually assigned @meta file
private string $metaerror = '';
// local file path of the Aneamal root directory; no trailing slash
private string $root = __DIR__ . '/..';
// Aneamal root directory relative to the domain in an URL; no trailing slash
private string $home = '';
// Aneamal document's directory relative to the Aneamal root; no trailing slash
private string $dir = '';
// Aneamal filename on the disk in which the document resides
private string $filename = '';
// for example self::main, self::header, self::linked, self::embedded
private string $filekind = '';
// working copy of the Aneamal document's lines
private array $lines = [];
// custom marks; format 'mark' => 'HTML content'
private array $markvars = [];
// metadata; item format 'name' => [type => 'value'], type is either self::text,
// self::link or self::embd
private array $metavars = [];
// locally declared metadata names including custom marks; format 'name' => true
private array $metadecs = [];
// URIs identifying translations of the document; format 'langcode' => 'URI'
private array $altlangs = [];
// URIs to JavaScript files that shall be referenced
private array $javascripts = [];
// URIs to CSS files that shall be referenced
private array $stylesheets = [];
// URIs that are published in HTML <link rel> elements; format 'name' => 'URI'
private array $links = [];
// texts that are published in HTML <meta name> elements; format 'name' => 'text'
private array $metas = [];
// HTML snippets to be put in the HTML <head>
private array $metahtmls = [];
// JavaScript snippets to be put at the end of the HTML <body>
private array $metascripts = [];
// CSS snippets to be put in the HTML <head>
private array $metastyles = [];
// manually declared metadata filename; empty string means don't autoload either
private string|null $meta = NULL;
// manually declared look filename; empty string means don't autoload either
private string|null $look = NULL;
// manually declared header filename; empty string means don't autoload either
private string|null $header = NULL;
// manually declared aside filename; empty string means don't autoload either
private string|null $aside = NULL;
// manually declared footer filename; empty string means don't autoload either
private string|null $footer = NULL;
// layout value 'manual' prevents autoloading header/aside/footer/look, 'blank'
// prevents any display, NULL is the default behaviour declared with 'auto'
private string|null $layout = NULL;
// BCP47 code for the document, but lowercased; empty string is explicit unknown
private string|null $lang = NULL;
// value 'rtl' iff the document is written in right-to-left script
private string|null $direction = NULL;
// classes for the HTML class attribute
private array $classes = [];
// value for the HTML role attribute; only used for linked/embedded documents
private string $role = '';
// value for HTML <title>; only used for the main document
private string $title = '';
// added at the end of the HTML <title>; possible use is branding via @meta.nml
private string $titletail = '';
// id of a HTML form, gets set when a first form element is encountered
private string|null $form = NULL;
// for questions and the corresponding answers posted via a form
private array $post = [];
// positive for lazy loading, 0 for eager loading of images/iframes; negative
// starts eagerly and then switches to lazy; NULL means not set/inherit; default
// behavior is negative
private int|null $lazy = NULL;
// image dimensions of generated preview for [j]-> inclusion in pixels: width
// and height separated by comma; positive values force the given size, negative
// values set a maximum; zero means that the value is not set
private string|null $pixels = NULL;
// metadata where the key is a module name and the value is configuration text
private array $modules = [];
// maximum size of text file included with [a], [h], [t], [b] etc. in bytes
private int|null $textcap = NULL;
// bit mask for provisional fixes for browser issues
private int|null $fixes = NULL;
// holds the HTML end tags for sections/subsections that have been started
private array $sections = [3 => '', 2 => ''];
/* Public methods
/* This is the only constructor of the class. It preprocesses the Aneamal file,
* e.g. processes sandwich markup, parses/removes metadata, removes comments
* and makes sure the text conforms to UTF-8.
* $source: the source text of an Aneamal file
* $dir: directory in which the parsed file is located relative to the
* Aneamal root, starting with a slash; needed to locate other
* files referenced from this
* $home: Aneamal root directory relative to the host in an URL; an empty
* string means the Aneamal root is the document root
* $filename: basename of the file the $source is written in
* $kind: a string identifying the kind of Aneamal document, i.e. one of the
* file-kind constants such as self::main or self::linked
* $metavars: metadata; item format 'name' => [type => 'value']
* $markvars: custom marks; format 'mark' => 'HTML content'
public function __construct (
string $source,
string $dir,
string $home,
string $filename = '',
// one of the file-kind constants of this class, e.g. self::main or self::quoted
string $kind = self::main,
array $metavars = [], // ignored for $kind === self::main
array $markvars = [], // ignored for $kind === self::main
) // returns nothing
// Enforce valid UTF-8 encoding and normalize line breaks. Embedded files
// and quotation blocks have been checked with the containing file already.
if ($kind !== self::embedded and $kind !== self::quoted):
$source = normalize_text ($source);
// Remove trailing slashes from the directories:
$home = rtrim ($home, '/');
$dir = rtrim ($dir, '/');
// In the following indented code, metadata is loaded from @meta.nml
// automatically or from a file manually assigned in a @meta declaration
// BEFORE the main Aneamal file's metadata is processed, because the latter
// inherits data from the former. However, said optional @meta declaration
// would occur in the LATTER! Solution: load @meta.nml automatically, but if
// a @meta declaration in the main Aneamal file contradicts the prior
// automatic loading, we reset all properties of this object and jump back
// to the label Raptor, respecting the manual setting this time. An empty
// string for $manual_meta means: load no meta file.
// NULL means that no manual @meta declaration is known (yet), so the
// automatic loading applies.
$manual_meta = NULL;
// No metadata has been loaded from @meta.nml automatically in this run
// of the loop yet.
$loaded_meta_automatically = false;
// canonicalized local file path of the Aneamal root directory
$this->root = dirname (__DIR__);
// URL path of the Aneamal root directory relative to the host
$this->home = $home;
// location, name and kind of the processed Aneamal file
$this->dir = $dir;
$this->filename = $filename;
$this->filekind = $kind;
// Load metadata from a metadata file for the main Aneamal file; inherit
// metadata variables in case of other Aneamal files.
if ($kind === self::main):
if ($manual_meta === NULL):
// presumably load_settings returns whether a metadata file was found and
// loaded - TODO confirm against its definition
$loaded_meta_automatically = $this->load_settings ();
elseif ($manual_meta !== ''):
$this->load_settings ($manual_meta);
$this->metavars = $metavars;
$this->markvars = $markvars;
// Process sandwich markup, remove comments and process and remove
// metadata from this Aneamal file.
$this->lines = explode ("\n", $source);
$this->preprocess_lines ();
$this->preprocess_comments_meta ();
// Erase/Rewind once in main Aneamal file, if a manual @meta declaration
// contradicts the early automatic @meta.nml loading.
// After the rewind $manual_meta equals $this->meta, so the outer condition
// cannot become true a second time for the same declaration.
if ($kind === self::main and $this->meta !== $manual_meta):
// A rewind is unnecessary, if nothing was loaded automatically and the
// declaration (empty string) asks to load nothing either.
if ($loaded_meta_automatically || $this->meta !== ''):
$manual_meta = $this->meta;
$this->reset ();
goto Raptor;
// Default text direction is left-to-right except for embedded files and
// quotation blocks, which inherit the direction of the containing document
// in process_file_aneamal by default.
if ($kind !== self::embedded and $kind !== self::quoted):
// ??= keeps a direction that has been set before, e.g. by metadata
$this->direction ??= 'ltr';
// The empty string stands for an unknown language as default. Aneamal files
// other than the main Aneamal file inherit the settings of the parent
// document in process_file_aneamal.
if ($kind === self::main):
$this->lang ??= '';
/* Converts this Aneamal file block by block into HTML and returns the HTML.
 * Blocks are runs of non-empty lines separated by empty lines. Error messages
 * collected for the metadata and for each block are inserted into the output
 * right where they occurred. Sections which are still open at the end of the
 * file get closed, and a HTML <form> element is appended, if a form id has been
 * set while processing the blocks.
 */
public function body (
): string
{
	$parts = [];   // HTML fragments in output order
	$pending = []; // lines of the block being collected; keys are line numbers
	$first = NULL; // first line number in the collected block

	// messages that arose before any block has been processed
	if ($this->metaerror) {
		$parts[] = $this->metaerror;
	}
	if ($this->errors) {
		$parts[] = $this->get_errors ('Errors in ghost markup');
	}

	// An empty line at the end of the file makes sure that the last block is
	// flushed inside the loop like every other block.
	$this->lines[PHP_INT_MAX] = '';

	foreach ($this->lines as $number => $text) {
		// non-empty lines extend the current block
		if ($text !== '') {
			$first ??= $number;
			$last = $number;
			$pending[$last] = $text;
			continue;
		}
		// an empty line without a collected block has no effect
		if (!$pending) {
			continue;
		}
		// an empty line ends the block: translate it, report its errors
		try {
			$parts[] = $this->block ($pending);
		} catch (\Throwable $e) {
			$this->error ('PHP exception in ' . strip_root ($e->getFile ()) . ', line ' . $e->getLine (), 237);
		}
		if ($this->errors) {
			$parts[] = $this->get_errors ('Errors in the previous block', $first, $last);
		}
		$first = NULL;
		$pending = [];
	}

	// Close expandable sections that extend to the end of the file.
	$close = $this->end_sections ();
	if ($close) {
		$parts[] = $close;
	}

	// Add a <form> element, if form controls have been used in the file.
	if (isset ($this->form)) {
		$parts[] = $this->form_element ();
	}

	return implode ($parts);
}
/* Generates and returns the whole HTML webpage representing the Aneamal file
* managed by this object, including implied header/footer/etc. files, styles
* and scripts.
public function document (
): string
// Every part of the page starts out empty and stays empty, if nothing is
// declared or found for it.
$header = $main = $aside = $footer = $script = '';
// hidden document
if ($this->layout === 'blank'):
return '';
// prepare a list of directories to automatically look for layout elements;
// it is the current directory and parent directories up to the Aneamal root
// ($directories is only read below in branches that also require the layout
// to be NULL)
if ($this->layout === NULL):
$directories = get_directories ($this->dir);
// get header: a manually declared header file is used instead of automatic
// lookup; an empty string as declaration means that there is no header
if ($this->header !== NULL):
if ($this->header !== ''):
$header = $this->file_aneamal ($this->header, self::header);
elseif ($this->layout === NULL):
foreach ($directories as $dir):
if (is_readable ($filename = $this->root . $dir . '/@header.nml')):
$header = $this->process_file_aneamal (file_get_contents ($filename), $dir, '@header.nml', self::header);
// build main content
$main = "<main>\n" . $this->body () . "</main>\n";
// get aside; same precedence rules as for the header
if ($this->aside !== NULL):
if ($this->aside !== ''):
$aside = $this->file_aneamal ($this->aside, self::aside);
elseif ($this->layout === NULL):
foreach ($directories as $dir):
if (is_readable ($filename = $this->root . $dir . '/@aside.nml')):
$aside = $this->process_file_aneamal (file_get_contents ($filename), $dir, '@aside.nml', self::aside);
// get footer; same precedence rules as for the header
if ($this->footer !== NULL):
if ($this->footer !== ''):
$footer = $this->file_aneamal ($this->footer, self::footer);
elseif ($this->layout === NULL):
foreach ($directories as $dir):
if (is_readable ($filename = $this->root . $dir . '/@footer.nml')):
$footer = $this->process_file_aneamal (file_get_contents ($filename), $dir, '@footer.nml', self::footer);
// add look (implied stylesheet); it is put in front of the stylesheets that
// have been collected already
if ($this->look !== NULL):
if ($this->look !== ''):
array_unshift ($this->stylesheets, $this->look);
elseif ($this->layout === NULL):
foreach ($directories as $dir):
$canonical = $dir . '/@look.css';
if (file_exists ($filename = $this->root . $canonical)):
// the file's modification time is appended as query string, presumably so
// that browsers fetch the stylesheet anew after it has been changed
array_unshift ($this->stylesheets, $this->home . $canonical . '?' . (string) filemtime ($filename));
// add javascript: referenced files first, inline snippets afterwards; each
// distinct URI/snippet is output only once
foreach (array_unique ($this->javascripts) as $val):
$script .= "<script src='" . encode_special_chars ($val) . "'></script>\n";
foreach (array_unique ($this->metascripts) as $val):
$script .= "<script>\n{$val}\n</script>\n";
// build <head> content, must be generated after ->body, because the title
// published in the head may be generated from the main heading in the body
$head = $this->head ();
// <html> and <body> attributes; $dir is reused here for the dir attribute
$dir = $class = $lang = '';
// no dir attribute for the left-to-right default
if ($this->direction !== 'ltr'):
$dir = " dir='{$this->direction}'";
// no lang attribute for an unknown language (empty string)
if ($this->lang):
$lang = " lang='{$this->lang}'";
if (!empty ($this->classes)):
$class = " class='" . implode_html_attribute_value ($this->classes) . "'";
// build document
return "<!doctype html>\n<html{$dir}{$lang}>\n{$head}<body{$class}>\n{$header}{$main}{$aside}{$footer}{$script}</body>\n</html>\n";
/* Private methods
/* Reads the address of a link from $string. Links are marked with an arrow ->
 * in Aneamal and &$index must initially hold the position of the arrow's
 * greater-than sign. What follows the arrow decides how the address is found:
 *   #  a target inside this document, returned as URI fragment
 *   @  a metadata name whose declared link value is returned
 *   ,  a shortened data URI, returned with the implied data URI prefix
 *   anything else is read as regular address
 * While reading, &$index is advanced; it is meant to point at the last
 * character of the link afterwards. Returns an empty string, if the $string
 * ends with the arrow, and NULL, if an error has been reported. (This method
 * serves linked files and metadata declarations; hyperlinks in text are handled
 * by the method link.)
 * $allow_meta: whether the ->@name form is permitted
 */
private function address (
	string $string,
	int &$index,
	bool $allow_meta = true
): string|null
{
	// the string ends with the arrow: link without address
	if (!isset ($string[++$index])) {
		return '';
	}
	$mark = $string[$index];

	// some link modifiers known from hyperlinks are not available here
	if ($mark === '!' || ($mark === '@' && !$allow_meta)) {
		$this->error ('Invalid link modifier: ' . $mark . ' not allowed after -> in this case', 107);
		return NULL;
	}

	// link to a target in this document
	if ($mark === '#') {
		if (!isset ($string[++$index])) {
			return '#'; // an empty target refers to the top of the page
		}
		$target = $this->targeted ($string, $index);
		return $target === ''? '#': '#' . $this->fragment_identifier ($target);
	}

	// link declared in metadata
	if ($mark === '@') {
		if (!isset ($string[++$index])) {
			$this->error ('Metadata name expected after ->@', 229);
			return NULL;
		}
		$name = stripslashes ($this->group ($string, $index));
		// unknown metadata name
		if (!isset ($this->metavars[$name])) {
			$this->error ('Metadata name after ->@ not declared: ' . $name, 230);
			return NULL;
		}
		// known metadata name, but its value is not a link
		if (!isset ($this->metavars[$name][self::link])) {
			$this->error ('Metadata name not declared with link value: @' . $name, 247);
			return NULL;
		}
		return $this->metavars[$name][self::link];
	}

	// shortened data URI
	if ($mark === ',') {
		$prefix = 'data:;charset=UTF-8;base64,';
		if (!isset ($string[++$index])) {
			return $prefix;
		}
		return $prefix . stripslashes ($this->group ($string, $index, true));
	}

	// normal link
	return stripslashes ($this->group ($string, $index, true));
}
/* Processes $lines of an Aneamal document which together form an Aneamal block
* and returns the corresponding HTML code. The processing depends on the kind
* of block. The kind is identified by block markup, usually to be found at the
* start of the block. Headings are only allowed, if not $inside_note.
* The keys of $lines are the corresponding line numbers from the Aneamal file.
private function block (
array $lines,
bool $inside_note = false,
): string
// initial strings from the block to check what kind of block this is:
// the first line, its first three bytes and its first two bytes
$intro = reset ($lines);
$three = substr ($intro, 0, 3);
$two = substr ($three, 0, 2);
// regular headings and section breaks
if (isset (self::sectioners[$three])):
if ($inside_note):
$this->error ('Heading or section break in notes', 212); // also used below
return $this->sectioner (implode ("\n", $lines));
// expandable-section headings and breaks
if (isset (self::expanders[$three])):
if ($inside_note):
$this->error ('Heading or section break in notes', 212); // also used above
return $this->expander (implode ("\n", $lines));
// notes consist of three or more underscores and a block below; they do end
// (expandable) sections and subsections
if ($three === '___'):
// remove the underscore line; what remains is the content of the note
unset ($lines[array_key_first ($lines)]);
$sectiontags = $this->end_sections (2);
if (!is_made_of ($intro, '_')):
$this->error ('Invalid notes markup: expected only underscores in the first line', 75);
return $sectiontags;
elseif (empty ($lines)):
$this->error ('Note missing: expected content below ___', 76);
return $sectiontags;
// the note's content is processed as a block of its own with $inside_note set
return $sectiontags . "<div role='note'>\n<hr>\n" . $this->block ($lines, true) . "</div>\n";
// math block: starts and ends with double dollar signs
if ($two === '$$'):
return $this->math_block (implode ("\n", $lines));
// code block: every line starts with a vertical bar
if ($intro[0] === '|'):
$count = 0;
// $line is a reference, so the vertical bars are removed in $lines itself
foreach ($lines as &$line):
if ($line[0] === '|'):
$line = substr ($line, 1);
// the first $count lines are passed on as first argument, the remaining
// lines as third argument
return $this->embedded_file (implode ("\n", array_slice ($lines, 0, $count)), '', implode ("\n", array_slice ($lines, $count)));
return $this->embedded_file (implode ("\n", $lines));
// bulleted list, optionally indented with dots and whitespace
// (&& binds more tightly than or: either the block starts with <> but not
// with <><, or it starts with a dot and <> follows the dots/whitespace)
if ($two === '<>' && $three !== '<><' or $intro[0] === '.' && str_starts_with (ltrim ($intro, ".\t\x20"), '<>')):
return $this->bulleted_list ($lines);
// tagged list
if ($intro[0] === '<'):
return $this->tagged_list ($lines);
// file candidate, i.e. file or paragraph with bracketed token at its start
// where the first byte inside the brackets is alphanumeric
if (str_match ($intro, ['[', self::alphanumeric, '-:]'])):
return $this->file (implode ("\n", $lines));
// block quotation
if ($intro[0] === '>'):
$text = $citation = '';
$count = 0;
foreach ($lines as $line):
if ($line[0] === '>'):
// collect the quoted text without the leading > of each line
$text .= substr ($line, 1) . "\n";
// lines from position $count onwards form the citation
$citation = implode ("\n", array_slice ($lines, $count));
return $this->process_file_aneamal (
// cut off the line break that was appended after the last quoted line
substr ($text, 0, -1),
// alignment and other classes
if ($three === ': .' or $three === '. :'):
$this->error ("Reserved characters $three at paragraph start", 209);
elseif (isset (self::aligners[$three]) or isset (self::aligners[$two])):
return $this->classy_block ($lines, $inside_note);
// numbered list
// ($pos is the length of the first whitespace-free token; the condition fails
// for a length of zero and requires the token to end with a dot)
if ($pos = strcspn ($intro, "\t\x20") and $intro[$pos - 1] === '.'):
if (!empty ($this->item_number (substr ($intro, 0, $pos), []))):
return $this->numbered_list ($lines);
// form fields
$count = 0;
foreach ($lines as $line):
// option
if ($line[0] === '{'):
if ($count === 0):
return $this->options ($lines);
// lines from position $count onwards keep their line numbers as keys; the
// lines before them are passed on as one joined string
return $this->options (
array_slice ($lines, $count, NULL, true),
implode ("\n", array_slice ($lines, 0, $count))
// textbox
elseif (str_match ($line, ['[', '_=-', ':]'])):
if ($count === 0):
return $this->textboxes ($lines);
// same split as for options above
return $this->textboxes (
array_slice ($lines, $count, NULL, true),
implode ("\n", array_slice ($lines, 0, $count))
// simple paragraph
return '<p>' . $this->phrase (implode ("\n", $lines)) . "</p>\n";
/* Looks for the right bracket that belongs to the left bracket located at
 * position &$index of $string. If it is found, &$index is moved to the right
 * bracket's position and the text between both brackets is returned after
 * slashed line breaks have been removed from it. If it is not found, &$index
 * stays unchanged and NULL is returned to signal that the left bracket does not
 * start a bracketed string.
 */
private function bracketed (
	string $string,
	int &$index
): string|null
{
	// the bracket pair to match and the position right after the left bracket
	$opening = $string[$index];
	$closing = self::brackets[$opening];
	$start = $index + 1;

	// position of the corresponding right bracket, NULL if there is none
	$end = strpos_unmatched ($string, $closing, $opening, self::masks, $start, '`');

	if ($end !== NULL) {
		$index = $end;
		// content between the brackets without slashed line breaks
		return strip_slashed_breaks (substr ($string, $start, $end - $start));
	}
	return NULL;
}
/* Parses $lines that together form a bulleted list and returns it as HTML <ul>
* list. Bulleted list items may be indented. The indentation level is marked by
* dots. Space/tabs are allowed between the dots without having meaning.
private function bulleted_list (
array $lines
): string
// $list collects the HTML, $item the source text of the item being read
$list = $item = '';
// indentation levels of the previous and the current item; -1 means that no
// list has been opened yet, so the first item at level 0 opens one <ul>
$prelevel = $level = -1;
foreach ($lines as $line):
// append lines without rhombus to the previous item
$bullet = strpos ($line, '<>');
if ($bullet === false):
$item .= "\n" . $line;
// append lines whose rhombus isn't a bullet to the previous item
// (a bullet may only be preceded by dots, tabs and spaces)
$indent = substr ($line, 0, $bullet);
if (strspn ($indent, ".\t\x20") < $bullet):
$item .= "\n" . $line;
// translate previous item and determine its indentation level
// (the number of dots in front of the rhombus is the level)
$list .= $this->item ($item);
$level = substr_count ($indent, '.');
// inter-item markup
if ($level === $prelevel):
// sibling: close the previous item, open the next one
$list .= "</li>\n<li>";
elseif ($level < $prelevel):
// shallower: close one nested list per level that is left
$list .= str_repeat ("</li>\n</ul>\n", $prelevel - $level) . "</li>\n<li>";
// deeper: levels that were jumped over get a placeholder item with the
// class _skip, then the list for the new level is opened
$list .= str_repeat ("\n<ul>\n<li class='_skip'>", $level - $prelevel - 1);
$list .= "\n<ul>\n<li>";
// get current item: the text after the rhombus
$prelevel = $level;
$item = substr ($line, $bullet + 2);
// drop the line break in front of the first <ul>, translate the last item and
// close all lists that are still open
return substr ($list, 1) . $this->item ($item) . str_repeat ("</li>\n</ul>\n", $level + 1);
/* Translates alignment marks at the start of a block, which consists of $lines,
* to HTML class names. The rest of the block is processed. If the block is an
* expandable-section break or expandable-section heading, the class names are
* passed on to its function, otherwise they are added with a HTML <div> here.
* Expandable-sections are not permitted $inside_note.
private function classy_block (
array $lines,
bool $inside_note
): string
// collected HTML classes; format 'kind of alignment' => 'class names'
$classes = [];
// look at the first remaining line as long as there are lines
while (!is_null ($k = array_key_first ($lines))):
$three = substr ($lines[$k], 0, 3);
// Alignment mark:
// (marks of three characters are tried before marks of two characters;
// $mark holds the mark that matched)
if (isset (self::aligners[$mark = $three]) or isset (self::aligners[$mark = substr ($lines[$k], 0, 2)])):
$length = strlen ($mark);
// White space after an alignment mark is required and helps to
// differentiate between .:. :: and .: .:: for example.
// A mark at the very end of the line is accepted, too.
if (isset ($lines[$k][$length]) and !str_contains ("\t\x20", $lines[$k][$length])):
$this->error ('Missing whitespace after alignment mark ' . $mark, 211, $k);
// Get the HTML class name corresponding to the alignment mark, but
// prevent contradictions like both left- and right-aligned.
if (isset ($classes[self::aligners[$mark][0]])):
$this->error ('Alignment of this kind already set: ' . self::aligners[$mark][0], 213, $k);
$classes[self::aligners[$mark][0]] = self::aligners[$mark][1];
// Remove alignment mark:
// a line that consisted of nothing but the mark is dropped
if ($lines[$k] === $mark):
unset ($lines[$k]);
// cut off the mark and the whitespace that follows it
$lines[$k] = ltrim (substr ($lines[$k], $length));
// Section headings apply alignment themselves. They are not allowed
// inside notes though.
elseif (isset (self::sectioners[$three]) and !$inside_note):
return $this->sectioner (implode ("\n", $lines), ['class' => $classes]);
elseif (isset (self::expanders[$three]) and !$inside_note):
return $this->expander (implode ("\n", $lines), ['class' => $classes]);
// Break out of the loop, if no alignment was found anymore.
// Classes can be set without content, too.
if (empty ($lines)):
return "<div class='" . implode_html_attribute_value ($classes) . "'></div>\n";
// Process the aligned block:
return "<div class='" . implode_html_attribute_value ($classes) . "'>\n" . $this->block ($lines, $inside_note) . "</div>\n";
/* Turns $code into a HTML <code> element: stripslashes is applied to the
 * $code, the result is passed through encode_special_chars and line breaks
 * are replaced by <br> elements.
 */
private function code (
	string $code
): string
{
	$unslashed = stripslashes ($code);
	$encoded = encode_special_chars ($unslashed);
	$content = str_replace ("\n", '<br>', $encoded);
	return '<code>' . $content . '</code>';
}
/* Generates a preview image from the original with the filename $from and saves
* it as $jpg and optionally $jxl as well. The latter is expected to be in the
* same directory as the former. The preview is created with size constraints
* given as $width and $height. See Preview::coordinates () in func.php for a
* description of these values. Returns true on success, otherwise false.
* Success is only determined by the creation of $jpg, not $jxl.
private function create_preview (
int $width,
int $height,
string $from,
string $jpg,
string $jxl = '',
): bool
// the static variable keeps its value between calls, so the shutdown function
// below is registered only once
static $shutdown_function_registered = false;
// Give up, if the target directory is not writable and either exists already
// or cannot be created (|| binds more tightly than and).
if (!is_writable ($dir = dirname ($jpg)) and is_dir ($dir) || !@mkdir ($dir, 0777, true)):
$this->error ('Preview image folder not writable: ' . self::pixdir, 228);
return false;
// compute preview; Imagick is tried before GD
try {
if (PreviewImagick::supports ('JPG')):
$preview = new PreviewImagick ($from, $width, $height);
elseif (PreviewGD::supports ('JPG')):
$preview = new PreviewGD ($from, $width, $height);
$this->error ('PHP installation lacks graphics extension', 252);
return false;
} catch (PreviewException $e) {
$this->error ('Could not create preview image from: ' . basename ($from), 253);
return false;
// save preview; $path remembers the file being written, so that the error
// message can name it; the second argument of save is presumably the
// compression quality - TODO confirm in func.php
try {
if ($jxl):
$preview->save ($path = $jxl, 80);
// only the result for the JPEG file decides about success
$success = $preview->save ($path = $jpg, 83);
} catch (PreviewException $e) {
$this->error ('Could not save preview image at: ' . strip_root ($path), 254);
return false;
// Creating many previews can take more time than max_execution_time allows,
// causing a Fatal Error. We explain this likely timeout reason with a
// shutdown function. It is registered after a successful preview creation,
// because we can only guarantee progress then and ask to try again.
if ($success and !$shutdown_function_registered):
register_shutdown_function (function () {
if (connection_status () & 2): // timeout
// status and retry hint can only be sent, if no output has started yet
if (!headers_sent ()):
header ('HTTP/1.1 503 Service Unavailable');
header ('Retry-After: 120');
print "<p>Sorry for the timeout. ";
print "This can happen while new previews of images are generated. ";
print "The webpage will load quickly once they have been created. ";
print "<b>Please reload the webpage.</b></p>\n";
$shutdown_function_registered = true;
return $success;
/* Decodes a textual data $uri and returns its data converted to UTF-8.
 * The part between the first colon and the first comma after it is read as
 * media type, everything after that comma as data. The data is percent-decoded
 * and, if the media type ends with ;base64, base64-decoded as well. The
 * character encoding is taken from the media type; US-ASCII is assumed, if
 * none is given.
 * Returns NULL, if the URI is malformed or its base64 encoding is corrupt. If
 * the character encoding is unknown or does not match the data, an error is
 * reported and the result of substitute_nonascii for the data is returned.
 */
private function data_uri (
	string $uri
): string|null
{
	// find the colon (end of URI scheme) and first comma (start of data); the
	// comma is only searched for when a colon has been found, because strpos
	// must not be given the boolean false as offset, which would raise a
	// TypeError when strict_types is enabled
	$colon = strpos ($uri, ':');
	$comma = $colon? strpos ($uri, ',', $colon): false;
	if (!$colon or !$comma) {
		$this->error ('Invalid data URI', 226);
		return NULL;
	}

	// extract mediatype and data
	$type = substr ($uri, $colon + 1, $comma - $colon - 1);
	$data = rawurldecode (substr ($uri, $comma + 1));

	// base64-decode the data strictly, i.e. invalid characters are reported as
	// error with the exception of line breaks
	if (strtolower (substr ($type, -7)) === ';base64') {
		$type = substr ($type, 0, -7);
		$data = base64_decode (chunk_split (str_replace ("\n", '', $data)), true);
		if ($data === false) {
			$this->error ('Corrupt base64 encoding', 62);
			return NULL;
		}
	}

	// find the data URI's character encoding; US-ASCII is the data-URI default
	$charset = get_mediatype_charset ($type) ?? 'US-ASCII';

	// transform the name of the encoding to its preferred mime name; a
	// ValueError is handled like a missing mime name
	try {
		$encoding = mb_preferred_mime_name ($charset);
	} catch (\ValueError) {
		$encoding = false;
	}

	// convert the data to UTF-8
	if ($encoding === false) { // means that the encoding is unknown to PHP
		$this->error ('Unrecognized character encoding: ' . $charset, 65);
		return substitute_nonascii ($data);
	}
	if ($data === '') {
		return '';
	}
	if (!mb_check_encoding ($data, $encoding)) {
		$this->error ('Character encoding mismatch: expected ' . $encoding, 64);
		return substitute_nonascii ($data);
	}
	// UTF-8 and US-ASCII data is returned as it is
	if ($encoding !== 'UTF-8' and $encoding !== 'US-ASCII') {
		$data = mb_convert_encoding ($data, 'UTF-8', $encoding);
	}
	return $data;
}
/* A target at the very start of a caption (of a quotation block or file) or of
 * a math label identifies the whole quotation block/file/math block. A target
 * is recognized by the character # at the start of the given &$caption. If one
 * exists, it is detached from &$caption, processed and returned as an HTML id
 * attribute with a leading space. An empty string is returned if there is no
 * target or if the target does not yield an identifier.
 */
private function detach_caption_target (
    string &$caption
): string
{
    // without a leading # there is nothing to detach
    if (!str_starts_with ($caption, '#')):
        return '';
    endif;

    // a lone # carries no target text; the caption is emptied in that case
    if (strlen ($caption) < 2):
        $this->error ('Target text missing after initial #', 177);
        $caption = '';
        return '';
    endif;

    // A target in curly brackets is cut off from the caption up to and
    // including the right bracket; otherwise only the # itself is cut off.
    $index = 1;
    if ($caption[1] === '{'):
        $target = $this->encurled ($caption, $index);
        $rest = substr ($caption, $index + 1);
    else:
        $target = $this->targeted ($caption, $index);
        $rest = substr ($caption, 1);
    endif;
    $name = $this->fragment_identifier ($target, true);
    $caption = ltrim ($rest);

    if ($name === ''):
        return '';
    endif;
    return " id='$name'";
}
/* Processes the $content of an embedded file with the method that matches its
 * type. The type is encoded in the file $token, which is taken apart by
 * parse_file_token (defined outside this class). If a $caption is provided,
 * the processed content and caption are returned wrapped in an HTML <figure>;
 * otherwise the processed content is returned on its own. An unrecognized
 * type is recorded as an error and yields an empty string.
 */
private function embedded_file (
    string $content,
    string $token = '',
    string|null $caption = NULL
): string
{
    [$type, $subtype, $clue] = parse_file_token ($token);

    // An optional target at the start of the caption becomes the id. This must
    // happen before the caption is handed on to the text file handler.
    $id = $caption === NULL? '': $this->detach_caption_target ($caption);

    // dispatch on the file type
    $file = match ($type) {
        NULL => '<pre><code>' . encode_special_chars ($content) . "</code></pre>\n",
        'a' => $this->process_file_aneamal ($content, $this->dir, $this->filename, self::embedded, $subtype),
        'b' => $this->process_file_tsv ($content),
        'd' => $this->process_file_tsv ($content, true),
        'h' => $content . "\n",
        'p' => $this->process_file_tsv ($content, false, true),
        'q' => $this->process_file_tsv ($content, true, true),
        't' => $this->process_file_text ($content, $subtype, $clue, $caption),
        default => NULL,
    };
    if ($file === NULL):
        $this->error ('Unrecognized file type: ' . $type, 146);
        return '';
    endif;

    // plain content, unless a caption or an id calls for a <figure>
    if ($caption === NULL and $id === ''):
        return $file;
    endif;
    if ($caption === NULL):
        return "<figure{$id}>\n{$file}</figure>\n";
    endif;
    return "<figure{$id}>\n{$file}<figcaption>" . $this->phrase ($caption) . "</figcaption>\n</figure>\n";
}
/* Parses an Aneamal $string as a phrase and wraps it in the HTML element that
 * corresponds to the given Aneamal $mark: emphasis (~ * _), a quoted string
 * ("), a crossed out string (+-) or an inline note (=-). A $mark that is not
 * listed is not handled by the match expression.
 */
private function emphasis (
    string $string,
    string $mark
): string
{
    // The element is chosen first, so an unknown mark fails before any parsing.
    $element = match ($mark) {
        '~' => 'i',
        '*' => 'b',
        '_' => 'u',
        '"' => 'q',
        '+-' => 's',
        '=-' => 'small',
    };
    return "<$element>" . $this->phrase ($string) . "</$element>";
}
/* Finds the right mark in a $string that corresponds to the identical left
 * mark, whose position in the $string is initially given by &$index. &$index
 * is then set to the position of the right mark, or to the last character if
 * no right mark is found, which is also recorded as an error. The part of the
 * $string between the marks is returned with slashed linebreaks removed.
 * Unless $no_masking is true, the search is done with strpos_unmasked and
 * self::masks, i.e. content inside masks such as $…$ and |…| is not taken
 * into account.
 */
private function enclosed (
    string $string,
    int &$index,
    bool $no_masking = false
): string
{
    // the left mark and the position right behind it
    $mark = $string[$index];
    $start = $index + 1;

    // look for the closing mark
    if ($no_masking):
        $pos = strpos_unslashed ($string, $mark, $start);
    else:
        $pos = strpos_unmasked ($string, $mark, self::masks, $start, '`');
    endif;

    if ($pos === NULL):
        // not closed: everything up to the end of the string is the content
        $group = substr ($string, $start);
        $index = strlen ($string) - 1;
        $this->error ('String not closed: expected ' . $mark, 117);
    else:
        $group = substr ($string, $start, $pos - $start);
        $index = $pos;
    endif;

    // remove slashed linebreaks and return the result
    return strip_slashed_breaks ($group);
}
/* Finds the right 2-byte mark in a $string that corresponds to the mirrored
 * left mark, whose position in the $string is initially given by &$index.
 * &$index is then set to the position of the second byte of the right mark,
 * or to the last character if no right mark is found, which is also recorded
 * as an error. The part of the $string between the marks is returned with
 * slashed linebreaks removed.
 */
private function enclosed2 (
    string $string,
    int &$index
): string
{
    // the right mark is the left mark reversed
    $left = substr ($string, $index, 2);
    $right = strrev ($left);
    $start = $index + 2;

    // find the corresponding right mark
    $pos = strpos_unmatched ($string, $right, $left, self::masks, $start, '`');
    if ($pos === NULL):
        // not closed: everything up to the end of the string is the content
        $group = substr ($string, $start);
        $index = strlen ($string) - 1;
        $this->error ('Crossed-out string or inline note not closed: expected ' . $right, 206);
    else:
        $group = substr ($string, $start, $pos - $start);
        $index = $pos + 1;
    endif;

    // remove slashed linebreaks and return the result
    return strip_slashed_breaks ($group);
}
/* Finds the right curly bracket that follows the left curly bracket whose
 * position in a $string is initially given by &$index. &$index is then set to
 * the position of the right curly bracket, or to the last character of the
 * string if no right curly bracket is found, which is also recorded as an
 * error. The part of the $string between the brackets is returned unaltered.
 */
private function encurled (
    string $string,
    int &$index
): string
{
    $start = $index + 1;
    $pos = strpos_unslashed ($string, '}', $start);

    // not closed: everything up to the end of the string is the content
    if ($pos === NULL):
        $index = strlen ($string) - 1;
        $this->error ('Curly brackets not closed: expected }', 140);
        return substr ($string, $start);
    endif;

    $index = $pos;
    return substr ($string, $start, $pos - $start);
}
/* Headings marked up with +++ (with rank 2) and + + (with rank 3) hide the
 * contents of their implicitly associated sections by default, but they can
 * be toggled to be visible. This method marks the end of the open expandable
 * sections of the given $rank and of every higher numbered, less important
 * rank: it returns their stored end tags, one per line, and clears them.
 */
private function end_sections (
    int $rank = 1
): string
{
    $html = '';
    foreach ($this->sections as $r => $endtag):
        // skip sections that are not open or that are more important
        if (!$endtag or $r < $rank):
            continue;
        endif;
        $html .= $endtag . "\n";
        $this->sections[$r] = '';
    endforeach;
    return $html;
}
/* Appends an error record consisting of $message, $code and $line to the
 * errors array of this object. According to the original note, the records
 * can be retrieved with method get_errors, and $code 0 is meant for errors
 * that should be impossible to occur in production.
 */
private function error (
    string $message,
    int $code = 0,
    int|null $line = NULL
): void
{
    $record = [$message, $code, $line];
    $this->errors[] = $record;
}
/* Processes a $block that represents an expandable-section break or an
 * expandable-section heading and returns its HTML equivalent. Expandable
 * sections are displayed closed by default. When errors are pending, e.g. due
 * to a syntax error in the heading, the section is displayed open so that the
 * error message can be found more easily. $attributes contains HTML
 * attributes such as classes that apply to the section's heading and are set
 * on the HTML <summary> element.
 */
private function expander (
    string $block,
    array $attributes = []
): string
{
    $start = substr ($block, 0, 3);
    $rank = self::expanders[$start];

    // a block that consists of the mark alone is an expandable-section break
    if ($block === $start):
        if (empty ($this->sections[$rank])):
            $this->error ('Seamless section break without corresponding heading: ' . $start, 256);
        endif;
        if ($attributes):
            $this->error ('Alignment for seamless section break: ' . $start, 214);
        endif;
        return $this->end_sections ($rank);
    endif;

    // end previous expandable sections
    $sectiontags = $this->end_sections ($rank);

    // find the end of the expandable-section heading and extract its content
    $endpos = strrpos_unmasked ($block, $start, self::masks, 3);
    if ($endpos === NULL):
        $this->error ('Expandable-section heading not closed: expected ' . $start, 165);
        return $sectiontags;
    endif;
    $content = trim (strip_slashed_breaks (substr ($block, 3, $endpos - 3)));
    if ($content === ''):
        $this->error ('Expandable-section heading missing', 164);
    endif;

    // Create markup for the new heading. Its first line also provides the
    // class of the <details> element; further lines are set apart in <span>s.
    $heading = "<summary" . implode_html_attributes ($attributes) . ">\n<h$rank>";
    foreach (explode ("\n", $content) as $i => $line):
        if ($i === 0):
            $class = prepare_html_id ($line);
            $heading .= $this->phrase ($line);
        else:
            $heading .= '<br><span>' . $this->phrase ($line) . '</span>';
        endif;
    endforeach;
    $heading .= "</h$rank>\n</summary>\n";
    $this->sections[$rank] = '</details>';

    // check for unexpected text after the heading, but still in the same block
    if ($endpos + 3 !== strlen ($block)):
        $this->error ('Blank line missing after expandable-section heading', 163);
    endif;

    // open the section if any errors are pending
    $status = empty ($this->errors)? '': ' open';
    return $sectiontags . "<details class='$class'{$status}>\n" . $heading;
}
/* Processes an Aneamal $block that starts with a left square bracket and seems
 * to be a file token. The token can be followed by a code block whose lines
 * start with |, in which case the block is an embedded file, or by a link to
 * a file. X-module tokens are also accepted without any link. If none of this
 * applies or the right square bracket is missing, the block is a simple
 * paragraph or, if $is_list_item is true, a phrase without <p> element. In
 * either case, the appropriate HTML code is returned.
 */
private function file (
    string $block,
    bool $is_list_item = false
): string
{
    if (is_null ($token_end = strpos_unslashed ($block, ']', 2))):
        $this->error ('Malformed file token: expected ]', 239);
        return $is_list_item? $this->phrase ($block): '<p>' . $this->phrase ($block) . "</p>\n";
    endif;
    $token = substr ($block, 1, $token_end - 1);
    $after = substr ($block, $token_end + 1, 2);

    // embedded file
    if ($after === "\n|"):
        $lines = explode ("\n", substr ($block, $token_end + 2));
        foreach ($lines as $n => $line):
            // The first line that does not start with | begins the caption.
            // str_starts_with is used instead of $line[0], because the latter
            // triggers a warning for an empty line.
            if (!str_starts_with ($line, '|')):
                return $this->embedded_file (implode ("\n", array_slice ($lines, 0, $n)), $token, implode ("\n", array_slice ($lines, $n)));
            endif;
            $lines[$n] = substr ($line, 1);
        endforeach;
        return $this->embedded_file (implode ("\n", $lines), $token);
    endif;

    // linked file
    if ($after === '->'):
        return $this->linked_file (substr ($block, $token_end + 1), $token);
    endif;

    // x-modules with zero links: the token must be followed by nothing or by
    // whitespace; the parentheses spell out the precedence of || over and
    $is_x_token = str_starts_with ($token, 'x-') || str_starts_with ($token, 'X-');
    if ($is_x_token and ($after === '' || str_contains (self::space, $after[0]))):
        return $this->linked_file (substr ($block, $token_end + 1), $token);
    endif;

    // still not a file, just a bracketed string at the paragraph start
    return $is_list_item? $this->phrase ($block): '<p>' . $this->phrase ($block) . "</p>\n";
}
/* Loads a linked Aneamal file located at $uri, processes it and returns its
 * content. This is useful for sharing common text between documents or for
 * joining multiple parts of a huge document. The file $kind can be
 * self::header or self::linked for example. $tpl specifies an optional
 * Aneamal template name that shall be used with the Aneamal file. An empty
 * string is returned, if the file can not be loaded.
 */
private function file_aneamal (
    string $uri,
    string $kind,
    string|null $tpl = NULL
): string
{
    // The static countdown limits the total number of calls to prevent an
    // infinite recursion.
    static $countdown = self::max_inclusions;
    $countdown -= 1;
    if ($countdown < 0):
        $this->error ('Too many Aneamal files included', 66);
        return '';
    endif;

    // only allow local files; trouble with data URIs would be that relative
    // addresses inside it could not be resolved
    if (get_uri_type ($uri) !== URI_LOCAL):
        $this->error ('Forbidden URI type for linked Aneamal file: use a local filename instead', 196);
        return '';
    endif;

    // split path from the query, which can be used to choose only certain lines
    [$path, $query] = split_uri_tail ($uri);

    // resolve the file path so that it is relative to the Aneamal root
    $canonical = $this->normalize_path ($path);
    if ($canonical === NULL):
        return '';
    endif;

    // read the file content
    $text = file_get_lines ($this->root . $canonical, $query);
    if ($text === NULL):
        $this->error ('Aneamal file not readable: ' . $path, 135);
        return '';
    endif;

    // cut off whatever exceeds the textcap
    $textcap = $this->textcap ?? self::default_textcap;
    if (strlen ($text) > $textcap):
        $this->error ("Aneamal file exceeds textcap of $textcap: " . $canonical, 203);
        $text = substr ($text, 0, $textcap);
    endif;

    // process the file content as Aneamal document
    return $this->process_file_aneamal ($text, dirname ($canonical), basename ($canonical), $kind, $tpl);
}
/* Runs the user defined x-module of the given $subtype, passing preprocessed
 * versions of the given $paths to it and optionally a $clue. Returns what the
 * module returns, which must be UTF-8 encoded, followed by a linebreak. For
 * example, a module x-audio.php could handle [x-audio]->url markup to
 * integrate an audio file into the document.
 * The optional $caption is informational and passed through for the form API.
 */
private function file_extension (
    array $paths,
    string|null $subtype,
    string|null $clue,
    string|null $caption
): string
{
    if ($subtype === NULL or $subtype === ''):
        $this->error ('x-module subtype missing', 144);
        return '';
    endif;

    // $links stores the paths as given by the author of the Aneamal file,
    // $files stores the corresponding filenames in the local file system or
    // NULL for paths that are not local or can not be normalized,
    // $hrefs stores the paths prepared for use in HTML
    $links = [];
    $files = [];
    $hrefs = [];
    foreach ($paths as $path):
        $canonical = get_uri_type ($path) === URI_LOCAL? $this->normalize_path ($path): NULL;
        if ($canonical === NULL):
            $files[] = NULL;
            $hrefs[] = $path;
        else:
            $files[] = $this->root . $canonical;
            $hrefs[] = $this->home . strip_nml_suffix ($canonical);
        endif;
        $links[] = $path;
    endforeach;

    // numeric indices for backwards compatibility with old modules
    $first = [$links[0] ?? NULL, $files[0] ?? NULL, $hrefs[0] ?? NULL, $this->root, $this->home];

    // prepare data passed to the module
    $data = [
        0 => $first,
        1 => $clue ?? '',
        2 => $this->home . '/aneamal/x-' . $subtype,
        3 => $this->home . $this->dir,
        // x-module specific stuff
        'files' => $files,
        'hrefs' => $hrefs,
        'links' => $links,
    ];

    // let the module do its work and return result
    return $this->use_module ('x-' . $subtype, $data, $clue, $caption) . "\n";
}
/* Returns the content of the text file located at $path, which is supposed to
 * contain UTF-8 encoded HTML code, followed by a linebreak. An empty string
 * is returned, if file_raw does not provide any content.
 */
private function file_html (
    string $path
): string
{
    $html = $this->file_raw ($path);
    if ($html === NULL):
        return '';
    endif;
    return $html . "\n";
}
/* Returns an HTML <img> element to include the image located at $paths[0] with
 * an optional textual $clue for visually impaired readers, bots etc. and an
 * optional textual $hint that is typically displayed as tooltip on mouseover
 * events and comes already prepared for use as an HTML attribute. The image
 * is turned into an HTML link to $paths[1], if provided. An empty string is
 * returned, if the image address can not be resolved.
 */
private function file_image (
    array $paths,
    string|null $clue = NULL,
    string|null $hint = NULL
): string
{
    // resolve the file path
    [$source, $filename] = $this->media_link ($paths[0]);
    if (!$source):
        return '';
    endif;

    // Get width and height for local image files. The file is supposed to
    // exist, if there is $filename, so to optimize speed we do not check for
    // existence first, instead suppressing errors in filemtime () and
    // confirming existence en passant through its success.
    $size = '';
    if ($filename and $time = @filemtime ($filename)):
        $size = query_memory (md5 ("$filename,$time"), function () use ($filename) {
            $s = @getimagesize ($filename);
            if (!$s or !($s[0] > 0 and $s[1] > 0)):
                return '';
            endif;
            [$width, $height] = $s;
            // Swap width and height, if the EXIF orientation implies a 90°
            // turn to the right or left.
            $exif = @exif_read_data ($filename, 'IFD0');
            if ($exif and isset ($exif['Orientation']) and in_array ($exif['Orientation'], [5, 6, 7, 8])):
                [$width, $height] = [$height, $width];
            endif;
            return " width='$width' height='$height'";
        });
    endif;

    // handle the optional textual alternative
    $alt = '';
    if (isset ($clue)):
        $alt = " alt='" . prepare_html_attribute ($clue) . "'";
    endif;

    // handle optional hint that is already prepared for use as HTML attribute
    $title = isset ($hint)? " title='$hint'": '';

    // the loading attribute determines whether the image shall load lazily
    $loading = get_loading_attribute ($this->lazy ?? -1);

    // compose HTML img element
    $img = "<img src='{$source}'{$size}{$alt}{$title}{$loading}>";

    // return the image element, optionally with a link
    if (isset ($paths[1])):
        return $this->hyperlink ($paths[1], $img) . "\n";
    endif;
    return "$img\n";
}
/* Returns an HTML <video> element to include a video or audio file, optionally
 * with a $hint that is typically displayed as tooltip on mouseover events and
 * comes already prepared for use as an HTML attribute. If $do_loop is true,
 * the loop attribute is set. The $paths array provides the addresses: a single
 * path links to the media file; in case of more paths the first one links to
 * a still image, the second one to the media file and an optional third one
 * to a WebVTT file with closed captions for the hard of hearing (or subtitles
 * that simply translate the spoken text, but will still be identified as
 * closed captions in HTML here). It is intentional to use HTML's <video>
 * element for both audio and video files: HTML's <audio> element does not
 * work with captions or a still image. While Aneamal also accepts a short
 * clue in the same way as for images, [v:this happens]->, that text is not
 * used in the HTML output yet, as there is nothing like the <img> element's
 * alt-attribute for the <video> element in HTML yet.
 */
function file_media (
    array $paths,
    string|null $hint = NULL,
    bool $do_loop = false
): string
{
    // Resolve the file path of the media file:
    [$videosrc, $videofile] = $this->media_link ($paths[1] ?? $paths[0]);
    if (!$videosrc):
        return '';
    endif;

    // default values for the options that are only set conditionally below
    $poster = '';
    $preload = " preload='metadata'";
    $size = '';
    $track = '';
    $stillfile = NULL;

    // Resolve the file path of the still image. If a still image is available,
    // browsers are told not to preload media data, since the still image is
    // good enough as a placeholder and this reduces initial page load time.
    if (isset ($paths[1])):
        [$stillsrc, $stillfile] = $this->media_link ($paths[0]);
        if ($stillsrc):
            $poster = " poster='$stillsrc'";
            $preload = " preload='none'";
        endif;
    endif;

    // Resolve the file path of the closed captioning text track:
    if (isset ($paths[2])):
        [$tracksrc] = $this->media_link ($paths[2]);
        if ($tracksrc):
            $track = "<track src='$tracksrc' kind='captions' label='CC'>";
        endif;
    endif;

    // If both the still image and the video file are locally available, further
    // processing takes place: width and height of the video are determined and
    // a preview image is generated from the still that matches the video size.
    if ($videofile and $stillfile and $vtime = @filemtime ($videofile) and $stime = @filemtime ($stillfile)):
        $hash = md5 ("$videofile,$vtime,$stillfile,$stime");
        $hash[2] = '/';
        $previewsrc = encode_special_chars ($this->home . self::pixdir . "/$hash.jpg");
        $previewfile = $this->root . self::pixdir . "/$hash.jpg";

        if (file_exists ($previewfile)):
            // The preview file exists already: its width and height are read
            // to be used on the video element and it is used as poster.
            $size = query_memory ($hash, function () use ($previewfile) {
                $s = getimagesize ($previewfile);
                return $s && $s[0] > 0 && $s[1] > 0? " width='$s[0]' height='$s[1]'": '';
            });
            $poster = " poster='$previewsrc'";
        else:
            // The preview does not exist: width and height are read from the
            // video. If that succeeds, a matching preview is generated from
            // the still image.
            $size = query_memory ($hash, function () use ($videofile) {
                $s = get_video_dimensions ($videofile);
                return isset ($s)? " width='$s[0]' height='$s[1]'": '';
            });
            if ($size):
                // the numbers sit between the quotes of the two attributes
                $s = explode ("'", $size);
                if (!empty ($this->create_preview ((int) $s[1], (int) $s[3], $stillfile, $previewfile))):
                    $poster = " poster='$previewsrc'";
                endif;
            endif;
        endif;
    endif;

    // handle the optional hint and looping option
    $title = isset ($hint)? " title='$hint'": '';
    $loop = $do_loop? ' loop': '';

    return "<video src='{$videosrc}'{$poster}{$size}{$title}{$loop}{$preload} controls>{$track}<a href='{$videosrc}'>" . basename ($videosrc) . "</a></video>\n";
}
/* Creates and saves a usually smaller preview image of the local image located
 * at $paths[0], unless the preview exists already. Returns an HTML <img>
 * element to display the preview with an optional textual $clue for visually
 * impaired readers, bots etc. and an optional textual $hint that is typically
 * displayed as tooltip on mouseover events and comes already prepared for use
 * as an HTML attribute. The image is turned into an HTML link to $paths[1],
 * if provided, or else to the original image at $paths[0]. An empty string is
 * returned, if the original is not available or no preview could be created.
 */
private function file_preview (
    array $paths,
    string|null $clue = NULL,
    string|null $hint = NULL
): string
{
    // check whether to create a HTML picture element with JXL + JPG previews
    $pictured = $this->fixes & self::recognized_fixes['previews'];

    // fetch only local files for security and legal reasons
    if (get_uri_type ($paths[0]) !== URI_LOCAL):
        $this->error ('Forbidden URI type for linked image file: use a local filename instead', 201);
        return '';
    endif;

    // resolve the file path to be relative to the Aneamal root directory
    $canonical = $this->normalize_path ($paths[0]);
    if ($canonical === NULL):
        return '';
    endif;

    // compose the filename of the original image and check its readability
    $original = $this->root . $canonical;
    if (!is_readable_file ($original)):
        $this->error ('Not a readable file: ' . $paths[0], 127);
        return '';
    endif;

    // get preview image dimension settings
    [$width, $height] = explode_integer_pair (',', $this->pixels ?? self::pixsize);

    // Compose the filenames of the preview images from a hash that covers the
    // path, the size settings and the modification time of the original.
    $hash = md5 ("$canonical,$width,$height" . strval (filemtime ($original)));
    $hash[2] = '/';
    $base = encode_special_chars ($this->home . self::pixdir . "/$hash");
    $jpg = $this->root . self::pixdir . "/$hash.jpg";
    $jxl = $pictured? $this->root . self::pixdir . "/$hash.jxl" : '';

    // create the preview image, if it does not exist yet
    if (!file_exists ($jpg) and !$this->create_preview ($width, $height, $original, $jpg, $jxl)):
        return '';
    endif;

    // Get the image dimensions as HTML attributes: they are taken from the
    // settings, if both are positive, and from the preview file otherwise.
    if ($width > 0 and $height > 0):
        $size = " width='$width' height='$height'";
    else:
        $size = query_memory ($hash, function () use ($jpg) {
            $s = getimagesize ($jpg);
            return $s && $s[0] > 0 && $s[1] > 0? " width='$s[0]' height='$s[1]'": '';
        });
    endif;

    // handle the optional textual alternative
    $alt = '';
    if (isset ($clue)):
        $alt = " alt='" . prepare_html_attribute ($clue) . "'";
    endif;

    // handle optional hint that is already prepared for use as HTML attribute
    $title = isset ($hint)? " title='$hint'": '';

    // the loading attribute determines whether the image shall load lazily
    $loading = get_loading_attribute ($this->lazy ?? -1);

    // compose HTML img element
    $img = "<img src='{$base}.jpg'{$size}{$alt}{$title}{$loading}>";

    // wrap optional HTML picture element around
    if ($pictured):
        $source = file_exists ($jxl)? "<source srcset='{$base}.jxl' type='image/jxl'>" : '';
        $img = "<picture>{$source}{$img}</picture>";
    endif;

    // return the HTML code for displaying the preview image
    return $this->hyperlink ($paths[1] ?? $paths[0], $img) . "\n";
}
/* Returns the content of a text file located at $uri, where $uri refers to a
 * local file or a data URI. Local file paths can be followed by a query to
 * choose only certain lines from the file. If $do_cap is true, the loaded text
 * is capped if it exceeds the textcap in order to minimize negative effects
 * when a huge file gets accidentally loaded (e.g. loading an image instead of
 * a text file). If $do_normalize is true, the text is passed through
 * normalize_text, which is meant to make it compatible with UTF-8. $errline is
 * an optional line number that is recorded with errors. It is important to
 * return NULL for missing/unreadable files instead of '' to distinguish this
 * case from empty files. The latter may trigger a certain action in a
 * [t]-module, whereas a [t]-module will not be called in case of a
 * missing/unreadable file.
 */
private function file_raw (
    string $uri,
    bool $do_normalize = true,
    bool $do_cap = true,
    int|null $errline = NULL
): string|null
{
    if ($uri === ''):
        $this->error ('Address missing', 232, $errline);
        return NULL;
    endif;
    $uri_type = get_uri_type ($uri);

    // split path from the query, which can be used to choose certain lines
    [$path, $query] = split_uri_tail ($uri);

    // load the text from a local file or a data URI
    if ($uri_type === URI_LOCAL):
        $canonical = $this->normalize_path ($path);
        if ($canonical === NULL):
            return NULL;
        endif;
        $text = file_get_lines ($this->root . $canonical, $query);
        if ($text === NULL):
            $this->error ('Not a readable file: ' . $path, 192, $errline);
            return NULL;
        endif;
    elseif ($uri_type === URI_DATA):
        // data URIs have neither query nor fragment
        $text = $this->data_uri ($uri);
        if ($text === NULL):
            return NULL;
        endif;
    else: // prevent remote file inclusion
        $this->error ('Forbidden URI type for file: use local filename or data URI', 200, $errline);
        return NULL;
    endif;

    // cut off whatever exceeds the textcap
    if ($do_cap):
        $textcap = $this->textcap ?? self::default_textcap;
        if (strlen ($text) > $textcap):
            $this->error ("File exceeds textcap of $textcap: " . $path, 193, $errline);
            $text = substr ($text, 0, $textcap);
        endif;
    endif;

    if ($do_normalize):
        return normalize_text ($text);
    endif;
    return $text;
}
/* Loads, handles and returns a text file located at $uri and announced with
 * [t] or [t-…]. In the latter case the processing is done by a T-module,
 * which is specified by $subtype. The optional $clue is passed on for the
 * module, which could be used to do syntax highlighting or to eval code for
 * example. The file is loaded without normalization. An empty string is
 * returned, if the file can not be loaded.
 * The optional $caption is informational and passed through for the form API.
 */
private function file_text (
    string $uri,
    string|null $subtype,
    string|null $clue,
    string|null $caption
): string
{
    // read the file
    $text = $this->file_raw ($uri, false);
    if ($text === NULL):
        return '';
    endif;

    // process the file content as text
    return $this->process_file_text ($text, $subtype, $clue, $caption);
}
/* Loads a tab-separated values (TSV) file, which is supposed to be UTF-8
 * encoded and located at $uri. Returns the file contents processed as an HTML
 * <table>. If $is_aneamal is true, Aneamal phrase markup is interpreted in
 * the field names and values. If $transpose is true, the table is transposed,
 * i.e. rows become columns and columns become rows. An empty string is
 * returned, if the file can not be loaded.
 */
private function file_tsv (
    string $uri,
    bool $is_aneamal = false,
    bool $transpose = false
): string
{
    // read the file with the default normalization of file_raw
    $data = $this->file_raw ($uri);
    if ($data === NULL):
        return '';
    endif;

    // process the file content as tab-separated values
    return $this->process_file_tsv ($data, $is_aneamal, $transpose);
}
/* Returns an HTML form element with hidden data to be included in a
 * submission: the _form ID to distinguish different forms on the same
 * webpage; the _time stamp of when the form HTML was generated; a timestamp
 * authentication code _taco. The _time stamp is for checking whether a form
 * submission is outdated. It could also be used by modules to confirm that a
 * user reloaded the page before making another submission. The _taco is to
 * confirm the authenticity of _time so that spammers can not make a form
 * appear *newer* than it is.
 * HTML encourages browsers to submit forms with a single textbox implicitly
 * (i.e. without activation of a submit button) when a user hits the "enter"
 * key. This behavior is not wanted and useless for forms without submit
 * button in Aneamal, so a hidden disabled submit button prevents it.
 */
private function form_element (
): string
{
    // Microsecond precision makes _time stamp collisions of webpages unlikely.
    // microtime () returns the fraction and the seconds separated by a space;
    // the six digits after "0." of the fraction are appended to the seconds.
    $parts = explode ("\x20", microtime ());
    $time = $parts[1] . '.' . substr ($parts[0], 2, 6);

    // SHA3 is supposedly suitable for a message authentication code when
    // applied to a fixed-length key followed by the message.
    $salsa = query_memory ('salsa', fn () => base64_encode (random_bytes (33)));
    $taco = hash ('sha3-256', "$salsa $this->form $time");

    // Enctype multipart/form-data is often more verbose than the default, but
    // permits file uploads that PHP accepts, which modules may do.
    return <<<HTML
    <form id='$this->form' method='post' enctype='multipart/form-data' hidden>
    <input name='_form' type='hidden' value='$this->form'>
    <input name='_time' type='hidden' value='$time'>
    <input name='_taco' type='hidden' value='$taco'>
    <input type='submit' disabled>
    </form>
    HTML;
}
/* Checks whether an active Aneamal form in this file (i.e. one that has been
 * submitted) includes a valid _time stamp and aborts the submission, if not,
 * by emptying $_POST and the post data of this object. The return value is
 * the result of comparing the validated _time stamp with the modification
 * time of the corresponding *.nml file: positive, if the _time stamp is
 * younger (i.e. the form has not changed in the meanwhile), and negative for
 * an out-dated _time stamp. 0 is returned, if this file has no active form,
 * if the submission was aborted or if both times are equal.
 */
private function form_time_check (
): int
{
    // Simply return 0, if this Aneamal file does not have an active form.
    if (!isset ($_POST['_form']) or $_POST['_form'] !== $this->form):
        return 0;
    endif;

    // Abort the submission, if it lacks the hidden fields provided by Aneamal
    // or if they are malformed. A _taco that is not a string, e.g. an array
    // submitted as _taco[], can not be authentic and must neither be
    // interpolated into the cache key nor be passed to hash_equals below.
    if (!isset ($_POST['_taco'], $_POST['_time']) or !is_string ($_POST['_taco']) or !is_numeric ($_POST['_time'])):
        $_POST = $this->post = [];
        return 0;
    endif;

    // The return value of this function is cached for valid _time stamps, so it
    // is returned here without re-computation in case of repeated use.
    static $cache = [];
    if (isset ($cache[$key = "$_POST[_form] $_POST[_time] $_POST[_taco]"])):
        return $cache[$key];
    endif;

    // Get the key for validating the _time. $salsa will be NULL, if no key has
    // been set yet or the memory has just been erased. In that case, the _time
    // will be accepted without validation.
    $salsa = query_memory ('salsa');

    // Abort the submission in case of a failed validation. The authentication
    // code is compared with hash_equals, so that the comparison time does not
    // leak how many leading characters of a forged code are correct.
    // NOTE: The validation can also fail, if the Aneamal Translator's memory
    // was erased by the admin after the submitted form had been generated, but
    // the memory has been filled anew due to a request of the page since then.
    if (isset ($salsa) and !hash_equals (hash ('sha3-256', "$salsa $_POST[_form] $_POST[_time]"), $_POST['_taco'])):
        $_POST = $this->post = [];
        return 0;
    endif;

    // Cache and return the result of comparing the validated age of the
    // submitted form with the edit time of this corresponding *.nml file.
    return $cache[$key] = (int) $_POST['_time'] <=> filemtime ($this->root . $this->dir . '/' . $this->filename);
}
/* Returns an ID to be used for the Aneamal form in this Aneamal file. It
 * depends on the kind of this file, i.e. its function in the webpage layout,
 * so that the ID does not easily change due to additions to the webpage. Main
 * file, header, footer and aside have fixed IDs. Forms in files of any other
 * kind, such as embedded/linked files and quotation blocks, get an ID from
 * get_unique, which can change unfortunately, if another form is
 * embedded/linked/quoted earlier.
 */
private function form_id (
): string
{
    // fixed IDs for the kinds of files that make up the webpage layout
    $fixed = [
        self::main => '_fm',
        self::header => '_fh',
        self::footer => '_ff',
        self::aside => '_fa',
    ];

    // get_unique is only called, if there is no fixed ID for the file kind
    return $fixed[$this->filekind] ?? get_unique ('<form>', '_f');
}
/* Encodes a $string for use as an HTML id/URL fragment by means of
 * prepare_html_id. According to the original note, the returned value only
 * contains small letters, numbers and the hyphen. Returns an empty string on
 * error. Except for the empty string, the same value can only be returned once
 * with $unique being set. Use this to guarantee unique HTML id attributes.
 */
private function fragment_identifier (
    string $string,
    bool $unique = false
): string
{
    // identifiers that have been handed out with $unique being set
    static $taken = [];

    $id = prepare_html_id ($string);
    if ($id === ''):
        $this->error ('Content missing in target: ' . $string, 19);
        return '';
    endif;

    // without the uniqueness requirement the ID is neither checked nor stored
    if (!$unique):
        return $id;
    endif;

    if (isset ($taken[$id])):
        $this->error ('Target already set: ' . $id, 20);
        return '';
    endif;
    $taken[$id] = true;
    return $id;
}
/* Returns a list of errors that occurred since this method was last called
 * while processing the current document (or since its start) as an HTML <div>.
 * The reported errors are cleared from the errors array. $message is a heading
 * for the error messages and is inserted into the HTML as given, so it must
 * already be safe HTML. $from and $to provide a line number range in which the
 * errors occurred (inclusively, starting at 0). An empty string is returned if
 * no errors occurred.
 */
private function get_errors (
	string $message,
	int|null $from = NULL,
	int|null $to = NULL
): string
{
	// return empty string if no errors occurred
	if (empty ($this->errors)):
		return '';
	endif;

	// base URI to which an error code is appended for the "more info" link
	$uri = encode_special_chars ($this->errormore ?? self::error_base_url);

	// compile the individual error messages and collect their codes
	$errors = '';
	$codes = [];
	foreach ($this->errors as [$msg, $code, $line]):
		// lines are counted from 0 internally, but reported counted from 1
		$line = isset ($line)? 'line ' . strval ($line + 1) . ', ': '';
		if ($code):
			$errors .= "<span class='_$code'><br>$code: " . encode_special_chars ($msg) . " ($line<a href='{$uri}{$code}'>more info</a>).</span>";
			$codes[] = $code;
		else:
			// errors without code get neither a class nor a link
			$errors .= "<span><br>" . encode_special_chars ($msg) . "</span>";
		endif;
	endforeach;

	// one class per distinct error code in ascending order; if no error has a
	// code, only the class _error is set instead of an additional bare _
	$codes = array_unique ($codes, SORT_NUMERIC);
	sort ($codes, SORT_NUMERIC);
	$classes = '_error';
	foreach ($codes as $code):
		$classes .= " _$code";
	endforeach;

	// compile information about the file/block where the errors occurred
	$info = encode_special_chars ($this->dir . '/' . $this->filename . match ($this->filekind) {
		self::embedded => ': embedded file',
		self::quoted => ': quotation',
		default => '',
	});
	if (isset ($from, $to) and $from !== $to):
		$info .= ', lines ' . strval ($from + 1) . ' to ' . strval ($to + 1);
	elseif (isset ($from) or isset ($to)):
		$info .= ', line ' . strval (($from ?? $to) + 1);
	endif;

	// clear error array
	$this->errors = [];

	// return the error messages
	return "<div class='$classes' style='" . self::error_style . "' data-nosnippet><b>$message</b> ($info): $errors</div>\n";
}
/* Finds the end of a word or bare string `...` or URI inside $string whose
* start is initially given by &$index. Then &$index is set to the position of
* the last character in the word or bare string or URI. The word, bare string
* stripped of its enclosing backticks or URI is returned. If $is_uri is true,
* fewer characters are interpreted as markup which would mark the start of a new
* group and hence end a word, e.g. ~ is a valid character in a URI but not
* inside a word, as it usually marks a gently emphasized string. Note that
* &$index will be one less at the end than at the beginning, if no word or bare
* group or URI is marked by &$index at the beginning. For example consider the
* string "#. " between the quotation marks: at &$index 1, the dot, no word or
* bare string or URI is found, so &$index would be set to 0 and an empty string
* returned.
private function group (
string $string,
int &$index,
bool $is_uri = false
): string
// handle bare strings separately
if ($string[$index] === '`'):
return $this->enclosed ($string, $index, $is_uri);
// Scan forward from $index. Inside the switch, "break 1" only leaves the
// switch, so the scan continues with the next character, whereas "break 2"
// also leaves the for loop, so $i stays on the first character that is no
// longer part of the group. If the loop runs to the end, $i is $last + 1.
for ($i = $index, $last = strlen ($string) - 1; $i <= $last; ++$i):
switch ($string[$i]):
// a backslash protects the next character
case '\\':
// skip the protected character, unless the backslash is the last character
$i < $last and ++$i;
break 1;
// unprotected whitespace, hint, reference ... end a group
case "\t":
case "\n":
case ' ':
case '{':
case '^':
case '`':
break 2;
// punctuation before whitespace, reference or the end ends a group
case '!':
case ',':
case '.':
case ':':
case ';':
case '?':
if ($i === $last or str_contains (self::space . '^', $string[$i + 1])):
break 2;
break 1;
// unprotected pairwise markup ends a non-URI group and is not
// allowed (unprotected) in an URI that is not enclosed by `...`
case '|':
case '$':
if ($is_uri):
// the error is reported, but the character stays part of the URI
$this->error ('Unprotected special character in URI: ' . $string[$i], 155);
break 1;
break 2;
// unprotected emphasis marks etc. end a non-URI group
case '*':
case '_':
case '~':
case '"':
case '&':
case '#':
case '@':
if ($is_uri):
break 1;
break 2;
// left brackets end a non-URI nesting group
case '(':
case '[':
// $j is a copy of the position that is handed to bracketed instead of $i,
// so the scan position $i itself is not passed to that method; the bracket
// is kept as part of the group if bracketed returns NULL
$j = $i;
if ($is_uri or is_null ($this->bracketed ($string, $j))):
break 1;
break 2;
// a plus which is part of crossed-out markup and an equals sign as
// part of inline note markup end a non-uri group
case '+':
case '=':
if (!$is_uri and $i < $last and $string[$i + 1] === '-'):
break 2;
break 1;
// a hyphen which is part of a link arrow ends a group
case '-':
if ($i < $last and $string[$i + 1] === '>'):
break 2;
break 1;
// copy the group from the string and set the index to the group's last
// character
// ($i - $index is the length of the group; for an empty group the index
// ends up one less than where it started)
$group = substr ($string, $index, $i - $index);
$index = $i - 1;
// remove slashed linebreaks and return the result (all linebreaks are
// slashed linebreaks here)
return str_replace ("\\\n", '', $group);
/* Builds the HTML <head> element for this Aneamal document, mainly from data
 * collected from metadata declarations: title, <meta> elements, alternate
 * language links, further <link> elements, raw HTML head content, stylesheets
 * and <style> elements, in this order. The metas, altlangs and links
 * properties get sorted by key in the process.
 */
private function head (
): string
{
	// removes duplicates from a list such that the last occurrence of each
	// value is the one that keeps its place
	$keep_last = static fn (array $list): array => array_reverse (array_unique (array_reverse ($list)));

	$lines = [];

	// output is always UTF-8 encoded
	$lines[] = "<meta charset='UTF-8'>";

	// title as set in the title property plus the optional title tail
	$lines[] = '<title>' . encode_special_chars ($this->title . $this->titletail) . "</title>";

	// general <meta> elements sorted by name, followed by the generator
	ksort ($this->metas);
	foreach ($this->metas as $name => $content):
		$lines[] = sprintf (self::recognized_html_metas[$name], encode_special_chars ($content));
	endforeach;
	$lines[] = "<meta name='generator' content='Aneamal'>";

	// links to alternate language files sorted by language code
	ksort ($this->altlangs);
	foreach ($this->altlangs as $lang => $target):
		$href = encode_special_chars (strip_nml_suffix ($target));
		$lines[] = "<link rel='alternate' hreflang='$lang' href='" . $href . "'>";
	endforeach;

	// further meta <link> elements sorted by name
	ksort ($this->links);
	foreach ($this->links as $name => $target):
		$lines[] = sprintf (self::recognized_html_links[$name], encode_special_chars (strip_nml_suffix ($target)));
	endforeach;

	// HTML meta content is passed on unchanged
	foreach ($this->metahtmls as $html):
		$lines[] = $html;
	endforeach;

	// external stylesheets, then embedded styles, each without duplicates
	foreach ($keep_last ($this->stylesheets) as $url):
		$lines[] = "<link rel='stylesheet' href='" . encode_special_chars ($url) . "'>";
	endforeach;
	foreach ($keep_last ($this->metastyles) as $css):
		$lines[] = "<style>\n{$css}\n</style>";
	endforeach;

	return "<head>\n" . implode ("\n", $lines) . "\n</head>\n";
}
/* Translates a hook inside $string into an HTML <span> element with class
 * attribute and returns it. &$index must initially give the position of the
 * hook's first character &; it is moved forward while the hook is read. If
 * the & is directly followed by a hint {...}, the hook has no visible content
 * in the HTML output, but it is still possible to hook into it. Otherwise the
 * name is read with the method targeted, which may also provide the text that
 * is displayed inside the <span>, e.g. a metadata value for a hook followed
 * by metadata @.
 */
private function hook (
	string $string,
	int &$index
): string
{
	// step from the ampersand to the character after it; a single ampersand
	// at the end of the string is a valid hook without name
	++$index;
	if (!isset ($string[$index])):
		return "<span class='_hook'></span>";
	endif;

	// read the hook name: hints never provide visible text, targets may
	$text = '';
	$name = $string[$index] === '{'
		? $this->encurled ($string, $index)
		: $this->targeted ($string, $index, $text);

	// the class list always contains _hook, followed by the name if any
	$class = prepare_html_id ($name);
	if ($class !== ''):
		$class = " $class";
	endif;

	return "<span class='_hook{$class}'>$text</span>";
}
/* Creates an HTML link <a> to the given $url with $text as link text. $text is
 * inserted as given. If $text is an empty string, a link text is derived from
 * the $url instead; this is not possible for data URIs, for which an error is
 * reported and an empty string returned. An empty $url yields a placeholder
 * <a> element without href. If $unendorsed is true, the link gets
 * rel='nofollow noreferrer' to tell search engines not to see it as a positive
 * sign for the linked location and browsers not to send a referrer.
 */
private function hyperlink (
	string $url,
	string $text = '',
	bool $unendorsed = false
): string
{
	// placeholder for empty addresses, used where a link would usually have
	// been expected, e.g. in a menu for the link to the current webpage
	if ($url === ''):
		if ($text === ''):
			$this->error ('Link text or address required', 227);
		endif;
		return "<a>$text</a>";
	endif;

	// local addresses are normalized and prefixed with the home path; they
	// become an empty href if they can not be normalized; all other addresses
	// are used as given
	$kind = get_uri_type ($url);
	$target = $url;
	if ($kind === URI_LOCAL):
		$canonical = $this->normalize_path (strip_nml_suffix ($url));
		$target = $canonical === NULL? '': $this->home . $canonical;
	endif;
	$href = encode_special_chars ($target);

	// generate a link text if none is given
	if ($text === '' and $kind === URI_DATA):
		$this->error ('Data-URI link without description', 118);
		return '';
	endif;
	if ($text === ''):
		$text = $kind === URI_REMOTE
			? $href
			: encode_special_chars (strip_nml_suffix ($url));
	endif;

	// optional unendorsement
	$rel = '';
	if ($unendorsed):
		$rel = " rel='nofollow noreferrer'";
	endif;

	return "<a href='$href'$rel>$text</a>";
}
/* Analyzes the $initial part of a line from a numbered list, which may be an
 * item number like A.2.1. for a new list item. In that case the method returns
 * an array of arrays [0 => numeral type, 1 => decimal value as string] which
 * represent the parts of the item number. Otherwise, including for an empty
 * $initial, an empty array is returned. The numeral type is '1', 'A', 'a' or
 * '?' and the decimal value is handled as string so that it works for
 * arbitrarily large values. The parameter $previous must be such an array
 * which represents the previous item's number.
 */
private function item_number (
	string $initial,
	array $previous
): array
{
	// if the initial part does not end with a dot, it's not an item number;
	// this check also rejects an empty string without reading a string offset
	if (!str_ends_with ($initial, '.')):
		return [];
	endif;

	// split the item number at the dots, stripping an optional leading hash
	// and the final dot
	$parts = explode ('.', substr ($initial, str_starts_with ($initial, '#')? 1: 0, -1));
	$current = [];
	$maxlevel = count ($parts) - 1;
	$prelevel = count ($previous) - 1;

	// the hierarchy level can only increase by one from item to item
	if ($maxlevel > $prelevel + 1):
		return [];
	endif;

	foreach ($parts as $level => $part):
		// the lack of a numeral in a part means this can not be an item number
		if ($part === '' or $part === '-'):
			return [];
		endif;

		// determine the numerals used for the item number part and its value
		if (is_made_of ($part, '0123456789')):
			$type = '1';
			// leading zeros are insignificant, but zero itself must remain
			$value = ltrim ($part, '0') ?: '0';
		elseif (is_made_of ($part, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')):
			$type = 'A';
			$value = convert_alpha_number ($part);
		elseif (is_made_of ($part, 'abcdefghijklmnopqrstuvwxyz')):
			$type = 'a';
			$value = convert_alpha_number ($part, true);
		elseif (is_made_of ($part, '-', '0123456789')):
			$type = '1';
			// negative number; minus zero is normalized to zero
			$value = '-' . ltrim (substr ($part, 1), '0');
			$value === '-' and $value = '0';
		elseif ($part === '?'): // automatic numbering
			if ($level > $prelevel): // implied: $level === $maxlevel
				// first item on a new level
				$type = '?';
				$value = '1';
			elseif ($level === $maxlevel):
				// next item on the level of this item: count one up
				$type = $previous[$level][0];
				$value = bcadd ($previous[$level][1], '1');
			else:
				// ancestor level: same number as in the previous item
				$type = $previous[$level][0];
				$value = $previous[$level][1];
			endif;
		else:
			return [];
		endif;

		// the type must be the same as the previous item's on the same level
		if ($level <= $prelevel and $type !== $previous[$level][0]):
			return [];
		endif;

		// the value may not change from the previous item except on maxlevel
		if ($level < $maxlevel and $value !== $previous[$level][1]):
			return [];
		endif;

		$current[] = [$type, $value];
	endforeach;

	return $current;
}
/* Turns the $content of a list item into HTML code. The content is handled as
 * a file, if it starts like a file token, as a math block, if it starts with
 * two dollar signs, and as text with possible phrase markup otherwise. The
 * HTML code of files and math blocks is preceded by a linebreak.
 */
private function item (
	string $content
): string
{
	// leading whitespace is insignificant
	$trimmed = ltrim ($content);

	if (str_match ($trimmed, ['[', self::alphanumeric, '-:]'])):
		// file candidate, i.e. file or text with bracketed string at its start
		$html = "\n" . $this->file ($trimmed, true);
	elseif (str_starts_with ($trimmed, '$$')):
		// math block
		$html = "\n" . $this->math_block ($trimmed);
	else:
		// ordinary text
		$html = $this->phrase ($trimmed);
	endif;

	return $html;
}
/* Identifies the end of a link in $string and returns an HTML <a> element for
* it. In Aneamal, a link is marked with an arrow -> and &$index must initially
* provide the location of the greater-than sign in that arrow. The arrow can
* be either followed by an URI for a regular link or by one of a few special
* marks, e.g. @ in which case an URI already declared in metadata will be used.
* At the end &$index will be set to the position of the last character of the
* link in $string. The optional $text is used as link text. If the $text is
* empty, a link text will be generated from what the arrow points at.
private function link (
string $string,
int &$index,
string $text = ''
): string
// unendorsed links do not give a positive ranking signal to search engines
$unendorsed = false;
// handle links without address at the end of the string
// (the index is moved from the > of the arrow to the character after it)
if (!isset ($string[++$index])):
$address = '';
// distinguish between links to a target in this document ...
elseif ($string[$index] === '#'):
if (isset ($string[++$index])):
// $text is handed to targeted as well, which may set it (see the fallback
// below); an empty target name links to the bare fragment #
$group = $this->targeted ($string, $index, $text);
$address = $group === ''? '#': '#' . $this->fragment_identifier ($group);
$address = '#'; // an empty target refers to the top of the page
// fallback, if neither $text was given nor set by $this->targeted
$text === '' and $text = '#';
// ... and links declared in metadata ...
elseif ($string[$index] === '@'):
// make sure there is another character
if (!isset ($string[++$index])):
$this->error ('Metadata name expected after ->@', 92);
return '';
// retrieve the metadata name
$name = stripslashes ($this->group ($string, $index));
// find the address declared for the given metadata name
if (isset ($this->metavars[$name][self::link])):
$address = $this->metavars[$name][self::link];
// without explicit link text, the escaped metadata name is displayed
$text !== '' or $text = encode_special_chars ($name);
// the name is declared, but not with a link as value
elseif (isset ($this->metavars[$name])):
$this->error ('Metadata name not declared with link value: @' . $name, 246);
return '';
$this->error ('Metadata name after ->@ not declared: ' . $name, 93);
return '';
// ... and shortened data URIs ...
elseif ($string[$index] === ','):
// the shorthand is reported as obsolete, but still translated
$this->error ('Obsolete shorthand for data URI hyperlinks', 255);
if (isset ($string[++$index])):
$address = 'data:;charset=UTF-8;base64,' . stripslashes ($this->group ($string, $index, true));
$address = 'data:;charset=UTF-8;base64,';
// ... and normal link
// handle unendorsed links
// (an exclamation mark directly after the arrow marks the link as unendorsed)
if ($unendorsed = $string[$index] === '!'):
if (!isset ($string[++$index])):
$this->error ('Filename/URI missing: address expected after ->!', 94);
return '';
// read the address with the URI rules of method group
$address = stripslashes ($this->group ($string, $index, true));
// create and return the HTML for the link
return $this->hyperlink ($address, $text, $unendorsed);
/* Prepares one or more linked file(s), whose type is encoded in the file
* $token. See function parse_file_token in func.php for a description of the
* $token. $extra contains the path(s) of the linked file(s) and possibly a
* caption which must be preceded by white space. For image and media types
* i, j, v, w it can also contain hints before the white space. If a caption is
* provided or the file is of type i, j, v, w, the processed file and caption
* are returned as an HTML <figure>, otherwise the result is returned without
* <figure> element.
private function linked_file (
string $extra,
string $token
): string
// split the token into file type, subtype and clue
[$type, $subtype, $clue] = parse_file_token ($token);
// only types listed in self::max_links_for_type are recognized
if (!isset (self::max_links_for_type[$type])):
$this->error ("Unrecognized file type: $type", 8);
return '';
// find optional link(s), hint, ..., caption and id
$links = [];
$link_counter = 0;
$caption = $hint = NULL;
$id = '';
// consume $extra piece by piece from its start until nothing is left
while ($extra !== ''):
// get the caption and detach the optional id from its start
if (str_contains (self::space, $extra[0])):
$caption = ltrim ($extra);
$id = $this->detach_caption_target ($caption);
// handle links
elseif (str_starts_with ($extra, '->')):
if (++$link_counter > self::max_links_for_type[$type]):
$this->error ("Too many links for linked file of type: $type", 216);
return '';
elseif (!isset ($extra[2])):
$this->error ('Filename/URI missing: filename expected after ->', 58);
return '';
// $i starts at 1, the position of the > of the arrow; the address is stored
// in $links and, unless it is NULL, everything up to and including position
// $i is cut off from $extra
// NOTE(review): presumably address moves $i to the last character of the
// address, like the &$index parameters of other methods here - confirm
elseif ($i = 1 and !is_null ($links[] = $this->address ($extra, $i))):
$extra = substr ($extra, $i + 1);
return '';
// handle hints
elseif ($extra[0] === '{'):
// hints are only allowed for types i, j, v, w and only once
if (!in_array ($type, ['i', 'j', 'v', 'w'], true) or $hint !== NULL):
$this->error ("Too many hints for linked file of type: $type", 217);
return '';
$i = 0;
$hint = prepare_html_attribute ($this->encurled ($extra, $i));
$extra = substr ($extra, $i + 1);
// handle unexpected stuff
$this->error ("Missing whitespace before caption: $extra", 241);
return '';
// call the right method to process the given file type
// (types a, b, d, h, p, q, t use the first link only, the other types get
// the whole list of links)
// NOTE(review): $links[0] is read without a check that a link was found in
// $extra - confirm that callers only pass $extra with at least one link
$file = match ($type) {
'a' => $this->file_aneamal ($links[0], self::linked, $subtype),
'b' => $this->file_tsv ($links[0]),
'd' => $this->file_tsv ($links[0], true),
'h' => $this->file_html ($links[0]),
'i' => $this->file_image ($links, $clue, $hint),
'j' => $this->file_preview ($links, $clue, $hint),
'p' => $this->file_tsv ($links[0], false, true),
'q' => $this->file_tsv ($links[0], true, true),
't' => $this->file_text ($links[0], $subtype, $clue, $caption),
'v' => $this->file_media ($links, $hint),
'w' => $this->file_media ($links, $hint, true),
'x' => $this->file_extension ($links, $subtype, $clue, $caption),
// return the optionally captioned result
// ($id is inserted directly after the tag name, so it is either empty or
// brings its own leading space and attribute syntax)
if ($caption !== NULL):
return "<figure{$id}>\n{$file}<figcaption>" . $this->phrase ($caption) . "</figcaption>\n</figure>\n";
elseif ($id !== ''):
return "<figure{$id}>\n{$file}</figure>\n";
// image and media types are wrapped in <figure> even without caption
elseif (in_array ($type, ['i', 'j', 'v', 'w'], true)):
return "<figure>\n{$file}</figure>\n";
return $file;
/* Loads metadata from another Aneamal file, typically an automatically found
* @meta.nml in the same or an ancestor directory. If an $uri is provided,
* metadata will be loaded from the given file instead. Returns true, if
* metadata has been loaded, and false otherwise.
private function load_settings (
string|null $uri = NULL
): bool
// the translated settings file, if one gets loaded
$meta = NULL;
// explicitly given settings file
if (isset ($uri)):
if (get_uri_type ($uri) !== URI_LOCAL):
$this->error ('Forbidden URI type for @meta: use a local filename instead', 187);
return false;
// split path from the query, which can be used to choose certain lines
[$path, $query] = split_uri_tail ($uri);
// resolve the file path so that it is relative to the Aneamal root
if (is_null ($canonical = $this->normalize_path ($path))):
return false;
// read the file content
if (is_null ($text = file_get_lines ($this->root . $canonical, $query))):
$this->error ('Not a readable file: ' . $path, 202);
return false;
// translate the file as a settings file in its own directory
$meta = new self ($text, dirname ($canonical), $this->home, basename ($canonical), self::settings);
// automatically found settings file: look for a readable @meta.nml in the
// directories provided by get_directories for this file's directory
foreach (get_directories ($this->dir) as $dir):
$filename = $dir . '/@meta.nml';
if (is_readable ($this->root . $filename)):
$meta = new self (file_get_contents ($this->root . $filename), $dir, $this->home, '@meta.nml', self::settings);
// end here, if no metadata has been loaded
if ($meta === NULL):
return false;
// copy various properties set via metadata declarations
// (each listed property of this object is overwritten with the value from
// the settings file)
foreach ([
'classes', // @class
'fixes', // @fix
'metahtmls', // @htmlhead
'javascripts', // @javascript
'lang', // @lang or @language
'lazy', // @load
'modules', // @math and @t-... and @x-...
'metascripts', // @script
'metastyles', // @style
'stylesheets', // @stylesheet
] as $property):
$this->$property = $meta->$property;
// copy metadata used as <link rel='$index'> in the HTML head
foreach (['atom', 'icon', 'license', 'me', 'rss', 'up'] as $index):
if (isset ($meta->links[$index])):
$this->links[$index] = $meta->links[$index];
// copy metadata used as <meta name='$index'> in the HTML head
foreach (['author', 'publisher', 'robots', 'translator', 'viewport'] as $index):
if (isset ($meta->metas[$index])):
$this->metas[$index] = $meta->metas[$index];
// copy metadata that can be used in the body
$this->metavars = $meta->metavars;
$this->markvars = $meta->markvars;
// keep the HTML error report of the settings file, which is an empty string
// if no errors occurred in it
$this->metaerror = $meta->get_errors ('Errors in meta file');
return true;
/* Hands a LaTeX math $expression to the math module, which turns the code into
 * a nice looking formula, and returns the result wrapped in an HTML element
 * with role='math'. $is_block says whether the $expression is from a math
 * block (displaystyle in LaTeX terminology, wrapped in <div>) or from math
 * phrase markup (textstyle in LaTeX terminology, wrapped in <span>). The math
 * module should be located at aneamal/math/index.php and usually makes use of
 * an external program such as KaTeX, MathJax or mimeTex. The optional $label,
 * only used for blocks, can contain an equation number for example; a target
 * at the start of the $label can be used to link to the math expression. If
 * the module returns an empty string, the escaped $expression is returned
 * without wrapper instead.
 */
private function math (
	string $expression,
	bool $is_block = false,
	string|null $label = NULL
): string
{
	// prepare the HTML wrapper before the module is used
	if ($is_block and $label !== NULL):
		// labelled block: the target detached from the label provides the id
		$id = $this->detach_caption_target ($label);
		$open = "<div{$id} role='math'>\n";
		$close = "\n<span class='_label'>" . $this->phrase ($label) . "</span>\n</div>\n";
	elseif ($is_block):
		$open = "<div role='math'>\n";
		$close = "\n</div>\n";
	else:
		$open = "<span role='math'>";
		$close = '</span>';
	endif;

	// names for the two kinds of math as expected by old and new modules
	[$style, $kind] = $is_block? ['display', 'block']: ['text', 'string'];

	// data to be passed to the module
	$data = [
		// items 0 to 3 for backwards compatibility with old modules
		$expression,
		$style,
		$this->home . '/aneamal/math',
		$this->home . $this->dir,
		// math module specific stuff
		'kind' => $kind,
		'math' => $expression,
	];

	$formula = $this->use_module ('math', $data);
	if ($formula === ''):
		return encode_special_chars ($expression);
	endif;

	return $open . $formula . $close;
}
/* Translates an Aneamal $block that contains a mathematical LaTeX formula.
 * $block must start with two dollar signs and should contain two more
 * unslashed dollar signs which mark the end of the mathematical expression.
 * These can be followed by whitespace and a math label. If the closing dollar
 * signs are missing, an error is reported and the whole rest of the block is
 * handled as expression.
 */
private function math_block (
	string $block
): string
{
	// find the closing $$ after the opening $$
	$end = strpos_unslashed ($block, '$$', 2);
	if (!$end):
		$this->error ('Math block not closed: expected $$', 6);
		return $this->math (substr ($block, 2), true);
	endif;

	// the expression is what lies between the opening and closing $$
	$expression = substr ($block, 2, $end - 2);

	// nothing after the closing $$ means that there is no label
	$after = $end + 2;
	if (!isset ($block[$after])):
		return $this->math ($expression, true);
	endif;

	// a label must be separated by whitespace, but is used either way
	if (!str_contains (self::space, $block[$after])):
		$this->error ('Missing whitespace between $$ and math label', 121);
	endif;

	return $this->math ($expression, true, ltrim (substr ($block, $after)));
}
/* Returns an array whose first item contains a given $uri of a media file
 * (image, audio, video, captions) prepared for use as a href or src attribute
 * in HTML. The second item is the $uri prepared for use in the local file
 * system, without query and fragment, if it points to a local file, and false
 * otherwise. In case of an error, both items are false.
 */
public function media_link (
	string $uri
): array
{
	// only local files have a name in the file system
	$filename = false;

	$uri_type = get_uri_type ($uri);
	if ($uri_type === URI_LOCAL):
		if (is_null ($canonical = $this->normalize_path ($uri))):
			return [false, false];
		endif;
		$uri = $this->home . $canonical;
		// cut off query and fragment, which are not part of the filename
		$filename = $this->root . substr ($canonical, 0, strcspn ($canonical, '?#'));
	elseif ($uri_type === URI_PAGE):
		$this->error ('Link expected to point to a file: ' . $uri, 204);
		return [false, false];
	endif;

	return [encode_special_chars ($uri), $filename];
}
/* Handles the metadata declaration of a metadata $name that is recognized by or
* reserved for special use in Aneamal and whose value $val is plain text (in
* contrast to a link). Some metadata declarations change the behaviour of the
* Aneamal-HTML translator (e.g. 'pixels' sets the size for preview images).
* Other metadata declarations are published in the HTML output as <meta>
* elements inside the <head>. The output is done by other methods, but this one
* prepares for it.
private function meta_content (
string $name,
string $val,
int $errline
): void
switch ($name):
// the only textual value accepted for @aside is off, which empties the
// aside property
case 'aside':
if ($val === 'off'):
$this->aside = '';
$this->error ('Invalid @aside value: ' . $val, 182, $errline);
// @charset is only checked, its value is not stored by this method
case 'charset':
if ($this->filekind === self::embedded or $this->filekind === self::quoted):
$this->error ('@charset declared in embedded file or quotation block', 46, $errline);
elseif (!in_array (strtoupper ($val), ['UTF-8', 'UTF8'])):
$this->error ('Obsolete character encoding declared, use UTF-8 instead', 238, $errline);
// class names can be separated by commas and by spaces
case 'class':
case 'classes':
foreach (explode_comma_separated ($val) as $string):
array_push ($this->classes, ...explode (' ', $string));
// the writing direction is accepted case-insensitively and stored lowercase
case 'dir':
$direction = strtolower ($val);
if ($direction === 'ltr' or $direction === 'rtl'):
$this->direction = $direction;
$this->error ('Invalid writing direction declared: ' . $val, 110, $errline);
// fixes are flags combined bitwise; a declaration replaces the fixes set
// before, unless the value inherit is listed, which adds them again
case 'fix':
$inherited = $this->fixes;
$this->fixes = 0;
foreach (explode_comma_separated ($val) as $fix):
if (isset (self::recognized_fixes[$fix])):
$this->fixes |= self::recognized_fixes[$fix];
elseif ($fix === 'inherit'):
$this->fixes |= $inherited;
elseif ($fix !== ''):
$this->error ('Invalid @fix value: ' . $fix, 223, $errline);
// the only textual value accepted for @footer is off
case 'footer':
if ($val === 'off'):
$this->footer = '';
$this->error ('Invalid @footer value: ' . $val, 173, $errline);
// the only textual value accepted for @header is off
case 'header':
if ($val === 'off'):
$this->header = '';
$this->error ('Invalid @header value: ' . $val, 171, $errline);
// language codes may only contain ASCII letters, digits and hyphens
case 'lang':
case 'language':
if (is_made_of ($val, self::alphanumeric . '-')):
$this->lang = $val;
$this->error ('Invalid characters in language code', 69, $errline);
// the value auto is represented by NULL in the layout property
case 'layout':
if ($val === 'manual' or $val === 'blank'):
$this->layout = $val;
elseif ($val === 'auto'):
$this->layout = NULL;
$this->error ('Invalid @layout value: ' . $val, 169, $errline);
// the lazy property is 0 for eager, 1 for lazy and -1 for auto
case 'load':
if ($val === 'eager'):
$this->lazy = 0;
elseif ($val === 'lazy'):
$this->lazy = 1;
elseif ($val === 'auto'):
$this->lazy = -1;
$this->error ('Invalid @load value: ' . $val, 231, $errline);
// the only textual value accepted for @look is off
case 'look':
if ($val === 'off'):
$this->look = '';
$this->error ('Invalid @look value: ' . $val, 175, $errline);
// the value is registered for the math module without being checked here
case 'math':
$this->modules['math'] = $val;
// the only textual value accepted for @meta is off
case 'meta':
if ($val === 'off'):
$this->meta = '';
$this->error ('Invalid @meta value: ' . $val, 195, $errline);
// the following values are stored without being checked here
case 'pixels':
$this->pixels = $val;
case 'role':
$this->role = $val;
// stored as integer by a plain (int) cast
case 'textcap':
$this->textcap = (int) $val;
case 'title':
$this->title = $val;
// a non-empty title tail is stored with a leading space
case 'titletail':
$this->titletail = $val === ''? '': " $val";
// other names: values for <meta> elements in the HTML head, values for
// t-... and x-... modules, or an error for any other name
if (isset (self::recognized_html_metas[$name])):
$this->metas[$name] = $val;
elseif (str_starts_with ($name, 't-') or str_starts_with ($name, 'x-')):
$this->modules[$name] = $val;
$this->error ('Reserved metadata name: ' . $name, 190, $errline);
/* Checks and registers a custom mark declaration. The custom marks $name must
* start with an ampersand. Its $value should be either a path to a file or
* the content of an embedded file in which case either $is_link or $is_embedded
* should be true. If $is_fallback is true, then the custom mark is only
* registered, if it has not been registered by a parent file before.
private function meta_custom (
string $name,
string $value,
bool $is_fallback,
bool $is_link,
bool $is_embedded,
int $errline
): void
// Custom marks consist of an ampersand followed by a letter or digit.
// (Only the length of two bytes and the second character are checked here.)
if (strlen ($name) !== 2 or !str_contains (self::alphanumeric, $name[1])):
$this->error ('Malformed custom-mark declaration', 95, $errline);
// Each custom mark may only be defined once per Aneamal file.
if (isset ($this->metadecs[$name])):
$this->error ('Custom mark already declared: ' . $name, 96, $errline);
// Remember that this file declares the custom mark.
$this->metadecs[$name] = true;
// Stop if this custom mark declaration is just a fallback and the mark has
// been declared in a parent file such as @meta.nml or a template.
if ($is_fallback and isset ($this->markvars[$name])):
// Register the custom mark:
// an embedded file is stored as given
if ($is_embedded):
$this->markvars[$name] = $value;
// a linked file is loaded with file_raw; if that returns NULL, an empty
// string is registered instead
elseif ($is_link):
$this->markvars[$name] = $this->file_raw ($value, true, false, $errline) ?? '';
$this->error ('Custom mark not declared as link or embedded file', 120, $errline);
/* Processes the declaration of a metadata $name that is recognized by or
 * reserved for special use in Aneamal and whose value $val is the content of
 * an embedded file. The content is only registered here for later use by
 * other methods: as HTML head content, script, style or for a module. An
 * error for line $errline is reported for names which do not support embedded
 * files.
 */
private function meta_embedded (
	string $name,
	string $val,
	int $errline
): void
{
	if ($name === 'htmlhead'):
		$this->metahtmls[] = $val;
	elseif ($name === 'script'):
		$this->metascripts[] = $val;
	elseif ($name === 'style'):
		$this->metastyles[] = $val;
	elseif ($name === 'math' or str_starts_with ($name, 't-') or str_starts_with ($name, 'x-')):
		// math, t-... and x-... modules are registered under their name
		$this->modules[$name] = $val;
	else:
		$this->error ('Embedded file not supported for this metadata name @' . $name, 154, $errline);
	endif;
}
/* Handles the metadata declaration of a metadata $name that is recognized by or
* reserved for special use in Aneamal and whose value $val is an URI (in
* contrast to plain text). Some metadata declarations change the behaviour of
* the Aneamal-HTML translator (e.g. 'errormore' sets the base URL for more
* information in case of errors). Other unescaped variables will later be
* published in the HTML output as <link> elements inside the <head>. The output
* is done by other methods, but this one prepares for it.
private function meta_link (
string $name,
string $val, // URI as given in the meta declaration
string $uri, // URI already checked and prepared for use in links
int $errline
): void
// Some names store $val and others store $uri; htmlhead, math, script,
// style and the t-... and x-... modules store what file_raw returns for $val.
switch ($name):
case 'aside':
$this->aside = $val;
case 'errormore':
$this->errormore = $uri;
case 'footer':
$this->footer = $val;
case 'header':
$this->header = $val;
// if file_raw returns NULL, an empty string is stored instead
case 'htmlhead':
$this->metahtmls[] = $this->file_raw ($val, true, false, $errline) ?? '';
case 'javascript':
$this->javascripts[] = $uri;
case 'look':
$this->look = $uri;
// NOTE(review): unlike for htmlhead, script and style, the result of
// file_raw is stored here without the ?? '' fallback - confirm that the
// modules property may hold NULL
case 'math':
$this->modules['math'] = $this->file_raw ($val, true, false, $errline);
case 'meta':
$this->meta = $val;
case 'script':
$this->metascripts[] = $this->file_raw ($val, true, false, $errline) ?? '';
case 'style':
$this->metastyles[] = $this->file_raw ($val, true, false, $errline) ?? '';
case 'stylesheet':
$this->stylesheets[] = $uri;
// other names: links for <link> elements in the HTML head, alternative
// language versions lang-..., t-... and x-... modules, or an error
if (isset (self::recognized_html_links[$name])):
$this->links[$name] = $uri;
elseif (str_starts_with ($name, 'lang-')):
// the language code is what follows the prefix lang-
$lang = substr ($name, 5);
if ($lang === ''):
$this->error ('Alternative-language code missing', 85, $errline);
$this->altlangs[$lang] = $uri;
elseif (str_starts_with ($name, 't-') or str_starts_with ($name, 'x-')):
$this->modules[$name] = $this->file_raw ($val, true, false, $errline);
$this->error ('Reserved metadata name: ' . $name, 191, $errline);
/* Handles a metadata declaration - that is a metadata value $val assigned to a
* $name - of various types, which occurred or started in line $errline. Values
* can be plain text, link URIs or the content of an embedded file. In the
* latter case, $is_embedded must be true. Metadata can be accessed from within
* the main body of the document.
private function metadata (
string $name,
string $val,
int $errline,
bool $is_embedded = false
): void
// A name that is empty or only consists of a backslash or a question mark
// counts as missing.
if ($name === '' or $name === '\\' or $name === '?'):
$this->error ('Metadata name missing', 74, $errline);
// A question mark at the end of the name means that the value will only be
// used, if it has not been inherited from a parent file.
// (A slashed question mark is part of the name instead.)
$is_fallback = $name[-1] === '?' && !is_slashed ($name, strlen ($name) - 1);
if ($is_fallback):
// remove the question mark and whitespace before it
$name = rtrim (substr ($name, 0, -1));
// Determine the metadata type before simplifying the metadata name:
$type = get_meta_type ($name);
// an alias is replaced by the name it stands for; other names are unslashed
$name = self::metadata_aliases[$name] ?? stripslashes ($name);
// Identify whether the value is an Aneamal link and turn it into just the
// URI in that case.
$is_link = false;
if (!$is_embedded and $is_link = str_starts_with ($val, '->')):
// remember the length of the whole value for the check below
$length = strlen ($val);
// retrieve the address
// ($pos starts at the > of the arrow and is passed on to address)
$pos = 1;
$val = $this->address ($val, $pos, false);
if ($val === NULL):
// check whether the URL is followed by anything, which is forbidden
if ($length > $pos + 1):
$this->error ('Characters after link in metadata declaration for @' . $name, 60, $errline);
// Handle custom mark declarations:
if ($type === META_CUSTOM):
$this->meta_custom ($name, $val, $is_fallback, $is_link, $is_embedded, $errline);
// ASCII characters except alphanumerics and hyphen are not allowed in
// metadata names.
// (strcspn returns the length of the name, if none of the listed characters
// occurs in it)
if (strlen ($name) !== strcspn ($name, "\t !\"#$%&'()*+,./:;<=>?@[\\]^_`{|}~")):
$this->error ('Unexpected character in metadata name: ' . $name, 189, $errline);
// Each metadata name except for a select few may only be declared locally
// once, so we keep track of these.
if (isset ($this->metadecs[$name]) and !in_array ($name, self::metadata_multiples)):
$this->error ('Metadata name already declared: ' . $name, 194, $errline);
$this->metadecs[$name] = true;
// Stop if this metadata declaration is just a fallback and the name has
// been declared in a parent file such as @meta.nml or a template.
if ($is_fallback and isset ($this->metavars[$name])):
// Process the metadata declaration depending on the form of its content:
if ($is_embedded):
// Register the embedded file, but do not store its content in metavars
// since files embedded in metadata cannot be accessed in the body.
$this->metavars[$name] = [self::embd => true];
// process embedded files further
if ($type !== META_SPECIAL):
$this->error ('Embedded file used for unrecognized metadata name: ' . $name, 244, $errline);
$this->meta_embedded ($name, $val, $errline);
elseif ($is_link):
// check and normalize URIs
// ($uri is the address for use in links, which for a local file is the
// normalized path prefixed with the home path; $val becomes the normalized
// path itself in that case)
$uri = $val;
if (get_uri_type ($uri) === URI_LOCAL):
if (is_null ($val = $this->normalize_path ($val))):
$uri = $this->home . $val;
// Store the metadata link for later use in the body.
$this->metavars[$name] = [self::link => $uri];
// process special meta links further
// (only if the address is not empty)
if ($type === META_SPECIAL and $val !== ''):
$this->meta_link ($name, $val, $uri, $errline);
else: // textual value
$val = stripslashes ($val);
// Store the metadata for later use in the body.
$this->metavars[$name] = [self::text => $val];
// process special plain text meta variables further
if ($type === META_SPECIAL):
$this->meta_content ($name, $val, $errline);
/* Turns a $path like //foo/./bar/baz/../file into /foo/bar/file by treating a
* single dot path component as same directory and a double dot component as
* parent directory. Multiple slashes like // are turned into single ones.
* Paths that do not start with a slash like bar/file, i.e. paths that are
* relative to the directory of the file that is currently worked on
* ($this->dir), are changed into paths with a leading slash that are relative
* to the Aneamal root directory.
* A query string and/or URL fragment, i.e. everything from the first ? or #
* on, is not normalized and is appended unchanged to the result.
* The result ends with a slash, if the given path ended with a slash or with a
* . or .. component, because the path names a directory in these cases.
* A $path whose path part is empty ('' or something starting with ? or #) is
* resolved like the current directory without raising a PHP warning.
* Returns NULL on error, that is when a parent of the Aneamal root directory
* is referenced; error 51 is reported in that case.
*/
function normalize_path (
	string $path
): string|null
{
	$parts = [];

	// remove and remember the query string and URL fragment from the path;
	// substr () yields '' when there is neither of them
	$pathlen = strcspn ($path, '?#');
	$tail = substr ($path, $pathlen);
	$path = substr ($path, 0, $pathlen);

	// make path relative to the Aneamal root directory if it is not yet;
	// str_starts_with () also copes with an empty path, unlike $path[0]
	if (!str_starts_with ($path, '/')):
		$path = $this->dir . '/' . $path;
	endif;

	// split the path into its components (folders and file); explode ()
	// always returns at least one component, so $last is well-defined
	$components = explode ('/', substr ($path, 1));
	$last = $components[array_key_last ($components)];

	foreach ($components as $part):
		// a single dot and empty components (from //) add nothing
		if ($part === '.' or $part === ''):
			continue;
		endif;
		// a double dot drops the latest component of the normalized path
		if ($part === '..'):
			if (array_pop ($parts) === NULL):
				$this->error ('Linked parent of Aneamal root directory', 51);
				return NULL;
			endif;
			continue;
		endif;
		// add any other component to the normalized path
		$parts[] = $part;
	endforeach;

	// return the resolved path with leading and possibly trailing slash
	if ($parts === []):
		return '/' . $tail;
	endif;
	$is_directory = ($last === '.' or $last === '..' or str_ends_with ($path, '/'));
	return '/' . implode ('/', $parts) . ($is_directory? '/': '') . $tail;
}
/* Converts the $lines of a numbered list block into an HTML <ol> list and
* returns it. Lists may be nested, and each sublist may have its own
* enumeration type, but all items on one level of one sublist must share the
* same type (e.g. decimal numbers).
* The first whitespace-delimited token of every line is handed to
* item_number () together with the numbering of the previous item. An empty
* array as result means that the line continues the latest item; otherwise
* the result describes the numbering of a new item, one entry per level.
*/
private function numbered_list (
	array $lines
): string
{
	$html = $content = '';
	$numbering_before = [];
	$depth_before = $depth = -1;
	$bignum_fix = $this->fixes & self::recognized_fixes['list-numbers'];

	foreach ($lines as $line):
		$marker_length = strcspn ($line, "\t\x20");
		$marker = substr ($line, 0, $marker_length);
		$numbering = $this->item_number ($marker, $numbering_before);

		// a line without item number continues the latest item
		if ($numbering === []):
			$content .= "\n" . $line;
			continue;
		endif;

		// otherwise the latest item is complete and a new one begins
		$html .= $this->item ($content);
		$depth = count ($numbering) - 1;
		$type = $numbering[$depth][0];
		$value = $numbering[$depth][1];

		// numbers beyond the browsers' 32-bit range get their marker via CSS
		$style = '';
		if ($bignum_fix and bccomp (ltrim ($value, '-'), '2147483647') === 1 and !is_null ($marker_text = convert_number_string ($value, $type))):
			$style = " style='list-style-type:\"$marker_text. \"'";
		endif;

		// an item number that starts with # doubles as link target
		$name = $marker[0] === '#'? $this->fragment_identifier (substr ($marker, 1), true): '';
		$id = $name === ''? '': " id='$name'";

		// the opening <li> is the same wherever the item ends up
		$li = $type === '?'? "<li{$id}>": "<li{$id}{$style} value='$value'>";

		if ($depth > $depth_before):
			// deeper than the previous item: open a sublist
			$html .= ($type === '?'? "\n<ol>\n": "\n<ol type='$type'>\n") . $li;
		else:
			// same level or higher: close sublists as needed (none, if the
			// level is unchanged), then close the previous item
			$html .= str_repeat ("</li>\n</ol>\n", $depth_before - $depth) . "</li>\n" . $li;
		endif;

		$content = substr ($line, $marker_length);
		$numbering_before = $numbering;
		$depth_before = $depth;
	endforeach;

	// drop the leading line break, flush the last item, close all open lists
	return substr ($html, 1) . $this->item ($content) . str_repeat ("</li>\n</ol>\n", $depth + 1);
}
/* Parses $lines that make up the options in an options block. Each option
* consists of a key enclosed in curly brackets at the beginning of a line and
* an answer associated with that key. There can be a $question associated with
* the options. Returns a HTML <fieldset> with the $question as <legend> and for
* each option a checkbox or radio button for the key and <label> for the
* answer. Feeds the form API.
private function options (
array $lines,
string|null $question = NULL
): string
// Parameters:
//   $lines     lines of the options block; the array keys are used as line
//              numbers in error reports
//   $question  optional text for the <legend>; it is also passed to the form
//              API as 'topic'
// Side effects: initializes $this->form if necessary, appends one entry to
// $this->post if the block belongs to the posted form, and empties $_POST and
// $this->post if a required option has not been selected.
// This block is active, if it belongs to a form that has been posted.
$this->form ??= $this->form_id ();
$active = isset ($_POST['_form']) && $_POST['_form'] === $this->form;
// Join the lines of the block into distinct options and preprocess each
// option, extracting [key, modifier, answer]. We also count here how many
// mutually exclusive, preselected and regular options occur.
$prep = [];
$exclusive = $selected = $regular = 0; // counters
// joint () is given a predicate that recognizes lines starting with {;
// presumably it merges each such line with its continuation lines - TODO
// confirm against func.php.
foreach (joint ($lines, fn ($x) => $x[0] === '{') as $k => $option):
// Extract the key, its end being marked by a right curly bracket:
$pos = strpos_unslashed ($option, '}', 1);
if ($pos === NULL):
$this->error ('Malformed option: expected }', 112, $k);
$key = trim (strip_slashed_breaks_and_slashes (substr ($option, 1, $pos - 1)));
// Add keys without modifier and answer to the preprocessed array:
// (++$pos moves on to the byte right after the right curly bracket)
if (!isset ($option[++$pos])):
$prep[] = [$key, NULL, NULL];
// Distinguish between mutually exclusive, preselected and regular
// options based on the modifier, a single (non-whitespace) byte right
// after the right curly bracket:
if ($option[$pos] === "'"):
$modifier = $option[$pos++];
elseif ($option[$pos] === "-"):
$modifier = $option[$pos++];
// remaining modifiers: ! (required), 0 and 1 (test options); NULL if none
$modifier = str_contains ("!01", $option[$pos])? $option[$pos++]: NULL;
// Add options with modifier or answer to the preprocessed array:
if (!isset ($option[$pos])):
$prep[] = [$key, $modifier, NULL];
elseif (str_contains (self::space, $option[$pos])):
$prep[] = [$key, $modifier, ltrim (substr ($option, $pos + 1))];
// no whitespace after key/modifier: the rest is still used as answer
$prep[] = [$key, $modifier, substr ($option, $pos)];
$this->error ('Missing whitespace between key and answer in an option', 205, $k);
// Determine the HTML <input type>: radio button for valid mutually
// exclusive options, i.e. when all options are marked as exclusive except
// for at most a single preselected option. Otherwise checkboxes are used.
if ($regular === 0 and $exclusive > 0 and $selected <= 1):
$type = 'radio';
// one name shared by all radio buttons of this block
$name = get_unique ($this->form);
$type = 'checkbox';
if ($exclusive > 0):
$this->error ('Mix of mutually exclusive and non-exclusive options', 249);
// Generate the HTML for each option and in an active form the respective
// data for the form API.
$html = $data = [];
// $mismatch has three states: NULL while no test option (modifier 0 or 1) has
// been evaluated, false while all evaluated test options match the
// submission, true as soon as one of them does not.
$mismatch = NULL;
foreach ($prep as [$key, $modifier, $answer]):
// Generate a name that is unique within the form for checkboxes. Radio
// buttons share a common name that has been generated earlier.
if ($type === 'checkbox'):
$name = get_unique ($this->form);
// Attributes for the HTML <input>, including a globally unique ID.
// ($id is assigned inside the array literal and reused for the <label>.)
$attr = [
'id' => $id = get_unique (),
'name' => $name,
'form' => $this->form,
'type' => $type,
'value' => $key,
// Differentiate finer between different kinds of options:
// option that is selected by default
if ($modifier === '-'):
// NULL presumably yields a valueless boolean attribute in
// implode_html_attributes () - TODO confirm
$attr['checked'] = NULL;
// option that MUST be selected to submit
elseif ($modifier === '!'):
$attr['required'] = NULL;
// abort the submission, if this required option is not selected
if ($active and !isset ($_POST[$name])):
$active = false;
// discards the submitted data and everything collected for the form API
$_POST = $this->post = [];
// option that SHOULD be selected (to pass a test)
elseif ($modifier === '1'):
// once a mismatch has been found, later test options cannot undo it
if ($active and !$mismatch):
$mismatch = !isset ($_POST[$name]);
// option that SHOULD not be selected (to pass a test)
elseif ($modifier === '0'):
if ($active and !$mismatch):
$mismatch = isset ($_POST[$name]);
// Compose the HTML for the option:
$attributes = implode_html_attributes ($attr);
$html[] = "<input{$attributes}> <label for='$id'>" . $this->phrase ($answer ?? '') . "</label>";
// Compose data about this option for the form API:
// (the comparison with $key singles out the chosen one among radio buttons,
// which all share the same name)
if ($active and isset ($_POST[$name]) and $_POST[$name] === $key):
$data[] = isset ($answer)? ['input' => $key, 'label' => $answer]: ['input' => $key];
// Push data about this options block to the form API, even if no option has
// been selected (so that $data is empty).
if ($active):
$this->post[] = isset ($question)? ['topic' => $question, 'block' => $data]: ['block' => $data];
// 'match' is only added, if at least one test option has been evaluated
if (isset ($mismatch)):
$this->post[array_key_last ($this->post)]['match'] = !$mismatch;
// Return the HTML for this options block:
$legend = isset ($question)? '<legend>' . $this->phrase ($question) . "</legend>\n": '';
return "<fieldset>\n" . $legend . implode ("<br>\n", $html) . "\n</fieldset>\n";
/* Parses phrase markup in a $text and returns the text prepared for output as
* HTML. This method is applied to text inside blocks: to list items, image
* captions, paragraphs, heading content etc.
private function phrase (
string $text
): string
// The text is assembled in two stages: $word holds the piece that is being
// built right now - the current word or the group that has just ended - and
// $output holds everything before it that is final. Keeping the latest group
// in $word allows a directly following link or hint to wrap it.
// The helper methods that receive $text and $i (link, encurled, hook, target,
// group, enclosed, enclosed2, reference, bracketed) presumably advance $i by
// reference to the end of the markup they consume - TODO confirm.
// The method calls itself for the content of bare strings and brackets.
$output = $word = '';
// $groupend becomes true without pushing the group into the output, if a
// group has just ended. This is important so that a following link or hint
// can be associated with the group or groups before it. If no link or hint
// follow immediately, the group is pushed to the output before continuing.
$groupend = false;
// $linked and $hinted become true when a link or hint are not yet pushed to
// the output. They can be used to prevent a chain of multiple links or
// hints which don't make sense.
$linked = $hinted = false;
// go through the text, byte by byte
for ($i = 0, $length = strlen ($text); $i < $length; ++$i):
$char = $text[$i];
// link, excluding references to a note
if ($char === '-' and $length > $i + 1 and $text[$i + 1] === '>'):
// $word is handed to link () as the text the link applies to
$link = $this->link ($text, $i, $word);
if ($linked):
$this->error ('Multiple consecutive links', 88);
elseif ($link !== ''):
$word = $link;
$groupend = $linked = true;
// hint
elseif ($char === '{'):
$hint = prepare_html_attribute ($this->encurled ($text, $i));
if ($word === ''):
$this->error ('Annotated text missing: text expected before {', 56);
elseif ($hinted):
$this->error ('Multiple consecutive hints', 89);
$word = "<span title='$hint'>$word</span>";
$groupend = $hinted = true;
// neither link nor hint follows the group that has just ended: finalize it
elseif ($groupend === true):
$output .= $word;
$word = '';
$groupend = $linked = $hinted = false;
switch ($char):
case '\\': // marks next byte as literal character
// a backslash before a line break or at the very end adds nothing here
if ($length > ++$i and $text[$i] !== "\n"):
$word .= encode_printable_ascii ($text[$i]);
case '&': // hook or deprecated custom mark
// & at the very end of the text: empty hook
if ($length <= ++$i):
$output .= $word;
$word = "<span class='_hook'></span>";
$groupend = true;
// custom marks declared in the document take precedence over defaults
elseif (isset ($this->markvars['&' . $text[$i]])):
$word .= $this->markvars['&' . $text[$i]];
elseif (isset (self::default_markvars[$text[$i]])):
$word .= self::default_markvars[$text[$i]];
$output .= $word;
$word = $this->hook ($text, $i);
$groupend = true;
case '#': // target
$output .= $word;
$word = $this->target ($text, $i);
$groupend = true;
case '@': // metadata
if ($length <= ++$i):
$this->error ('Metadata name expected after @', 133);
$word .= '@';
// $name is assigned inside the condition and reused by the branches below;
// only values declared as text (self::text) can be inserted
elseif (isset ($this->metavars[$name = stripslashes ($this->group ($text, $i))][self::text])):
$output .= $word;
$word = encode_special_chars ($this->metavars[$name][self::text]);
$groupend = true;
elseif (isset (self::default_textvars[$name])):
$output .= $word;
$word = encode_special_chars (self::default_textvars[$name]);
$groupend = true;
// the name is declared, but as link or embedded file
elseif (isset ($this->metavars[$name])):
$this->error ('Metadata name not declared with text value: @' . $name, 245);
$word .= '@' . encode_special_chars ($name);
$this->error ('Metadata name after @ not declared: ' . $name, 134);
$word .= '@' . encode_special_chars ($name);
case '`': // bare string, e.g. to define a link text
$output .= $word;
$word = $this->phrase ($this->enclosed ($text, $i));
$groupend = true;
case '^': // reference to a note
$output .= $word;
$word = $this->reference ($text, $i);
// $linked is set, so a link right after the reference is reported as error
$groupend = $linked = true;
case '$': // math
$output .= $word;
$word = $this->math ($this->enclosed ($text, $i, true));
$groupend = true;
case '|': // code
$output .= $word;
$word = $this->code ($this->enclosed ($text, $i, true));
$groupend = true;
case '}':
// a left curly bracket would have been consumed as hint above
$this->error ('Unmatched right curly bracket: }', 57);
$word .= '}';
case '_': // supplementary emphasis
case '~': // gentle emphasis
case '*': // heavy emphasis
case '"': // quoted string
$output .= $word;
$word = $this->emphasis ($this->enclosed ($text, $i), $char);
$groupend = true;
case "\n": // line break
$output .= $word . '<br>';
$word = '';
case "\t":
case "\x20": // end of a word
// tabs and spaces alike become a single space character in the output
$output .= $word . "\x20";
$word = '';
case '(':
case '[': // brackets
// NULL from bracketed () means that no group could be extracted; the
// bracket is kept as literal character then
if (is_null ($group = $this->bracketed ($text, $i))):
$word .= $char;
$output .= $word;
$word = $char . $this->phrase ($group) . self::brackets[$char];
$groupend = true;
case '+':
case '=':
// +- and =- open two-byte emphasis marks; a lone + or = is literal
if ($length > $i + 1 and $text[$i + 1] === '-'):
$output .= $word;
$word = $this->emphasis ($this->enclosed2 ($text, $i), "$char-");
$groupend = true;
$word .= $char;
case '-':
// -+ and -= are closing marks without a preceding opening mark here
if ($length > $i + 1 and in_array ($text[$i + 1], ['+', '='], true)):
$mark = '-' . $text[++$i];
$this->error ('Unmatched right cross or unmatched right fork: ' . $mark, 207);
$word .= $mark;
$word .= '-';
case '<':
$word .= '<';
case '>':
$word .= '>';
$word .= $char;
// the last word or group has not been pushed to the output yet
return $output . $word;
/* Removes comments (i.e. lines starting with %) and interprets and removes
* metadata declarations (i.e. lines starting with @). Despite removing lines,
* the array keys of $this->lines are preserved.
* There are two kinds of metadata, regular and embedded file metadata. The
* initial line of regular metadata must contain an unslashed colon separating
* the name from the value. The value may span multiple lines where all lines
* start with @ and all but the last line end with \ and there are no other
* lines in between except for comments. The initial line of embedded file
* metadata does not contain an unslashed colon. Its name is the whole initial
* line (without @ and trimmed). Its value is the embedded file content defined
* by the following lines that start with @ followed by |.
private function preprocess_comments_meta (
): void
// The method is a small state machine over $this->lines. While a declaration
// is open, $name holds its name, $start the key of the line it started in and
// $val the value collected so far (NULL for an embedded file without any
// content line yet). Every completed declaration is handed to metadata ().
$state = 0; // 0 (none), 1 (regular) or 2 (embedded)
$val = NULL;
foreach ($this->lines as $n => $line):
// get the leftmost byte
// (NULL for an empty line)
$left = $line[0] ?? NULL;
// check for reserved semicolon
if ($left === ';'):
$this->error ('Reserved first character in line: semicolon ;', 210, $n);
// if in a comment line ...
if ($left === '%'):
// ... remove the line
// (the state is not touched, so comments may interrupt a declaration)
unset ($this->lines[$n]);
// if in a metadata line ...
elseif ($left === '@'):
// get the line without the initial @
$line = ltrim (substr ($line, 1));
// if in an embedded file already
if ($state === 2):
// a content line of the embedded file: strip the | and append
if (str_starts_with ($line, '|')):
if ($val === NULL):
$val = substr ($line, 1);
$val .= "\n" . substr ($line, 1);
// a different metadata line ends the embedded file
elseif ($val === NULL):
$this->error ('Value missing in metadata declaration for @' . $name, 240, $start); // multiple uses
$state = 0;
$this->metadata ($name, $val, $start, true);
$state = 0;
// if not in multiline metadata yet
if ($state === 0):
$start = $n; // remembers where the metadata declaration started
$statement = explode_unslashed (':', $line, 2);
// with colon: regular metadata, name before and value after the colon
if (isset ($statement[1])):
$name = rtrim ($statement[0]);
$val = ltrim ($statement[1]);
$state = 1;
// without colon: the whole line is the name of embedded file metadata
elseif ($statement[0] !== ''):
$name = $statement[0];
$val = NULL;
$state = 2;
// if in multiline metadata already
elseif ($state === 1):
// replace the backslash that ended the previous line with this line
$val = substr ($val, 0, -1) . $line;
// if regular metadata doesn't continue as multiline metadata
// (i.e. the value is empty or does not end with an unslashed backslash)
if ($state === 1 and ($val === '' or $val[-1] !== '\\' or is_slashed ($val, strlen ($val) - 1))):
$this->metadata ($name, $val, $start);
$state = 0;
unset ($this->lines[$n]);
// if not in a metadata line, but an embedded file had been open
elseif ($state === 2):
if ($val === NULL):
$this->error ('Value missing in metadata declaration for @' . $name, 240, $start); // multiple uses
$this->metadata ($name, $val, $start, true);
$state = 0;
// if not in a metadata line, but a metadata line was expected
elseif ($state === 1):
$this->error ('Malformed multiline metadata declaration for @' . $name, 100);
// the dangling backslash at the end of the value is dropped
$this->metadata ($name, substr ($val, 0, -1), $start);
$state = 0;
// if embedded file/multiline metadata is still open at the document end
if ($state === 2):
if ($val === NULL):
$this->error ('Value missing in metadata declaration for @' . $name, 240, $start); // multiple uses
$this->metadata ($name, $val, $start, true);
elseif ($state === 1):
// the dangling backslash at the end of the value is dropped
$this->metadata ($name, substr ($val, 0, -1), $start);
/* Trims white space from both ends of every line in $this->lines and rewrites
* sandwich markup as line style markup. A sandwich opens with a line of the
* form "/prefix/until" and closes with a line that reads "until" (quotes not
* included). Both delimiter lines are removed and "prefix" is prepended to
* each line in between. Although lines are removed, the array keys of
* $this->lines are preserved.
* Reports error 141 for an opening line without second slash, error 143 for
* an empty prefix and error 142 if a sandwich with a non-empty bottom
* delimiter is still open at the end of the document.
*/
private function preprocess_lines (
): void
{
	// bottom delimiter of the open sandwich, NULL outside of sandwiches
	$until = NULL;

	foreach ($this->lines as $n => $raw):
		$bare = trim ($raw);

		// inside a sandwich: either its final line or a line to prefix
		if ($until !== NULL):
			if ($bare === $until):
				$until = NULL;
				unset ($this->lines[$n]);
			else:
				$this->lines[$n] = trim ($prefix . $raw);
			endif;
			continue;
		endif;

		// outside of sandwiches every line is stored trimmed
		$this->lines[$n] = $bare;

		// only lines that start with a slash can open a sandwich
		if (!str_starts_with ($bare, '/')):
			continue;
		endif;

		// the prefix ends at the second slash
		$close = strpos ($bare, '/', 1);
		if (!$close):
			$this->error ('Invalid sandwich markup: expected second /', 141, $n);
			continue;
		endif;

		$prefix = ltrim (substr ($bare, 1, $close - 1));
		if ($prefix === ''):
			$this->error ('Prefix missing in sandwich markup', 143, $n);
		endif;
		$until = ltrim (substr ($bare, $close + 1));
		unset ($this->lines[$n]);
	endforeach;

	// a sandwich with non-empty bottom delimiter must be closed explicitly
	if ($until !== NULL and $until !== ''):
		$this->error ('Bottom sandwich delimiter missing: ' . $until, 142);
	endif;
}
/* Parses an Aneamal text that has been included as linked or embedded file or
* as a block quotation. A template which adds standard styles and scripts may
* be used. These and the file's stylesheets and scripts are added to this
* file's corresponding array. The file's meta title and general metadata are
* not used.
* $text: Aneamal text
* $dir: directory in which the parsed file is located relative to the
* Aneamal root, starting with a slash; needed to locate other
* files referenced from this
* $filename: basename of the the file the $text is written in
* $kind: an integer identifying the kind of Aneamal document
* $tpl: name of a metadata template file
* $citation: optionally a string that references the source of the text, this
* makes sense for quotation blocks
private function process_file_aneamal (
string $text,
string $dir,
string $filename,
string $kind,
string|null $tpl = NULL,
string $citation = ''
): string
// NOTE: $kind is declared as string; it is matched against the single-letter
// class constants self::aside, self::footer, self::header and self::quoted
// below, every other kind being wrapped in a <div>.
// Returns the HTML of the included document wrapped in that element, followed
// by the errors of the template if there are any, or '' if the document has
// neither body nor citation.
// cache for templates: they only need to be loaded once for multiple uses
static $templates = [];
$lazy = $pixels = NULL;
$tplerrors = $role = '';
$attr = $classes = $modules = [];
// without a usable template the included document inherits the metadata and
// custom marks of this document
$metavars = $this->metavars;
$markvars = $this->markvars;
// handle the optional template
if (isset ($tpl)):
if (!isset ($templates[$tpl])):
// two locations are tried in this order; $tplfile is assigned inside the
// condition and holds the location that was tested last
if (
is_readable ($this->root . ($tplfile = '/aneamal/a-' . $tpl . '/index.nml'))
or is_readable ($this->root . ($tplfile = '/aneamal/a-' . $tpl . '.nml'))
$templates[$tpl] = new self (
file_get_contents ($this->root . $tplfile),
dirname ($tplfile),
basename ($tplfile),
// scripts and styles of the template are added to this document
$this->javascripts = array_merge ($this->javascripts, $templates[$tpl]->javascripts);
$this->metascripts = array_merge ($this->metascripts, $templates[$tpl]->metascripts);
$this->stylesheets = array_merge ($this->stylesheets, $templates[$tpl]->stylesheets);
$this->metastyles = array_merge ($this->metastyles, $templates[$tpl]->metastyles);
// a template may only contain metadata: any non-empty line that is left is
// reported with the key of the first such line
if (!empty ($templates[$tpl]->lines) and implode ($templates[$tpl]->lines) !== ''):
$templates[$tpl]->error ('Content in template', 151, array_key_first (array_filter ($templates[$tpl]->lines, fn ($l) => $l !== '')));
$tplerrors = $templates[$tpl]->get_errors ('Errors in template');
$this->error ('Template not readable: a-' . $tpl, 152);
// settings of a loaded template replace the defaults from above
if (isset ($templates[$tpl])):
$classes = array_merge (["a-$tpl"], $templates[$tpl]->classes);
$role = $templates[$tpl]->role;
$lazy = $templates[$tpl]->lazy;
$pixels = $templates[$tpl]->pixels;
$modules = $templates[$tpl]->modules;
$metavars = $templates[$tpl]->metavars;
$markvars = $templates[$tpl]->markvars;
$doc = new self ($text, $dir, $this->home, $filename, $kind, $metavars, $markvars);
// Each ??= below only takes effect, if the $doc has not set the property
// itself.
// pass the lazy setting from template or main file to the $doc
$doc->lazy ??= $lazy ?? $this->lazy;
// pass preview image size settings from template or main file to the $doc
$doc->pixels ??= $pixels ?? $this->pixels;
// merge module configurations whereby settings in the doc take precedence
// (array_merge lets later arguments win: this document < template < $doc)
$doc->modules = array_merge ($this->modules, $modules, $doc->modules);
// pass base URI for detailed error explanations to the $doc
$doc->errormore ??= $this->errormore;
// pass size limit for text file inclusions to the $doc
$doc->textcap ??= $this->textcap;
// pass fixes to the $doc
$doc->fixes ??= $this->fixes;
// Pass language data to the $doc iff it lacks it. The writing direction is
// only passed for embedded files and quotation blocks, since other files
// set it to ltr in the constructor by default.
$doc->lang ??= $this->lang;
$doc->direction ??= $this->direction;
// generate the document body of the file
$body = $doc->body ();
// nothing to output; note that styles and scripts of the $doc are not
// inherited in this case either
if ($body === '' and $citation === ''):
return '';
// inherit styles and scripts
$this->javascripts = array_merge ($this->javascripts, $doc->javascripts);
$this->metascripts = array_merge ($this->metascripts, $doc->metascripts);
$this->stylesheets = array_merge ($this->stylesheets, $doc->stylesheets);
$this->metastyles = array_merge ($this->metastyles, $doc->metastyles);
// prepare $doc's language data iff different from this document's data
if ($this->lang !== $doc->lang):
$attr['lang'] = $doc->lang;
if ($this->direction !== $doc->direction):
$attr['dir'] = $doc->direction;
// prepare role and class, combined from the template and the $doc
$role = trim ($role . ' ' . $doc->role);
if ($role !== ''):
$attr['role'] = $role;
if ($classes or $doc->classes):
$attr['class'] = array_merge ($classes, $doc->classes);
// handle the optional citation and id
$id = '';
if ($citation !== ''):
// $id is inserted right after the tag name below, so
// detach_caption_target () presumably returns a ready-made id attribute
// (or '') and removes the target from $citation by reference - TODO confirm
$id = $this->detach_caption_target ($citation);
$citation = "<cite>" . $this->phrase ($citation) . "</cite>\n";
// choose the HTML element to wrap the HTML in
$tag = match ($kind) {
self::aside => 'aside',
self::footer => 'footer',
self::header => 'header',
self::quoted => 'blockquote',
default => 'div',
$attributes = implode_html_attributes ($attr);
return "<{$tag}{$id}{$attributes}>\n{$body}{$citation}</$tag>\n{$tplerrors}";
/* Hands a given $text to the t-module named by $subtype and returns what the
* module makes of it, followed by a line break. The module also receives the
* optional $clue; its result is expected to be UTF-8 encoded.
* Without a $subtype (NULL) no module is involved: the $text is normalized,
* encoded and returned as preformatted plain text. An empty $subtype is
* reported as error 248 and yields an empty string.
* The optional $caption is informational and passed through for the form API.
*/
function process_file_text (
	string $text,
	string|null $subtype,
	string|null $clue,
	string|null $caption
): string
{
	// plain text needs no module
	if ($subtype === NULL):
		$plain = encode_special_chars (normalize_text ($text));
		return "<pre class='_plain'>" . $plain . "</pre>\n";
	endif;

	// a module must be named
	if ($subtype === ''):
		$this->error ('t-module subtype missing', 248);
		return '';
	endif;

	// let the module do its work; the numeric indices of the data are kept
	// for backwards compatibility with old modules, 'text' is t-module
	// specific
	$output = $this->use_module (
		't-' . $subtype,
		[
			0 => $text,
			1 => $clue ?? '',
			2 => $this->home . '/aneamal/t-' . $subtype,
			3 => $this->home . $this->dir,
			'text' => $text,
		],
		$clue,
		$caption
	);
	return $output . "\n";
}
/* Translates tab-separated values ($tsv) into an HTML table and returns it.
* The first line names the fields and becomes the header cells. Records with
* fewer fields than the first line are padded with empty cells; a record with
* more fields is reported as error 183 and makes the method return an empty
* string. With $parse the values are interpreted as phrase markup instead of
* being encoded as plain text. With $transpose the whole table is transposed,
* so that the names head the rows instead of the columns.
*/
private function process_file_tsv (
	string $tsv,
	bool $parse = false,
	bool $transpose = false
): string
{
	// a single line break at the very end does not start another record
	if (str_ends_with ($tsv, "\n")):
		$tsv = substr ($tsv, 0, -1);
	endif;

	// turns a raw value into the content of a table cell
	$render = fn ($value) => $parse? $this->phrase ($value): encode_special_chars ($value);

	// split the file into lines and the lines into fields
	$table = [];
	foreach (explode ("\n", $tsv) as $line):
		$table[] = explode ("\t", $line);
	endforeach;
	$records = count ($table) - 1;
	$fields = count ($table[0]);

	// the names in the first line become header cells
	$scope = $transpose? 'row': 'col';
	foreach ($table[0] as $col => $value):
		$table[0][$col] = "<th scope='" . $scope . "'>" . $render ($value) . "</th>\n";
	endforeach;

	// the values of all other lines become data cells
	foreach ($table as $row => $record):
		if ($row === 0):
			continue;
		endif;
		foreach ($record as $col => $value):
			$table[$row][$col] = "<td>" . $render ($value) . "</td>\n";
		endforeach;
		// every record must fit the number of fields of the first line
		$count = count ($record);
		if ($count > $fields):
			$this->error ('Too many fields in TSV line ' . strval ($row + 1), 183);
			return '';
		endif;
		if ($count < $fields):
			$table[$row] = array_pad ($table[$row], $fields, "<td></td>\n");
		endif;
	endforeach;

	// optionally transpose data
	if ($transpose):
		$table = transpose_matrix ($table);
	endif;

	// join the cells of each row
	foreach ($table as $row => $cells):
		$table[$row] = "<tr>\n" . implode ($cells) . "</tr>\n";
	endforeach;

	// a regular table has the names in its head
	if (!$transpose):
		return "<table>\n<thead>\n" . $table[0] . "</thead>\n<tbody>\n" . implode (array_slice ($table, 1)) . "</tbody>\n</table>\n";
	endif;

	// a transposed table has one column group for the names and another one
	// for the records, if there are any
	$colgroups = "<colgroup>\n" . ($records? "<colgroup span='$records'>\n": '');
	return "<table>\n$colgroups<tbody>\n" . implode ($table) . "</tbody>\n</table>\n";
}
/* Translates a reference to a note into HTML and returns it: a superscript
* (<sup>) that contains a link (<a>) to the referenced target on the same
* page, or just the superscript text if the target name is empty.
* On entry &$index must give the position of the character ^ in $string that
* marks the reference; the name of the target is expected to follow it. If
* nothing follows the ^, error 71 is reported and an empty string is returned.
* The target name is extracted by targeted (), which also receives &$index in
* order to move it to the last character of the reference.
*/
private function reference (
	string $string,
	int &$index
): string
{
	// step over the ^ and make sure that something follows it
	++$index;
	if (!isset ($string[$index])):
		$this->error ('Target text missing in reference: target expected after ^', 71);
		return '';
	endif;

	// extract the target; targeted () fills $label with the text to display
	$label = '';
	$target = $this->targeted ($string, $index, $label);
	$name = $this->fragment_identifier ($target);

	// without usable name there is nothing to link to
	return $name === ''
		? "<sup>$label</sup>"
		: "<sup><a href='#$name'>$label</a></sup>";
}
/* Puts the properties of this nml2html object back to their initial state:
* every property that get_class_vars () reports for this class is assigned
* its declared default value.
*/
private function reset (
): void
{
	$defaults = get_class_vars (__CLASS__);
	foreach (array_keys ($defaults) as $name):
		$this->{$name} = $defaults[$name];
	endforeach;
}
/* Runs a module, providing it with parameters in the $data array. Regular
* modules return an anonymous function when included and that function is
* cached, called and its return value returned. Legacy modules return a string
* directly and that string is passed along by this function.
* The $caption of the Aneamal block that the module deals with is used in the
* form API, iff this is a regular module using that API.
private function run_module (
string $f, // filename
array $data,
string|null $caption
): string
// Parameters:
//   $f        filename of the module; also the key of both caches below
//   $data     parameters for the module; $data['type'] is used in error
//             messages and $data['links'], if set, determines the cardinality
//   $caption  caption of the block or NULL; becomes 'topic' in the form API
// Throws a CardinalityException, if the number of links does not fit the
// range the module accepts. A ParseError of a module file is thrown again
// each time that module is run.
// cache for anonymous functions (= Closures) that are returned by modules
static $closure = [];
// cache for settings a module makes via its default parameter values
static $setting = [];
// X-modules can accept a variable number of links. The cardinality says how
// many were actually provided; the default for *any* module being 1.
$cardinality = isset ($data['links'])? count ($data['links']): 1;
// Run cached modules:
if (isset ($closure[$f])):
// A cardinality outside the range expected by the module is an error.
if ($cardinality < $setting[$f]['min']):
throw new CardinalityException ("$cardinality instead of minimum " . $setting[$f]['min']);
elseif ($cardinality > $setting[$f]['max']):
throw new CardinalityException ("$cardinality exceeds maximum of " . $setting[$f]['max']);
// Form API. The order in which $data items are set is important here,
// since form_time_check () reads $this->form and aborts a submission by
// emptying $_POST in case of an invalid timestamp.
// (The 'post' setting only exists for modules that declare a $post parameter
// with a default value.)
if (isset ($setting[$f]['post'])):
$data['form'] = $this->form ??= $this->form_id ();
$data['cron'] = $this->form_time_check ();
$data['post'] = $this->post;
// If the form was posted and the module has a post handler, it is
// called to get an array of inputs in the module's responsibility.
// (An empty string as 'post' setting means: form API without post handler.)
if ($setting[$f]['post'] !== '' and isset ($_POST['_form']) and $_POST['_form'] === $this->form):
$block = $setting[$f]['post'] ($data);
if (!is_array ($block)):
$this->error ('Post handler of module ' . $data['type'] . ' returned unexpected type: ' . get_debug_type ($block), 243);
elseif (!array_is_list ($block)):
$this->error ('Post handler of module ' . $data['type'] . ' did not return a PHP list', 251);
// the new entry is recorded for the document and also added to the copy
// that this module receives
$data['post'][] = $this->post[] = isset ($caption)? [
'addon' => $data['type'],
'block' => $block,
'topic' => $caption,
'addon' => $data['type'],
'block' => $block,
return (string) ($closure[$f] ($data));
// Otherwise report an error, if the module file cannot be read; that state
// is not cached by this script, because PHP itself caches is_readable.
if (!is_readable ($f)):
$this->error ('Module not found: ' . $data['type'], 186);
return '';
// Otherwise include the module file; parse errors are reported through an
// anonymous function so that they will be cached and reported whenever an
// erroneous module is used without PHP trying to parse it again and again.
// This function "accepts" any cardinality, so that no cardinality errors
// will be reported instead of the parse error. (We do not know which
// cardinality the erroneous module would accept if it was bug-free.)
try {
$response = include_module ($f, $data);
} catch (\ParseError $e) {
// the defaults of $min and $max are read as settings further below
$response = function ($_, $min = 0, $max = PHP_INT_MAX) use ($e) {
throw $e;
return '';
// If the module returned an anonymous function, cache it and the settings
// communicated via its function parameter default values, and execute it.
if ($response instanceof \Closure):
$closure[$f] = $response;
foreach ((new \ReflectionFunction ($response))->getParameters () as $i => $parameter):
// The 0th parameter does not communicate settings, instead
// accepting the $data provided to the module.
if ($i > 0 and $parameter->isDefaultValueAvailable ()):
$value = $parameter->getDefaultValue ();
switch ($parameter->name):
case 'max': // maximum number of links for an x-module
case 'min': // minimum number of links for an x-module
$setting[$f][$parameter->name] = (int) $value;
case 'post':
// The post handler must be callable, if it exists, or
// it will be reported as error instead of running the
// module at all. Module developers must fix this. Being
// lenient here would just complicate trouble-shooting.
if ($value === '' or is_callable ($value)):
$setting[$f]['post'] = $value;
// the cached function is replaced by one that only reports the error
$closure[$f] = function ($_) use ($data, $value) {
$this->error ('Post handler of module ' . $data['type'] . " not callable: $value", 242);
return '';
// modules that do not declare a range accept exactly one link
$setting[$f]['max'] ??= 1;
$setting[$f]['min'] ??= 1;
// the module is cached now, so this call takes the "cached" branch above
return $this->run_module ($f, $data, $caption);
// Only modules that return an anonymous function work with a cardinality
// other than 1, but this part of the function handles modules that do not
// return an anonymous function, hence a different cardinality is an error.
if ($cardinality !== 1):
throw new CardinalityException ("$cardinality instead of expected 1");
// Otherwise return the response of the module; this is mainly for backwards
// compatibility as modules were not required to return a cacheable
// anonymous function in the past.
return (string) $response;
/* Processes a $block that represents a regular section break or heading with
* optional sublines and returns their HTML equivalent. $attributes contains
* HTML attributes such as classes that apply to the section's heading and are
* set on the <hgroup> element, if available, or a <div>.
* The first three bytes of $block are the section mark; they select the rank
* via self::sectioners. Problems are reported through $this->error:
* 188 (main heading incomplete), 4 (heading not closed), 176 (heading text
* missing) and 91 (no line feed between heading and sublines).
* Side effects: closes previously opened sections via $this->end_sections,
* registers the closing tag of a newly opened section in $this->sections and
* sets $this->title from a main heading if no title has been set yet.
*/
private function sectioner (
	string $block,
	array $attributes = []
): string
{
	$start = substr ($block, 0, 3);
	$rank = self::sectioners[$start];

	// prepare wrapper for attributes
	if ($attributes):
		$attr = implode_html_attributes ($attributes);
		$starttag = "<div{$attr}>\n";
		$endtag = "</div>\n";
	else:
		$attr = $starttag = $endtag = '';
	endif;

	// end previous expandable sections
	$sectiontags = $this->end_sections ($rank);

	// handle section breaks: the block consists of the mark only
	if ($start === $block):
		if ($rank === 1):
			$this->error ('Main heading incomplete', 188);
			return $sectiontags;
		endif;
		return $sectiontags . $starttag . "<hr class='_h{$rank}'>\n" . $endtag;
	endif;

	// find the end of the heading and extract its content
	$endpos = strrpos_unmasked ($block, $start, self::masks, 3);
	if ($endpos === NULL):
		$this->error ('Heading not closed: expected ' . $start, 4);
		return $sectiontags;
	endif;
	$content = trim (strip_slashed_breaks (substr ($block, 3, $endpos - 3)));
	if ($content === ''):
		// reported, but the (empty) heading is still rendered below
		$this->error ('Heading text missing', 176);
	endif;

	// translate the heading to HTML; the first line is the heading proper and
	// supplies the section's class, further lines become <span>s after <br>
	$return = "<h$rank>";
	foreach (explode ("\n", $content) as $i => $line):
		if ($i > 0):
			$return .= '<br><span>' . $this->phrase ($line) . '</span>';
		else:
			$class = prepare_html_id ($line);
			$return .= $this->phrase ($line);
		endif;
	endforeach;
	$return .= "</h$rank>\n";

	// use the main heading as <title> if no @title was declared as metadata
	if ($rank === 1 and $this->title === ''):
		$this->title = str_replace ("\n", "\x20", stripslashes (ltrim ($content)));
	endif;

	// add tags for this section
	if ($rank > 1):
		$sectiontags .= "<section class='$class'>\n";
		$this->sections[$rank] = '</section>';
	endif;

	// return the heading with the optional sublines
	if (strlen ($block) === $endpos + 3):
		return $sectiontags . $starttag . $return . $endtag;
	elseif ($block[$endpos + 3] !== "\n"):
		$this->error ('Line feed missing after heading', 91);
		return $sectiontags . $starttag . $return . $endtag;
	endif;
	$sublines = $this->phrase (ltrim (substr ($block, $endpos + 3)));
	return $sectiontags . "<hgroup{$attr}>\n$return<p>$sublines</p>\n</hgroup>\n";
}
/* Parses $lines which represent a single- or multi-tagged list. Returns a HTML
* description list (<dl>) for a single-tagged list or a HTML <table> for
* multi-tagged lists. In single-tagged lists the same tag may occur for
* multiple items, whereas the same tag-combination may only occur once in a
* multi-tagged list.
* Admittedly, this function is a monster.
*/
// Overview: the first loop splits every item into its tags and its content,
// the second loop numbers the tags and builds the row/column axes, and the
// final part renders a <dl> (one tag per item) or a <table> (two to four tags
// per item). Lists without tags or with more than four tags per item yield an
// error and an empty string.
// NOTE(review): this copy of the function shows no lines that consist of a
// single token (else:, endif;, endforeach;, break;, continue;, default: ...),
// so branch boundaries are not visible here; confirm the exact nesting against
// the complete file before changing any logic.
private function tagged_list (
array $lines
): string
$items = [];
$dimension = 0;
$origin = NULL;
// this foreach preprocesses each item by extracting the tags and content
foreach (joint ($lines, fn ($x) => $x[0] === '<') as $k => $item):
// $close starts at -1 so that the first search below begins at offset 1,
// directly after the item's leading <
$close = -1;
$start = 1;
$tags = [];
$count = 0;
$empty = 0;
// save the item's tags, identified by pointy brackets < >
while ($close = strpos_unmasked ($item, '>', self::masks, $close + 2)):
// continue searching if > is preceded by an unslashed -, which
// means it defines a link, not the end of the tag
if ($item[$close - 1] === '-' and !is_slashed ($item, $close - 1)):
// save the tag, count empty and non-empty tags
$tags[$count] = trim (substr ($item, $start, $close - $start));
// an empty tag does not advance $count, so its slot in $tags is
// overwritten by the next tag
$tags[$count] === ''? ++$empty: ++$count;
// check whether > is followed by <, which means another tag follows
// for the same item, otherwise stop searching
if (isset ($item[$close + 1]) and $item[$close + 1] === '<'):
$start = $close + 2;
// the item with the biggest number of tags determines the dimension
if ($dimension < $count):
$dimension = $count;
// save the item if a closing > was found, the number of tags is
// consistent with the list's dimension and there are no empty tags or
// all tags are empty in a list with dimension greater than one
if ($close === NULL):
$this->error ('Malformed tagged-list item: expected >', 7, $k);
// exactly two empty tags and no other tag: the item defines the origin,
// i.e. the content of the table's top-left cell
elseif ($empty === 2 and $count === 0):
if ($origin === NULL):
$origin = substr ($item, $close + 1);
$this->error ('Origin already set', 105, $k);
elseif ($empty !== 0):
$this->error ('Empty tag in tagged list', 106, $k);
// the item's content is appended after its tags as last array element
$tags[] = substr ($item, $close + 1);
$items[] = $tags;
// if dimension is 1, render the list as HTML description list …
if ($dimension === 1):
if ($origin !== NULL):
$this->error ('Origin set for single-tagged list', 219);
$list = '';
// each item is a pair of its only tag and its content
foreach ($items as [$tag, $content]):
$list .= '<dt>' . $this->phrase ($tag) . "</dt>\n<dd>" . $this->item ($content) . "</dd>\n";
return "<dl>\n$list</dl>\n";
// send an error for tagged lists without tag
elseif ($dimension < 1):
$this->error ("Tagged list lacks tags", 225);
return '';
// send an error for tagged lists with dimension greater than four
elseif ($dimension > 4):
$this->error ("Too many tags: $dimension", 49);
return '';
// In the following foreach loop the different tags are mapped to $numbers,
// which are then used to refer to the tags. $headings is filled with the
// tags processed for HTML output. $contents is filled with the contents
// of the items processed for HTML output. $x_axis and $y_axis mirror the
// structure of column-/row headings; the $branch array is used to
// remember which leading tag combinations are used with further tags to
// make sure that there is no content directly assigned to these.
// Consider the item <~a~><b><c> ~d~; the following assignments will be made:
// $numbers['~a~'] = 0
// $numbers['b'] = 1
// $numbers['c'] = 2
// $headings[0] = '<i>a</i>'
// $headings[1] = 'b'
// $headings[2] = 'c'
// $contents['0:1:2'] = '<i>d</i>';
// $y_axis[0][2] = 0;
// $x_axis[1] = [];
// $branch[0] = true;
// $branch['0:1'] = true;
$numbers = $headings = $contents = $y_axis = $x_axis = $branch = [];
foreach ($items as $item):
// the content was stored as last element, the remaining elements are tags
$content = array_pop ($item);
$tags = [];
$last = count ($item) - 1;
foreach ($item as $i => $tag):
// reuse the number of a known tag or assign the next free number
if (isset ($numbers[$tag])):
$t = $numbers[$tag];
$t = $numbers[$tag] = count ($numbers);
$headings[$t] = $this->phrase ($tag);
$tags[] = $t;
// key of the tag combination up to and including the current tag
$tagstring = implode (':', $tags);
if (isset ($contents[$tagstring])):
$error = '<' . implode ('><', array_slice ($item, 0, $i + 1)) . '>';
$this->error ("Tag combination already used to tag an item: $error", 220);
continue 2;
elseif (!isset ($branch[$tagstring]) and $i !== $last):
$branch[$tagstring] = true;
if (isset ($branch[$tagstring])):
$error = '<' . implode ('><', $item) . '>';
$this->error ("Tag combination already used in another combination: $error", 221);
continue 1;
$contents[$tagstring] = $this->item ($content);
// register the tags on the axes: the 1st and 3rd tag go to $y_axis (rows),
// the 2nd and 4th tag go to $x_axis (columns)
switch (count ($tags)):
case 1:
$y_axis[$tags[0]] = [];
case 2:
isset ($y_axis[$tags[0]]) or $y_axis[$tags[0]] = [];
isset ($x_axis[$tags[1]]) or $x_axis[$tags[1]] = [];
case 3:
$y_axis[$tags[0]][$tags[2]] = 0;
isset ($x_axis[$tags[1]]) or $x_axis[$tags[1]] = [];
case 4:
$y_axis[$tags[0]][$tags[2]] = 0;
$x_axis[$tags[1]][$tags[3]] = 0;
// these helper arrays are not needed for rendering
unset ($branch, $items, $numbers);
// process the origin for HTML output
$origin = isset ($origin)? $this->item ($origin): '';
// two tags per item: one row heading column plus one column per x-tag
if ($dimension === 2):
// build the first row of the table
$colcount = count ($x_axis);
$table = "<colgroup>\n<colgroup span='$colcount'>\n";
$table .= "<thead>\n<tr>\n<td>$origin</td>\n";
foreach (array_keys ($x_axis) as $x):
$table .= "<th scope='col'>" . $headings[$x] . "</th>\n";
$table .= "</tr>\n</thead>\n";
// build the body of the table
$table .= "<tbody>\n";
foreach (array_keys ($y_axis) as $y):
$table .= "<tr>\n<th scope='row'>" . $headings[$y] . "</th>\n";
// content tagged with the row tag only spans all columns
if (isset ($contents[$y])):
$table .= "<td colspan='$colcount'>" . $contents[$y] . "</td>\n";
foreach (array_keys ($x_axis) as $x):
$table .= '<td>' . ($contents["$y:$x"] ?? '') . "</td>\n";
$table .= "</tr>\n";
return "<table>\n$table</tbody>\n</table>\n";
// three tags per item: rows are grouped into rowgroups, one <tbody> each
elseif ($dimension === 3):
// build the first row of the table
$colcount = count ($x_axis);
$table = "<colgroup span='2'>\n<colgroup span='$colcount'>\n";
$table .= "<thead>\n<tr>\n<td colspan='2'>$origin</td>\n";
foreach (array_keys ($x_axis) as $x):
$table .= "<th scope='col'>" . $headings[$x] . "</th>\n";
$table .= "</tr>\n</thead>\n";
// build the body of the table
foreach ($y_axis as $y => $rowgroup): // rowgroups
$table .= "<tbody>\n<tr>\n";
if (empty ($rowgroup)): // single row that is a rowgroup
$table .= "<th colspan='2' scope='rowgroup'>" . $headings[$y] . "</th>\n";
if (isset ($contents[$y])): // single cell spanning all cols
$table .= "<td colspan='$colcount'>" . $contents[$y] . "</td>\n";
else: // row with named cells
foreach (array_keys ($x_axis) as $x): // cells
$table .= '<td>' . ($contents["$y:$x"] ?? '') . "</td>\n";
else: // rowgroup with named rows
$rowspan = " rowspan='" . count ($rowgroup) . "'";
$table .= "<th{$rowspan} scope='rowgroup'>" . $headings[$y] . "</th>\n";
$tbody = [];
foreach (array_keys ($rowgroup) as $r => $z): // rows
$tr = "<th scope='row'>" . $headings[$z] . "</th>\n";
foreach (array_keys ($x_axis) as $x): // cells
if (isset ($contents["$y:$x"])): // cell spans rows
// the spanning cell is emitted in the group's first row only
if ($r === 0):
$tr .= "<td{$rowspan}>" . $contents["$y:$x"] . "</td>\n";
else: // simple cell
$tr .= '<td>' . ($contents["$y:$x:$z"] ?? '') . "</td>\n";
$tbody[] = $tr;
$table .= implode ("</tr>\n<tr>\n", $tbody);
$table .= "</tr>\n</tbody>\n";
return "<table>\n$table</table>\n";
else: // $dimension === 4
// build colgroups and table head
$table = "<colgroup span='2'>\n";
$toprow = "<td colspan='2' rowspan='2'>$origin</td>\n";
$subrow = '';
// $groups maps each column tag to the number of its sub-columns
$groups = [];
$colcount = 0;
foreach ($x_axis as $x => $colgroup):
$groups[$x] = $count = count ($colgroup);
if ($count === 0):
$table .= "<colgroup>\n";
$toprow .= "<th rowspan='2' scope='colgroup'>" . $headings[$x] . "</th>\n";
$colcount += $count;
$table .= "<colgroup span='$count'>\n";
$toprow .= "<th colspan='$count' scope='colgroup'>" . $headings[$x] . "</th>\n";
foreach (array_keys ($colgroup) as $q):
$subrow .= "<th scope='col'>" . $headings[$q] . "</th>\n";
$table .= "<thead>\n<tr>\n$toprow</tr>\n<tr>\n$subrow</tr>\n</thead>\n";
// build the body of the table
foreach ($y_axis as $y => $rowgroup): // rowgroups
$table .= "<tbody>\n<tr>\n";
if (empty ($rowgroup)): // single row that is a rowgroup
$table .= "<th colspan='2' scope='rowgroup'>" . $headings[$y] . "</th>\n";
if (isset ($contents[$y])):
$table .= "<td colspan='$colcount'>" . $contents[$y] . "</td>\n";
foreach (array_keys ($x_axis) as $x):
$colspan = $groups[$x] === 0? '': " colspan='" . $groups[$x] . "'";
$table .= "<td{$colspan}>" . ($contents["$y:$x"] ?? '') . "</td>\n";
else: // rowgroup with named rows
$t = count ($rowgroup);
$table .= "<th rowspan='$t' scope='rowgroup'>" . $headings[$y] . "</th>\n";
$tbody = [];
foreach (array_keys ($rowgroup) as $r => $z): // rows
$tr = "<th scope='row'>" . $headings[$z] . "</th>\n";
foreach ($x_axis as $x => $colgroup): // cellgroups
if (empty ($colgroup)): // single cell that is a cellgroup
if (isset ($contents["$y:$x"])):
if ($r === 0):
$tr .= "<td rowspan='$t'>" . $contents["$y:$x"] . "</td>\n";
$tr .= '<td>' . ($contents["$y:$x:$z"] ?? '') . "</td>\n";
elseif (isset ($contents["$y:$x"])):
if ($r === 0):
$tr .= "<td colspan='" . $groups[$x] . "' rowspan='$t'>" . $contents["$y:$x"] . "</td>\n";
else: // cellgroup with col-named cells
foreach (array_keys ($colgroup) as $c => $q):
if (isset ($contents["$y:$x:$z"])):
if ($c === 0):
$tr .= "<td colspan='" . $groups[$x] . "'>" . $contents["$y:$x:$z"] . "</td>\n";
$tr .= '<td>' . ($contents["$y:$x:$z:$q"] ?? '') . "</td>\n";
$tbody[] = $tr;
$table .= implode ("</tr>\n<tr>\n", $tbody);
$table .= "</tr>\n</tbody>\n";
return "<table>\n$table</table>\n";
/* Identifies the end of a target in a $string, translates it into an HTML
* <span> element with id attribute and returns it. The position of the first
* character # of the target must be given in &$index at the beginning and is
* set to the position of the last character at the end. If the # is directly
* followed by a hint {…}, the target will be invisible in the HTML output, but
* it will still be possible to link to it. If the # is directly followed by
* metadata @, the metadata value will be displayed in the HTML output, but the
* metadata name must be used to link to it.
* Reports error 42 and returns an empty string if nothing follows the #.
*/
private function target (
	string $string,
	int &$index
): string
{
	// make sure there is a next character
	if (!isset ($string[++$index])):
		$this->error ('Target text missing: text expected after #', 42);
		return '';
	endif;

	// extract and process the target name; $text stays empty for a hint {…},
	// otherwise targeted () fills it with the text to display
	$text = '';
	if ($string[$index] === '{'):
		$name = $this->fragment_identifier ($this->encurled ($string, $index), true);
	else:
		$name = $this->fragment_identifier ($this->targeted ($string, $index, $text), true);
	endif;

	// prepare and return the HTML output; no id attribute for an empty name
	$name === '' or $name = " id='$name'";
	return "<span{$name}>$text</span>";
}
/* Identifies the end of a hook or target name in a $string, either where the
* target is set (except invisible targets #{...}), or where it is referenced/
* linked to. &$index must give the position of the first character of the
* target name initially, excluding marks &, #, ^, ->, and is set to its last
* character's position at the end. If an empty string is passed as &$text
* parameter, that variable will be replaced with parsed text generated from the
* target name for display. Returns the target name.
* With the default NULL (or any non-empty string) as &$text, no display text
* is generated, because every branch only fills $text when it is exactly ''.
*/
private function targeted (
	string $string,
	int &$index,
	string|null &$text = NULL
): string
{
	$char = $string[$index];

	// emphasis and quotation marks
	if (in_array ($char, ['*', '~', '_', '"'], true)):
		$group = $this->enclosed ($string, $index);
		if ($text === ''):
			$text = $this->emphasis ($group, $char);
		endif;

	// code
	elseif ($char === '|'):
		$group = $this->enclosed ($string, $index, true);
		if ($text === ''):
			$text = $this->code ($group);
		endif;

	// math
	elseif ($char === '$'):
		$group = $this->enclosed ($string, $index, true);
		if ($text === ''):
			$text = $this->math ($group);
		endif;

	// brackets; falls through to the next branches if bracketed () gives NULL
	elseif (isset (self::brackets[$char]) and !is_null ($group = $this->bracketed ($string, $index))):
		if ($text === ''):
			$text = $char . $this->phrase ($group) . self::brackets[$char];
		endif;

	// two-character marks: + or = followed by -
	elseif (str_match ($string, [$index => '+=', '-'])):
		$group = $this->enclosed2 ($string, $index);
		if ($text === ''):
			$text = $this->emphasis ($group, "$char-");
		endif;

	// nested target: the name is read after the #, the display text is
	// produced by target () starting again at the # itself
	elseif ($char === '#' and isset ($string[$index + 1])):
		$start = $index++;
		$group = $this->targeted ($string, $index);
		if ($text === ''):
			$text = $this->target ($string, $start);
		endif;

	// nested hook: an & at the very end of the string yields an empty name
	elseif ($char === '&'):
		$start = $index++;
		$group = isset ($string[$index])? $this->targeted ($string, $index): '';
		if ($text === ''):
			$text = $this->hook ($string, $start);
		endif;

	// metadata: the name excludes the @, the display text is parsed from
	// the whole span including the @
	elseif ($char === '@' and isset ($string[$index + 1])):
		$start = $index++;
		$group = $this->group ($string, $index);
		if ($text === ''):
			$text = $this->phrase (substr ($string, $start, $index - $start + 1));
		endif;

	// plain text
	else:
		$group = $this->group ($string, $index);
		if ($text === ''):
			$text = $this->phrase ($group);
		endif;
	endif;

	return $group;
}
/* Parses $lines that make up single- and multi-line text boxes. Each box is
* marked by a token like [_] or [=] at the beginning of a line, optionally with
* a linked file to supply default text and a label. There can be a $question
* associated with the boxes. Returns a HTML <fieldset> with the optional
* $question as <legend>, <input>s and/or <textarea>s and optionally <label>s.
* Feeds the form API.
*/
// Overview: renders one <input> or <textarea> per textbox token found in
// $lines, wraps them in a <fieldset> and, if the enclosing form was posted,
// appends the submitted values to $this->post for the form API.
// NOTE(review): this copy of the function shows no lines that consist of a
// single token (else:, endif;, endforeach;, continue;, closing brackets ...),
// so branch boundaries and possible skip statements after the error reports
// are not visible here; confirm against the complete file before editing.
private function textboxes (
array $lines,
string|null $question = NULL
): string
// $html collects the markup per textbox, $data the posted values per textbox
$html = $data = [];
// this block is active, if it belongs to a form that has been posted
$this->form ??= $this->form_id ();
$active = isset ($_POST['_form']) && $_POST['_form'] === $this->form;
// split the block into parts where each part corresponds to a textbox and
// optionally modifiers and label and process each part
foreach (joint ($lines, fn ($x) => str_match ($x, ['[', '_=-', ':]'])) as $k => $part):
// differentiate between textbox types
// ($part[1] is the character after the opening [: _ or = or -)
$type = $part[1];
if ($type === '-'):
$this->error ("Reserved textbox type: [-]", 236, $k);
// find the end of the textbox token and extract the optional clue
if (is_null ($pos = strpos_unslashed ($part, ']', 2))):
$this->error ('Malformed textbox: expected ]', 233, $k);
// the clue is the text between offset 3 and the closing ]
elseif ($pos > 2):
$clue = strip_slashed_breaks_and_slashes (substr ($part, 3, $pos - 3));
$clue = NULL;
// generate a globally unique ID and a name that is unique for the form
$attr = [
'id' => $id = get_unique (),
'name' => $name = get_unique ($this->form),
'form' => $this->form,
// single-line textboxes get a 'text' type attribute (multi-line
// textboxes get an own HTML element instead) that is the HTML default
// anyway, but is useful for styling with CSS
if ($type === '_'):
$attr['type'] = 'text';
// optional placeholder
if (isset ($clue)):
$attr['placeholder'] = $clue;
// see whether text input is required
if (isset ($part[++$pos]) and $part[$pos] === "!"):
// the key is set with a NULL value; presumably implode_html_attributes
// renders it as a valueless attribute — confirm in func.php
$attr['required'] = NULL;
// load the text from an optional linked file
$content = $datalist = '';
if (substr ($part, $pos, 2) === '->'):
if (!is_null ($uri = $this->address ($part, $pos))):
if (!is_null ($file = $this->file_raw ($uri, true, true, $k))):
// The text is used as default content for multi-line boxes.
if ($type === '='):
$content = encode_special_chars ($file);
// Multi-line text for a single-line textbox is interpreted
// as a single-column TSV file offering input suggestions.
elseif (str_contains ($file, "\n")):
$attr['list'] = $listid = get_unique ();
$options = [];
foreach (explode ("\n", $file) as $n => $line):
// A single-column TSV may not contain tabs.
if (str_contains ($line, "\t")):
$this->error ('Unexpected tab character in suggestions file', 235);
// keep only the text before the first tab
$line = substr ($line, 0, strpos ($line, "\t"));
// The 0th line is the TSV nameline, not an option.
$n and $options[] = "<option value='" . encode_special_chars ($line) . "'>\n";
$datalist = "\n<datalist id='$listid'>\n" . implode ($options) . "</datalist>";
// Single-line text serves as default for single-line boxes.
$attr['value'] = $file;
// prepare the optional HTML $label and $about info for the form API
if (isset ($part[$pos])):
$about = ltrim (substr ($part, $pos));
$label = " <label for='$id'>" . $this->phrase ($about) . "</label>";
if (!str_contains (self::space, $part[$pos])):
$this->error ('Missing whitespace after textbox', 234, $k);
// without label text, the clue (possibly NULL) describes the textbox
$about = $clue;
$label = '';
// add textbox to HTML output
$attributes = implode_html_attributes ($attr);
$html[] = match ($type) {
'_' => "<span><input{$attributes}>{$label}</span>{$datalist}",
'=' => "<span><textarea{$attributes}>$content</textarea>$label</span>",
// prepare data about this textbox for the form API, if the current
// form has been submitted
if ($active):
// sanitation for single-line textboxes: remove newlines
if ($type === '_' and isset ($_POST[$name])):
$_POST[$name] = str_replace (["\r", "\n"], '', $_POST[$name]);
// abort submission, if required content is missing
// (array_key_exists, because the value stored for 'required' is NULL and
// isset would therefore report false)
if (array_key_exists ('required', $attr)):
if (!isset ($_POST[$name]) or $_POST[$name] === ''):
// discards everything posted so far, for this block and the form API
$active = false;
$_POST = $this->post = [];
// add sanitized posted data to the form API
if (isset ($_POST[$name]) and $_POST[$name] !== ''):
$value = normalize_text ($_POST[$name]);
$data[] = isset ($about)? ['input' => $value, 'label' => $about]: ['input' => $value];
// add data about this textbox block to the form API, if the current form
// has been submitted (even if $data is empty)
if ($active):
$this->post[] = isset ($question)? ['topic' => $question, 'block' => $data]: ['block' => $data];
// return the HTML
$legend = isset ($question)? '<legend>' . $this->phrase ($question) . "</legend>\n": '';
return "<fieldset>\n" . $legend . implode ("<br>\n", $html) . "\n</fieldset>\n";
/* Executes a module $type, located in the /aneamal/$type folder. This can be a
* t-module [t-...], x-module [x-...] or math module which parses $$...$$ and
* $...$. It supplies the module with $arguments specific to the kind of module
* and additional data sent to all modules such as an optional $clue.
* The optional $caption is informational and passed through for the form API.
* Returns the module's output after text normalization. Anything thrown while
* the module runs is reported through $this->error (codes 197, 198, 199, 215,
* 222; 218 for an unacceptable module name) and results in an empty string.
*/
private function use_module (
	string $type,
	array $arguments,
	string|null $clue = NULL,
	string|null $caption = NULL
): string
{
	// prevent stuff like [x-foo/../html]->bar, which would include THIS file
	if (str_contains ($type, '.') or substr_count ($type, '/') > 1):
		$this->error ("Unexpected character in module name: $type", 218);
		return '';
	endif;

	// add data that is supplied to all modules; $name is the part of $type
	// before an optional slash and these keys override equal keys in
	// $arguments, since they come last in array_merge
	$name = str_contains ($type, '/')? substr ($type, 0, strcspn ($type, '/')): $type;
	$prfx = '_' . $name[0] . $this->filekind;
	$data = array_merge ($arguments, [
		'base' => $this->home . $this->dir,
		'clue' => $clue,
		'here' => $this->home . '/aneamal/' . $type,
		'home' => $this->home,
		'lang' => $this->lang,
		'meta' => $this->modules[$name] ?? '',
		'name' => $name,
		'pixl' => $this->pixels ?? self::pixsize,
		'root' => $this->root,
		'type' => $type,
		'uniq' => get_unique ($prfx, $prfx),
	]);

	// run the module; the more specific throwables must be caught before the
	// generic \Throwable
	try {
		$path = __DIR__ . '/' . $type;
		$result = $this->run_module ($path . '/index.php', $data, $caption);
	} catch (\ParseError $e) {
		$this->error ("ParseError in module $type: \"" . $e->getMessage () . '" in ' . strip_root ($e->getFile ()) . ', line ' . $e->getLine (), 198);
		return '';
	} catch (\TypeError $e) {
		$this->error ("TypeError in module $type: " . strip_root ($e->getFile ()) . ', line ' . $e->getLine (), 199);
		return '';
	} catch (CardinalityException $e) {
		$this->error ("Wrong number of links for module $type: " . $e->getMessage (), 215);
		return '';
	} catch (ModuleMessage $e) {
		$this->error ("Message from module $type: \"" . $e->getMessage () . '"', 222);
		return '';
	} catch (\Throwable $e) {
		$this->error ("Error in module $type: \"" . $e->getMessage () . '" in ' . strip_root ($e->getFile ()) . ', line ' . $e->getLine (), 197);
		return '';
	}

	// check text encoding and return the result
	return normalize_text ($result);
}
} // class nml2html
// The End