% main-V0.tex

%
% initial version 
% 
%

\documentclass[runningheads]{llncs}
%\documentclass[a4paper,11pt]{article}
%\setcounter{page}{1}

\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\usepackage[english]{babel}

\usepackage{hyperref}
%\usepackage[bookmarks,bookmarksnumbered,naturalnames,plainpages=false]{hyperref}
%usepackage{url}

% for footnote ref
\usepackage{refcount} 

% array and tabular
\usepackage{array}
\newcolumntype{C}[1]{>{\centering\arraybackslash\hspace{0pt}}p{#1}}

% extension of enumerate env. (style for displaying counters)
% \usepackage{enumerate} 

%% pictures
% \usepackage{graphicx} 
% \DeclareGraphicsExtensions{.pdf,.png,.jpg}
% \graphicspath{fig/}

%% PGF, Tikz
\usepackage{tikz-cd}
%% \usepackage{pgfplots}
%% \usepgfplotslibrary{dateplot}
%% \usepackage{pgf,pgfarrows,pgfnodes, pgfautomata}
% \usepackage{tikz}
% \usetikzlibrary{cd}
%% \usetikzlibrary{arrows}
%% \usetikzlibrary{calc}
%% \usetikzlibrary{snakes}
%% \usetikzlibrary{backgrounds}
% \usetikzlibrary{trees}
%% \usetikzlibrary{automata}
%% \usetikzlibrary{positioning}
%% \usetikzlibrary{matrix}
%% \usetikzlibrary{patterns}
%% \usetikzlibrary{shapes}

% symbols
\usepackage{amsmath} 
\usepackage{amssymb} 
\usepackage{amsbsy}
\usepackage{bbold}
\usepackage{latexsym}
%\usepackage{amsfonts}
\usepackage{stmaryrd}
%\usepackage{mathabx}
%\usepackage{MnSymbol}
\usepackage{harmony} % simple music fonts
\usepackage{mathtools} % for arrows
%\usepackage{mathptmx}

%% theorem environments
\usepackage{theorem}
% \newtheorem{theorem}{Theorem} %[section]
% \newtheorem{definition}[theorem]{Definition}
% \newtheorem{lemma}[theorem]{Lemma}
% \newtheorem{corollary}[theorem]{Corollary}
% \newtheorem{proposition}[theorem]{Proposition}
% \newenvironment{proof}{\vspace{-2ex}{\it Proof. }}{\hspace*{\fill} $\Box$\smallskip }
% \theorembodyfont{\slshape}
% \newtheorem{example}[theorem]{Example}
% \newtheorem{remark}[theorem]{Remark}
{\theorembodyfont{\rmfamily} \theoremstyle{break} \newtheorem{algo}{Algorithm}}

%% algorithms
%\usepackage{algorithm} 
%\usepackage{program} 

%% for new macros
\usepackage{xspace}

%% arrows etc
%\input{rewriting}

%% Misc macros

\def\ie{\textit{i.e.}\xspace}
\def\eg{\textit{e.g.}\xspace}
\def\wrt{\textit{wrt}\xspace}
%\def\wlog{\textit{wlog}\xspace}
\def\etc{\textit{etc}\xspace}

\def\<#1>{\langle #1 \rangle}
\newcommand{\pair}[2]{\langle{#1}, {#2}\rangle}
%\newcommand{\A}{\mathcal{A}}
%\newcommand{\B}{\mathcal{B}}
\newcommand{\D}{\mathbb{D}}
\newcommand{\E}{\mathbb{E}}
\newcommand{\W}{\mathbb{W}}
\newcommand{\T}{\mathcal{T}}

\newcommand{\Semiring}{\mathbb{S}}
\newcommand{\zero}{\mathbb{0}}
\newcommand{\one}{\mathbb{1}}
\newcommand{\dom}{\ensuremath{\mathit{dom}}}


\def\SA{\textsf{SA}\xspace}
\def\WA{\textsf{WA}\xspace}
\def\SWT{\textsf{swT}\xspace}
\def\SWA{\textsf{swA}\xspace}
\def\SWTA{\textsf{swTA}\xspace}
\def\SWVPA{\textsf{sw-VPA}\xspace}
\def\weight{\mathsf{weight}}
\def\wei{\mathsf{w}}
\def\mei{\mathsf{m}}
\def\init{\mathsf{in}}
\def\final{\mathsf{out}}
\newcommand{\call}[1]{\ensuremath #1} %{\ensuremath \langle_{#1}}
\newcommand{\return}[1]{\ensuremath #1} %{\ensuremath {}_{#1}{\rangle}} % $\prescript{}{a}{)}$
\def\Omegai{{\Omega_\mathsf{i}}}
\def\Omegac{{\Omega_\mathsf{c}}}
\def\Omegar{{\Omega_\mathsf{r}}}
\def\Sigmai{{\Sigma_\mathsf{i}}}
\def\Sigmac{{\Sigma_\mathsf{c}}}
\def\Sigmar{{\Sigma_\mathsf{r}}}
\def\Deltai{{\Delta_\mathsf{i}}}
\def\Deltac{{\Delta_\mathsf{c}}}
\def\Deltar{{\Delta_\mathsf{r}}}
\def\Phii{{\Phi_\mathsf{i}}}
\def\Phic{{\Phi_\mathsf{c}}}
\def\Phir{{\Phi_\mathsf{r}}}
\def\Phicr{{\Phi_\mathsf{cr}}}
\def\weii{{\wei_\mathsf{i}}}
\def\weic{{\wei_\mathsf{c}}}
\def\weir{{\wei_\mathsf{r}}}
\def\weie{{\wei_\mathsf{e}}}
\newcommand{\config}[2]{\ensuremath \genfrac{[}{]}{0pt}{}{#1}{#2}} 
\newcommand{\opluseq}{\ensuremath\mathrel{\oplus}=}
\newcommand{\ioi}[1]{\mathsf{ioi}({#1})}
\newcommand{\rank}{\mathsf{rk}}
\newcommand{\lin}{\mathsf{lin}}

%\sloppy

% Parsing over infinite alphabet as optimal alignment computation 
% as edit-distance between string and language
% 
%\title{Symbolic Weighted Parsing and Automated Music Transcription}
%\title{Symbolic Weighted Language Models and Automated Music Transcription}
\title{Symbolic Weighted Language Models and Parsing over Infinite Alphabets}
%\title{Weighted Visibly Pushdown Automata and Automated Music Transcription}
\author{Florent Jacquemard}
\institute{INRIA \& CNAM, Paris, France\\
\email{florent.jacquemard@inria.fr}}

%\titlerunning{WVPA \& AMT}
\titlerunning{Symbolic Weighted Parsing and Automated Music Transcription}
%\authorrunning{Florent Jacquemard}

\date{\today}
 
\begin{document}
\thispagestyle{empty}
\maketitle

\begin{abstract}
%Symbolic Weighted (SW) extension of symbolic automata...
We propose a framework for weighted parsing over infinite alphabets.
%
It is based on language models called Symbolic Weighted Automata (SWA) 
at the joint %intersection 
between Symbolic Automata (SA) and Weighted Automata (WA), 
as well as Transducers (SWT) and Visibly Pushdown (SWPDA) variants.
%
Like SA, SWA deal with large or infinite input alphabets, 
and like WA, they output a weight value in a semiring domain.
The transitions of SWA are labeled by functions from an infinite alphabet into the weight domain. 
This is unlike SA whose transitions are guarded by Boolean predicates 
over symbols in an infinite alphabet 
and also unlike WA whose transitions are labeled by constant weight values, 
and which deal only with finite alphabets.
%
We present some properties of SWA, SWT and SWPDA models, 
%and show how they can be used 
that we use to define and solve a variant of parsing 
over infinite alphabets.
%
We also briefly describe the application that motivated the introduction of these models: 
a parse-based approach to automated music transcription.
\end{abstract}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% intro
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\section{Introduction} \label{sec:intro}
Parsing %variant of membership problem for formal languages
%- given a language model and an input word $s$, compute a derivation of the model that yields $s$.
is the problem %process 
of structuring a linear representation in input %typically
(a finite word over an alphabet) according to a language model. % (a formal grammar).
% natural language, programming language, 
%
Most of the context-free parsing approaches~\cite{GruneJacobs08parsing}  
assume a finite and reasonably small input alphabet. %models and algorithms 
Such a restriction makes perfect sense in the context of NLP tasks 
like constituency parsing or of programming language compilers or interpreters.
Considering large or infinite alphabets can however be of practical interest
in other cases.
%
For instance, when dealing with large character encodings such as UTF-16, % processing strings in 
\eg for vulnerability detection in Web-applications~\cite{dAntoni21CACM}, 
%
for the analysis (\eg validation or filtering) 
of data streams or serialization of structured documents 
(which may contain textual or numerical attributes)~\cite{Segoufin06csl}, 
or for processing timed execution traces~\cite{Bouyer03algebraic}.
% algebraic definition of a class of data languages 
% (notion of monoid recognizability, based on registers, comparable to Bojancszik et al. data words)
Regarding the latter case, we describe %briefly 
at the end of the paper 
a parse-based approach to automated music transcription,
where a symbolic music performance, 
presented as a sequence of timed musical events, % timestamped
% a symbolic representation of a music performance 
is converted into a structured score in Common Western Music Notation~\cite{foscarin:hal-01988990}.
%generalizes weighted parsing: 
%finding the best derivation for a weighted grammar. 

Various extensions of language models for handling infinite alphabets have been studied.
%words carrying data values in an infinite domain (e.g. integers) e.g. data processing 
For instance, some automata with memory extensions 
allow restricted storage and comparison of input symbols, 
%and correspond with logics, 
(see~\cite{Segoufin06csl} for a survey),
with pebbles for marking positions~\cite{NevenSchwentickVianu04FSMinfinite}, 
registers~\cite{KaminskiFrancez94}, 
or %computations in 2 steps
the possibility to compute on subsequences 
with the same attribute values~\cite{Bojanczyk11FO2}. % data words automata.
%
%for the and verification of infinite state systems 
%(model checkers: alphabet = long bit-vectors)
%...For the representation of  in model checking, verification and 
The automata at the core of model checkers
compute on input symbols represented by large bitvectors~\cite{Vardi07ciaa} %\cite{BaierKatoen08MC}
(sets of assignments of Boolean variables) %propositional variables)
and in practice,  %implementation
each transition accepts a set of such symbols (instead of an individual symbol), 
represented by Boolean formulae or Binary Decision Diagrams.
%
Following a similar idea, % and generalizing,
in symbolic automata (\SA)~\cite{dAntoniVeanes17CAV,dAntoni21CACM}, 
the transitions are guarded by predicates over infinite alphabet domains.
With closure conditions on the sets of such predicates, % (alphabet theories), 
all the good properties enjoyed by automata over finite alphabets are preserved.

Other extensions of language models  %(automata and grammars) 
help in dealing with non-determinism, by the computation of weight values. 
With an ambiguous grammar, there may exist several derivations 
(\emph{abstract syntax trees} -- AST), % \ie the result of an analyze.
yielding one input word. % structuring a word, 
The association of one weight value %in semiring domain
to each derivation makes it possible to select a best one (or the $n$ best ones). % a fixed number of bests 
% = ...a ranking of derivations 
This is roughly the principle of \emph{weighted parsing}
approaches~\cite{Goodman99SemiringParsing,Nederhof03weightedParsing,MorbitzVogler19weighted-parsing}.
In \emph{weighted language models}, 
like \eg probabilistic context-free grammars % (CFG),
and weighted automata (\WA) \cite{Droste09handbook},
a weight value is associated to each transition rule, % production rule
and the rules' weights can be combined with an associative product operator~$\otimes$ into 
the weight of a derivation.
A second operator~$\oplus$, associative and commutative, 
is moreover used to handle ambiguity of the model, 
by summing the weights of the possibly several (in general exponentially many) AST %syntax trees 
associated to a given input word.
Typically, $\oplus$ will select the best of two weight values.
%Intuitively,~$\oplus$ selects, or ranking, the syntax trees.
The weight domain, equipped with these two operators, is assumed, at a minimum, 
to form a \emph{semiring} %$\Semiring$ 
where $\oplus$ can be extended to infinite sums, 
%\cite{Eilenberg74automata}
such as the Viterbi semiring and the tropical min-plus algebra, see Figure~\ref{fig:semirings}. 
%of domain $\mathbb{R}^+ \cup \{ +\infty\}$, 
%where $\oplus$ is min and $\otimes$ is plus .
%by ranking 
%making the weight domain a semiring.
%Some efficient specialized parsing algorithms~\cite{Huang05kbest} have been proposed in this context 
%% models represented as hypergraphs \cite{Huang05kbest}
%in order to compute the $n$ best syntax trees of a given input word without having to enumerate them all.
%Generally based on dynamic programming, these algorithms rely on 
%additional algebraic properties of~$\Semiring$.
%-- see \eg~\cite{Huang05kbest} for some NLP applications.
%The extraction of $n$ best list is useful 
%the problem: quantitative parsing or symbolic parsing
%of parsing of words over infinite input alphabet.

In this paper, we present a uniform framework for weighted parsing over infinite input alphabets.
It is based on weighted %finite states 
language models generalizing 
both~\SA, with functions in an arbitrary semiring instead of Boolean guards, 
and~\WA, by handling infinite alphabets -- Figure~\ref{fig:hierarchy}.
%***
%some weighted language models
%model of weighted CFG 
%computing on words over infinite input alphabets %sets of terminal symbol
%- but with finite sets of states and transitions rules. 
% non-terminals and production rules).
In their transition rules, input symbols appear as variables %(parameters)
and the weight associated to a transition rule is a function of these variables.
%
\begin{figure}
\centering
\begin{tikzpicture}
\node (NFA) at (0,4) {%
  \(
  \begin{array}{c} 
  \mathsf{NFA} : \Sigma_{\mathsf{fin}}^* \to \mathbb{B}\\
  q \xrightarrow{a} q' \quad  a \in \Sigma_\mathsf{fin}
  \end{array} 
  \)
};
%
\node (WA) at (-2,2) {%
  \(
  \begin{array}{c} 
  \mathsf{WA} : \Sigma_{\mathsf{fin}}^* \to \mathbb{S}\\
  q \xrightarrow{a, w} q' \quad  a \in \Sigma_\mathsf{fin}, w \in \mathbb{S}
  \end{array} 
  \)
};
%
\node (SA) at (2,2) {%
  \(
  \begin{array}{c} 
  \mathsf{SA} : \Sigma_{\mathsf{inf}}^* \to \mathbb{B}\\
  q \xrightarrow{\phi} q' \quad \phi : \Sigma_\mathsf{inf} \to \mathbb{B}
  \end{array} 
  \)
};
%
\node (SWA) at (0,0) {%
  \(
  \begin{array}{c} 
  \mathsf{SWA} : \Sigma_{\mathsf{inf}}^* \to \mathbb{S}\\
  q \xrightarrow{\phi} q' \quad \phi : \Sigma_\mathsf{inf} \to \mathbb{S}
  \end{array} 
  \)
};
\draw[->] (NFA)--(WA);
\draw[->] (NFA)--(SA);
\draw[->] (WA)--(SWA);
\draw[->] (SA)--(SWA);
%\begin{array}{c} \mathsf{NFA} : \Sigma^* \to \mathbb{B} \end{array} 
\end{tikzpicture}
\caption{Classes of Symbolic/Weighted Automata. 
$\Sigma_\mathsf{fin}$ is a finite alphabet, 
$\Sigma_\mathsf{inf}$ is a countable alphabet,
$\mathbb{B}$ is the Boolean algebra, 
$\mathbb{S}$ is an arbitrary commutative semiring, 
$q \xrightarrow{\dots} q'$ represents the form of a transition between states $q$ and $q'$.}
\label{fig:hierarchy}  
\end{figure}
%
%This approach is close to the case of 
%Symbolic Automata (SA)~\cite{dAntoniVeanes17CAV,dAntoni21CACM}, 
%except that the domain for weight values is not restricted to be Boolean, 
%like for the guards in the rules of SA, 
%but can be an arbitrary commutative semiring (assuming some restrictions).
%
The models presented here are finite automata called symbolic-weighted (\SWA),
transducers (\SWT) and pushdown automata,
with a visibly pushdown restriction~\cite{AlurMadhusudan09nested} (\SWVPA).
%\wrt an edit distance
The latter model of automata computes on \emph{nested words}~\cite{AlurMadhusudan09nested}, 
a structured form of words parenthesized with markup symbols, 
corresponding to a linearization of trees.
In the context of parsing, they are used here to represent (weighted) AST of CF grammars.
More precisely, a \SWVPA $A$ associates a weight value $A(t)$ % \in \Semiring$ 
to a given nested word $t$, which is the linearization of an AST. %representing a parse tree.
%
On the other hand, 
a \SWT is used to define a distance $T(s, t)$ between two finite words $s$ and $t$
over an infinite alphabet, following~\cite{Mohri03ijfcs}.
Then, the \emph{SW-parsing} problem aims at %computing 
finding $t$ minimizing 
$T(s, t) \otimes A(t)$ (\wrt the ranking defined by $\oplus$)
-- this value is called the distance between $s$ and $A$ in~\cite{Mohri03ijfcs}.
%
Similarly to weighted-parsing 
methods~\cite{Goodman99SemiringParsing,Nederhof03weightedParsing,MorbitzVogler19weighted-parsing}, 
our approach proceeds in two steps, 
based on properties of the SW models. 
The first step is a Bar-Hillel construction where, 
given a \SWT $T$, a \SWVPA $A$, and an input word $s$, 
a \SWVPA $A_{T, s}$ is built, such that for all $t$, $A_{T, s}(t) = T(s, t) \otimes A(t)$.
In the second step, a best AST $t$ is found by applying to $A_{T, s}$ 
a best search algorithm similar to shortest distance in graphs~\cite{Mohri02semiring,Huang05kbest}.
%In expressiveness, they are equivalent to weighted CFG. 
%and can be used in a general approach for parsing over infinite input alphabets.
%
%Let $A$ be a \SWVPA, associating $A(w) \in \Semiring$ 
%to a given a nested word $w$ (representing a parse tree),
%and let a \SWT compute a distance $d$, in $\Semiring$, 
%between 2 strings over respectively an infinite input alphabet and the 
%same (infinite) alphabet of $A$.
%Then, the problem of Symbolic Weighted Parsing is, 
%given an input string $s$, to find a nested word $w$ minimizing 
%(according to the ranking defined by $\oplus$)
%the distance $d(s, w) \otimes A(w)$ between $s$ and $A$, 
%as defined in~\cite{Mohri03ijfcs}.

% First one general edit-distance is defined by a weighted word 
% transducer~\cite{Mohri}
% %Symbolic automata transducers are extended models~\cite{VeanesdAntoniJACM}
% %dealing with infinite set of input symbols...
% value in a semiring...

The main contributions of the paper are: 
($i$)~the introduction of automata, \SWA, transducers, \SWT (Section~\ref{section:SWA}),
and visibly pushdown automata \SWVPA (Section~\ref{sec:SWVPA}),
generalizing the corresponding classes of symbolic and weighted models, 
%for the weighted computation on (nested) words over infinite alphabets;  
($ii$)~a polynomial best-search algorithm for \SWVPA, %(Section~\ref{sec:best})
% a framework for parsing over infinite alphabets, 
and ($iii$)~a uniform framework (Section~\ref{sec:parsing}) for parsing over infinite alphabets, 
the keys to which are 
($iii.a$)~the \SWT-based definition of generic edit distances between input and output words,
and ($iii.b$)~the use of nested words, and \SWVPA, 
instead of syntax trees and grammars. %(\S~\ref{sec:trees}).
%
Finally, Section~\ref{sec:transcription} describes
the implemented application %of this approach 
to automated music transcription
that motivated this study.

%based these models and on generic def. of distances;
%the use of VPA for parsing, word instead of trees, use comparison input-output.
%and best-search

%In Section~\ref{section:SWA} we introduce \SWA and \SWT.
%Then \SWVPA are defined in Section~\ref{sec:SWVPA}, 
%where a polynomial 1-best algorithm is described that can be use to solve
%Symbolic Weighted Parsing, as described Section~\ref{sec:parsing}.
%Finally, in Section~\ref{sec:transcription}, we present an application 
%of this approach to automated music transcription that has been implemented.


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% SWT & SWA
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\section{SW Automata and Transducers}
\label{section:transducer}\label{sec:transducer}
\label{section:SWA}\label{sec:SWA}
\label{section:SWT}\label{sec:SWT}

We follow the approach of~\cite{Mohri03ijfcs} for the computation of distances
between words and languages, with weighted transducers, 
extending it to infinite alphabets.
% with models of symbolic weighted automata and transducers. 
%
The models introduced in this section generalize 
weighted automata and transducers~\cite{Droste09handbook} 
%over finite alphabets, see  e.g.~\cite{Mohri03ijfcs}, 
by labeling each transition with a weight function that takes the 
input and output symbols as parameters, instead of a simple weight value.
These functions are similar to the guards of symbolic automata~\cite{dAntoniVeanes17CAV,dAntoni21CACM},
but they can return values in an arbitrary semiring, 
where the latter guards are restricted to the Boolean semiring.


\subsection{Semirings}
\label{section:semiring}
We shall consider semirings for the weight values of our language models.
%
A \emph{semiring} $\< \Semiring, \oplus, \zero, \otimes, \one>$ 
is a structure with a domain~$\Semiring$,
equipped with two associative
binary operators~$\oplus$ and $\otimes$,
with respective neutral elements $\zero$ and $\one$, and such that:
%$\< \mathbb{S}, \oplus, \zero>$ is a commutative monoid
%$\< \mathbb{S}, \otimes, \one>$ is a monoid
\begin{itemize}
\item $\oplus$ is commutative;
 $\< \Semiring, \oplus, \zero>$ is a commutative monoid 
   and $\< \Semiring, \otimes, \one>$ a monoid,
\item $\otimes$ distributes over~$\oplus$:  $\forall x, y, z \in \mathbb{S}$,
$x \otimes (y \oplus z) = (x \otimes y) \oplus (x \otimes z)$, 
and $(x \oplus y) \otimes z = (x \otimes z) \oplus (y \otimes z)$,
\item $\zero$ is absorbing for~$\otimes$: 
$\forall x\in \mathbb{S}$, $\zero \otimes x = x \otimes \zero = \zero$.
\end{itemize}
%Components of a semiring~$\Semiring$ may be subscripted by~$\Semiring$ when needed.
%We simply write $x \in \Semiring$ to mean $x \in \mathbb{S}$.
%
Intuitively, in the models presented in this paper, 
$\oplus$ selects an optimal value from two given values, 
in order to handle non-determinism, 
and $\otimes$ combines two values into a single value, 
in a chaining of transitions.
%and let $\< \Semiring, \oplus, \zero, \otimes, \one>$ be a {semiring}, 

\medskip%\noindent
A semiring $\Semiring$ is \emph{commutative} if $\otimes$ is commutative.
It is \emph{idempotent} if for each $x \in \dom(\Semiring)$, $x \oplus x = x$.
%
Every idempotent semiring~$\Semiring$ induces 
a partial ordering~$\leq_\oplus$ 
called the \emph{natural ordering} of~$\Semiring$~\cite{Mohri02semiring} 
and defined by: 
%implicitly defined by the semiring $\Semiring$ 
for all $x$ and $y$,
$x \leq_\oplus y \;\mbox{iff}\; x \oplus y = x$.
%(see~\cite{Mohri02semiring} for the proof that it is an ordering).
%
The natural ordering is sometimes defined in the opposite direction~\cite{DrosteKuich09semirings};
we follow here the direction  %follows \cite{Mohri02semiring}, and 
that coincides with the usual ordering on the Tropical semiring \emph{min-plus} 
(Figure~\ref{fig:semirings}).

\begin{lemma}[Monotony, \cite{Mohri02semiring}] \label{lem:monotonic}
Let $\< \Semiring, \oplus, \zero, \otimes, \one>$ be an idempotent semiring.
For all $x, y, z  \in \Semiring$,  
if $x \leq_\oplus y$ then
$x \oplus z \leq_\oplus y \oplus z$,
$x \otimes z \leq_\oplus y \otimes z$
and $z \otimes x \leq_\oplus z \otimes y$.
\end{lemma}   
When the property of Lemma~\ref{lem:monotonic} holds, 
%We say in this case that 
$\Semiring$ is called  \emph{monotonic}. % \wrt $\leq_\oplus$.
%A semiring $\Semiring$ 
%is \emph{monotonic} \wrt a partial ordering~$\leq$ 
%iff for all $x, y, z  \in \Semiring$,  $x \leq y$ implies
%$x \oplus z \leq y \oplus z$,
%$x \otimes z \leq y \otimes z$,
%and $z \otimes x \leq z \otimes y$.
%
Another important semiring property in the context of optimization
is \emph{superiority}~\cite{Huang08advanceddynamic}, 
which corresponds to the 
\emph{non-negative weights} condition in shortest-path algorithms~\cite{Dijkstra59anote}.
Intuitively, it means that combining elements with $\otimes$ always increases their weight. 
Formally, it is defined as the property ($i$) below. %of the following lemma.

\begin{lemma}[Superiority, Boundedness]
\label{lem:superior}\label{lem:bounded}
Let $\< \Semiring, \oplus, \zero, \otimes, \one>$ be an idempotent semiring.
The two following statements are equivalent:
\begin{itemize}
\item [$i.$] for all $x, y \in \Semiring$,  
$x \leq_\oplus x \otimes y$ and 
$y \leq_\oplus x \otimes y$
\item[$ii.$] for all $x \in \Semiring$,  $\one \oplus x = \one$.
\end{itemize}
\end{lemma}
%
\begin{proof} %(Lemma~\ref{lem:bounded})
$(ii) \Rightarrow (i)$ : 
$x \oplus (x \otimes y) = x \otimes (\one \oplus y) = x$, 
by distributivity of~$\otimes$ over~$\oplus$. 
Hence $x \leq_\oplus x \otimes y$.
Similarly, $y \oplus (x \otimes y) = (\one \oplus x) \otimes y = y$, 
hence $y \leq_\oplus x \otimes y$.
%
$(i) \Rightarrow (ii)$ :
%$(i)$ implies that $\one \leq_\oplus x$ for all $x \in \Semiring$, 
by the second inequality of ($i$), with $y = \one$, 
$\one \leq_\oplus x \otimes \one = x$, \ie, 
by definition of $\leq_\oplus$, $\one \oplus x = \one$.
\qed
\end{proof}

In~\cite{Huang08advanceddynamic}, when the property~$(i)$ holds, 
$\Semiring$ is called \emph{superior} \wrt the ordering~$\leq_\oplus$.
We have seen in the proof of Lemma~\ref{lem:bounded} that it implies that 
$\one \leq_\oplus x$ for all $x \in \Semiring$.
Similarly, by the first inequality of ($i$) with $y = \zero$,  
$x \leq_\oplus x \otimes \zero = \zero$.
%
Hence, in a superior semiring, % \wrt~$\leq$, 
it holds that %$\one \leq \zero$
for all $x \in \Semiring$, $\one \leq_\oplus x \leq_\oplus \zero$.
%
Intuitively, from an optimization point of view,
it means that $\one$ is the best value, and $\zero$ the worst.
%** superior implies $\Semiring$ bounded~\cite{Mohri02semiring} see **
%
In~\cite{Mohri02semiring}, the property ($ii$) of Lemma~\ref{lem:bounded}  
is called \emph{boundedness} of $\Semiring$ -- we shall use this term in the rest of the paper. 
% \emph{negative boundedness} 
It implies that, when looking for a best path in a graph whose edges
are weighted by values of $\Semiring$, the loops can be safely avoided.


%Following the terminology of~\cite{Mohri02semiring},
%when $\forall x \in \dom(\Semiring), \one \oplus x = \one$,
%the semiring $\Semiring$ is is called \emph{bounded}.

\begin{lemma}
Every bounded semiring is idempotent.
\end{lemma}
\begin{proof}
By boundedness, $\one \oplus \one = \one$, 
and idempotency follows by multiplying
both sides by $x$ and distributing. 
\qed
\end{proof}

\noindent
An idempotent semiring $\Semiring$~is called \emph{total} if
its natural ordering~$\leq_\oplus$ is total,
\ie when for all $x, y \in \Semiring$, either $x \oplus y = x$ or $x \oplus y = y$.

%\medskip
We shall need below infinite sums with~$\oplus$.
A semiring~$\Semiring$ is called \emph{complete}~\cite{Droste09handbook} 
%(\cite{Droste09handbook} chapter 1) %\cite{Kuich97semirings}
if it has an %infinite sum 
operation $\bigoplus_{i \in I} x_i$
for every family
$(x_i)_{i \in I}$ %$\{ x_i \mid i \in I \}$
of elements of $\dom(\Semiring)$ over an index set $I \subset \mathbb{N}$, such that
%is well-defined and in $\dom(\Semiring)$,
the following holds: %properties
\begin{description}
\item[$i.$]
\emph{infinite sums extend finite sums:}\\
$\displaystyle\bigoplus_{i \in \emptyset} x_i = \zero$,\quad 
      $\forall j\in \mathbb{N}, \displaystyle\bigoplus_{i \in \{ j \}} x_i = x_j$,
      $\forall j, k\in \mathbb{N}, j\neq k, 
      \displaystyle\bigoplus_{i \in \{ j, k \}} x_i = x_j \oplus x_k$,
%
\item[$ii.$]
\emph{associativity and commutativity:}\\
for all $I \subseteq \mathbb{N}$
and every partition $(I_{j})_{j \in J}$ of $I$, %\subseteq \mathbb{N}$, 
\(
\displaystyle
\bigoplus_{j \in J}\bigoplus_{i \in I_j} x_i = 
\bigoplus_{i \in I} x_i
\),
%
\item[$iii.$] 
\emph{distributivity of product over infinite sum:}\\
for all $I \subseteq \mathbb{N}$,
\(
\displaystyle
\bigoplus_{i \in I} (x \otimes y_i) = x \otimes \bigoplus_{i\in I} y_i\), and
\(
\displaystyle
\bigoplus_{i \in I} (x_i \otimes y) = (\bigoplus_{i \in I} x_i ) \otimes y\).
\end{description}


\begin{example}
Figure~\ref{fig:semirings} presents examples of semirings that are interesting in practice 
and enjoy the above properties.
\end{example}


\begin{figure}[t]
\begin{center}
\begin{tabular}{|c|c|C{2em}|C{2em}|C{2em}|C{2em}|}
\hline
        & domain & $\oplus$ & $\otimes$ & $\zero$  & $\one$\\ %& nat. ordering\\
\hline\hline
Boolean & $\{\bot, \top\}$ & $\vee$ & $\wedge$ & $\bot$ & $\top$\\ %& $\top \leq_\oplus \bot$  \\
\hline
Counting & $\mathbb{N}$ & $+$ & $\times$ & 0 & 1 \\
\hline
Viterbi & $[0, 1] \subset \mathbb{R}$ & $\mathit{max}$ & $\times$ & 0 & 1\\ % & $x \leq_\oplus y \iff x \ge y$  \\
\hline
Tropical min-plus & $\mathbb{R}_+ \cup \{ \infty\}$ & $\mathit{min}$ & $+$ & $\infty$ & $0$\\ % & $x \leq_\oplus y \iff x \leq y$   \\
\hline
%MaxPlus & $\mathbb{Q} \cup \{ -\infty\}$ & $\mathsf{max}$ & $+$ & $-\infty$ & $0$ \\
%\hline
%Word lang. & $2^{\Sigma^*}$ & $\cup$ & $\cdot$ & $\emptyset$ & $\{ \varepsilon \}$ \\
%\hline
\end{tabular}
\end{center}
\vskip-1em
\caption{Some commutative, bounded, total and complete semirings.}
\label{fig:semirings}
\end{figure}


\subsection{Label Theory}
\label{section:symbols}
We shall now define the functions labeling the transitions of SW automata and transducers,
generalizing the Boolean algebras of~\cite{dAntoniVeanes17CAV} 
from Boolean to other semiring domains.
%
We consider \emph{alphabets}, which are countable sets of symbols 
denoted $\Sigma$, $\Delta$, \dots
%Let $\< \Semiring, \oplus, \zero, \otimes, \one>$ be a commutative, complete semiring.
%
\noindent 
Given a semiring $\< \Semiring, \oplus, \zero, \otimes, \one>$, 
a $\Semiring$-\emph{label theory}
is a tuple $\bar\Phi$ of recursively enumerable sets of the form
%$\Phi_\epsilon \subseteq \Semiring$, % containing constant functions valued in $\Semiring$, 
$\Phi_\Sigma$, %and $\Phi_\Delta$, 
containing unary functions of type $\Sigma \to \Semiring$, %resp. $\Delta \to \Semiring$, 
and $\Phi_{\Sigma, \Delta}$, containing binary functions $\Sigma \times \Delta \to \Semiring$, 
such that:

\noindent -- 
for all $\Phi_{\Sigma, \Delta} \in \bar\Phi$, there exist 
$\Phi_{\Sigma} \in \bar\Phi$ and $\Phi_{\Delta} \in \bar\Phi$

\noindent -- 
for all $\alpha \in \Semiring$ and $\phi \in \Phi_\Sigma$,
      $\alpha \otimes \phi : x \mapsto \alpha \otimes \phi(x)$, 
      and $\phi \otimes \alpha : x \mapsto \phi(x) \otimes \alpha$\\
\phantom{--} belong to $\Phi_\Sigma$, and similarly for $\oplus$ 
      and for $\Phi_{\Sigma, \Delta}$

\noindent -- 
for all $\phi, \phi' \in \Phi_\Sigma$,
$\phi \otimes \phi': x \mapsto \phi(x) \otimes \phi'(x)$ belongs to $\Phi_\Sigma$

\noindent -- 
for all $\eta, \eta' \in \Phi_{\Sigma, \Delta}$,
$\eta \otimes \eta': x, y \mapsto \eta(x, y) \otimes \eta'(x, y)$ belongs to $\Phi_{\Sigma, \Delta}$

\noindent -- 
for all $\phi \in \Phi_\Sigma$ and $\eta \in \Phi_{\Sigma, \Delta}$,
$\phi \otimes \eta: x, y \mapsto \phi(x) \otimes \eta(x, y)$ belongs to $\Phi_{\Sigma, \Delta}$

\noindent -- 
for all $\psi \in \Phi_\Delta$ and $\eta \in \Phi_{\Sigma, \Delta}$,
$\eta \otimes \psi: x, y \mapsto \eta(x, y) \otimes \psi(y)$ belongs to $\Phi_{\Sigma, \Delta}$

\noindent -- 
similar closures hold for $\oplus$

\noindent -- 
the partial applications of every $\eta \in \Phi_{\Sigma, \Delta}$, 
namely $\eta_a: y \mapsto \eta(a, y)$ for $a \in \Sigma$ %and $y \in \Delta$
and\\
\phantom{--} $\eta_b: x \mapsto \eta(x, b)$ for $b \in \Delta$, %and $x \in \Sigma$, 
belong respectively to~$\Phi_\Delta$ and~$\Phi_\Sigma$.


%Moreover, these sets are required to be closed under the 
%operators~$\oplus$ and~$\otimes$ of~$\Semiring$:
%for all $\phi, \phi' \in \Phi_\Sigma$,
%$\psi, \psi' \in \Phi_\Delta$, 
%and $\eta, \eta' \in \Phi_{\Sigma, \Delta}$, %the function
%%
%\begin{center}
%\begin{tabular}{cclll}
%$\phi \otimes \phi'$ & : & $x \mapsto \phi(x) \otimes \phi'(x)$ & belongs to $\Phi_\Sigma$,\\
%$\psi \otimes \psi'$ & : & $y \mapsto \psi(y) \otimes \psi'(y)$ & belongs to $\Phi_\Delta$,\\
%$\phi \otimes \eta$\;  & : & $x, y \mapsto \phi(x) \otimes \eta(x, y)$ & belongs to $\Phi_{\Sigma, \Delta}$,\\
%$\eta \otimes \psi$  & : & $x, y \mapsto \eta(x, y) \otimes \psi(y)$ & belongs to $\Phi_{\Sigma, \Delta}$,\\
%$\eta \otimes \eta'$ & : & $x, y \mapsto \eta(x, y) \otimes \eta'(x, y)$ & belongs to $\Phi_{\Sigma, \Delta}$, &
%\multicolumn{1}{r}{and similarly for $\oplus$.}\\ %the same also holds for the binary sum operator $\oplus$.
%\end{tabular}
%\end{center}
%
%Finally, it is also required 
%% that the codomain of every function of $\Phi_\Sigma$ and $\Phi_\Delta$ 
%% is a subset of $\Phi_\epsilon$, and
%that the partial applications of a function $\eta \in \Phi_{\Sigma, \Delta}$, 
%resp.  $\eta_a: y \mapsto f(a, y)$ for $a \in \Sigma$ and $y \in \Delta$
%and  $\eta_b: x \mapsto f(x, b)$ for $b \in \Delta$ and $x \in \Sigma$, 
%belong resp. to~$\Phi_\Sigma$ and~$\Phi_\Delta$.

\medskip\noindent
When~$\Semiring$ is complete, 
we call \emph{summary} of a function
$\phi \in \Phi_\Sigma$,
resp. $\eta \in \Phi_{\Sigma, \Delta}$,
the value $\bigoplus_{a \in \Sigma} \phi(a)$, 
resp. $\bigoplus_{a \in \Sigma} \bigoplus_{b \in \Delta} \eta(a, b)$.
Note that, by definition of infinite sums in complete semirings, 
a summary of $\phi \oplus \phi'$ and $\phi \otimes \phi'$ %(and $\alpha \otimes \phi$)
can be computed from summaries of $\phi$ and $\phi'$ in $\Phi_{\Sigma}$, using the operators of $\Semiring$, 
and the same holds for $\eta, \eta' \in \Phi_{\Sigma, \Delta}$. 
%
A label theory is called \emph{effective} when 
summaries of $\phi \oplus \eta$, $\phi \otimes \eta$, $\eta \oplus \psi$, $\eta \otimes \psi$, 
and of the partial applications $\eta_a$ and $\eta_b$, 
can be computed, using the operators of $\Semiring$, 
from summaries of 
$\phi \in \Phi_{\Sigma}$, $\psi \in \Phi_{\Delta}$, $\eta \in \Phi_{\Sigma, \Delta}$. 


\subsection{Definitions} \label{sec:SWTdef}\label{sec:SWAdef}
%(SWT)

Let $\Semiring$ be a commutative and complete semiring, 
$\Sigma$ and $\Delta$ be alphabets called respectively \emph{input} and \emph{output}, %{alphabets}, 
and $\bar\Phi = \< \Phi_\Sigma, \Phi_\Delta, \Phi_{\Sigma, \Delta}>$ be an $\Semiring$-label theory.

\begin{definition}
\label{def:transducer} \label{def:SWT}
A \emph{symbolic-weighted transducer} (\SWT)
over $\Sigma$, $\Delta$, $\Semiring$ and $\bar\Phi$
%the input and output alphabets~$\Sigma$ and $\Delta$ with label theory $\bar\Phi$, and the semiring $\Semiring$ 
is a tuple
$T = \< Q, \init, \bar{\wei}, \final >$,
where $Q$ is a finite set of states, 
$\mathsf{in} : Q \to \Semiring$, 
respectively $\mathsf{out} : Q \to \Semiring,$
are functions defining the weight for entering, 
respectively leaving, a state, 
and $\bar{\wei}$ is a tuple made of the four transition functions 
$\wei_\epsilon$, $\wei_\Sigma$, $\wei_\Delta$, and $\wei_{\Sigma, \Delta}$
from $Q \times Q$ into respectively~$\Semiring$, %$\Phi_\epsilon$, 
$\Phi_\Sigma$, $\Phi_\Delta$, and $\Phi_{\Sigma, \Delta}$.
\end{definition}
%
A pair of states $\< q, q'>$ such that $\wei_\epsilon(q, q') \neq \zero$
is called an \emph{$\epsilon$-transition}.

\noindent We synthesize the 4-tuple of transition functions into a single 
function~$\wei:
 Q \times (\Sigma \cup \{ \epsilon \}) \times (\Delta \cup \{ \epsilon \}) \times Q 
 \to \Semiring$, %also called $\wei$ for simplicity, 
such that, for all $q, q' \in Q$, $a \in \Sigma$,  $b \in \Delta$, 
%and with 
%$\< \phi_\epsilon, \phi_\Sigma, \phi_\Delta, \phi_{\Sigma, \Delta}> = \wei(q, q')$, 
\[
\begin{array}{rcll}
\wei(q, \epsilon, \epsilon, q') & = &  \wei_\epsilon(q, q'),\\ %\phi_\epsilon\\
\wei(q, a, \epsilon, q') & = & \phi(a) & %\wei_\Sigma(q, q')(a)
\quad\mathrm{where~} \phi = \wei_\Sigma(q, q') \in \Phi_\Sigma,\\
\wei(q, \epsilon, b, q') & = & \psi(b) & 
\quad\mathrm{where~} \psi = \wei_\Delta(q, q') \in \Phi_\Delta,\\
\wei(q, a, b, q') & = & \eta(a, b) & 
\quad\mathrm{where~} \eta = \wei_{\Sigma, \Delta}(q, q') \in \Phi_{\Sigma, \Delta}.\\
\end{array}      
\]
%More precisely, $Q \times Q$, 
%resp. $Q \times \Sigma \times Q$,
%$Q \times \Delta \times Q$,
%$Q \times \Sigma \times \Delta \times Q$,
%into resp. $\Phi_0$, $\Phi_\Sigma$, $\Phi_\Delta$ $\Phi_{\Sigma, \Delta}$.

\noindent
The symbolic-weighted transducer~$T$ defines a mapping 
from the pairs of strings of $\Sigma^* \times \Delta^*$ 
into~$\Semiring$,
based on the following intermediate function $\weight_T$
defined recursively for every $q, q' \in Q$, 
%$a \in \Sigma$, $b \in \Delta$ 
and for all strings $s \in \Sigma^*$ and $t \in \Delta^*$:
\[
\begin{array}{rccl}
\weight_T(q, s, t, q') & = & 
   \displaystyle\bigoplus_{q'' \in Q} &
   \wei(q, \epsilon, \epsilon, q'') \otimes \weight_T(q'', s, t, q')\\
   & \oplus & \displaystyle\bigoplus_{q'' \in Q} &
              \wei(q'', \epsilon, \epsilon, q') \otimes \weight_T(q, s, t, q'')\\
 & \oplus & \displaystyle\bigoplus_{\begin{array}{c}
                                      \scriptstyle q'' \in Q\\[-2pt]
                                      \scriptstyle s = au, a \in \Sigma
                                      \end{array}} &
   \wei(q, a, \epsilon, q'') \otimes \weight_T(q'', u, t, q')\\
 & \oplus & \displaystyle\bigoplus_{\begin{array}{c}
                                    \scriptstyle q'' \in Q\\[-2pt]
                                    \scriptstyle t = bv, b \in \Delta
                                    \end{array}} &
    \wei(q, \epsilon, b, q'') \otimes \weight_T(q'', s, v, q')\\ 
 & \oplus & \displaystyle\bigoplus_{\begin{array}{c}
                                    \scriptstyle q'' \in Q\\[-2pt]
                                    \scriptstyle s = au, a \in \Sigma\\[-2pt]
                                    \scriptstyle t = bv, b \in \Delta
                                    \end{array}} &
   \wei(q, a, b, q'') \otimes \weight_T(q'', u, v, q').\\ 
\end{array}
\]

The above sum may be infinite because of the $\epsilon$-transitions in the first and second lines.
However, $\weight_T$ is well defined since $\Semiring$ is complete.
%
As we shall see, under conditions, the $\epsilon$-transitions can be removed and hence the sum made finite.
%
%** The application of $\oplus$ can be seen as a ND choice between transitions **
We recall that, by convention, an empty sum with $\oplus$ is $\zero$. 
%
Since $\zero$ is absorbing for~$\otimes$ in~$\Semiring$,
one term $\wei(q, a, b, q'')$ equal to $\zero$ in the above expression 
will be ignored in the sum, meaning that there is no possible transition
from state $q$ into state $q'$ while reading $a$ and writing $b$ 
(including the case $a = \epsilon$ and $b = \epsilon$).
%These functions $\phi$ act as guards for the transducer's transitions, 
%preventing a transition when they return the absorbing $\zero$ of $\Semiring$.
This is analogous to a transition's guard not satisfied by $\<a, b>$ in 
the case of symbolic transducers.

The expression of $\weight_T$ can be seen as a stateful definition of 
an edit-distance between a word $s \in \Sigma^*$ and a word $t \in \Delta^*$,
see also~\cite{Mohri03ijfcs}.
Intuitively, 
$\wei(q, a, \epsilon, q'')$, in the third line, is the cost of 
the deletion of the symbol~$a \in \Sigma$ in~$s$, 
$\wei(q, \epsilon, b, q'')$, in the fourth line, is the cost 
of the insertion of~$b \in \Delta$ in $t$, 
and $\wei(q, a, b, q'')$, in the last line, is the cost 
of the substitution of  $a \in \Sigma$ by~$b \in \Delta$.
The cost of a sequence of such operations transforming $s$ into $t$, 
is the product, with $\otimes$, of the individual costs of the operations involved;
and the distance between $s$ and $t$ is the sum, with $\oplus$,
of all such products of costs.


\medskip\noindent
The weight associated by $T$ to  $\< s, t> \in \Sigma^* \times \Delta^*$
is defined as follows: 
\begin{equation}
T(s, t)  = 
\displaystyle\bigoplus_{q, q' \in Q} \mathsf{in}(q) 
\mathop{\otimes} \weight_T(q, s, t, q') \mathop{\otimes} \mathsf{out}(q').
\label{eq:weightT}
\end{equation}

\begin{example}
Let $\Sigma$ be an alphabet of symbols with timestamps. 
The timestamp of $a \in \Sigma$, denoted by $\mathsf{t}(a)$, is expressed as a rational number.


DTW for sequences of timestamped events **     
\end{example}
      
\begin{example}
(simpler) pointwise distance between two sequences of timestamped events **
\end{example}

\noindent
The \emph{Symbolic Weighted Automata} %$A = \< Q, \init, \weight, \final >$
%over $\Sigma$,  $\Semiring$ and $\bar\Phi$
are defined similarly as the transducers of Definition~\ref{def:SWT}, 
by simply omitting the output symbols.
%
In this case, the label theory $\bar\Phi$ is reduced to a singleton $\< \Phi_\Sigma>$.
%over $\Sigma$ is reduced to
%a set $\Phi_\Sigma$ closed under~$\oplus$ and~$\otimes$.
%
\begin{definition} \label{def:SWA}
A \emph{symbolic-weighted automaton} (\SWA)
over $\Sigma$, $\Semiring$ and $\bar\Phi$
%the input alphabet~$\Sigma$ and the commutative semiring $\Semiring$ 
is a tuple
$A = \< Q, \init, \bar{\wei}, \final >$,
where $Q$ is a finite set of states, 
$\mathsf{in} : Q \to \Semiring$, 
respectively $\mathsf{out} : Q \to \Semiring,$
are functions defining the weight for entering, 
respectively leaving, a state, 
and $\bar{\wei}$ is a pair of transition functions 
$\wei_\epsilon$ and $\wei_\Sigma$ from $Q \times Q$ into 
respectively $\Semiring$ %$\Phi_\epsilon$ 
and $\Phi_\Sigma$.
\end{definition}
      
\noindent
The above transition functions $\wei_\epsilon$ and $\wei_\Sigma$
can be synthesized as above into a function
$\wei: Q \times (\Sigma \cup \{ \epsilon \}) \times Q \to \Semiring$.
%
%When $\wei_\epsilon(q, q') = \zero$ for all $q, q' \in Q$, 
%the automaton~$A$ is called \emph{without $\epsilon$-transitions}.
      
A $\SWT$ or $\SWA$ is called \emph{summarized}
when       
      
      
\subsection{Properties}
The two following properties will be useful to our approach on 
symbolic weighted parsing in Section~\ref{sec:parsing}.

\begin{proposition}
Given a \SWT $T$ 
over alphabet $\Sigma$, $\Delta$ and a commutative semiring~$\Semiring$, 
and $s \in \Sigma^*$, 
there exists an effectively constructible \SWA 
$A_{T, s}$ over $\Delta$ and $\Semiring$ 
such that for all $t \in \Delta^*$, $A_{T, s}(t) = T(s, t)$.
\end{proposition}
%
\begin{proof}
Let $T = \< Q, \init, \bar{\wei}, \final >$,
let 
$\wei: Q \times (\Sigma \cup \{ \epsilon \}) \times (\Delta \cup \{ \epsilon \}) \times Q 
 \to \Semiring$
be the synthesized form of 
$\bar{\wei} = \< \wei_\epsilon, \wei_\Sigma, \wei_\Delta, \wei_{\Sigma, \Delta}>$,
and let $s = s_1 \ldots s_n$ with $s_1, \ldots, s_n \in \Sigma$.
%$n = |s|$ be the length of $s$.
%
The state set of $A_{T, s}$ will be $Q' = [1..n] \times Q$.
Its state entering weight function is defined by 
$\init'(1, q) = \init(q)$ for all $q \in Q$, 
and $\init'(i, q) = \zero$ for all $1 < i \leq n$, 
and its state leaving weight function by 
$\final'(n, q) = \final(q)$ for all $q \in Q$, 
and $\final'(i, q) = \zero$ for all $1 \leq i <  n$. 

\noindent
We define transition functions $\wei'_\epsilon$ and $\wei'_\Sigma$ for $A_{T, s}$, 
together with their synthesized form $\wei'$ as follows: 
for all $1 \leq i, j \leq n$, $q, q' \in Q$, %and $a \in \Sigma$, 
and $b \in \Delta$, 
\[
\begin{array}{lclcrcll}
\wei'_\epsilon\bigl(\< i, q>, \< i, q'>\bigr) & = & \wei_\epsilon(q, q') & \mathrm{\ie} &
\wei'\bigl(\< i, q>, \epsilon, \< i, q'>\bigr)   & = & \wei(q, \epsilon, \epsilon, q'), & \\ 
%
\wei'_\epsilon\bigl(\< i, q>, \< i+1, q'>\bigr) & = & \phi(s_i) & 
\multicolumn{5}{l}{\mathrm{where~} \phi = \wei_\Sigma(q, q'),}\\
 & & & \mathrm{\ie} &
\wei'\bigl(\< i, q>, \epsilon, \< i+1, q'>\bigr) & = & \wei(q, s_i, \epsilon, q') & 
\mathrm{if~} i < n,\\ 
%
\wei'_\epsilon\bigl(\< i, q>, \< j, q'>\bigr) & = & \zero & 
%\mathrm{\ie} & \wei'(\< i, q>, \epsilon, \< j, q'>)   & = & \zero & & 
\multicolumn{5}{l}{\mathrm{if~} j \neq i, i+1,}\\ 
%
\wei'_\Sigma\bigl(\< i, q>, \< i, q'>\bigr) & = & \wei_\Delta(q, q') & \mathrm{\ie} &
\wei'\bigl(\< i, q>, b, \< i, q'>\bigr)          & = & \wei(q, \epsilon, b, q'),\\ 
%
\wei'_\Sigma\bigl(\< i, q>, \< i+1, q'>\bigr) & = & \eta_{s_i} &
\multicolumn{5}{l}{\mathrm{where~} \eta = \wei_{\Sigma, \Delta}(q, q'),}\\
 & & & \mathrm{\ie} & \wei'\bigl(\< i, q>, b, \< i+1, q'>\bigr) & = & \wei(q, s_i, b, q') & 
 \mathrm{if~} i < n,\\ 
%
\wei'_\Sigma\bigl(\< i, q>, \< j, q'>\bigr) & = & \zero & \mathrm{\ie} &
\wei'\bigl(\< i, q>, b, \< j, q'>\bigr)     & = & \multicolumn{2}{l}{\zero\ \mathrm{~if~} j \neq i, i+1.}
\end{array}      
\]
We use the assumption that the underlying label theory of $T$ 
is closed under partial applications (Section~\ref{section:symbols})
in the above construction of $\wei'_\Sigma$.

\noindent
The $\SWA$ wanted is $A_{T, s} = \< Q', \init', {\bar\wei}', \final' >$.
\qed
\end{proof}

The construction time and size for $A_{T, s}$ are $O(\| T \| . | s |)$,
where the size $\| T \|$ of $T$ is its number of states $|Q|$.


% gen. to transducers
\begin{proposition}
Given a \SWA $A$ over alphabet $\Sigma$ 
and a commutative and bounded semiring $\Semiring$, 
there exists an effectively constructible \SWA $A'$ 
without $\epsilon$-transitions 
such that for all $s \in \Sigma^*$, $A'(s) = A(s)$.
\end{proposition}
%
\begin{proof}  ** Tb revised **   
Let $A = \< Q, \init, \<\wei_\epsilon, \wei_\Sigma>, \final >$.
The automaton $A'$ is $\< Q, \init, \<\wei'_\epsilon, \wei'_\Sigma>, \final >$,
where, for all $q, q' \in Q$,
$\wei'_\epsilon(q, q') = \zero$  
and 
% \[
% \begin{array}{rcccl}
% \wei'_\Sigma(q, q') & = &  & \displaystyle\bigoplus_{q_1 \in Q} &
%                \wei_\Sigma(q, q_1) \otimes  \wei_\epsilon(q_1, q')\\
%                     &   & \oplus  & \displaystyle\bigoplus_{q_2 \in Q} &
%                \wei_\epsilon(q, q_2) \otimes \wei_\Sigma(q_2, q')\\
% \end{array}
% \]
\[
\wei'_\Sigma(q, q') = 
\displaystyle\bigoplus_{q'' \in Q} \wei_\epsilon(q, q'') \otimes \wei_\Sigma(q'', q')
\oplus \displaystyle\bigoplus_{q'' \in Q} \wei_\Sigma(q, q'') \otimes \wei_\epsilon(q'', q')
\]
In the above definition of $\wei'_\Sigma$, we use the operator
of product of a function of $\Phi_\Sigma$ by an element of $\Semiring$ 
described in Section~\ref{section:symbols}.
%
By definition of $\weight_{A}$ and distributivity of $\otimes$ on $\oplus$, 
** NO. TBC see \cite{Mohri02ijfcs} **
it holds that $\weight_{A}(q, s, q') = \weight_{A'}(q, s, q')$.
\qed
\end{proof}     


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% SW-VPA
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\section{SW Visibly Pushdown Automata}
\label{section:SWVPA}\label{sec:SWVPA}
The following model generalizes Symbolic VPA~\cite{dAntonyAlur14SVPDA}
from Boolean semirings to arbitrary semiring weight domains.


\subsection{Definition}
Let $\Omega$ be a countable alphabet 
%finite (large) or infinite,
that we assume partitioned into three subsets 
$\Omegai$, $\Omegac$, $\Omegar$,
whose elements are respectively called 
\emph{internal}, \emph{call} and \emph{return} symbols.
% \begin{itemize}
% \item a set $\Omegai$ of \emph{internal symbols} denoted $a$,
% \item a set $\Omegac$ of \emph{call symbols} denoted $\call{a}$,
% \item a set $\Omegar$ of \emph{return symbols} denoted $\return{a}$.
% \end{itemize}
Let~$\< \Semiring, \oplus, \zero, \otimes, \one>$ be a commutative semiring and  
let  $(\Phi_\epsilon, \Phic, \Phir, \Phicr)$ be a label theory over $\Semiring$
%In order to simplify notations, %and following the definition of Section~\ref{section:transducer}, 
%we shall write respectively 
where $\Phic$, $\Phir$ and~$\Phicr$ stand respectively 
for~$\Phi_\Omegac$, $\Phi_\Omegar$ and~$\Phi_{\Omegac, \Omegar}$.
%
Moreover, we extend this theory with a set $\Phii$ 
of unary functions in $\Omegai \to \Semiring$,
closed under $\oplus$ and $\otimes$.

\begin{definition}
A \emph{Symbolic Weighted Visibly Pushdown Automaton} (\SWVPA) 
over  $\Omega = \Omegai \uplus \Omegac \uplus \Omegar$ and $\Semiring$ is a tuple
$A = \< Q, P, \init, \weii, \weic, \weir, \weie, \final >$,
where $Q$ is a finite set of states, 
$P$ is a finite set of stack symbols, 
$\mathsf{in} : Q \to \Semiring$, 
respectively $\mathsf{out} : Q \to \Semiring,$
are functions defining the weight for entering, 
respectively leaving, a state, 
and 
$\weii : Q \times Q \to \Phii$,  
$\weic : Q \times Q \times P \to \Phic$,  
$\weir : Q \times P \times Q \to \Phicr$,  
$\weie : Q \times Q \to \Phir$,  
are transition functions.
\end{definition}
%
Similarly as in Section~\ref{section:transducer}, 
we extend the above transition functions as follows
for all $q, q' \in Q$, $p \in P$, 
$a \in \Omegai$, 
$\call{c} \in \Omegac$, 
$\return{r} \in \Omegar$, 
overloading their names: % for simplicity:
\[
\begin{array}{lll}
\weii: Q \times \Omegai \times Q \to \Semiring & 
\weii(q, a, q') = \phi_\mathsf{i}(a) & 
\mathrm{where~} \phi_\mathsf{i} = \weii(q, q'),\\
%
\weic: Q \times \Omegac \times Q \times P \to \Semiring & 
\weic(q, \call{c}, q', p) = \phi_\mathsf{c}(\call{c}) & 
\mathrm{where~} \phi_\mathsf{c} = \weic(q, q', p),\\
%
\weir: Q \times \Omegac \times P \times \Omegar \times Q \to \Semiring & 
\weir(q, {\call{c}},  p, {\return{r}}, q') = \phi_\mathsf{r}({\call{c}},  {\return{r}}) & 
\mathrm{where~} \phi_\mathsf{r} = \weir(q, p, q'),\\
%
\weie: Q \times \Omegar \times Q \to \Semiring & 
\weie(q, {\return{r}}, q') = \phi_\mathsf{e}({\return{r}}) &
\mathrm{where~} \phi_\mathsf{e} = \weie(q, q').\\
\end{array}      
\]

\noindent
The intuition is the following for the above transitions.
\begin{description}
\item $\weii$ : read the input internal symbol $a$, change state to $q'$ (stack is untouched).
\item $\weic$ : read the input call symbol $\call{c}$, push it to the stack along with $p$, change state to $q'$.
\item $\weir$ : when the stack is not empty, 
      read and pop from stack a pair made of $\call{c}$ and $p$, 
      read the input return symbol $\return{r}$, change state to $q'$.
      In this case, the weight function $\phi_\mathsf{r}$ 
      computes a value of matching between the call and return symbols $\call{c}$ and $\return{r}$.
      This value might be set to $\zero$ in order to express that the symbols do not match.
\item $\weie$ : when the stack is empty, 
      read the input return symbol $\return{r}$, change state to $q'$.
\end{description}

We give now a formal definition of these transitions of the automaton~$A$
in terms of %a weight value computed by 
an intermediate function $\weight_A$, like in Section~\ref{sec:SWT}.
In the case of a pushdown automaton, a configuration is composed 
of a state $q \in Q$ and a stack content $\gamma \in \Gamma^*$, where $\Gamma = \Omegac \times P$.
Therefore, $\weight_A$ is a function from 
$Q \times \Gamma^* \times \Omega^* \times Q \times \Gamma^*$ into $\Semiring$.
\[
\begin{array}{rcl}
\weight_A\bigl(\config{q}{\gamma}, a\, u, \config{q'}{\gamma'}\bigr) & = & 
 {\displaystyle\bigoplus_{q'' \in Q}} \weii(q, a, q'') 
 \otimes \weight_A\bigl(\config{q''}{\gamma}, u, \config{q'}{\gamma'}\bigr)\\
%
\weight_A\bigl(\config{q}{\gamma}, {\call{c}}\, u, \config{q'}{\gamma'}\bigr) & = & 
 {\displaystyle\bigoplus_{\begin{array}{c}
                          \scriptstyle q'' \in Q\\[-2pt]
                          \scriptstyle p \in P
                          \end{array}}}
 \weic\bigl(q, {\call{c}}, q'', p\bigr) 
 \otimes \weight_A\bigl(\config{q''}{{\call{c}}\, p\cdot \gamma}, u, \config{q'}{\gamma'}\bigr)\\[1mm]
%
\weight_A\bigl(\config{q}{{\call{c}}\, p\cdot \gamma}, {\return{r}}\, u, \config{q'}{\gamma'}\bigr) & = & 
 {\displaystyle\bigoplus_{q'' \in Q}} 
 \weir\bigl(q, {\call{c}}, p, {\return{r}}, q''\bigr) 
 \otimes \weight_A\bigl(\config{q''}{\gamma}, u, \config{q'}{\gamma'}\bigr)\\
%
\weight_A\bigl(\config{q}{\bot}, {\return{r}}\, u, \config{q'}{\gamma'}\bigr) & = & 
 {\displaystyle\bigoplus_{q'' \in Q}} \weie(q, {\return{r}}, q'') 
 \otimes \weight_A\bigl(\config{q''}{\bot}, u, \config{q'}{\gamma'}\bigr)\\
\end{array}
\]
where $\bot$ denotes the empty stack and ${\call{c}}\, p\cdot \gamma$ 
denotes a stack where the pair made of ${\call{c}} \in \Omegac$ and $p \in P$ is the top symbol 
and $\gamma$ is the rest of stack.

\noindent
The weight associated by $A$ to $s \in \Omega^*$
is then defined as follows,
following empty stack semantics: 
\begin{equation}
A(s)  = 
{\displaystyle\bigoplus_{q, q' \in Q}} \textstyle
\mathsf{in}(q) \mathop{\otimes} 
\weight_A\bigl(\config{q}{\bot}, s, \config{q'}{\bot}\bigr) 
\mathop{\otimes} \mathsf{out}(q').
\label{eq:weightA}
\end{equation}

\begin{example}
structured words...
intro language of music notation ?
\end{example}


\subsection{Properties}
Like VPA and symbolic VPA, 
the class of \SWVPA is closed under the binary operators of the underlying semiring.
%
\begin{proposition}
Let $A_1$ and $A_2$ be two (\SWVPA)
over the same $\Omega$ and $\Semiring$.
There exist two $\SWVPA$ $A_1 \oplus A_2$ and $A_1 \otimes A_2$, 
effectively constructible, 
such that for all $s \in \Omega^*$, 
$(A_1 \oplus A_2)(s) = A_1(s) \oplus A_2(s)$ and 
$(A_1 \otimes A_2)(s) = A_1(s) \otimes A_2(s)$.
\end{proposition}
The construction is essentially the same 
as in the case of the Boolean semiring~\cite{dAntonyAlur14SVPDA}.


\subsection{Best-first Search} 
\label{sec:best}\label{sec:search}
**hypotheses**
Let us assume that the semiring~$\Semiring$ is
commutative, bounded, complete, and total.
and 
** assume label theory is s.t. 
for all $\phi \in \Phii$, 
$\psi \in \Phir$, 
and $\eta \in \Phicr$, 
$\displaystyle\bigoplus_{a \in \Omegai} \phi(a)$, 
$\displaystyle\bigoplus_{{\return{r}} \in \Omegar} \psi({\return{r}})$ and 
$\displaystyle\bigoplus_{{\call{c}} \in \Omegac} 
\displaystyle\bigoplus_{{\return{r}} \in \Omegar} \eta({\call{c}}, {\return{r}})$
are computable...

% total ? 
% monotonic and superior writ natural ordering
%Regarding the infinite sum operator, note that
%$\bigoplus_{x \in \Phi_\Omega} \phi(x)$, 
%$\bigoplus_{y \in \Phi_\Delta} \psi(y)$, and  
%... exist and in $\Semiring$.

We propose a Dijkstra algorithm computing the minimal weight by $A$, 
\wrt~$\leq_\oplus$, for a word in~$\Omega^*$.

\noindent
More precisely, 
let $\top$ be a fresh stack symbol which does not belong to $\Gamma$,
and for every two states $q, q' \in Q$ and $\sigma \in \{ \bot, \top \}$, 
let 
\[ 
  d_0(q, \sigma, q') = \bigoplus_{s\in \Omega^*} 
  \textstyle
  \weight_A\bigl(\config{q}{\sigma}, s, \config{q'}{\sigma}\bigr). 
\]
Since $\Semiring$ is complete, this infinite sum is well defined, 
and since $\leq_\oplus$ is assumed total, it is the minimum in $\Omega^*$
of $s \mapsto \weight_A(\config{q}{\sigma}, s, \config{q'}{\sigma})$ \wrt this ordering.
%
When $\sigma = \bot$, $d_0(q, \sigma, q')$ is the central 
expression in a term of the definition (\ref{eq:weightA}) of $A(s_0)$ for the minimum $s_0$
(for the above function).
When $\sigma = \top$, intuitively, it is the minimum weight of a computation of $A$
starting in state $q$ with a stack $\gamma \in \Gamma^*$ (possibly empty), 
and ending in state $q'$ with the same stack $\gamma$, such that moreover 
the computation does not touch a symbol of $\gamma$. 
That means that during the computation, 
$A$ may apply the first case in the definition of $\weight_A$ (internal symbol),
as well as the second case, to push call symbols on the top of $\gamma$, 
and may pop these symbols with the third case (return symbol). 
However, it cannot apply one of the two last cases (return symbol and empty stack)
when the current stack is $\gamma$.
%pop symbols in $\gamma$.
% Note that having a stack reduced to such a symbol makes impossible the application of the 
% two last cases in the definition of $\weight_A$ (return symbol and empty stack). 
% However, it is possible to apply the two first cases 
% (internal symbol or call symbol, with a push on the top of $\top$).

Algorithm~\ref{algo:Dijkstra}
constructs iteratively a marking $d : Q \times \{ \bot, \top \} \times Q \to \Semiring$
%of the triplets $\<q, \sigma, q'>$ 
%of states of $A$ by weight values in $\Semiring$, 
that converges eventually to $d_0(q, \sigma, q')$. 
%It uses for that purpose a priority queue $P$ containing triplets of 
%$Q \times \{ \bot, \top \} \times Q$.


\begin{algo}[1-best for \SWVPA] \label{algo:Dijkstra}
\textbf{initially} let $P = Q \times \{ \bot, \top \} \times Q$, %contains all %triplets 
%$\< q_1, \bot, q_2>$ and $\<q_1, \top, q_2>$ for $q_1, q_2 \in Q$, with 
and 
$d(q_1, \bot, q_2) = d(q_1, \top, q_2) = \one$ if $q_1 = q_2$ and
$d(q_1, \bot, q_2) = d(q_1, \top, q_2) = \zero$ otherwise.

\smallskip\noindent
\textbf{while} $P$ is not empty

\noindent
\quad\textbf{extract} $\< q_1, \sigma, q_2>$ from $P$ 
 such that $d(q_1, \sigma, q_2)$ is minimal \wrt~$\leq_\oplus$.

\noindent\quad \textbf{for all} $q_0, q_3 \in Q$ and $p \in P$ do

\noindent
\qquad
\(\begin{array}{rrcl}
 & d(q_1, \sigma, q_3) & \opluseq &
   d(q_1, \sigma, q_2) \otimes \displaystyle\bigoplus_{a \in \Omegai} \weii(q_2, a, q_3)\\
%
%  & d(q_0, \sigma, q_2) & \opluseq & 
%    \displaystyle\bigoplus_{a \in \Omegai} \weii(q_0, a, q_1) \otimes d(q_1, \sigma, q_2)\\
%
\mathrm{if~} \sigma = \top & d(q_0, \top, q_3) & \opluseq &
d(q_1, \sigma, q_2) \otimes
%\displaystyle\bigoplus_{p \in P} 
\displaystyle\bigoplus_{{\call{c}} \in \Omegac} 
\displaystyle\bigoplus_{{\return{r}} \in \Omegar} \eta({\call{c}}, {\return{r}})\\
\mathrm{and} &  d(q_0, \bot, q_3) & \opluseq &
d(q_1, \sigma, q_2) \otimes
%\displaystyle\bigoplus_{p \in P} 
\displaystyle\bigoplus_{{\call{c}} \in \Omegac} 
\displaystyle\bigoplus_{{\return{r}} \in \Omegar} \eta({\call{c}}, {\return{r}})\\
& & & \mathrm{where~} \eta = \weic(q_0, q_1, p) \otimes \weir(q_2, p, q_3)\\
%
\mathrm{if~} \sigma = \bot & d(q_1, \bot, q_3) & \opluseq &
  d(q_1, \sigma, q_2) \otimes \displaystyle\bigoplus_{\return{r} \in \Omegar} \weie(q_2, {\return{r}}, q_3)\\
%
%& d(q_0, \bot, q_2) & \opluseq & 
%d(q_0, \bot, q_1) \otimes d(q_1, \sigma, q_2), \mathrm{if~} \< q_0, \bot, q_1> \notin P \\
%
%\mathrm{if~} \sigma = \top & d(q_0, \top, q_2) & \opluseq & 
%d(q_0, \top, q_1) \otimes d(q_1, \sigma, q_2), \mathrm{if~} \< q_0, \top, q_1> \notin P \\
%
& d(q_1, \bot, q_3) & \opluseq & 
   d(q_1, \sigma, q_2) \otimes d(q_2, \bot, q_3), \mathrm{if~} \< q_2, \bot, q_3> \notin P \\
%
\mathrm{if~} \sigma = \top & d(q_1, \top, q_3) & \opluseq & 
    d(q_1, \sigma, q_2) \otimes d(q_2, \top, q_3), \mathrm{if~} \< q_2, \top, q_3> \notin P \\
\end{array}
\) 
\end{algo}

\noindent
The infinite sums in the updates of $d$ in Algorithm~\ref{algo:Dijkstra} are well defined
since~$\Semiring$ is complete.
The algorithm performs $2.|Q|^2$ iterations until $P$ is empty, 
and each iteration has a time complexity $O(|Q|^2 . |P|)$.
This gives a time complexity $O(|Q|^4 . |P|)$. 
It can be reduced by implementing $P$ as a priority queue, 
prioritized by the value returned by $d$
***complete***. %$|Q|^3.\log(|Q|^2)$

The correctness of Algorithm~\ref{algo:Dijkstra} 
is ensured by the invariant expressed in the following lemma.
\begin{lemma}
For all $\< q_1, \sigma, q_2> \notin P$, $d(q_1, \sigma, q_2) =  d_0(q_1, \sigma, q_2)$.
\end{lemma}
The proof is by contradiction, 
assuming a counter-example minimal in the length of the witness word.


% \begin{algorithm}
% \caption{<your caption for this algorithm>}
% \label{<your label for references later in your document>}
% \begin{program}
% \seq{|line| := `` '', i := 1};
% \WHILE i \neq n+1 \DO
%       |line| := |item|[i] \concat `` '' \concat |number|[i]; 
%       i := i+1; 
%       \WHILE i \neq n+1 \AND |item|[i] = |item|[i-1] \DO 
%       |line| := |line| \concat ``, '' \concat |number|[i]);
%       i := i+1 \OD ; 
%       |write|(|line|) \OD 
% \end{program}
% \end{algorithm}

% The weight of a transition acts as a guard: 
% a transition is activated for a symbol $`a`$ iff its weight $`\phi(a) \neq 0`$ (or $`\psi(a, b) \neq 0`$), 
% the absorbing element.

%\begin{lemma}
\noindent
For computing the minimal weight of a computation of $A$, we use the fact that,
at the termination of Algorithm~\ref{algo:Dijkstra}, %it holds that,
%There exist $q_1, q_2 \in Q$
\[
  {\displaystyle \bigoplus_{s \in \Omega^*} A(s)} = 
  {\displaystyle\bigoplus_{q, q' \in Q}} \textstyle
  \mathsf{in}(q) \mathop{\otimes} d(q, \bot, q') \mathop{\otimes} \mathsf{out}(q').
\]
%\end{lemma}

%\medskip
\noindent
In order to obtain effectively a witness 
(word of $\Omega^*$ with computation of $A$ of minimal weight), 
we require an additional property of the weight functions.
%
\begin{definition}
Let $\Omega$ be an alphabet and $\Semiring$ a complete semiring.
A function $\phi$ from $\Omega^n$ into $\Semiring$
is called $k$-\emph{convex} for a natural number $k$ iff 
$\mathit{card}\{ \mathbf{a} \in \Omega^n \mid \phi(\mathbf{a}) = 
                 \displaystyle\bigoplus_{\mathbf{p} \in \Omega^n} \phi(\mathbf{p}) \} \leq k$.
\end{definition}
A label theory is $k$-convex if all its functions are $k$-convex.


\begin{proposition}
For a \SWVPA $A$ over a 
commutative, idempotent, superior, total and complete semiring 
and an alphabet $\Omega$ with a $k$-convex label theory,
one can construct in PTIME a word $s \in \Omega^*$ 
such that $A(s)$ is minimal \wrt the natural ordering for $\Semiring$. 
%$\leg_\oplus$.
\end{proposition} 


\subsection{Trees and Nested-Words}
\label{sec:trees}
The hierarchical structure of nested-words, defined by the \emph{call} and \emph{return} markup symbols, 
suggests a correspondence between these words and trees. 
The lifting of this correspondence to languages, of tree automata and VPA respectively, has
been discussed in~\cite{AlurMadhusudan09nested}, 
see also~\cite{Caralp12VPAmult} for the weighted case.
In this section, we discuss the correspondence between the symbolic-weighted extensions
of tree automata and VPA.

Let $\Omega$ be a countable ranked alphabet, such that 
every symbol $a \in \Omega$ has a rank 
$\rank(a) \in [0..M]$ where $M$ is a fixed natural number.
We write $\Omega_k$ for the subset of all symbols $a$ of $\Omega$
with $\rank(a) = k$, where $0 \leq k \leq M$.
%$\cup_{i = 0}^{i = M} \Omega_i$ 
%
$\T(\Omega)$ denotes the free $\Omega$-algebra of finite, ordered, 
$\Omega$-labeled trees, 
which is the smallest set such that  $\Omega_0 \subset \T(\Omega)$
and for all $1 \leq k \leq M$, all $a \in \Omega_k$, 
and all $t_1, \ldots, t_k \in \T(\Omega)$, $a(t_1, \ldots, t_k) \in \T(\Omega)$.
%
% tree = single node (leave) labeled with a symbol of $a \in \Omegai$
% (such a tree is simply denoted by $a$)
% or the composition, denoted by $b(t_1,\ldots, t_n$) of a node labeled with $b$
% and $n$ subtrees $t_1$,\ldots, $t_n$.

\renewcommand{\call}[1]{\ensuremath \langle_{#1}}
\renewcommand{\return}[1]{\ensuremath {}_{#1}{\rangle}} % $\prescript{}{a}{)}$

Let $\hat\Omega$ be the countable (unranked) alphabet obtained from 
$\Omega$ as follows: 
$\hat\Omega = \< \Omegai, \Omegac, \Omegar >$, with
$\Omegai = \Omega_0$, 
$\Omegac = \{ \; \call{a} \mid a \in \Omega_{>0} \}$,
$\Omegar = \{ \; \return{a} \mid a \in \Omega_{>0} \}$,
where $\Omega_{>0}$ denotes $\bigcup_{k = 1}^{M} \Omega_k$.

\noindent
We define a linearization of trees of $\T(\Omega)$ into 
words of $\hat{\Omega}^*$ as follows:
\begin{description}
\item $\lin(a) = a$ for all $a \in \Omega_0$, 
\item $\lin\bigl( b(t_1, \ldots, t_k)\bigr) = 
       \call{b} \; \lin(t_1) \ldots \lin(t_k) \; \return{b}$ 
       when $b \in \Omega_k$, $1 \leq k \leq M$.
\end{description}

% A \emph{regular tree grammar} over $\Omega$ 
% is a triplet $G = \< N, q_\mathsf{i}, R>$ where
% $N$ is a finite set of non-terminal symbols denoted $q$..., 
% $q_\mathsf{i} \in N$ is the starting non-terminal, 
% $R$ is a finite set of production rules of the form
% $q_0 \to a(q_1\ldots q_k)$ where 
% $q_0, q_1, \ldots, q_k \in N$
% $a \in \Omega_k$.
% A tree $t \in \T(\Omega)$ is in the language of $G$ 
% if it can be generated from $q_\mathsf{i}$ by 
% non terminal replacement following the rules of $R$.

Let us assume a label theory $\Phi_{\Omega_k}$ 
for each $k \in [0..M]$.
%
\begin{definition}  \label{def:SWTA}
A \emph{symbolic-weighted tree automaton} (\SWTA)
over the ranked input alphabet~$\Omega$
and the commutative semiring $\Semiring$ 
is a triplet $A = \< Q, \init, \bar{\wei} >$ where
$Q$ is a finite set of states, 
$\mathsf{in} : Q \to \Semiring$ is the starting weight function, 
and $\bar{\wei}$ is an $(M+2)$-tuple of transition functions made of:
$\wei_\epsilon$ from $Q \times Q$ into $\Semiring$, and, 
for each $k \in [0..M]$, $\wei_{\Omega, k}$
from $Q \times Q^{k}$ into $\Phi_{\Omega_k}$.
\end{definition}
%
Like in Section~\ref{sec:SWAdef}, we define from $\bar{\wei}$ 
a transition function~$\wei$, %from $\bar\wei$
from 
$Q \times (\Omega \cup \{ \epsilon \}) \times \bigcup_{k=0}^{M} Q^k$
into~$\Semiring$: %also called $\wei$ for simplicity, 
%such that, for all $q, q' \in Q$, $a \in \Sigma$, and $b \in \Delta$, 
\[
\begin{array}{rcll}
\wei(q_0, \epsilon, q_1) & = &  \wei_\epsilon(q_0, q_1),\\ %\phi_\epsilon\\
\wei(q_0, a, q_1 \ldots q_k) & = & \phi_{\Omega, k}(a) &
\quad\mathrm{where~} \phi_{\Omega, k} = \wei_{\Omega, k}(q_0, q_1\ldots q_k).\\
\end{array}      
\]
Intuitively, $\wei(q_0, a, q_1 \ldots q_k)$ can be seen as
the weight of a production rule $q_0 \to a(q_1, \ldots, q_k)$ 
of a regular tree grammar~\cite{tata}, 
that replaces the non-terminal symbol $q_0$, by $a(q_1, \ldots, q_k)$. 
%in a step of tree building.
%
Such a grammar computes the weights of the derivation trees 
of the Context-Free grammar obtained by forgetting the labeling symbols of $\Omega_{>0}$.


\noindent
The \SWTA of Definition~\ref{def:SWTA} defines a mapping 
from trees of $\T(\Omega)$ into the weights of~$\Semiring$,
based on the intermediate function $\weight_A$
defined as follows for $q_0 \in Q$ and 
$t = b(t_1,\ldots, t_k) \in \T(\Omega)$,
with $0 \leq k \leq M$:
\[
\begin{array}{rccl}
\weight_A(q_0, t) & = & 
   \displaystyle\bigoplus_{q_1 \in Q} &
   \wei(q_0, \epsilon, q_1) \otimes \weight_A(q_1, t)\\
 & \oplus & \displaystyle\bigoplus_{q_1 \ldots q_k \in Q^k} &
              \wei(q_0, b, q_1 \ldots q_k ) 
   \otimes \displaystyle\bigotimes_{i=1}^{k}
           \weight_A(q_i, t_i).\\
\end{array}
\]

\medskip\noindent
The weight associated by $A$ to  $t \in \T(\Omega)$ is then
\begin{equation}
A(t)  = 
\displaystyle\bigoplus_{q \in Q} \mathsf{in}(q) \mathop{\otimes} \weight_A(q, t).
\label{eq:weightTA}
\end{equation}

\begin{lemma}  \label{lem:SWTA}
For all \SWTA $A$ over~$\Omega$ and $\Semiring$, without $\epsilon$-transitions,
there exists an effectively constructible \SWVPA $A'$ over 
$\hat\Omega$ and $\Semiring$
such that for all $t \in \T(\Omega)$, $A'\bigl(\lin(t)\bigr) = A(t)$.
\end{lemma} 
% 
\begin{proof}
Let $A = \< Q, \init, \bar{\wei} >$ where $\bar{\wei}$ is summarized as above by a function
$\wei: Q \times (\Omega \cup \{ \epsilon \}) \times \bigcup_{k=0}^{M} Q^k \to \Semiring$. 

We build 
$A' = \< Q', P', \init', \weii, \weic, \weir, \weie, \final' >$,
computing over $\hat\Omega = \< \Omegai, \Omegac, \Omegar >$,
%
%$\Omega = \Omegai \uplus \Omegac \uplus \Omegar$ and $\Semiring$ 
where $Q' = \bigcup_{k=0}^{M} Q^k$ is the set of sequences of state symbols of $A$, 
of length at most $M$, including the empty sequence denoted by $\epsilon$, 
and where $P' = Q'$.

\[
\begin{array}{ll}
%\weii: Q \times \Omegai \times Q \to \Semiring & 
\weii(\bar{q}, a, \bar{q} q') = \wei(q', a, \epsilon) & 
\mathrm{for~all~} a \in \Omega_0,\\
%
%\weic: Q \times \Omegac \times Q \times P \to \Semiring & 
\weic(\bar{q}, \call{b}, \epsilon, \bar{q}) = \one & %\wei(p', b, \bar{p}) & 
\mathrm{for~all~} b \in \Omega_{>0},\\
%
%\weir: Q \times \Omegac \times P \times \Sigmar \times Q \to \Semiring & 
\weir(\bar{q}, {\call{b}}, \bar{p}, {\return{b}}, \bar{p} q') = \wei(q', b, \bar{q}) & 
\mathrm{for~all~}  b \in \Omega_{>0},\\
%
%\weie: Q \times \Omegar \times Q \to \Semiring & 
\weie(\bar{p}, {\return{b}}, \bar{q}) = \zero &
\mathrm{for~all~}  b \in \Omega_{>0}.\\
\end{array}      
\]

In practice, it is sufficient to consider in $Q'$ only the prefixes of 
sequences.
\qed\end{proof}


\section{Symbolic Weighted Parsing}
\label{sec:parsing}
Let us now use the models and results of the former sections in %order to define 
an approach to the problem of parsing over infinite alphabet. %appropriate
%
Besides considering infinitely many possible input symbols, 
handled with suitable language formalisms, 
this approach extends conventional parsing 
by computing a derivation tree modulo 
a generic distance between words,  
defined by a SW transducer given in input.
This enables considering finer word relationships than strict equality 
as in the conventional parsing approach, 
opening possibilities of quantitative analysis via this method.

\subsection{Definition}

Let $\Sigma$ be a countable input alphabet 
and $\Omega = \Omegai \uplus \Omegac \uplus \Omegar$ a countable output alphabet,
let $\< \Semiring, \oplus, \zero, \otimes, \one>$ be a commutative, bounded, complete 
and total semiring 
and let  $(\Phi_\epsilon, \Phic, \Phir, \Phicr)$ be a label theory over $\Semiring$, 
assumed computable and $k$-convex for some fixed $k$.

\noindent
Assuming given in input:
\begin{itemize}
\item a \SWT $T$ over $\Sigma$,  $\Omega$, and $\Semiring$,
defining a measure %between words 
$T: \Sigma^* \times \Omega^* \to \Semiring$,
\item a \SWVPA $A$ over $\Omega$, and $\Semiring$, defining a series of nested words
      $A : \Omega^* \to \Semiring$,
\item an input word $s \in \Sigma^*$,
\end{itemize}
the problem of \emph{symbolic weighted parsing} is to find 
a nested word $t \in \Omega^*$ minimizing
\( T(s, t) \otimes A(t)\)
\wrt $\leq_\oplus$, 
\ie such that %Hence, it is the problem of finding 
\( T(s, t) \otimes A(t) = 
  \displaystyle\bigoplus_{t' \in \Omega^*} T(s, t') \otimes A(t') \).

\noindent 
Therefore, it is the problem of optimizing a measure 
called the \emph{edit-distance between $s$ and $A$} in~\cite{Mohri03ijfcs}.
%
The input language can also be expressed as a \SWTA, or, 
as a particular case, as a weighted context-free grammar, 
converted in turn into a \SWVPA following Lemma~\ref{lem:SWTA}.
%
In the case of finite alphabets, 
the problem of searching, in a WTA language, 
for the best parse tree for a given input word,  
sometimes referred to as~\emph{weighted parsing}
(see \cite{Goodman99SemiringParsing,MorbitzVogler19weighted-parsing} **more general problems**)
is a particular case of SW parsing.
%
Indeed, it corresponds to the case where $T$
accepts only the pairs $\<s, t>$ such that 
$s$ is the projection of $t$ on $\Omegai$. 
This can be done with a single state $q$ and 
with transition rules of the form:
\begin{description}
\item[] $\wei(q, \epsilon, a, q) = \one$ for all $a \in \Omegac \cup \Omegar$,
\item[] $\wei(q, a, a, q) = \one$ for all $a \in \Omegai$,
\item[] $\wei(q, a, b, q) = \zero$ for all $a, b \in \Omegai$, $a \neq b$.
\end{description}


\subsection{Computation}

\begin{proposition}
The problem of Symbolic Weighted  parsing 
can be solved in PTIME in the size of the input \SWT, \SWVPA (or \SWTA) and input word, 
and the computation time of the functions of the label theory.
\end{proposition}
%
\begin{proof}
Bar-Hillel construction
\qed  
\end{proof}


\subsection{Application to Automated Music Transcription}
\label{sec:transcription}
Symbolic Automated Music Transcription
and analysis of music performances

\subsubsection{Time Scales}
Real-Time Unit (RTU) = seconds

\noindent 
Musical-Time Unit (MTU) = number of measures

\noindent 
conversion via tempo value

\subsubsection{Representation of Music Performances}
We consider symbolic representations of musical performances, as finite sequences of events.
It corresponds to the concrete case of a MIDI file~\cite{SMF} 
recorded  from an electronic keyboard, 
or the output of a transcription from audio files~\cite{Benetos18AMTsurvey}.
%
For the sake of simplicity, 
we shall only consider here the case of monophonic performances, 
where at most one note is sounding at a time. 
The approach however extends to the polyphonic case.

A music performance is a finite sequence of events in a set~$\Sigma$.
Every event $e \in \Sigma$ has attributes from a finite domain, 
like a key number for a note 
or a flag indicating that it is a rest 
(\textsf{ON} and \textsf{OFF} messages in~\cite{SMF})
and a velocity value (0..127 in~\cite{SMF}).
%This representation is similar to the piano roll ~\cite{Muller15fundamentals} chap.1. 
Moreover, it contains a RTU value $\ioi{e}$ (real number) 
representing the time distance to the next event, 
or to the end of performance for the last event,
also called \emph{inter-onset interval}.


\subsubsection{Representation of Music Scores}
Music scores are represented as structured words
made of timed %quantified 
events and parenthesized markups,
akin to nested words~\cite{AlurMadhusudan09nested}.

We consider an alphabet $\Delta$, every symbol of which is 
composed of a tag, in a finite set $\Xi$, 
and an MTU (rational) IOI duration value.
%The alphabet $\Delta$ 
It is partitioned into 
$\Delta = \Deltai \uplus \Deltac \uplus \Deltar$, 
like in Section~\ref{section:SWVPA}.
%
\noindent
The symbols of $\Deltai$ represent events:
% (infinite alphabet of internal symbols) made of:
with tags indicating a new note or grace-note (with null IOI), 
a rest or the continuation of the previous note (tie or dot).
%
The elements of $\Deltac \uplus \Deltar$ are matched
markups for describing the structure of the score, 
\ie the hierarchical grouping of events, and also, 
importantly the division of time in measures, tuplets...
%- parentheses for time divisions : tuplets, bars...
(linearization of rhythm trees \cite{jacquemard:hal-01138642}...).
They contain additional information such as tuplet number, beaming policy...

\noindent
The duration values of letters of $\Delta$, in MTU (rational), 
can be computed with the markups and tags (\eg grace note has duration 0).

%\noindent
%There are simultaneous events, since grace notes has duration 0. They are ordered.
%
%\noindent
%Finite bound on the number of duration ratio. ?

\begin{example}
...      
\end{example}

\subsubsection{Performance/Score Distance Computation}
\label{app:distance}
We define a distance between performance and score representations
by a $\SWT$ $T = \< Q, \init, \wei, \final >$, over a semiring $\Semiring$.
** detail the elements of $\Semiring$ ....**
%are quadruplets of the form
%$\< t, s, \delta_t, \delta_s>$

Every state of $Q$ contains a 
tempo value in a finite domain (\eg 30..300 bpm).
This value can be fixed 
or recomputed by the $T$ %transducer 
after reading each event, 
according to a perceptive/cognitive model of tempo 
such as~\cite{LargeJones99tempo}
(also used in the context of score following~\cite{Cont10TPAMI}).
% we wont detail here.


\subsection{Transcription by SW Parsing} %Best-first Search}
We assume a score language defined by a \SWVPA over the semiring 
$\Semiring$ of Section~\ref{app:distance}.


\section*{Conclusion}
% discussion

\noindent
-- summary 

\noindent
-- other theoretical properties of SW models

\noindent
-- room to improve complexity for best-search algorithm
... modular approach with oracles ...

and extension to $n$-best

\noindent
-- offline algorithm, 
semi-online implementation for AMT (bar-by-bar approach)


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% BIBLIO                                                                     %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\bibliographystyle{plain}
%\bibliographystyle{plainurl} 
\bibliographystyle{abbrv}
%\bibliographystyle{splncs04}
\bibliography{references}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% APPENDIX                                                                   %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\newpage
\appendix 


\section{Edit-Distance}

%\subsection{Distance between words or languages}
...algebraic definition of edit-distance of Mohri, in \cite{Mohri03ijfcs}
% Mehryar Mohri 
% Edit-distance of weighted automata: General definitions and algorithms
% International Journal of Foundations of Computer Science 14.06 (2003): 957-982.
distance $d$ over $\Sigma^* \times \Sigma^*$ 
into a semiring  $\Semiring = ( \Semiring, \oplus, \zero, \otimes, \one)$.

%\noindent
Let $\Omega = (\Sigma \cup \{ \epsilon \}) \times (\Sigma \cup \{ \epsilon \}) \setminus \{ (\epsilon, \epsilon) \}$,
and let $h$ be the morphism from $\Omega^*$ into $\Sigma^* \times \Sigma^*$  
defined over the concatenation of strings of $\Sigma^*$ (that removes the $\epsilon$'s).
%
\noindent
An \emph{alignment} between 2 strings  $s, t \in \Sigma^*$ is an element $\omega \in \Omega^*$ 
such that $h(\omega) = (s, t)$.
%
\noindent
We assume a base cost function over $\Omega$, $\delta: \Omega \to \Semiring$, extended to $\Omega^*$ as follows  
(for $\omega \in \Omega^*$): 
\(
\displaystyle\delta(\omega) = \bigotimes_{0 \leq i < |\omega|} \delta(\omega_i)
\).

\noindent
\begin{definition}
For  $s, t \in \Sigma^*$, the edit-distance between $s$ and $t$ is  
\( 
d(s, t) = \displaystyle\bigoplus_{\omega \in \Omega^*,\, h(\omega) = (s, t)} \delta(\omega)
\).
\end{definition}

For instance, for the Levenshtein edit-distance, $\Semiring$ is the min-plus semiring, $\delta(a, b) = 1$ for all $(a, b) \in \Omega$ with $a \neq b$, and $\delta(a, a) = 0$.


%\paragraph{Distance between a word and a regular language}


\end{document}