-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain-V0.tex
1943 lines (1706 loc) · 74.5 KB
/
main-V0.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
%
% initial version
%
%
\documentclass[runningheads]{llncs}
%\documentclass[a4paper,11pt]{article}
%\setcounter{page}{1}
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\usepackage[english]{babel}
\usepackage{hyperref}
%\usepackage[bookmarks,bookmarksnumbered,naturalnames,plainpages=false]{hyperref}
%usepackage{url}
% for footnote ref
\usepackage{refcount}
% array and tabular
\usepackage{array}
\newcolumntype{C}[1]{>{\centering\arraybackslash\hspace{0pt}}p{#1}}
% extension of enumerate env. (style for displaying counters)
% \usepackage{enumerate}
%% pictures
% \usepackage{graphicx}
% \DeclareGraphicsExtensions{.pdf,.png,.jpg}
% \graphicspath{fig/}
%% PGF, Tikz
\usepackage{tikz-cd}
%% \usepackage{pgfplots}
%% \usepgfplotslibrary{dateplot}
%% \usepackage{pgf,pgfarrows,pgfnodes, pgfautomata}
% \usepackage{tikz}
% \usetikzlibrary{cd}
%% \usetikzlibrary{arrows}
%% \usetikzlibrary{calc}
%% \usetikzlibrary{snakes}
%% \usetikzlibrary{backgrounds}
% \usetikzlibrary{trees}
%% \usetikzlibrary{automata}
%% \usetikzlibrary{positioning}
%% \usetikzlibrary{matrix}
%% \usetikzlibrary{patterns}
%% \usetikzlibrary{shapes}
% symbols
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{amsbsy}
\usepackage{bbold}
\usepackage{latexsym}
%\usepackage{amsfonts}
\usepackage{stmaryrd}
%\usepackage{mathabx}
%\usepackage{MnSymbol}
\usepackage{harmony} % simple music fonts
\usepackage{mathtools} % for arrows
%\usepackage{mathptmx}
%% theorem environments
\usepackage{theorem}
% \newtheorem{theorem}{Theorem} %[section]
% \newtheorem{definition}[theorem]{Definition}
% \newtheorem{lemma}[theorem]{Lemma}
% \newtheorem{corollary}[theorem]{Corollary}
% \newtheorem{proposition}[theorem]{Proposition}
% \newenvironment{proof}{\vspace{-2ex}{\it Proof. }}{\hspace*{\fill} $\Box$\smallskip }
% \theorembodyfont{\slshape}
% \newtheorem{example}[theorem]{Example}
% \newtheorem{remark}[theorem]{Remark}
{\theorembodyfont{\rmfamily} \theoremstyle{break} \newtheorem{algo}{Algorithm}}
%% algorithms
%\usepackage{algorithm}
%\usepackage{program}
%% for new macros
\usepackage{xspace}
%% arrows etc
%\input{rewriting}
%% Misc macros
\def\ie{\textit{i.e.}\xspace}
\def\eg{\textit{e.g.}\xspace}
\def\wrt{\textit{wrt}\xspace}
%\def\wlog{\textit{wlog}\xspace}
\def\etc{\textit{etc}\xspace}
\def\<#1>{\langle #1 \rangle}
\newcommand{\pair}[2]{\langle{#1}, {#2}\rangle}
%\newcommand{\A}{\mathcal{A}}
%\newcommand{\B}{\mathcal{B}}
\newcommand{\D}{\mathbb{D}}
\newcommand{\E}{\mathbb{E}}
\newcommand{\W}{\mathbb{W}}
\newcommand{\T}{\mathcal{T}}
\newcommand{\Semiring}{\mathbb{S}}
\newcommand{\zero}{\mathbb{0}}
\newcommand{\one}{\mathbb{1}}
\newcommand{\dom}{\ensuremath{\mathit{dom}}}
\def\SA{\textsf{SA}\xspace}
\def\WA{\textsf{WA}\xspace}
\def\SWT{\textsf{swT}\xspace}
\def\SWA{\textsf{swA}\xspace}
\def\SWTA{\textsf{swTA}\xspace}
\def\SWVPA{\textsf{sw-VPA}\xspace}
\def\weight{\mathsf{weight}}
\def\wei{\mathsf{w}}
\def\mei{\mathsf{m}}
\def\init{\mathsf{in}}
\def\final{\mathsf{out}}
\newcommand{\call}[1]{\ensuremath #1} %{\ensuremath \langle_{#1}}
\newcommand{\return}[1]{\ensuremath #1} %{\ensuremath {}_{#1}{\rangle}} % $\prescript{}{a}{)}$
\def\Omegai{{\Omega_\mathsf{i}}}
\def\Omegac{{\Omega_\mathsf{c}}}
\def\Omegar{{\Omega_\mathsf{r}}}
\def\Sigmai{{\Sigma_\mathsf{i}}}
\def\Sigmac{{\Sigma_\mathsf{c}}}
\def\Sigmar{{\Sigma_\mathsf{r}}}
\def\Deltai{{\Delta_\mathsf{i}}}
\def\Deltac{{\Delta_\mathsf{c}}}
\def\Deltar{{\Delta_\mathsf{r}}}
\def\Phii{{\Phi_\mathsf{i}}}
\def\Phic{{\Phi_\mathsf{c}}}
\def\Phir{{\Phi_\mathsf{r}}}
\def\Phicr{{\Phi_\mathsf{cr}}}
\def\weii{{\wei_\mathsf{i}}}
\def\weic{{\wei_\mathsf{c}}}
\def\weir{{\wei_\mathsf{r}}}
\def\weie{{\wei_\mathsf{e}}}
\newcommand{\config}[2]{\ensuremath \genfrac{[}{]}{0pt}{}{#1}{#2}}
\newcommand{\opluseq}{\ensuremath\mathrel{\oplus}=}
\newcommand{\ioi}[1]{\mathsf{ioi}({#1})}
\newcommand{\rank}{\mathsf{rk}}
\newcommand{\lin}{\mathsf{lin}}
%\sloppy
% Parsing over infinite alphabet as optimal alignment computation
% as edit-distance between string and language
%
%\title{Symbolic Weighted Parsing and Automated Music Transcription}
%\title{Symbolic Weighted Language Models and Automated Music Transcription}
\title{Symbolic Weighted Language Models and Parsing over Infinite Alphabets}
%\title{Weighted Visibly Pushdown Automata and Automated Music Transcription}
\author{Florent Jacquemard}
\institute{INRIA \& CNAM, Paris, France\\
\email{florent.jacquemard@inria.fr}}
%\titlerunning{WVPA \& AMT}
\titlerunning{Symbolic Weighted Parsing and Automated Music Transcription}
%\authorrunning{Florent Jacquemard}
\date{\today}
\begin{document}
\thispagestyle{empty}
\maketitle
\begin{abstract}
%Symbolic Weighted (SW) extension of symbolic automata...
We propose a framework for weighted parsing over infinite alphabets.
%
It is based on language models called Symbolic Weighted Automata (SWA)
at the joint %intersection
between Symbolic Automata (SA) and Weighted Automata (WA),
as well as Transducers (SWT) and Visibly Pushdown (SWPDA) variants.
%
Like SA, SWA deal with large or infinite input alphabets,
and like WA, they output a weight value in a semiring domain.
The transitions of WA are labeled by functions from an infinite alphabet into the weight domain.
This is unlike WA whose transitions are guarded by boolean predicates
overs symbols in an infinite alphabet
and also unlike WA whose transitions are labeled by constant weight values,
and who deal only with finite automata.
%
We present some properties of WA, WT and SWPDA models,
%and show how they can be used
that we use to define and solve a variant of parsing
over infinite alphabets.
%
We also briefly describe the application that motivated the introduction of these models:
an parse-based approach to automated music transcription.
\end{abstract}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% intro
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Introduction} \label{sec:intro}
Parsing %variant of membership problem for formal languages
%- given a language model and an input word $s$, compute a derivation of the model that yields $s$.
is the problem %process
of structuring a linear representation in input %typically
(a finite word over an alphabet) according to a language model. % (a formal grammar).
% natural language, programming language,
%
Most of the context-free parsing approaches~\cite{GruneJacobs08parsing}
assume a finite and reasonably small input alphabet. %models and algorithms
Such a restriction makes perfectly sense in the context of NLP tasks
like constituency parsing or of programming languages compilers or interpreters.
Considering large or infinite alphabets can however be of practical interest
in other cases.
%
For instance, when dealing with large characters encodings such as UTF-16, % processing strings in
\eg for vulnerability detection in Web-applications~\cite{dAntoni21CACM},
%
for the analyse (\eg validation or filtering)
of data streams or serialization of structured documents
(which may contain textual or numerical attributes)~\cite{Segoufin06csl},
or for processing timed execution traces~\cite{Bouyer03algebraic}.
% algebraic definition of a class of data languages
% (notion of monoid recognizability, based on registers, comparable to Bojancszik et al. data words)
Regarding the latter case, we describe %briefly
at the end of the paper
a parse-based approach to automated music transcription,
where a symbolic music performance,
presented as a sequence of timed musical events, % timestamped
% a symbolic representation of a music performance
is converted into a structured score in Common Western Music Notation~\cite{foscarin:hal-01988990}.
%generalizes weighted parsing:
%finding the best derivation for a weighted grammar.
Various extensions of language models for handling infinite alphabets have been studied.
%words carrying data values in an infinite domain (e.g. integers) e.g. data processing
For instance, some automata with memory extensions
allow restricted storage and comparison of input symbols,
%and correspond with logics,
(see~\cite{Segoufin06csl} for a survey),
with pebbles for marking positions~\cite{NevenSchwentickVianu04FSMinfinite},
registers~\cite{KaminskiFrancez94},
or %computations in 2 steps
the possibility to compute on subsequences
with the same attribute values~\cite{Bojanczyk11FO2}. % data words automata.
%
%for the and verification of infinite state systems
%(model checkers: alphabet = long bit-vectors)
%...For the representation of in model checking, verification and
The automata at the core of model checkers
compute on input symbols represented by large bitvectors~\cite{Vardi07ciaa} %\cite{BaierKatoen08MC}
(sets of assignments of Boolean variables) %propositional variables)
and in practice, %implementation
each transition accepts a set of such symbols (instead of an individual symbol),
represented by Boolean formula or Binary Decision Diagrams.
%
Following a similar idea, % and generalizing,
in symbolic automata (\SA)~\cite{dAntoniVeanes17CAV,dAntoni21CACM},
the transitions are guarded by predicates over infinite alphabet domains.
With closure conditions on the sets of such predicates, % (alphabet theories),
all the good properties enjoyed by automata over finite alphabets are preserved.
Other extensions of language models %(automata and grammars)
help in dealing with non-determinism, by the computation of weight values.
With an ambiguous grammar, there may exists several derivations
(\emph{abstract syntax trees} -- AST), % \ie the result of an analyze.
yielding one input word. % structuring a word,
The association of one weight value %in semiring domain
to each derivation permits to select a best one (or $n$ bests). % a fixed number of bests
% = ...a ranking of derivations
This is roughly the principle of \emph{weighted parsing}
approaches~\cite{Goodman99SemiringParsing,Nederhof03weightedParsing,MorbitzVogler19weighted-parsing}.
In \emph{weighted language models},
like \eg probabilistic context-free grammars % (CFG),
and weighted automata (\WA) \cite{Droste09handbook},
a weight value is associated to each transition rule, % production rule
and the rule's weights can be combined with a associative product operator~$\otimes$ into
the weight of a derivation.
A second operator~$\oplus$, associative and commutative,
is moreover used to handle ambiguity of the model,
by summing the weights of the possibly several (in general exponentially many) AST %syntax trees
associated to a given input word.
Typically, $\oplus$ will select the best of two weight values.
%Intuitively,~$\oplus$ selects, or ranking, the syntax trees.
The weight domain, equipped with these two operators is assumed, at minima,
to form a \emph{semiring} %$\Semiring$
where $\oplus$ can be extended to infinite sums,
%\cite{Eilenberg74automata}
such as the Viterbi semiring and the tropical min-plus algebra, see Figure~\ref{fig:semirings}.
%of domain $\mathbb{R}^+ \cup \{ +\infty\}$,
%where $\oplus$ is min and $\otimes$ is plus .
%by ranking
%making the weight domain a semiring.
%Some efficient specialized parsing algorithms~\cite{Huang05kbest} have been proposed in this context
%% models represented as hypergraphs \cite{Huang05kbest}
%in order to compute the $n$ best syntax trees of a given input word without having to enumerate them all.
%Generally based on dynamic programming, these algorithms rely on
%additional algebraic properties of~$\Semiring$.
%-- see \eg~\cite{Huang05kbest} for some NLP applications.
%The extraction of $n$ best list is useful
%the problem: quantitative parsing or symbolic parsing
%of parsing of words over infinite input alphabet.
In this paper, we present a uniform framework for weighted parsing over infinite input alphabets.
It is based on weighted %finite states
language models generalizing
both~\SA, with functions in an arbitrary semiring instead of Boolean guards,
and~\WA, by handling infinite alphabets -- Figure~\ref{fig:hierarchy}.
%***
%some weighted language models
%model of weighted CFG
%computing on words over infinite input alphabets %sets of terminal symbol
%- but with finite sets of states and transitions rules.
% non-terminals and production rules).
In their transition rules, input symbols appear as variables %(parameters)
and the weight associated to a transition rule is a function of these variables.
%
\begin{figure}
\centering
\begin{tikzpicture}
\node (NFA) at (0,4) {%
\(
\begin{array}{c}
\mathsf{NFA} : \Sigma_{\mathsf{fin}}^* \to \mathbb{B}\\
q \xrightarrow{a} q' \quad a \in \Sigma_\mathsf{fin}
\end{array}
\)
};
%
\node (WA) at (-2,2) {%
\(
\begin{array}{c}
\mathsf{WA} : \Sigma_{\mathsf{fin}}^* \to \mathbb{S}\\
q \xrightarrow{a, w} q' \quad a \in \Sigma_\mathsf{fin}, w \in \mathbb{S}
\end{array}
\)
};
%
\node (SA) at (2,2) {%
\(
\begin{array}{c}
\mathsf{SA} : \Sigma_{\mathsf{inf}}^* \to \mathbb{B}\\
q \xrightarrow{\phi} q' \quad \phi : \Sigma_\mathsf{inf} \to \mathbb{B}
\end{array}
\)
};
%
\node (SWA) at (0,0) {%
\(
\begin{array}{c}
\mathsf{SWA} : \Sigma_{\mathsf{inf}}^* \to \mathbb{S}\\
q \xrightarrow{\phi} q' \quad \phi : \Sigma_\mathsf{inf} \to \mathbb{S}
\end{array}
\)
};
\draw[->] (NFA)--(WA);
\draw[->] (NFA)--(SA);
\draw[->] (WA)--(SWA);
\draw[->] (SA)--(SWA);
%\begin{array}{c} \mathsf{NFA} : \Sigma^* \to \mathbb{B} \end{array}
\end{tikzpicture}
\caption{Classes of Symbolic/Weighted Automata.
$\Sigma_\mathsf{fin}$ is a finite alphabet,
$\Sigma_\mathsf{inf}$ is a countable alphabet,
$\mathbb{B}$ is the Boolean algebra,
$\mathbb{S}$ is an arbitrary commutative semiring,
$q \xrightarrow{\dots} q'$ represents the form of a transition between states $q$ and $q'$.}
\label{fig:hierarchy}
\end{figure}
%
%This approach is close to the case of
%Symbolic Automata (SA)~\cite{dAntoniVeanes17CAV,dAntoni21CACM},
%except that the domain for weight values is not restricted to be Boolean,
%like for the guards in the rules of SA,
%but can be an arbitrary commutative semiring (assuming some restrictions).
%
The models presented here are finite automata called symbolic-weighted (\SWA),
transducers (\SWT) and pushdown automata,
with a visibly restriction~\cite{AlurMadhusudan09nested} (\SWVPA).
%\wrt an edit distance
The latter model of automata computes on \emph{nested words}~\cite{AlurMadhusudan09nested},
a structured form of words parenthesized with markup symbols,
corresponding to a linearization of trees.
In the context of parsing, they are used here to represent (weighted) AST of CF grammars.
More precisely, a \SWVPA $A$ associates a weight value $A(t)$ % \in \Semiring$
to a given a nested word $t$, which is the linearization of an AST. %representing a parse tree.
%
On the other hand,
a \SWT is used to define a distance $T(s, t)$ between two finite words $s$ and $t$
over an infinite alphabet, following~\cite{Mohri03ijfcs}.
Then, the \emph{SW-parsing} problem aims at %computing
finding $t$ minimizing
$T(s, t) \otimes A(t)$ (\wrt the ranking defined by $\oplus$)
-- this value is called the distance between $s$ and $A$ in~\cite{Mohri03ijfcs}.
%
Similarly to weighted-parsing
methods~\cite{Goodman99SemiringParsing,Nederhof03weightedParsing,MorbitzVogler19weighted-parsing},
our approach proceeds in two steps,
based on properties of the SW models.
The first step is a Bar-Hillel construction where,
given a \SWT $T$, a \SWVPA $A$, and an input word $s$,
a \SWVPA $A_{T, s}$ is built, such that for all $t$, $A_{T, s}(t) = T(s, t) \otimes A(t)$.
In the second step, a best AST $t$ is found by applying to $A_{T, s}$
a best search algorithm similar to shortest distance in graphs~\cite{Mohri02semiring,Huang05kbest}.
%In expressiveness, they are equivalent to weighted CFG.
%and can be used in a general approach for parsing over infinite input alphabets.
%
%Let $A$ be a \SWVPA, associating $A(w) \in \Semiring$
%to a given a nested word $w$ (representing a parse tree),
%and let a \SWT compute a distance $d$, in $\Semiring$,
%between 2 strings over respectively an infinite input alphabet and the
%same (infinite) alphabet of $A$.
%Then, the problem of Symbolic Weighted Parsing is,
%given an input string $s$, to find a nested word $w$ minimizing
%(according to the ranking defined by $\oplus$)
%the distance $d(s, w) \otimes A(w)$ between $s$ and $A$,
%as defined in~\cite{Mohri03ijfcs}.
% First one general edit-distance is defined by a weighted word
% transducer~\cite{Mohri}
% %Symbolic automata transducers are extended models~\cite{VeanesdAntoniJACM}
% %dealing with infinite set of input symbols...
% value in a semiring...
The main contributions of the paper are:
($i$)~the introduction of automata, \SWA, transducers, \SWT (Section~\ref{section:SWA}),
and visibly pushdown automata \SWVPA (Section~\ref{sec:SWVPA}),
generalizing the corresponding classes of symbolic and weighted models,
%for the weighted computation on (nested) words over infinite alphabets;
($ii$)~a polynomial best-search algorithm for \SWVPA, %(Section~\ref{sec:best})
% a framework for parsing over infinite alphabets,
and ($iii$)~a uniform framework (Section~\ref{sec:parsing}) for parsing over infinite alphabets,
the keys to which are
($iii.a$)~the \SWT-based definition of generic edit distances between input and output words,
and ($iii.b$)~the use of nested words, and \SWVPA,
instead of syntax trees and grammars. %(\S~\ref{sec:trees}).
%
Finally, Section~\ref{sec:transcription} describes
the implemented application %of this approach
to automated music transcription
that motivated this study.
%based these models and on generic def. of distances;
%the use of VPA for parsing, word instead of trees, use comparison input-output.
%and best-search
%In Section~\ref{section:SWA} we introduce \SWA and \SWT.
%Then \SWVPA are defined in Section~\ref{sec:SWVPA},
%where a polynomial 1-best algorithm is described that can be use to solve
%Symbolic Weighted Parsing, as described Section~\ref{sec:parsing}.
%Finally, in Section~\ref{sec:transcription}, we present an application
%of this approach to automated music transcription that has been implemented.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% SWT & SWA
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{SW Automata and Transducers}
\label{section:transducer}\label{sec:transducer}
\label{section:SWA}\label{sec:SWA}
\label{section:SWT}\label{sec:SWT}
We follow the approach of~\cite{Mohri03ijfcs} for the computation of distances
between words and languages, with weighted transducers,
extending it to infinite alphabets.
% with models of symbolic weighted automata and transducers.
%
The models introduced in this section generalize
weighted automata and transducers~\cite{Droste09handbook}
%over finite alphabets, see e.g.~\cite{Mohri03ijfcs},
by labeling each transition with a weight function that takes the
input and output symbols as parameters, instead of a simple weight value.
These functions are similar to the guards of symbolic automata~\cite{dAntoniVeanes17CAV,dAntoni21CACM},
but they can return values in an arbitrary semiring,
where the latter guards are restricted to the Boolean semiring.
\subsection{Semirings}
\label{section:semiring}
We shall consider semiring for the weight values of our language models.
%
A \emph{semiring} $\< \Semiring, \oplus, \zero, \otimes, \one>$
is a structure with a domain~$\Semiring$,
equipped with two associative
binary operators~$\oplus$ and $\otimes$,
with respective neutral elements $\zero$ and $\one$, and such that:
%$\< \mathbb{S}, \oplus, \zero>$ is a commutative monoid
%$\< \mathbb{S}, \otimes, \one>$ is a monoid
\begin{itemize}
\item $\oplus$ is commutative;
$\< \Semiring, \oplus, \zero>$ is a commutative monoid
and $\< \Semiring, \otimes, \one>$ a monoid,
\item $\otimes$ distributes over~$\oplus$: $\forall x, y, z \in \mathbb{S}$,
$x \otimes (y \oplus z) = (x \otimes y) \oplus (x \otimes z)$,
and $(x \oplus y) \otimes z = (x \otimes z) \oplus (y \otimes z)$,
\item $\zero$ is absorbing for~$\otimes$:
$\forall x\in \mathbb{S}$, $\zero \otimes x = x \otimes \zero = \zero$.
\end{itemize}
%Components of a semiring~$\Semiring$ may be subscripted by~$\Semiring$ when needed.
%We simply write $x \in \Semiring$ to mean $x \in \mathbb{S}$.
%
Intuitively, in the models presented in this paper,
$\oplus$ selects an optimal value from two given values,
in order to handle non-determinism,
and $\otimes$ combines two values into a single value,
in a chaining of transitions.
%and let $\< \Semiring, \oplus, \zero, \otimes, \one>$ be a {semiring},
\medskip%\noindent
A semiring $\Semiring$ is \emph{commutative} if $\otimes$ is commutative.
It is \emph{idempotent} if for each $x \in \dom(\Semiring)$, $x \oplus x = x$.
%
Every idempotent semiring~$\Semiring$ induces
a partial ordering~$\leq_\oplus$
called the \emph{natural ordering} of~$\Semiring$~\cite{Mohri02semiring}
and defined, by:
%implicitly defined by the semiring $\Semiring$
for all $x$ and $y$,
$x \leq_\oplus y \;\mbox{iff}\; x \oplus y = x$.
%(see~\cite{Mohri02semiring} for the proof that it is an ordering).
%
The natural ordering is sometimes defined in the opposite direction~\cite{DrosteKuich09semirings};
We follow here the direction %follows \cite{Mohri02semiring}, and
that coincides with the usual ordering on the Tropical semiring \emph{min-plus}
(Figure~\ref{fig:semirings}).
\begin{lemma}[Monotony, \cite{Mohri02semiring}] \label{lem:monotonic}
Let $\< \Semiring, \oplus, \zero, \otimes, \one>$ be an idempotent semiring.
For all $x, y, z \in \Semiring$,
if $x \leq_\oplus y$ then
$x \oplus z \leq_\oplus y \oplus z$,
$x \otimes z \leq_\oplus y \otimes z$
and $z \otimes x \leq_\oplus z \otimes y$.
\end{lemma}
When the property of Lemma~\ref{lem:monotonic} holds,
%We say in this case that
$\Semiring$ is called \emph{monotonic}. % \wrt $\leq_\oplus$.
%A semiring $\Semiring$
%is \emph{monotonic} \wrt a partial ordering~$\leq$
%iff for all $x, y, z \in \Semiring$, $x \leq y$ implies
%$x \oplus z \leq y \oplus z$,
%$x \otimes z \leq y \otimes z$,
%and $z \otimes x \leq z \otimes y$.
%
Another important semiring property in the context of optimization
is {superiority}~\cite{Huang08advanceddynamic},
which corresponds to the
\emph{non-negative weights} condition in shortest-path algorithms~\cite{Dijkstra59anote}.
Intuitively, it means that combining elements with $\otimes$ always increase their weight.
Formally, it is defined as the property ($i$) below. %of the following lemma.
\begin{lemma}[Superiority, Boundedness]
\label{lem:superior}\label{lem:bounded}
Let $\< \Semiring, \oplus, \zero, \otimes, \one>$ be an idempotent semiring.
The two following statements are equivalent:
\begin{itemize}
\item [$i.$] for all $x, y \in \Semiring$,
$x \leq_\oplus x \otimes y$ and
$y \leq_\oplus x \otimes y$
\item[$ii.$] for all $x \in \Semiring$, $\one \oplus x = \one$.
\end{itemize}
\end{lemma}
%
\begin{proof} %(Lemma~\ref{lem:bounded})
$(ii) \Rightarrow (i)$ :
$x \oplus (x \otimes y) = x \otimes (\one \oplus y) = x$,
by distributivity of~$\otimes$ over~$\oplus$.
Hence $x \leq_\oplus x \otimes y$.
Similarly, $y \oplus (x \otimes y) = (\one \oplus x) \otimes y = y$,
hence $y \leq_\oplus x \otimes y$.
%
$(i) \Rightarrow (ii)$ :
%$(i)$ implies that $\one \leq_\oplus x$ for all $x \in \Semiring$,
by the second inequality of ($i$), with $y = \one$,
$\one \leq_\oplus x \otimes \one = x$, \ie,
by definition of $\leq_\oplus$, $\one \oplus x = \one$.
\qed
\end{proof}
In~\cite{Huang08advanceddynamic}, the property~$(i)$
is called $\Semiring$ \emph{superior} \wrt the ordering~$\leq_\oplus$.
We have seen in the proof of Lemma~\ref{lem:bounded} that it implies that
$\one \leq_\oplus x$ for all $x \in \Semiring$.
Similarly, by the first inequality of ($i$) with $y = \zero$,
$x \leq_\oplus x \otimes \zero = \zero$.
%
Hence, in a superior semiring, % \wrt~$\leq$,
it holds that %$\one \leq \zero$
for all $x \in \Semiring$, $\one \leq_\oplus x \leq_\oplus \zero$.
%
Intuitively, from an optimization point of view,
it means that $\one$ is the best value, and $\zero$ the worst.
%** superior implies $\Semiring$ bounded~\cite{Mohri02semiring} see **
%
In~\cite{Mohri02semiring}, the property ($ii$) of Lemma~\ref{lem:bounded}
is called \emph{boundedness} of $\Semiring$ -- we shall use this term in the rest of the paper.
% \emph{negative boundedness}
It implies that, when looking for a best path in a graph whose edges
are weighted by values of $\Semiring$, the loops can be safely avoided.
%Following the terminology of~\cite{Mohri02semiring},
%when $\forall x \in \dom(\Semiring), \one \oplus x = \one$,
%the semiring $\Semiring$ is is called \emph{bounded}.
\begin{lemma}
Every bounded semiring is idempotent.
\end{lemma}
\begin{proof}
By boundedness, $\one \oplus \one = \one$,
and idempotency follows by multiplying
both sides by $x$ and distributing.
\qed
\end{proof}
\noindent
An idempotent semiring $\Semiring$~is called \emph{total} if
it~$\leq_\oplus$ is total
\ie when for all $x, y \in \Semiring$, either $x \oplus y = x$ or $x \oplus y = y$.
%\medskip
We shall need below infinite sums with~$\oplus$.
A semiring~$\Semiring$ is called \emph{complete}~\cite{Droste09handbook}
%(\cite{Droste09handbook} chapter 1) %\cite{Kuich97semirings}
if it has an %infinite sum
operation $\bigoplus_{i \in I} x_i$
for every family
$(x_i)_{i \in I}$ %$\{ x_i \mid i \in I \}$
of elements of $\dom(\Semiring)$ over an index set $I \subset \mathbb{N}$, such that
%is well-defined and in $\dom(\Semiring)$,
the following holds: %properties
\begin{description}
\item[$i.$]
\emph{infinite sums extend finite sums:}\\
$\displaystyle\bigoplus_{i \in \emptyset} x_i = \zero$,\quad
$\forall j\in \mathbb{N}, \displaystyle\bigoplus_{i \in \{ j \}} x_i = x_j$,
$\forall j, k\in \mathbb{N}, j\neq k,
\displaystyle\bigoplus_{i \in \{ j, k \}} x_i = x_j \oplus x_k$,
%
\item[$ii.$]
\emph{associativity and commutativity:}\\
for all $I \subseteq \mathbb{N}$
and all partition $(I_{j})_{j \in J}$ of $I$, %\subseteq \mathbb{N}$,
\(
\displaystyle
\bigoplus_{j \in J}\bigoplus_{i \in I_j} x_i =
\bigoplus_{i \in I} x_i
\),
%
\item[$iii.$]
\emph{distributivity of product over infinite sum:}\\
for all $I \subseteq \mathbb{N}$,
\(
\displaystyle
\bigoplus_{i \in I} (x \otimes y_i) = x \otimes \bigoplus_{i\in I} y_i\), and
\(
\displaystyle
\bigoplus_{i \in I} (x_i \otimes y) = (\bigoplus_{i \in I} x_i ) \otimes y\).
\end{description}
\begin{example}
Figure~\ref{fig:semirings} presented examples of semirings interesting in practice
and enjoying the above properties.
\end{example}
\begin{figure}[t]
\begin{center}
\begin{tabular}{|c|c|C{2em}|C{2em}|C{2em}|C{2em}|}
\hline
& domain & $\oplus$ & $\otimes$ & $\zero$ & $\one$\\ %& nat. ordering\\
\hline\hline
Boolean & $\{\bot, \top\}$ & $\vee$ & $\wedge$ & $\bot$ & $\top$\\ %& $\top \leq_\oplus \bot$ \\
\hline
Counting & $\mathbb{N}$ & $+$ & $\times$ & 0 & 1 \\
\hline
Viterbi & $[0, 1] \subset \mathbb{R}$ & $\mathit{max}$ & $\times$ & 0 & 1\\ % & $x \leq_\oplus y \iff x \ge y$ \\
\hline
Tropical min-plus & $\mathbb{R}_+ \cup \{ \infty\}$ & $\mathit{min}$ & $+$ & $\infty$ & $0$\\ % & $x \leq_\oplus y \iff x \leq y$ \\
\hline
%MaxPlus & $\mathbb{Q} \cup \{ -\infty\}$ & $\mathsf{max}$ & $+$ & $-\infty$ & $0$ \\
%\hline
%Word lang. & $2^{\Sigma^*}$ & $\cup$ & $\cdot$ & $\emptyset$ & $\{ \varepsilon \}$ \\
%\hline
\end{tabular}
\end{center}
\vskip-1em
\caption{Some commutative, bounded, total and complete semirings.}
\label{fig:semirings}
\end{figure}
\subsection{Label Theory}
\label{section:symbols}
We shall now define the functions labeling the transitions of SW automata and transducers,
generalizing the Boolean algebras of~\cite{dAntoniVeanes17CAV}
from Boolean to other semiring domains.
%
We consider \emph{alphabets}, which are countable sets of symbols
denoted $\Sigma$, $\Delta$,...
%Let $\< \Semiring, \oplus, \zero, \otimes, \one>$ be a commutative, complete semiring.
%
\noindent
Given a semiring $\< \Semiring, \oplus, \zero, \otimes, \one>$,
a $\Semiring$-\emph{label theory}
is a tuplet $\bar\Phi$ of recursively enumerable sets of the form
%$\Phi_\epsilon \subseteq \Semiring$, % containing constant functions valued in $\Semiring$,
$\Phi_\Sigma$, %and $\Phi_\Delta$,
containing unary functions of type $\Sigma \to \Semiring$, %resp. $\Delta \to \Semiring$,
and $\Phi_{\Sigma, \Delta}$, containing binary functions $\Sigma \times \Delta \to \Semiring$,
such that:
\noindent --
for all $\Phi_{\Sigma, \Delta} \in \bar\Phi$, there exists
$\Phi_{\Sigma} \in \bar\Phi$ and $\Phi_{\Delta} \in \bar\Phi$
\noindent --
for all $\alpha \in \Semiring$ and $\phi \in \Phi_\Sigma$,
$\alpha \otimes \phi : x \mapsto \alpha \otimes \phi(x)$,
and $\phi \otimes \alpha : x \mapsto \phi(x) \otimes \alpha$\\
\phantom{--} belong to $\Phi_\Sigma$, and similarly for $\oplus$
and for $\Phi_{\Sigma, \Delta}$
\noindent --
for all $\phi, \phi' \in \Phi_\Sigma$,
$\phi \otimes \phi': x \mapsto \phi(x) \otimes \phi'(x)$ belongs to $\Phi_\Sigma$
\noindent --
for all $\eta, \eta' \in \Phi_{\Sigma, \Delta}$
$\eta \otimes \eta': x, y \mapsto \eta(x, y) \otimes \eta'(x, y)$ belongs to $\Phi_{\Sigma, \Delta}$
\noindent --
for all $\phi \in \Phi_\Sigma$ and $\eta \in \Phi_{\Sigma, \Delta}$,
$\phi \otimes \eta: x, y \mapsto \phi(x) \otimes \eta(x, y)$ belongs to $\Phi_{\Sigma, \Delta}$
\noindent --
for all $\psi \in \Phi_\Delta$ and $\eta \in \Phi_{\Sigma, \Delta}$,
$\eta \otimes \psi: x, y \mapsto \eta(x, y) \otimes \psi(y)$ belongs to $\Phi_{\Sigma, \Delta}$
\noindent --
similar closures holds for $\oplus$
\noindent --
partial applications $\eta \in \Phi_{\Sigma, \Delta}$,
resp. $\eta_a: y \mapsto \eta(a, y)$ for $a \in \Sigma$ %and $y \in \Delta$
and\\
\phantom{--} $\eta_b: x \mapsto \eta(x, b)$ for $b \in \Delta$ %and $x \in \Sigma$,
belong resp. to~$\Phi_\Sigma$ and~$\Phi_\Delta$.
%Moreover, these sets are required to be closed under the
%operators~$\oplus$ and~$\otimes$ of~$\Semiring$:
%for all $\phi, \phi' \in \Phi_\Sigma$,
%$\psi, \psi' \in \Phi_\Delta$,
%and $\eta, \eta' \in \Phi_{\Sigma, \Delta}$, %the function
%%
%\begin{center}
%\begin{tabular}{cclll}
%$\phi \otimes \phi'$ & : & $x \mapsto \phi(x) \otimes \phi'(x)$ & belongs to $\Phi_\Sigma$,\\
%$\psi \otimes \psi'$ & : & $y \mapsto \psi(y) \otimes \psi'(y)$ & belongs to $\Phi_\Delta$,\\
%$\phi \otimes \eta$\; & : & $x, y \mapsto \phi(x) \otimes \eta(x, y)$ & belongs to $\Phi_{\Sigma, \Delta}$,\\
%$\eta \otimes \psi$ & : & $x, y \mapsto \eta(x, y) \otimes \psi(y)$ & belongs to $\Phi_{\Sigma, \Delta}$,\\
%$\eta \otimes \eta'$ & : & $x, y \mapsto \eta(x, y) \otimes \eta'(x, y)$ & belongs to $\Phi_{\Sigma, \Delta}$, &
%\multicolumn{1}{r}{and similarly for $\oplus$.}\\ %the same also holds for the binary sum operator $\oplus$.
%\end{tabular}
%\end{center}
%
%Finally, it is also required
%% that the codomain of every function of $\Phi_\Sigma$ and $\Phi_\Delta$
%% is a subset of $\Phi_\epsilon$, and
%that the partial applications of a function $\eta \in \Phi_{\Sigma, \Delta}$,
%resp. $\eta_a: y \mapsto f(a, y)$ for $a \in \Sigma$ and $y \in \Delta$
%and $\eta_b: x \mapsto f(x, b)$ for $b \in \Delta$ and $x \in \Sigma$,
%belong resp. to~$\Phi_\Sigma$ and~$\Phi_\Delta$.
\medskip\noindent
When~$\Semiring$ is complete,
we call \emph{summary} of a function
$\phi \in \Phi_\Sigma$,
resp. $\eta \in \Phi_{\Sigma, \Delta}$,
the value $\bigoplus_{a \in \Sigma} \phi(a)$,
resp. $\bigoplus_{a \in \Sigma} \bigoplus_{b \in \Delta} \eta(a, b)$.
Note that, by definition of infinite sums in complete semirings,
a summary of $\phi \oplus \phi'$ and $\phi \otimes \phi'$ %(and $\alpha \otimes \phi$)
can be computed from summaries of $\phi$ and $\phi'$ in $\Phi_{\Sigma}$, using the operators of $\Semiring$,
and the same holds for $\eta, \eta' \in \Phi_{\Sigma, \Delta}$.
%
A label theory is called \emph{effective} when
summaries of $\phi \oplus \eta$, $\phi \otimes \eta$, $\eta \oplus \psi$, $\eta \otimes \psi$,
and of the partial applications $\eta_a$ and $\eta_b$,
can be computed, using the operators of $\Semiring$,
from summaries of
$\phi \in \Phi_{\Sigma}$, $\psi \in \Phi_{\Delta}$, $\eta \in \Phi_{\Sigma, \Delta}$.
\subsection{Definitions} \label{sec:SWTdef}\label{sec:SWAdef}
%(SWT)
Let $\Semiring$ be a commutative and complete semiring,
$\Sigma$ and $\Delta$ be alphabets called respectively \emph{input} and \emph{output}, %{alphabets},
and $\bar\Phi = \< \Phi_\Sigma, \Phi_\Delta, \Phi_{\Sigma, \Delta}>$ be an $\Semiring$-label theory.
\begin{definition}
\label{def:transducer} \label{def:SWT}
A \emph{symbolic-weighted transducer} (\SWT)
over $\Sigma$, $\Delta$, $\Semiring$ and $\bar\Phi$
%the input and output alphabets~$\Sigma$ and $\Delta$ with label theory $\bar\Phi$, and the semiring $\Semiring$
is a tuple
$T = \< Q, \init, \bar{\wei}, \final >$,
where $Q$ is a finite set of states,
$\mathsf{in} : Q \to \Semiring$,
respectively $\mathsf{out} : Q \to \Semiring,$
are functions defining the weight for entering,
respectively leaving, a state,
and $\bar{\wei}$ is a tuplet made of the 4 of transition functions
$\wei_\epsilon$, $\wei_\Sigma$, $\wei_\Delta$, and $\wei_{\Sigma, \Delta}$
from $Q \times Q$ into respectively~$\Semiring$, %$\Phi_\epsilon$,
$\Phi_\Sigma$, $\Phi_\Delta$, and $\Phi_{\Sigma, \Delta}$.
\end{definition}
%
A pair of states $\< q, q'>$ such that $\wei_\epsilon(q, q') \neq \zero$
is called an \emph{$\epsilon$-transition}.
\noindent We synthesize the 4-uplet of transition functions into a unique
function~$\wei:
Q \times (\Sigma \cup \{ \epsilon \}) \times (\Delta \cup \{ \epsilon \}) \times Q
\to \Semiring$, %also called $\wei$ for simplicity,
such that, for all $q, q' \in Q$, $a \in \Sigma$, $b \in \Delta$,
%and with
%$\< \phi_\epsilon, \phi_\Sigma, \phi_\Delta, \phi_{\Sigma, \Delta}> = \wei(q, q')$,
\[
\begin{array}{rcll}
\wei(q, \epsilon, \epsilon, q') & = & \wei_\epsilon(q, q'),\\ %\phi_\epsilon\\
\wei(q, a, \epsilon, q') & = & \phi(a) & %\wei_\Sigma(q, q')(a)
\quad\mathrm{where~} \phi = \wei_\Sigma(q, q') \in \Phi_\Sigma,\\
\wei(q, \epsilon, b, q') & = & \psi(b) &
\quad\mathrm{where~} \psi = \wei_\Delta(q, q') \in \Phi_\Delta,\\
\wei(q, a, b, q') & = & \eta(a, b) &
\quad\mathrm{where~} \eta = \wei_{\Sigma, \Delta}(q, q') \in \Phi_{\Sigma, \Delta}.\\
\end{array}
\]
%More precisely, $Q \times Q$,
%resp. $Q \times \Sigma \times Q$,
%$Q \times \Delta \times Q$,
%$Q \times \Sigma \times \Delta \times Q$,
%into resp. $\Phi_0$, $\Phi_\Sigma$, $\Phi_\Delta$ $\Phi_{\Sigma, \Delta}$.
\noindent
The symbolic-weighted transducer~$T$ defines a mapping
from the pairs of strings of $\Sigma^* \times \Delta^*$
into~$\Semiring$,
based on the following intermediate function $\weight_T$
defined recursively for every $q, q' \in Q$,
%$a \in \Sigma$, $b \in \Delta$
for every strings of $s \in \Sigma^*$, $t \in \Delta^*$:
\[
\begin{array}{rccl}
\weight_T(q, s, t, q') & = &
\displaystyle\bigoplus_{q'' \in Q} &
\wei(q, \epsilon, \epsilon, q'') \otimes \weight_T(q'', s, t, q')\\
& \oplus & \displaystyle\bigoplus_{q'' \in Q} &
\wei(q'', \epsilon, \epsilon, q') \otimes \weight_T(q, s, t, q'')\\
& \oplus & \displaystyle\bigoplus_{\begin{array}{c}
\scriptstyle q'' \in Q\\[-2pt]
\scriptstyle s = au, a \in \Sigma
\end{array}} &
\wei(q, a, \epsilon, q'') \otimes \weight_T(q'', u, t, q')\\
& \oplus & \displaystyle\bigoplus_{\begin{array}{c}
\scriptstyle q'' \in Q\\[-2pt]
\scriptstyle t = bv, b \in \Delta
\end{array}} &
\wei(q, \epsilon, b, q'') \otimes \weight_T(q'', s, v, q')\\
& \oplus & \displaystyle\bigoplus_{\begin{array}{c}
\scriptstyle q'' \in Q\\[-2pt]
\scriptstyle s = au, a \in \Sigma\\[-2pt]
\scriptstyle t = bv, b \in \Delta
\end{array}} &
\wei(q, a, b, q'') \otimes \weight_T(q'', u, v, q').\\
\end{array}
\]
The above sum may be infinite because of the $\epsilon$-transitions in the first and second lines.
However, $\weight_T$ is well defined since $\Semiring$ is complete.
%
As we shall see, under conditions, the $\epsilon$-transitions can be removed and hence the sum made finite.
%
%** The application of $\oplus$ can be seen as a ND choice between transitions **
We recall that, by convention, an empty sum with $\oplus$ is $\zero$.
%
Since $\zero$ is absorbing for~$\otimes$ in~$\Semiring$,
one term $\wei(q, a, b, q'')$ equal to $\zero$ in the above expression
will be ignored in the sum, meaning that there is no possible transition
from state $q$ into state $q'$ while reading $a$ and writing $b$
(including the case $a = \epsilon$ and $b = \epsilon$).
%These functions $\phi$ act as guards for the transducer's transitions,
%preventing a transition when they return the absorbing $\zero$ of $\Semiring$.
This is analogous to a transition's guard not satisfied by $\<a, b>$ in
the case of symbolic transducers.
The expression of $\weight_T$ can be seen as a stateful definition of
an edit-distance between a word $s \in \Sigma^*$ and a word $t \in \Delta^*$,
see also~\cite{Mohri03ijfcs}.
Intuitively,
$\wei(q, a, \epsilon, q'')$, in the third line, is the cost of
the deletion of the symbol~$a \in \Sigma$ in~$s$,
$\wei(q, \epsilon, b, q'')$, in the fourth line, is the cost
of the insertion of~$b \in \Delta$ in $t$,
and $\wei(q, a, b, q'')$, in the last line, is the cost
of the substitution of $a \in \Sigma$ by~$b \in \Delta$.
The cost of a sequence of such operations transforming $s$ into $t$,
is the product, with $\otimes$, of the individual costs of the operations involved;
And the distance between $s$ and $t$ is the sum, with $\oplus$,
of all such product of costs.
\medskip\noindent
The weight associated by $T$ to $\< s, t> \in \Sigma^* \times \Delta^*$
is defined as follows:
\begin{equation}
T(s, t) =
\displaystyle\bigoplus_{q, q' \in Q} \mathsf{in}(q)
\mathop{\otimes} \weight_T(q, s, t, q') \mathop{\otimes} \mathsf{out}(q').
\label{eq:weightT}
\end{equation}
\begin{example}
Let $\Sigma$ be an alphabet of symbols with timestamps.
The timestamp of $a \in \Sigma$, denoted by $\mathsf{t}(a)$, is expressed as a rational number.
DTW for sequences of timestamped events **
\end{example}
\begin{example}
(simpler) pointwise distance between two sequences of timestamped events **
\end{example}
\noindent
The \emph{Symbolic Weighted Automata} %$A = \< Q, \init, \weight, \final >$
%over $\Sigma$, $\Semiring$ and $\bar\Phi$
are defined similarly as the transducers of Definition~\ref{def:SWT},
by simply omitting the output symbols.
%
In this case, the label theory $\bar\Phi$ is reduced to a singleton $\< \Phi_\Sigma>$.
%over $\Sigma$ is reduced to
%a set $\Phi_\Sigma$ closed under~$\oplus$ and~$\otimes$.
%
\begin{definition} \label{def:SWA}
A \emph{symbolic-weighted automaton} (\SWA)
over $\Sigma$, $\Semiring$ and $\bar\Phi$
%the input alphabet~$\Sigma$ and the commutative semiring $\Semiring$
is a tuple
$A = \< Q, \init, \bar{\wei}, \final >$,
where $Q$ is a finite set of states,
$\mathsf{in} : Q \to \Semiring$,
respectively $\mathsf{out} : Q \to \Semiring,$
are functions defining the weight for entering,
respectively leaving, a state,
and $\bar{\wei}$ is a pair of transition functions
$\wei_\epsilon$ and $\wei_\Sigma$ from $Q \times Q$ into
respectively $\Semiring$ %$\Phi_\epsilon$
and $\Phi_\Sigma$.
\end{definition}
\noindent
The above transition functions $\wei_\epsilon$ and $\wei_\Sigma$
can be synthesized as above into a function
$\wei: Q \times (\Sigma \cup \{ \epsilon \}) \times Q \to \Semiring$.
%
%When $\wei_\epsilon(q, q') = \zero$ for all $q, q' \in Q$,
%the automaton~$A$ is called \emph{without $\epsilon$-transitions}.
A $\SWT$ or $\SWA$ is called \emph{summarized}
when
\subsection{Properties}
The two following properties will be useful to our approach on
symbolic weighted parsing in Section~\ref{sec:parsing}.
\begin{proposition}
Given a \SWT $T$
over alphabet $\Sigma$, $\Delta$ and a commutative semiring~$\Semiring$,
and $s \in \Sigma^*$,
there exists an effectively constructible \SWA