response.tex

% !arara: clean: { files: [response.aux, response.log, response.bbl, response.blg] }
% arara: xelatex: {synctex : yes}
% arara: biber
% arara: xelatex: {synctex : yes}
\documentclass{responseletter}
\date{\today}

\setkomavar{fromname}{Egon Geerardyn \showinitial{EG}}
\setkomavar{fromemail}[]{egon.geerardyn@vub.ac.be} 

\newcommand{\commit}[1]{\href{https://github.com/egeerardyn/phdthesis/commit/#1}{\texttt{#1}}}

\addbibresource{biblio/ownWork.bib}
\addbibresource{biblio/references.bib} 

\begin{document}
\begin{letter}{%
Bart Peeters\\
Johan Deconinck\\
Johan Schoukens\\
Jérôme Antoni\\
Koen Tiels\\
Patrick Guillaume\\
Tom Oomen\\
Tomas McKelvey}

\opening{Dear jury members,}

First of all, I would like to thank you for your feedback on my PhD thesis and the ensuing discussions.
This has greatly improved the text.

Attached, you can find an extensive summary of the changes I have carried out in the text.
\oldnew{old}{new}
The numbers in the margin indicate the pages in the old revision (\href{https://github.com/egeerardyn/phdthesis/releases/tag/private-submit}{\texttt{946aebc}} as for the private defense) and in the new revision.
Note that in this document, clickable links are shown with a thin border (depending on your PDF viewer).

I have also made the \LaTeX{} source code of my text (and data files) available on GitHub.
There, you can inspect the \href{https://github.com/egeerardyn/phdthesis/compare/b6b273e...master}{full set of changes} at the source code level, or \href{https://github.com/egeerardyn/phdthesis/commits/master}{inspect individual changes}.

\closing{Sincerely,}
\encl{list of changes\\
          \href{https://dl.dropboxusercontent.com/u/452005/VUB/phd/old.pdf}{\color{old}old manuscript}\\
          \href{https://dl.dropboxusercontent.com/u/452005/VUB/phd/new.pdf}{\color{new}new manuscript}\\}%FIXME: upload manuscript to dropbox
\end{letter}


\section{Requested Changes}

I have based this response largely on the report of my private defense and private communications afterwards. 
I have used initials to indicate who requested what specific changes as in the list below:

\begin{minipage}{0.33\columnwidth}
\begin{itemize}
  \item \shortname{BP}: Bart Peeters,
  \item \shortname{EG}: Egon Geerardyn,
  \item \shortname{JA}: Jérôme Antoni,
\end{itemize}
\end{minipage}
\begin{minipage}{0.33\columnwidth}
\begin{itemize}
  \item \shortname{JD}: Johan Deconinck,
  \item \shortname{JS}: Johan Schoukens,
  \item \shortname{KT}: Koen Tiels,
\end{itemize}
\end{minipage}
\begin{minipage}{0.33\columnwidth}
\begin{itemize}
  \item \shortname{PG}: Patrick Guillaume,
  \item \shortname{TM}: Tomas McKelvey,
  \item \shortname{TO}: Tom Oomen.
\end{itemize}
\end{minipage}

\oldnew{old}{new}
The numbers in the margin indicate the pages/sections in the old revision (\href{https://github.com/egeerardyn/phdthesis/releases/tag/private-submit}{\texttt{946aebc}} as for the private defense) and in the new revision.
The same color code (red = old, green = new) is used throughout this document.

\subsection{Figures}
\question{Increase the line width used in the figures like Figure 3.4.\\}
\action{All figures have been inspected and improved. In most places, the minimum line width has been increased to $0.75\mathrm{pt}$. In some places, markers and annotations have been added to ease identification of the different graphs.}

\subsection{Assumptions}
\question{Make a list of assumptions, especially in Chapter 2, under which the work is valid}

\begin{enumerate}

\item
\action{Assumptions have been added to chapter 2.}
\begin{oldquote}
For the theoretical analysis, we consider \gls{SISO} \gls{OE} systems, i.e.
\begin{align}
y(t) &=G(s,\theta)u(t)  + H(s,\vartheta)e(t),
\end{align}
where $u(t)$ is a known input sequence, $y(t)$ the output sequence, and $e(t)$ is zero mean Gaussian white noise with variance $\sigma_{e}^2$.
$G$ is a rational, time-invariant transfer function in continuous time.
\end{oldquote}
\begin{newquote}
\oldnewpage{33}{16}
For the theoretical analysis, we consider \gls{SISO} \gls{OE} systems, i.e.
\begin{align}
y(t) &=G(s,\theta)u(t)  + H(s,\vartheta)e(t),
\end{align}
where $u(t)$ is a known input sequence, $y(t)$ the output sequence, and $e(t)$ is zero mean Gaussian white noise with variance $\sigma_{e}^2$.
$G$ and $H$ are rational, time-invariant transfer functions in continuous time.
\omission{}
\begin{assumption}{2.1}
The noise filter $H(s,\vartheta)$ is assumed to be independently parametrized from the plant $G(s,\theta)$.
\end{assumption}
\begin{assumption}{2.2}
For the derivations, $H(s,\vartheta) = 1$ is used as a working assumption such that the disturbing noise on the output is zero mean Gaussian white noise.
When this assumption is violated, one has to first measure the noise coloring and use this knowledge to predistort the input to account for the noise coloring.
\end{assumption}
\end{newquote}

\item \says{JA}{Here, the number of modes per frequency band decreases as the frequency increases. A decreasing modal density is typical for a beam, but for a plate, the number of modes is constant. For a volume, the number of modes increases. I suggest to recast the context in a set of assumptions. The modal density and the modal overlap factor are important in this respect.
But this also opens perspectives. For a beam, you would get a logarithmic multisine, for a plate, you would get a linear multisine. In mechanics, you have an idea about the modal density and modal overlap factor, which can be a big prior knowledge.}
\action{The following remarks have been added}

\begin{newquote}
\oldnewpage{}{25}
\begin{assumption} {2.3}
The sub-systems in (2.33) all are either lightly damped ($\damping \ll \sqrt{1/2}$) or are not of interest.
\end{assumption}

\begin{assumption}{2.4}
The resonance frequencies $\wn[i]$ of all the sub-systems in (2.33) are well-separated in the frequency domain such that there is little modal overlap, i.e. $\modalOverlap \ll 1$.
\end{assumption}

\begin{assumption}{2.5}
It is assumed that the excitation signal contains many more frequency lines than parameters required to describe the system under test well.
Hence, we can neglect the bias of the estimate and focus on its variance error by assuming that a proper model order can be chosen and fitted from the data.
\end{assumption}

\begin{remark}{2.3}
The modal overlap $\modalOverlap$ is given~\citep{Ege2009} as the ratio of the average $3\unit{dB}$ bandwidth to the average difference in resonance frequencies for all resonances in the frequency band of interest.
Often, the modal overlap is linked to the modal density $\modalOverlap(\omega) = \wdB \modalDensity{\omega}$.
In acoustic and structural engineering, expressions exist for the theoretical modal density $\modalDensity{f}$ in specific geometries such as beams, plates, square volumes~\citep{Hart1971,Bies2009}.
Specifically, beams exhibit a modal density $\modalDensity{\omega} = \bigO{\omega^{-1/2}}$, for plates this becomes $\modalDensity{\omega} = \bigO{1}$, and for volumes $\modalDensity{\omega} = \bigO{\omega}$.
As such, the previous assumption practically limits the use of the excitation signals to beams or the low-frequency region of plates and volumes~\citep{Ege2009}.
\end{remark}
\end{newquote}
\begin{newquote}
\oldnewpage{}{56}
\begin{remark}{2.14}
The proposed design can be adapted quite straightforwardly to also account for the high-frequency regions of e.g. plates by increasing the number of frequency lines where the modal overlap becomes more important.
In practice, this will boil down to linearly spaced multisines.
For measurement of the transfer characteristics of volumes (e.g. rooms), more specialized techniques are required to describe the high-frequency behavior such as the ones discussed by \citet{Ege2009}.
\end{remark}
\end{newquote}

\item \says{JA}{In (2.12), you have a series of second-order systems that all have the same amplitude. Can this be made more general?}
\action{The following remark has been added}
\begin{newquote}
 The free parameters $(\dc, \damping, \wn, \wz)$ of the sub-system are reduced to only $(\damping, \wn)$ and the others are fixed during our experiments: $\dc = 1$ and $\wz \to \infty$. 
   \begin{remark}{2.10}
   Implicitly, this considers a design where each of the sub-systems has an equal gain $\dc=1$ and residue $\wz \to\infty$.
   In practice, this is often not a reasonable assumption, but the design can be adapted in a straightforward manner.
   Concretely, using a first experiment with the proposed signal, one can estimate the different gains of the sub-systems and optimize the amplitude spectrum of the signal to take those into account: for sub-systems where $\dc$ is lower than on average, the amplitude spectrum needs to be increased near the resonance frequency and \latin{vice versa} if the uncertainty of the parameters is required to be constant.
   Of course, the appropriate actions to compensate for differing $\dc$ depend on the actual goal of the model.
   \end{remark}
\end{newquote}

\item \says{JA}{On page 33, in (2.62), you have a covariance matrix. The PSD of the noise depends on the frequency. Afterwards, it is assumed constant?}
\says{KT}{For the input design, in (2.1), you assume that the noise is white. What happens for colored noise? The reason I ask this is in case you want to estimate the best linear approximation of a nonlinear system. Then you can consider the stochastic nonlinear distortions as a noise source, but that is not necessarily white.}
\action{This is addressed in Assumption 2.2 and in the following remark that has been added to the manuscript}
\oldnewpage{33}{37}
\begin{newquote}
To introduce only limited prior knowledge into the input design, we shall assume that the noise is white.
  Hence, $\sigma^2_E(\omega) = \sigma^2_E$ is constant over the whole frequency band of interest.
  If the noise coloring is known beforehand (or measured in a previous experiment), the design can be adapted in a straightforward manner, e.g. by coloring the amplitude spectrum accordingly.
\end{newquote}

  \item \says{TM}{For the robust excitation signals, you use the setup of Goodwin and coworkers. On slide 17, you focus on variance. Why not on bias?}
               \says{TM}{Why can you guarantee \replace{that}{unbiasedness of your estimators}?}
               \action{This has been addressed in Assumption 2.5 (see above)}
\end{enumerate}

\subsection{References}
\question{Update the (historical) references.}
\begin{enumerate}
\item 
\oldnewpage{51}{57}
\says{TM}{In Chapter~3: Did the problem of FRF modeling start in 1995? How long does the problem date back?  I would suggest to add a few historical references, older than 1995.}
 \action{References have been added to Chapter 3. The original editions of e.g. \citep{Bendat2010,Bendat1993,Box2008} date back to somewhere between the 1950s and 1980s, but I have opted to refer to the last editions.}
\begin{oldquote}
The measurement of \glspl{FRF} of dynamic systems is an important step in many technical applications, often used as a simple visualization of the system dynamics.
However, obtaining a good non-parametric \gls{FRF} from a measured input-output data set can be challenging due to the presence of noise, leakage and measurement time limitations to observe lightly-damped dynamics well.
\gls{FRF} measurement techniques are discussed, for instance, in
\citep{Schoukens1998,Schoukens2006LPM,Guillaume1996,Broersen1995,Pintelon2010LPM1,Antoni2007FRF,Pintelon2012}, and applied to practical devices and systems~\citep{Lim2010,Robinson1990,Behjat2010}, among others.
\end{oldquote}
\begin{newquote}
The measurement of \glspl{FRF} of dynamic systems is an important step in many technical applications, often used as a simple visualization of the system dynamics.
However, obtaining a good non-parametric \gls{FRF} from a measured input-output data set can be challenging due to the presence of noise, leakage and measurement time limitations to observe lightly-damped dynamics well.
\gls{FRF} measurement techniques are discussed, for instance, in
\citep{Schoukens1998,Schoukens2006LPM,Guillaume1996,Broersen1995,Pintelon2010LPM1,Antoni2007FRF,Pintelon2012,Bendat2010,Bendat1993,Jenkins1968,Box2008,Bendat1978,Blackman1959}, and applied to practical devices and systems~\citep{Lim2010,Robinson1990,Behjat2010,White1969}, among others.
\end{newquote}

\item 
\oldnewpage{119}{133}
\says{TM}{The initialization of the parametric estimators is an interesting expos\'e. The references have a narrow flavor. There are more people that worked on this, other than Ljung.}
\action{More (historical) references for parametric estimates and initial values have been added.}
\begin{oldquote}
Parametric identification of \gls{LTI} systems from either input/output data or non-parametric frequency response data, has been well developed as evidenced by published literature~\citep{Pintelon2012,Ljung1999,Schoukens1999,Pintelon1998}.
\end{oldquote}
\begin{newquote}
Parametric identification of \gls{LTI} systems from either input/output data or non-parametric frequency response data, has been well developed as evidenced by published literature~\citep{Pintelon2012,Ljung1999,Schoukens1999,Pintelon1998,Soderstrom1989,Goodwin1977,Brillinger1981,Sanathanan1963,McKelvey2002,Peeters2004}.
\end{newquote}
\end{enumerate}


\subsection{Define robustness}
\question{Define robustness.}

\begin{enumerate}
  \item \says{JA}{The use of the word ``robustness'' can have different meanings. What is the meaning of ``robustness of input''?}
  \says{JA}{I suggest that you define ``robustness'' in the thesis.}
  \action{Robustness has been defined in the introduction of chapter 2}
  \oldnewpage{}{14}
  \begin{newquote}
  In short, we consider an input signal to be \term{robust}, if such a signal allows one to measure the \gls{FRF} of a wide variety of systems well, without requiring a lot of specific prior information regarding the system.
Moreover, a robust input signal should not be very sensitive towards assumptions regarding the system under test (or should be easily adaptable if the assumptions are only approximately valid).
  \end{newquote}
\end{enumerate}

\subsection{Small Corrections}
\question{A list of small corrections will be provided directly by the members of the jury.}

\begin{enumerate}

\item \action{A lot of typos have been fixed and other tiny adjustments have been carried out.}
You can consult commits
  \commit{eed3f40}, 
  \commit{b163710}, 
  \commit{08d2bfb},  
  \commit{dc1d99d}, 
  \commit{06471dd}, 
  \commit{d4771ea}, and
  \commit{ca38442} where most of these small changes are contained.

\item \action{The bibliography has been made more uniform. Whenever possible, first names have been included.}

\item \says{TM}{When considering unstable LTI systems, which methods will make sense when doing truncation?}
\answer{The truncation methods are not directly usable for that situation.}
\action{A short discussion has been added to the future work section.}
\oldnewsection{}{6.2.2}
\begin{newquote}
    With respect to the impulse response truncation approaches introduced in Section~3.6, a few paths could be investigated further to broaden the class of systems where these methods are applicable and have a more subtle smoothing behavior of the transfer function.

    The crude truncation approach that is discussed in this thesis, can be interpreted as equivalent to windowing the \gls{IRF} with a rectangular window.
    In the frequency domain, this causes a very obvious correlation in the \gls{FRF}.
    By choosing a different window function (e.g. an exponential window, Hann window, \ldots), one could try to reduce this adverse effect.

   Another alternative for this crude rectangular windowing approach, which still uses a \gls{DFT} for the full size of the measurement record, could be to reduce the size of the \gls{DFT}.
   The introduced algorithms that determine a truncation length $\truncTime$, could also give a good indication of what size of \gls{DFT} would be suitable to represent the system well for the given measurements.
   However, further studies are required to compare the performance of these different alternatives.

   Currently, the truncation approach works based on the assumption that the last fragment of the \gls{IRF} is dominated by noise influences.
   The consequence of this is that the system is assumed to be stable and causal.
   It would be a worthwhile extension to relax this assumption to deal with unstable systems as well.
   For those systems, the last fragment of the \gls{IRF} will be dominated by the unstable (or anti-causal) part of the system. 
   Hence, the problem will become much harder to deal with, since a segment in the middle of the \gls{IRF} needs to be found where the noise level can be estimated.
   Afterwards, one could try to split up the \gls{IRF} in a part attributed to the stable poles and one caused by unstable poles, apply truncation on both parts separately, and recombine the results.
\end{newquote}

\item \says{JA}{With the logarithmic spacing of the multisine, you assume a specific type of damping (all $\xi$ are the same). 
So, the FRF shapes are the same on a logarithmic scale. What if you have other shapes?}
\answer{
The assumption is not that $\xi$ is identical for all sub-systems. 
Rather, when $\xi \geq \xi_{\min}$ for all sub-systems, their relative $3\unit{dB}$ bandwidth has a minimal width and is excited well when $\alpha \leq 1 + \xi_{\min}$ (see also Figure 2.8 in the manuscript: for all $\frac{\alpha -1}{\xi_{\min}} < 1$, misalignment is no longer an issue and hence the design works well).
}
\action{A remark is added to underline this fact.}
\oldnewpage{}{22}
\begin{newquote}
In this example, the $\damping$ is set to be the same in both sub-systems. 
Using different dampings in the sub-systems would, however, not change the argument, as the sub-systems with the lowest damping will be the limiting factor.
\end{newquote}

\item \says{JA}{On page 129, in (5.17), the probability is zero. You need some interpretation of the equality sign.}
\oldnewpage{129}{143}
\action{The expression with the equality sign has been removed.}
\begin{oldquote}
The success rate $\successRate_{\bullet}$ of an initialization scheme $\bullet$ is expressed mathematically as the probability function
\begin{equation}
\successRate_{\bullet} = 
  \Prob{  \model{\bullet}=\model{\trueSymbol}  }
\text{.}
\tag{5.17}
\end{equation}
In practice, and since the iterative algorithm usually does not reach the local optimum precisely, the above definition is relaxed to the form
\begin{equation}
\successRate_{\bullet} \isdef \Prob{ \norm[2]{\model{\bullet} - \model{\trueSymbol}} < \absoluteTolerance}
\tag{5.18}
\end{equation}
with $\absoluteTolerance$ a numerical tolerance that will be specified later on.
\end{oldquote}

\begin{newquote}
The success rate $\successRate_{\bullet}$ of an initialization scheme $\bullet$ is defined as the probability that it reaches the \emph{best local optimum}.
In practice, and since the iterative algorithm usually does not reach the local optimum precisely, this is implemented practically as
\begin{equation}\label{eq:successrateTol}
\successRate_{\bullet} \isdef \Prob{ \norm[2]{\model{\bullet} - \model{\true{}}} < \absoluteTolerance}
\tag{5.17}
\end{equation}
with $\absoluteTolerance$ a numerical tolerance that will be specified later on.
\end{newquote}

\item \says{BP}{Guideline 2.3 seems to be dropped further in the text.}
\action{This guideline has been moved to the conclusion of that chapter.}
\oldnewpage{47}{56}

\item
\says{JD}{I work in electrochemistry. I use multisines to characterize Warburg impedances in $\sqrt{\omega}$ and other elements in $\sqrt[x]{\omega}$. Will LPM still work for this?}
\answer{The LPM will only provide an approximation for those applications.}
\action{A paragraph has been added to the future work section.}
\oldnewpage{}{179}
\begin{newquote}
E.g. in electrochemistry it is common~\citep{Wang1987} to describe diffusion phenomena using Warburg impedances which are proportional to $\sqrt{s}$ (with $s$ the Laplace variable).
In such diffusion problems, it can be shown~\citep[p. 181]{Pintelon2012} that the system transients dampen as an $\bigO{t^{-3/2}}$, which is considerably slower than for lumped continuous-time systems and discrete-time systems where the impulse response decays exponentially, i.e. as an $\bigO{e^{-\alpha t}}$ with $\alpha > 0$.
As a result, estimating diffusion transients is an even more important task than the setting discussed in this dissertation.
    On the other hand, in microwave engineering~\citep{Rizzi1988,Pozar2005}, commensurate filters are easily described using Richards' variable $R=\tanh (\tau s)$ with $\tau$ the delay of the considered transmission lines.
    For these altered models, however, it should be verified whether numerical conditioning becomes a similar nuisance as for discrete-time models.
\end{newquote}

\item \action{Equation (2.6) has been rewritten and clarified}
\oldnewpage{15}{17}
\begin{newquote}
From (2.4) and (2.5) we see that scaling the optimal input power by $1-\delta$, has the following effect on the variance of the estimate:
\begin{align}
\var{\hat{G}(j\omega)}_{{(1-\delta)\mathrm{optimal} + \delta \mathrm{robust}}} \approx \frac{1}{1-\delta} \var{\hat{G}(j\omega)}_{\mathrm{optimal}}
\tag{2.6}
\text{.}
\end{align}
This is a small loss while the increase in robustness can be significant if the added signal is robust. 
\end{newquote}

\item 
\oldnewpage{xi}{?}
\old{Set of the positive real numbers without the number $0$}
\replacedBy
\new{Set of the real numbers without the number $0$}

\item
\oldnewpage{xi}{?}
\action{Added \new{\nBW: Number of excited lines in the 3 dB bandwidth of a resonance}}

\item 
\oldnewpage{4}{4}
\old{However, since most parametric estimators enforce very little prior knowledge about the system, noise in the measurements can sometimes be a problem to distinguish the actual system features.}
\replacedBy
\new{However, since most nonparametric estimators enforce very little prior knowledge about the system, it can sometimes be problematic to clearly distinguish noise in the measurements from the actual systematic behavior.}

\item 
\oldnewpage{5}{5}
\begin{oldquote}
In this context, `user-friendly' should be understood as having a good user experience for two groups of users:
\begin{itemize}
  \item novices, without formal training in system identification, optimization, \ldots and,
  \item well-seasoned identification practitioners that are already able to build good models.
\end{itemize}
\end{oldquote}
\begin{newquote}
In this context, `user-friendly' should be understood as being easy to use for users ranging from novices without extensive formal training to well-seasoned identification specialists.
\end{newquote}

\item 
\oldnewpage{54}{61}
\action{A remark has been added to clarify the initial values used for \gls{LRIC}.}
\begin{newquote}
The starting values for the \gls{LRIC} are obtained by means of the \gls{LRM}, the details of which are explained in Section 3.2.2.
\end{newquote}

  \item 
  \oldnewpage{56}{62}
  \says{JA}{On page 56, in (3.16): what is $\mathbf{C}$?}
  \action{There is a tilde missing. This has been changed:
  \old{$\LocalVector{C}$} \replacedBy \new{$\LocalModel{C}$}
  }
  
  \item 
  \oldnewpage{73}{80}
  \says{JA}{For (3.86), you cite Seber and Lee, 2003, but Golub was earlier. I suggest to change the reference.}
  \action{A reference to Golub has been added. Seber and Lee, however, provide a more elaborate proof such that I think their work still deserves to be mentioned.}
  \begin{newquote}
 	For linear models (i.e. $\LocalVector{Y} = \LocalMatrix{K} \LocalVector{\theta}$, such as for the {LRM} and {LPM}), however, the {PRESS} can be calculated in a single step without estimating any additional models~\citep{Golub1979}:
	\begin{equation}
	\PRESS = \frac{\LocalVector{E}^{\HT} \LocalVector{W}^{-1} \LocalVector{E}}{2N_W + 1}
         \tag{3.86}
	\end{equation}
	where $\LocalVector{E}$ is the residual vector of the full estimation and $\LocalVector{W}$ is a diagonal weighting matrix with $\LocalVector{W}_{ii} = (1 - \LocalVector{H}_{ii})^{\HT} (1 - \LocalVector{H}_{ii})$ and
	$\LocalVector{H} = \LocalMatrix{K} \pinv{\LocalMatrix{K}}$ is the so-called `hat-matrix' that is known to be idempotent.
	The proof for this can also be found in \citet[Sec.~12.3.2]{Seber2003}.
  \end{newquote}

\item 
\oldnewpage{73}{80}
\action{Constant versus varying local model complexity has been clarified.}
\begin{oldquote}
In the previous section, different methods were introduced to select the local model order. 
This begs the question whether it is advantageous to use the same local model complexity over all frequencies or the model order should be kept globally. 
The answer to this, depends on the ultimate purpose of the \gls{FRF}.
\end{oldquote}
\begin{newquote}
In the previous section, different methods were introduced to select the local model order.
This begs the question whether it is advantageous to use the same local model complexity globally over all frequencies or whether the local model order can be allowed to vary for different frequency bands.
In fact, both approaches have their merits and which of both is to be preferred, depends on the ultimate purpose of the \gls{FRF}.
\end{newquote}

  \item 
   \oldnewpage{74}{81}
   \old{Note that one should also $\nWind$ as the total number of data points (and not the number of points in each window).} 
   \replacedBy 
   \new{Note that one should interpret $\nWind$ as the total number of data points (and not the number of points in each window) for the global case.}

   \item 
   \oldnewpage{96}{?}
   \old{For an infinitely long data record, this can be described in the time-domain}
   \replacedBy
   \new{This can be described in the time-domain}

   \item 
   \oldnewpage{96}{111}
   \old{For a limited data record ($n \in \Set{0,\ldots,N-1}$)}
   \replacedBy
   \new{For a limited data record ($n \in \Set{0,\ldots,N-1}$ with $N$ the number of data points)}

  \item
  \oldnewpage{98}{113}
  \action{The following remark has been added.}
  \begin{newquote}
  \begin{remark}{4.3}
The notions $\svdMax{\Delta_{\LMM}(\omega)}$ and the ensuing  $\widehat{\phantom{_\infty}\infnorm{\Delta_{\LMM}}}$  should be regarded as notational convenience to denote that local models are used to estimate the maximal singular value and $\Hinf{}$ norm, respectively.
In reality, there is no tangible `$\Delta_{\LMM}$' that is constructed: neighboring local models are stitched together using the `$\max$' operator.
\end{remark}
  \end{newquote}

  \item
  \oldnewpage{100}{115}
  \old{In the structural engineering community(\textbf{\$}))}
  \replacedBy
  \new{In the structural engineering community~\citep[see e.g.][Section 2.2.2]{Gawronski2004}}

  \item 
  \oldnewpage{}{107}
  \says{BP}{For the peak modeling, I suggest to take over some things of your slides in the text. For example, is there a physical interpretation of $\Delta$ (slide 20)? So it is out of scope, but how does the peak modeling help with control? That link is missing.}
  \action{Section 4.2 has been added to clarify the connection between robust control and the chapter.}
  \begin{newquote}
  \section{4.2 Intermezzo: Basics of Robust Control}
\label{sec:lrmhinf:robust-control-basics}
When designing a feedback controller $\Controller$ (as in Figure~4.1), the goal is to design $\Controller$ such that the output $y$ of a plant $\true{\Plant}$ follows the command signal $r_2$ with some additional constraints on the performance of the whole closed-loop system (e.g. limited overshoot, short response times, good disturbance suppression, limited controller effort $u_c$, \ldots).
This obviously requires knowledge of the actual plant $\true{\Plant}$ that is to be controlled.
Such knowledge can be obtained by means of modeling, be it first-principles modeling or system identification or a combination of modeling approaches, which provides an approximate model $\estimated{\Plant}$.

\begin{figure}
 \centering
 \includegraphics{ch04-lrmhinf/tikz/ch04fig0.pdf}
 \caption[4.1]{The considered feedback configuration $\ClosedLoop{\true{\Plant},\Controller}$.}
 \label{fig:lrmhinf:tpc:nominal}
\end{figure}

In classical control, the working assumption is that $\estimated{\Plant} = \true{\Plant}$ and hence the design of the controller is carried out on the estimated plant model.
This has the downside that if such a controller is not designed conservatively, errors in the model may produce instability or unwanted performance degradation.

\begin{figure}%[10]{R}[1em]{5cm}
  \centering
  \includegraphics{ch04-lrmhinf/tikz/ch04fig1.pdf}
  \caption[4.2]{The plant model set.}
  \label{fig:lrmhinf:modelset}
\end{figure}

In robust control~\citep{Zhou1996,Skogestad2005}, the design is carried out while taking uncertainties into account.
This is typically done by considering that the actual plant $\true{\Plant} \in \ModelSet[]$.
Hence, the problem is then to construct the model set $\ModelSet[]$ as illustrated in Figure~4.2.
The design process thus guarantees performance and stability for the whole set instead of for a single model.
However, these guarantees hinge on proper specification of $\ModelSet[]$ which is typically done by estimating a nominal model $\estimated{\Plant}$ and the model error $\Delta(\estimated{\Plant}, \true{\Plant})$.
A generic way to express the plant model set (see also Figure~4.2) is
\begin{equation}
  \ModelSet[]_{\mathrm{generic}} 
  \isdef 
  \Set{ \Plant | \norm{\Delta(\estimated{\Plant}, \Plant)} \leq \gamma}
  \label{eq:lrmhinf:model-set:generic}
  \tag{4.1}
\end{equation}
where both the norm and the exact uncertainty description $\Delta(\estimated{\Plant}, \Plant)$, can be chosen by the control engineer.
An example of such a model set is easily imagined for additive uncertainty ($\Plant = \estimated{\Plant} + \Delta$) when a $2$-norm is used, in that case one obtains
\begin{equation}
  \ModelSet[]_{\mathrm{additive,2}} 
  = 
  \Set{ \Plant | \Bignorm[2]{ \ezbrace{\Plant - \estimated{\Plant}}{\Delta(\estimated{\Plant}, \Plant)}  } \leq \gamma}
  \equiv
  \Set{  \ezbrace{\estimated{\Plant}  + \Delta}{\Plant(\estimated{\Plant}, \Delta)}  | \norm[2]{\Delta} \leq \gamma}
  \text{,}
  \tag{4.2}
\end{equation}
which is a multidimensional ball with radius $\gamma$ centered around $\estimated{\Plant}$.
Note that in many cases, the uncertainty structure (additive, multiplicative, \ldots) can be converted to other representations~\citep{Douma2005}.

Building a good model set $\ModelSet[]$ as in \eqref{eq:lrmhinf:model-set:generic}, hence requires estimating a good value of the overbound $\gamma$ to define the size of the set.
This overbound has important practical implications for the robust controller design.
Specifically, let us denote $\true{\gamma} \isdef \norm{\true\Delta} \isdef \norm{\Delta(\estimated{\Plant}, \true{\Plant})}$.
For an estimation procedure that yields the estimate $\estimated{\gamma}$ of the overbound, a few situations can hence occur.
\begin{itemize}
  \item $\estimated\gamma < \true{\gamma}$: the model set $\ModelSet[]$ is too small and hence $\true{\Plant} \not\in \ModelSet[]$.
  Based on such an estimate, the robust controller may fail to stabilize and perform well for the actual plant $\true{\Plant}$.
  \item $\estimated\gamma \gg \true{\gamma}$: the model set $\ModelSet[]$ is too big such that the robust controller is overly conservative.
  Consequently, the actual plant is still stabilized by the controller, but the performance of the system may be degraded as the controller design accounts for too many possible plants.
    \item $\estimated\gamma = \true{\gamma}$: the model set is just large enough to contain $\true{\Plant}$ such that both stability and performance are satisfied.
\end{itemize}

In robust control, it is most common to use a (possibly weighted) $\Hinf{}$ norm in the description of the plant model set~\citep{Skogestad2005,Zhou1996} such that
\begin{equation}
\ModelSet[] = \Set{ \Plant(\Delta, \estimated{\Plant}) | \norm[\infty]{\Delta 
} \leq \gamma}
  \text{,}
  \label{eq:lrmhinf:model-set:hinf}
  \tag{4.3}
\end{equation}
which is also the approach we follow in this chapter.
This $\Hinf{}$ norm can be interpreted as the peak gain of the model error $\Delta$, i.e. its maximum amplitude over the frequencies.
In control, it is common practice to use low-order models $\estimated{\Plant}$ to keep the controller design feasible; this means that $\Delta$ is often littered with unmodeled dynamics.
When those dynamics are resonant, it can be cumbersome to detect the respective peak amplitudes reliably from a short measurement dataset.
In this chapter, this problem will be tackled by exploiting the local models.
  \end{newquote}

  \item 
  \oldnewpage{113}{}
  \action{The notation $\ClosedLoop{\Plant, \Controller}$ has been explained further}
  \begin{newquote}
  $\ClosedLoop{\Plant,\Controller}$ denotes the closed loop transfer function from the loop inputs $\mat{r_2 & r_1}^{\TT}$ to the output and input of the plant $\mat{y & u}^{\TT}$ as shown schematically in Figure~4.9.
This can be expressed symbolically, as in \citet{Oomen2012SIRP}, by
\begin{equation}
\ClosedLoop{\Plant, \Controller} \isdef
\begin{bmatrix}
  \Plant\\
  \I
\end{bmatrix}
\left(\I + \Controller \Plant \right)^{-1}
\begin{bmatrix}
\Controller & \I
\end{bmatrix}
\label{eq:lrmhinf:closedLoop}
\text{.}
\tag{4.36}
\end{equation}
  \end{newquote}

  \item
  \says{TM}{``It is \emph{well known} that Transient terms are smooth''. You should provide a derivation of this statement.}
  \action{The derivation has been added as Appendix 3.A to the book.}
  \begin{newquote}
  
\disclaimer{
This appendix is based on multiple expositions regarding transient contributions in the frequency domain.
For a more in-depth discussion, we refer to \citep{Pintelon1997ARB}, \citep{Pintelon1997Transient}, \citep{Schoukens1999}, \citep{McKelvey2002}, \citep[Section 2]{McKelvey2012LRM}, and \citep[Sections 2.6.3, 6.3.2 and Appendix 6.B]{Pintelon2012}.
}

This appendix derives the transient contributions in the frequency domain when a \gls{LTI} system is observed during a finite measurement window (i.e. $t \in \set{0, \ldots, N-1}$).
The following derivations are repeated from \citep[Appendix 6.B]{Pintelon2012}, but have been adapted in notation.

Consider the ordinary difference equation
\begin{equation}
  \sum_{n=0}^{\order{A}} a_n y(t-n)
  =
  \sum_{m=0}^{\order{B}} b_m u(t-m)
  \qquad 
  \forall 
  t \in \Integers
  \label{eq:nparam:leakage:diffeq}
\end{equation}
where the coefficients $a_n$ and $b_m$ can be either real or complex coefficients for all values of their subscripts $n \in \set{0, \ldots, \order{A}}$ and $m \in \set{0, \ldots, \order{B}}$.
Such a difference equation can be used to describe any discrete-time \gls{LTI} system (except those with arbitrary delays).

To describe this system in the frequency domain, we first introduce the (one-sided) $\ZT$-transform of the signals $x \in \set{u,y}$ and $X \in \set{U,Y}$, respectively:
\begin{equation}
  X(z) 
  \isdef 
  \ZTransform{x(t)} 
  \isdef \sum_{t=0}^{\infty} x(t) z^{-t}
\end{equation}
which reduces to the \gls{DTFT} when $z = e^{j\omega}$ is examined~\citep[Chapter 10]{Oppenheim1996}.

First, we revisit two pertinent properties of the $\ZT$-transform that are necessary to allow one to compute the $\ZT$-transform of difference equation~\eqref{eq:nparam:leakage:diffeq}.
\begin{property}{3.1}
The $\ZT$-transform is a linear transform: $\ZTransform{ay(t) + bu(t)} = a \ZTransform{y(t)} + b \ZTransform{u(t)}= aY(z) + bU(z)$ when $a$ and $b$ are finite constants~\citep[Section 10.5.1]{Oppenheim1996}.
\end{property}
\begin{property}{3.2}
The $\ZT$-transform of a shifted signal can be related to the $\ZT$-transform of the unshifted signal~\citep[Section 10.5.2]{Oppenheim1996} as
\begin{align}
  \ZTransform{x(t-n)} 
  &= \sum_{t=0}^{+\infty} x(t-n) z^{-t} \\
  &= \sum_{\tau = -n}^{+\infty} x(\tau) z^{-(\tau + n)} \quad \text{ where } \tau = t - n\\
  &= z^{-n} \sum_{\tau = 0}^{+\infty} x(\tau) z^{-\tau} +  \sum_{\tau = -n}^{-1} x(\tau) z^{-\tau-n}\\
  \ZTransform{x(t-n)} &= z^{-n} \left( \ZTransform{x(t)} + \sum_{\tau = -n}^{-1} x(\tau) z^{-\tau} \right)
  \text{.}
\end{align}
\end{property}

By using both properties, we compute the $\ZT$-transform of the left hand side of \eqref{eq:nparam:leakage:diffeq}:
\begin{align}
  \ZTransform{\sum_{n=0}^{\order{A}} a_n y(t-n)}
  &=
  \sum_{n=0}^{\order{A}} a_n z^{-n} \ZTransform{y(t)}
  +\sum_{n=0}^{\order{A}} a_n z^{-n}  \sum_{\tau = -n}^{-1} y(\tau) z^{-\tau}  \\
  &=
  A(z^{-1}) Y(z) + I_A(z^{-1})
\end{align}
where
\begin{align}
  A(z^{-1}) &= \sum_{n=0}^{\order{A}} a_n z^{-n} \\
  I_A(z^{-1}) &= \sum_{n=1}^{\order{A}} \sum_{\tau = -n}^{-1} y(\tau) a_n z^{-\tau-n}
\end{align}
are polynomials in $z^{-1}$ of respective degrees $\order{A}$ and  $\order{A}-1$ (at most).
Note that $I_A(z^{-1})$ depends on $y(\tau)$ where $\tau \in \set{-1, \ldots, -\order{A}}$, i.e. samples before the beginning of the measurement record.

For the right hand side, the derivation is analogous and yields
\begin{align}
  \ZTransform{\sum_{m=0}^{\order{B}} b_m u(t-m)} &=
  B(z^{-1}) U(z) + I_{B}(z^{-1})\\
  B(z^{-1}) &= \sum_{m=0}^{\order{B}} b_m z^{-m} \\
  I_B(z^{-1}) &= \sum_{m=1}^{\order{B}} \sum_{\tau = -m}^{-1} u(\tau) b_m z^{-\tau-m}
  \text{,}
\end{align}
where the degrees of the polynomials are respectively $\order{B}$ and $\order{B}-1$ (at most).

Combining both expressions, one obtains the $\ZT$-transform of the complete expression \eqref{eq:nparam:leakage:diffeq}:
\begin{align}
  A(z^{-1}) Y(z) = B(z^{-1}) U(z) + \ezbrace{I_B(z^{-1}) -  I_A(z^{-1})}{I_{BA}(z^{-1})}
  \label{eq:nparam:leakage:icAccounted}
\end{align}
where $I_{BA}(z^{-1})$ is a polynomial of degree at most $\max\set{\order{A}, \order{B}} -1$.
Since $I_{BA}$ depends on $u(\tau)$ and $y(\tau)$ for $\tau < 0$, it accounts for the initial conditions of the experiment.

Also notice that \eqref{eq:nparam:leakage:icAccounted} cannot be evaluated generally without the knowledge of $u(T)$ and $y(T)$ for $T \geq N$, which are not available in experimental conditions.
In particular, we are interested in the evaluation on the unit circle, i.e. $z = e^{j\omega}$ where one could hope to evaluate the \gls{DTFT}.
To overcome this problem, the samples $y(T)$ and $u(T)$ for $T \geq N$ need to be eliminated from the equation.
This can be done, as in \citet[Appendix 6.B.1]{Pintelon2012}, by multiplying \eqref{eq:nparam:leakage:diffeq} by $z^{-t}$ and summing over $t \geq N$, such that one can eventually rewrite
\begin{equation}
   A(z^{-1}) Y_{N,\infty}(z) = B(z^{-1}) U_{N,\infty}(z) + z^{-N}\ezbrace{(E_{B}(z^{-1}) - E_A(z^{-1}))}{E_{BA}(z^{-1})}
   \label{eq:nparam:leakage:ecAccounted}
 \end{equation}
 where
\begin{align}
  X_{N,\infty}(z) 
  &= \sum_{t=N}^{\infty} x(t)z^{-t}\\
  E_{B}(z^{-1}) 
  &= \sum_{m=1}^{\order{B}} \sum_{\tau = -m}^{-1} u(N+\tau) b_m z^{-\tau-m}\\
  E_A(z^{-1}) 
  &= \sum_{n=1}^{\order{A}} \sum_{\tau = -n}^{-1} y(N+\tau) a_n z^{-\tau-n}
  \text{.}
\end{align}
Note that $E_{B}$ depends on the last $\order{B}$ samples of the measurement record: it represents the final conditions of the experiment.

Subtracting \eqref{eq:nparam:leakage:ecAccounted} from \eqref{eq:nparam:leakage:icAccounted} and denoting $X_N(z) = X(z) - X_{N,\infty}(z)$ then yields
\begin{equation}
  A(z^{-1}) Y_N(z) = B(z^{-1}) U_N(z) + I_{BA}(z^{-1}) - z^{-N}E_{BA}(z^{-1})
\end{equation}
which no longer depends on the samples of $u(t)$ and $y(t)$ outside the acquired measurement record where $t \in \set{0,\ldots,N-1}$.

The latter expression can hence be evaluated on the unit circle.
More specifically, by substituting the \gls{DFT} frequencies $z_k = e^{j2\pi k N^{-1}}$ for $k \in\set{0,\ldots,N-1}$, one retrieves
\begin{equation}
  A(z_k^{-1}) Y_{\DFT}(k) = B(z_k^{-1}) U_{\DFT}(k) + T(z_k^{-1})
\end{equation}
with 
\begin{equation}
T(z^{-1}) = \frac{I_{BA}(z^{-1}) - E_{BA}(z^{-1})}{\sqrt{N}}
\end{equation}
the transient (or leakage) contribution that is a polynomial of order $\max\set{\order{B}, \order{A}} -1$ (at most), and the $N$-point \gls{DFT} spectrum of $x(n)$ given by
\begin{equation}
  X_{\DFT}(k) \isdef \frac{1}{\sqrt{N}} \sum_{n=0}^{N-1} x(n) e^{-j 2 \pi k n N^{-1}} = \frac{1}{\sqrt{N}} X_N(z_k)
  \text{.}
\end{equation}

Note that since $T(z^{-1})$ is a polynomial, $T(z_k^{-1})$ is a smooth function of the frequency, hence illustrating that the transient/leakage contribution at the \gls{DFT} frequencies is a smooth function that encapsulates the difference between the initial conditions $I_{BA}$ and end conditions $E_{BA}$ of the measurement record.

\begin{remark}{3.11}
Note that in the remainder of this dissertation, the \gls{DFT} spectra are typically denoted without the subscript $\DFT$, but the subscript is added here to emphasize the fact that the final expression is valid for the \gls{DFT} spectra of the input and output.
\end{remark}


\begin{remark}{3.12}
The leakage contribution has been derived here for discrete-time systems.
For lumped continuous-time \gls{LTI} and diffusive systems, the derivations are similar and elaborated in \citet[Appendix 6.B]{Pintelon2012}.
Most importantly, the observation that the transient contribution $T$ is described in the frequency domain by a polynomial of finite degree (and hence smooth), remains valid for such systems.
\end{remark}
  \end{newquote}
\end{enumerate}

\newpage
\printbibliography[heading=bibintoc]
\end{document}