% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/formants.R
\name{addFormants}
\alias{addFormants}
\title{Add formants}
\usage{
addFormants(sound, formants, spectralEnvelope = NULL, action = c("add",
  "remove")[1], vocalTract = NA, formantDep = 1,
  formantDepStoch = 20, formantWidth = 1, lipRad = 6, noseRad = 4,
  mouthOpenThres = 0, mouth = NA, interpol = c("approx", "spline",
  "loess")[3], temperature = 0.025, formDrift = 0.3, formDisp = 0.2,
  samplingRate = 16000, windowLength_points = 800, overlap = 75,
  normalize = TRUE)
}
\arguments{
\item{sound}{numeric vector (waveform) sampled at \code{samplingRate}}

\item{formants}{either a character string like "aaui" referring to default
presets for speaker "M1" or a list of formant times, frequencies,
amplitudes, and bandwidths (see ex. below). \code{formants = NA} defaults
to schwa. Time stamps for formants and mouth opening can be specified in ms
or on any other arbitrary scale. See \code{\link{getSpectralEnvelope}} for
more details}

\item{spectralEnvelope}{(optional): as an alternative to specifying formant
frequencies, we can provide the exact filter - a vector of non-negative
numbers specifying the power in each frequency bin on a linear scale
(interpolated to length equal to windowLength_points/2). A matrix
specifying the filter for each STFT step is also accepted. The easiest way
to create this matrix is to call \code{\link{getSpectralEnvelope}} or to use
the spectrum of a recorded sound}

\item{action}{'add' = add formants to the sound, 'remove' = remove formants
(inverse filtering)}

\item{vocalTract}{the length of vocal tract, cm. Used for calculating formant
dispersion (for adding extra formants) and formant transitions as the mouth
opens and closes. If \code{NULL} or \code{NA}, the length is estimated
based on specified formant frequencies (if any)}

\item{formantDep}{scale factor of formant amplitude (1 = no change relative
to amplitudes in \code{formants})}

\item{formantDepStoch}{the amplitude of additional stochastic formants added
above the highest specified formant, dB (only if temperature > 0)}

\item{formantWidth}{scale factor of formant bandwidth (1 = no change)}

\item{lipRad}{the effect of lip radiation on source spectrum, dB/oct (the
default of +6 dB/oct produces a high-frequency boost when the mouth is
open)}

\item{noseRad}{the effect of radiation through the nose on source spectrum,
dB/oct (the alternative to \code{lipRad} when the mouth is closed)}

\item{mouthOpenThres}{open the lips (switch from nose radiation to lip
radiation) when the mouth is open \code{>mouthOpenThres}, 0 to 1}

\item{mouth}{mouth opening (0 to 1, 0.5 = neutral, i.e. no
modification) (anchor format)}

\item{interpol}{the method of smoothing envelopes based on provided mouth
anchors: 'approx' = linear interpolation, 'spline' = cubic spline, 'loess'
(default) = polynomial local smoothing function. NB: this does NOT affect
the smoothing of formant anchors}

\item{temperature}{hyperparameter for regulating the amount of stochasticity
in sound generation}

\item{formDrift, formDisp}{scaling factors for the effect of temperature on
formant drift and dispersal, respectively}

\item{samplingRate}{sampling frequency, Hz}

\item{windowLength_points}{length of FFT window, points}

\item{overlap}{FFT window overlap, \%. For allowed values, see
\code{\link[seewave]{istft}}}

\item{normalize}{if TRUE, normalizes the output to range from -1 to +1}
}
\description{
A spectral filter that either adds or removes formants from a sound - that
is, amplifies or dampens certain frequency bands, as in human vowels. See
\code{\link{soundgen}} and \code{\link{getSpectralEnvelope}} for more
information. With \code{action = 'remove'} this function can perform inverse
filtering to remove formants and obtain raw glottal output, provided that you
can specify the correct formant structure.
}
\details{
Algorithm: converts input from a time series (time domain) to a spectrogram
(frequency domain) through short-time Fourier transform (STFT), multiplies it
by the spectral filter containing the specified formants, and transforms back to
a time series via inverse STFT. This is a subroutine in
\code{\link{soundgen}}, but it can also be used on any existing sound.
}
\examples{
sound = c(rep(0, 1000), runif(16000), rep(0, 1000))  # white noise
# NB: pad with silence to avoid artefacts if removing formants
# playme(sound)
# spectrogram(sound, samplingRate = 16000)

# add F1 = 900, F2 = 1300 Hz
sound_filtered = addFormants(sound, formants = c(900, 1300))
# playme(sound_filtered)
# spectrogram(sound_filtered, samplingRate = 16000)

# ...and remove them again (assuming we know what the formants are)
sound_inverse_filt = addFormants(sound_filtered,
                                 formants = c(900, 1300),
                                 action = 'remove')
# playme(sound_inverse_filt)
# spectrogram(sound_inverse_filt, samplingRate = 16000)

\dontrun{
# Use the spectral envelope of an existing recording (bleating of a sheep)
# (see also the same example with noise as source in ?generateNoise)
data(sheep, package = 'seewave')  # import a recording from seewave
sound_orig = as.numeric(scale(sheep@left))
samplingRate = sheep@samp.rate
sound_orig = sound_orig / max(abs(sound_orig))  # range -1 to +1
# playme(sound_orig, samplingRate)

# get a few pitch anchors to reproduce the original intonation
pitch = analyze(sound_orig, samplingRate = samplingRate,
  pitchMethod = c('autocor', 'dom'))$pitch
pitch = pitch[!is.na(pitch)]
pitch = pitch[seq(1, length(pitch), length.out = 10)]

# extract a frequency-smoothed version of the original spectrogram
# to use as filter
specEnv_bleating = spectrogram(sound_orig, windowLength = 5,
 samplingRate = samplingRate, output = 'original', plot = FALSE)
# image(t(log(specEnv_bleating)))

# Synthesize source only, with flat spectrum
sound_unfilt = soundgen(sylLen = 2500, pitch = pitch,
  rolloff = 0, rolloffOct = 0, rolloffKHz = 0,
  temperature = 0, jitterDep = 0, subDep = 0,
  formants = NULL, lipRad = 0, samplingRate = samplingRate)
# playme(sound_unfilt, samplingRate)
# seewave::meanspec(sound_unfilt, f = samplingRate, dB = 'max0')  # ~flat

# Force spectral envelope to the shape of target
sound_filt = addFormants(sound_unfilt, formants = NULL,
  spectralEnvelope = specEnv_bleating, samplingRate = samplingRate)
# playme(sound_filt, samplingRate)  # playme(sound_orig, samplingRate)
# spectrogram(sound_filt, samplingRate)  # spectrogram(sound_orig, samplingRate)

# The spectral envelope is now similar to the original recording. Compare:
par(mfrow = c(1, 2))
seewave::meanspec(sound_orig, f = samplingRate, dB = 'max0', alim = c(-50, 20))
seewave::meanspec(sound_filt, f = samplingRate, dB = 'max0', alim = c(-50, 20))
par(mfrow = c(1, 1))
# NB: but the source of excitation in the original is actually a mix of
# harmonics and noise, while the new sound is purely tonal
}
}
