diff --git a/PEMOQ/PEMOQ.m b/PEMOQ/PEMOQ.m new file mode 100644 index 0000000..f9a45ca --- /dev/null +++ b/PEMOQ/PEMOQ.m @@ -0,0 +1,5 @@ +function [ODG]=PEMOQ(ref,test) +[refa, fs]=audioread(ref); +[testa, fs]=audioread(test); +[PSM, PSMt, ODG, PSM_inst] = audioqual(refa, testa, fs); + diff --git a/PEMOQ/audioqual.p b/PEMOQ/audioqual.p new file mode 100644 index 0000000..d0709ec Binary files /dev/null and b/PEMOQ/audioqual.p differ diff --git a/PEMOQ/audioqual_hi.p b/PEMOQ/audioqual_hi.p new file mode 100644 index 0000000..809c272 Binary files /dev/null and b/PEMOQ/audioqual_hi.p differ diff --git a/PEMOQ/pemo1_SL.mexa64 b/PEMOQ/pemo1_SL.mexa64 new file mode 100644 index 0000000..da1ff28 Binary files /dev/null and b/PEMOQ/pemo1_SL.mexa64 differ diff --git a/PEMOQ/pemo2.mexa64 b/PEMOQ/pemo2.mexa64 new file mode 100644 index 0000000..76c7614 Binary files /dev/null and b/PEMOQ/pemo2.mexa64 differ diff --git a/PEMOQ/pemo3.mexa64 b/PEMOQ/pemo3.mexa64 new file mode 100644 index 0000000..406f1a0 Binary files /dev/null and b/PEMOQ/pemo3.mexa64 differ diff --git a/PEMOQ/pemo4.mexa64 b/PEMOQ/pemo4.mexa64 new file mode 100644 index 0000000..998db1a Binary files /dev/null and b/PEMOQ/pemo4.mexa64 differ diff --git a/PEMOQ/toeplitzC.c b/PEMOQ/toeplitzC.c new file mode 100644 index 0000000..b3c3bd4 --- /dev/null +++ b/PEMOQ/toeplitzC.c @@ -0,0 +1,128 @@ +/*================================================================= + * + * toeplitzC.C Sample .MEX file corresponding to toeplitz.m + * Solves simple 3 body orbit problem + * + * The calling syntax is: + * + * [yp] = yprime(t, y) + * TOEPLITZ(C,R) is a non-symmetric Toeplitz matrix having C as its + * first column and R as its first row. + * + * TOEPLITZ(R) is a symmetric Toeplitz matrix for real R. + * For a complex vector R with a real first element, T = toeplitz(r) + * returns the Hermitian Toeplitz matrix formed from R. When the + * first element of R is not real, the resulting matrix is Hermitian + * off the main diagonal, i.e., T_{i,j} = conj(T_{j,i}) for i ~= j. + * + * You may also want to look at the corresponding M-code, yprime.m. + * + * This is a MEX-file for MATLAB. + * Copyright 1984-2006 The MathWorks, Inc. + * + *=================================================================*/ +/* $Revision: 1.10.6.4 $ */ +#include +#include "mex.h" + +/* Input Arguments */ + +#define C_IN prhs[0] +#define R_IN prhs[1] + + +/* Output Arguments */ + +#define T_OUT plhs[0] + + +static void toeplitzC( + double t[], + double c[], + double r[], + int m, int n + ) +{ + int i,j,m0; + + for (j=0;jm?m:j; + for (i=0;i<=m0;i++) + t[j*m+i] = r[j-i]; + for (i=j+1;i 2) { + mexErrMsgTxt("More than 2 input arguments."); + } else if (nrhs == 0) { + mexErrMsgTxt("1 or 2 input arguments required."); + } else if (nrhs == 1) { + mexErrMsgTxt("1 input argument: not implemented (yet), use toeplitz."); + } else if (nlhs > 1) { + mexErrMsgTxt("Too many output arguments."); + } + + if (nrhs == 1) { + mexErrMsgTxt("Not implemented (yet). Please use 2 input arguments or toeplitz.m with one input argument."); + } + + /* Check the dimensions of Y. Y can be 4 X 1 or 1 X 4. */ + + tm = mxGetNumberOfElements(C_IN); + tn = mxGetNumberOfElements(R_IN); + + /* Create a matrix for the return argument */ + if (mxIsComplex(C_IN) || mxIsComplex(R_IN)) + T_OUT = mxCreateDoubleMatrix(tm, tn, mxCOMPLEX); + else + T_OUT = mxCreateDoubleMatrix(tm, tn, mxREAL); + + /* Assign pointers to the various parameters */ + tr = mxGetPr(T_OUT); + + cr = mxGetPr(C_IN); + rr = mxGetPr(R_IN); + + /* Do the actual computations in a subroutine */ + toeplitzC(tr,cr,rr,tm,tn); + + /* Imaginary part */ + if (mxIsComplex(C_IN) || mxIsComplex(R_IN)){ + /*if (!mxIsComplex(C_IN)){ + mexErrMsgTxt("Not implemented (yet). A"); + } + else*/ + if (mxIsComplex(C_IN)) + ci = mxGetPi(C_IN); + else + ci = mxGetPr(mxCreateDoubleMatrix(tm, 1, mxREAL)); + /*if (!mxIsComplex(R_IN)){ + mexErrMsgTxt("Not implemented (yet). B"); + } + else*/ + if (mxIsComplex(R_IN)) + ri = mxGetPi(R_IN); + else + ri = mxGetPr(mxCreateDoubleMatrix(tn, 1, mxREAL)); + ti = mxGetPi(T_OUT); + toeplitzC(ti, ci, ri, tm, tn); + } + + return; + +} + + diff --git a/PQevalAudio/CB/PQCB.m b/PQevalAudio/CB/PQCB.m new file mode 100644 index 0000000..075cfa1 --- /dev/null +++ b/PQevalAudio/CB/PQCB.m @@ -0,0 +1,99 @@ +function [Nc, fc, fl, fu, dz] = PQCB (Version) +% Critical band parameters for the FFT model + +% P. Kabal $Revision: 1.1 $ $Date: 2003/12/07 13:32:57 $ + +B = inline ('7 * asinh (f / 650)'); +BI = inline ('650 * sinh (z / 7)'); + +fL = 80; +fU = 18000; + +% Critical bands - set up the tables +if (strcmp (Version, 'Basic')) + dz = 1/4; +elseif (strcmp (Version, 'Advanced')) + dz = 1/2; +else + error ('PQCB: Invalid version'); +end + +zL = B(fL); +zU = B(fU); +Nc = ceil((zU - zL) / dz); +zl = zL + (0:Nc-1) * dz; +zu = min (zL + (1:Nc) * dz, zU); +zc = 0.5 * (zl + zu); + +fl = BI (zl); +fc = BI (zc); +fu = BI (zu); + +if (strcmp (Version, 'Basic')) + fl = [ 80.000, 103.445, 127.023, 150.762, 174.694, ... + 198.849, 223.257, 247.950, 272.959, 298.317, ... + 324.055, 350.207, 376.805, 403.884, 431.478, ... + 459.622, 488.353, 517.707, 547.721, 578.434, ... + 609.885, 642.114, 675.161, 709.071, 743.884, ... + 779.647, 816.404, 854.203, 893.091, 933.119, ... + 974.336, 1016.797, 1060.555, 1105.666, 1152.187, ... + 1200.178, 1249.700, 1300.816, 1353.592, 1408.094, ... + 1464.392, 1522.559, 1582.668, 1644.795, 1709.021, ... + 1775.427, 1844.098, 1915.121, 1988.587, 2064.590, ... + 2143.227, 2224.597, 2308.806, 2395.959, 2486.169, ... + 2579.551, 2676.223, 2776.309, 2879.937, 2987.238, ... + 3098.350, 3213.415, 3332.579, 3455.993, 3583.817, ... + 3716.212, 3853.817, 3995.399, 4142.547, 4294.979, ... + 4452.890, 4616.482, 4785.962, 4961.548, 5143.463, ... + 5331.939, 5527.217, 5729.545, 5939.183, 6156.396, ... + 6381.463, 6614.671, 6856.316, 7106.708, 7366.166, ... + 7635.020, 7913.614, 8202.302, 8501.454, 8811.450, ... + 9132.688, 9465.574, 9810.536, 10168.013, 10538.460, ... + 10922.351, 11320.175, 11732.438, 12159.670, 12602.412, ... + 13061.229, 13536.710, 14029.458, 14540.103, 15069.295, ... + 15617.710, 16186.049, 16775.035, 17385.420 ]; + fc = [ 91.708, 115.216, 138.870, 162.702, 186.742, ... + 211.019, 235.566, 260.413, 285.593, 311.136, ... + 337.077, 363.448, 390.282, 417.614, 445.479, ... + 473.912, 502.950, 532.629, 562.988, 594.065, ... + 625.899, 658.533, 692.006, 726.362, 761.644, ... + 797.898, 835.170, 873.508, 912.959, 953.576, ... + 995.408, 1038.511, 1082.938, 1128.746, 1175.995, ... + 1224.744, 1275.055, 1326.992, 1380.623, 1436.014, ... + 1493.237, 1552.366, 1613.474, 1676.641, 1741.946, ... + 1809.474, 1879.310, 1951.543, 2026.266, 2103.573, ... + 2183.564, 2266.340, 2352.008, 2440.675, 2532.456, ... + 2627.468, 2725.832, 2827.672, 2933.120, 3042.309, ... + 3155.379, 3272.475, 3393.745, 3519.344, 3649.432, ... + 3784.176, 3923.748, 4068.324, 4218.090, 4373.237, ... + 4533.963, 4700.473, 4872.978, 5051.700, 5236.866, ... + 5428.712, 5627.484, 5833.434, 6046.825, 6267.931, ... + 6497.031, 6734.420, 6980.399, 7235.284, 7499.397, ... + 7773.077, 8056.673, 8350.547, 8655.072, 8970.639, ... + 9297.648, 9636.520, 9987.683, 10351.586, 10728.695, ... + 11119.490, 11524.470, 11944.149, 12379.066, 12829.775, ... + 13294.850, 13780.887, 14282.503, 14802.338, 15341.057, ... + 15899.345, 16477.914, 17077.504, 17690.045 ]; + fu = [ 103.445, 127.023, 150.762, 174.694, 198.849, ... + 223.257, 247.950, 272.959, 298.317, 324.055, ... + 350.207, 376.805, 403.884, 431.478, 459.622, ... + 488.353, 517.707, 547.721, 578.434, 609.885, ... + 642.114, 675.161, 709.071, 743.884, 779.647, ... + 816.404, 854.203, 893.091, 933.113, 974.336, ... + 1016.797, 1060.555, 1105.666, 1152.187, 1200.178, ... + 1249.700, 1300.816, 1353.592, 1408.094, 1464.392, ... + 1522.559, 1582.668, 1644.795, 1709.021, 1775.427, ... + 1844.098, 1915.121, 1988.587, 2064.590, 2143.227, ... + 2224.597, 2308.806, 2395.959, 2486.169, 2579.551, ... + 2676.223, 2776.309, 2879.937, 2987.238, 3098.350, ... + 3213.415, 3332.579, 3455.993, 3583.817, 3716.212, ... + 3853.348, 3995.399, 4142.547, 4294.979, 4452.890, ... + 4643.482, 4785.962, 4961.548, 5143.463, 5331.939, ... + 5527.217, 5729.545, 5939.183, 6156.396, 6381.463, ... + 6614.671, 6856.316, 7106.708, 7366.166, 7635.020, ... + 7913.614, 8202.302, 8501.454, 8811.450, 9132.688, ... + 9465.574, 9810.536, 10168.013, 10538.460, 10922.351, ... + 11320.175, 11732.438, 12159.670, 12602.412, 13061.229, ... + 13536.710, 14029.458, 14540.103, 15069.295, 15617.710, ... + 16186.049, 16775.035, 17385.420, 18000.000 ]; +end diff --git a/PQevalAudio/CB/PQDFTFrame.m b/PQevalAudio/CB/PQDFTFrame.m new file mode 100644 index 0000000..4719334 --- /dev/null +++ b/PQevalAudio/CB/PQDFTFrame.m @@ -0,0 +1,60 @@ +function X2 = PQDFTFrame (x) +% Calculate the DFT of a frame of data (NF values), returning the +% squared-magnitude DFT vector (NF/2 + 1 values) + +% P. Kabal $Revision: 1.1 $ $Date: 2003/12/07 13:32:57 $ + +persistent hw + +NF = 2048; % Frame size (samples) + +if (isempty (hw)) + Amax = 32768; + fc = 1019.5; + Fs = 48000; + Lp = 92; + % Set up the window (including all gains) + GL = PQ_GL (NF, Amax, fc/Fs, Lp); + hw = GL * PQHannWin (NF); +end + +% Window the data +xw = hw .* x; + +% DFT (output is real followed by imaginary) +X = PQRFFT (xw, NF, 1); + +% Squared magnitude +X2 = PQRFFTMSq (X, NF); + +%---------------------------------------- +function GL = PQ_GL (NF, Amax, fcN, Lp) +% Scaled Hann window, including loudness scaling + +% Calculate the gain for the Hann Window +% - level Lp (SPL) corresponds to a sine with normalized frequency +% fcN and a peak value of Amax + +W = NF - 1; +gp = PQ_gp (fcN, NF, W); +GL = 10^(Lp / 20) / (gp * Amax/4 * W); + +%---------- +function gp = PQ_gp (fcN, NF, W) +% Calculate the peak factor. The signal is a sinusoid windowed with +% a Hann window. The sinusoid frequency falls between DFT bins. The +% peak of the frequency response (on a continuous frequency scale) falls +% between DFT bins. The largest DFT bin value is the peak factor times +% the peak of the continuous response. +% fcN - Normalized sinusoid frequency (0-1) +% NF - Frame (DFT) length samples +% NW - Window length samples + +% Distance to the nearest DFT bin +df = 1 / NF; +k = floor (fcN / df); +dfN = min ((k+1) * df - fcN, fcN - k * df); + +dfW = dfN * W; +gp = sin(pi * dfW) / (pi * dfW * (1 - dfW^2)); + diff --git a/PQevalAudio/CB/PQeval.m b/PQevalAudio/CB/PQeval.m new file mode 100644 index 0000000..ec98373 --- /dev/null +++ b/PQevalAudio/CB/PQeval.m @@ -0,0 +1,112 @@ +function [MOVI, Fmem] = PQeval (xR, xT, Fmem) +% PEAQ - Process one frame with the FFT model + +% P. Kabal $Revision: 1.1 $ $Date: 2003/12/07 13:32:58 $ + +NF = 2048; +Version = 'Basic'; + +% Windowed DFT +X2(1,:) = PQDFTFrame (xR); +X2(2,:) = PQDFTFrame (xT); + +% Critical band grouping and frequency spreading +[EbN, Es] = PQ_excitCB (X2); + +% Time domain smoothing => "Excitation patterns" +[Ehs(1,:), Fmem.TDS.Ef(1,:)] = PQ_timeSpread (Es(1,:), Fmem.TDS.Ef(1,:)); +[Ehs(2,:), Fmem.TDS.Ef(2,:)] = PQ_timeSpread (Es(2,:), Fmem.TDS.Ef(2,:)); + +% Level and pattern adaptation => "Spectrally adapted patterns" +[EP, Fmem.Adap] = PQadapt (Ehs, Fmem.Adap, Version, 'FFT'); + +% Modulation patterns +[M, ERavg, Fmem.Env] = PQmodPatt (Es, Fmem.Env); + +% Loudness +MOVI.Loud.NRef = PQloud (Ehs(1,:), Version, 'FFT'); +MOVI.Loud.NTest = PQloud (Ehs(2,:), Version, 'FFT'); + +% Modulation differences +MOVI.MDiff = PQmovModDiffB (M, ERavg); + +% Noise Loudness +MOVI.NLoud.NL = PQmovNLoudB (M, EP); + +% Bandwidth +MOVI.BW = PQmovBW (X2); + +% Noise-to-mask ratios +MOVI.NMR = PQmovNMRB (EbN, Ehs(1,:)); + +% Probability of detection +MOVI.PD = PQmovPD (Ehs); + +% Error harmonic structure +MOVI.EHS.EHS = PQmovEHS (xR, xT, X2); + +%-------------------- +function [EbN, Es] = PQ_excitCB (X2) + +persistent W2 EIN + +NF = 2048; +Version = 'Basic'; +if (isempty (W2)) + Fs = 48000; + f = linspace (0, Fs/2, NF/2+1); + W2 = PQWOME (f); + [Nc, fc] = PQCB (Version); + EIN = PQIntNoise (fc); +end + +% Allocate storage +XwN2 = zeros (1, NF/2+1); + +% Outer and middle ear filtering +Xw2(1,:) = W2 .* X2(1,1:NF/2+1); +Xw2(2,:) = W2 .* X2(2,1:NF/2+1); + +% Form the difference magnitude signal +for (k = 0:NF/2) + XwN2(k+1) = (Xw2(1,k+1) - 2 * sqrt (Xw2(1,k+1) * Xw2(2,k+1)) ... + + Xw2(2,k+1)); +end + +% Group into partial critical bands +Eb(1,:) = PQgroupCB (Xw2(1,:), Version); +Eb(2,:) = PQgroupCB (Xw2(2,:), Version); +EbN = PQgroupCB (XwN2, Version); + +% Add the internal noise term => "Pitch patterns" +E(1,:) = Eb(1,:) + EIN; +E(2,:) = Eb(2,:) + EIN; + +% Critical band spreading => "Unsmeared excitation patterns" +Es(1,:) = PQspreadCB (E(1,:), Version); +Es(2,:) = PQspreadCB (E(2,:), Version); + +%-------------------- +function [Ehs, Ef] = PQ_timeSpread (Es, Ef) + +persistent Nc a b + +if (isempty (Nc)) + [Nc, fc] = PQCB ('Basic'); + Fs = 48000; + NF = 2048; + Nadv = NF / 2; + Fss = Fs / Nadv; + t100 = 0.030; + tmin = 0.008; + [a, b] = PQtConst (t100, tmin, fc, Fss); +end + +% Allocate storage +Ehs = zeros (1, Nc); + +% Time domain smoothing +for (m = 0:Nc-1) + Ef(m+1) = a(m+1) * Ef(m+1) + b(m+1) * Es(m+1); + Ehs(m+1) = max(Ef(m+1), Es(m+1)); +end diff --git a/PQevalAudio/CB/PQgroupCB.m b/PQevalAudio/CB/PQgroupCB.m new file mode 100644 index 0000000..ae6c79d --- /dev/null +++ b/PQevalAudio/CB/PQgroupCB.m @@ -0,0 +1,63 @@ +function Eb = PQgroupCB (X2, Ver) +% Group a DFT energy vector into critical bands +% X2 - Squared-magnitude vector (DFT bins) +% Eb - Excitation vector (fractional critical bands) + +% P. Kabal $Revision: 1.2 $ $Date: 2004/02/05 04:25:46 $ + +persistent Nc kl ku Ul Uu Version + +Emin = 1e-12; + +if (~ strcmp (Ver, Version)) + Version = Ver; + % Set up the DFT bin to critical band mapping + NF = 2048; + Fs = 48000; + [Nc, kl, ku, Ul, Uu] = PQ_CBMapping (NF, Fs, Version); +end + +% Allocate storage +Eb = zeros (1, Nc); + +% Compute the excitation in each band +for (i = 0:Nc-1) + Ea = Ul(i+1) * X2(kl(i+1)+1); % First bin + for (k = (kl(i+1)+1):(ku(i+1)-1)) + Ea = Ea + X2(k+1); % Middle bins + end + Ea = Ea + Uu(i+1) * X2(ku(i+1)+1); % Last bin + Eb(i+1) = max(Ea, Emin); +end + +%--------------------------------------- +function [Nc, kl, ku, Ul, Uu] = PQ_CBMapping (NF, Fs, Version) + +[Nc, fc, fl, fu] = PQCB (Version); + +% Fill in the DFT bin to critical band mappings +df = Fs / NF; +for (i = 0:Nc-1) + fli = fl(i+1); + fui = fu(i+1); + for (k = 0:NF/2) + if ((k+0.5)*df > fli) + kl(i+1) = k; % First bin in band i + Ul(i+1) = (min(fui, (k+0.5)*df) ... + - max(fli, (k-0.5)*df)) / df; + break; + end + end + for (k = NF/2:-1:0) + if ((k-0.5)*df < fui) + ku(i+1) = k; % Last bin in band i + if (kl(i+1) == ku(i+1)) + Uu(i+1) = 0; % Single bin in band + else + Uu(i+1) = (min(fui, (k+0.5)*df) ... + - max(fli, (k-0.5)*df)) / df; + end + break; + end + end +end diff --git a/PQevalAudio/CB/PQspreadCB.m b/PQevalAudio/CB/PQspreadCB.m new file mode 100644 index 0000000..d0efe16 --- /dev/null +++ b/PQevalAudio/CB/PQspreadCB.m @@ -0,0 +1,66 @@ +function Es = PQspreadCB (E, Ver) +% Spread an excitation vector (pitch pattern) - FFT model +% Both E and Es are powers + +% P. Kabal $Revision: 1.1 $ $Date: 2003/12/07 13:32:58 $ + +persistent Bs Version + +if (~ strcmp (Ver, Version)) + Version = Ver; + Nc = length (E); + Bs = PQ_SpreadCB (ones(1,Nc), ones(1,Nc), Version); +end + +Es = PQ_SpreadCB (E, Bs, Version); + +%------------------------- +function Es = PQ_SpreadCB (E, Bs, Ver); + +persistent Nc dz fc aL aUC Version + +% Power law for addition of spreading +e = 0.4; + +if (~ strcmp (Ver, Version)) + Version = Ver; + [Nc, fc, fl, fu, dz] = PQCB (Version); +end + +% Allocate storage +aUCEe = zeros (1, Nc); +Ene = zeros (1, Nc); +Es = zeros (1, Nc); + +% Calculate energy dependent terms +aL = 10^(-2.7 * dz); +for (m = 0:Nc-1) + aUC = 10^((-2.4 - 23 / fc(m+1)) * dz); + aUCE = aUC * E(m+1)^(0.2 * dz); + gIL = (1 - aL^(m+1)) / (1 - aL); + gIU = (1 - aUCE^(Nc-m)) / (1 - aUCE); + En = E(m+1) / (gIL + gIU - 1); + aUCEe(m+1) = aUCE^e; + Ene(m+1) = En^e; +end + +% Lower spreading +Es(Nc-1+1) = Ene(Nc-1+1); +aLe = aL^e; +for (m = Nc-2:-1:0) + Es(m+1) = aLe * Es(m+1+1) + Ene(m+1); +end + +% Upper spreading i > m +for (m = 0:Nc-2) + r = Ene(m+1); + a = aUCEe(m+1); + for (i = m+1:Nc-1) + r = r * a; + Es(i+1) = Es(i+1) + r; + end +end + +for (i = 0:Nc-1) + Es(i+1) = (Es(i+1))^(1/e) / Bs(i+1); +end diff --git a/PQevalAudio/MOV/PQavgMOVB.m b/PQevalAudio/MOV/PQavgMOVB.m new file mode 100644 index 0000000..a78e7ea --- /dev/null +++ b/PQevalAudio/MOV/PQavgMOVB.m @@ -0,0 +1,263 @@ +function MOV = PQavgMOVB (MOVC, Nchan, Nwup) +% Time average MOV precursors + +% P. Kabal $Revision: 1.1 $ $Date: 2003/12/07 13:34:46 $ + +Fs = 48000; +NF = 2048; +Nadv = NF / 2; +Fss = Fs / Nadv; +tdel = 0.5; +tex = 0.050; + +% BandwidthRefB, BandwidthTestB +[MOV(0+1), MOV(1+1)] = PQ_avgBW (MOVC.BW); + +% Total NMRB, RelDistFramesB +[MOV(2+1), MOV(10+1)] = PQ_avgNMRB (MOVC.NMR); + +% WinModDiff1B, AvgModDiff1B, AvgModDiff2B +N500ms = ceil (tdel * Fss); +Ndel = max (0, N500ms - Nwup); +[MOV(3+1), MOV(6+1), MOV(7+1)] = PQ_avgModDiffB (Ndel, MOVC.MDiff); + +% RmsNoiseLoudB +N50ms = ceil (tex * Fss); +Nloud = PQloudTest (MOVC.Loud); +Ndel = max (Nloud + N50ms, Ndel); +MOV(8+1) = PQ_avgNLoudB (Ndel, MOVC.NLoud); + +% ADBB, MFPDB +[MOV(4+1), MOV(9+1)] = PQ_avgPD (MOVC.PD); + +% EHSB +MOV(5+1) = PQ_avgEHS (MOVC.EHS); + +%----------------------------------------- +function EHSB = PQ_avgEHS (EHS) + +[Nchan, Np] = size (EHS.EHS); + +s = 0; +for (j = 0:Nchan-1) + s = s + PQ_LinPosAvg (EHS.EHS(j+1,:)); +end +EHSB = 1000 * s / Nchan; + + +%----------------------------------------- +function [ADBB, MFPDB] = PQ_avgPD (PD) + +global PQopt + +c0 = 0.9; +if (isempty (PQopt)) + c1 = 1; +else + c1 = PQopt.PDfactor; +end + +N = length (PD.Pc); +Phc = 0; +Pcmax = 0; +Qsum = 0; +nd = 0; +for (i = 0:N-1) + Phc = c0 * Phc + (1 - c0) * PD.Pc(i+1); + Pcmax = max (Pcmax * c1, Phc); + + if (PD.Pc(i+1) > 0.5) + nd = nd + 1; + Qsum = Qsum + PD.Qc(i+1); + end +end + +if (nd == 0) + ADBB = 0; +elseif (Qsum > 0) + ADBB = log10 (Qsum / nd); +else + ADBB = -0.5; +end + +MFPDB = Pcmax; + +%----------------------------------------- +function [TotalNMRB, RelDistFramesB] = PQ_avgNMRB (NMR) + +[Nchan, Np] = size (NMR.NMRavg); +Thr = 10^(1.5 / 10); + +s = 0; +for (j = 0:Nchan-1) + s = s + 10 * log10 (PQ_LinAvg (NMR.NMRavg(j+1,:))); +end +TotalNMRB = s / Nchan; + +s = 0; +for (j = 0:Nchan-1) + s = s + PQ_FractThr (Thr, NMR.NMRmax(j+1,:)); +end +RelDistFramesB = s / Nchan; + +%----------------------------------------- +function [BandwidthRefB, BandwidthTestB] = PQ_avgBW (BW) + +[Nchan, Np] = size (BW.BWRef); + +sR = 0; +sT = 0; +for (j = 0:Nchan-1) + sR = sR + PQ_LinPosAvg (BW.BWRef(j+1,:)); + sT = sT + PQ_LinPosAvg (BW.BWTest(j+1,:)); +end +BandwidthRefB = sR / Nchan; +BandwidthTestB = sT / Nchan; + +%----------------------------------------- +function [WinModDiff1B, AvgModDiff1B, AvgModDiff2B] = PQ_avgModDiffB (Ndel, MDiff) + +NF = 2048; +Nadv = NF / 2; +Fs = 48000; + +Fss = Fs / Nadv; +tavg = 0.1; + +[Nchan, Np] = size (MDiff.Mt1B); + +% Sliding window average - delayed average +L = floor (tavg * Fss); % 100 ms sliding window length +s = 0; +for (j = 0:Nchan-1) + s = s + PQ_WinAvg (L, MDiff.Mt1B(j+1,Ndel+1:Np-1+1)); +end +WinModDiff1B = s / Nchan; + +% Weighted linear average - delayed average +s = 0; +for (j = 0:Nchan-1) + s = s + PQ_WtAvg (MDiff.Mt1B(j+1,Ndel+1:Np-1+1), MDiff.Wt(j+1,Ndel+1:Np-1+1)); +end +AvgModDiff1B = s / Nchan; + +% Weighted linear average - delayed average +s = 0; +for (j = 0:Nchan-1) + s = s + PQ_WtAvg (MDiff.Mt2B(j+1,Ndel+1:Np-1+1), MDiff.Wt(j+1,Ndel+1:Np-1+1)); +end +AvgModDiff2B = s / Nchan; + +%----------------------------------------- +function RmsNoiseLoudB = PQ_avgNLoudB (Ndel, NLoud) + +[Nchan, Np] = size (NLoud.NL); + +% RMS average - delayed average and loudness threshold +s = 0; +for (j = 0:Nchan-1) + s = s + PQ_RMSAvg (NLoud.NL(j+1,Ndel+1:Np-1+1)); +end +RmsNoiseLoudB = s / Nchan; + +%----------------------------------- +% Average values values, omitting values which are negative +function s = PQ_LinPosAvg (x) + +N = length(x); + +Nv = 0; +s = 0; +for (i = 0:N-1) + if (x(i+1) >= 0) + s = s + x(i+1); + Nv = Nv + 1; + end +end + +if (Nv > 0) + s = s / Nv; +end + +%---------- +% Fraction of values above a threshold +function Fd = PQ_FractThr (Thr, x) + +N = length (x); + +Nv = 0; +for (i = 0:N-1) + if (x(i+1) > Thr) + Nv = Nv + 1; + end +end + +if (N > 0) + Fd = Nv / N; +else + Fd = 0; +end + +%----------- +% Sliding window (L samples) average +function s = PQ_WinAvg (L, x) + +N = length (x); + +s = 0; +for (i = L-1:N-1) + t = 0; + for (m = 0:L-1) + t = t + sqrt (x(i-m+1)); + end + s = s + (t / L)^4; +end + +if (N >= L) + s = sqrt (s / (N - L + 1)); +end + +%---------- +% Weighted average +function s = PQ_WtAvg (x, W) + +N = length (x); + +s = 0; +sW = 0; +for (i = 0:N-1) + s = s + W(i+1) * x(i+1); + sW = sW + W(i+1); +end + +if (N > 0) + s = s / sW; +end + +%---------- +% Linear average +function LinAvg = PQ_LinAvg (x) + +N = length (x); +s = 0; +for (i = 0:N-1) + s = s + x(i+1); +end + +LinAvg = s / N; + +%---------- +% Square root of average of squared values +function RMSAvg = PQ_RMSAvg (x) + +N = length (x); +s = 0; +for (i = 0:N-1) + s = s + x(i+1)^2; +end + +if (N > 0) + RMSAvg = sqrt(s / N); +else + RMSAvg = 0; +end diff --git a/PQevalAudio/MOV/PQframeMOV.m b/PQevalAudio/MOV/PQframeMOV.m new file mode 100644 index 0000000..af3f9bf --- /dev/null +++ b/PQevalAudio/MOV/PQframeMOV.m @@ -0,0 +1,65 @@ +function PQframeMOV (i, MOVI) +% Copy instantaneous MOV values to a new structure +% The output struct MOVC is a global. +% For most MOV's, they are just copied to the output structure. +% The exception is for the probability of detection, where the +% MOV's measure the maximum frequency-by-frequecy between channels. + +% P. Kabal $Revision: 1.1 $ $Date: 2003/12/07 13:34:46 $ + +global MOVC + +[Nchan,Nc] = size (MOVC.MDiff.Mt1B); + +for (j = 1:Nchan) + + % Modulation differences + MOVC.MDiff.Mt1B(j,i+1) = MOVI(j).MDiff.Mt1B; + MOVC.MDiff.Mt2B(j,i+1) = MOVI(j).MDiff.Mt2B; + MOVC.MDiff.Wt(j,i+1) = MOVI(j).MDiff.Wt; + + % Noise loudness + MOVC.NLoud.NL(j,i+1) = MOVI(j).NLoud.NL; + + % Total loudness + MOVC.Loud.NRef(j,i+1) = MOVI(j).Loud.NRef; + MOVC.Loud.NTest(j,i+1) = MOVI(j).Loud.NTest; + + % Bandwidth + MOVC.BW.BWRef(j,i+1) = MOVI(j).BW.BWRef; + MOVC.BW.BWTest(j,i+1) = MOVI(j).BW.BWTest; + + % Noise-to-mask ratio + MOVC.NMR.NMRavg(j,i+1) = MOVI(j).NMR.NMRavg; + MOVC.NMR.NMRmax(j,i+1) = MOVI(j).NMR.NMRmax; + + % Error harmonic structure + MOVC.EHS.EHS(j,i+1) = MOVI(j).EHS.EHS; +end + +% Probability of detection (collapse frequency bands) +[MOVC.PD.Pc(i+1), MOVC.PD.Qc(i+1)] = PQ_ChanPD (MOVI); + +%---------------------------------------- +function [Pc, Qc] = PQ_ChanPD (MOVI) + +Nc = length (MOVI(1).PD.p); +Nchan = length (MOVI); + +Pr = 1; +Qc = 0; +if (Nchan > 1) + for (m = 0:Nc-1) + pbin = max (MOVI(1).PD.p(m+1), MOVI(2).PD.p(m+1)); + qbin = max (MOVI(1).PD.q(m+1), MOVI(2).PD.q(m+1)); + Pr = Pr * (1 - pbin); + Qc = Qc + qbin; + end +else + for (m = 0:Nc-1) + Pr = Pr * (1 - MOVI.PD.p(m+1)); + Qc = Qc + MOVI.PD.q(m+1); + end +end + +Pc = 1 - Pr; diff --git a/PQevalAudio/MOV/PQloudTest.m b/PQevalAudio/MOV/PQloudTest.m new file mode 100644 index 0000000..69fab55 --- /dev/null +++ b/PQevalAudio/MOV/PQloudTest.m @@ -0,0 +1,29 @@ +function Ndel = PQloudTest (Loud) +% Loudness threshold + +% P. Kabal $Revision: 1.1 $ $Date: 2003/12/07 13:34:46 $ + +[Nchan, Np] = size (Loud.NRef); + +Thr = 0.1; + +% Loudness threshold +Ndel = Np; +for (j = 0:Nchan-1) + Ndel = min (Ndel, PQ_LThresh (Thr, Loud.NRef(j+1,:), Loud.NTest(j+1,:))); +end + +%----------- +function it = PQ_LThresh (Thr, NRef, NTest) +% Loudness check: Look for the first time, the loudness exceeds a threshold +% for both the test and reference signals. + +Np = length (NRef); + +it = Np; +for (i = 0:Np-1) + if (NRef(i+1) > Thr & NTest(i+1) > Thr) + it = i; + break; + end +end diff --git a/PQevalAudio/MOV/PQmovBW.m b/PQevalAudio/MOV/PQmovBW.m new file mode 100644 index 0000000..2e4d45e --- /dev/null +++ b/PQevalAudio/MOV/PQmovBW.m @@ -0,0 +1,45 @@ +function BW = PQmovBW (X2) +% Bandwidth tests + +% P. Kabal $Revision: 1.1 $ $Date: 2003/12/07 13:34:46 $ + +persistent kx kl FR FT N + +if (isempty (kx)) + NF = 2048; + Fs = 48000; + fx = 21586; + kx = round (fx / Fs * NF); % 921 + fl = 8109; + kl = round (fl / Fs * NF); % 346 + FRdB = 10; + FR = 10^(FRdB / 10); + FTdB = 5; + FT = 10^(FTdB / 10); + N = NF / 2; % Limit from pseudo-code +end + +Xth = X2(2,kx+1); +for (k = kx+1:N-1) + Xth = max (Xth, X2(2,k+1)); +end + +% BWRef and BWTest remain negative if the BW of the test signal +% does not exceed FR * Xth for kx-1 <= k <= kl+1 +BW.BWRef = -1; +XthR = FR * Xth; +for (k = kx-1:-1:kl+1) + if (X2(1,k+1) >= XthR) + BW.BWRef = k + 1; + break; + end +end + +BW.BWTest = -1; +XthT = FT * Xth; +for (k = BW.BWRef-1:-1:0) + if (X2(2,k+1) >= XthT) + BW.BWTest = k + 1; + break; + end +end diff --git a/PQevalAudio/MOV/PQmovEHS.m b/PQevalAudio/MOV/PQmovEHS.m new file mode 100644 index 0000000..36e6ca5 --- /dev/null +++ b/PQevalAudio/MOV/PQmovEHS.m @@ -0,0 +1,135 @@ +function EHS = PQmovEHS (xR, xT, X2) +% Calculate the EHS MOV values + +% P. Kabal $Revision: 1.2 $ $Date: 2004/02/05 04:26:19 $ + +persistent NF Nadv NL M Hw + +if (isempty (NL)) + NF = 2048; + Nadv = NF / 2; + Fs = 48000; + Fmax = 9000; + NL = 2^(PQ_log2(NF * Fmax / Fs)); + M = NL; + Hw = (1 / M) * sqrt(8 / 3) * PQHannWin (M); +end + +EnThr = 8000; +kmax = NL + M - 1; + +EnRef = xR(Nadv+1:NF-1+1) * xR(Nadv+1:NF-1+1)'; +EnTest = xT(Nadv+1:NF-1+1) * xT(Nadv+1:NF-1+1)'; + +% Set the return value to be negative for small energy frames +if (EnRef < EnThr & EnTest < EnThr) + EHS = -1; + return; +end + +% Allocate storage +D = zeros (1, kmax); + +% Differences of log values +for (k = 0:kmax-1) + D(k+1) = log (X2(2,k+1) / X2(1,k+1)); +end + +% Correlation computation +C = PQ_Corr (D, NL, M); + +% Normalize the correlations +Cn = PQ_NCorr (C, D, NL, M); +Cnm = (1 / NL) * sum (Cn(1:NL)); + +% Window the correlation +Cw = Hw .* (Cn - Cnm); + +% DFT +cp = PQRFFT (Cw, NL, 1); + +% Squared magnitude +c2 = PQRFFTMSq (cp, NL); + +% Search for a peak after a valley +EHS = PQ_FindPeak (c2, NL/2+1); + +%---------------------------------------- +function log2 = PQ_log2 (a) + +log2 = 0; +m = 1; +while (m < a) + log2 = log2 + 1; + m = 2 * m; +end +log2 = log2 - 1; + +%---------- +function C = PQ_Corr (D, NL, M) +% Correlation calculation + +% Direct computation of the correlation +% for (i = 0:NL-1) +% s = 0; +% for (j = 0:M-1) +% s = s + D(j+1) * D(i+j+1); +% end +% C(i+1) = s; +% end + +% Calculate the correlation indirectly +NFFT = 2 * NL; +D0 = [D(1:M) zeros(1,NFFT-M)]; +D1 = [D(1:M+NL-1) zeros(1,NFFT-(M+NL-1))]; + +% DFTs of the zero-padded sequences +d0 = PQRFFT (D0, NFFT, 1); +d1 = PQRFFT (D1, NFFT, 1); + +% Multiply (complex) sequences +dx(0+1) = d0(0+1) * d1(0+1); +for (n = 1:NFFT/2-1) + m = NFFT/2 + n; + dx(n+1) = d0(n+1) * d1(n+1) + d0(m+1) * d1(m+1); + dx(m+1) = d0(n+1) * d1(m+1) - d0(m+1) * d1(n+1); +end +dx(NFFT/2+1) = d0(NFFT/2+1) * d1(NFFT/2+1); + +% Inverse DFT +Cx = PQRFFT (dx, NFFT, -1); +C = Cx(1:NL); + +%---------- +function Cn = PQ_NCorr (C, D, NL, M) +% Normalize the correlation + +Cn = zeros (1, NL); + +s0 = C(0+1); +sj = s0; +Cn(0+1) = 1; +for (i = 1:NL-1) + sj = sj + (D(i+M-1+1)^2 - D(i-1+1)^2); + d = s0 * sj; + if (d <= 0) + Cn(i+1) = 1; + else + Cn(i+1) = C(i+1) / sqrt (d); + end +end + +%---------- +function EHS = PQ_FindPeak (c2, N) +% Search for a peak after a valley + +cprev = c2(0+1); +cmax = 0; +for (n = 1:N-1) + if (c2(n+1) > cprev) % Rising from a valley + if (c2(n+1) > cmax) + cmax = c2(n+1); + end + end +end +EHS = cmax; diff --git a/PQevalAudio/MOV/PQmovModDiffB.m b/PQevalAudio/MOV/PQmovModDiffB.m new file mode 100644 index 0000000..925fff8 --- /dev/null +++ b/PQevalAudio/MOV/PQmovModDiffB.m @@ -0,0 +1,43 @@ +function MDiff = PQmovModDiffB (M, ERavg) +% Modulation difference related MOV precursors (Basic version) + +% P. Kabal $Revision: 1.1 $ $Date: 2003/12/07 13:34:46 $ + +persistent Nc Ete + +if (isempty (Nc)) + e = 0.3; + [Nc, fc] = PQCB ('Basic'); + Et = PQIntNoise (fc); + for (m = 0:Nc-1) + Ete(m+1) = Et(m+1)^e; + end +end + +% Parameters +negWt2B = 0.1; +offset1B = 1.0; +offset2B = 0.01; +levWt = 100; + +s1B = 0; +s2B = 0; +Wt = 0; +for (m = 0:Nc-1) + if (M(1,m+1) > M(2,m+1)) + num1B = M(1,m+1) - M(2,m+1); + num2B = negWt2B * num1B; + else + num1B = M(2,m+1) - M(1,m+1); + num2B = num1B; + end + MD1B = num1B / (offset1B + M(1,m+1)); + MD2B = num2B / (offset2B + M(1,m+1)); + s1B = s1B + MD1B; + s2B = s2B + MD2B; + Wt = Wt + ERavg(m+1) / (ERavg(m+1) + levWt * Ete(m+1)); +end + +MDiff.Mt1B = (100 / Nc) * s1B; +MDiff.Mt2B = (100 / Nc) * s2B; +MDiff.Wt = Wt; diff --git a/PQevalAudio/MOV/PQmovNLoudB.m b/PQevalAudio/MOV/PQmovNLoudB.m new file mode 100644 index 0000000..5d028bf --- /dev/null +++ b/PQevalAudio/MOV/PQmovNLoudB.m @@ -0,0 +1,33 @@ +function NL = PQmovNLoudB (M, EP) +% Noise Loudness + +% P. Kabal $Revision: 1.1 $ $Date: 2003/12/07 13:34:47 $ + +persistent Nc Et + +if (isempty (Nc)) + [Nc, fc] = PQCB ('Basic'); + Et = PQIntNoise (fc); +end + +% Parameters +alpha = 1.5; +TF0 = 0.15; +S0 = 0.5; +NLmin = 0; +e = 0.23; + +s = 0; +for (m = 0:Nc-1) + sref = TF0 * M(1,m+1) + S0; + stest = TF0 * M(2,m+1) + S0; + beta = exp (-alpha * (EP(2,m+1) - EP(1,m+1)) / EP(1,m+1)); + a = max (stest * EP(2,m+1) - sref * EP(1,m+1), 0); + b = Et(m+1) + sref * EP(1,m+1) * beta; + s = s + (Et(m+1) / stest)^e * ((1 + a / b)^e - 1); +end + +NL = (24 / Nc) * s; +if (NL < NLmin) + NL = 0; +end diff --git a/PQevalAudio/MOV/PQmovNMRB.m b/PQevalAudio/MOV/PQmovNMRB.m new file mode 100644 index 0000000..78c29bc --- /dev/null +++ b/PQevalAudio/MOV/PQmovNMRB.m @@ -0,0 +1,36 @@ +function NMR = PQmovNMRB (EbN, Ehs) +% Noise-to-mask ratio - Basic version +% NMR(1) average NMR +% NMR(2) max NMR + +% P. Kabal $Revision: 1.1 $ $Date: 2003/12/07 13:34:47 $ + +persistent Nc gm + +if (isempty (Nc)) + [Nc, fc, fl, fu, dz] = PQCB ('Basic'); + gm = PQ_MaskOffset (dz, Nc); +end + +NMR.NMRmax = 0; +s = 0; +for (m = 0:Nc-1) + NMRm = EbN(m+1) / (gm(m+1) * Ehs(m+1)); + s = s + NMRm; + if (NMRm > NMR.NMRmax) + NMR.NMRmax = NMRm; + end +end +NMR.NMRavg = s / Nc; + +%---------------------------------------- +function gm = PQ_MaskOffset (dz, Nc) + +for (m = 0:Nc-1) + if (m <= 12 / dz) + mdB = 3; + else + mdB = 0.25 * m * dz; + end + gm(m+1) = 10^(-mdB / 10); +end diff --git a/PQevalAudio/MOV/PQmovPD.m b/PQevalAudio/MOV/PQmovPD.m new file mode 100644 index 0000000..7c62a8c --- /dev/null +++ b/PQevalAudio/MOV/PQmovPD.m @@ -0,0 +1,42 @@ +function PD = PQmovPD (Ehs) +% Probability of detection + +% P. Kabal $Revision: 1.1 $ $Date: 2003/12/07 13:34:47 $ + +Nc = length (Ehs); + +% Allocate storage +PD.p = zeros (1, Nc); +PD.q = zeros (1, Nc); + +persistent c g d1 d2 bP bM + +if (isempty (c)) + c = [-0.198719 0.0550197 -0.00102438 5.05622e-6 9.01033e-11]; + d1 = 5.95072; + d2 = 6.39468; + g = 1.71332; + bP = 4; + bM = 6; +end + +for (m = 0:Nc-1) + EdBR = 10 * log10 (Ehs(1,m+1)); + EdBT = 10 * log10 (Ehs(2,m+1)); + edB = EdBR - EdBT; + if (edB > 0) + L = 0.3 * EdBR + 0.7 * EdBT; + b = bP; + else + L = EdBT; + b = bM; + end + if (L > 0) + s = d1 * (d2 / L)^g ... + + c(1) + L * (c(2) + L * (c(3) + L * (c(4) + L * c(5)))); + else + s = 1e30; + end + PD.p(m+1) = 1 - 0.5^((edB / s)^b); % Detection probability + PD.q(m+1) = abs (fix(edB)) / s; % Steps above threshold +end diff --git a/PQevalAudio/MOV/PQprtMOV.m b/PQevalAudio/MOV/PQprtMOV.m new file mode 100644 index 0000000..5e7b064 --- /dev/null +++ b/PQevalAudio/MOV/PQprtMOV.m @@ -0,0 +1,35 @@ +function PQprtMOV (MOV, ODG) +% Print MOV values (PEAQ Basic version) + +% P. Kabal $Revision: 1.1 $ $Date: 2003/12/07 13:34:47 $ + +N = length (MOV); +PQ_NMOV_B = 11; +PQ_NMOV_A = 5; + +fprintf ('Model Output Variables:\n'); +if (N == PQ_NMOV_B) + fprintf (' BandwidthRefB: %g\n', MOV(1)); + fprintf (' BandwidthTestB: %g\n', MOV(2)); + fprintf (' Total NMRB: %g\n', MOV(3)); + fprintf (' WinModDiff1B: %g\n', MOV(4)); + fprintf (' ADBB: %g\n', MOV(5)); + fprintf (' EHSB: %g\n', MOV(6)); + fprintf (' AvgModDiff1B: %g\n', MOV(7)); + fprintf (' AvgModDiff2B: %g\n', MOV(8)); + fprintf (' RmsNoiseLoudB: %g\n', MOV(9)); + fprintf (' MFPDB: %g\n', MOV(10)); + fprintf (' RelDistFramesB: %g\n', MOV(11)); +elseif (N == NMOV_A) + fprintf (' RmsModDiffA: %g\n', MOV(1)); + fprintf (' RmsNoiseLoudAsymA: %g\n', MOV(2)); + fprintf (' Segmental NMRB: %g\n', MOV(3)); + fprintf (' EHSB: %g\n', MOV(4)); + fprintf (' AvgLinDistA: %g\n', MOV(5)); +else + error ('Invalid number of MOVs'); +end + +fprintf ('Objective Difference Grade: %.3f\n', ODG); + +return; diff --git a/PQevalAudio/MOV/PQprtMOVCi.m b/PQevalAudio/MOV/PQprtMOVCi.m new file mode 100644 index 0000000..d080c48 --- /dev/null +++ b/PQevalAudio/MOV/PQprtMOVCi.m @@ -0,0 +1,40 @@ +function PQprtMOVCi (Nchan, i, MOVC) +% Print MOV precursors + +% P. Kabal $Revision: 1.1 $ $Date: 2003/12/07 13:34:47 $ + +fprintf ('Frame: %d\n', i); + +if (Nchan == 1) + fprintf (' Ntot : %g %g\n', ... + MOVC.Loud.NRef(1,i+1), MOVC.Loud.NTest(1,i+1)); + fprintf (' ModDiff: %g %g %g\n', ... + MOVC.MDiff.Mt1B(1,i+1), MOVC.MDiff.Mt2B(1,i+1), MOVC.MDiff.Wt(1,i+1)); + fprintf (' NL : %g\n', MOVC.NLoud.NL(1,i+1)); + fprintf (' BW : %g %g\n', ... + MOVC.BW.BWRef(1,i+1), MOVC.BW.BWTest(1,i+1)); + fprintf (' NMR : %g %g\n', ... + MOVC.NMR.NMRavg(1,i+1), MOVC.NMR.NMRmax(1,i+1)); + fprintf (' PD : %g %g\n', MOVC.PD.Pc(i+1), MOVC.PD.Qc(i+1)); + fprintf (' EHS : %g\n', 1000 * MOVC.EHS.EHS(1,i+1)); +else + fprintf (' Ntot : %g %g // %g %g\n', ... + MOVC.Loud.NRef(1,i+1), MOVC.Loud.NTest(1,i+1), ... + MOVC.Loud.NRef(2,i+1), MOVC.Loud.NTest(2,i+1)); + fprintf (' ModDiff: %g %g %g // %g %g %g\n', ... + MOVC.MDiff.Mt1B(1,i+1), MOVC.MDiff.Mt2B(1,i+1), MOVC.MDiff.Wt(1,i+1), ... + MOVC.MDiff.Mt1B(2,i+1), MOVC.MDiff.Mt2B(2,i+1), MOVC.MDiff.Wt(2,i+1)); + fprintf (' NL : %g // %g\n', ... + MOVC.NLoud.NL(1,i+1), ... + MOVC.NLoud.NL(2,i+1)); + fprintf (' BW : %g %g // %g %g\n', ... + MOVC.BW.BWRef(1,i+1), MOVC.BW.BWTest(1,i+1), ... + MOVC.BW.BWRef(2,i+1), MOVC.BW.BWTest(2,i+1)); + fprintf (' NMR : %g %g // %g %g\n', ... + MOVC.NMR.NMRavg(1,i+1), MOVC.NMR.NMRmax(1,i+1), ... + MOVC.NMR.NMRavg(2,i+1), MOVC.NMR.NMRmax(2,i+1)); + fprintf (' PD : %g %g\n', MOVC.PD.Pc(i+1), MOVC.PD.Qc(i+1)); + fprintf (' EHS : %g // %g\n', ... + 1000 * MOVC.EHS.EHS(1,i+1), ... + 1000 * MOVC.EHS.EHS(2,i+1)); +end diff --git a/PQevalAudio/Misc/PQHannWin.m b/PQevalAudio/Misc/PQHannWin.m new file mode 100644 index 0000000..f028dd4 --- /dev/null +++ b/PQevalAudio/Misc/PQHannWin.m @@ -0,0 +1,10 @@ +function hw = PQHannWin (NF) +% Hann window + +% P. Kabal $Revision: 1.1 $ $Date: 2003/12/07 13:34:10 $ + +hw = zeros (1, NF); + +for (n = 0:NF-1) + hw(n+1) = 0.5 * (1 - cos(2 * pi * n / (NF-1))); +end diff --git a/PQevalAudio/Misc/PQIntNoise.m b/PQevalAudio/Misc/PQIntNoise.m new file mode 100644 index 0000000..7dcb462 --- /dev/null +++ b/PQevalAudio/Misc/PQIntNoise.m @@ -0,0 +1,10 @@ +function EIN = PQIntNoise (f) +% Generate the internal noise energy vector + +% P. Kabal $Revision: 1.1 $ $Date: 2003/12/07 13:34:10 $ + +N = length (f); +for (m = 0:N-1) + INdB = 1.456 * (f(m+1) / 1000)^(-0.8); + EIN(m+1) = 10^(INdB / 10); +end diff --git a/PQevalAudio/Misc/PQRFFT.m b/PQevalAudio/Misc/PQRFFT.m new file mode 100644 index 0000000..9ac3c8c --- /dev/null +++ b/PQevalAudio/Misc/PQRFFT.m @@ -0,0 +1,33 @@ +function X = PQRFFT (x, N, ifn) +% Calculate the DFT of a real N-point sequence or the inverse +% DFT corresponding to a real N-point sequence. +% ifn > 0, forward transform +% input x(n) - N real values +% output X(k) - The first N/2+1 points are the real +% parts of the transform, the next N/2-1 points +% are the imaginary parts of the transform. However +% the imaginary part for the first point and the +% middle point which are known to be zero are not +% stored. +% ifn < 0, inverse transform +% input X(k) - The first N/2+1 points are the real +% parts of the transform, the next N/2-1 points +% are the imaginary parts of the transform. However +% the imaginary part for the first point and the +% middle point which are known to be zero are not +% stored. +% output x(n) - N real values + +% P. Kabal $Revision: 1.1 $ $Date: 2003/12/07 13:34:11 $ + +if (ifn > 0) + X = fft (x, N); + XR = real(X(0+1:N/2+1)); + XI = imag(X(1+1:N/2-1+1)); + X = [XR XI]; +else + xR = [x(0+1:N/2+1)]; + xI = [0 x(N/2+1+1:N-1+1) 0]; + x = complex ([xR xR(N/2-1+1:-1:1+1)], [xI -xI(N/2-1+1:-1:1+1)]); + X = real (ifft (x, N)); +end diff --git a/PQevalAudio/Misc/PQRFFTMSq.m b/PQevalAudio/Misc/PQRFFTMSq.m new file mode 100644 index 0000000..a032b76 --- /dev/null +++ b/PQevalAudio/Misc/PQRFFTMSq.m @@ -0,0 +1,13 @@ +function X2 = PQRFFTMSq (X, N) +% Calculate the magnitude squared frequency response from the +% DFT values corresponding to a real signal (assumes N is even) + +% P. Kabal $Revision: 1.1 $ $Date: 2003/12/07 13:34:11 $ + +X2 = zeros (1, N/2+1); + +X2(0+1) = X(0+1)^2; +for (k = 1:N/2-1) + X2(k+1) = X(k+1)^2 + X(N/2+k+1)^2; +end +X2(N/2+1) = X(N/2+1)^2; diff --git a/PQevalAudio/Misc/PQWOME.m b/PQevalAudio/Misc/PQWOME.m new file mode 100644 index 0000000..e83f147 --- /dev/null +++ b/PQevalAudio/Misc/PQWOME.m @@ -0,0 +1,13 @@ +function W2 = PQWOME (f) +% Generate the weighting for the outer & middle ear filtering +% Note: The output is a magnitude-squared vector + +% P. Kabal $Revision: 1.1 $ $Date: 2003/12/07 13:34:11 $ + +N = length (f); +for (k = 0:N-1) + fkHz = f(k+1) / 1000; + AdB = -2.184 * fkHz^(-0.8) + 6.5 * exp(-0.6 * (fkHz - 3.3)^2) ... + - 0.001 * fkHz^(3.6); + W2(k+1) = 10^(AdB / 10); +end diff --git a/PQevalAudio/Misc/PQdataBoundary.m b/PQevalAudio/Misc/PQdataBoundary.m new file mode 100644 index 0000000..6825f81 --- /dev/null +++ b/PQevalAudio/Misc/PQdataBoundary.m @@ -0,0 +1,106 @@ +function Lim = PQdataBoundary (WAV, Nchan, StartS, Ns) +% Search for the data boundaries in a file +% StartS - starting sample frame +% Ns - Number of sample frames + +% P. Kabal $Revision: 1.1 $ $Date: 2003/12/07 13:34:10 $ + +PQ_L = 5; +Amax = 32768; +NBUFF = 2048; +PQ_ATHR = 200 * (Amax / 32768); + +% Search from the beginning of the file +Lim(1) = -1; +is = StartS; +EndS = StartS + Ns - 1; +while (is <= EndS) + Nf = min (EndS - is + 1, NBUFF); + x = PQgetData (WAV, is, Nf); + for (k = 0:Nchan-1) + Lim(1) = max (Lim(1), PQ_DataStart (x(k+1,:), Nf, PQ_L, PQ_ATHR)); + end + if (Lim(1) >= 0) + Lim(1) = Lim(1) + is; + break + end + is = is + NBUFF - (PQ_L-1); +end + +% Search from the end of the file +% This loop is written as if it is going in a forward direction +% - When the "forward" position is i, the "backward" position is +% EndS - (i - StartS + 1) + 1 +Lim(2) = -1; +is = StartS; +while (is <= EndS) + Nf = min (EndS - is + 1, NBUFF); + ie = is + Nf - 1; % Forward limits [is, ie] + js = EndS - (ie - StartS + 1) + 1; % Backward limits [js, js+Nf-1] + x = PQgetData (WAV, js, Nf); + for (k = 0:Nchan-1) + Lim(2) = max (Lim(2), PQ_DataEnd (x(k+1,:), Nf, PQ_L, PQ_ATHR)); + end + if (Lim(2) >= 0) + Lim(2) = Lim(2) + js; + break + end + is = is + NBUFF - (PQ_L-1); +end + +% Sanity checks +if (~ ((Lim(1) >= 0 & Lim(2) >= 0) | (Lim(1) < 0 & Lim(2) < 0))) + error ('>>> PQdataBoundary: limits have difference signs'); +end +if (~(Lim(1) <= Lim(2))) + error ('>>> PQdataBoundary: Lim(1) > Lim(2)'); +end + +if (Lim(1) < 0) + Lim(1) = 0; + Lim(2) = 0; +end + +%---------- +function ib = PQ_DataStart (x, N, L, Thr) + +ib = -1; +s = 0; +M = min (N, L); +for (i = 0:M-1) + s = s + abs (x(i+1)); +end +if (s > Thr) + ib = 0; + return +end + +for (i = 1:N-L) % i is the first sample + s = s + (abs (x(i+L-1+1)) - abs (x(i-1+1))); % L samples apart + if (s > Thr) + ib = i; + return + end +end + +%---------- +function ie = PQ_DataEnd (x, N, L, Thr) + +ie = -1; +s = 0; +M = min (N, L); +for (i = N-M:N-1) + s = s + abs (x(i+1)); +end +if (s > Thr) + ie = N-1; + return +end + +for (i = N-2:-1:L-1) % i is the last sample + s = s + (abs (x(i-L+1+1)) - abs (x(i+1+1))); % L samples apart + if (s > Thr) + ie = i; + return + end +end diff --git a/PQevalAudio/Misc/PQgetData.m b/PQevalAudio/Misc/PQgetData.m new file mode 100644 index 0000000..8921e23 --- /dev/null +++ b/PQevalAudio/Misc/PQgetData.m @@ -0,0 +1,59 @@ +function x = PQgetData (WAV, i, N) +% Get data from internal buffer or file +% i - file position +% N - number of samples +% x - output data (scaled to the range -32768 to +32767) + +% Only two files can be "active" at a time. +% N = 0 resets the buffer + + +% P. Kabal $Revision: 1.1 $ $Date: 2003/12/07 13:34:10 $ + +persistent Buff + +iB = WAV.iB + 1; +if (N == 0) + Buff(iB).N = 20 * 1024; % Fixed size + Buff(iB).x = PQ_ReadWAV (WAV, i, Buff(iB).N); + Buff(iB).i = i; +end + +if (N > Buff(iB).N) + error ('>>> PQgetData: Request exceeds buffer size'); +end + +% Check if requested data is not already in the buffer +is = i - Buff(iB).i; +if (is < 0 | is + N - 1 > Buff(iB).N - 1) + Buff(iB).x = PQ_ReadWAV (WAV, i, Buff(iB).N); + Buff(iB).i = i; +end + +% Copy the data +Nchan = WAV.Nchan; +is = i - Buff(iB).i; +x = Buff(iB).x(1:Nchan,is+1:is+N-1+1); + +%------ +function x = PQ_ReadWAV (WAV, i, N) +% This function considers the data to extended with zeros before and +% after the data in the file. If the starting offset i is negative, +% zeros are filled in before the data starts at offset 0. If the request +% extends beyond the end of data in the file, zeros are appended. + +Amax = 32768; +Nchan = WAV.Nchan; + +x = zeros (Nchan, N); + +Nz = 0; +if (i < 0) + Nz = min (-i, N); + i = i + Nz; +end + +Ns = min (N - Nz, WAV.Nframe - i); +if (i >= 0 & Ns > 0) + x(1:Nchan,Nz+1:Nz+Ns-1+1) = Amax * (audioread (WAV.Fname, [i+1 i+Ns-1+1]))'; +end diff --git a/PQevalAudio/Misc/PQinitFMem.m b/PQevalAudio/Misc/PQinitFMem.m new file mode 100644 index 0000000..71d077d --- /dev/null +++ b/PQevalAudio/Misc/PQinitFMem.m @@ -0,0 +1,13 @@ +function Fmem = PQinitFMem (Nc, PCinit) +% Initialize the filter memories + +% P. Kabal $Revision: 1.1 $ $Date: 2003/12/07 13:34:10 $ + +Fmem.TDS.Ef(1:2,1:Nc) = 0; +Fmem.Adap.P(1:2,1:Nc) = 0; +Fmem.Adap.Rn(1:Nc) = 0; +Fmem.Adap.Rd(1:Nc) = 0; +Fmem.Adap.PC(1:2,1:Nc) = PCinit; +Fmem.Env.Ese(1:2,1:Nc) = 0; +Fmem.Env.DE(1:2,1:Nc) = 0; +Fmem.Env.Eavg(1:2,1:Nc) = 0; diff --git a/PQevalAudio/Misc/PQtConst.m b/PQevalAudio/Misc/PQtConst.m new file mode 100644 index 0000000..ddc78d8 --- /dev/null +++ b/PQevalAudio/Misc/PQtConst.m @@ -0,0 +1,13 @@ +function [a, b] = PQtConst (t100, tmin, f , Fs) +% Calculate the difference equation parameters. The time +% constant of the difference equation depends on the center +% frequencies. + +% P. Kabal $Revision: 1.1 $ $Date: 2003/12/07 13:34:11 $ + +N = length (f); +for (m = 0:N-1) + t = tmin + (100 / f(m+1)) * (t100 - tmin); + a(m+1) = exp (-1 / (Fs * t)); + b(m+1) = (1 - a(m+1)); +end diff --git a/PQevalAudio/Misc/PQwavFilePar.m b/PQevalAudio/Misc/PQwavFilePar.m new file mode 100644 index 0000000..e49d817 --- /dev/null +++ b/PQevalAudio/Misc/PQwavFilePar.m @@ -0,0 +1,32 @@ +function WAV = PQwavFilePar (File) +% Print a WAVE file header, pick up the file parameters + +% P. Kabal $Revision: 1.1 $ $Date: 2003/12/07 13:34:11 $ + +persistent iB + +if (isempty (iB)) + iB = 0; +else + iB = mod (iB + 1, 2); % Only two files can be "active" at a time +end + +%[size WAV.Fs Nbit] = wavread (File, 'size'); +[sound, WAV.Fs] = audioread(File); +Nbit = 16; +WAV.Fname = File; +WAV.Nframe = size(sound, 1); +WAV.Nchan = size(sound, 2); +WAV.iB = iB; % Buffer number + +% Initialize the buffer +PQgetData (WAV, 0, 0); + +fprintf (' WAVE file: %s\n', File); +if (WAV.Nchan == 1) + fprintf (' Number of samples : %d (%.4g s)\n', WAV.Nframe, WAV.Nframe / WAV.Fs); +else + fprintf (' Number of frames : %d (%.4g s)\n', WAV.Nframe, WAV.Nframe / WAV.Fs); +end +fprintf (' Sampling frequency: %g\n', WAV.Fs); +fprintf (' Number of channels: %d (%d-bit integer)\n', WAV.Nchan, Nbit); diff --git a/PQevalAudio/PEAQTest.m b/PQevalAudio/PEAQTest.m new file mode 100644 index 0000000..17b2ef4 --- /dev/null +++ b/PQevalAudio/PEAQTest.m @@ -0,0 +1,9 @@ +clear; clc; +%addpath('PQevalAudio', 'PQevalAudio/CB','PQevalAudio/Misc','PQevalAudio/MOV', 'PQevalAudio/Patt') + +ref = '2_clean_48.wav' % 16 times oversampling +test = '2_noise_48.wav' % 4 times oversampling +%test = '../2_output_48.wav' % 4 times oversampling + + +[odg, movb] = PQevalAudio(ref, test) diff --git a/PQevalAudio/PQevalAudio.m b/PQevalAudio/PQevalAudio.m new file mode 100644 index 0000000..3eafa52 --- /dev/null +++ b/PQevalAudio/PQevalAudio.m @@ -0,0 +1,177 @@ +function [ODG, MOVB]= PQevalAudio (Fref, Ftest, StartS, EndS) +% Perceptual evaluation of audio quality. + +% - StartS shifts the frames, so that the first frame starts at that sample. +% This is a two element array, one element for each input file. If StartS is +% a scalar, it applies to both files. +% - EndS marks the end of data. The processing stops with the last frame that +% contains that sample. This is a two element array, one element for each +% input file. If EndS is as scalar, it applies to both files. + +% P. Kabal $Revision: 1.2 $ $Date: 2004/02/05 04:25:24 $ + +% Globals (to save on copying in/out of functions) +global MOVC PQopt + +% Analysis parameters +NF = 2048; +Nadv = NF / 2; +Version = 'Basic'; + +% Options +PQopt.ClipMOV = 0; +PQopt.PCinit = 0; +PQopt.PDfactor = 1; +PQopt.Ni = 1; +PQopt.DelayOverlap = 1; +PQopt.DataBounds = 1; +PQopt.EndMin = NF / 2; + +%addpath ('CB', 'MOV', 'Misc', 'Patt'); + +if (nargin < 3) + StartS = [0, 0]; +end +if (nargin < 4) + EndS = []; +end + +% Get the number of samples and channels for each file +WAV(1) = PQwavFilePar (Fref); +WAV(2) = PQwavFilePar (Ftest); + +% Reconcile file differences +PQ_CheckWAV (WAV); +if (WAV(1).Nframe ~= WAV(2).Nframe) + disp ('>>> Number of samples differ: using the minimum'); +end + +% Data boundaries +Nchan = WAV(1).Nchan; +[StartS, Fstart, Fend] = PQ_Bounds (WAV, Nchan, StartS, EndS, PQopt); + +% Number of PEAQ frames +Np = Fend - Fstart + 1; +if (PQopt.Ni < 0) + PQopt.Ni = ceil (Np / abs(PQopt.Ni)); +end + +% Initialize the MOV structure +MOVC = PQ_InitMOVC (Nchan, Np); + +% Initialize the filter memory +Nc = PQCB (Version); +for (j = 0:Nchan-1) + Fmem(j+1) = PQinitFMem (Nc, PQopt.PCinit); +end + +is = 0; +for (i = -Fstart:Np-1) + + % Read a frame of data + xR = PQgetData (WAV(1), StartS(1) + is, NF); % Reference file + xT = PQgetData (WAV(2), StartS(2) + is, NF); % Test file + is = is + Nadv; + + % Process a frame + for (j = 0:Nchan-1) + [MOVI(j+1), Fmem(j+1)] = PQeval (xR(j+1,:), xT(j+1,:), Fmem(j+1)); + end + + if (i >= 0) + % Move the MOV precursors into a new structure + PQframeMOV (i, MOVI); % Output is in global MOVC + + % Print the MOV precursors + %if (PQopt.Ni ~= 0 & mod (i, PQopt.Ni) == 0) + % PQprtMOVCi (Nchan, i, MOVC); + %nd + end +end + +% Time average of the MOV values +if (PQopt.DelayOverlap) + Nwup = Fstart; +else + Nwup = 0; +end +MOVB = PQavgMOVB (MOVC, Nchan, Nwup); + +% Neural net +ODG = PQnNet (MOVB); + +% Summary printout +%PQprtMOV (MOVB, ODG); + +%---------- +function PQ_CheckWAV (WAV) +% Check the file parameters + +Fs = 48000; + +if (WAV(1).Nchan ~= WAV(2).Nchan) + error ('>>> Number of channels differ'); +end +if (WAV(1).Nchan > 2) + error ('>>> Too many input channels'); +end +if (WAV(1).Nframe ~= WAV(2).Nframe) + disp ('>>> Number of samples differ'); +end +if (WAV(1).Fs ~= WAV(2).Fs) + error ('>>> Sampling frequencies differ'); +end +if (WAV(1).Fs ~= Fs) + error ('>>> Invalid Sampling frequency: only 48 kHz supported'); +end + +%---------- +function [StartS, Fstart, Fend] = PQ_Bounds (WAV, Nchan, StartS, EndS, PQopt) + +PQ_NF = 2048; +PQ_NADV = (PQ_NF / 2); + +if (isempty (StartS)) + StartS(1) = 0; + StartS(2) = 0; +elseif (length (StartS) == 1) + StartS(2) = StartS(1); +end +Ns = WAV(1).Nframe; + +% Data boundaries (determined from the reference file) +if (PQopt.DataBounds) + Lim = PQdataBoundary (WAV(1), Nchan, StartS(1), Ns); + %fprintf ('PEAQ Data Boundaries: %ld (%.3f s) - %ld (%.3f s)\n', ... + % Lim(1), Lim(1)/WAV(1).Fs, Lim(2), Lim(2)/WAV(1).Fs); +else + Lim = [Starts(1), StartS(1) + Ns - 1]; +end + +% Start frame number +Fstart = floor ((Lim(1) - StartS(1)) / PQ_NADV); + +% End frame number +Fend = floor ((Lim(2) - StartS(1) + 1 - PQopt.EndMin) / PQ_NADV); + +%---------- +function MOVC = PQ_InitMOVC (Nchan, Np) +MOVC.MDiff.Mt1B = zeros (Nchan, Np); +MOVC.MDiff.Mt2B = zeros (Nchan, Np); +MOVC.MDiff.Wt = zeros (Nchan, Np); + +MOVC.NLoud.NL = zeros (Nchan, Np); + +MOVC.Loud.NRef = zeros (Nchan, Np); +MOVC.Loud.NTest = zeros (Nchan, Np); + +MOVC.BW.BWRef = zeros (Nchan, Np); +MOVC.BW.BWTest = zeros (Nchan, Np); + +MOVC.NMR.NMRavg = zeros (Nchan, Np); +MOVC.NMR.NMRmax = zeros (Nchan, Np); + +MOVC.PD.Pc = zeros (1, Np); +MOVC.PD.Qc = zeros (1, Np); + +MOVC.EHS.EHS = zeros (Nchan, Np); diff --git a/PQevalAudio/PQnNet.m b/PQevalAudio/PQnNet.m new file mode 100644 index 0000000..641e93e --- /dev/null +++ b/PQevalAudio/PQnNet.m @@ -0,0 +1,100 @@ +function ODG = PQnNetB (MOV) +% Neural net to get the final ODG + +% P. Kabal $Revision: 1.1 $ $Date: 2003/12/07 13:27:44 $ + +persistent amin amax wx wxb wy wyb bmin bmax I J CLIPMOV +global PQopt + +if (isempty (amin)) + I = length (MOV); + if (I == 11) + [amin, amax, wx, wxb, wy, wyb, bmin, bmax] = NNetPar ('Basic'); + else + [amin, amax, wx, wxb, wy, wyb, bmin, bmax] = NNetPar ('Advanced'); + end + [I, J] = size (wx); +end + +sigmoid = inline ('1 / (1 + exp(-x))'); + +% Scale the MOV's +Nclip = 0; +MOVx = zeros (1, I); +for (i = 0:I-1) + MOVx(i+1) = (MOV(i+1) - amin(i+1)) / (amax(i+1) - amin(i+1)); + if (~ isempty (PQopt) & PQopt.ClipMOV ~= 0) + if (MOVx(i+1) < 0) + MOVx(i+1) = 0; + Nclip = Nclip + 1; + elseif (MOVx(i+1) > 1) + MOVx(i+1) = 1; + Nclip = Nclip + 1; + end + end +end +if (Nclip > 0) + fprintf ('>>> %d MOVs clipped\n', Nclip); +end + +% Neural network +DI = wyb; +for (j = 0:J-1) + arg = wxb(j+1); + for (i = 0:I-1) + arg = arg + wx(i+1,j+1) * MOVx(i+1); + end + DI = DI + wy(j+1) * sigmoid (arg); +end + +ODG = bmin + (bmax - bmin) * sigmoid (DI); + +function [amin, amax, wx, wxb, wy, wyb, bmin, bmax] = NNetPar (Version) + +if (strcmp (Version, 'Basic')) + amin = ... + [393.916656, 361.965332, -24.045116, 1.110661, -0.206623, ... + 0.074318, 1.113683, 0.950345, 0.029985, 0.000101, ... + 0]; + amax = ... + [921, 881.131226, 16.212030, 107.137772, 2.886017, ... + 13.933351, 63.257874, 1145.018555, 14.819740, 1, ... + 1]; + wx = ... + [ [ -0.502657, 0.436333, 1.219602 ]; + [ 4.307481, 3.246017, 1.123743 ]; + [ 4.984241, -2.211189, -0.192096 ]; + [ 0.051056, -1.762424, 4.331315 ]; + [ 2.321580, 1.789971, -0.754560 ]; + [ -5.303901, -3.452257, -10.814982 ]; + [ 2.730991, -6.111805, 1.519223 ]; + [ 0.624950, -1.331523, -5.955151 ]; + [ 3.102889, 0.871260, -5.922878 ]; + [ -1.051468, -0.939882, -0.142913 ]; + [ -1.804679, -0.503610, -0.620456 ] ]; + wxb = ... + [ -2.518254, 0.654841, -2.207228 ]; + wy = ... + [ -3.817048, 4.107138, 4.629582 ]; + wyb = -0.307594; + bmin = -3.98; + bmax = 0.22; +else + amin = ... + [ 13.298751, 0.041073, -25.018791, 0.061560, 0.024523 ]; + amax = ... + [ 2166.5, 13.24326, 13.46708, 10.226771, 14.224874 ]; + wx = ... + [ [ 21.211773, -39.913052, -1.382553, -14.545348, -0.320899 ]; + [ -8.981803, 19.956049, 0.935389, -1.686586, -3.238586 ]; + [ 1.633830, -2.877505, -7.442935, 5.606502, -1.783120 ]; + [ 6.103821, 19.587435, -0.240284, 1.088213, -0.511314 ]; + [ 11.556344, 3.892028, 9.720441, -3.287205, -11.031250 ] ]; + wxb = ... + [ 1.330890, 2.686103, 2.096598, -1.327851, 3.087055 ]; + wy = ... + [ -4.696996, -3.289959, 7.004782, 6.651897, 4.009144 ]; + wyb = -1.360308; + bmin = -3.98; + bmax = 0.22; +end diff --git a/PQevalAudio/Patt/PQadapt.m b/PQevalAudio/Patt/PQadapt.m new file mode 100644 index 0000000..5beb8b3 --- /dev/null +++ b/PQevalAudio/Patt/PQadapt.m @@ -0,0 +1,107 @@ +function [EP, Fmem] = PQadapt (Ehs, Fmem, Ver, Mod) +% Level and pattern adaptation + +% P. Kabal $Revision: 1.1 $ $Date: 2003/12/07 13:35:08 $ + +persistent a b Nc M1 M2 Version Model + +if (~strcmp (Ver, Version) | ~strcmp (Mod, Model)) + Version = Ver; + Model = Mod; + if (strcmp (Model, 'FFT')) + [Nc, fc] = PQCB (Version); + NF = 2048; + Nadv = NF / 2; + else + [Nc, fc] = PQFB; + Nadv = 192; + end + Version = Ver; + Model = Mod; + Fs = 48000; + Fss = Fs / Nadv; + t100 = 0.050; + tmin = 0.008; + [a b] = PQtConst (t100, tmin, fc, Fss); + [M1, M2] = PQ_M1M2 (Version, Model); +end + +% Allocate memory +EP = zeros (2, Nc); +R = zeros (2, Nc); + +% Smooth the excitation patterns +% Calculate the correlation terms +sn = 0; +sd = 0; +for (m = 0:Nc-1) + Fmem.P(1,m+1) = a(m+1) * Fmem.P(1,m+1) + b(m+1) * Ehs(1,m+1); + Fmem.P(2,m+1) = a(m+1) * Fmem.P(2,m+1) + b(m+1) * Ehs(2,m+1); + sn = sn + sqrt (Fmem.P(2,m+1) * Fmem.P(1,m+1)); + sd = sd + Fmem.P(2,m+1); +end + +% Level correlation +CL = (sn / sd)^2; + +for (m = 0:Nc-1) + +% Scale one of the signals to match levels + if (CL > 1) + EP(1,m+1) = Ehs(1,m+1) / CL; + EP(2,m+1) = Ehs(2,m+1); + else + EP(1,m+1) = Ehs(1,m+1); + EP(2,m+1) = Ehs(2,m+1) * CL; + end + +% Calculate a pattern match correction factor + Fmem.Rn(m+1) = a(m+1) * Fmem.Rn(m+1) + EP(2,m+1) * EP(1,m+1); + Fmem.Rd(m+1) = a(m+1) * Fmem.Rd(m+1) + EP(1,m+1) * EP(1,m+1); + if (Fmem.Rd(m+1) <= 0 | Fmem.Rn(m+1) <= 0) + error ('>>> PQadap: Rd or Rn is zero'); + end + if (Fmem.Rn(m+1) >= Fmem.Rd(m+1)) + R(1,m+1) = 1; + R(2,m+1) = Fmem.Rd(m+1) / Fmem.Rn(m+1); + else + R(1,m+1) = Fmem.Rn(m+1) / Fmem.Rd(m+1); + R(2,m+1) = 1; + end +end + +% Average the correction factors over M channels and smooth with time +% Create spectrally adapted patterns +for (m = 0:Nc-1) + iL = max (m - M1, 0); + iU = min (m + M2, Nc-1); + s1 = 0; + s2 = 0; + for (i = iL:iU) + s1 = s1 + R(1,i+1); + s2 = s2 + R(2,i+1); + end + Fmem.PC(1,m+1) = a(m+1) * Fmem.PC(1,m+1) + b(m+1) * s1 / (iU-iL+1); + Fmem.PC(2,m+1) = a(m+1) * Fmem.PC(2,m+1) + b(m+1) * s2 / (iU-iL+1); + + % Final correction factor => spectrally adapted patterns + EP(1,m+1) = EP(1,m+1) * Fmem.PC(1,m+1); + EP(2,m+1) = EP(2,m+1) * Fmem.PC(2,m+1); +end + +%-------------------------------------- +function [M1, M2] = PQ_M1M2 (Version, Model) +% Return band averaging parameters + +if (strcmp (Version, 'Basic')) + M1 = 3; + M2 = 4; +elseif (strcmp (Version, 'Advanced')) + if (strcmp (Model, 'FFT')) + M1 = 1; + M2 = 2; + else + M1 = 1; + M2 = 1; + end +end diff --git a/PQevalAudio/Patt/PQloud.m b/PQevalAudio/Patt/PQloud.m new file mode 100644 index 0000000..d46f40d --- /dev/null +++ b/PQevalAudio/Patt/PQloud.m @@ -0,0 +1,53 @@ +function Ntot = PQloud (Ehs, Ver, Mod) +% Calculate the loudness + +% P. Kabal $Revision: 1.1 $ $Date: 2003/12/07 13:35:09 $ + +e = 0.23; + +persistent Nc s Et Ets Version Model + +if (~strcmp (Ver, Version) | ~strcmp (Mod, Model)) + Version = Ver; + Model = Mod; + if (strcmp (Model, 'FFT')) + [Nc, fc] = PQCB (Version); + c = 1.07664; + else + [Nc, fc] = PQFB; + c = 1.26539; + end + E0 = 1e4; + Et = PQ_enThresh (fc); + s = PQ_exIndex (fc); + for (m = 0:Nc-1) + Ets(m+1) = c * (Et(m+1) / (s(m+1) * E0))^e; + end +end + +sN = 0; +for (m = 0:Nc-1) + Nm = Ets(m+1) * ((1 - s(m+1) + s(m+1) * Ehs(m+1) / Et(m+1))^e - 1); + sN = sN + max(Nm, 0); +end +Ntot = (24 / Nc) * sN; + +%==================== +function s = PQ_exIndex (f) +% Excitation index + +N = length (f); +for (m = 0:N-1) + sdB = -2 - 2.05 * atan(f(m+1) / 4000) - 0.75 * atan((f(m+1) / 1600)^2); + s(m+1) = 10^(sdB / 10); +end + +%-------------------- +function Et = PQ_enThresh (f) +% Excitation threshold + +N = length (f); +for (m = 0:N-1) + EtdB = 3.64 * (f(m+1) / 1000)^(-0.8); + Et(m+1) = 10^(EtdB / 10); +end diff --git a/PQevalAudio/Patt/PQmodPatt.m b/PQevalAudio/Patt/PQmodPatt.m new file mode 100644 index 0000000..d7a5ee4 --- /dev/null +++ b/PQevalAudio/Patt/PQmodPatt.m @@ -0,0 +1,34 @@ +function [M, ERavg, Fmem] = PQmodPatt (Es, Fmem) +% Modulation pattern processing + +% P. Kabal $Revision: 1.1 $ $Date: 2003/12/07 13:35:09 $ + +persistent Nc a b Fss + +if (isempty (Nc)) + Fs = 48000; + NF = 2048; + Fss = Fs / (NF/2); + [Nc, fc] = PQCB ('Basic'); + t100 = 0.050; + t0 = 0.008; + [a, b] = PQtConst (t100, t0, fc, Fss); +end + +% Allocate memory +M = zeros (2, Nc); + +e = 0.3; +for (i = 1:2) + for (m = 0:Nc-1) + Ee = Es(i,m+1)^e; + Fmem.DE(i,m+1) = a(m+1) * Fmem.DE(i,m+1) ... + + b(m+1) * Fss * abs (Ee - Fmem.Ese(i,m+1)); + Fmem.Eavg(i,m+1) = a(m+1) * Fmem.Eavg(i,m+1) + b(m+1) * Ee; + Fmem.Ese(i,m+1) = Ee; + + M(i,m+1) = Fmem.DE(i,m+1) / (1 + Fmem.Eavg(i,m+1)/0.3); + end +end + +ERavg = Fmem.Eavg(1,:); diff --git a/conf/conf.yaml b/conf/conf.yaml new file mode 100644 index 0000000..9a33608 --- /dev/null +++ b/conf/conf.yaml @@ -0,0 +1,112 @@ +defaults: + - dset: dataset + - hydra/job_logging: colorlog + - hydra/hydra_logging: colorlog + +path_experiment: "non-specified_name" #there should be a better way to do this +tensorboard_logs: "/scratch/work/molinee2/tensorboard_logs/unet_historical" #path with tensorboard +# Dataset related +fs: 44100 #default is 44100, better NEVER change +seg_len_s_train: 5 #length of the train (and val) segments in seconds +freq_inference: 10 #we do inference after * epochs +seg_len_s_test: 15 #lenum_test_segments: 10 #number of test segments (inferenced every epoch) +num_real_test_segments: 5 #number of real recordings inferenced every epoch +num_test_segments: 15 +buffer_size: 1000 # buffer size for shuffling datasets (train and val) +# Dataset Augmentation +overlap: 0 #overlap when extracting audio segments, default is 0, augment if more data is needed + +# Logging and printing, and does not impact training +#device: cuda +verbose: 0 +use_tensorboard: True +use_soft_denoising: False + + +num_workers: 10 + + +# Checkpointing, by default automatically load last checkpoint +checkpoint: true +continue_from: '' # Path the a checkpoint.th file to start from. + # this is not used in the name of the experiment! + # so use a dummy=something not to mixup experiments. +continue_best: false # continue from best, not last state if continue_from is set. +only_inference: false + +# Optimization related +optim: adam +lr: 1e-4 #used +variable_lr: True +beta1: 0.5 #used +beta2: 0.9 #used + +loss: "mae" #choose loss: + +epochs: 73 +batch_size: 16 +val_take: -1 +steps_per_epoch: 1000 + + +sp: + method: "wiener" + +#STFT parameteres +stft: + win_size: 2048 #STFT window size + hop_size: 512 + +#inference param +inference: + audio: None +# Models +model: unet # either demucs or dwave +unet: + activation: "elu" + use_csff: False + use_SAM: True + use_cam: False + use_fam: False + use_fencoding: True + use_tdf: False + use_alttdfs: False + num_tfc: 3 + num_stages: 3 + depth: 6 + f_dim: 1025 #hardcoded, depends on the stft window + + + +# Hydra config +hydra: + job: + config: + # configuration for the ${hydra.job.override_dirname} runtime variable + override_dirname: + kv_sep: '=' + item_sep: ',' + # Remove all paths, as the / in them would mess up things + # Remove params that would not impact the training itself + # Remove all slurm and submit params. + # This is ugly I know... + exclude_keys: ['path_experiment', + 'hydra.job_logging.handles.file.filename'] + job_logging: + handlers: + file: + class: logging.FileHandler + mode: w + formatter: colorlog + filename: trainer.log + console: + class: logging.StreamHandler + formatter: colorlog + stream: ext://sys.stderr + + hydra_logging: + handlers: + console: + class: logging.StreamHandler + formatter: colorlog + stream: ext://sys.stderr diff --git a/conf/dset/dataset.yaml b/conf/dset/dataset.yaml new file mode 100644 index 0000000..2dc6d94 --- /dev/null +++ b/conf/dset/dataset.yaml @@ -0,0 +1,13 @@ +dset: + path_music_train: [ "/scratch/work/molinee2/datasets/MusicNet_with_opera/train"] + path_music_validation: ["/scratch/work/molinee2/datasets/MusicNet_with_opera/validation"] + path_music_test: ["/scratch/work/molinee2/datasets/MusicNet_extended/test"] + + path_piano_test: ["/scratch/work/molinee2/datasets/MusicNet_with_opera/test/piano"] + path_strings_test: ["/scratch/work/molinee2/datasets/MusicNet_with_opera/test/strings"] + path_orchestra_test: ["/scratch/work/molinee2/datasets/MusicNet_with_opera/test/orchestra"] + path_opera_test: ["/scratch/work/molinee2/datasets/MusicNet_with_opera/test/opera"] + + path_noise: "/scratch/work/molinee2/datasets/noise_set_refined_last" #path with noise samples. There must be an info.csv file there + path_recordings: "/scratch/work/molinee2/datasets/real_noisy_data_test" #path with real noisy recordings. There must be an audio_files.txt file there + diff --git a/experiments/trained_model/noti b/experiments/trained_model/noti new file mode 100644 index 0000000..e69de29