% start the stopwatch for the loading stage.
printf('Loading...');
tic;

% put the mvapack installation on the octave search path.
addpath('/opt/mvapack');

% read the base class information from the labels file. the third output
% is kept as `cls.pca.Y' and is used further down as the class matrix.
[cls.labels, cls.idx, cls.pca.Y] = loadlabels('labels.txt');

% estimated caffeine content (mM), one entry per class in this order:
%               B      P      D      X
cls.caffeine = [4.993; 1.526; 1.979; 0.217];

% regression response for opls-r: the caffeine column tiled 16 times,
% with its rows then taken in the `cls.idx' order.
cls.opls.r.Y = repmat(cls.caffeine, 16, 1)(cls.idx, :);

% decaf<->other response for opls-da: the first three classes map to the
% first column and the fourth class to the second column; tiled and
% reordered in exactly the same way as the regression response.
cls.opls.da.Y = repmat([1 0; 1 0; 1 0; 0 1], 16, 1)(cls.idx, :);

% every entry in the working directory with a three-character name is
% taken to be a directory of fids.
F.dirs = glob('???');

% read the fids, their acquisition parameters and the time axis.
[F.all, F.parms, F.t] = loaddmx(F.dirs);

% report the time spent loading.
printf(' completed. ');
toc;

% start the stopwatch for the processing stage.
printf('Processing...');
tic;

% de-interleave the fids by row parity.
% * odd rows: without T2 filter.
% * even rows: with T2 filter.
F.odds = F.all(1 : 2 : end, :);
F.evens = F.all(2 : 2 : end, :);

% bring both halves into the `cls.idx' row order.
F.odds = F.odds(cls.idx, :);
F.evens = F.evens(cls.idx, :);

% fourier transform every fid (both halves still interleaved).
[S.all, S.ppm] = nmrft(F.all, F.parms);

% autophase every spectrum, keeping the phase corrections that were found.
[S.all, S.phc0, S.phc1] = autophase(S.all, F.parms);

% de-interleave the spectra the same way the fids were split.
S.odds = S.all(1 : 2 : end, :);
S.evens = S.all(2 : 2 : end, :);

% apply an extra (-5, +14) phase correction to the even observations and
% book the same adjustment into their stored phase values.
S.evens = phase(S.evens, F.parms, -5, 14);
S.phc0(2 : 2 : end) -= 5;
S.phc1(2 : 2 : end) += 14;

% bring both spectral halves into the `cls.idx' row order.
S.odds = S.odds(cls.idx, :);
S.evens = S.evens(cls.idx, :);

% locate the axis indices nearest to the lowest shift, 0.44 ppm, 9.16 ppm
% and the highest shift.
i0 = findnearest(S.ppm, min(S.ppm));
i1 = findnearest(S.ppm, 0.44);
i2 = findnearest(S.ppm, 9.16);
i3 = findnearest(S.ppm, max(S.ppm));

% variables to discard: everything from the highest shift to 9.16 ppm and
% from 0.44 ppm to the lowest shift. the ranges are only non-empty when
% i3 <= i2 and i1 <= i0, i.e. this assumes a descending ppm axis.
S.rm.var = [ i3 : i2, i1 : i0 ];

% trim all three spectral matrices. `S.ppm' is only replaced by the last
% call, so the first two calls still see the untrimmed axis.
[S.all, jnk] = rmvar(S.all, S.ppm, S.rm.var);
[S.odds, jnk] = rmvar(S.odds, S.ppm, S.rm.var);
[S.evens, S.ppm] = rmvar(S.evens, S.ppm, S.rm.var);

% assemble the modeling datasets from the real parts of the spectra.
% both the pca and the opls datasets start from the even observations.
X.ppm = S.ppm;
X.odds = real(S.odds);
X.evens = real(S.evens);
X.pca.data = X.evens;
X.opls.data = X.evens;

% report the time spent processing.
printf(' completed. ');
toc;

% begin timing the treatment stage (normalization, binning, alignment).
printf('Treating...'); tic;

% normalize the entire dataset for pca, all observations at once.
X.pca.data = pqnorm(X.pca.data);

% bin the entire dataset for pca analysis. `X.pca.wbin' is handed to
% `binadapt' as its last argument -- presumably the nominal bin width
% in ppm; TODO confirm against the mvapack documentation.
X.pca.wbin = 0.01;
[X.pca.data, X.pca.ppm, X.pca.widths] = ...
  binadapt(X.pca.data, X.ppm, F.parms, X.pca.wbin);

% remove noise variables for pca. the two indices are the bins nearest to
% 5.84 and 4.54 ppm on the binned axis.
% NOTE(review): presumably these bound the region `rmnoise' uses as its
% noise reference -- confirm against the mvapack documentation.
X.pca.noise = [findnearest(X.pca.ppm, 5.84), findnearest(X.pca.ppm, 4.54)];
[X.pca.data, X.pca.ppm] = rmnoise(X.pca.data, X.pca.ppm, X.pca.noise);

% define the chemical shift regions (one [start end] row per segment, in
% ppm) used for the per-class segmented alignment below.
X.segs.per = [0.55 0.87; 0.87 1.06; 1.12 1.22; 1.22 1.40; ...
              1.68 1.76; 1.76 1.79; 1.79 1.85; 1.87 2.03; ...
              2.20 2.30; 2.30 2.36; 2.50 2.58; 2.73 2.82; ...
              3.00 3.09; 3.09 3.23; 3.24 3.36; 3.36 3.43; ...
              3.43 3.65; 3.69 3.77; 3.77 3.82; 3.82 3.94; ...
              3.94 4.08; 4.20 4.26; 4.26 4.32; 4.60 4.80; ...
              5.66 5.86; 6.08 6.44; 6.44 6.82; 6.82 7.26; ...
              7.26 7.57; 7.57 7.75; 7.83 7.98; 8.25 8.33; ...
              8.33 8.41; 8.58 8.65; 8.65 8.75; 8.92 9.02];

% define the chemical shift regions used for the segmented alignment of
% the whole dataset at once. these four rows are a subset of the
% per-class segments above.
X.segs.all = [1.76 1.79; 3.09 3.23; 3.24 3.36; 7.57 7.75];

% align the dataset class-wise for opls analysis, then align all
% observations together on the smaller segment list. don't be fooled by the
% use of `cls.pca.Y'; the `perclass' function needs a discriminant matrix.
% the alignment runs on the unbinned axis (`X.ppm'), before referencing.
X.opls.ppm = X.ppm;
X.opls.data = perclass(@(x) icoshift(x, X.opls.ppm, X.segs.per, true), ...
                       X.opls.data, cls.pca.Y);
X.opls.data = icoshift(X.opls.data, X.opls.ppm, X.segs.all);

% reference the chemical shift axis for opls analysis. only the axis is
% changed here; the data matrix is left untouched.
% NOTE(review): presumably this moves the position found at 0.131 ppm to
% 0.000 ppm -- confirm the argument order of `refadj'.
X.opls.ppm = refadj(X.opls.ppm, 0.000, 0.131);

% normalize the dataset for opls in two passes: first across all
% observations at once, then again separately within each class.
X.opls.data = pqnorm(X.opls.data);
X.opls.data = perclass(@pqnorm, X.opls.data, cls.pca.Y);

% print the elapsed time.
printf(' completed. '); toc;

% begin timing the modeling stage.
printf('Modeling...'); tic;

% build a pca model from the binned, noise-filtered dataset and attach
% the class matrix and the class labels to it.
mdl.pca = pca(X.pca.data);
mdl.pca = addclasses(mdl.pca, cls.pca.Y);
mdl.pca = addlabels(mdl.pca, cls.labels);

% build a pls-da model on the same treated data as the opls models, using
% the two-column decaf<->other response matrix.
mdl.pls.da = pls(X.opls.data, cls.opls.da.Y);
mdl.pls.da = addlabels(mdl.pls.da, {'Regular', 'Decaf'});

% build an opls-r model against the caffeine content response.
mdl.opls.r = opls(X.opls.data, cls.opls.r.Y);
mdl.opls.r = addlabels(mdl.opls.r, cls.labels);

% build a skeleton copy of the opls-r model that contains class matrices
% that can be used for plotting with pretty colors and ellipses. :)
% the copy is stored as the field `da' inside `mdl.opls.r' itself, so the
% opls-r model carries it along from here on; its cross-validation data
% is dropped right away.
% NOTE(review): the meaning of the trailing `true' passed to `addclasses'
% is not visible here -- confirm against the mvapack documentation.
mdl.opls.r.da = addclasses(mdl.opls.r, cls.pca.Y, true);
mdl.opls.r.da.CV = [];

% build an opls-da model against the decaf<->other response matrix.
mdl.opls.da = opls(X.opls.data, cls.opls.da.Y);
mdl.opls.da = addlabels(mdl.opls.da, {'Regular', 'Decaf'});

% build an lda model of the pca scores, using the same class matrix that
% was attached to the pca model.
mdl.lda = lda(scores(mdl.pca), cls.pca.Y);
mdl.lda = addlabels(mdl.lda, cls.labels);

% print the elapsed time.
printf(' completed. '); toc;

% begin timing the validation stage.
printf('Validating...'); tic;

% run permutation tests of the supervised models (pls-da, opls-r, opls-da
% and lda). `P.n' is handed to every `permtest' call -- presumably the
% number of permutations; TODO confirm against the mvapack documentation.
P.n = 1000;
P.pls.da = permtest(mdl.pls.da, P.n);
P.opls.r = permtest(mdl.opls.r, P.n);
P.opls.da = permtest(mdl.opls.da, P.n);
P.lda = permtest(mdl.lda, P.n);

% run a cv-anova on the pls-da model and on both opls models. the results
% go into the lowercase `cv.anova' field of each model, which is a
% different field from the uppercase `CV' one.
mdl.pls.da.cv.anova = cvanova(mdl.pls.da);
mdl.opls.r.cv.anova = cvanova(mdl.opls.r);
mdl.opls.da.cv.anova = cvanova(mdl.opls.da);

% print the elapsed time.
printf(' completed. '); toc;

% start the stopwatch for the writing stage.
printf('Writing...');
tic;

% drop the bulky cross-validation data from every supervised model so
% that it does not end up in the output file.
mdl.pls.da.CV = [];
mdl.opls.r.CV = [];
mdl.opls.da.CV = [];
mdl.lda.CV = [];

% format today's date as YYYYMMDD from the year, month and day entries
% of the clock vector.
dv = fix(clock());
dstr = sprintf('%04d%02d%02d', dv(1 : 3));

% write the workspace to a date-stamped, gzip-compressed binary file.
% no variable names are given, so everything in scope is saved.
save('-binary', '-z', sprintf('coffees-%s.dat.gz', dstr));

% report the time spent writing.
printf(' completed. ');
toc;

