%% Data handling demo
% for dataset, see course web page

% Read every CSV file in testdata/ and keep, per file, one record for each
% distinct (hour, zone name) pair. The result is stacked into the table nyiso.
d = dir('testdata/*.csv');
if isempty(d)
    % Stop here with a clear message rather than failing later in unstack
    error('No CSV files found in testdata/ (see course web page for the dataset).');
end
parts = cell(numel(d),1);   % one table per file, concatenated once after the loop
for k = 1:numel(d)
    % fullfile inserts the correct path separator for the current platform
    t = importNYenergydata(fullfile(d(k).folder, d(k).name));
    t.Name = categorical(t.Name);
    % Only the hour component is needed; the other datevec outputs are discarded
    [~,~,~,h] = datevec(t.TimeStamp);
    % Second output of unique(...,'rows') indexes one representative row for
    % each distinct [hour, category code] combination.
    % NOTE(review): the key uses the hour only, so this assumes each file
    % covers a single day -- confirm against the dataset.
    [~,firstHourId] = unique([h double(t.Name)],'rows');
    parts{k} = t(firstHourId,:);
end
nyiso = vertcat(parts{:});

%% reformat and sort
% Spread the Load values into one table variable per Name, then order the
% rows by TimeStamp. The rows should already be in order; the sort is just
% a safeguard.
nyiso = sortrows(unstack(nyiso,'Load','Name'),'TimeStamp');
%% initial observations
% Plot two of the load series against time, each in its own figure
% (figure 1: DUNWOD, figure 2: LONGIL).
zonesToShow = {'DUNWOD','LONGIL'};
for figIdx = 1:numel(zonesToShow)
    figure(figIdx)
    plot(nyiso.TimeStamp,nyiso.(zonesToShow{figIdx}))
end

%% smoothing spline
% For convenience we pick only one zone -- obviously you could, and should,
% do this for any of them, or even all of them.

% Named zoneName rather than var so that the built-in var function is not
% shadowed by a workspace variable.
zoneName = 'N_Y_C_';
x = datenum(nyiso.TimeStamp);
y = nyiso.(zoneName);

% Readings at or below this level are flagged as bad and blanked out
min_valid_load = 110;
idxbad = y <= min_valid_load;
y(idxbad) = NaN;
% Fit a smoothing spline, excluding the NaN samples, and evaluate it at
% every timestamp in the data set
fo = fit(x,y,'smoothingspline','Exclude',isnan(y));
yy = fo(x);
% alternative: Savitzky-Golay smoothing (Signal Processing Toolbox)
%yy = sgolayfilt(y,8,23);
% or a cubic smoothing spline via csaps (Curve Fitting Toolbox)
%[yy,p]  = csaps(x,y,0.9998,x);

% Additionally flag points whose distance from the smoothed curve exceeds
% the error threshold
error_threshold = 100;
idxbad = idxbad | (abs(y-yy) > error_threshold);


%%
% Redraw figure 1 with the measured and smoothed series, and mark the
% flagged samples with red circles
figure(1)
clf
plot(nyiso.TimeStamp,[y yy])
ylabel('Load (MW)')
hold on
plot(nyiso.TimeStamp(idxbad),y(idxbad),'ro')
legend('Actual','Smoothed','Anomalies')
hold off


%% Aggregating data - add weather

% NOTE(review): weatherData is presumably supplied by this MAT-file -- confirm
load('weather_cleaned')
% The two tables need at least one variable name in common for the join,
% so rename the weather table's Date variable to TimeStamp
weatherData.Properties.VariableNames{'Date'} = 'TimeStamp';
% Round-trip through date strings to obtain datetime values
weatherStamps = datestr(weatherData.TimeStamp);
weatherData.TimeStamp = datetime(weatherStamps);
% Keep only the rows that have a match in both tables
nyiso = innerjoin(nyiso, weatherData);


%% Maybe now we can model stuff

% Pull out the timestamp, the N_Y_C_ load and the KLGA temperature variable,
% and give them more general names
modeldata = nyiso(:,{'TimeStamp','N_Y_C_','TemperatureKLGA'});
modeldata.Properties.VariableNames(1:3) = {'Date','Load','Temperature'};

% Create predictors
% An accurate model needs useful predictors. A common technique with
% temporal predictors is to break the timestamp into its separate parts so
% that each part can vary independently of the others.

% Temporal predictors
modeldata.Hour = hour(modeldata.Date);
modeldata.Month = month(modeldata.Date);
modeldata.DayOfWeek = weekday(modeldata.Date);
% weekday numbers Sunday as 1 and Saturday as 7
modeldata.isWeekend = (modeldata.DayOfWeek == 1) | (modeldata.DayOfWeek == 7);

% Split the two-column Temperature variable into separate Temp and DewPnt
% variables and drop the original
tempCols = modeldata.Temperature;
modeldata.Temperature = [];
modeldata.Temp = tempCols(:,1);
modeldata.DewPnt = tempCols(:,2);

%%

% Normalized autocorrelation of the load, NaN samples removed, for lags up
% to maxLag samples in either direction; plotted in figure 2
loadSeries = modeldata.Load;
loadSeries = loadSeries(~isnan(loadSeries));
maxLag = 200;
c = xcorr(loadSeries,maxLag,'coeff');
figure(2)
plot(c)

% Create predictor for the load at the same time the prior day (24 hour
% lag). Because we have missing timestamps, we can't simply look back by 24
% rows. Instead, look up each row's lagged timestamp explicitly: the second
% output of ismember gives the row that holds that timestamp, so the result
% does not depend on the table being sorted or free of repeated timestamps.
% Rows with no match stay NaN.
modeldata.PriorDay = nan(height(modeldata),1);
[idxprior,locprior] = ismember(modeldata.Date-hours(24),modeldata.Date);
modeldata.PriorDay(idxprior) = modeldata.Load(locprior(idxprior));
% Create predictor for the load at the same time the same day the prior
% week (168 hour lag), using the same lookup
modeldata.PriorWeek = nan(height(modeldata),1);
[idxprior,locprior] = ismember(modeldata.Date-hours(168),modeldata.Date);
modeldata.PriorWeek(idxprior) = modeldata.Load(locprior(idxprior));

%%
% Split by date: rows up to and including the cutoff are used for training,
% later rows for testing
cutoff = datetime(2008,6,15);
idxtrain = modeldata.Date <= cutoff;
idxtest = modeldata.Date > cutoff;
% Some of the machine learning functions expect separate matrices for the
% inputs and the output: column 2 is the response, columns 3 onward are the
% predictors
responseCol = 2;
predictorCols = 3:width(modeldata);
Xtrain = modeldata{idxtrain,predictorCols};
Ytrain = modeldata{idxtrain,responseCol};
Xtest = modeldata{idxtest,predictorCols};
Ytest = modeldata{idxtest,responseCol};

%%



% Network configuration
trainFcn = 'trainlm';      % Levenberg-Marquardt training algorithm
hiddenLayerSize = 20;

% Build the fitting network
net = fitnet(hiddenLayerSize,trainFcn);

% Train it; the data is passed transposed (one column per observation)
netInputs = Xtrain';
netTargets = Ytrain';
mdl_net = train(net,netInputs,netTargets);

% Predict on the test set, then plot prediction against measurement (top)
% and the prediction error (bottom)
%%
testDates = modeldata.Date(idxtest);
Y_nn = mdl_net(Xtest')';
predictionError = Ytest-Y_nn;
figure
ax1 = subplot(2,1,1);
plot(testDates,Y_nn,'DisplayName','Y_nn')
hold on
plot(testDates,Ytest,'DisplayName','Ytest')
hold off
legend('Neural Network','Measured')
ylabel('Load (MW)')
ax2 = subplot(2,1,2);
plot(testDates,predictionError);
legend('Neural Network')
ylabel('Error (MW)')
% Link the x-axes so that zooming or panning one plot moves the other
linkaxes([ax1,ax2],'x')


