function [ffInfo,masterList,numT,chosenInfo,probCount] = getNLDLGts(DLGstats)
%%% DLGstats is the ouput of running DLGstatsHOLD.m, then DLGstatsPLACE.m
%%% -> DLGstats holds the info from each RCWT realization about whether
%%% the resulting surrogate process maxima cluster
%%% ffInfo is a struct which gives the different possible maxima configurations
%%% -> each field is a possible maxima config. (e.g. z12345 means surrogate
%%% processes 1-5 cluster together; z1z2345 means surrogate process 1
%%% occurs separately over an exposure while surrogates 2-5 occur
%%% clustered etc.)
%%% -> within each field, we have the probability of experiencing that
%%% maxima cluster (pz...), the info on the time series which contain
%%% those clusters (z... - organized the same way as DLGstats: marker zA
%%% ZB ...), and the chosen indices based on numT, the probability of
%%% experiencing that maxima config., and the distribution of those
%%% maxima clusters
%%% masterList gives the indices of all chosen DLG time series based on
%%% numT (note length(masterList)>=numT because some time series contain
%%% un-clustered maxima, and each index of MasterList is a time series
%%% which may excite clustered or un-clustered surrogate maxima
%%% numT is the max number of NL-DLG time series which can be assembled
%%% without oversampling the maxima cluster distributions
%%% chosenInfo is a struct which gives similar info as ffInfo, but removes
%%% maxima configurations where the probability of experiencing that maxima
%%% config. is so low that we wouldn't sample any of these time series
nSP = 2;
%%% find time series markers with a value of z1, z2, z3, ... (or any combo of)
%%% if you want specific surrogates, not including previously numbered
%%% surrogates, change kz,kb,... below so ka is the first surrogate you
%%% want (e.g. if you want to look at wave groups of 2,3, and 4 waves, set
%%% nSP = 3, ka=2,kb=3,kc=4)
% right now defined to consider all 6 surrogate processes
ka = 1; kb = 2; kc = 3; kd = 4; ke = 5; kf = 6;
a = DLGstats.ziHAT(:,(ka+1))>0;
b = DLGstats.ziHAT(:,(kb+1))>0;
c = DLGstats.ziHAT(:,(kc+1))>0;
d = DLGstats.ziHAT(:,(kd+1))>0;
e = DLGstats.ziHAT(:,(ke+1))>0;
f = DLGstats.ziHAT(:,(kf+1))>0;
ziHOLD = [a, b, c, d, e, f];
loc = [1 1000; 1001 2000; 2001 3000; 3001 4000; 4001 5000; 5001 6000; 6001 7000; 7001 8000; 8001 9000; 9001 10000];
loc = [loc(ka,:); loc(kb,:); loc(kc,:); loc(kd,:); loc(ke,:); loc(kf,:)];
Pstats = failureProbALL(DLGstats,nSP,ziHOLD,loc);
nsim = 1000; %num of DLG TS for each surrogate process
allSP = 1:nSP;
[Bn, ~] = Bell(nSP);
%% Initial struct set-up
%%% create a struct to hold maxima configuration probabilities, then later
%%% hold indices of DLG time series which fit into the configurations
counter = 1;
for k = 1:nSP
list = SetPartition(nSP,k);
num = length(list); %number of configurations given the # of groups
%%% go through all configurations given group number k
for h = 1:num
configurationNAMEp = '';
%%% index through k parts of group
for g = 1:k
members = list{h,1}{1,g};
numMembers = length(members);
%%% put in indices of DLG TS which fit maxima config. criteria
nameHOLDonP = 'z';
for ii = 1:numMembers
nameHOLDonP = strcat(nameHOLDonP,num2str(members(ii)));
end
nameStrP = strcat(nameHOLDonP);
configurationNAMEp = strcat(configurationNAMEp,nameStrP);
end
newField = configurationNAMEp;
newFieldp = strcat('p',configurationNAMEp);
ffInfo.(newField).(newFieldp) = Pstats.pConfig(counter);
counter = counter + 1;
end
end
%% another struct for later to hold total probabilities for similar maxima
% groups in different maxima configs so we don't repeat DLG TS
counter = 1;
for k = 1:nSP
list = SetPartition(nSP,k);
num = length(list); %number of configurations given the # of groups
%%% go through all configurations given group number k
for h = 1:num
%%% index through k parts of group
for g = 1:k
members = list{h,1}{1,g};
numMembers = length(members);
%%% put in indices of DLG TS which fit maxima config. criteria
nameHOLDonP = 'z';
for ii = 1:numMembers
nameHOLDonP = strcat(nameHOLDonP,num2str(members(ii)));
end
nameStr44 = strcat('probCount',nameHOLDonP);
probCount.(nameStr44) = 0;
end
counter = counter + 1;
end
end
%% populate struct with indices of DLG
fields = fieldnames(ffInfo);
counter = 1;
%%% now go through 1-n groups for maxima configurations
for k = 1:nSP
list = SetPartition(nSP,k);
num = length(list); %number of configurations given the # of groups
%%% go through all configurations given group number k
for h = 1:num
%%% index through k parts of group
for g = 1:k
members = list{h,1}{1,g};
numMembers = length(members);
%%% to concatenate all ziHAT of same "type" i.e. z123,
%%% z213, z312 are all the "same"
ziHOLDconcat = zeros(1,length(members)+1);
nameHOLDon = 'z';
for iii = 1:numMembers % k members of maxima config.
range = (loc(members(iii),1):loc(members(iii),2))';
part = members(ismember(members,allSP));
notPart = allSP(not(ismember(allSP,members)));
partMat = ziHOLD(range,part);
%%% which time series have all ziHAT in group
mult = ones(nsim,1);
for y = 1:numMembers
mult = mult .* partMat(:,y);
end
%%% minus any ziHAT not part of group
if isempty(notPart) == 0
notPartMat = ziHOLD(range,notPart);
for z = 1:length(notPart)
mult = mult - notPartMat(:,z);
end
end
%%% find time series which satisfy ziHAT condition-
%%% 1 or 0
haveZiHat1 = mult;
haveZiHat1(haveZiHat1<0)=0;
%%% find p(safety|config & design) for ziHAT
%%% find time series which satisfy ziHAT condition-
%%% keep time series marker to compare with ff
%%% also keep ziHAT value for when we do histogram later to
%%% choose TS based on prob of occurrence
haveZiHat = mult .* DLGstats.ziHAT(range,[1,members+1]);
haveZiHat(haveZiHat<0)=0;
haveZiHat(all(haveZiHat==0,2),:)=[];
%%% concatenate all ziHAT of same "type"
ziHOLDconcat = [ziHOLDconcat; haveZiHat];
nameHOLDon = strcat(nameHOLDon,num2str(members(iii)));
end
%%% get rid of original placeholder (first row)
ziHOLDconcat = ziHOLDconcat(2:end,:);
nameStr = strcat(nameHOLDon);
ffInfo.(fields{counter}).(nameStr) = ziHOLDconcat;
%%% put TS into a different struct to use later
TSinfo.(strcat('TS',nameHOLDon)) = ziHOLDconcat;
fNamesInside = fieldnames(ffInfo.(fields{counter}));
pConfigCount = ffInfo.(fields{counter}).(fNamesInside{1});
probCount.(strcat('probCount',nameHOLDon)) = probCount.(strcat('probCount',nameHOLDon)) + pConfigCount;
end
counter = counter + 1;
end
end
%% now find the max number of MCS we can approximate without repeating TS
%%% now get number of MCS we are trying to replicate i.e. what would most-
%%% likely failure distribution look like if we did 'numTS' # of MCS
%%% also want to make sure that for maxima categories that are in more
%%% than 1 config, we don't repeat TS
%%% use struct probCount
count = 1;
innercount = 1;
fieldsPC = fieldnames(probCount);
gMin = zeros(length(fieldsPC),1);
%%% num TS based on maxima configuration probabilities
for k = 1:nSP
list = SetPartition(nSP,k);
num = length(list); %number of configurations given the # of groups
for e = 1:num
for h = 1:k %clusters in maxima configuration
fieldsIN = fieldnames(ffInfo.(fields{count}));
str = strcat('probCount',fieldsIN(h+1));
pc = probCount.(str{:});
if pc == 0
gMin(nSP) = 0;
else
minimum = 100000; % artifically high for comparison
val = length(ffInfo.(fields{count}).(fieldsIN{h+1}))/pc;
if val < minimum
minimum = val;
end
gMin(innercount) = minimum;
end
innercount = innercount + 1;
end
count = count + 1;
end
end
gMin(all(gMin==0,2),:)=[];
%%% number of MCS we can approximate to satisfy p(config) probabilities
%%% without repeating trials of DLG TS
numT = floor(min(gMin));
%% now get a list of DLG TS for each config which will approximate the
%%% results of "numTS" MCS
%%% use struct TSinfo so we don't repeat DLG TS over different maxima
%%% configurations that share the same maxima group type
masterList = [];
addedN = 0;
for nSP = 1:Bn
fieldsIN = fieldnames(ffInfo.(fields{nSP}));
pc = ffInfo.(fields{nSP}).(fieldsIN{1});
fracInv = 1;
if pc == 0
%%% move on
else
%%% pick DLG TS
%%% num of TS to choose from each category within a maxima config.
num = floor(numT*pc/fracInv);
for k = 1:length(fieldsIN)-1 %1st field holds prob config, next fields hold clusters
if num > 1
TSname = strcat('TS',fieldsIN{1+k});
TS = TSinfo.(TSname)(:,1); % DLG TS indices; length(TS) >= num
ziVal = TSinfo.(TSname)(:,2:end);
numZ = size(ziVal,2); % dim of our hist
orderHist = [];
%%% may need to adjust histogram bin width depending on the
%%% number of surrogate processes, n
%%% want this bin size to be small, but for large n this leads
%%% to too big a histogram, and matlab cannot handle it :(
binW = 0.5;
if numZ == 1
minVal = min(TSinfo.(TSname)(:,2));
maxVal = max(TSinfo.(TSname)(:,2));
edges = {minVal-binW:binW:maxVal+binW};
else
minVal = min(TSinfo.(TSname)(:,2));
maxVal = max(TSinfo.(TSname)(:,2));
edges = {minVal-binW:binW:maxVal+binW};
for h = 2:numZ
minVal = min(TSinfo.(TSname)(:,1+h));
maxVal = max(TSinfo.(TSname)(:,1+h));
edges{end+1} = minVal-binW:binW:maxVal+binW;
end
end
[ NN, bin ] = histcnd(ziVal,edges);
[maxHit, ~] = max(NN(:));
if size(bin,2) == 1
bin = [ones(size(bin)) bin];
end
for m = maxHit:-1:1
%%% find bins with 'm' number of occurrences
hits = find(NN == m);
hitsVec = hits(:);
if max(hitsVec) > 0
%%% get locations of hits
if size(bin,2) == 2
[row, col] = ind2sub(size(NN),hits);
if size(row,2) > 1
row = row';
end
if size(col,2) > 1
col = col';
end
locations = [row col];
else
%%% need to account for n-dimensions!
add = size(bin,2);
mycell = cell(1,add);
[mycell{1:add}] = ind2sub(size(NN),hits);
rowsCell = size(mycell{1},1);
locations = zeros(rowsCell,add);
for z = 1:add
locations(:,z) = mycell{z};
end
end
numSpots = size(locations,1);
for p = 1:numSpots
bin2 = bin - locations(p,:);
%%% row indices of TS which fit this bin
orderHist = [orderHist; TS(find(all(bin2==0,2)))];
end
else
%%% do nothing
end
end
nameSTR1 = strcat('chosen',fieldsIN{1+k});
ffInfo.(fields{nSP}).(nameSTR1) = orderHist(1:num);
chosenInfo.(fields{nSP}).(nameSTR1) = orderHist(1:num);
masterList = [masterList; orderHist(1:num)];
%%% we don't want to repeat TS
TSinfo.(TSname) = TSinfo.(TSname)(num+1:end,:);
addedN = addedN + num;
end
end
end
end
masterList = sort(masterList, 'ascend');
%% function to get histogram counts of n-dim ziHAT time series
function [ n, bin ] = histcnd( x, edges )
%HISTCND Histogram count for n dimensional data.
% N = HISTCND(X,EDGES), for row vectors X, counts the number of values in
% X that fall between the grid defined by the cell array of EDGES, each
% of whose element is a vector that contain monotonically non-decreasing
% values. N is an N-D array each of whose dimension corresponds to
% LENGTH(EDGES{j}) and each element contains a count of data that falls
% into the edge.
%
% EDGES must have the same length to the number of columns of X.
% Alternatively, EDGES can be a numeric vector which gives a uniform
% grid for all dimensions of X.
%
% N(k1,k2,...) will count the vector X(i,:) if for each dimension
% j = 1,2,..., EDGES{j}(kj) <= X(i,j) < EDGES{j}(kj+1). The last bin will
% count any values of X that match EDGES(end). Values outside the values
% in EDGES are not counted. Use -inf and inf in EDGES to include all
% non-NaN values.
%
% [N,BIN] = HISTCND(X,EDGES) also returns subscript indices BIN.
% BIN is zero for out of range values.
%
% Example:
% >> X = randn(100,2); % 100-by-2 row vectors
% >> edges = {-2:.4:2,-2:.5:2}; % ranges for each dimension
% >> histcnd(X,edges)
%
% ans =
%
% 0 0 0 1 1 1 0 0 0
% 0 1 1 2 1 1 0 0 0
% 1 0 3 4 0 3 0 0 0
% 0 1 2 1 3 0 1 0 0
% 0 3 1 4 2 1 3 1 0
% 1 1 2 3 3 4 1 0 0
% 0 1 1 2 1 1 4 0 0
% 0 1 2 2 2 1 0 0 0
% 1 2 0 3 2 0 0 1 0
% 0 1 1 0 0 0 0 0 0
% 0 0 0 0 0 0 0 0 0
%
% Class support for inputs X:
% float: double, single
%
% See also histc.
% Validation
if ~isnumeric(x)
error('Input array must be numeric.');
end
if ~iscell(edges)
if isnumeric(edges) && isvector(edges)
if isempty(x)
n = [];
bin = [];
return;
end
tmp = cell(1,size(x,2));
tmp(:) = {edges};
edges = tmp;
else
error('Edges must be cell array.');
end
end
if isempty(x)
x = reshape(x,[0 length(edges)]);
end
if length(edges)~=size(x,2)
error('Invalid cell array.');
end
% Compute dimension of the output histogram
dims = cellfun(@(c) length(c), edges(:)');
if length(dims)==1
dims = [1 dims];
end
% Quantize input along each dimension
bin = cell(1,size(x,2));
outlier = false(size(x,1),1);
for i = 1:size(x,2)
[tmp,bin{i}] = histc(x(:,i),edges{i});
outlier = outlier | (bin{i}==0);
end
% Remove outlier before computing a linear index
ind = bin;
for i = 1:size(x,2)
ind{i} = ind{i}(~outlier);
end
% Compute count of the index
n = histc(sub2ind(dims,ind{:}),1:prod(dims));
% Format the output
n = reshape(n,dims);
bin = cat(2,bin{:});
end
end