%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Part of the replication package for the paper
%   "Marginal Effects for Probit and Tobit with Endogeneity"
%   by Kirill S. Evdokimov, Ilze Kalnina, and Andrei Zeleneev.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%  MC_replicate_main
%
% Performs the Monte Carlo simulation of the paper. 
%   At the end, saves the result into a mat file (name given by s_mat_full_fname), 
%   and then calls function MC_draw_figure to create the figure replicating Figure 1 in the paper.

% ---------------------------------------------------------------------
% Configuration of the Monte Carlo design.
% NOTE: all variables defined here stay in the workspace and are written
% to the .mat file at the end of the script (the save step stores every
% non-graphics variable), so do not rename or remove them without checking
% the code that reads that file.
% ---------------------------------------------------------------------
clear
tic_ID = tic; % global timer; read by toc(tic_ID) in the progress and summary printouts

% qyn is defined elsewhere; presumably qyn(cond, a, b) returns a when cond is
% true and b otherwise (consistent with the bOutMC comment below) -- confirm.
bProbit     = 0; % 1: binary outcome Y = (Ystar>0) (Probit); 0: censored outcome Y = max(Ystar,0) (Tobit)
bOutMC      = 1; %###### Set bOutMC = 1 for the replication, bOutMC = 0 for very fast execution to test the code
bMakeFixYFigures = 1; % not referenced in this script; kept in the saved workspace
n_reps      = qyn(bOutMC, 5000, 51); % number of Monte Carlo replications per value of rho_UV
max_rho_UV  = qyn(bOutMC, 0.95, 0.9); % the rho_UV grid spans [-max_rho_UV, max_rho_UV]
n_grid_rho  = qyn(bOutMC, 21, 5); % number of points in the rho_UV grid

n       = qyn(bOutMC, 1000, 201); % sample size of each simulated data set
thetaXW_0 = [2 1]'; % true coefficients on [Xs W] in the latent equation for Ystar
dim_X   = length(thetaXW_0); % X is [X W]
sig_V_0 = 1;   % scale of the first-stage error: Xs = Z*pi_Z + sig_V_0*Vstd + dgp.EX1
sig_U_0 = 1.0; % scale of the latent-equation error: Ystar = [Xs W]*thetaXW_0 + sig_U_0*Ustd
theta_0 = [thetaXW_0; sig_U_0];
pi_Z    = sig_V_0; % first-stage coefficient on the instrument Z (set equal to sig_V_0)
dgp.EX1 = 0; % constant added to Xs
dgp.EW = [zeros(1,dim_X-2) 1]; % means of the columns of W; the last column of W is a constant 1 (asserted in the loop)
dgp.true_EXW = [dgp.EX1 dgp.EW]; % the point [E(X) E(W)] used for partial effects "at the mean"
dgp.b_PEs_at_sample_means = 0; % 1: evaluate partial effects at the sample mean of [Xs W]; 0: at dgp.true_EXW
pi_W1_X = 0; % coefficient of W(:,1) in Xs; only applied when dim_X>=3

% calculate unconditional probability of Y^*>0 when dim_W = 1 and rho_UV = 0;
% (pr_Y_positive is evaluated at rho_UV = -max_rho_UV, 0 and max_rho_UV; it is not used
%  later in this script, only kept in the saved workspace)
E_Ystar = dgp.true_EXW(:)'*thetaXW_0;
% Var(Ystar) as a function of rho_UV: contributions of Z, V and U plus the U-V covariance term
fn_V_Ystar = @(rho_UV) (theta_0(1)*pi_Z(1))^2 + (theta_0(1)*sig_V_0)^2 + sig_U_0^2 + 2*theta_0(1)*sig_V_0*sig_U_0*rho_UV;
pr_Y_positive = normcdf(E_Ystar./sqrt([fn_V_Ystar(-max_rho_UV) fn_V_Ystar(0) fn_V_Ystar(max_rho_UV)]));

dgp_tau_eps = 1; % Magnitude of the Measurement Error: X = Xs + dgp_tau_eps*sig_V_0*randn
s_fig_dir = "MC_res/"; % output folder; the .mat file goes into its "mat/" subfolder
n_obs   = n;
s_fig_fname_suffix = sprintf("_n%d", n_obs); % file-name suffix encoding the sample size

b_add_sgtitle = 1; % Add sgtitle to plots?
b_HideWarning_nearlySingularMatrix = 0; % 1: switch off the MATLAB:nearlySingularMatrix warning inside the replications

% Text description of the design (TeX-style markup); not used later in this script
s_design_info = sprintf("\\tau_{EIV} = %4.2f,  \\pi_{Z} = %4.2f, \\sigma_{V0} = %4.2f, n = %d, \\theta_0 = [%s],   E[(X,W')]=[%s]"...
                           , dgp_tau_eps, pi_Z, sig_V_0, n, strtrim(sprintf('%4.2f ', thetaXW_0(:)')), strtrim(sprintf('%4.2f ', dgp.true_EXW)));

rng(12345); % Just in case, does nothing: every replication installs its own random stream inside the loop
rho_UV_arr = linspace(-max_rho_UV, max_rho_UV, n_grid_rho); % grid of corr(Ustd,Vstd) values

alpha              = 0.05;     % passed to fn_CI_PE_and_sig_Us together with alpha_bonf_1
alpha_bonf_1       = alpha/10; %/2; %/5; 
% Preallocated arrays; they are not written to anywhere in this script
coverage_PE_bounds = zeros((2-bProbit)*dim_X,n_grid_rho,n_reps);
coverage_PE_true   = zeros((2-bProbit)*dim_X,n_grid_rho,n_reps);
arr_all_R          = cell(n_grid_rho, 1); % arr_all_R{i_rho}: n_reps-by-1 struct array of replication results
% ======================= Monte Carlo loop ==============================
% Outer loop: grid of rho_UV values. Inner parfor: independent replications.
% The result struct R of every replication is collected in arr_R_i_rho and
% then stacked into an n_reps-by-1 struct array stored in arr_all_R{i_rho}.
for i_rho = 1:n_grid_rho
  rho_UV_0 = rho_UV_arr(i_rho);
  fprintf('i_rho = %3d / %3d : \n', i_rho, n_grid_rho);
  arr_R_i_rho = cell(n_reps, 1);
  parfor i_rep = 1:n_reps
  %for i_rep = 1:n_reps
    % Replication i_rep always uses substream i_rep of the same seeded generator, so its
    % draws do not depend on how iterations are scheduled and are the same for every i_rho.
    RandStream.setGlobalStream(RandStream.create('mrg32k3a','NumStreams',10^7,'StreamIndices',i_rep,'Seed',0));

    % ---- Generate the data. Do NOT reorder the randn calls: the order fixes the draws. ----
    Z = randn(n,1);
    Vstd = randn(n,1); %V[Vstd]=1
    Ustd = rho_UV_0*Vstd + sqrt(1-rho_UV_0^2)*randn(n,1); %V[Ustd]=1
    W = [randn(n,dim_X-2)+dgp.EW(1:dim_X-2) ones(n,1)]; assert(dgp.EW(end)==1); %W(:,end) must be a column of ones
    Xs = Z*pi_Z+sig_V_0*Vstd + dgp.EX1; % error-free regressor
    if dim_X>=3
      Xs = Xs + pi_W1_X*W(:,1); % make W_1 correlated with Xs (unless dim_X=2, so W_1 is a constant)
    end
    Ystar  = [Xs W]*thetaXW_0 + sig_U_0*Ustd; % latent outcome
    Y = Ystar;
    if bProbit,  Y = Y>0;
    else,        Y(Y<0) = 0; % Tobit
    end
    X = Xs + dgp_tau_eps*sig_V_0*randn(n,1); % observed regressor = Xs + measurement error

    % ---- Estimation ----
    warning(qyn(b_HideWarning_nearlySingularMatrix, "off", "on"), "MATLAB:nearlySingularMatrix"); %Probit has problematic asyV matrix for some designs???
    tic_ID_this = tic;
    if dgp.b_PEs_at_sample_means
      h_for_PEs = mean([Xs W]);
    else
      h_for_PEs = dgp.true_EXW;
    end

    s_xobit_options = "EstAPE AsyV"; 
    R = fn_ivxobit(Y, X, W, Z, bProbit, s_xobit_options, h_for_PEs); %, 1); %fn_ivXobit(Y, X, W, Z, 1,R_oracle);
    R.h_for_PEs       = h_for_PEs;
    R.mc_mean_Y_star  = mean(Ystar);
    R.mc_std_Y_star   = std(Ystar);
    R.mc_prob_Y_posit = mean(Y>0);
    
    % Progress report: printed when i_rep is a multiple of floor(n_reps/2)
    if mod(i_rep,floor(n_reps/2))==0
      fprintf('REP %3d  took %5.3fs (total so far %4.1fs)\n', i_rep, toc(tic_ID_this), toc(tic_ID));
    end

    % ---- Benchmarks computed from the error-free regressor and the true parameters ----
    % "_oracle": uses the simulated Xs of this replication; "_true": uses dgp.true_EXW.
    XsW_th0 = [Xs W]*thetaXW_0; % \theta_0'H^*
    % PEs on Probability
    R.APE_P_oracle = mean(normpdf(XsW_th0/sig_U_0))*thetaXW_0/sig_U_0;
    R.PEM_P_oracle = normpdf(mean(XsW_th0)/sig_U_0)*thetaXW_0/sig_U_0;
    R.APE_P_true = NaN(size(thetaXW_0)); %TBA: calculate analytically (population analogue of APE_P_oracle)
    R.PEM_P_true = normpdf(dgp.true_EXW*thetaXW_0/sig_U_0)*thetaXW_0/sig_U_0;
    
    if ~bProbit
      % PEs on the Expectation (Tobit only)
      R.APE_E_oracle = mean(normcdf(XsW_th0/sig_U_0))*thetaXW_0;
      R.PEM_E_oracle = normcdf(mean(XsW_th0)/sig_U_0)*thetaXW_0;
      R.APE_E_true   = NaN(size(thetaXW_0)); %TBA: calculate analytically (population analogue of APE_E_oracle)
      R.PEM_E_true   = normcdf(dgp.true_EXW*thetaXW_0/sig_U_0)*thetaXW_0;
    end
    Y1 = Y>0; % Y1 == Y for Probit
    R.frac_Y_positive = mean(Y1);
    %OLS and TSLS for Y1:
    R.beta_OLS_Xs = [Xs W]\Y1;
    R.beta_OLS    = [X W]\Y1;
    X_hat_TSLS    = [Z W]*([Z W]\X); % first-stage fitted values; reused for the Tobit outcome below
    R.beta_TSLS   = [X_hat_TSLS W]\Y1;
    if ~bProbit
      % Same three regressions with the censored outcome Y as the dependent variable
      R.beta_E_OLS_Xs = [Xs W]\Y;
      R.beta_E_OLS    = [X W]\Y;
      R.beta_E_TSLS   = [X_hat_TSLS W]\Y;
    end
    % compute true PEM_Corr based on the parameters of the noisy DGP. Use eq (12)
    true_sig_eps      = dgp_tau_eps*sig_V_0;       % b/c X = Xs + dgp_tau_eps*sig_V_0*randn(n,1);
    true_sig_U_noisy  = sqrt(sig_U_0^2 + thetaXW_0(1)^2*true_sig_eps^2);   % sig2_U = sig2_Us + th01^2 * sig2_e
    true_sig_V_noisy  = sqrt(sig_V_0^2 + true_sig_eps^2);                  % sig2_V = sig2_Vs +          sig2_e
    true_sig_UV_noisy = rho_UV_0*sig_U_0*sig_V_0 - thetaXW_0(1)*true_sig_eps^2; % sig_UV = sig2_UsVs - th01 * sig2_e
    R.mc_true_PEM_Corr = fn_ivtobit_PE_from_beta(dgp.true_EXW,[thetaXW_0',true_sig_V_noisy, true_sig_UV_noisy, true_sig_U_noisy]');
    % Second output is requested (nargout stays 2, as before) but is not needed here
    [CI_PE_Corr, ~] = fn_CI_PE_and_sig_Us(bProbit,R,alpha,alpha_bonf_1); % CI_PE_Corr = zeros((2-bProbit)*dim_theta,2);
    R.CI_PE_Corr = CI_PE_Corr;
    arr_R_i_rho{i_rep} = R;
  end %for i_rep
  arr_all_R{i_rho} = [arr_R_i_rho{:}]'; % n_reps-by-1 struct array
end %for i_rho

fprintf('In total took %1.1fs  (n_reps = %d)\n\n', toc(tic_ID), n_reps);
R = arr_all_R{end}(end); %for debugging
if b_HideWarning_nearlySingularMatrix
  % NOTE(review): this runs in the session executing the script; if the parfor ran on
  % parallel workers, their warning state is presumably not affected -- confirm.
  warning("on", "MATLAB:nearlySingularMatrix"); % Re-enable the warning
end

%% Save the results and draw the figure

s_fig_fname = sprintf("fig_MC_R%d_%s.fig", n_reps, s_fig_fname_suffix);

% reduce size of saved file: only save fields used by the MC_draw_figure
target_fields = {'theta','mc_prob_Y_posit','PEM_E_oracle','PEM_P_oracle',...
                 'PEM_E_true','PEM_P_true','PEMs_Corr','CI_PE_Corr',...
                 'mc_true_PEM_Corr','PEM_Naive','se_PEM_Naive'};
arr_all_R = fn_extract_fields(arr_all_R,target_fields);

% Results are saved (and the figure drawn) only for runs with at least 50 replications;
% the leading constant 1 has no effect on the condition.
if 1 && (n_reps>=50)
  s_mat_dir = s_fig_dir + "mat/";
  % Make sure the output folder exists: save() errors out on a missing folder, which
  % would discard the whole simulation. No new workspace variable is introduced here,
  % so the set of saved variables is unchanged.
  if ~isfolder(s_mat_dir)
    mkdir(s_mat_dir);
  end
  % Save every workspace variable except graphics/UI handle objects
  allvars = whos;
  ix2save = cellfun(@isempty, regexp({allvars.class}, '^matlab\.(ui|graphics)\.'));
  s_mat_full_fname = fullfile(s_mat_dir, strrep(s_fig_fname, '.fig', '.mat'));
  save(s_mat_full_fname, allvars(ix2save).name);
  % ---------- MAKE THE FIGURE IN THE PAPER ------------------
  MC_draw_figure(s_mat_full_fname);
  s_fig_full_fname = fullfile(s_fig_dir, s_fig_fname);
  % To save the figure:
  % savefig(s_fig_full_fname);
end

function R_out = fn_extract_fields(R_in,target_fields)
% FN_EXTRACT_FIELDS  Keep only selected fields of the structs stored in a cell array.
%
%   R_out = fn_extract_fields(R_in, target_fields)
%
%   Inputs:
%     R_in          - cell array; every cell holds a struct or a struct array.
%     target_fields - cell array of field names to keep. Names that are not
%                     fields of a given struct are simply ignored.
%   Output:
%     R_out         - cell array of the same size as R_in. In every cell all
%                     fields not listed in target_fields are removed; the
%                     remaining fields keep their original relative order.
%
%   The list of fields to drop is determined separately for each cell, so
%   cells may hold structs with different field sets. An empty R_in is
%   returned unchanged.
  R_out = R_in;
  for i_cell = 1:numel(R_out)
    fieldnames_delme = setdiff(fieldnames(R_out{i_cell}), target_fields);
    if ~isempty(fieldnames_delme)
      % rmfield accepts a list of names: a single call (one copy of the
      % struct array) per cell instead of one call per removed field
      R_out{i_cell} = rmfield(R_out{i_cell}, fieldnames_delme);
    end
  end
end
