%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Part of the replication package for the paper
%   "Marginal Effects for Probit and Tobit with Endogeneity"
%   by Kirill S. Evdokimov, Ilze Kalnina, and Andrei Zeleneev.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%
%clear
s_DIR = "";

bInternal = 0;

if ~exist('bProbit', 'var')
  bProbit = 0; % Set this to 1 to get Probit, or 0 to get Tobit
end

s_diary_fname = "Empirical_res/diary_" + qyn(bProbit, 'Pr', 'T')+"obit_NLSY97.txt";
diary off

T_data = readtable(s_DIR + "NLSY/nlsy97_empirical.csv");
T_data = renamevars(T_data, "spouse_educ", "huseduc"); % variable renamed to match the code below (originally Mroz variable names)

T_data.inlf = T_data.hours > 0;
ix_drop = T_data.nwifeinc < 0; % Drop observations with missing data
T_data(ix_drop,:) = [];

fprintf('N = %d, working non-zero hours = %d (i.e., %5.1f%%)\n', nrows(T_data), sum(T_data.inlf), mean(T_data.inlf)*100);

% *********************************************************************************************************************
% Some Notes
%
% * Note: Probit's R.theta_normalized uses the standard Probit normalization, but R.theta does not.
% * The APEs for the ivtobit and ivprobit match the Stata results that I tried ("Empirical007_Example17-2_and_17-3.do"
%     and "Empirical006_Example15-3B with template code.do")
% * The APEs formulas for Tobit-P and Probit are the same if stated in terms of theta_rho; Tobit estimates sig_e1,
%     while for Probit it is sig_e1 = 1;
% *********************************************************************************************************************
  
s_model_name = qyn(bProbit, "IV Probit", "IV Tobit");
fprintf('\n*** Model: %s\n', s_model_name);

%T_data.nwifeinc = T_data.nwifeinc + 1.0*randn(length(T_data.nwifeinc),1)*std(T_data.nwifeinc)*1; warning('ADDED NOISE!!');
T_data.exper2 = T_data.exper.^2;
n = size(T_data, 1);
arr_W_var_names = ["educ" "exper" "exper2" "age" "kidslt6" "kidsge6" "black" "hispanic"]; 
W = [T_data{:,arr_W_var_names} ones(n,1)]; % we must add the constant manually
X = T_data.nwifeinc;
Z = T_data.huseduc;
Y = qyn(bProbit, T_data.inlf, T_data.hours);
PEMs_h = mean([X W])';

theta_hat_OLS  = [X W]\Y;
theta_hat_TSLS = fn_TSLS(Y, X, Z, W);
theta_hat_OLS_Probit  = [X W]\(Y>0);
theta_hat_TSLS_Probit = fn_TSLS(Y>0, X, Z, W);
if bProbit
  [beta_hat_Naive_MLE, ~, loglik_val] = probit_MLE(Y, [X W]);
  theta_hat_Naive_MLE = beta_hat_Naive_MLE; sig_U_Naive_MLE = 1;
  Naive_MLE_PEMs_E = [];
  Naive_MLE_APEs_E = [];
else
  [beta_hat_Naive_MLE, loglik_val] = tobit_MLE(Y, [X W]);
  theta_hat_Naive_MLE = beta_hat_Naive_MLE(1:end-1); sig_U_Naive_MLE = beta_hat_Naive_MLE(end);
  Naive_MLE_PEMs_E = normcdf(theta_hat_Naive_MLE'*PEMs_h/sig_U_Naive_MLE)*theta_hat_Naive_MLE;
  Naive_MLE_APEs_E = mean(normcdf([X W]*theta_hat_Naive_MLE/sig_U_Naive_MLE))*theta_hat_Naive_MLE;
end
Naive_MLE_PEMs_Pr = normpdf(theta_hat_Naive_MLE'*PEMs_h/sig_U_Naive_MLE)*theta_hat_Naive_MLE/sig_U_Naive_MLE;
Naive_MLE_APEs_Pr = mean(normpdf([X W]*theta_hat_Naive_MLE/sig_U_Naive_MLE))*theta_hat_Naive_MLE/sig_U_Naive_MLE;

Naive_MLE_PEMs_this_Y = qyn(bProbit, Naive_MLE_PEMs_Pr, Naive_MLE_PEMs_E);
Naive_MLE_APEs_this_Y = qyn(bProbit, Naive_MLE_APEs_Pr, Naive_MLE_APEs_E);

s_xobit_options = "EstAPE AsyV"; %flag_Optimize_for_APE_LB_UB = 1; %0; %2;
R = fn_ivxobit(Y, X, W, Z, bProbit, s_xobit_options, PEMs_h);
Z_partialed_out_W = Z-W*(W\Z);

alpha = 0.05;
alpha_bonf_1 = alpha/10;
dim_theta = length(R.theta);

%%
s_APE_name = qyn(R.bProbit, "APE-Pr", "APE-Tob");
[APE_Naive_ivxobit, APE_Naive_ivxobit_CI] = fn_APE_with_CI(R, "Naive-ivxobit-" + s_APE_name, Z, W, alpha, alpha_bonf_1);
[APE_bounds, APE_bounds_CI] = fn_APE_with_CI(R, s_APE_name, Z, W, alpha, alpha_bonf_1);
%% Inference on the PEs
model_info.dim_theta = length(R.theta);
model_info.dim_pi = length(R.pi_hat);
model_info.bProbit = bProbit;

[CI_PE_Corr,asy_CI_sig_Us] = fn_CI_PE_and_sig_Us(bProbit,R,alpha,alpha_bonf_1);

se_PEM_Naive = R.se_PEM_Naive;

Sigma_theta = R.asy_Sig_eta(1:dim_theta,1:dim_theta);
CI_sig_Us = asy_CI_sig_Us;

res_PEMs_E_CI = zeros(dim_theta-1,2);
res_PEMs_P_CI = zeros(dim_theta-1,2);

fclose('all');
str_CI_info     = 'Asy' + qyn(alpha==0.05, "", sprintf("__alpha=%4.2f__", alpha));

warning('off','MATLAB:DELETE:FileNotFound');
delete(s_diary_fname); % remove old diary file if it existed
diary(s_diary_fname);
for bProb=[qyn(bProbit, [], 0) 1] %inference on Pr or E. Probit has "for bProb = 1". Tobit has "for bProb = [0 1]"
  n_theta_2_include    = dim_theta-5;
  idx_rows_2_include   = bProb*dim_theta+(1:n_theta_2_include);
  PEM_ivXobit_Naive_CI = R.PEM_Naive+norminv(1-alpha/2)*se_PEM_Naive*[-1 1];
  T = table(R.PEM_Naive(         idx_rows_2_include,1)  ...
          , PEM_ivXobit_Naive_CI(idx_rows_2_include,:)  ...
          , R.PEMs_Corr(         idx_rows_2_include,1:2), zeros(n_theta_2_include,2)  ...
          , 'VariableNames', ["PE_ivXobit" "PE_ivXobit_CI" "PE_Corr" "PE_Corr_CI"]);

  T.PE_Corr_CI = CI_PE_Corr((~bProbit)*bProb*dim_theta + (1:n_theta_2_include),:);
  
  XW_var_names = [ "nwifeinc" arr_W_var_names "const"]';
  T.Properties.RowNames = XW_var_names(1:n_theta_2_include);
  mul_str = qyn(bProb," All numbers are multiplied by 100.","");
  obs_str = " $n=" + R.n + "$.";
  s_table_caption = strrep(s_model_name, "IV ", "") + ". Partial Effects on " + qyn(bProb, 'Probability.', 'Expectation.') + mul_str + obs_str;
  T.Properties.Description = s_table_caption;
  T2      = table(qyn(bProb, Naive_MLE_PEMs_Pr, Naive_MLE_PEMs_E), 'VariableNames', ["PE_Exog_MLE"]);
  T       = [T2(1:n_theta_2_include,:) T];
  PE_mul  = qyn(bProb, 100, 1);
  T{:,:}  = T{:,:}*PE_mul;
  T_print = T;
  if bProbit == bProb
    T2 = table(theta_hat_OLS*PE_mul, theta_hat_TSLS*PE_mul ... 
           , 'VariableNames', ["ols" "tsls"]);
  end
  
  fprintf('\n');
  fprintf("***  %s:\n", T.Properties.Description);

  % Computing APEs:
  T_Naive_APEs = array2table(qyn(bProbit==bProb, [theta_hat_OLS, NaN(dim_theta,1), theta_hat_TSLS] ...
                    , [theta_hat_OLS_Probit, NaN(dim_theta,1), theta_hat_TSLS_Probit]) ...
                    , 'VariableNames', ["ols" "APE_Exog" "tsls"]);
  bAPE=1;
  s_APE_name = qyn(bProb, "APE-Pr", "APE-Tob");
  [APE_Naive_ivxobit, APE_Naive_ivxobit_CI] = fn_APE_with_CI(R, "Naive-ivxobit-" + s_APE_name, Z, W, alpha, alpha_bonf_1);
  [APE_bounds, APE_bounds_CI] = fn_APE_with_CI(R, s_APE_name, Z, W, alpha, alpha_bonf_1);
  T_APE = table(APE_Naive_ivxobit, APE_Naive_ivxobit_CI, APE_bounds, APE_bounds_CI);
  T_APE.Properties.RowNames = XW_var_names;
  T_Naive_APEs.APE_Exog = qyn(bProb, Naive_MLE_APEs_Pr, Naive_MLE_APEs_E);
  T_Naive_APEs = renamevars(T_Naive_APEs, "APE_Exog", qyn(bProbit,'Probit','Tobit'));
  T_APE = [T_Naive_APEs T_APE]; %#ok<AGROW>
  T_APE{:,:} = PE_mul * T_APE{:,:};
  T_out = removevars(T_APE(1:n_theta_2_include,:), ["ols" "tsls"]);
  T_out{:,:} = round(T_out{:,:}, 3, 'significant');
  disp(T_out);

end
diary off

%% Helper functions

function theta_hat_TSLS = fn_TSLS(Y, X, Z, W)
  ZW = [Z W];
  X_hat = ZW*((ZW'*ZW)\(ZW'*X)); %X_hat = Z*((Z'*Z)\(Z'*X));
  XW_hat = [X_hat W];
  theta_hat_TSLS = (XW_hat'*XW_hat)\(XW_hat'*Y);
end

%% New APE Estimation and Inference

function CI_sig_Us = fn_CI_for_sig_Us(R, alpha_bonf_1)
  % Calculate (asy) CI for sig_Us
  Sig_theta_V_sig_e1 = R.asy_Sig_for_bounds(2:3,2:3); % = Var([R.theta_V, R.sig_e1])
  asy_CI_sig_Us2_LB = max(1e-7, inference_on_max([R.sig_Us_LB_1^2 R.sig_Us_LB_2^2], Sig_theta_V_sig_e1, alpha_bonf_1/2));
  asy_CI_sig_Us_UB  = R.sig_U + sqrt(R.asy_Sig_for_bounds(1,1)) * norminv(1-alpha_bonf_1/2);
  CI_sig_Us = [sqrt(asy_CI_sig_Us2_LB) asy_CI_sig_Us_UB];
end

function [APE_bounds, APE_bounds_CI, debug_info] = fn_APE_with_CI(R, s_name, Z, W, alpha, alpha_bonf_1)
  if nargin<5; alpha=0.05; end
  if nargin<6; alpha_bonf_1=alpha/10; end
  assert(isscalar(alpha) && isscalar(alpha_bonf_1) && alpha_bonf_1<alpha);
  CI_sig_Us = fn_CI_for_sig_Us(R, alpha_bonf_1);
  debug_info.CI_sig_Us = CI_sig_Us;

  % mi = model_info
  mi.dim_theta = length(R.theta);
  mi.dim_pi = length(R.pi_hat);
  mi.bProbit = R.bProbit;
  idx_FUN = 1:qyn(s_name=="DEBUG", mi.dim_theta+1, mi.dim_theta);
  dim_FUN = length(idx_FUN);
  
  TolX = 1e-6;
  s_naive_ivxobit_name_prefix = "Naive-ivxobit-";
  b_naive_ivxobit_PE = 0;
  if s_name=="DEBUG" || startsWith(s_name, s_naive_ivxobit_name_prefix,"IgnoreCase",1)
    b_naive_ivxobit_PE = 1;
    s_name = erase(s_name, caseInsensitivePattern(s_naive_ivxobit_name_prefix));
    n_grid_sig_Us = 1;
    grid_sig_Us = -999;
    idx_Naive = 1;
  else
    n_grid_sig_Us = 51; %51
    eps_bnd = min([1e-7, 0.1*TolX, 0.1*(CI_sig_Us(2)-CI_sig_Us(1))]); % avoid evaluating exactly at lb and ub
    grid_sig_Us = linspace(CI_sig_Us(1)+eps_bnd, CI_sig_Us(2)-eps_bnd, n_grid_sig_Us-2);
    idx_sig_Us_LB = find(grid_sig_Us>R.sig_Us_LB, 1);
    grid_sig_Us = [grid_sig_Us(1:idx_sig_Us_LB-1) R.sig_Us_LB grid_sig_Us(idx_sig_Us_LB:end)];
    idx_Naive = find(grid_sig_Us>R.sig_U, 1);
    grid_sig_Us = [grid_sig_Us(1:idx_Naive-1) R.sig_U grid_sig_Us(idx_Naive:end)];
  end
  arr_res_FUN = cell(n_grid_sig_Us, 3);

  % FUN = FUNctional
  fn_FUN_at_sigUs = @(eta, sig_Us) fn_get_APE_functional(eta, sig_Us, idx_FUN, mi, s_name, Z, W);
  
  sc_eta_0 = R.eta_0_for_grad_fn;
  dim_sc_eta_0 = length(sc_eta_0);
  % We will use augmented parameter vector. The augmented score is linear in the extra parameters.
  sc_aug_d_eps = [R.score_num_deriv__d_eps(:); 1e-3*ones(dim_FUN,1)];
  
  %tic
  for i_sigUs = 1:n_grid_sig_Us
    this_sig_Us = grid_sig_Us(i_sigUs);
    this_FUN_i = fn_FUN_at_sigUs(sc_eta_0, this_sig_Us);
    this_E_FUN = mean(this_FUN_i)';
    sc_aug_eta_0 = [sc_eta_0; this_E_FUN];
    
    fn_aug_score = @(aug_eta) [R.score_fn(aug_eta(1:dim_sc_eta_0)) ...
            fn_FUN_at_sigUs(aug_eta(1:dim_sc_eta_0), this_sig_Us) - aug_eta(dim_sc_eta_0+1:end)'];
  
    [gmm_G, gmm_Om, gmm_Sig] = gmm_get_G_Om_Sig(fn_aug_score, sc_aug_eta_0, sc_aug_d_eps); %#ok<ASGLU> 
    %  Warning: gmm_Sig is not divided by n, but all other Sig in the code ARE divided by n, so usually asy_Sig = gmm_Sig/n 
    
    arr_res_FUN{i_sigUs,1} = sc_aug_eta_0;              % hat{FUN}(this_sig_Us)
    arr_res_FUN{i_sigUs,2} = sqrt(diag(gmm_Sig)/R.n); % S.E. 
    arr_res_FUN{i_sigUs,3} = gmm_Sig/R.n; %
  end

  idx_FUN_in_aug = dim_sc_eta_0 + idx_FUN;
  if b_naive_ivxobit_PE
    APE_bounds    = arr_res_FUN{idx_Naive, 1}(idx_FUN_in_aug); % = APE_Naive_ivxobit
    APE_bounds_CI = APE_bounds + norminv(1-alpha/2) * [-1 1] .* arr_res_FUN{idx_Naive, 2}(idx_FUN_in_aug); % APE_Naive_ivxobit_CI
    debug_info.this_E_FUN = arr_res_FUN{idx_Naive, 1}(idx_FUN_in_aug);
    debug_info.aug_eta = arr_res_FUN{idx_Naive, 1};
    debug_info.Sig = arr_res_FUN{idx_Naive, 3};
  else
    APE_bounds    = NaN(dim_FUN,2);
    APE_bounds_CI = NaN(dim_FUN,2);
    qN = norminv(1-(alpha-alpha_bonf_1)/2);
  
    fn_find_extremum = @(grid_f, bMax) find_grid_extremum_quadratic(grid_sig_Us, grid_f, bMax);
    for i_FUN = 1:dim_FUN
      arr_b  = cellfun(@(v) v(idx_FUN_in_aug(i_FUN)), arr_res_FUN(:,1));
      arr_se = cellfun(@(v) v(idx_FUN_in_aug(i_FUN)), arr_res_FUN(:,2));
      
      APE_bounds(i_FUN,1)    = fn_find_extremum(arr_b(idx_sig_Us_LB:idx_Naive), 0);
      APE_bounds(i_FUN,2)    = fn_find_extremum(arr_b(idx_sig_Us_LB:idx_Naive), 1);
      APE_bounds_CI(i_FUN,1) = fn_find_extremum(arr_b - qN * arr_se, 0);
      APE_bounds_CI(i_FUN,2) = fn_find_extremum(arr_b + qN * arr_se, 1);
    end
  end
end

function [val, x] = find_grid_extremum_quadratic(grid_x, grid_f, bMax)
  % Find MIN or MAX on a grid plus quadratic interpolation
  if bMax
    grid_f = -grid_f;
  end
  [val,i_min] = min(grid_f);
  x = grid_x(i_min);
  for i=2:length(grid_f)-1
    if grid_f(i)<=grid_f(i-1) && grid_f(i)<grid_f(i+1) % local minimum on the grid
      p = polyfit(grid_x(i+(-1:1)), grid_f(i+(-1:1)), 2);
      assert(p(1)>=0);
      x_try = -p(2)/(2*p(1));
      if x_try>=grid_x(i-1) && x_try<=grid_x(i+1)
        v_try = polyval(p, x_try);
        if v_try<val
          val = v_try; x = x_try;
        end
      end
    end
  end
  if bMax
    val = -val;
  end
end

function APE_i = fn_get_APE_functional(eta, sig_Us, idx_FUN, mi, s_name, Z, W)
  % APE_i is n x length(idx_theta)
  if isempty(idx_FUN), idx_FUN = 1:mi.dim_theta; end %idx_theta = [] means "all of theta"

  % eta ~ R.eta_0_for_grad_fn; so
  % eta = [R.theta; R.theta_V; pi_hat; qyn(bProbit, [], R.sig_e1); R.sig_V];
  eta = eta(:);
  assert( length(eta) == mi.dim_theta + 1 + mi.dim_pi + (~mi.bProbit) + 1 );
  assert( (mi.dim_theta == size(W,2) + 1) && (mi.dim_pi == size(W,2) + size(Z,2)) );
  n = size(Z,1);
  
  theta = eta(1:mi.dim_theta); 
  pi_hat = eta(mi.dim_theta + 1 + (1:mi.dim_pi));
  
  % These are used for the Denominator of APE, and for the (A)PEs of the Naive-ivxobit
  sig_e1 = qyn(mi.bProbit, 1, eta(end-1)); % = \sigma_{U|V}
  sig_V  = eta(end);
  gamma  = eta(mi.dim_theta+1);
  sig_U2 = (gamma*sig_V)^2 + sig_e1^2;
  
  if sig_Us== -999 % computing naive IV-Tobit PEs, which assume there is no EiV
    sig_Us = sqrt(sig_U2);
  end
  
  % APE
  APE_numerator = W*theta(2:end) + theta(1)*([Z W]*pi_hat);
  
  % Denominator:
  % \theta_{01}^{2}\sigma_{V}^{2}-\sigma_{U}^{2} = (\theta_{01}^{2}-\gamma^{2}) \sigma_{V}^{2} + \sigma_{U|V}^{2}
  % denom_expr = sqrt( 2*sig_Us^2 +  (theta(1)^2 - gamma^2)*sig_V^2 - sig_e1^2 );
  denom_expr = sqrt( 2*sig_Us^2 - sig_U2 + (theta(1)*sig_V)^2  );
  APE_i = [];
  
  switch s_name
    case "APE-Tob"; APE_i = normcdf(APE_numerator/denom_expr) * theta(idx_FUN)';
    case "APE-Pr";  APE_i = normpdf(APE_numerator/denom_expr)/denom_expr * theta(idx_FUN)';
    otherwise; error('Unknown APE!!');
  end
end

