%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Part of the replication package for the paper
%   "Marginal Effects for Probit and Tobit with Endogeneity"
%   by Kirill S. Evdokimov, Ilze Kalnina, and Andrei Zeleneev.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function [R] = fn_ivxobit(Y, X, W, Z, bProbit, s_options, h_for_PEs)
  if nargin<6 || s_options==""; s_options = "EstAPE AsyV"; end
  flag_Compute_AsyVars = contains(s_options, 'AsyV','IgnoreCase',true);
  flag_Estimate_APEs = contains(s_options, 'EstAPE','IgnoreCase',true);
  if isempty(W) || min(var(W))>0
    warning("fn_ivxobit:noInterceptQ", "fn_ivxobit: there seems to be no intercept in W, it should be included manually!");
  end
  if nargin<7; h_for_PEs = mean([X W])'; end
  
  if var(Y)==0
    R = struct();
    error('fn_ivxobit:varY0', 'fn_ivxobit Error: var(Y)=0, cannot estimate ivxobit parameters.');
  end

  pi_hat      = [Z W]\X;
  V_hat       = X - [Z W]*pi_hat;
  if bProbit
    [theta_rho, ~, loglik_val] = probit_MLE(Y, [X W V_hat]); % Non-standard normalization for computing!
    R.sig_e1  = 1;
  else
    [theta_rho_w_sig_e1, loglik_val] = tobit_MLE(Y, [X W V_hat]);
    theta_rho   = theta_rho_w_sig_e1(1:end-1);
    R.sig_e1    = theta_rho_w_sig_e1(end);
  end
  R.estimator = qyn(bProbit,"ivprobit","ivtobit");
  R.bProbit   = bProbit;
  R.theta     = theta_rho(1:end-1); % Probit: Non-standard normalization
  R.theta_V   = theta_rho(end);
  R.theta_rho = theta_rho;

  n = length(Y);
  R.sig_V     = sqrt(sum(V_hat.^2)/n); %std(V_hat);% probit code: R.sig_V   = sqrt(sum(V_hat.^2)/(n-1)); 
  R.sig_U     = sqrt(R.sig_e1^2 + R.theta_V^2*R.sig_V^2);
  R.sig_UV    = R.theta_V*R.sig_V^2;
  R.theta_normalized = R.theta/R.sig_U; % Standard Probit normalization
  R.R2_1st_stage = 1-R.sig_V^2/var(X);
  
  R2_u        = R.R2_1st_stage;
  R2_r        = 1-std(X - W*(W\X))^2/var(X);
  R.F_weakIV  = (R2_u-R2_r)/(1-R2_u)*(length(Z)-size(W,2));
  R.n = n;
  R.pi_hat = pi_hat;

  R.sig_Us_LB_1 = sqrt((R.theta(1)*R.sig_UV+R.sig_U^2)^2/(R.sig_V^2*R.theta(1)^2 + 2*R.theta(1)*R.sig_UV + R.sig_U^2));
  sig_Us_LB_2_squared = R.sig_U^2-R.theta(1)^2*R.sig_V^2;
  R.sig_Us_LB_2 = sqrt(max(sig_Us_LB_2_squared, 0));
  R.sig_Us_LB = max(R.sig_Us_LB_1, R.sig_Us_LB_2);
    
  R.loglik_val      = loglik_val;
  dim_theta = length(R.theta);
  dim_W = size(W,2);

  if flag_Compute_AsyVars % && size(W,2)==7 %testing
    eta_0 = [R.theta; R.theta_V; pi_hat; R.sig_e1; R.sig_V]; %sig_V_MLE];
    eta_0_for_grad_fn = eta_0;
    if bProbit
      eta_0_for_grad_fn(end-1) = []; % R.sig_e1 is not estimated, it is 1 
      score_fn = @(eta) fn_ivprobit_grad_new(Y,X,W,Z,eta(1:dim_theta), eta(dim_theta+1), eta(dim_theta+1+(1:dim_theta)), eta(end));
    else
      score_fn = @(eta) fn_ivtobit_grad_new(Y,X,W,Z,eta(1:dim_theta), eta(dim_theta+1), eta(dim_theta+1+(1:dim_theta)), eta(end-1), eta(end));
    end
    
    grad_i = score_fn(eta_0_for_grad_fn);
    assert(all(isfinite(grad_i(:))));
    assert(max(abs(mean(grad_i)))<1e-6);

    % determine d_eps for numerical differentiation
    MLE_SEs = sqrt(diag(inv(cov(grad_i)))/n); %MLE_SEs if Correctly Specified
    MLE_SEs = qyn(~all(isfinite(MLE_SEs)), 1./sqrt(diag(cov(grad_i))/n), MLE_SEs);
    d_eps = max(0.01*MLE_SEs, 1e-5);
    d_eps(~isfinite(d_eps)) = 1e-3;
    [gmm_G, gmm_Om, gmm_Sig] = gmm_get_G_Om_Sig(score_fn, eta_0_for_grad_fn, d_eps); %#ok<ASGLU> 
    %  Warning: gmm_Sig is not divided by n, but all other Sig in the code ARE divided by n, so usually asy_Sig = gmm_Sig/n 
    
    if bProbit % insert zero variance column and row for sig_e1
      dim_Sig = size(gmm_Sig, 1);
      gmm_Sig = [gmm_Sig(1:dim_Sig-1,:); zeros(1,dim_Sig); gmm_Sig(dim_Sig,:)];
      gmm_Sig = [gmm_Sig(:,1:dim_Sig-1) zeros(dim_Sig+1,1) gmm_Sig(:,dim_Sig)];
    end
    gmm_Sig = (gmm_Sig+gmm_Sig')/2;
    R.asy_Sig_eta = gmm_Sig/n; % <<< Sig matrices are divided by n, except for gmm_Sig! 
    % NB: R.asy_Sig_eta contains zero column and row for Probit, because of R.sig_e1 = 1.
    R.eta_0_for_grad_fn = eta_0_for_grad_fn;
    R.score_fn = score_fn;
    R.score_num_deriv__d_eps = d_eps;
    
    % checking the choice of d_eps
    asy_SEs = sqrt(diag(R.asy_Sig_eta)); 
    asy_SEs = qyn(bProbit, [asy_SEs(1:end-2); asy_SEs(end)], asy_SEs);
    max_d_eps_rel_to_asy_SEs = max(d_eps(:)./asy_SEs);
    if max_d_eps_rel_to_asy_SEs>0.1
      warning('fn_ivxobit: max_d_eps_rel_to_asy_SEs = %7.3f', max_d_eps_rel_to_asy_SEs);
    end

    % Delta Method to prepare inference on the bounds for sigma_Us

    % NB: FN_IVTOBIT_DELTA_METHOD_HELPER is used for both Tobit and Probit:
    %  For Probit asy_Sig_eta contains column and row of zeros corresponding to R.sig_e1, since R.sig_e1 = 1 for Probit.
    %  
    %  This function gives the jacobian for the nonlinear mapping of
    %    [R.theta(1), R.theta_V, R.sig_e1, R.sig_V]
    %      into
    %    [R.sig_U, R.sig_Us_LB_1^2, R.sig_Us_LB_2^2]
    %
    % NB: FN_IVTOBIT_DELTA_METHOD_HELPER is made by study_K_tobit_make_tmp_fn.m
    % 
    jac_DeltaMethod = fn_ivtobit_delta_method_helper(R.theta(1), R.theta_V, R.sig_e1, R.sig_V);
    jac_DeltaMethod = jac_DeltaMethod'; % now 3x4
    len_eta = length(eta_0); %Remember that R.asy_Sig_eta includes 
    idx = [1, length(R.theta)+1, len_eta-1, len_eta]; %indices of the elements of eta_0
    R.asy_Sig_for_bounds = jac_DeltaMethod*R.asy_Sig_eta(idx,idx)*jac_DeltaMethod';
    jac_Sig_th_and_bounds = [jac_DeltaMethod(:,1) zeros(3, dim_theta-1) jac_DeltaMethod(:,2:end)];
    idx = [1:length(R.theta)+1 len_eta-1 len_eta]; %indices of the elements of eta_0
    jac_Sig_th_and_bounds = [eye(length(idx)); jac_Sig_th_and_bounds];
    % Matrix R.asy_Sig_theta_and_bounds is the asy var. matrix of:
    %   [R.theta; R.theta_V; R.sig_e1; R.sig_V; R.sig_U; R.sig_Us_LB_1^2; R.sig_Us_LB_2^2]
    % (i.e., all except pi_hat plus some ingredients of sig_Us bounds)
    R.asy_Sig_theta_and_bounds = jac_Sig_th_and_bounds*R.asy_Sig_eta(idx,idx)*jac_Sig_th_and_bounds';
    R.asy_SE_theta_and_vars = sqrt(diag(R.asy_Sig_theta_and_bounds));
    R.asy_SE_theta_and_vars_descr = 'SE of [R.theta; R.theta_V; R.sig_e1; R.sig_V; R.sig_U; R.sig_Us_LB_1^2; R.sig_Us_LB_2^2]'; % for debugging
  end

  % Do not use APE_E variables when doing Probit!
  R.APE_E_ACV_Naive = mean(normcdf([X W V_hat]*theta_rho/R.sig_e1))*R.theta;  % check about e1 !!!
  R.APE_P_ACV_Naive = mean(normpdf([X W V_hat]*theta_rho/R.sig_e1))*R.theta/R.sig_e1;
  R.APE_E_N_Naive = mean(normcdf([X W]*R.theta/R.sig_U))*R.theta;
  R.APE_P_N_Naive = mean(normpdf([X W]*R.theta/R.sig_U))*R.theta/R.sig_U;% compare with PLOTS below 
  
  % Do not use APE_E variables when doing Probit!
  [R.APE_E_NN_Corr_at_LB,  R.APE_P_NN_Corr_at_LB]  = fn_ivtobit_APE_Correct(R.sig_Us_LB);
  [R.APE_E_NN_Corr_wo_EIV, R.APE_P_NN_Corr_wo_EIV] = fn_ivtobit_APE_Correct(R.sig_U);
  
  % exogenous U_star case
  if bProbit
    R.sig_Us_exog = sqrt(R.sig_U^2 + R.theta_normalized(1)*R.sig_UV);
  else
    R.sig_Us_exog = sqrt(R.sig_U^2 + R.theta(1)*R.sig_UV); % can deduce from eq. 12
  end
  [R.APE_E_NN_exog,R.APE_P_NN_exog] = fn_ivtobit_APE_Correct(R.sig_Us_exog);
  
  % PEM naive
  R.h_for_PEs = h_for_PEs;
  PE_h_x_theta = h_for_PEs(:)'*R.theta; %mean([X W]*R.theta); % h'theta
  % eq (14) at h = mean
  R.PEM_Naive = [  normcdf(PE_h_x_theta/R.sig_U)*R.theta...
                 ; normpdf(PE_h_x_theta/R.sig_U)*R.theta/R.sig_U];
  
  if flag_Compute_AsyVars %computing se_PEM_Naive
    foo_idx = [1:dim_theta size(R.asy_Sig_theta_and_bounds,2)-2]; %theta and sig_U
    Sigma_th_sigU = R.asy_Sig_theta_and_bounds(foo_idx,foo_idx);
    [se_PEM_P_Naive, PEM_P_Naive_test] = fn_ivtobit_Naive_PE_SE(R.theta,R.sig_U,h_for_PEs,Sigma_th_sigU,1);
    [se_PEM_E_Naive, PEM_E_Naive_test] = fn_ivtobit_Naive_PE_SE(R.theta,R.sig_U,h_for_PEs,Sigma_th_sigU,0);
    R.se_PEM_Naive = [se_PEM_E_Naive; se_PEM_P_Naive];
    assert(max(abs([PEM_E_Naive_test; PEM_P_Naive_test]-R.PEM_Naive),[], 'all')<1e-7);
  end

  % eq (15) PE^Tob bounds are Naive and PE^Tob(sig2_LB)
  R.PEM_E_atSig2LB = normcdf(PE_h_x_theta/R.sig_Us_LB)*R.theta;
  % PEM_P bounds are calculated differently (eq 16): not needed yet.

  PEMs_Corr = zeros(2*dim_theta,4);
  for i = 1:2*dim_theta
    arr_PE_sig_U = [R.sig_Us_LB; R.sig_U];% 2 or 3 pts matter: para after eq (16)
    if i>dim_theta %only for PEM-Pr
      abs_theta_h = abs(PE_h_x_theta);
      if R.sig_Us_LB < abs_theta_h && abs_theta_h < R.sig_U
        arr_PE_sig_U(end+1) = abs_theta_h; %#ok<AGROW> 
      end
    end
    arr_PE_val = arrayfun(@(s) fn_ivtobit_PEM(s, i), arr_PE_sig_U);
    [~, i_max] = max(arr_PE_val);
    [~, i_min] = min(arr_PE_val);
    PEMs_Corr(i,:) = [arr_PE_val(i_min) -arr_PE_val(i_max) arr_PE_sig_U(i_min) arr_PE_sig_U(i_max)];
  end
  PEMs_Corr(:,2) = -PEMs_Corr(:,2); % flip sign
  R.PEMs_Corr    = PEMs_Corr;
  APEs_Corr = NaN(2*dim_theta,4); % Columns are: APE_LB APE_UB APE_LB_argmin_sig_Us APE_UB_argmax_sig_Us
  
  if flag_Estimate_APEs % for things that need fast_fmin    

    for i=1:2*dim_theta
      TolX = 1e-7;
      [APEs_Corr(i,3), APEs_Corr(i,1)] = fast_fminbnd_U_shaped(@(sig_Us)  fn_ivtobit_APE_Correct(sig_Us, i), R.sig_Us_LB, R.sig_U, TolX);
      [APEs_Corr(i,4), APEs_Corr(i,2)] = fast_fminbnd_U_shaped(@(sig_Us) -fn_ivtobit_APE_Correct(sig_Us, i), R.sig_Us_LB, R.sig_U, TolX);

    end % for i=1:2*dim_theta
    APEs_Corr(:,2)     = -APEs_Corr(:,2);
  end
  R.APEs_Corr = APEs_Corr;

%% Helper Functions  

  function [APE_E, APE_P] = fn_ivtobit_APE_Correct(sig_U_star, only_APE_j)
    % When only_APE_j is between 1 and 2*dim_theta computing only a single APE (APE_E_j or APE_P_j) -- used for optimization
    if nargin<2; only_APE_j = 0; end
    std_in_denom = sqrt(2*sig_U_star^2 - R.sig_U^2 + R.theta(1)^2*R.sig_V^2);
    numer_Z_index = R.theta(1)*(X-V_hat) + W*R.theta(2:end);  % [Z W]*pi_hat = X-V_hat
    if only_APE_j==0
      APE_E = mean(normcdf(numer_Z_index/std_in_denom))*R.theta;             % eq 19. At sig_U, its APE_E_NN_Naive
      APE_P = mean(normpdf(numer_Z_index/std_in_denom))*R.theta/std_in_denom;% eq 20. At sig_U, its APE_P_NN_Naive
    elseif only_APE_j<=dim_theta % compute APE_E(j)
      APE_E_j = mean(normcdf(numer_Z_index/std_in_denom))*R.theta(only_APE_j);
      APE_E = APE_E_j;
    else
      assert(only_APE_j<=2*dim_theta);
      % compute APE_P(j) and return as scalar in APE_E
      APE_P_j = mean(normpdf(numer_Z_index/std_in_denom))*R.theta(only_APE_j-dim_theta)/std_in_denom;
      APE_E = APE_P_j;
    end
  end
  
  function [PEM_E, PEM_P] = fn_ivtobit_PEM(sig_U_star, only_PEM_j)
    % When only_PEM_j is between 1 and 2*dim_theta computing only a single PEM (PEM_E_j or PEM_P_j) -- used for optimization
    if nargin<2; only_PEM_j = 0; end
    if only_PEM_j==0
      PEM_E = normcdf(PE_h_x_theta/sig_U_star)*R.theta;
      PEM_P = normpdf(PE_h_x_theta/sig_U_star)*R.theta/sig_U_star;
    elseif only_PEM_j<=dim_theta % compute APE_E(j)
      PEM_E_j = normcdf(PE_h_x_theta/sig_U_star)*R.theta(only_PEM_j);
      PEM_E   = PEM_E_j;
    else
      assert(only_PEM_j<=2*dim_theta);
      % compute PEM_P(j) and return as scalar in PEM_E
      PEM_P_j = normpdf(PE_h_x_theta/sig_U_star)*R.theta(only_PEM_j-dim_theta)/sig_U_star;
      PEM_E   = PEM_P_j;
    end
  end
end

