/* =========================================================================== */ /* =========================================================================== */ /* Procedures */ /* =========================================================================== */ /* =========================================================================== */ /* =========================================================================== */ /* 2D-Plot */ /* Stefan Hupfeld, Okt. 2006 */ /* Black-and-White Plot, with Auto-Save */ /* =========================================================================== */ /* What it does: Supplied with labels and a title, a line-plot is drawn and saved as .tkf and .eps (ready for a latex file), in the globally defined path for figures. */ proc plot2d(plot_title, plot_x_label, plot_y_label, plot_x, plot_y); _pcolor={0 0 0 0}; _pmcolor = {0, 0, 0, 0, 0, 0, 0, 0, 15}; _pltype = { 6, 4, 4, 0 }; _plwidth = {4, 1, 1}; _paxht = 0.2; _ptitlht = 0.2; _ptek = path_fig$+plot_title$+".tkf"; tkf2eps(path_fig$+plot_title$+".tkf",path_fig$+plot_title$+".eps"); title(plot_title); xlabel(plot_x_label); ylabel(plot_y_label); xy(plot_x, plot_y); retp("(Plot saved under "$+path_fig$+plot_title$+".tkf)"); endp; /* =========================================================================== */ /* 3D-Plot */ /* Input: String for Title, x-Axis, y-Axis, and xy-Values */ /* Output: Standardized xyz-Surface */ /* Stefan Hupfeld, Oct. 2006 */ /* =========================================================================== */ /* What it does: Supplied with labels and a title, a surface-plot is drawn and saved as .tkf and .eps (ready for a latex file), in the globally defined path for figures. Particular convenient for bivariate smoothing - do not use an excessive amount of grid points (50 serves well). */ proc plot3d(plot_title, plot_x_label, plot_y_label, plot_z_label, plot_x, plot_y, plot_z); _pcolor={0 0 7 7}; _pmcolor = {0, 0, 0, 0, 0, 0, 0, 0, 15}; _pltype = { 0, 0, 0, 0 }; _paxht = 0.1; _pnumht = 0.05; _ptitlht = 0.2; _pzclr = 0; _ptek = path_fig$+plot_title$+".tkf"; title(plot_title); xlabel(plot_x_label); ylabel(plot_y_label); zlabel(plot_z_label); surface(plot_x, plot_y, plot_z); tkf2eps(path_fig$+plot_title$+".tkf",path_fig$+plot_title$+".eps"); retp("(Plot saved under "$+path_fig$+plot_title$+".tkf)"); endp; /* =========================================================================== */ /* Normal Kernel */ /* For use in Local polynomial estimation */ /* =========================================================================== */ /* What it does: Supplies weights given the density of the normal (not restricted to the univariate case). */ proc knorm(xx, x0, h); local k,n; k = cols(xx); n = rows(xx); retp(exp(-(0.5/h^2)*sumc(((xx-x0).*(xx-x0))'))/(sqrt(2*pi*h^2)^k)); endp; /* =========================================================================== */ /* 3D-Locally Linear Estimation */ /* Needs provision of bandwidth */ /* Stefan Hupfeld, Okt. 2006 */ /* =========================================================================== */ /* What it does: Estimates the surface z_hat, given x and y, on a grid of 50x50 points. Needs bandwidth for normal kernel, which is not included. */ proc local_lin3D(est_title, plot_x_label, plot_y_label, plot_z_label, bw, x_val, y_val, z_val); local z_ll3D_fit, g, x0, beta, w, Xd, xg, yg, nn, est_title_full; x_val = (x_val-meanc(x_val))/stdc(x_val); y_val = (y_val-meanc(y_val))/stdc(y_val); nn = rows(x_val); g = 25; z_ll3D_fit = zeros(g,g); xg = seqa(minc(x_val), (maxc(x_val) - minc(x_val))/(g-1),g)'; yg = seqa(minc(y_val), (maxc(y_val) - minc(y_val))/(g-1),g); x0 = zeros(1,2); w = zeros(nn,1); output off; format /mat /ld 6,0; for i (1,g,1); @ Loop X @ for j (1,g,1); @ Loop Y @ locate(1,1); print /flush est_title " " i~j "of " g; x0[1,1] = xg[i]; x0[1,2] = yg[j]; w = knorm(x_val~y_val, x0, bw); Xd = ones(nn,1)~((x_val~y_val)-x0); @ Design Matrix @ beta = inv((Xd.*w)'*Xd)*(Xd.*w)'*z_val; z_ll3D_fit[i,j] = beta[1,1]; endfor; endfor; format /mat /ld 6,3; output on; _pcolor={0 0 7 7}; _pmcolor = {0, 0, 0, 0, 0, 0, 0, 0, 15}; _pltype = { 0, 0, 0, 0 }; _paxht = 0.1; _pnumht = 0.05; _ptitlht = 0.2; _pzclr = 0; _ptek = path_fig$+est_title$+".tkf"; est_title_full = est_title$+", BW = "$+ftos(bw, "%*.*lf", 2, 2); title(est_title_full); xlabel(plot_x_label); ylabel(plot_y_label); zlabel(plot_z_label); surface(xg, yg, z_ll3D_fit); tkf2eps(path_fig$+est_title$+".tkf",path_fig$+est_title$+".eps"); print "(Plot saved under "$+path_fig$+est_title$+".tkf)"; retp(z_ll3D_fit); endp; /* =========================================================================== */ /* Local Linear Estimation */ /* Needs provision of bandwidth */ /* Stefan Hupfeld, August 2006 */ /* =========================================================================== */ /* What it does: Estimates Local Linear Regression on a grid of 50. Needs bandwidth, which is not included. */ proc local_lin(est_title, plot_x_label, plot_y_label, bw, x_val, y_val); local y_ll_fit, nn, x0, beta, w, Xd, xg, g, bw_opt, est_title_full; nn = rows(x_val); g = 50; y_ll_fit = zeros(g,1); x_val = (x_val-meanc(x_val))/stdc(x_val); xg = seqa(minc(x_val), (maxc(x_val) - minc(x_val))/(g-1),g); output off; format /mat /ld 6,0; for i (1, g, 1); locate(1,1); print /flush est_title " " i "of " g; x0 = xg[i,1]; w = knorm(x_val, x0, bw); Xd = ones(nn, 1)~(x_val - x0); beta = inv((Xd.*w)'*Xd)*(Xd.*w)'*y_val; y_ll_fit[i] = beta[1]; endfor; format /mat /ld 6,3; output on; _pcolor={0 0 7 7}; _pmcolor = {0, 0, 0, 0, 0, 0, 0, 0, 15}; _pltype = { 0, 0, 0, 0 }; _paxht = 0.2; _ptitlht = 0.2; _ptek = path_fig$+est_title$+".tkf"; tkf2eps(path_fig$+est_title$+".tkf",path_fig$+est_title$+".eps"); est_title_full = est_title$+", BW = "$+ftos(bw, "%*.*lf", 2, 2); title(est_title_full); xlabel(plot_x_label); ylabel(plot_y_label); xy(xg, y_ll_fit); print"(Plot saved under "$+path_fig$+est_title$+".tkf)"; retp(y_ll_fit); endp; /* =========================================================================== */ /* Local Quad Estimation */ /* Provides Bandwidth, e.g. for LOC_LIN procedure */ /* Stefan Hupfeld, August 2006 */ /* =========================================================================== */ /* What it does: Estimates locally quadratic polynomial. Returns optimal bandwidth (plug-in following Loader [2004]) only, and not the estimate itself. Takes n estimations! */ proc local_quad(est_title, x_val, y_val); local nq, bwq, yq, x0q, H1q, H2q, e_q, wq, XWLSq, betaq, holdq, bw_star; nq = rows(x_val); bwq = 5*nq^(-1/5); @ Pilot BW; Order of Magn ~n^(-1/5) @ print "bw pilot" bwq; yq = zeros(nq, 3); @ 3 for 2nd Derivative @ e_q = zeros(1,3); e_q[1] = 1; H1q = zeros(nq, 1); H2q = zeros(nq, 1); output off; format /mat /ld 6,0; for i (1, nq, 1); locate(1,1); print /flush est_title i " of " nq; x0q = x_val[i,1]; wq = knorm(x_val, x0q, bwq); XWLSq = ones(nq, 1)~(x_val - x0q)~(x_val - x0q)^2; betaq = inv((XWLSq.*wq)'*XWLSq)*(XWLSq.*wq)'*y_val; yq[i,.] = betaq'; holdq = e_q*inv((XWLSq.*wq)'*XWLSq)*(XWLSq.*wq)'; H1q[i] = holdq[i]; H2q[i] = sqrt(sumc((holdq^2)')); endfor; format /mat /ld 6,3; output on; bw_star = bwidth(H1q, H2q, yq, y_val, x_val); print; print "-----------------------"; print "Plug-In Bandwidth"; print "-----------------------"; print est_title; print "Optimal BW " bw_star; print; retp(bw_star); endp; /* =========================================================================== */ /* Plug-In Bandwidth Loader 2004 */ /* Subordinate Procedure for LOCAL_QUAD */ /* Source: Own, Jan 2006 */ /* =========================================================================== */ proc bwidth(H1_b, H2_b, y_pilot_b, dep_b, x_b); local nu1_b, nu2_b, sigma_b, K1_b, K2_b, frac1_b, frac2_b, sum_der_b, n_b, bwidth; n_b = rows(x_b); K1_b = 0.282095; K2_b = 1; nu1_b = sumc(H1_b); nu2_b = sumc(H2_b^2); sigma_b = sqrt(sumc((dep_b-y_pilot_b[.,1])^2)*(n_b-2*nu1_b+nu2_b)^(-1)); y_pilot_b[.,3] = y_pilot_b[.,3]*2; sum_der_b = sumc(y_pilot_b[.,3]^2); frac1_b = sigma_b*(maxc(x_b)-minc(x_b))*K1_b; frac2_b = n_b*K2_b*sum_der_b; bwidth = (frac1_b/frac2_b)^(-1/5); retp(bwidth); endp; /* =========================================================================== */ /* Procedure for OLS Estimation and White / Newey-West Standard Errors */ /* Does not add Constant to X */ /* Following Mittelhammer */ /* =========================================================================== */ /* What it does: Standard OLS Regression. Returns coefficients, OLS standard errors, white standard errors, and newey-west standard errors. */ proc leastsq(x_ls, y); local beta, e, sigma, n, k, se, R2, invxTx, e2, sigma_w, q, se_w, sigma_nw, se_nw, w, j, t; n = rows(x_ls); k = cols(x_ls); beta = inv(x_ls'x_ls)*x_ls'y; @ OLS Coefficient @ e = y-x_ls*beta; sigma = e'e/(n-k); sigma = sigma*inv(x_ls'x_ls); se = sqrt(diag(sigma)); @ OLS S.E. @ R2 = 1 - e'e/(y-meanc(y))'(y-meanc(y)); @invxTx = invpd(x_ls'x_ls);@ invxTx = inv(x_ls'x_ls); e2 = e.*e*n/(n-k); sigma_w = invxTx*(x_ls'(x_ls.*e2))*invxTx; se_w = sqrt(diag(sigma_w)); @ White S.E. @ sigma_nw = zeros(k,k); q = floor(4*(n/100)^(2/9)); for j (1, q, 1); w = 1 - (j/(q + 1)); for t (j+1, n, 1); sigma_nw = sigma_nw + (w*e[t]*e[t-j]*(x_ls[t,.]'x_ls[t-j,.])); endfor; endfor; sigma_nw = sigma_nw + sigma_nw'; sigma_nw = invxTx*sigma_nw*invxTx + sigma_w; se_nw = sqrt(diag(sigma_nw)); @ NW HAC S.E. @ @output off;@ format /mat /ld 6,5; print; print "-----------------------"; print "OLS Estimation"; print "-----------------------"; print; print "R2: " R2; print "Observations: " n; print "-----------------------"; print "Coefficient: " beta'; print "S.E.: " se'; print "White SE: " se_w'; print "NW HAC S.E. " se_nw'; print; @output on;@ retp(beta~se~se_w~se_nw); endp; /* =========================================================================== */ /* Subordinate Procedure for LEASTSQ */ /* =========================================================================== */ proc NW(y,X,b); local sse,n,yhat,e,G,w,a,t,ga,V,F,nwerr,olserr,k,za,hhat,lag; n=ROWS(X); k=ROWS(B); lag = floor(4*(n/100)^(2/9)); yhat=X*b; e=y-yhat; hhat=e'.*x'; G=ZEROS(k,k); w=ZEROS(2*lag+1,1); a=0; do until a==lag+1; ga=ZEROS(ROWS(b),ROWS(b)); w[lag+1+a]=(lag+1-a)/(lag+1); za=hhat[.,(a+1):n]*hhat[.,1:n-a]'; if a==0; ga=ga+za; else; ga=ga+za+za'; endif; G=G+w[lag+1+a]*ga; a=a+1; endo; F=X'*X; V=INV(F)*G*INV(F); nwerr= (DIAG(V))^.5; olserr=(DIAG(INV(X'X)*e'e/(n-k)))^.5; retp(nwerr~olserr); endp; /* =========================================================================== */ /* Procedure Univariate Cross-Validation */ /* Provides BW for LOC_LIN Procedure */ /* Stefan Hupfeld, Oct 2006 */ /* =========================================================================== */ /* What it does: Returns optimal bandwidth for univariate locally linear regression, searching on 3 subsequent intervals. Takes regressions of of order n X interval-lenght! */ proc cross_validate(est_title, x, y); local bw, bw_opt, initial_lower, initial_upper, steps; initial_lower = 5; initial_upper = 25; steps = 11; bw = seqa(initial_lower, (initial_upper-initial_lower)/(steps-1), steps); bw_opt = cv_inner_procedure(est_title, x, y, bw); if bw_opt == initial_upper; bw = seqa(initial_upper, (initial_upper-initial_lower)/(steps-1), steps); bw_opt = cv_inner_procedure(est_title, x, y, bw); bw = seqa(bw_opt- 0.5*(steps-1)*(initial_upper-initial_lower)/(4*(steps-1)), (initial_upper-initial_lower)/(4*(steps-1)), steps); bw_opt = cv_inner_procedure(est_title, x, y, bw); elseif bw_opt == initial_lower; bw = seqa(0.1, (initial_upper-initial_lower)/(2*(steps-1)), steps); bw_opt = cv_inner_procedure(est_title, x, y, bw); bw = seqa(bw_opt- 0.5*(steps-1)*(initial_upper-initial_lower)/(4*(steps-1)), (initial_upper-initial_lower)/(4*(steps-1)), steps); bw_opt = cv_inner_procedure(est_title, x, y, bw); else; bw = seqa(bw_opt- 0.5*(steps-1)*(initial_upper-initial_lower)/(4*(steps-1)), (initial_upper-initial_lower)/(4*(steps-1)), steps); bw_opt = cv_inner_procedure(est_title, x, y, bw); endif; bw = seqa(bw_opt- 0.5*(steps-1)*(initial_upper-initial_lower)/(20*(steps-1)), (initial_upper-initial_lower)/(20*(steps-1)), steps); bw_opt = cv_inner_procedure(est_title, x, y, bw); print; print "-----------------------"; print "Cross-Validation"; print "-----------------------"; print est_title; print "Optimal BW " bw_opt; print; retp(bw_opt); endp; /* =========================================================================== */ /* Procedure Univariate Cross-Validation */ /* Subordinate Procedure for CROSS_VALIDATE */ /* Stefan Hupfeld, Oct 2006 */ /* =========================================================================== */ proc cv_inner_procedure(est_title, x, y, bw); local error_mat, y_fit, total_sample, sub_sample, x_cv, x0, w, xd, beta, bw_opt, y_fit_ex, g, xg, n; n = rows(x); g = 50; xg = (x-meanc(x))/stdc(x); xg = seqa(minc(x), (maxc(x) - minc(x))/(g-1),g); error_mat = zeros(rows(bw), 2); y_fit = zeros(g, 1); total_sample = seqa(1, 1, n); for i (1, rows(bw), 1); for j (2, n-1, 1); sub_sample = total_sample[1:j-1]|total_sample[j+1:n]; x_cv = x[sub_sample,.]; output off; format /mat /ld 6,2; locate(1,1); print /flush est_title " BW " bw[i] "in " minc(bw) "," maxc(bw) "Training Sample " j "of " n-1; for k (1, g, 1); x0 = xg[k,1]; w = knorm(x_cv, x0, bw[i]); xd = ones(rows(x_cv), 1)~(x_cv - x0); beta = inv((xd.*w)'*xd)*(xd.*w)'*y[sub_sample,.]; y_fit[k] = beta[1]; endfor; sub_sample = setdif(total_sample, j, 1); x_cv = x[sub_sample,.]; for k (1, g, 1); x0 = x[k,1]; w = knorm(x_cv, x0, bw[i]); xd = ones(rows(x_cv), 1)~(x_cv - x0); beta = inv((xd.*w)'*xd)*(xd.*w)'*y[sub_sample,.]; y_fit[k] = beta[1]; format /mat /ld 6,3; output on; endfor; endfor; y_fit_ex = interpolate(x, y, y_fit); error_mat[i,1] = bw[i]; error_mat[i,2] = (y - y_fit_ex)'(y - y_fit_ex); endfor; error_mat = sortc(error_mat, 2); bw_opt = error_mat[1,1]; error_mat = sortc(error_mat, 1); plot2d("CV-"$+est_title, "BW", "Error", error_mat[.,1], error_mat[.,2]); retp(bw_opt); endp; /* =========================================================================== */ /* Procedure Bivariate Cross-Validation */ /* Provides BW for LOC_LIN3D Procedure */ /* Stefan Hupfeld, Oct 2006 */ /* =========================================================================== */ /* Returns optimal bandwidth - takes regressions of order n X interval-length! */ proc cross_validate3D(est_title, x, y, z); local bw, bw_opt; bw = seqa(0.5, 2, 11); bw_opt = cv_inner_procedure3D(est_title, x, y, z, bw); if bw_opt >= 18; bw = seqa(18, 0.5, 11); bw_opt = cv_inner_procedure3D(est_title, x, y, z, bw); elseif bw_opt <= 3; bw = seqa(0.5, 0.5, 11); bw_opt = cv_inner_procedure3D(est_title, x, y, z, bw); else; bw = seqa(bw_opt-2.5, 0.5, 11); bw_opt = cv_inner_procedure3D(est_title, x, y, z, bw); endif; bw = seqa(bw_opt-0.25, 0.05, 11); bw_opt = cv_inner_procedure3D(est_title, x, y, z, bw); print; print "-----------------------"; print "Cross-Validation 3D"; print "-----------------------"; print est_title; print "Optimal BW " bw_opt; print; retp(bw_opt); endp; /* =========================================================================== */ /* Procedure Bivariate Cross-Validation */ /* Subordinate Procedure for CROSS_VALIDATE3D */ /* Stefan Hupfeld, Oct 2006 */ /* =========================================================================== */ proc cv_inner_procedure3D(est_title, x, y, z, bw); local error_mat, z_ll_fit3D, total_sample, sub_sample_x, sub_sample_y, x_cv, y_cv, x0, w, xd, beta, bw_opt, n; /*x = (x-meanc(x))/stdc(x); y = (y-meanc(y))/stdc(y);*/ n = rows(x); error_mat = zeros(rows(bw), 2); z_ll_fit3D = zeros(n, 1); total_sample = seqa(1, 1, n); for i (1, rows(bw), 1); for j (2, n-1, 1); for k (2, n-1, 1); sub_sample_x = total_sample[1:j-1]|total_sample[j+1:n]; sub_sample_y = total_sample[1:j-1]|total_sample[j+1:n]; x_cv = x[sub_sample_x,.]; y_cv = y[sub_sample_y,.]; output off; format /mat /ld 6,2; locate(1,1); print /flush est_title " BW " i "in " minc(bw) "," maxc(bw) "Training Sample " j "of " n-1 "and " k "of " n-1; x0 = x[j,1]~y[k,1]; w = knorm(x_cv~y_cv, x0, bw[i]); xd = ones(rows(x), 1)~(x~y - x0); beta = inv((xd.*w)'*xd)*(xd.*w)'*z; z_ll_fit3D[j] = beta[1]; format /mat /ld 6,3; sub_sample_x = setdif(total_sample, j, 1); x_cv = x[sub_sample_x,.]; x0 = x_cv[j,1]; w = knorm(x, x0, bw[i]); xd = ones(rows(x), 1)~(x - x0); beta = inv((xd.*w)'*xd)*(xd.*w)'*y; z_ll_fit3D[j] = beta[1]; format /mat /ld 6,3; output on; endfor; endfor; error_mat[i,1] = bw[i]; error_mat[i,2] = (z - z_ll_fit3D)'(z - z_ll_fit3D); endfor; error_mat = sortc(error_mat, 2); bw_opt = error_mat[1,1]; error_mat = sortc(error_mat, 1); plot2d("CV-"$+est_title, "BW", "Error", error_mat[.,1], error_mat[.,2]); retp(bw_opt); endp; /* =========================================================================== */ /* Procedure Boostrapping Confidence Interval around LL Estimate */ /* Follows Yatchew (2003), pp. 157, 161 */ /* Stefan Hupfeld, Oct 2006 */ /* =========================================================================== */ /* Bootstraps CI around locally linear regression on a grid of 50, following Yatchew (200x). It is not guaranteed that the original estimate lies within the CI! */ proc local_lin_bootstrap(est_title, x_label, y_label, bw, x, y); local bw_u, y_u, n, g, xg, x0, w, Xd, beta, bw_o, y_o, e, yb_fit, text, omega, indices, b, eb, yb, yb_mat, y_fit, ci_upper, ci_lower, y_u_ex, y_o_ex; bw_u = 0.9*bw; @ Undersmoothing @ bw_o = 1.1*bw; @ Oversmoothing @ b = 1000; @ Repetitions @ n = rows(x); g = 50; y_fit = zeros(g,1); y_u = zeros(g,1); y_o = zeros(g,1); xg = (x-meanc(x))/stdc(x); xg = seqa(minc(x), (maxc(x) - minc(x))/(g-1),g); eb = zeros(n,1); yb_fit = zeros(g,1); for i (1, g, 1); x0 = xg[i,1]; w = knorm(x, x0, bw_o); Xd = ones(n, 1)~(x - x0); beta = inv((Xd.*w)'*Xd)*(Xd.*w)'*y; y_fit[i] = beta[1]; endfor; for i (1, g, 1); x0 = xg[i,1]; w = knorm(x, x0, bw_u); Xd = ones(n, 1)~(x - x0); beta = inv((Xd.*w)'*Xd)*(Xd.*w)'*y; y_u[i] = beta[1]; endfor; y_u_ex = interpolate(x, y, y_u); for i (1, g, 1); x0 = xg[i,1]; w = knorm(x, x0, bw_o); Xd = ones(n, 1)~(x - x0); beta = inv((Xd.*w)'*Xd)*(Xd.*w)'*y; y_o[i] = beta[1]; endfor; y_o_ex = interpolate(x, y, y_o); e = y - y_u_ex; @ Undersmoothed Errors @ /*omega = e*(1-sqrt(5))/2~e*(1+sqrt(5))/2~rndu(n, 1); @ Wild Bootstrap Errors @ for i (1, n, 1); if omega[i,3] ge (5+sqrt(5))/10; e[i] = omega[i,1]; else; e[i] = omega[i,2]; endif; endfor;*/ yb_mat = y_fit; output off; format /mat /ld 6,0; for i (1, b, 1); locate(1,1); print /flush "Bootstrapping Confidence Interval, Pass " i " of " b; indices = round(0.5 + n*rndu(n,1)); for j (1, n, 1); eb[j] = e[indices[j]]; endfor; yb = y_o_ex + eb; for j (1, g, 1); x0 = xg[j,1]; w = knorm(x, x0, bw); Xd = ones(n, 1)~(x - x0); beta = inv((Xd.*w)'*Xd)*(Xd.*w)'*yb; yb_fit[j] = beta[1]; endfor; yb_mat = yb_mat~yb_fit; endfor; output on; format /mat /ld 6,3; ci_upper = quantile(yb_mat', 0.950); ci_lower = quantile(yb_mat', 0.050); ci_upper = ci_upper'; ci_lower = ci_lower'; /*xy(xg, y_fit~ci_upper~ci_lower);*/ text = plot2d("CI-"$+est_title, x_label, y_label, xg, y_fit~ci_upper~ci_lower); retp(y_fit); endp; /* =========================================================================== */ /* Interpolation */ /* Stefan Hupfeld, Oct. 2006 */ /* Expands y_fit (based on grid) to y_fit (based on all observations) */ /* =========================================================================== */ /* What it does: Interpolates Small y-fit vector (e.g. from local regression) to full size (n). */ proc interpolate(x, y, y_fit); local n, g, b, xg, y_fit_ex, upper_bound, lower_bound, distance; n = rows(x); g = rows(y_fit); b = n/(g-1); xg = seqa(minc(x), (maxc(x) - minc(x))/(g-1),g); y_fit_ex = zeros(n, 1); y_fit_ex[1] = y_fit[1]; y_fit_ex[n] = y_fit[g]; output off; for i (2, n-1, 1); lower_bound = trunc(i/b + 1); upper_bound = lower_bound + 1; distance = (x[i] - xg[lower_bound])/(xg[upper_bound] - xg[lower_bound]); y_fit_ex[i] = y_fit[lower_bound] + distance*(y_fit[upper_bound] - y_fit[lower_bound]); locate(1,1); print /flush "Interpolation... " i/n*100 "Percent"; endfor; output on; retp(y_fit_ex); endp; /* =========================================================================== */ /* Collapse */ /* Stefan Hupfeld, Okt. 2006 */ /* Collapses Y to Means (by Variable X) */ /* =========================================================================== */ /* Similar to STATA command "collapse". */ proc (2) = collapse(x, y); local minx, maxx, x_collapse, y_collapse, y_select, data, g, x_collapse_new; data = x~y; minx = minc(x); maxx = maxc(x); x_collapse = seqa(minx, 1, maxx); g = rows(x_collapse); for i (1, g, 1); y_select = selif(data[.,2], data[.,1] .eq x_collapse[i]); if rows(y_select) > 0; if i .eq 1; x_collapse_new = x_collapse[i]; y_collapse = meanc(y_select); else; x_collapse_new = x_collapse_new|x_collapse[i]; y_collapse = y_collapse|meanc(y_select); else; endif; endif; endfor; retp(x_collapse_new, y_collapse); endp;