% crspreg.m  Runs regressions on crsp data to see if real stocks act like VC
%
% Outline of the part of the script shown here:
%   1. Build quarterly gross returns for two Nasdaq series and the S&P 500,
%      display annualized means / standard deviations, and regress the
%      Nasdaq returns on the S&P 500 return with olsgmm.
%   2. Load the monthly Nasdaq index, S&P 500 and T-bill files.
%   3. Build the individual-stock panel x (disabled branch) or load the
%      copy previously saved in x.mat.
%
% rows, olsgmm and (later in the file) docrspreg, ddate are helpers that are
% not defined in this file.
%
% crspdat loads data and makes the matlab matrices
% variables are
% >> permno = M(:,1);
% >> permco = M(:,2);
% >> date = M(:,3);
% >> shrcd = M(:,4);
% >> exchcd = M(:,5);
% >> shrout = M(:,6);
% >> prc = M(:,7);
% >> ret = M(:,8);
% CHECK units of delist return, and all other documentation.
% Check crsp -- firms where price drops to zero stays on but no delisting return. perm 11300

close all;
clear all;
format bank;

% comparison with indices -- does crsp nasdaq small behave the way my small seems to be doing?
% this is copied from doit3.m
load nasdaq.prn;
z = nasdaq;

% create quarterly returns starting Q1 1987,
% select Dec31, mar31, etc.
dates = (12:3:rows(z))';        % every third row, starting at row 12
nd1 = z(dates, 3);              % small nasdaq index
nas = z(dates, 4);              % nasdaq total return
nind = z(dates, 9);             % economagic nasdaq index
clear z;

% Turn the index levels into gross returns (ratio of consecutive levels).
T = rows(nd1);
nd1 = nd1(2:T)./nd1(1:T-1);
nas = nas(2:T)./nas(1:T-1);
nind = nind(2:T)./nind(1:T-1);

% pad out crsp with index: the last three returns of both series are
% overwritten with the nind returns.
nd1(T-3:T-1) = nind(T-3:T-1);
nas(T-3:T-1) = nind(T-3:T-1);
% nas is nasdaq, nd1 is nasdaq smallest decile
% these are quarterly gross returns

load sp500.txt;
z = sp500;
spdate = z(:,1) + z(:,2)/12;    % decimal date. end of month so
                                % 12/31/97 = 1998. this is ok.
spind = z(:,3);

% create return series
begindx = find(spdate==1987);
endindx = find(spdate==2000.5);
dates = (begindx:12*1/4:endindx)';   % step of 12*1/4 = 3 rows
logspret = log(spind(dates(2:rows(dates)))./spind(dates(1:rows(dates)-1)));

% In the displays below 400 = 4*100 scales a quarterly mean to annual percent
% and 100*2 scales a quarterly standard deviation by sqrt(4) to annual percent.
disp('Mean, stdev of arithmetic returns. Nasdaq small, nasdaq all, s&p 500. Annualized');
disp([400*(mean(nd1)-1) 100*2*std(nd1)]);
disp([400*(mean(nas)-1) 100*2*std(nas)]);
disp([400*(mean(exp(logspret))-1) 100*2*std(exp(logspret))]);

disp('Regressions of returns on S&P 500. annaulized alpha, its se, beta, 100*R2. Nas small, Nas all.');
% Right-hand variables: a constant and the net S&P 500 return.
% lags and weight are passed straight to olsgmm; their meaning is defined
% there, not in this file.
lhv = nd1-1;
rhv = [ones(rows(logspret),1) exp(logspret)-1];
lags = 0;
weight = 0;
[bv,sebv,R2v,R2vadj,v,F] = olsgmm(lhv,rhv,lags,weight);
disp([bv(1)*400 sebv(1)*400 bv(2) sebv(2) 100*R2vadj]);

lhv = nas-1;
[bv,sebv,R2v,R2vadj,v,F] = olsgmm(lhv,rhv,lags,weight);
disp([bv(1)*400 sebv(1)*400 bv(2) sebv(2) 100*R2vadj]);

% load new monthly nasdaq series
load new_nasdaq_index.csv;
caldt = new_nasdaq_index(:,1);
nasvwretd = new_nasdaq_index(:,2);
nasewretd = new_nasdaq_index(:,3);
ncrtrn = new_nasdaq_index(:,4);
ncindx = new_nasdaq_index(:,5);
decret1 = new_nasdaq_index(:,6);
totval = new_nasdaq_index(:,7);
totcnt = new_nasdaq_index(:,8);
usdval = new_nasdaq_index(:,9);
usdcnt = new_nasdaq_index(:,10);
% Note from 19850131 to 20021231

% load new monthly S&P500 return
load new_SP500.csv;
new_sptrn = new_SP500(:,2);

% load new monthly t bill rate (crsp monthly 3 mo t bill returns)
load new_tbill.csv;             % 19850131 to 20021231
caltb = new_tbill(:,1);
tb3mret = new_tbill(:,2);

% Build the stock panel x from the raw crsp files (disabled: "if 0") or load
% the copy that the disabled branch saved to x.mat.
% Columns of x: 1 shrcd, 2 exchcd, 3 dlret, 4 ret, 5 vwretd, 6 sptrd,
% 7 shrout, 8 prc, 9 permno, 10 dates, 11 row counter,
% 12 ncrtrn matched on date, 13 tb3mret matched on date.
if 0;
    load crspstock1;
    %    1     2      3     4   5      6     7      8   9      10
    x = [shrcd exchcd dlret ret vwretd sptrd shrout prc permno dates];
    clear permno permco dates shrcd exchcd shrout dlret prc ret vwretd sptrd;

    load crspstock2;
    x = [x;shrcd2 exchcd2 dlret2 ret2 vwretd2 sptrd2 shrout2 prc2 permno2 dates2];
    % shrcd2 was previously misspelled shrsc2 here and therefore never cleared.
    clear permno2 permco2 dates2 shrcd2 exchcd2 shrout2 dlret2 prc2 ret2 vwretd2 sptrd2;

    x = sortrows(x,9);                      % sort by permno
    x = x((x(:,1)==10)|(x(:,1)==11),:);     % share code 10 or 11
    x = x(x(:,2)==3,:);                     % nasdaq;
    x = [x (1:rows(x))'];                   % for keeping track of histories when debugging;
    x = [x zeros(rows(x),2)];               % make room for nasdaq and t bill returns

    for i=1:rows(x);
        tbindx = find( x(i,10)==caltb);
        nindx = find( x(i,10)==caldt);

        % The date must match exactly one row of each series, and the same
        % row in both.  The uniqueness test is done first and the index
        % comparison only when both indices are scalars: chaining them in a
        % single element-wise | expression gives an empty result when either
        % find() returns empty, and an empty IF condition counts as false,
        % which silenced the message in exactly the no-match case.
        baddate = (rows(tbindx)~=1)|(rows(nindx)~=1);
        if ~baddate;
            baddate = (tbindx~=nindx);
        end;
        if baddate;
            disp('error in assigning dates for tbill or nasdaq returns to individual stock data i==');
            disp(i);
        end;

        % As before, the assignment is still attempted after a reported
        % mismatch; it is left to fail there rather than store wrong data.
        x(i,12:13) = [ncrtrn(nindx) tb3mret(tbindx)];
    end;

    save x x;
else;
    load x;
end;

% Combine ret (column 4) and dlret (column 3) of x.
% NOTE(review): this statement is completed by the first tokens of the next
% source line ("x(:,3)'])';"); keep the two together when editing.
ret = nansum([x(:,4)';
x(:,3)'])';% use return or dlret; disp('in full sample x of y returns are nan and ignored'); disp([sum(isnan(ret)) rows(ret)]); vwretd = x(~isnan(ret),5); % pick the non nan returns. spretd = x(~isnan(ret),6); tb3mretinx = x(~isnan(ret),13); ret = ret(~isnan(ret)); if sum(isnan(ret))>0; disp('error: a return is still NaN'); end; disp('full sample'); docrspreg(ret,vwretd,spretd,tb3mretinx); %xsm = x(1:2000,:); %save xsm xsm; %x = xsm; % % --------------- form small stock returns % start by characterizing size deciles of nasdaq sample. disp('observations in whole sample'); disp(rows(x)); % find cutoff of 1/10 smallest. testpcshr = (~isnan(x(:,7)))&(~isnan(x(:,8))); % good price and share data mktval = x(:,7).*abs(x(:,8)); mkt2 = sortrows(mktval(testpcshr)); cutoff1 = mkt2(floor(rows(mkt2)/10)); cutoff2 = mkt2(floor(2*rows(mkt2)/10)); cutoff3 = mkt2(floor(3*rows(mkt2)/10)); cutoff4 = mkt2(floor(4*rows(mkt2)/10)); cutoff5 = mkt2(floor(5*rows(mkt2)/10)); disp('market value cutoff for deciles 1 2 3 4 5 of entire nasdaq sample'); disp([cutoff1 cutoff2 cutoff3 cutoff4 cutoff5]); cutoffs = ([2 5 10 50]')*1000; %cutoff for firm value in 1000 %cutoffs = 2000; % *********Two month lag disp('**********small sample 2 two month lag, Nan are -1'); for indx = 1:rows(cutoffs); cutoff = cutoffs(indx); disp(' Cutoff in millions'); disp(cutoff/1000); % buy with good data and value below cutoff indxs = [1==0;1==0;testpcshr(1:end-2)]&... % market value period one good data and [1==0;1==0;mktval(1:end-2) 1E-10; disp('crspreg Error in logic'); end; ret = nansum([x2(:,3)'; x2(:,4)'])';% use return or dlret; disp('number of remaining Nan and total'); disp([sum(isnan(ret)) rows(ret)]); ret(isnan(ret))=-1*ones(sum(isnan(ret)),1); % NOte this has a minor effect. Just ignoring nans leads to similar numbers % most remaining after 1, 2 above are a long list of p=0, and then disappears % with no delisting return. -1 sounds not bad for this! 
disp(' average return raw and value weighted -- annual percent'); disp(1200*[mean(ret) mean(ret.*mktval2a)/mean(mktval2a)]); vwretd = x2(:,5); spretd = x2(:,6); tb3mretinx = x2(:,13); docrspreg(ret,vwretd,spretd,tb3mretinx); % ******************* form portfolio return. reuse x2c x2 = [ret mktval2a ddate(x2(:,10))]; % keep dates, values for weighting and returns x2 = sortrows(x2,3); i = 1; for t = 1987.25:(1/12):2002.0; thisdate = find( (x2(:,3)>t-1/24)&(x2(:,3)0,:); %/* positive returns */ zn = sum(z==0); % /* number of zeros */ zl = log(zp); figure; plot(1200*zl,(1:rows(zl))/rows(zl)); zsm = sortrows(1+retsm); zpsm = zsm(zsm>0,:); %/* positive returns */ znsm = sum(zsm==0); % /* number of zeros */ zlsm = log(zpsm); figure; plot(1200*zlsm,(1:rows(zlsm))/rows(zlsm)); title('small sample returns'); end; if 0; for j=1:rows(lnrgrid); %disp([j rows(lnrgrid)]); lpvals(j) = sum( 1./((szl*2*pi).^0.5).*exp(-1/2 * (lnrgrid(j) - zl).^2/szl.^2)); end; lpvals = lpvals/sum(lpvals); logdist = exp(-(lnrgrid-elnr).^2/(2*sdlnr^2)); logdist = logdist/sum(logdist); figure; plot(1200*lnrgrid,[lpvals logdist]); xlabel('annualized log return, %'); axis([-1500 1500 -inf inf]); end; % good data: % shrcd = 10 or 11, % exchcode 1 NYSE 2 AMEX 3 NASDAQ, we want 3 ? % shrout check decent numbers % prc not 0, -99. Use abs negatives are ok. % ************************** % OLD stuff. % % one month lag. dropped in favor of 2 to make sure no low price - high return bounce % ************** one month lag, if 0; disp('Small sample, One month lag, Nan are -1'); for indx = 1:rows(cutoffs); cutoff = cutoffs(indx); disp(' Cutoff in millions'); disp(cutoff/1000); % buy with good data and value below cutoff x2 = x([1==0;testpcshr(1:end-1)]&... % last one good data and [1==0;mktval(1:end-1)