/* lookbond.prg

   Figures out the history of individual bonds, looks at them, and cleans
   the quantity outstanding numbers.
   Run makefmt.prg first to store mbx in gauss binary format.

   input:   mbx.fmt   gauss format file identical to mbx, 1950 -

   output:  (saved as *.fmt)
            xnew:   cleaned bond data organized by issue, then date
            xnew2:  cleaned bond data organized by date (as mbx);
                    rows correspond exactly to mbx rows, with clean quantities
            xa2:    Dec 31 slices of xnew2, used for annual calculations
   --------------------------------------------------------------------------- */

library pgraph;

output file = lookbond.out reset;   /* directs output to "lookbond.out" vs.
                                       printing on-screen; resets for each run */

debugit = 0;    /* 1 = use a small data sample for program debugging */
graphq  = 0;    /* graphs quantity history of each bond; only has an effect
                   if the selector near the bottom of the bond loop is
                   un-commented to include it */

load x = mbx;                   /* start with gauss format data 1950- from makefmt */
x = x~seqa(1,1,rows(x));        /* observation number in column 14; used to
                                   re-establish the original order later */

/*---------------------------------------------------------------------*/
/* Definitions of the data in x.  See crsp bond data documentation too.

   date   = x[.,1];     quotation date
   crspid = x[.,2];     crsp id YYYYMMDD.TCCCE
                          T = Itype
                              1 = noncallable bond
                              2 = noncallable note
                              3 = cert of indebtedness
                              4 = treasury bill
                              5 = callable bond
                              6 = callable note
                              7 = tax anticipation cert of ind
                              8 = tax anticipation bill
                              9 = other (strange provisions)
                          CCC = coupon
                          E   = uniqueness number (separates otherwise
                                identical bonds)
   pric1r = x[.,3];     month end bid price
   pric2r = x[.,4];     month end ask price
   accint = x[.,5];     accrued interest
   pdint  = x[.,6];     interest payable during month
   Itax   = x[.,7];     taxability of interest
   Iflwr  = x[.,8];     flower bond code
   ytm    = x[.,9];     yield to maturity
   retadj = x[.,10];    1 month holding period return
   duratn = x[.,11];    duration
   iout1r = x[.,12];    face value outstanding; 0, -1 = unknown
   iout2r = x[.,13];    par value publicly held; NA for bills, many others
*/
/*---------------------------------------------------------------------*/

/* --------------------------- */
/* coding errors fixed by hand */
/* --------------------------- */

/* note rows of x refer to post 50 sample, NOT original mbx rows */
/* see also output file for automatic coding fixes */
/* no code here; errors to date have been fixed by CRSP in original mbx */

/* --------------- */
/* prepare to look */
/* --------------- */

/*          1 2  3  4  5      new column numbers */
xnew = x[.,1|2|12|13|14];   /* keeps all rows of columns 1,2,12,13,14:
                               1 = date, 2 = crspid,
                               3 = face value outstanding ("total"),
                               4 = par value publicly held,
                               5 = obsno */

xnew = sortc(xnew,2);       /* sort by crspid (which leads with the maturity
                               date) so you can look at the time series of q
                               for each bond.  This scrambles observation
                               dates; dates are re-sorted per bond below. */

if debugit;
    /* clamp so that a sample shorter than 100 rows does not raise an
       index error */
    xnew = xnew[1:minc(100|rows(xnew)),.];
endif;

/* uses -99 for missing (-1) so it shows on graphs;
   replaces other negative numbers by 0 */
xnew[.,3:4] = (xnew[.,3:4] .>= 0).*xnew[.,3:4] + (xnew[.,3:4] .== -1).*(-99);

format /RD 3,0;             /* format for printing bonds */

/* -------------------------------------------------------- */
/* form bonddat matrix of data for each bond sorted by date */
/* -------------------------------------------------------- */

bondnum = 1;
i = 1;
do while i <= rows(xnew);

    bondnam = xnew[i,2];        /* stores identity of this bond */

    /* progress message to the screen only (kept out of lookbond.out).
       Reports the bond actually being processed, i.e. the id taken from
       the crspid-sorted xnew, not from the date-ordered x. */
    output off;
    "Doing bond " bondnum bondnam 1000000*(bondnam-trunc(bondnam));
    output on;
    bondnum = bondnum+1;

    /* Count the rows j belonging to this bond.  xnew is sorted by crspid,
       so they are contiguous starting at row i.  The bound is tested
       before indexing so the last row of the file never over-runs. */
    j = 1;
    do while i+j <= rows(xnew);
        if xnew[i+j,2] /= bondnam;
            break;
        endif;
        j = j+1;
    endo;

    /* bonddat stores all data for this bond, sorted by date */
    bonddat = sortc(xnew[i:i+j-1,.],1);

    /* ------------------------------------- */
    /* Now, fixup quantity outstanding data  */
    /* ------------------------------------- */

    itype = bonddat[1,2];           /* itype = type of issue, part of crspid */
    itype = itype-trunc(itype);     /* gets decimal part */
    /* First decimal digit.  The 1e-6 nudge guards against the decimal part
       being stored a hair below the digit boundary (e.g. 0.4999999981 for
       .50000), which would otherwise truncate to the wrong type.  It is far
       smaller than the granularity of the id's decimal digits. */
    itype = trunc(10*itype + 1e-6);

    oldbdat = bonddat;      /* saves old so you can see source of probs in
                               orig data (plotted by graphbnd) */

    /* ----------------------------------------------------- */
    /* 1. if first obs is NA (not zero) and good observations
          follow, use next available good observation        */
    /* ----------------------------------------------------- */

    if (bonddat[1,3] < 0) and (maxc(bonddat[.,3]) >= 0);
        trash = bonddat[.,3];
        trash = selif(trash,trash .>= 0);
        bonddat[1,3] = trash[1];
    endif;                  /* below will propagate to 2,3,.. if needed */

    k = 1;
    do while k <= rows(bonddat);

        /* Only for non-callable issues: quantities of callable bonds (5)
           and callable notes (6) can legitimately decline if called, so a
           low value must not be overwritten with last month's.
           NOTE: this must be "and"; with "or" the test is true for every
           itype and callable issues were carried forward too. */
        if (itype /= 5) and (itype /= 6);

            /* --------------------------------------------------------- */
            /* 2. Use last month's publicly-held q if this month's is NA */
            /* --------------------------------------------------------- */

            if (bonddat[k,4] <= 1);         /* +1 shows up; i bet it's miscoded -1 */
                if k > 1;                   /* try last month's q outstanding */
                    if bonddat[k-1,4] > 1;  /* use if that's any good */
                        bonddat[k,4] = bonddat[k-1,4];
                    endif;
                endif;
            endif;

            /* -------------------------------------------------------- */
            /* 3. Fix missing total outstanding. use last month's total */
            /* -------------------------------------------------------- */

            if bonddat[k,3] <= 1;
                if k > 1;                   /* try last month's q outstanding */
                    if bonddat[k-1,3] > 1;  /* use if that's any good */
                        bonddat[k,3] = bonddat[k-1,3];
                    endif;
                endif;
            endif;

        endif;      /* ends itype if */

        k = k+1;
    endo;

    /* -------------------------------------------------------------------- */
    /* 4. if every total (col 3) is missing but some publicly-held (col 4)  */
    /*    values are there, use the publicly-held series as the total       */
    /* -------------------------------------------------------------------- */

    if (maxc(bonddat[.,3] .> 1) == 0) and (maxc(bonddat[.,4] .> 1) == 1);
        bonddat[.,3] = bonddat[.,4];
    endif;

    /* -------------------------------------------------------------------- */
    /* 5. Use the publicly-held number (col 4) where total (col 3) is still
          missing, i.e. exactly 0.
          I checked every bond. this is sensible. In most cases, it probably
          understates initial total; as total is 0 0 0 0 0 3600 3600 3600
          and we use say 1200 for publicly held. Still, it's not wild       */
    /* -------------------------------------------------------------------- */

    if rows(bonddat) > 2;
        if maxc(bonddat[.,3] .== 0);                /* if there are any missing */
            bonddat[indexcat(bonddat[.,3],0),3] =   /* use the col 4 number */
                bonddat[indexcat(bonddat[.,3],0),4];
        endif;
    endif;

    /* ------------------------------------------------------------------------ */
    /* 6. Test for big one period change that is reversed; sign of coding error */
    /* ------------------------------------------------------------------------ */

    dqflag1 = 0;    /* 1 indicates a big reversed change in total (col 3) */
    dqflag2 = 0;    /* 1 indicates a big reversed change in publicly held (col 4) */

    if (maxc(bonddat[.,3]) > 0) and (rows(bonddat) > 4);
        /* typical size of bond, not including NA's */
        typsiz1 = meanc(selif(bonddat[.,3],bonddat[.,3] .> 0));
        dq1 = bonddat[2:rows(bonddat),3]-bonddat[1:rows(bonddat)-1,3];
        dq1 = dq1/typsiz1;      /* now is difference as fraction of typical size */
        /* 1 if there is a >10% change that the adjacent change reverses
           to within 3% of the original value.  Element t of trash1 pairs
           dq1[t] with dq1[t+1], so the suspect observation is row t+1. */
        trash1 = (abs(dq1[2:rows(dq1),1]+dq1[1:rows(dq1)-1,1]) .< 0.03) .and
                 (abs(dq1[2:rows(dq1),1]) .>= 0.10);
        dqflag1 = maxc(trash1);
    endif;

    if (maxc(bonddat[.,4]) > 0) and (rows(bonddat) > 4);
        typsiz2 = meanc(selif(bonddat[.,4],bonddat[.,4] .> 0));
        dq2 = bonddat[2:rows(bonddat),4]-bonddat[1:rows(bonddat)-1,4];
        dq2 = dq2/typsiz2;
        trash2 = (abs(dq2[2:rows(dq2),1]+dq2[1:rows(dq2)-1,1]) .< 0.03) .and
                 (abs(dq2[2:rows(dq2),1]) .>= 0.10);
        dqflag2 = maxc(trash2);
    endif;

    if dqflag1;
        "";
        "Found large, reversed % change in in bond ";
        format /RD 18,8;
        bonddat[indexcat(trash1,1)+1,1:2];
        "Partial history of quantities" ;;
        format /RD 3,0;
        /* rows t, t+1, t+2 around each flagged observation */
        bonddat[sortc(vec(indexcat(trash1,1)'+(0|1|2)),1),1|3|4];
        "Removed big change";
        /* overwrite the spike with the preceding month's value */
        bonddat[indexcat(trash1,1)+1,3] = bonddat[indexcat(trash1,1),3];
    endif;

    if dqflag2;
        "";
        "Found error in bond ";
        format /RD 18,8;
        bonddat[indexcat(trash2,1)+1,1:2];
        "Partial history of quantities" ;;
        format /RD 3,0;
        bonddat[sortc(vec(indexcat(trash2,1)'+(0|1|2)),1),1|3|4];
        "Removed big change";
        bonddat[indexcat(trash2,1)+1,4] = bonddat[indexcat(trash2,1),4];
    endif;

    /* --------------------------------------------- */
    /* print or graph bonds that still have problems */
    /* insert whatever selectors you want            */
    /* --------------------------------------------- */

    maxq  = 0;
    misst = 0;

    /* test for total (col 3) < publicly held (col 4); allows 5 over */
    if rows(bonddat) < 6;
        maxq = maxc(bonddat[.,3] .< bonddat[.,4] - 5);
    else;
        /* skips the first two and last three observations:
           allows difference near sale, redemption date */
        maxq = maxc(bonddat[3:rows(bonddat)-3,3] .<
                    bonddat[3:rows(bonddat)-3,4] - 5);
    endif;

    /* test for missing total */
    if rows(bonddat) > 2;
        misst = maxc(bonddat[3:rows(bonddat),3] .< 1);  /* missing past 2nd. 1st 2 ok */
    endif;

    /* The text between @ signs is commented out.  Remove the @ signs or add
       selectors to show only histories conditional on signs of trouble.
       As written, only maxq triggers a graph. */
    if (@graphq or@ maxq @or misst or dqflag1 or dqflag2@) and (rows(bonddat) > 1);
        gosub graphbnd;
    endif;

    /* Un-comment the statement below to print the bond no matter what:

       (bonddat[.,1:2]~(100000*(bonddat[.,2]-trunc(bonddat[.,2])))~
        bonddat[.,3:cols(bonddat)]);
    */

    xnew[i:i+j-1,.] = bonddat;      /* xnew includes all cleaning */

    i = i+j;
endo;

/* -------------------------- */
/* prepare, save cleaned data */
/* -------------------------- */

save xnew;      /* xnew is cleaned data organized by bond issue, then date */

xnew2 = sortc(xnew,cols(xnew));     /* produce cleaned old x, sorted by date
                                       using observation number created
                                       above.  For use by crsp.prg */
clear xnew;                         /* saves memory */

/* Put back in the unused x data.  Rows of x are picked by the stored
   observation number rather than by position 1..rows(xnew2): the two are
   the same for the full sample, but only the former lines up when debugit
   has cut xnew down to a subsample. */
obsno = xnew2[.,5];
xnew2 = xnew2[.,1|2]~x[obsno,3:11]~xnew2[.,3:5];
clear obsno;

save xnew2;     /* xnew2 is cleaned data organized by date, then bond issue.
                   Rows should correspond exactly to x, with clean data */

mo = trunc(xnew2[.,1]/100)-100*trunc(xnew2[.,1]/10000);    /* month from YYYYMMDD */
indxs = indexcat(mo,12);            /* implements selif in less memory */
clear mo;
xa2 = xnew2[indxs,.];
save xa2;       /* only december xnew2.  For annual calculations */

output off;

end;

/* ---------------------------------- */
/* subroutine to graph bond's history */
/* ---------------------------------- */

/* Reads the globals bondnam, bonddat (cleaned) and oldbdat (as loaded) set in
   the bond loop, and plots cleaned and original columns 3:4 against date.
   Overwrites the globals yr, mo, day and dates. */
graphbnd:

    graphjc;        /* NOTE(review): not defined in this file; presumably a
                       user graph-setup procedure - confirm it is available */
    _plctrl = 1;
    title("quantity history for bond" $+ ftos(bondnam,"%*.*lf",15,6));
    _plegstr = "fixed total Q\000fixed private Q" $+
               "\000original total Q\000original private Q";
    _plegctl = 1;
    _pltype = 6|6|4|4;

    /* convert 19590131 to decimal date */
    yr  = trunc(bonddat[.,1]/10000);
    mo  = trunc(bonddat[.,1]/100)-100*yr;
    day = bonddat[.,1]-100*mo-10000*yr;
    /* months elapsed before the current one is mo-1; using mo/12 would plot
       every observation one month late */
    dates = yr+(mo-1)/12+day/365;

    xy(dates,bonddat[.,3:4]~oldbdat[.,3:4]);

    return;