-
Notifications
You must be signed in to change notification settings - Fork 21
/
iclink.sas
248 lines (223 loc) · 10.8 KB
/
iclink.sas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
/*******************************************************************************************/
/* FileName: iclink.sas */
/* Date: Sept 25, 2006 */
/* Author: Rabih Moussawi */
/* Description: Create IBES - CRSP Link Table */
/* FUNCTION: - Creates a link table between IBES TICKER and CRSP PERMNO */
/* - Scores links from 0 (best link) to 6 */
/* */
/* INPUT: */
/* - IBES: IDUSM file */
/* - CRSP: STOCKNAMES file */
/* */
/* OUTPUT: ICLINK set stored in home directory */
/* ICLINK has 15,187 unique IBES TICKER - CRSP PERMNO links */
/* ICLINK contains IBES TICKER and the matching CRSP PERMNO and other fields: */
/* - IBES and CRSP Company names */
/* - SCORE variable: lower scores are better and high scores may need further */
/* checking before using them to link CRSP & IBES data. */
/* In computing the score, a CUSIP match is considered better than a */
/* TICKER match. The score also includes a penalty for differences in */
/* company names-- CNAME in IBES and COMNAM in CRSP. The name penalty is */
/* based upon SPEDIS, which is the spelling distance function in SAS. */
/* SPEDIS(cname,comnam)=0 is a perfect score and SPEDIS < 30 is usually good */
/* enough to be considered a name match. */
/* */
/* "SCORE" levels: */
/* - 0: BEST match: using (cusip, cusip dates and company names) */
/* or (exchange ticker, company names and 6-digit cusip) */
/* - 1: Cusips and cusip dates match but company names do not match */
/* - 2: Cusips and company names match but cusip dates do not match */
/* - 3: Cusips match but cusip dates and company names do not match */
/* - 4: Exch tickers and 6-digit cusips match but company names do not match */
/* - 5: Exch tickers and company names match but 6-digit cusips do not match */
/* - 6: Exch tickers match but company names and 6-digit cusips do not match */
/* */
/* ICLINK Example: */
/* TICKER CNAME PERMNO COMNAM SCORE */
/* BAC BANKAMERICA CORPORATION 58827 BANKAMERICA CORP 0 */
/* DELL DELL INC 11081 DELL INC 0 */
/* FFS 1ST FED BCP DEL 75161 FIRST FEDERAL BANCORP DE 3 */
/* IBM INTERNATIONAL BUSINESS MACHINES 12490 INTERNATIONAL BUSINESS MACHS CO 0 */
/* MSFT MICROSOFT CORP 10107 MICROSOFT CORP 0 */
/* */
/*******************************************************************************************/
* Possible IBES ID (names) file to use (as of April 2006);
* Detail History: ID file : 23808 unique US and Canadian company IBES TICKERs;
* Summary History: IDSUM File: 15576 unique US company IBES TICKERs;
* Recommendation Summary Statistics: RECDSUM File 12465 unique US company IBES tickers;
* It seems that the Summary History Identifier file IDSUM is best
because USFIRM dummy is used to designate only US companies;
%let IBES1= IBES.IDSUM;
%let CRSP1= CRSP.STOCKNAMES;
libname home '~'; * Save link table in home directory;
/* Step 1: Link by CUSIP */
/* IBES: Get the list of IBES TICKERS for US firms in IBES */
proc sort data=&IBES1 out=IBES1 (keep=ticker cusip CNAME sdates);
where USFIRM=1 and not(missing(cusip));
by ticker cusip sdates;
run;
/* Create first and last 'start dates' for CUSIP link */
proc sql;
create table IBES2
as select *, min(sdates) as fdate, max(sdates) as ldate
from IBES1
group by ticker, cusip
order by ticker, cusip, sdates;
quit;
/* Label date range variables and keep only most recent company name for CUSIP link */
data IBES2;
set IBES2;
by ticker cusip;
if last.cusip;
label fdate="First Start date of CUSIP record";
label ldate="Last Start date of CUSIP record";
format fdate ldate date9.;
drop sdates;
run;
/* CRSP: Get all PERMNO-NCUSIP combinations */
proc sort data=&CRSP1 out=CRSP1 (keep=PERMNO NCUSIP comnam namedt nameenddt);
where not missing(NCUSIP);
by PERMNO NCUSIP namedt;
run;
/* Arrange effective dates for CUSIP link */
proc sql;
create table CRSP2
as select PERMNO,NCUSIP,comnam,min(namedt)as namedt,max(nameenddt) as nameenddt
from CRSP1
group by PERMNO, NCUSIP
order by PERMNO, NCUSIP, NAMEDT;
quit;
/* Label date range variables and keep only most recent company name */
data CRSP2;
set CRSP2;
by permno ncusip;
if last.ncusip;
label namedt="Start date of CUSIP record";
label nameenddt="End date of CUSIP record";
format namedt nameenddt date9.;
run;
/* Create CUSIP Link Table */
/* CUSIP date ranges are only used in scoring as CUSIPs are not reused for
different companies overtime */
proc sql;
create table LINK1_1
as select *
from IBES2 as a, CRSP2 as b
where a.CUSIP = b.NCUSIP
order by TICKER, PERMNO, ldate;
quit; * 14,591 IBES TICKERs matched to CRSP PERMNOs;
/* Score links using CUSIP date range and company name spelling distance */
/* Idea: date ranges the same cusip was used in CRSP and IBES should intersect */
data LINK1_2;
set LINK1_1;
by TICKER PERMNO;
if last.permno; * Keep link with most recent company name;
name_dist = min(spedis(cname,comnam),spedis(comnam,cname));
if (not ((ldate<namedt) or (fdate>nameenddt))) and name_dist < 30 then SCORE = 0;
else if (not ((ldate<namedt) or (fdate>nameenddt))) then score = 1;
else if name_dist < 30 then SCORE = 2;
else SCORE = 3;
keep TICKER PERMNO cname comnam score;
run;
/* Step 2: Find links for the remaining unmatched cases using Exchange Ticker */
/* Identify remaining unmatched cases */
proc sql;
create table NOMATCH1
as select distinct a.*
from IBES1 (keep=ticker) as a
where a.ticker NOT in (select ticker from LINK1_2)
order by a.ticker;
quit; * 990 IBES TICKERs not matched with CRSP PERMNOs using CUSIP;
/* Add IBES identifying information */
proc sql;
create table NOMATCH2
as select b.ticker, b.CNAME, b.OFTIC, b.sdates, b.cusip
from NOMATCH1 as a, &IBES1 as b
where a.ticker = b.ticker and not (missing(b.OFTIC))
order by ticker, oftic, sdates;
quit; * 4,157 observations;
/* Create first and last 'start dates' for Exchange Tickers */
proc sql;
create table NOMATCH3
as select *, min(sdates) as fdate, max(sdates) as ldate
from NOMATCH2
group by ticker, oftic
order by ticker, oftic, sdates;
quit;
/* Label date range variables and keep only most recent company name */
data NOMATCH3;
set NOMATCH3;
by ticker oftic;
if last.oftic;
label fdate="First Start date of OFTIC record";
label ldate="Last Start date of OFTIC record";
format fdate ldate date9.;
drop sdates;
run;
/* Get entire list of CRSP stocks with Exchange Ticker information */
proc sort data=&CRSP1 out=CRSP1 (keep=ticker comnam permno ncusip namedt nameenddt);
where not missing(ticker);
by permno ticker namedt;
run;
/* Arrange effective dates for link by Exchange Ticker */
proc sql;
create table CRSP2
as select permno,comnam,ticker as crsp_ticker,ncusip,
min(namedt)as namedt,max(nameenddt) as nameenddt
from CRSP1
group by permno, ticker
order by permno, crsp_ticker, namedt;
quit; * CRSP exchange ticker renamed to crsp_ticker to avoid confusion with IBES TICKER;
/* Label date range variables and keep only most recent company name */
data CRSP2;
set CRSP2;
if last.crsp_ticker;
by permno crsp_ticker;
label namedt="Start date of exch. ticker record";
label nameenddt="End date of exch. ticker record";
format namedt nameenddt date9.;
run;
/* Merge remaining unmatched cases using Exchange Ticker */
/* Note: Use ticker date ranges as exchange tickers are reused overtime */
proc sql;
create table LINK2_1
as select a.ticker,a.oftic, b.permno, a.cname, b.comnam, a.cusip, b.ncusip, a.ldate
from NOMATCH3 as a, CRSP2 as b
where a.oftic = b.crsp_ticker and
(ldate>=namedt) and (fdate<=nameenddt)
order by ticker, oftic, ldate;
quit; * 146 new match of 136 IBES TICKERs;
/* Score using company name using 6-digit CUSIP and company name spelling distance */
data LINK2_2;
set LINK2_1;
name_dist = min(spedis(cname,comnam),spedis(comnam,cname));
if substr(cusip,1,6)=substr(ncusip,1,6) and name_dist < 30 then SCORE=0;
else if substr(cusip,1,6)=substr(ncusip,1,6) then score = 4;
else if name_dist < 30 then SCORE = 5;
else SCORE = 6;
run;
/* Some companies may have more than one TICKER-PERMNO link, */
/* so re-sort and keep the case (PERMNO & Company name from CRSP) */
/* that gives the lowest score for each IBES TICKER (first.ticker=1) */
proc sort data=LINK2_2; by ticker score; run;
data LINK2_3;
set LINK2_2;
by ticker score;
if first.ticker;
keep ticker permno cname comnam permno score;
run;
/* Step 3: Add Exchange Ticker links to CUSIP links */
/* Create final link table and save it in home directory */
data home.ICLINK;
set LINK1_2 LINK2_3;
run;
proc sort data=home.ICLINK; by TICKER PERMNO; run;
/* Create Labels for ICLINK dataset and variables */
proc datasets lib=home nolist;
modify ICLINK (label="IBES-CRSP Link Table");
label CNAME = "Company Name in IBES";
label COMNAM= "Company Name in CRSP";
label SCORE= "Link Score: 0(best) - 6";
run;
quit;