-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy path3_merge.sas
190 lines (159 loc) · 4.98 KB
/
3_merge.sas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~;
*Step 4: Merge on analyst, firm, years;
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~;
%include "C:\Users\j.flake\Dropbox\GitHub\CapIQ_IBES_Match\inputs.sas";
data ciqafy_merge;
%let _EFIERR_ = 0; /* set the ERROR detection macro variable */
infile &ciqfile delimiter = ',' MISSOVER DSD lrecl=32767 firstobs=2 ;
input
transcriptpersonid
proid
companyofperson :$92.
year
gvkey :$6.
fname :$19.
lname :$22.
;
if _ERROR_ then call symputx('_EFIERR_',1);
run;
data ibesafy_merge;
%let _EFIERR_ = 0; /* set the ERROR detection macro variable */
infile &ibesfile delimiter = ',' MISSOVER DSD lrecl=32767 firstobs=2 ;
input
ticker :$6.
estimid :$8.
amaskcd
year
permno
gvkey :$6.
analyst :$20.
lname1 :$17.
lname2 :$12.
;
if _ERROR_ then call symputx('_EFIERR_',1);
run;
* Prepare merge;
proc sort data = ciqafy_merge nodupkey; by transcriptpersonid companyofperson gvkey year lname;
where lname ne "" and gvkey ne "" and year ne .;
run;
proc sort data = ibesafy_merge nodupkey; by amaskcd estimid gvkey year lname1;
where lname1 ne "" and gvkey ne "" and year ne .;
run;
proc sql;
create table merged as select
a.*, b.ticker, b.estimid, b.amaskcd, b.permno, b.gvkey as ibes_gvkey,
b.year as ibes_year, b.lname1 as ibes_lname1, b.lname2 as ibes_lname2, b.analyst
from ciqafy_merge as a full join ibesafy_merge as b
on a.gvkey = b.gvkey
and (a.lname = b.lname1 or a.lname = b.lname2)
and a.year = b.year;
quit;
data merged2; set merged;
if year ne . and ibes_year ne . then _merge = 1;
else _merge = 0;
if year = . then year = ibes_year;
if gvkey = "" then gvkey = ibes_gvkey;
drop ibes_year ibes_gvkey;
run;
proc sort data = merged2 out = merged3 nodupkey; by transcriptpersonid amaskcd companyofperson estimid gvkey year;
where _merge = 1;
run;
* Count the number of analyst-firm-year matches at the companyofperson level to prepare to map estimid to companyofperson
- where last name, company and year are exact matches
- If an analyst switches brokers midyear, they may create two matches between the esimid and the companyofperson
- We keep the most frequent match betweem companyofperson and estimid as the valid match;
proc sql;
create table brokers2 as select distinct
companyofperson, estimid,
count(companyofperson) as n_matches
from merged3
group by companyofperson, estimid;
quit;
proc sort data=brokers2;
by companyofperson descending n_matches;
where companyofperson ne "";
run;
* keep estimid - companyofperson match with the most number of matches;
proc sort data = brokers2 out = brokers3 nodupkey; by companyofperson;
run;
* 4,066 unique company of person matched to 1 estimid;
proc sort data=brokers3 out=brokers4; by estimid descending n_matches;
run;
* companyofperson contains multiple variations of the same name. To fix this, we get the importance of each companyofperson
for the estimid;
proc sql;
create table brokers5 as select distinct *,
sum(n_matches) as total_estimid
from brokers4
group by estimid;
quit;
proc sort data=brokers5; by estimid descending n_matches;
run;
data brokers6; set brokers5;
pct_total = n_matches / total_estimid;
run;
proc sql;
create table brokers7 as select
a.*, b.*
from adj.ciqbrokers as a left join brokers6 as b
on a.companyofperson = b.companyofperson
order by estimid,pct_total desc;
quit;
data adj.CIQ_IBESBrokerMerge_&date; set brokers7;
run;
proc sql;
create table analysts as select distinct
proid, transcriptpersonid, amaskcd,
count(transcriptpersonid) as n_matches
from merged3
group by transcriptpersonid, amaskcd;
quit;
proc sort data=analysts;
by transcriptpersonid descending n_matches;
where transcriptpersonid ne .;
run;
proc sort data=analysts nodupkey;
by transcriptpersonid;
where transcriptpersonid ne .;
run;
data adj.CIQ_IBESAnalystMerge_&date; set analysts;
run;
proc sql;
select count(*) as Total_CompanyOfPerson, count(estimid) as Total_Matched
from adj.ciq_ibesbrokerMerge_&date;
quit;
proc sort data = adj.ibesanalystfirmyear_&date out = ibes_broker_years (keep = estimid year) nodupkey; by estimid year;
run;
proc sql;
create table ibes_NBrokers_years as select
year, count(estimid) as ibes_NBrokers
from ibes_broker_years group by year;
quit;
proc means data = ibes_broker_years n;
var year;
class year;
run;
proc sort data = adj.ciqanalystfirmyear_&date out = ciq_broker_years (keep = companyofperson year) nodupkey; by companyofperson year;
run;
proc sql;
create table ciq_NBrokers_years as select
year, count(companyofperson) as CIQ_NBrokers
from ciq_broker_years group by year;
quit;
proc means data = ciq_broker_years n;
var year;
class year;
run;
proc sql;
create table NBrokersYears as select
a.year, a.ibes_NBrokers, b.year as CIQYear, b.ciq_NBrokers
from ibes_Nbrokers_years as a full join ciq_Nbrokers_years as b
on a.year = b.year
order by a.year;
quit;
data NBrokersYears; set NBrokersYears;
if year = . then year = CIQYear;
drop CIQYear;
run;
proc sort data = NBrokersYears out = adj.NBrokerYears_&date; by year;
run;