Skip to content

Commit

Permalink
updates for 2013 CDC analysis
Browse files Browse the repository at this point in the history
  • Loading branch information
kruggles7 committed Sep 25, 2014
1 parent b0194b9 commit 2be001f
Show file tree
Hide file tree
Showing 14 changed files with 1,294 additions and 18 deletions.
135 changes: 135 additions & 0 deletions clustergram/OR_clustergram_ques_RC_2013.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
%make heatmaps (clustergrams) out of the odds-ratio matrix for one question
%
% Loads odds_ratio_cell (question-pair odds ratios, one column per survey
% year 2013..2001) and qlabel (question id/label lookup), asks the user for
% a question id (e.g. 'Q01'), then:
%   1. collects the odds-ratio rows pairing that question with every other
%      question present in qlabel
%   2. replaces 'NaN'/'Inf'/empty cells with numeric sentinels
%   3. drops years and questions with missing data so the matrix is complete
%   4. median-centers each row and takes log2
%   5. draws and saves a clustergram (PNG + FIG) in clustergrams/results
%
% NOTE(review): all paths are relative -- the script must be started from
% the clustergram program directory for the cd chain below to work.

cd ..
cd ..
cd matrices
load OR_2013.mat        %provides odds_ratio_cell
load qlabel_090914.mat  %provides qlabel
cd ..
cd programs
cd clustergrams
cd results

%survey years, newest first; aligns with columns 3:9 of odds_ratio_cell
xlab={'2013', '2011', '2009', '2007', '2005', '2003', '2001'};

ques=input ('Enter in the question number you want to use (ex. Q01): ', 's');
for i=1:82
    %build zero-padded question id of the form 'Qnn'
    i_char=num2str(i);
    if length(i_char)<2
        i_char=['0' i_char];
    end
    q1=['Q' i_char];
    if strcmp(q1,ques)==1
        indx=find (strcmp(odds_ratio_cell(:,1),q1)==1);
        if isempty(indx)==0
            lab=odds_ratio_cell(indx,2);  %partner question ids for this question
            P=odds_ratio_cell(indx,3:9);  %odds ratios per survey year
            P2=cell.empty;
            qlabel2=cell.empty;
            [rl,cl]=size(qlabel);
            counter=1;
            %keep only partner questions that appear in qlabel (skip self-pairing)
            for j=1:rl
                indx=find(strcmp(qlabel{j,2},lab)==1 & strcmp(lab, ques)==0);
                if numel(indx)>0
                    P2(counter,:)=P(indx,:);
                    qlabel2(counter,:)=qlabel(j,:);
                    counter=counter+1;
                end
            end
            %replace string 'NaN' with the sentinel -10000 and 'Inf' with 10000
            indx=find(strcmp(P2,'NaN')==1);
            for j=1:numel(indx)
                P2{indx(j)}=-10000;
            end
            indx=find(strcmp(P2,'Inf')==1);
            for j=1:numel(indx)
                P2{indx(j)}=10000;
            end
            %empty cells are treated as missing data as well
            emptycells=cellfun(@isempty, P2);
            [r,c]=size(emptycells);
            for j=1:r
                for k=1:c
                    if (emptycells(j,k)==1)
                        P2{j,k}=-10000;
                    end
                end
            end
            plot_mat=cell2mat(P2);
            plot_mat=rot90(plot_mat);        %rows become years, columns questions
            [r,c]=size(plot_mat);
            plot_mat(plot_mat==-10000)=NaN;  %restore missing values as NaN

            %create second matrix keeping only year rows that are not all-NaN
            plot_mat2=double.empty;
            xlab_new=cell.empty;
            counter=1;
            for j=1:r
                indx=find(isnan(plot_mat(j,:))==0) ;
                if isempty(indx)==0 %entire row is NOT nan
                    plot_mat2(counter,:)=plot_mat(j,:);
                    xlab_new{counter}=xlab{j};
                    counter=counter+1;
                end
            end
            %remove questions that don't have all of the same years
            [r,c]=size(plot_mat2);
            plot_mat3=double.empty;
            qlab_new=cell.empty;
            counter=1;
            for j=1:c
                indx=find(isnan(plot_mat2(:,j))==1);
                if numel(indx)==0
                    plot_mat3(:,counter)=plot_mat2(:,j);
                    qlab_new{counter}=qlabel2{j,:};
                    counter=counter+1;
                end
            end

            %replace Inf sentinels with the row maximum so they do not
            %dominate the clustering
            [r,c]=size(plot_mat3);
            plot_mat3(isinf(plot_mat3)==1)=-10000 ;
            plot_mat3(plot_mat3==10000)=-10000;
            for j=1:r
                maxv=nanmax(plot_mat3(j,:));
                indx_inf=find(plot_mat3(j,:)==-10000);
                if numel(indx_inf)>0
                    plot_mat3(j,indx_inf)=maxv;
                end
            end
            %median center each row, then log2 transform
            for j=1:r
                med=nanmedian(plot_mat3(j,:));
                plot_mat3(j,:)=plot_mat3(j,:)/med;
            end
            log_rel_risk=log2(plot_mat3);
            %clamp -Inf entries (log2 of a zero odds ratio) to the smallest
            %finite value in the row. BUGFIX: the previous code used
            %min(temp), which includes -Inf itself, so the clamp was a no-op.
            for j=1:r
                temp=log_rel_risk(j,:);
                indx3=find(log_rel_risk(j,:)<-100000);
                finite_vals=temp(temp>-100000);
                if isempty(finite_vals)
                    minv=0;   %whole row was -Inf: fall back to neutral 0
                else
                    minv=min(finite_vals);
                end
                log_rel_risk(j,indx3)=minv;
            end
            log_rel_risk(isnan(log_rel_risk)==1)=0;

            cg=clustergram(log_rel_risk,'RowLabels', xlab_new, 'ColumnLabels',qlab_new,'Cluster',2, 'Colormap','jet', 'DisplayRange',3, 'Symmetric','true');
            fig=plot(cg);
            print (gcf,'-dpng',[q1 '_clustermap_OR_RC_2013.png']);
            saveas(gcf,[q1 '_clustermap_OR_RC_2013.fig']);
        end
    end
end
126 changes: 126 additions & 0 deletions heatmaps/create_hm_graph_2013.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
function [per_mat_map] = create_hm_graph_2013( question_mat, filename, race, sex, weight )
% CREATE_HM_GRAPH_2013 takes in the question binary matrix (rows = years,
% columns = subjects) and outputs the summary matrix with the weighted
% percentage of "yes" answers in each demographic subgroup. It also draws
% a heatmap of that matrix and saves it as .fig and .png files in the
% 'results' subdirectory.
% Input variables:
%   QUESTION_MAT: matrix for the question; column 1 is the year label and
%                 the remaining columns are per-subject answers
%                 (0 = no, 1 = yes, 9 = question not answered)
%   FILENAME: name of the question, used to build the saved file names
%   RACE:   per-subject race codes (1 = white, 2 = black, 3 = hispanic,
%           4 = other); column 1 is the year label
%   SEX:    per-subject sex codes (1 = girl, 2 = boy); column 1 is the
%           year label
%   WEIGHT: per-subject survey weights; column 1 is the year label
% Output:
%   PER_MAT_MAP: 15 x (number of years) matrix of percentages, flipped so
%                the display order is Total, Boys, Girls, W, B, H, O,
%                W Boys, W Girls, B Boys, B Girls, H Boys, H Girls,
%                O Boys, O Girls (top to bottom), rounded to one decimal.


% TOTAL=importdata('TOTAL.txt', '\t');
% ^ not necessary for this program because we're only interested in the students who answered and didn't leave out the Q

%strip the year-label column from every input so columns line up by subject
[r,c]=size(question_mat);
label_=question_mat(:,1);
question_mat=question_mat(:,2:c);
sex=sex(:,2:c);
race=race(:,2:c);
weight=weight(:,2:c);
[r,c]=size(question_mat);

per_mat = zeros(16,r); %NOTE(review): only rows 1:15 are ever filled or used
for i=1:r
% total(i)=TOTAL(i,1);
%index sets for this year: who said yes, and each demographic subgroup
index_yes{i}=find(question_mat(i,:)==1);
index_girls{i}=find(sex(i,:)==1);
index_boys{i}=find(sex(i,:)==2);
index_W{i}=find(race(i,:)== 1 );
index_B{i}=find(race(i,:)== 2 );
index_H{i}=find(race(i,:)== 3 );
index_O{i}=find(race(i,:)== 4 );
index_missQ{i}=find(question_mat(i,:)==9); %students who didn't answer the Q
index_nomiss{i}=find(question_mat(i,:)==0 | question_mat(i,:)==1); %answers that were NOT missing (ie. 0's and 1's / no's and yes's)
missQ(i)=length(index_missQ{i}); %number of students who didn't answer the question each year
index_total_b{i}=intersect(index_nomiss{i},index_boys{i}); %index of all boys who answered
index_total_g{i}=intersect(index_nomiss{i},index_girls{i}); %index of all girls who answered
%weighted denominators: sum of survey weights over each answering subgroup
w=weight(i,:)';
total_ans(i)=nansum(w(index_nomiss{i}));
total_girls(i)=nansum(w(index_total_g{i})); %total # of girls who answered
total_boys(i)=nansum(w(index_total_b{i})); %total number of boys who answered
total_W{i}=nansum(w(intersect(index_nomiss{i}, index_W{i}))); %total # of white students who answered
total_B{i}=nansum(w(intersect(index_nomiss{i}, index_B{i}))); %total # of black students who answered
total_H{i}=nansum(w(intersect(index_nomiss{i}, index_H{i}))); %total # of hispanic students who answered
total_O{i}=nansum(w(intersect(index_nomiss{i}, index_O{i}))); %total # of "other" students who answered
total_Wb(i)=nansum(w(intersect(index_total_b{i},index_W{i})));
total_Wg(i)=nansum(w(intersect(index_total_g{i},index_W{i})));
total_Bb(i)=nansum(w(intersect(index_total_b{i},index_B{i})));
total_Bg(i)=nansum(w(intersect(index_total_g{i},index_B{i})));
total_Hb(i)=nansum(w(intersect(index_total_b{i},index_H{i})));
total_Hg(i)=nansum(w(intersect(index_total_g{i},index_H{i})));
total_Ob(i)=nansum(w(intersect(index_total_b{i},index_O{i})));
total_Og(i)=nansum(w(intersect(index_total_g{i},index_O{i})));

%weighted numerators: sum of survey weights over "yes" in each subgroup
w=weight(i,:)';
index_yesgirls{i}=intersect(index_yes{i},index_girls{i});
index_yesboys{i}=intersect(index_yes{i},index_boys{i});
yes_girls(i)=nansum(w(index_yesgirls{i}));
yes_boys(i)=nansum(w(index_yesboys{i}));
yes_W(i)=nansum(w(intersect(index_yes{i}, index_W{i})));
yes_B(i)=nansum(w(intersect(index_yes{i}, index_B{i})));
yes_H(i)=nansum(w(intersect(index_yes{i}, index_H{i})));
yes_O(i)=nansum(w(intersect(index_yes{i}, index_O{i})));
yes_WG(i)=nansum(w(intersect(index_yesgirls{i},index_W{i})));
yes_BG(i)=nansum(w(intersect(index_yesgirls{i},index_B{i})));
yes_HG(i)=nansum(w(intersect(index_yesgirls{i},index_H{i})));
yes_OG(i)=nansum(w(intersect(index_yesgirls{i},index_O{i})));
yes_WB(i)=nansum(w(intersect(index_yesboys{i},index_W{i})));
yes_BB(i)=nansum(w(intersect(index_yesboys{i},index_B{i})));
yes_HB(i)=nansum(w(intersect(index_yesboys{i},index_H{i})));
yes_OB(i)=nansum(w(intersect(index_yesboys{i},index_O{i})));
total_yes(i)=nansum(w(index_yes{i}));
%copy cell totals into plain arrays before division
total_w(i)=total_W{i};
total_b(i)=total_B{i};
total_h(i)=total_H{i};
total_o(i)=total_O{i};
%weighted "yes" percentage per subgroup; row order is bottom-to-top of the
%final heatmap (it is flipped below)
per_mat(15, i)=total_yes(i)/total_ans(i)*100; %total
per_mat(14, i)=yes_boys(i)/total_boys(i)*100; %boys
per_mat(13, i)=yes_girls(i)/total_girls(i)*100; %girls
per_mat(12, i)=yes_W(i)/total_w(i)*100; %whites
per_mat(11, i)=yes_B(i)/total_b(i)*100; %blacks
per_mat(10, i)=yes_H(i)/total_h(i)*100; %hispanics
per_mat(9, i)=yes_O(i)/total_o(i)*100; %other
per_mat(8, i)=yes_WB(i)/total_Wb(i)*100; %WB
per_mat(7, i)=yes_WG(i)/total_Wg(i)*100; %WG
per_mat(6, i)=yes_BB(i)/total_Bb(i)*100; %BB
per_mat(5, i)=yes_BG(i)/total_Bg(i)*100; %BG
per_mat(4, i)=yes_HB(i)/total_Hb(i)*100; %HB
per_mat(3, i)=yes_HG(i)/total_Hg(i)*100; %HG
per_mat(2, i)=yes_OB(i)/total_Ob(i)*100; %OB
per_mat(1, i)=yes_OG(i)/total_Og(i)*100; %OG
end

%Make heatmap


label_year=num2cell(label_);
label_cell2={'Total', 'Boys', 'Girls', 'W', 'B', 'H', 'O', 'W Boys', 'W Girls', 'B Boys', 'B Girls', 'H Boys', 'H Girls', 'O Boys', 'O Girls'};
per_mat_map(1:15,1:r)=per_mat(1:15,1:r);
per_mat_map=flipdim(per_mat_map,1);
%choose the colour-scale maximum: smallest of 25/50/75/100 that fits the data
max_mat=max(max(per_mat_map));
if max_mat>75
M=100;
elseif max_mat>50
M=75;
elseif max_mat>25
M=50;
else
M=25;
end
%round to one decimal place for display
per_mat_map=per_mat_map*10;
per_mat_map=round(per_mat_map);
per_mat_map=per_mat_map/10;
h=figure;
%heatmap_rb is a project-local plotting helper (not a MATLAB builtin)
[hImage]=heatmap_rb(per_mat_map, label_year, label_cell2, 1, M, 0, 'Colormap','money', 'UseLogColormap', false, 'ShowAllTicks',true, 'Colorbar',true,'TextColor','k', 'FontSize', 12);
%title (title1, 'FontSize', 12);
set (gca, 'FontSize',12);
cd results
saveas (gcf, [ filename '_heatmap_2013.fig'] ); %can make pdf, png, or jpg
print (gcf, '-dpng', [ filename '_heatmap_2013.png']);
cd ..
close all


end %end of function
37 changes: 37 additions & 0 deletions heatmaps/run_all_hm_graph_2013.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
%run_all_hm_graph_2013
% Runs create_hm_graph_2013 on every question file in the NaN results
% directory, producing one heatmap per question.
% NOTE(review): the absolute path below is machine-specific (compare the
% 'rugglk01' path used in create_RR_OR_2013.m) -- confirm before running.

files1=dir(fullfile('C:','Users','kruggles7','Dropbox (Personal)','CDC','data','results_091614','NaN', '*.txt'));

% reads all the text files in the folder 'binary_NaN_files' and saves them in an array called files
% make sure that folder contains only the NaN files for the questions you want to run create_hm_graph for
N=length(files1);

%the control matrices are the same for every question, so load them once
%(the original reloaded them on every loop iteration)
cd ..
cd ..
cd data
cd Controls_061514
sex=importdata('sex-NaN.txt', '\t');
race=importdata('race-NaN.txt', '\t');
weight=importdata('weights-NaN.txt','\t');
cd ..
cd ..
cd programs
cd heatmaps

for i=1:N
    %load this question's binary matrix
    cd ..
    cd ..
    cd data
    cd results_091614
    cd NaN
    question_mat=importdata(files1(i).name, '\t');
    cd ..
    cd ..
    cd ..
    cd programs
    cd heatmaps
    %'filename' is everything before the first '-' in 'Q#-NaN.txt'
    a=char(files1(i).name);
    b=strfind(a,'-');
    filename=a(1:b(1)-1);
    [ per_mat_map ] = create_hm_graph_2013 ( question_mat, filename, race, sex, weight );
end
36 changes: 18 additions & 18 deletions odds_ratio/create_RR_OR_2013.m
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@

k=1; %counter for rows in rel_risk_cell and odds_ratio_cell

files1=dir(fullfile('C:','Users','kruggles7','Documents','MATLAB', 'CDC', 'data','results_061514','NaN', '*.txt'));
files1=dir(fullfile('C:','Users','rugglk01','Dropbox (Personal)','CDC','data','results_091614','NaN', '*.txt'));
cd ..
cd ..
cd matrices
load reverse_code.mat
cd ..
cd data
cd results_061514
cd results_091614
cd NaN

P=length(files1);
Expand All @@ -43,7 +43,7 @@
q1_RC=reverse_code(ct,1);

%filename2
files2=dir(fullfile('C:','Users','kruggles7','Documents','MATLAB', 'Rajan', 'relative_risk_final','NaN_results_010314', '*.txt'));
files2=dir(fullfile('C:','Users','rugglk01','Dropbox (Personal)','CDC','data','results_091614','NaN', '*.txt'));
N=length(files2);
for n=1:N
quest_2=importdata(files2(n).name, '\t');
Expand Down Expand Up @@ -84,8 +84,8 @@
indx2=find(year2==y);
if numel(indx1)>0 && numel(indx2)>0 %both in the matrix
year_final(counter,1)=y;
quest_1F(counter,:)=quest_1(indx1,:);
quest_2F(counter,:)=quest_2(indx2,:);
quest_1F(counter,:)=quest_1(indx1,2:c1);
quest_2F(counter,:)=quest_2(indx2,2:c2);
e=ii;
if (counter==1)
s=ii;
Expand Down Expand Up @@ -141,22 +141,22 @@
c=total_yes1_no2(i);
b=total_no1_yes2(i);

%formula for relative risk:
RR= ( a/(a+b) ) / ( c/(c+d) );

P1= a/(a+b);
P2= c/(c+d);
% %formula for relative risk:
% RR= ( a/(a+b) ) / ( c/(c+d) );
%
% P1= a/(a+b);
% P2= c/(c+d);

%formula for odds ratio:
OR= ( P1/(1-P1) ) / ( P2/(1-P2) );


%rel_risk cell matrix:
rel_risk_cell{k,1}=[filename1];
rel_risk_cell{k,2}=[filename2];
x=num2cell(RR);
rel_risk_cell(k,K)=x;
OR= (a*d)/(b*c);

%
% %rel_risk cell matrix:
% rel_risk_cell{k,1}=[filename1];
% rel_risk_cell{k,2}=[filename2];
% x=num2cell(RR);
% rel_risk_cell(k,K)=x;
%
%odds_ratio cell matrix:
odds_ratio_cell{k,1}=[filename1];
odds_ratio_cell{k,2}=[filename2];
Expand Down
Loading

0 comments on commit 2be001f

Please sign in to comment.