Skip to content

Commit

Permalink
update perl programs for 2013 data
Browse files Browse the repository at this point in the history
  • Loading branch information
kruggles7 committed Jun 16, 2014
1 parent e46898e commit d57c3f2
Show file tree
Hide file tree
Showing 16 changed files with 561 additions and 2 deletions.
File renamed without changes.
File renamed without changes.
File renamed without changes.
47 changes: 47 additions & 0 deletions perl/archive/perl_bin_cat_NAN.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/perl -w

# Pads every row of the selected tab-delimited files in a directory with "NaN"
# cells so that all rows end up the same width (so the file can then be turned
# into a matrix).
#
# Usage : perl perl_bin_cat_NAN.pl <directory>
# Input : regular files in <directory> whose name contains "Q", does not
#         contain "log" and does not end in "binary.txt".
# Output: <directory>/cat/<name without .txt>-cat-NaN.txt, one per input file.
#
# NOTE: only tab-terminated cells are copied; any text after the last tab of a
# line (normally just the newline) is not written to the output.

use strict;

# Rows are padded while their cell count is <= this value, so every padded row
# ends up with $max_index + 1 cells. Rows that are already longer are not cut.
my $max_index = 16410;

# Fall back to the current directory (after printing the original notice) when
# no usable directory argument was given.
my $directory = ".";
if (defined $ARGV[0] && $ARGV[0] =~ /\w/)
{
    $directory = $ARGV[0];
}
else
{
    print "ERROR IN FILE";
}

unless (-d "$directory/cat")
{
    mkdir("$directory/cat") or die "can't create directory $directory/cat: $!";
}

opendir(my $dir_handle, $directory) or die "can't open directory $directory: $!";
my @files = readdir $dir_handle;    # names of everything in that folder
closedir $dir_handle;               # directory handles need closedir, not close

foreach my $entry (@files)
{
    # Only the "Q" result files: skip the binary files and the logs.
    next unless $entry !~ /binary\.txt$/ && $entry !~ /log/ && $entry =~ /Q/;
    next unless -f "$directory/$entry";    # ignore sub-directories and the like

    my $in_path = "$directory/$entry";
    (my $base = $entry) =~ s/\.txt$//;     # drop the ".txt" extension
    my $out_path = "$directory/cat/$base-cat-NaN.txt";

    open(my $in,  '<', $in_path)  or die "can't open file $in_path: $!";
    open(my $out, '>', $out_path) or die "cant create file $out_path: $!";

    while (my $line = <$in>)
    {
        # Re-print the tab-terminated cells, counting them as we go.
        my $cells = 0;
        while ($line =~ s/^([^\t]*)\t//)
        {
            print $out "$1\t";
            $cells++;
        }
        # Then fill the rest of the row with NaN cells.
        while ($cells <= $max_index)
        {
            print $out "NaN\t";
            $cells++;
        }
        print $out "\n";
    }

    close $in;
    close $out or die "error writing $out_path: $!";
}
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
33 changes: 33 additions & 0 deletions perl/control_NaN.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/usr/bin/perl -w

# Pads every row of the control tables (race, sex, grade) in $dir with "NaN"
# cells so that all rows end up the same width.
#
# Input : $dir/race.txt, $dir/sex.txt, $dir/grade.txt (tab-delimited).
# Output: $dir/race-NaN.txt, $dir/sex-NaN.txt, $dir/grade-NaN.txt.
#
# NOTE: only tab-terminated cells are copied; any text after the last tab of a
# line (normally just the newline) is not written to the output.

use strict;

my $dir      = "Controls_061514";
my @filename = ("race", "sex", "grade");

# Rows are padded while their cell count is <= this value, so every padded row
# ends up with $max_index + 1 cells. Rows that are already longer are not cut.
my $max_index = 16410;

foreach my $name (@filename)
{
    my $in_path  = "$dir/$name.txt";
    my $out_path = "$dir/$name-NaN.txt";

    open(my $in, '<', $in_path) or die "can't open file $in_path: $!";
    print "$name.txt\n";    # progress message
    open(my $out, '>', $out_path) or die "cant create file $out_path: $!";

    while (my $line = <$in>)
    {
        # Re-print the tab-terminated cells, counting them as we go.
        my $cells = 0;
        while ($line =~ s/^([^\t]*)\t//)
        {
            print $out "$1\t";
            $cells++;
        }
        # Then fill the rest of the row with NaN cells.
        while ($cells <= $max_index)
        {
            print $out "NaN\t";
            $cells++;
        }
        print $out "\n";
    }

    close $in;
    close $out or die "error writing $out_path: $!";
}
47 changes: 47 additions & 0 deletions perl/perl_bin_NaN_all.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/perl -w

# This program goes through all the binary files (names ending in
# "binary.txt") in $directory and adds NaN cells to the end of each row, so
# that all rows end up the same width and the file can then be turned into a
# matrix.
#
# Output: $directory/NaN/<prefix>-NaN.txt, where <prefix> is the input name
# with the trailing "binary.txt" removed (so "Q1-binary.txt" gives
# "Q1--NaN.txt").
#
# NOTE: only tab-terminated cells are copied; any text after the last tab of a
# line (normally just the newline) is not written to the output.

use strict;

my $directory = "results_061514";

# Rows are padded while their cell count is <= this value, so every padded row
# ends up with $max_index + 1 cells. Rows that are already longer are not cut.
my $max_index = 16410;

opendir(my $dir_handle, $directory) or die "can't open directory $directory: $!";
my @files = readdir $dir_handle;    # names of everything in that folder
closedir $dir_handle;               # directory handles need closedir, not close

unless (-d "$directory/NaN")
{
    mkdir("$directory/NaN") or die "can't create directory $directory/NaN: $!";
}

foreach my $entry (@files)
{
    # Make sure nothing else is picked up: only the binary files are wanted.
    next unless $entry =~ /binary\.txt$/;

    my $in_path = "$directory/$entry";
    (my $prefix = $entry) =~ s/binary\.txt$//;    # "Q1-binary.txt" -> "Q1-"
    my $out_path = "$directory/NaN/$prefix-NaN.txt";

    open(my $in,  '<', $in_path)  or die "can't open file $in_path: $!";
    open(my $out, '>', $out_path) or die "cant create file $out_path: $!";

    while (my $line = <$in>)
    {
        # Re-print the tab-terminated cells, counting them as we go.
        my $cells = 0;
        while ($line =~ s/^([^\t]*)\t//)
        {
            print $out "$1\t";
            $cells++;
        }
        # Then fill the rest of the row with NaN cells.
        while ($cells <= $max_index)
        {
            print $out "NaN\t";
            $cells++;
        }
        print $out "\n";
    }

    close $in;
    close $out or die "error writing $out_path: $!";
}
3 changes: 1 addition & 2 deletions perl/perl_bin_cat_NAN.pl
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@
use strict;

my $counter=0;
my $directory=".";
if ($ARGV[0]=~/\w/){$directory=$ARGV[0];}else {print "ERROR IN FILE";}
my $directory='results_061514';
mkdir "$directory/cat";
opendir (DIR, "$directory") or die; #where the binary files are
my @files = readdir DIR; #creates an array 'files' with he names of all the files in that folder
Expand Down
187 changes: 187 additions & 0 deletions perl/perl_controls.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
#!/usr/bin/perl -w

# Creates three tab-delimited files (race, sex, grade) with one row per survey
# year listed in @year, plus a log with the number of rows read per year and
# how many of them had no race answer.
#
# Input : <name>.txt for every name in @filename, read from the current
#         directory; tab-delimited, the first line of each file is skipped.
# Output: $out/race.txt, $out/sex.txt, $out/grade.txt
#             one line per year: the year, then one cell per input row
#         $out/race-log.log
#             one line per year: year, total rows, rows with no race answer
#
# Race is recoded to: 0 no answer, 1 white, 2 black, 3 hispanic, 4 other,
# "NaN" for any answer outside the known codes. Sex and grade are copied as
# they are, except that an empty answer becomes 0.

use strict;

my @filename = ("Controls_2013", "Controls_2011", "Controls_2009", "Controls_2007", "Controls_2005", "Controls_2003", "Controls_2001");
my @year     = (2013, 2011, 2009, 2007, 2005, 2003, 2001);
my $out      = "Controls_061514";

# The column positions and the race codes differ between the two groups of
# years. 'first' and 'last' are indexes into @filename / @year.
my @layouts = (
    {
        # 2013-2007: the race is in the first column
        # 1:other, 2:other, 3:black, 4:other, 5:white, 6:hispanic, 7:hispanic, 8:other
        first     => 0,
        last      => 3,
        race_col  => 0,
        sex_col   => 2,
        grade_col => 3,
        race_code => { 5 => 1, 3 => 2, 6 => 3, 7 => 3, 1 => 4, 2 => 4, 4 => 4, 8 => 4 },
    },
    {
        # 2005-2001: the race is in the fourth column
        # 1:other, 2:other, 3:black, 4:hispanic, 5:other, 6:white, 7:hispanic, 8:other
        first     => 4,
        last      => 6,
        race_col  => 3,
        sex_col   => 1,
        grade_col => 2,
        race_code => { 6 => 1, 3 => 2, 4 => 3, 7 => 3, 1 => 4, 2 => 4, 5 => 4, 8 => 4 },
    },
);

# year => reference to the list of answers, in input order
my %final_hash  = ();    # race
my %final_sex   = ();
my %final_grade = ();

# Per-file counters, indexed like @year. Start at 0 so that a year without
# missing answers is logged as 0 rather than as an empty cell.
my @miss  = (0) x scalar(@year);
my @total = (0) x scalar(@year);

unless (-d $out)
{
    mkdir($out) or die "can't create directory $out: $!";
}

foreach my $layout (@layouts)
{
    foreach my $i ($layout->{first} .. $layout->{last})
    {
        my $in_path = "$filename[$i].txt";
        open(my $in, '<', $in_path) or die "can't open file $in_path: $!\n";
        my $header = <$in>;    # the first line is not data
        while (my $row = <$in>)
        {
            # Strip the line ending so it cannot end up inside the last cell.
            $row =~ s/\r?\n\z//;
            my @questions = split(/\t/, $row, -1);

            my $race  = field_or_empty(\@questions, $layout->{race_col});
            my $sex   = field_or_empty(\@questions, $layout->{sex_col});
            my $grade = field_or_empty(\@questions, $layout->{grade_col});

            #deal with race----------------
            if ($race !~ /\w/)    # no answer
            {
                $race = 0;
                $miss[$i]++;
            }
            else
            {
                $race = recode_race($race, $layout->{race_code});
            }
            push @{ $final_hash{ $year[$i] } }, $race;

            #deal with sex---------------------------
            $sex = 0 if $sex !~ /\w/;
            push @{ $final_sex{ $year[$i] } }, $sex;

            #deal with grade---------------------------
            $grade = 0 if $grade !~ /\w/;
            push @{ $final_grade{ $year[$i] } }, $grade;

            $total[$i]++;
        }
        close $in;
    }
}

# put it all into the tab-delimited files
write_table("$out/race.txt",  \%final_hash);
write_table("$out/sex.txt",   \%final_sex);
write_table("$out/grade.txt", \%final_grade);

# Log every year in @year (not a fixed count, so newly added years are included).
my $log_path = "$out/race-log.log";
open(my $log, '>', $log_path) or die "can't create file $log_path: $!";
print $log "Year \tTotal# \tMissing\n";
foreach my $i (0 .. $#year)
{
    print $log "$year[$i]\t$total[$i]\t$miss[$i]\n";
}
close $log or die "error writing $log_path: $!";

# Returns element $index of the row, or an empty string when the row is too
# short to have that column.
sub field_or_empty
{
    my ($fields, $index) = @_;
    return defined $fields->[$index] ? $fields->[$index] : "";
}

# Maps a non-empty raw race answer to its recoded value using numeric
# comparison against the keys of $code_for; returns "NaN" when no code matches.
sub recode_race
{
    my ($raw, $code_for) = @_;
    foreach my $code (keys %$code_for)
    {
        return $code_for->{$code} if $raw == $code;
    }
    return "NaN";
}

# Writes one line per year (sorted): the year, then every answer of that year,
# each cell followed by a tab.
sub write_table
{
    my ($path, $answers_for) = @_;
    open(my $table, '>', $path) or die "cant't create file $path: $!";
    foreach my $key (sort keys %$answers_for)
    {
        print $table "$key\t";
        foreach my $ans (@{ $answers_for->{$key} })
        {
            print $table "$ans\t";
        }
        print $table "\n";
    }
    close $table or die "error writing $path: $!";
    return;
}
Loading

0 comments on commit d57c3f2

Please sign in to comment.