Skip to content

Commit

Permalink
Creating a set of analyzers which do not hold the history of the metr…
Browse files Browse the repository at this point in the history
…ics but just a single metric per type and user. Added a small Python script to create an ARFF file to visualize in Weka
  • Loading branch information
fotisp committed Jan 8, 2012
1 parent 01a7a13 commit bac4c95
Show file tree
Hide file tree
Showing 18 changed files with 634 additions and 39 deletions.
13 changes: 13 additions & 0 deletions eu.alert-project.iccs.analysis/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Maven descriptor for the eu.alert-project.iccs.analysis directory.
It uses "pom" packaging and declares only the project coordinates:
no modules, dependencies or build configuration are defined in this file.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<!-- Project coordinates -->
<groupId>eu.alert-project.iccs.analysis</groupId>
<artifactId>core</artifactId>
<version>0.0.1-SNAPSHOT</version>
<packaging>pom</packaging>
<name>eu.alert-project.iccs.analysis</name>

</project>
2 changes: 2 additions & 0 deletions eu.alert-project.iccs.analysis/py/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*.pyc
config.py
23 changes: 23 additions & 0 deletions eu.alert-project.iccs.analysis/py/config.py.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Template for config.py: copy this file to config.py and adjust the values.

# The connection details of the stardom database
alert_db_host="localhost"
alert_db_port=8889
alert_db_user="alert"
alert_db_name="alert_dev"
alert_db_password="1234"


# The connection details of the cvsanaly database
cvsanaly_db_host="localhost"
cvsanaly_db_port=8889
cvsanaly_db_user="alert"
cvsanaly_db_name="cvsanaly_kde_solid"
cvsanaly_db_password="1234"


# The location of the file containing the betweenness values
io_betweenness_file="~/Betweenness.csv"

# The directory where the results should be stored
io_results_directory="~/tmp"

#### Do not modify under this line
226 changes: 226 additions & 0 deletions eu.alert-project.iccs.analysis/py/create_weka_correlation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
#!/usr/bin/env python
from datetime import date, datetime

import MySQLdb
import csv
import re
import time
import config


# (Re)creates the `identity_csvid` table with the columns identity_id,
# user_id, name and email.
# NOTE: despite the name this is a compound statement (DROP followed by
# CREATE) that is sent to the server in a single execute() call.
QRY_DROP_TABLE = (
    "DROP TABLE IF EXISTS `identity_csvid` ; "
    "CREATE TABLE `identity_csvid` ( "
    "`identity_id` INT NOT NULL, "
    "`user_id` INT NOT NULL, "
    "`name` TEXT NOT NULL, "
    "`email` TEXT NOT NULL) ENGINE = InnoDB;"
)

# Selects id, name and email of every row in the `people` table.
QRY_SELECT_ALL_PEOPLE = "select id,name, email from people"

# Looks up the identity ids attached to profiles with a given email;
# takes the email as its only parameter.
QRY_SELECT_IDENTITY = """select distinct identity_id
from profile p
join identity_is_profile i on p.id=i.profile_id
where email=%s;"""

# Inserts one (identity_id, user_id, name, email) row into `identity_csvid`.
QRY_INSERT_CSVID = """insert into identity_csvid VALUES(%s,%s,%s,%s) ;"""




def date_distance(date1):
    """Return the number of whole days between the reference date and date1.

    The reference date is fixed at 2012-01-01 17:31:22, so the result is
    negative for earlier dates.

    date1 may be either a string formatted as '%Y-%m-%d %H:%M:%S' or a
    ``datetime`` instance. The latter is accepted because database drivers
    commonly hand back date columns as ``datetime`` objects, which
    ``strptime`` cannot parse.

    Raises ValueError if a string argument does not match the format.
    """
    fmt = '%Y-%m-%d %H:%M:%S'
    reference = datetime.strptime('2012-01-01 17:31:22', fmt)

    if isinstance(date1, datetime):
        target = date1
    else:
        target = datetime.strptime(date1, fmt)

    return (target - reference).days

def get_alert_connection():
    """Open and return a new MySQLdb connection to the alert database.

    All connection settings are taken from the ``alert_db_*`` values of the
    ``config`` module.
    """
    settings = {
        "host": config.alert_db_host,
        "port": config.alert_db_port,
        "user": config.alert_db_user,
        "passwd": config.alert_db_password,
        "db": config.alert_db_name,
    }
    return MySQLdb.connect(**settings)




def people_mapping_init():
    """Map cvsanaly people to alert identities and return the mapping.

    Side effects on the alert database: the `identity_csvid` table is dropped
    and recreated, and one row is inserted for every cvsanaly person whose
    (normalised) email matches a profile. The inserts are committed before
    the connection is closed.

    Returns a dict mapping the cvsanaly person id to the alert identity id.
    People without a matching identity are reported on stdout and left out.
    """
    user_to_identity_dict = {}
    repl = ' '

    alert_conn = get_alert_connection()
    cvsanaly_conn = None
    try:
        cvsanaly_conn = MySQLdb.connect(
            host=config.cvsanaly_db_host,
            port=config.cvsanaly_db_port,
            user=config.cvsanaly_db_user,
            passwd=config.cvsanaly_db_password,
            db=config.cvsanaly_db_name)

        # Apparently this is a compound statement and
        # we should close and reopen the cursor
        alert_cursor = alert_conn.cursor()
        try:
            alert_cursor.execute(QRY_DROP_TABLE)
        finally:
            alert_cursor.close()

        csv_cursor = cvsanaly_conn.cursor()
        try:
            csv_cursor.execute(QRY_SELECT_ALL_PEOPLE)
            all_cvsanaly_people = csv_cursor.fetchall()
        finally:
            csv_cursor.close()

        for user_id, name, email in all_cvsanaly_people:
            # Punctuation in the email and quotes in the name are replaced
            # by spaces before the lookup / insert.
            email = re.sub(r"[-_\+\.@]", repl, email)
            name = re.sub(r"[\']", repl, name)

            print("Processing %s => %s " % (user_id, email,))

            alert_cursor = alert_conn.cursor()
            try:
                alert_cursor.execute(QRY_SELECT_IDENTITY, (email,))
                alert_row = alert_cursor.fetchone()

                if alert_row is None:
                    print('%s NOT FOUND!!! ' % email)
                    continue

                identity_id = alert_row[0]
                print("Identity id %s found for %s " % (identity_id, email,))
                alert_cursor.execute(
                    QRY_INSERT_CSVID, (identity_id, user_id, name, email,))

                user_to_identity_dict[user_id] = identity_id
            finally:
                # Also runs on the NOT FOUND path, which used to leak the cursor.
                alert_cursor.close()

        # MySQLdb does not autocommit by default; without this the rows
        # inserted into the InnoDB table are rolled back on close().
        alert_conn.commit()
    finally:
        if cvsanaly_conn is not None:
            cvsanaly_conn.close()
        alert_conn.close()

    return user_to_identity_dict


def get_metric(connection, query, identity_id):
    """Run a single-parameter metric query and return its value.

    connection  -- an open DB-API connection
    query       -- SQL with one placeholder, filled with identity_id
    identity_id -- the identity whose metric is requested

    Returns the second column (index 1) of the first result row, or 0 when
    the query yields no row. The cursor is closed even if the query fails.
    """
    print("Executing "+query)
    cursor = connection.cursor()
    try:
        cursor.execute(query, (identity_id,))
        row = cursor.fetchone()
    finally:
        cursor.close()

    if row is None:
        return 0
    return row[1]

def get_quantitative_metric(connection, identity_id , metric_name):
    """Fetch the quantity of one quantitative metric for an identity.

    metric_name is the name of the metric table and is spliced into the SQL
    text; the identity id stays a driver-side placeholder. The lookup itself
    is delegated to get_metric, whose result is returned unchanged.
    """
    template = """select m.id,quantity,m.identity_id,m.created_at
from %s as scm
inner join metric_quantitative as mq on scm.id=mq.id
inner join metric as m on scm.id=m.id
where m.identity_id=%s
limit 1;"""

    # The second "%s" is re-inserted literally so that it survives as the
    # placeholder for identity_id.
    substitutions = (metric_name, "%s")
    statement = template % substitutions

    return get_metric(connection, statement, identity_id)


def get_temporal_metric(connection, identity_id , metric_name):
    """Fetch one temporal metric for an identity as a distance in days.

    metric_name is the name of the metric table and is spliced into the SQL
    text; the identity id stays a driver-side placeholder.

    Returns date_distance() of the stored temporal value, or 0 when the
    identity has no row for this metric (get_metric reports that case as 0,
    which is not a parseable date).
    """
    # Columns are qualified with the `metric` alias, as in the quantitative
    # query, so the statement does not depend on which joined tables happen
    # to carry an identity_id / created_at column.
    qry = """select m.id,temporal,m.identity_id,m.created_at
from %s as scm
inner join metric_temporal as mt on scm.id=mt.id
inner join metric as m on scm.id=m.id
where m.identity_id=%s
limit 1;""" % (metric_name, "%s",)

    # get_metric takes (connection, query, identity_id); passing metric_name
    # as an extra argument raised a TypeError.
    temporal = get_metric(connection, qry, identity_id)
    if not temporal:
        return 0

    return date_distance(temporal)


if __name__ == '__main__':
    # Script entry point: writes an ARFF file with one data row per line of
    # the betweenness CSV: identity id, every enabled metric, betweenness.
    import os  # only needed here, for path handling

    # metric table name -> [enabled flag (exported when >= 1), kind]
    # kind "q" = quantitative metric, "t" = temporal metric
    metrics = {
        "scm_activity_metric"           :[1, "q"],
        "its_activity_metric"           :[1, "q"],
        "mailing_list_activity_metric"  :[1, "q"],
        "scm_api_introduced_metric"     :[1, "q"],
        "scm_api_usage_count_metric"    :[1, "q"],

        "scm_temporal_metric"           :[0, "t"],
        "its_temporal_metric"           :[0, "t"],
        "mailing_list_temporal_metric"  :[0, "t"],
    }

    # You probably shouldn't bother with what is under this line
    #
    # ---------------------------------------------------------
    #

    # Resolved once so the header and every data row list the metrics in the
    # same order.
    enabled_metrics = [
        (metric_name, values[1])
        for metric_name, values in metrics.items()
        if values[0] >= 1
    ]

    # The config template names this setting io_results_directory; fall back
    # to io_results_file for config.py files written for the older name.
    results_directory = getattr(config, "io_results_directory", None)
    if results_directory is None:
        results_directory = config.io_results_file

    # open() does not expand "~", which the template uses for both paths.
    # The timestamp now includes the minutes (%M), which were missing.
    output_file = os.path.join(
        os.path.expanduser(results_directory),
        "correlation-%s.arff" % datetime.now().strftime("%Y%m%d%H%M%S"))
    betweenness_file = os.path.expanduser(config.io_betweenness_file)

    print("Creating output file %s" % output_file)

    with open(output_file, 'wb') as bugAWriter:
        bugAWriter.write(
            " % 1. Title: Bug Solution Dataset Numeric\n"
            "%\n"
            "% 2. Sources:\n"
            "%(a) Creator: ALERT Project\n"
            "%(b) Date: August, 2011\n"
            "%\n\n"
        )

        bugAWriter.write("@RELATION bug\n")
        bugAWriter.write("\n\n\n")
        bugAWriter.write("@ATTRIBUTE identity_id NUMERIC\n")

        # Go over the enabled metrics and create the heading
        for metric_name, _kind in enabled_metrics:
            bugAWriter.write("@ATTRIBUTE %s NUMERIC\n" % metric_name)

        bugAWriter.write("@ATTRIBUTE betweeness NUMERIC\n")
        bugAWriter.write("\n@DATA\n")

        people_map_dict = people_mapping_init()

        alert_conn = get_alert_connection()
        try:
            with open(betweenness_file, 'rb') as betweenness_input:
                spamReader = csv.reader(
                    betweenness_input, delimiter='\t', quotechar='|')

                for row in spamReader:
                    if not row:
                        continue

                    user_id = int(row[0])
                    identity_id = people_map_dict.get(user_id)
                    if identity_id is None:
                        # people_mapping_init() leaves out people without a
                        # matching identity; skip them instead of crashing.
                        print("No identity mapped for user %s, skipping" % user_id)
                        continue

                    btness = row[1]

                    result = [identity_id]
                    for metric_name, kind in enabled_metrics:
                        if kind == 'q':
                            result.append(get_quantitative_metric(
                                alert_conn, identity_id, metric_name))
                        elif kind == 't':
                            result.append(get_temporal_metric(
                                alert_conn, identity_id, metric_name))

                    result.append(btness)
                    bugAWriter.write(', '.join(str(x) for x in result) + "\n")
        finally:
            alert_conn.close()
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,26 @@

<dependencies>

<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-core</artifactId>
</dependency>

<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-context</artifactId>
</dependency>

<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-support</artifactId>
</dependency>

<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-jdbc</artifactId>
</dependency>

<dependency>
<groupId>net.sf.jung</groupId>
<artifactId>jung-api</artifactId>
Expand Down Expand Up @@ -42,6 +62,11 @@
<artifactId>jung-jai</artifactId>
</dependency>

<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
</dependency>

<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,18 @@ public Graph<Integer, String> create(File inputFile,File outputFile){
}


//correct authors




logger.trace("void main(args) Vertex Count: {} ", graph.getVertexCount());

bc = new BetweennessCentrality<Integer, String>(graph, TransformerUtils.mapTransformer(weights));
logger.trace("void main(args) Betweeness calculated");



fos = new FileOutputStream(outputFile);
osw = new OutputStreamWriter(fos, "UTF-8");
fw = new BufferedWriter(osw);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ public boolean evaluate(String s) {
}
}).transform(simpleGraph);


return simpleGraph;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,20 +174,20 @@
<!--<bean class="eu.alertproject.iccs.stardom.analyzers.forums.bus.ForumEventHandler>-->

<bean class="eu.alertproject.iccs.stardom.analyzers.its.constructor.ItsActivityAnalyzer"/>
<bean class="eu.alertproject.iccs.stardom.analyzers.its.constructor.ItsTemporalAnalyzer"/>
<!--<bean class="eu.alertproject.iccs.stardom.analyzers.its.constructor.ItsTemporalAnalyzer"/>-->


<!-- Analyzers -->

<bean class="eu.alertproject.iccs.stardom.analyzers.scm.constructor.ScmActivityAnalyzer"/>
<bean class="eu.alertproject.iccs.stardom.analyzers.scm.constructor.ScmTemporalAnalyzer"/>
<!--<bean class="eu.alertproject.iccs.stardom.analyzers.scm.constructor.ScmTemporalAnalyzer"/>-->
<bean class="eu.alertproject.iccs.stardom.analyzers.scm.constructor.ScmApiIntroducedAnalyzer"/>



<!--<bean class="eu.alertproject.iccs.stardom.analyzers.forums.constructor.ForumActivityAnalyzer"/>-->
<bean class="eu.alertproject.iccs.stardom.analyzers.mailing.constructor.MailingListActivityAnalyzer"/>
<bean class="eu.alertproject.iccs.stardom.analyzers.mailing.constructor.MailingListTemporalAnalyzer"/>
<!--<bean class="eu.alertproject.iccs.stardom.analyzers.mailing.constructor.MailingListTemporalAnalyzer"/>-->



Expand Down
Loading

0 comments on commit bac4c95

Please sign in to comment.