initial
vitrun committed Dec 16, 2009
0 parents commit 2227828
Showing 12 changed files with 2,364 additions and 0 deletions.
1,750 changes: 1,750 additions & 0 deletions a.html

Large diffs are not rendered by default.

41 changes: 41 additions & 0 deletions cream.data
@@ -0,0 +1,41 @@
20 4 1
270 0.97 0.0 0.0
1
353 0.98 0.0 0.98
1
100 0.92 0 0.6
1
426 0.98 0.98 0.97
1
292 0.97 0.98 0.98
1
276 0.97 0.98 0.0
1
154 0.95 0.97 0
1
114 0.94 0.98 0.96
1
0 0 0 0
-1
79 0 0 0
-1
10 0 0 0
-1
0 0 0.92 0.93
-1
0 0 0.31 0.07
-1
12 0.17 0 0
-1
32 0.42 0 0
-1
118 0.81 0 0
-1
123 0.82 0 0.31
-1
18 0.55 0 0.73
-1
34 0.33 0.23 0.82
-1
200 0.89 0.06 0
-1
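
For context: this looks like a FANN training-data file. The header line "20 4 1" declares 20 training pairs, 4 inputs, and 1 output; each pair is a line of inputs followed by a line with the target value (1 or -1 here). A minimal loading sketch, assuming the old pyfann bindings used elsewhere in this commit:

# Sketch: parse cream.data with pyfann's training_data (assumed API).
from pyfann import libfann

data = libfann.training_data()
data.read_train_from_file('cream.data')   # reads the "20 4 1" header and the 20 pairs
print data.length_train_data()            # should print 20 if the file parses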
34 changes: 34 additions & 0 deletions cream.net
@@ -0,0 +1,34 @@
FANN_FLO_2.1
num_layers=2
learning_rate=0.700000
connection_rate=1.000000
network_type=0
learning_momentum=0.000000
training_algorithm=2
train_error_function=1
train_stop_function=0
cascade_output_change_fraction=0.010000
quickprop_decay=-0.000100
quickprop_mu=1.750000
rprop_increase_factor=1.200000
rprop_decrease_factor=0.500000
rprop_delta_min=0.000000
rprop_delta_max=50.000000
rprop_delta_zero=0.100000
cascade_output_stagnation_epochs=12
cascade_candidate_change_fraction=0.010000
cascade_candidate_stagnation_epochs=12
cascade_max_out_epochs=150
cascade_max_cand_epochs=150
cascade_num_candidate_groups=2
bit_fail_limit=3.49999999999999977796e-01
cascade_candidate_limit=1.00000000000000000000e+03
cascade_weight_multiplier=4.00000000000000022204e-01
cascade_activation_functions_count=10
cascade_activation_functions=3 5 7 8 10 11 14 15 16 17
cascade_activation_steepnesses_count=4
cascade_activation_steepnesses=2.50000000000000000000e-01 5.00000000000000000000e-01 7.50000000000000000000e-01 1.00000000000000000000e+00
layer_sizes=5 2
scale_included=0
neurons (num_inputs, activation_function, activation_steepness)=(0, 0, 0.00000000000000000000e+00) (0, 0, 0.00000000000000000000e+00) (0, 0, 0.00000000000000000000e+00) (0, 0, 0.00000000000000000000e+00) (0, 0, 0.00000000000000000000e+00) (5, 6, 5.00000000000000000000e-01) (0, 6, 0.00000000000000000000e+00)
connections (connected_to_neuron, weight)=(0, 1.16192409060151324862e-01) (1, 2.55974968703720300311e+01) (2, 1.46277055544460896641e+01) (3, 3.51973527701262511869e+01) (4, -5.13725333978066203144e+01)
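
This is a FANN save file for the trained network: layer_sizes=5 2 corresponds to the 4-input, 1-output net built in fann.py below, plus one bias neuron per layer. A hedged sketch of restoring and querying it, assuming the pyfann API; the input row is copied from cream.data:

# Sketch: load the saved net and run one example (assumed pyfann API).
from pyfann import libfann

ann = libfann.neural_net()
ann.create_from_file('cream.net')
print ann.run([270, 0.97, 0.0, 0.0])   # a row labelled 1 in cream.data; expect output near 1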
14 changes: 14 additions & 0 deletions creamer.py
@@ -0,0 +1,14 @@
#coding:utf-8
from urllib import urlopen
import pageparser, datamgr

url = 'http://house.focus.cn/showarticle/1911/572831.html'
rawContent = datamgr.to_utf8(urlopen(url).read())
parser = pageparser.CreamParser()  # renamed from "pageparser", which shadowed the module
parser.feed(rawContent)
#~ print 'url: ', url
#~ print 'title: ', parser.spot.title
#~ print 'keywords: ', parser.spot.keywords
#~ print 'body data: ', parser.bdata
parser.get_cream()  # extract the main content (the "cream") of the page
#~ print parser.cream
69 changes: 69 additions & 0 deletions datamgr.py
@@ -0,0 +1,69 @@
#coding:utf-8
from os import path
from types import StringTypes
import chardet

class Spot(object):
    def __init__(self, url, title='', keywords='', timestamp='', literal=''):
        self.url = url
        self.title = title
        self.keywords = keywords
        self.literal = literal
        self.timestamp = timestamp
        self.scream = None

    def set_scream(self, scream):
        self.scream = scream
    #~ def __str__(self):
        #~ return self.url
    #~ def __eq__(self,item):
        #~ return self.url==str(item).lower()

class CaselessDict(dict):
    """Dictionary whose string keys are compared case-insensitively."""

    def __init__(self, mapping=None):
        if mapping:
            if type(mapping) is dict:
                for k, v in mapping.items():  # was d.items(); d is undefined in this branch
                    self.__setitem__(k, v)
            elif type(mapping) in (list, tuple):
                d = dict(mapping)
                for k, v in d.items():
                    self.__setitem__(k, v)

        # super(CaselessDict, self).__init__(d)

    def __setitem__(self, name, value):
        # lower-case string keys so lookups are case-insensitive
        if type(name) in StringTypes:
            super(CaselessDict, self).__setitem__(name.lower(), value)
        else:
            super(CaselessDict, self).__setitem__(name, value)

    def __getitem__(self, name):
        if type(name) in StringTypes:
            return super(CaselessDict, self).__getitem__(name.lower())
        else:
            return super(CaselessDict, self).__getitem__(name)

    def __copy__(self):
        pass  # copying intentionally unsupported for now

def to_utf8(data, sencoding=None):
    """Decode raw bytes to UTF-8: try the hinted encoding, then GB18030,
    then GBK, then whatever chardet detects; fall back to the raw data."""
    if sencoding:
        try:
            return data.decode(sencoding).encode('utf-8')
        except Exception, e:
            pass

    try:
        return data.decode('GB18030').encode('utf-8')  # was 'GBK18030', which is not a codec
    except Exception, e:
        try:
            return data.decode('GBK').encode('utf-8')
        except Exception, e:
            try:
                sencoding = chardet.detect(data)['encoding']
                return data.decode(sencoding).encode('utf-8')
            except Exception, e:
                return data

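A brief usage sketch of the two helpers above; the GBK byte string is an illustrative example:

# Sketch: exercising datamgr's CaselessDict and to_utf8.
import datamgr

d = datamgr.CaselessDict([('Title', 'hello')])
print d['title']                            # keys are lower-cased on insert and lookup

gbk_bytes = u'\u623f\u4ea7'.encode('GBK')   # two Chinese characters as GBK bytes
print datamgr.to_utf8(gbk_bytes)            # decoded via GB18030, re-encoded as UTF-8
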
32 changes: 32 additions & 0 deletions fann.py
@@ -0,0 +1,32 @@
from pyfann.libfann import neural_net, SIGMOID_SYMMETRIC_STEPWISE

connectionRate = 1
learningRate = 0.7
neuronsHiddenNum = 4

desiredError = 0.00005
maxIterations = 100000
iterationsBetweenReports = 1000
inNum = 4
outNum = 1

class NeuNet(neural_net):
    """FANN network with 4 inputs wired directly to 1 symmetric-sigmoid output."""
    def __init__(self):
        neural_net.__init__(self)
        #~ neural_net.create_sparse_array(self, connectionRate, (inNum, neuronsHiddenNum, outNum))
        neural_net.create_standard_array(self, (inNum, outNum))
        neural_net.set_learning_rate(self, learningRate)
        neural_net.set_activation_function_output(self, SIGMOID_SYMMETRIC_STEPWISE)

    def train_on_file(self, fileName):
        neural_net.train_on_file(self, fileName, maxIterations, iterationsBetweenReports, desiredError)

#~ Reference usage from the FANN examples:
#~ ann = libfann.neural_net()
#~ ann.create_sparse_array(connection_rate, (num_input, num_neurons_hidden, num_output))
#~ ann.set_learning_rate(learning_rate)
#~ ann.set_activation_function_output(libfann.SIGMOID_SYMMETRIC_STEPWISE)
#~ ann.train_on_file("../../examples/xor.data", max_iterations, iterations_between_reports, desired_error)
#~ ann.save("xor_float.net")

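A hedged sketch tying this commit together: train NeuNet on cream.data and save a net like cream.net above. File names are the ones in this commit; save and run are standard pyfann calls:

# Sketch: train on cream.data and save the result.
from fann import NeuNet

ann = NeuNet()
ann.train_on_file('cream.data')          # 20 pairs, 4 inputs, 1 output
ann.save('cream.net')                    # FANN save format, as shown above
print ann.run([426, 0.98, 0.98, 0.97])   # a positive row from cream.data
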
81 changes: 81 additions & 0 deletions grubbs.py
@@ -0,0 +1,81 @@
import math

# Grubbs critical ratios indexed by sample size n; each entry holds the
# critical values for significance levels [a=0.05, a=0.01].
GrubbsRatio={3:[1.15,1.16],
             4:[1.46,1.49],
             5:[1.67,1.75],
             6:[1.82,1.94],
             7:[1.94,2.10],
             8:[2.03,2.22],
             9:[2.11,2.32],
             10:[2.18,2.41],
             11:[2.23,2.48],
             12:[2.28,2.55],
             13:[2.33,2.61],
             14:[2.37,2.66],
             15:[2.41,2.70],
             16:[2.44,2.75],
             17:[2.48,2.78],
             18:[2.50,2.82],
             19:[2.53,2.85],
             20:[2.56,2.88],
             21:[2.58,2.91],
             22:[2.60,2.94],
             23:[2.62,2.96],
             24:[2.64,2.99],
             25:[2.66,3.01],
             # the following values were not guaranteed to be accurate:
             26:[2.68,3.03],
             27:[2.70,3.05],
             28:[2.72,3.07],
             29:[2.73,3.09],
             30:[2.74,3.10],
             }

def grubb_eleminate_outliers(rawList, a=0.05):
    """Remove values whose deviation from the mean exceeds the Grubbs
    critical ratio times the sample standard deviation."""
    if a==0.05:
        idx=0
    else:
        idx=1
    count=len(rawList)
    if count<=2 or count>30:  # the table only covers sample sizes 3..30
        return rawList
    ave=average(rawList)
    std=get_variance(rawList, ave)
    newList=[]
    for i in rawList:
        if math.fabs((ave-i)/float(std))<GrubbsRatio[count][idx]:
            newList.append(i)
    return newList

def get_variance(inList, ave):
    # Note: despite its name, this returns the sample standard deviation.
    total=0
    for i in inList:
        var=i-ave
        total+=var*var
    num=len(inList)
    if num>1:
        return math.sqrt(total/float(num-1))
    return None

def average(inList):
    total=0
    for i in inList:
        total+=i
    num=len(inList)
    if num>0:
        return total/float(num)
    return None
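
An illustrative run of the outlier filter; the numbers are made up, and with n=6 the a=0.05 critical ratio is 1.82:

# Sketch: Grubbs filtering on a small sample with one obvious outlier.
import grubbs

readings = [9.8, 10.1, 10.0, 9.9, 10.2, 25.0]
print grubbs.grubb_eleminate_outliers(readings)   # 25.0 deviates ~2.04 std devs > 1.82, so it is dropped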