diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7ab4689
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,28 @@
+# NOTE: "syntax: glob" is a Mercurial .hgignore directive; Git does not
+# understand it and would treat the line as a literal ignore pattern.
+
+.hg
+*.coverage
+*.egg-info
+*.log
+*.pyc
+*.db
+*.swp
+*.swo
+*.zip
+*.orig
+*.cfg
+*.tox
+
+build
+
+*~
+
+fab_settings.py
+production_settings.py
+
+dist
+docs/output
+
+_uploads
+
+.hgignore
diff --git a/code.py b/code.py
new file mode 100644
index 0000000..9c81d30
--- /dev/null
+++ b/code.py
@@ -0,0 +1,153 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# code.py: based on web.py
+#
+# author: observer
+# email: jingchaohu@gmail.com
+# blog: http://obmem.com
+# last edit @ 2009.12.23
+import os,sys
+import web
+from web import form
+import sqlite3
+import fetchvc
+import time
+
web.config.debug = False

# Directory containing this script; used for relative data files.
path = os.path.dirname(os.path.realpath(sys.argv[0]))

# Main (read-mostly) resource database used by the index views.
# NOTE(review): hard-coded Windows path 'f:/...' — will not exist on a
# non-Windows deployment host; the commented alternative is relative.
db = web.database(dbn='sqlite', db='f:/verycd.sqlite3.db')#db='verycd.sqlite3.db')
#customdb = web.database(dbn='sqlite',db='custom.sqlite3.db')

# Separate writable database for user-submitted resources (class add).
conn = sqlite3.connect('custom.sqlite3.db')
conn.text_factory = str  # return utf-8 byte strings, not unicode (py2 sqlite3)

# URL routing table: path -> handler class name.
# NOTE(review): 'edit', 'del' and 'egg' handlers are not defined in this file.
urls = (
    '/', 'index',
    '/add','add',
    '/edit','edit',
    '/del','del',
    '/egg','egg',
)

render = web.template.render('templates/')

# Field validators: password must be 3-20 characters; email must contain '@'.
# (The message strings are user-facing and intentionally left in Chinese.)
vform = form.regexp(r".{3,20}$", '资源密码必须是3-20长度的字符串')
vemail = form.regexp(r".*@.*", "必须提供合法的邮件地址")

# Submission form rendered/validated by the /add handler.
add_form = form.Form(
    form.Textbox("email",vemail,description="邮件地址"),
    form.Textbox("password",vform,description="资源密码"),
    form.Textbox("title", description="标题"),
    form.Textbox("brief", description="摘要"),
    form.Dropdown("category1",args=['电影','剧集','音乐','游戏','动漫','综艺','软件','资料'],value="电影",description="分类"),
    form.Textbox("category2",description="子类别"),
    form.Textarea("ed2k",value="[格式]\n文件名#地址\n文件名#地址#字幕地址\n",description="资源链接",cols=60,rows=5),
    form.Textarea("content",description="资源介绍",cols=60,rows=10),
    form.Button("提交", type="submit", description="提交"),
)

app = web.application(urls, globals())
+
class index:
    """Front page controller.

    Serves four views from the ``verycd`` table, 20 rows per page:
    a single-resource detail page (?id=), the latest-updates listing,
    a category listing (?cat=) and a title keyword search (?q=).
    """

    def _title_query(self, q):
        """Build a parameterized WHERE clause matching every
        space-separated word of *q* against the title column.

        Returns (vars_dict, where_sql).

        SECURITY FIX: the original interpolated the raw user keyword
        (and category) straight into the SQL text via string
        concatenation (``title like '%<input>%'``), which allowed SQL
        injection; web.py $-placeholders are used instead.
        """
        myvar = {}
        conds = []
        for n, word in enumerate(q.split(' ')):
            key = 'kw%d' % n
            myvar[key] = '%' + word + '%'
            conds.append('title like $' + key)
        return myvar, ' and '.join(conds)

    def GET(self):
        i = web.input(id=None, page='1', q=None, download=None, qa=None, cat=None)
        # pre-rendered "hot resources" fragment written by fetchvc.hot();
        # close the handle explicitly instead of leaking it
        hotfp = open('static/hot.html', 'r')
        try:
            hot = hotfp.read()
        finally:
            hotfp.close()

        # single-resource detail view
        if i.id:
            myvar = dict(id=i.id)
            rec = db.select('verycd', vars=myvar, where="verycdid=$id")
            for r in rec:
                fl = None
                if i.download:
                    # ed2k links are stored backtick-separated
                    links = r['ed2k'].split('`')
                    links = [x for x in links if 'ed2k:' in x]
                    fl = ' \n'.join(links)
                return render.id([r, fl, str(r['verycdid'])])
            return render.error(404)
        # listing / search views
        else:
            # deep search: queue the keyword for the background fetcher
            if i.qa:
                qa = '+'.join(i.qa.split(' '))
                fp = open(path + '/searchqueue', 'a')
                fp.write(qa.encode('utf-8') + '\n')
                fp.close()
                return render.fin(qa)
            # default: latest updates, no filter
            elif (not i.q) and (not i.cat):
                vc = db.select('verycd', order='updtime DESC', limit=20,
                               offset=20 * (int(i.page) - 1))
                num = db.select('verycd', what="count(*) as count")[0].count
                arg = '/?page'
            # category filter only
            elif (not i.q) and i.cat:
                myvar = dict(cat=i.cat)
                vc = db.select('verycd', order='updtime DESC', vars=myvar,
                               where='category1=$cat', limit=20,
                               offset=20 * (int(i.page) - 1))
                num = db.select('verycd', what="count(*) as count",
                                vars=myvar, where='category1=$cat')[0].count
                arg = '/?cat=' + i.cat + '&page'
            # keyword search within a category
            elif i.q and i.cat:
                myvar, where = self._title_query(i.q)
                myvar['cat'] = i.cat
                where += ' and category1=$cat'
                vc = db.select('verycd', order='updtime DESC', limit=20,
                               offset=20 * (int(i.page) - 1),
                               vars=myvar, where=where)
                num = db.select('verycd', what="count(*) as count",
                                vars=myvar, where=where)[0].count
                arg = '/?q=' + i.q + '&cat=' + i.cat + '&page'
            # keyword search over all categories
            else:
                myvar, where = self._title_query(i.q)
                vc = db.select('verycd', order='updtime DESC', limit=20,
                               offset=20 * (int(i.page) - 1),
                               vars=myvar, where=where)
                num = db.select('verycd', what="count(*) as count",
                                vars=myvar, where=where)[0].count
                arg = '/?q=' + i.q + '&page'

            # pagination: prev/next clamp to [1, last page].
            # NOTE(review): (num-1)/20+1 relies on py2 integer division.
            prev_page = int(i.page) - 1 == 0 and '1' or str(int(i.page) - 1)
            next_page = int(i.page) + 1 <= (num - 1) / 20 + 1 and str(int(i.page) + 1) or i.page
            end = str((num - 1) / 20 + 1)
            pages = [prev_page, next_page, end]
            # show up to 8 page links centered on the current page
            left = min(4, int(i.page) - 1)
            right = min(4, int(end) - int(i.page))
            if left < 4:
                right = min(8 - left, int(end) - int(i.page))
            if right < 4:
                left = min(8 - right, int(i.page) - 1)
            while left > 0:
                pages.append(str(int(i.page) - left))
                left -= 1
            j = 0
            while j <= right:
                pages.append(str(int(i.page) + j))
                j += 1
            return render.index([vc, pages, arg, i.q, num, i.cat, hot])
+
class add:
    """User submission of new resources.

    GET renders the empty submission form; POST validates it and stores
    the entry in the local ``custom`` database (module-level ``conn``)
    together with the submitter's email/password in ``user``.
    """

    def GET(self):
        # do $:f.render() in the template
        f = add_form()
        return render.add(f)

    def POST(self):
        f = add_form()
        if not f.validates():
            # re-render the form so validation messages are shown
            return render.add(f)
        else:
            # do whatever is required for registration
            # timestamp in GMT+8 (China Standard Time)
            now = time.strftime('%Y/%m/%d %H:%M:%S',time.gmtime(time.time()+3600*8))
            c = conn.cursor()
            c.execute('insert into custom (title,status,brief,pubtime,updtime,\
                category1,category2,ed2k,content) values(?,?,?,?,?,?,?,?,?)',\
                (f.title.get_value(),'新建',f.brief.get_value(),now,now,\
                f.category1.get_value(),f.category2.get_value(),\
                f.ed2k.get_value(),f.content.get_value()))
            # NOTE(review): the password is stored in plain text here —
            # consider hashing before insert.
            c.execute('insert into user (email,password,customid) values (?,?,?)',\
                (f.email.get_value(),f.password.get_value(),c.lastrowid))
            conn.commit()
            c.close()
            return '...'
+
+if __name__ == "__main__":
+ # web.wsgi.runwsgi = lambda func, addr=None: web.wsgi.runfcgi(func, addr)
+ app.run()
diff --git a/daemon.py b/daemon.py
new file mode 100644
index 0000000..db3c3a6
--- /dev/null
+++ b/daemon.py
@@ -0,0 +1,129 @@
+#!/usr/bin/env python
+
+import sys, os, time, atexit
+from signal import SIGTERM
+
class Daemon:
    """
    A generic daemon class.

    Usage: subclass the Daemon class and override the run() method.
    """
    def __init__(self, pidfile, stdin='/dev/null', stdout='/dev/null', stderr='/dev/null'):
        # Paths used to re-wire the standard streams after daemonizing,
        # plus the pidfile recording the running instance's pid.
        self.stdin = stdin
        self.stdout = stdout
        self.stderr = stderr
        self.pidfile = pidfile

    def daemonize(self):
        """
        Do the UNIX double-fork magic, see Stevens' "Advanced
        Programming in the UNIX Environment" for details (ISBN 0201563177)
        http://www.erlenstar.demon.co.uk/unix/faq_2.html#SEC16
        """
        try:
            pid = os.fork()
            if pid > 0:
                # exit first parent
                sys.exit(0)
        except OSError as e:
            # FIX: 'except OSError, e' -> 'as e' (py2.6+/py3 compatible)
            sys.stderr.write("fork #1 failed: %d (%s)\n" % (e.errno, e.strerror))
            sys.exit(1)

        # decouple from parent environment
        os.chdir("/")
        os.setsid()
        os.umask(0)

        # do second fork
        try:
            pid = os.fork()
            if pid > 0:
                # exit from second parent
                sys.exit(0)
        except OSError as e:
            sys.stderr.write("fork #2 failed: %d (%s)\n" % (e.errno, e.strerror))
            sys.exit(1)

        # redirect standard file descriptors.
        # FIX: file() replaced with open() — identical on py2, and file()
        # no longer exists on py3.
        sys.stdout.flush()
        sys.stderr.flush()
        si = open(self.stdin, 'r')
        so = open(self.stdout, 'a+')
        se = open(self.stderr, 'a+', 0)  # unbuffered (py2 semantics)
        os.dup2(si.fileno(), sys.stdin.fileno())
        os.dup2(so.fileno(), sys.stdout.fileno())
        os.dup2(se.fileno(), sys.stderr.fileno())

        # write pidfile; close explicitly instead of relying on refcounting
        atexit.register(self.delpid)
        pf = open(self.pidfile, 'w+')
        pf.write("%s\n" % str(os.getpid()))
        pf.close()

    def delpid(self):
        # atexit hook: remove the pidfile when the daemon exits
        os.remove(self.pidfile)

    def _readpid(self):
        """Return the pid recorded in the pidfile, or None if the file
        is absent or unreadable.

        FIX: also catches ValueError so a corrupt/empty pidfile no
        longer crashes start()/stop() with an unhandled exception.
        """
        try:
            pf = open(self.pidfile, 'r')
            try:
                return int(pf.read().strip())
            finally:
                pf.close()
        except (IOError, ValueError):
            return None

    def start(self):
        """
        Start the daemon
        """
        # Check for a pidfile to see if the daemon already runs
        pid = self._readpid()
        if pid:
            message = "pidfile %s already exist. Daemon already running?\n"
            sys.stderr.write(message % self.pidfile)
            sys.exit(1)

        # Start the daemon
        self.daemonize()
        self.run()

    def stop(self):
        """
        Stop the daemon
        """
        # Get the pid from the pidfile
        pid = self._readpid()
        if not pid:
            message = "pidfile %s does not exist. Daemon not running?\n"
            sys.stderr.write(message % self.pidfile)
            return  # not an error in a restart

        # Try killing the daemon process: keep signalling until the
        # process is gone, then clean up the pidfile.
        try:
            while 1:
                os.kill(pid, SIGTERM)
                time.sleep(0.1)
        except OSError as err:
            err = str(err)
            if err.find("No such process") > 0:
                if os.path.exists(self.pidfile):
                    os.remove(self.pidfile)
            else:
                print(str(err))
                sys.exit(1)

    def restart(self):
        """
        Restart the daemon
        """
        self.stop()
        self.start()

    def run(self):
        """
        You should override this method when you subclass Daemon. It will be called after the process has been
        daemonized by start() or restart().
        """
diff --git a/download.py b/download.py
new file mode 100644
index 0000000..30af443
--- /dev/null
+++ b/download.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# download.py: download with report
+#
+# author: observer
+# email: jingchaohu@gmail.com
+# blog: http://obmem.com
+# last edit @ 2009.12.23
+import os,sys
+import urllib,urllib2,cookielib
+import re
+from time import time,sleep
+
+path = os.path.dirname(os.path.realpath(sys.argv[0]))
+
+islogin = False
+isproxy = False
+
def useproxy(proxy='http://localhost:3128'):
    """Mark that a local HTTP proxy should be used for later fetches.

    The actual ProxyHandler wiring was disabled (commented out) in the
    original; only the module-level ``isproxy`` flag is flipped, and the
    *proxy* URL argument is currently unused.

    BUG FIX: the original had ``global isproxy`` commented out, so the
    assignment created a dead local variable and the module flag never
    changed.
    """
    global isproxy
    isproxy = True
+
def login():
    """Log in to verycd.com so later fetches can reach login-only pages.

    Installs a cookie-aware global urllib2 opener, scrapes the hidden
    'fk' field from the sign-in form, posts the credentials, and sets
    the module-level ``islogin`` flag.

    NOTE(review): the username/password are hard-coded below, and the
    response body is never checked — 'login succeed' is assumed.
    """
    print 'try to login...'
#    proxies = {'http':'http://localhost:3128'}
#    proxy_support = urllib2.ProxyHandler(proxies)
    cookie=cookielib.CookieJar()
#    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie), urllib2.HTTPHandler,proxy_support)
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie), urllib2.HTTPHandler)
    urllib2.install_opener(opener)

    print '...getting login form...'
    loginform = urllib2.urlopen('http://secure.verycd.com/signin/*/http://www.verycd.com/').read()
    # 'fk' is an anti-forgery token embedded in the sign-in form
    fk = re.compile(r'id="fk" value="(.*)"').findall(loginform)[0]
    postdata=urllib.urlencode({'username':'simcdple',
 'password':'simcdple',
 'continueURI':'http://www.verycd.com/',
 'fk':fk,
 'login_submit':'登录',
 })
    req = urllib2.Request(
        url = 'http://secure.verycd.com/signin/*/http://www.verycd.com/',
        data = postdata
    )
    # mimic a desktop browser so the site serves the normal form flow
    req.add_header('User-Agent','Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6')
    req.add_header('Accept','text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8')
    print '...login form submitted'
    result = urllib2.urlopen(req).read()
    print '...login succeed!'
    global islogin
    islogin = True
+
+#functions
def report(blocknum, bs, size, t):
    """Print a one-line progress report for an in-flight download.

    Parameters:
        blocknum -- number of blocks fetched so far
        bs       -- block size in bytes
        size     -- total size in bytes, or -1 when unknown
        t        -- seconds spent on the last block (0 is coerced to 1)
    """
    if t == 0:
        # avoid division by zero on the first/instant block
        t = 1
    done = str(blocknum * bs)
    speed = '%5.2f' % (bs / t / 1024)
    if size == -1:
        # total size unknown: bytes fetched and speed only
        print('%10s' % done + ' downloaded | Speed =' + speed + 'KB/s')
    else:
        percent = int(blocknum * bs * 100 / size)
        # BUG FIX: the original omitted the space before 'downloaded',
        # producing output like '16384downloaded'.
        print('%10s' % done + '/' + str(size) + ' downloaded | ' +
              str(percent) + '% Speed =' + speed + 'KB/s')
+
def httpfetch(url, headers={}, reporthook=report, postData=None, report=True, needlogin=False):
    """Fetch *url* (with up to 10 retries) and return the body string.

    Parameters:
        url       -- URL to fetch
        headers   -- request headers dict (never mutated, only rebound)
        reporthook-- progress callback (blocknum, bs, size, elapsed)
        postData  -- optional POST body; None means GET
        report    -- whether to call reporthook per block.  NOTE: this
                     flag shadows the module-level report() function in
                     this body; ``reporthook=report`` still refers to
                     the function because defaults bind at def time.
        needlogin -- log in first if not already logged in

    On repeated failure the url is appended to the 'errors' log and ''
    is returned.
    """
    ok = False
    if (not islogin) and needlogin:
        login()
    if (not isproxy) and (not islogin):
        useproxy()
    # retry the connection up to 10 times, 1s apart
    for _ in range(10):
        try:
            reqObj = urllib2.Request(url, postData, headers)
            fp = urllib2.urlopen(reqObj)
            headers = fp.info()
            ok = True
            break
        except Exception:  # narrowed from bare except; still best-effort
            sleep(1)
            continue

    if not ok:
        log = open(path + '/errors', 'a')
        log.write(url + '\n')
        log.close()
        return ''

    rawdata = ''
    bs = 1024 * 8
    size = -1
    read = 0
    blocknum = 0

    if reporthook and report:
        # py2 mimetools headers are case-insensitive, hence the
        # lowercase membership test with mixed-case read
        if "content-length" in headers:
            size = int(headers["Content-Length"])
        reporthook(blocknum, bs, size, 1)

    t0 = time()
    while 1:
        block = ''
        try:
            block = fp.read(bs)
        except Exception:
            log = open(path + '/errors', 'a')
            log.write(url + '\n')
            log.close()
            fp.close()
            return ''
        if block == "":
            print('... ' + url + ' downloaded')
            break
        rawdata += block
        read += len(block)
        blocknum += 1
        if reporthook and report:
            reporthook(blocknum, bs, size, time() - t0)
        t0 = time()
    fp.close()  # FIX: the original leaked the response object

    # raise exception if actual size does not match content-length header
    if size >= 0 and read < size:
        # BUG FIX: the original raised the undefined names
        # ContentTooShortError and ``result`` (NameError when hit);
        # use urllib's exception (subclasses IOError on py2) with the
        # partial payload attached.
        raise urllib.ContentTooShortError(
            "retrieval incomplete: got only %i out of %i bytes" % (read, size),
            rawdata)
    return rawdata
+
+if __name__ == '__main__':
+ url = 'http://www.verycd.com/topics/2788317'
+
+ #test it
+ data = httpfetch(url)
+ open('down','w').write(data)
+
diff --git a/feed.py b/feed.py
new file mode 100644
index 0000000..4b75440
--- /dev/null
+++ b/feed.py
@@ -0,0 +1,84 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# feed.py generate rss feed
+#
+# author: observer
+# email: jingchaohu@gmail.com
+# blog: http://obmem.com
+# last edit @ 2009.12.22
+import sqlite3
+import time
+import os,sys
+
def feed(path,conn):
    """Build the site RSS feed string from the 20 most recent rows.

    NOTE(review): the XML/RSS markup appears to have been stripped from
    the template literals below — the per-item format call supplies four
    values (title, link, rss, pubdate) but only two ``%s`` placeholders
    remain, which would raise at runtime.  Restore the original tags
    before relying on this output.  *path* is currently unused.
    """

    c=conn.cursor()

    # the db may be locked by the fetcher; retry up to 10 times
    for _ in range(10):
        try:
            c.execute('select * from verycd order by updtime desc limit 20');
            break
        except:
            time.sleep(5)
            continue

    data = None

    try:
        data = c.fetchall()
    except:
        c.close()
        conn.commit()
        return

    c.close()
    conn.commit()

    # RFC 822 style timestamp, as required by RSS 2.0
    pubdate = time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())

    feed = '''


 SimpleCD - 最新电驴资源

http://www.simplecd.org

zh-cn\n'''
    feed += '%s\n' % pubdate
    feed += '%s\n' % pubdate
    feed += '''http://blogs.law.harvard.edu/tech/rss
SimpleCD.com
jingchaohu@gmail.com (webmaster)
jingchaohu@gmail.com (webmaster)
4
'''

    for d in data:
        # data:0 1 2 3 4 5 6 7 8 9
        # id ttl status brief pubtime pudtime cat1 cat2 ed2k content
        title = d[1]
        link = 'http://www.simplecd.org/?id=%s' % d[0]
        rss = '摘要信息:'+d[3]+' \n类别:'+d[6]+' \n子类别:'+d[7]+' \n'
        rss += d[9]
        feed +='''


 %s

 %s
 observer

'''% (title,link,rss,pubdate)

    feed +='''
'''
    return feed
+
+if __name__ == '__main__':
+ path = os.path.dirname(os.path.realpath(sys.argv[0]))
+ conn = sqlite3.connect(path+'/verycd.sqlite3.db')
+ conn.text_factory = str
+ print feed(path,conn)
diff --git a/fetchvc.py b/fetchvc.py
new file mode 100644
index 0000000..a901231
--- /dev/null
+++ b/fetchvc.py
@@ -0,0 +1,369 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# fetchvc.py fetch resources from verycd
+#
+# author: observer
+# email: jingchaohu@gmail.com
+# blog: http://obmem.com
+# last edit @ 2009.12.23
+import urllib
+import re
+import sqlite3
+import time
+import os,sys
+
+from threading import Thread
+from Queue import Queue
+
+from download import httpfetch
+
+path = os.path.dirname(os.path.realpath(sys.argv[0]))
+conn = sqlite3.connect(path+'/verycd.sqlite3.db')
+conn.text_factory = str
+q = Queue()
+MAXC = 8
+
def thread_fetch():
    """Worker loop: pull topic ids off the shared queue forever and
    fetch each one, using a per-thread sqlite connection (sqlite3
    connections must not be shared across threads).
    """
    worker_conn = sqlite3.connect(path + '/verycd.sqlite3.db')
    worker_conn.text_factory = str
    while True:
        topic = q.get()
        fetch(topic, worker_conn)
        q.task_done()
+
def search(keyword, full=True):
    '''Search verycd for *keyword* and queue every topic id found.

    Appends the keyword and the ids to search.log.  When *full* is
    true, also recurses (non-fully) into every pagination link of the
    result list.

    FIX: log file handles are now opened once and closed explicitly
    instead of being reopened per topic and left to refcounting.
    '''

    searchlog = path + '/search.log'
    log = open(searchlog, 'a')
    log.write('\n' + keyword + '\n')
    log.close()

    url = 'http://www.verycd.com/search/folders/' + keyword
    print('fetching search results ...')
    res = httpfetch(url)
    topics = re.compile(r'/topics/(\d+)', re.DOTALL).findall(res)
    topics = set(topics)  # de-duplicate
    links = []
    if full:
        # pagination links of the search result list
        links = re.compile(r'/search/folders/(.*?\?start=\d+)', re.DOTALL).findall(res)
    print(links)
    print(topics)
    if topics:
        log = open(searchlog, 'a')
        for topic in topics:
            log.write(topic + ',')
            q.put(topic)
        log.close()
    if full and links:
        for key in links:
            search(key, full=False)
+
+
+def hot():
+ ''' read verycd hot res and keep update very day '''
+ url = 'http://www.verycd.com/'
+ print 'fetching homepage ...'
+ home = httpfetch(url)
+ hotzone = re.compile(r'热门资源.*?',re.DOTALL).search(home).group()
+ hot = re.compile(r']*>(《.*?》)[^<]*',re.DOTALL).findall(hotzone)
+ html = '
每日热门资源
\n'
+ for topic in hot:
+ print 'fetching hot topic',topic[0],'...'
+ q.put(topic[0])
+ html += ' %s \n' % topic
+ open(path+'/static/hot.html','w').write(html)
+
def normal(pages):
    '''Queue topic ids from the login-only "normal" listing pages.

    *pages* is either a single page number ("3") or an inclusive
    range ("1-5").
    '''
    if '-' in pages:
        (first, last) = [int(x) for x in pages.split('-')]
    else:
        first = last = int(pages)
    for page in range(first, last + 1):
        url = 'http://www.verycd.com/orz/page%d?stat=normal' % page
        idx = httpfetch(url, needlogin=True)
        ids = re.compile(r'/topics/(\d+)', re.DOTALL).findall(idx)
        # BUG FIX: the original printed ids[0] unconditionally and
        # crashed with IndexError when a page yielded no matches.
        if ids:
            print(ids[0])
        for id in ids:
            q.put(id)
+
def request(pages):
    '''Queue topic ids from the login-only "request" listing pages.

    *pages* is either a single page number ("3") or an inclusive
    range ("1-5").
    '''
    if '-' in pages:
        (first, last) = [int(x) for x in pages.split('-')]
    else:
        first = last = int(pages)
    for page in range(first, last + 1):
        url = 'http://www.verycd.com/orz/page%d?stat=request' % page
        idx = httpfetch(url, needlogin=True)
        ids = re.compile(r'/topics/(\d+)', re.DOTALL).findall(idx)
        # BUG FIX: the original printed ids[0] unconditionally and
        # crashed with IndexError when a page yielded no matches.
        if ids:
            print(ids[0])
        for id in ids:
            q.put(id)
+
def feed():
    '''Poll the verycd site feed and queue every topic id it mentions.

    Intended to be run periodically (roughly every 30 minutes, per the
    original comment) to pick up recent updates.
    '''
    url = 'http://www.verycd.com/sto/feed'
    print 'fetching feed ...'
    feeds = httpfetch(url)
    ids = re.compile(r'/topics/(\d+)',re.DOTALL).findall(feeds)
    ids = set(ids)  # de-duplicate topic ids
    print ids
    # ``now`` is only consumed by the commented-out freshness check below
    now = time.mktime(time.gmtime())
    for id in ids:
        q.put(id)
    #updtime = fetch(id)
    #updtime = time.mktime(time.strptime(updtime,'%Y/%m/%d %H:%M:%S'))-8*3600 #gmt+8->gmt
    #diff = now - updtime
    #print '%10s secs since update' % (diff)
    #if diff > 1900: # only need recent 30min updates
    # break
+
def update(num=10):
    """Queue topic ids found on the first *num* recently-updated list
    pages (http://www.verycd.com/sto/~all/pageN)."""
    urlbase = 'http://www.verycd.com/sto/~all/page'
    for page in range(1, num + 1):
        print('fetching list ' + str(page) + ' ...')
        listing = httpfetch(urlbase + str(page))
        # only look inside the topic-list section of the page
        section = re.compile(r'"topic-list"(.*?)"pnav"', re.DOTALL).findall(listing)
        if not section:
            continue
        topics = set(re.compile(r'/topics/(\d+)', re.DOTALL).findall(section[0]))
        print(topics)
        for topic in topics:
            q.put(topic)
+
+
def fetchall(ran='1-max',debug=False):
    '''Crawl the verycd archive pages and queue every topic id found.

    *ran* is an inclusive page range like "3-7"; the default "1-max"
    first discovers the highest archive number from the archives index.
    *debug* is currently unused.
    '''
    urlbase = 'http://www.verycd.com/archives/'
    if ran == '1-max':
        m1 = 1
        # NOTE(review): plain urllib.urlopen without the retry logic of
        # httpfetch — a network hiccup here raises unhandled.
        res = urllib.urlopen(urlbase).read()
        m2 = int(re.compile(r'archives/(\d+)').search(res).group(1))
    else:
        m = ran.split('-')
        m1 = int(m[0])
        m2 = int(m[1])
    print 'fetching list from',m1,'to',m2,'...'
    for i in range(m1,m2+1):
        # archive page names are zero-padded to five digits
        url = urlbase + '%05d'%i + '.html'
        print 'fetching from',url,'...'
        res = httpfetch(url)
        ids = re.compile(r'topics/(\d+)/',re.DOTALL).findall(res)
        print ids
        for id in ids:
            q.put(id)
+
+
+def fetch(id,conn=conn,debug=False):
+ print 'fetching topic',id,'...'
+ urlbase = 'http://www.verycd.com/topics/'
+ url = urlbase + str(id)
+
+ res = ''
+ for _ in range(3):
+ try:
+ res = httpfetch(url,report=True)
+ break
+ except:
+ continue
+
+ abstract = re.compile(r'
.*?visit',re.DOTALL).findall(res)
+ if not abstract:
+ print res
+ if res == '' or '很抱歉' in res:
+ print 'resource does not exist'
+ return
+ else:
+ print 'fetching',id,'again...'
+ return fetch(id,conn)
+ abstract = abstract[0]
+
+ title = re.compile(r'