From 2241f4e6e9ae2e6613b5371a29f60d6971474eec Mon Sep 17 00:00:00 2001 From: aglebov2 Date: Sat, 15 Aug 2015 22:22:43 +0300 Subject: [PATCH] initial commit --- .ignore_dupes | 0 add_remove.sh | 10 ++ finduniq.py | 335 ++++++++++++++++++++++++++++++++++++++++++++++++ progress_bar.py | 144 +++++++++++++++++++++ remove_int.py | 132 +++++++++++++++++++ 5 files changed, 621 insertions(+) create mode 100644 .ignore_dupes create mode 100644 add_remove.sh create mode 100644 finduniq.py create mode 100644 progress_bar.py create mode 100644 remove_int.py diff --git a/.ignore_dupes b/.ignore_dupes new file mode 100644 index 0000000..e69de29 diff --git a/add_remove.sh b/add_remove.sh new file mode 100644 index 0000000..27d0a3f --- /dev/null +++ b/add_remove.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +while read line +do + if [ "x$line" != "x" ]; then + echo "rm -f \"$line\"" + else + echo "" + fi +done diff --git a/finduniq.py b/finduniq.py new file mode 100644 index 0000000..cb7ee95 --- /dev/null +++ b/finduniq.py @@ -0,0 +1,335 @@ +#!/usr/bin/python + +import glob +import os +import string +import hashlib +import sys +import progress_bar +from stat import * + +debug=0 +progress_enabled=1 +short_print=1 + +SILENT=0 +ERROR=1 +DEBUG=3 +EERBOSE=5 + +FALSE=0 +TRUE=1 + +def log(min_deb, str, vals=None): + if debug>=min_deb: + if vals!=None: + print str % vals + else: + print str + +def md5sum(filename, buf_size=8192): + m = hashlib.md5() + # the with statement makes sure the file will be closed + with open(filename) as f: + # We read the file in small chunk until EOF + data = f.read(buf_size) + while data: + # We had data to the md5 hash + m.update(data) + data = f.read(buf_size) + # We return the md5 hash in hexadecimal format + f.close() + return m.hexdigest() + +class File: + name="" + __md5="" + size=0 + md5group=0 + sizegroup=0 + + def saveToFile(self, fl): + fl.write("%s:%d:%s\n" % (self.__md5, self.size, self.name)) + + def parseStr(self, str): + sz="" + self.__md5, sz, self.name=str.split(':') + self.size=string.atoi(sz) + self.name=self.name.rstrip() + try: + mode=os.stat(self.name)[ST_MODE] + if S_ISREG(mode): + log(DEBUG, "restored: md5=%s, size=%d, name=%s" , (self.__md5, self.size, self.name)) + return TRUE + except OSError: + log(DEBUG, "file access error, may be removed: '%s'" % self.name) + return FALSE + + def calcMD5(self): + if self.__md5=="": + #print "cache missed: %s" %self.name + self.__md5=md5sum(self.name) + + def getMD5(self, fl): + if self.__md5=="": + self.__md5=fl.getCachedMD5(self.name) + if self.__md5=="": + self.calcMD5() + return self.__md5 + + def getMD5_2(self): + if self.__md5=="": + return "BADBAD" + else: + return self.__md5 + +class FileList: + files=list() + samesizefiles=list() + samemd5files=list() + + __cur_p=0 + __total_p=0 + + __scan_progr=None + + __cached_fnames=list() + __cached_files=list() + + def getCachedMD5(self, name): + try: + idx=self.__cached_fnames.index(name) + res=self.__cached_files[idx].getMD5_2() + if res=="BADBAD": + return "" + return res + except ValueError: + return "" + + def scan(self, path): + print "Scan directories" + self.__scan_rec(path) + print "" + + def __scan_rec(self, path): + dcont=os.listdir(path) + for f in dcont: + if f==".ignore_dupes": + return + + if progress_enabled: + self.__total_p=self.__total_p+len(dcont) + self.__scan_progr=progress_bar.ProgressBar(0, self.__total_p, 50, mode='fixed', char='#') + for f in dcont: + if progress_enabled: + self.__cur_p=self.__cur_p+1 + self.__scan_progr.update_amount(self.__cur_p) + print "cur=%d, tot=%d "% (self.__cur_p, self.__total_p),self.__scan_progr,"\r", + sys.stdout.flush() + pn=os.path.join(path, f) + try: + mode=os.stat(pn)[ST_MODE] + if S_ISDIR(mode) and f!=".git" and f!=".svn": + self.__scan_rec(pn) + elif S_ISREG(mode): + self.check(pn) + else: + log(ERROR, 'unknown file %s' , pn) + except OSError: + log(ERROR, 'file access error %s' , pn) +# self.__cur_p=self.__cur_p-len(dcont) + # self.__total_p=self.__total_p-len(dcont) + + def check(self, filename): + log (DEBUG, "check %s" , filename) + + #if filename in self.__cached_fnames: + # return + + f=File() + f.name=filename + f.size=os.stat(filename)[ST_SIZE] + self.files.append(f) + log ( DEBUG, "added: %s, (%d)" , (filename, f.size)) + + def sortBySize(self): + print "Sort by size" + self.files=sorted(self.files, key=lambda File: File.size) + + def saveToFile(self): + fle=file("uniq_cache.cache", "w") + for f in self.files: + if f.getMD5_2()!="BADBAD": + f.saveToFile(fle) + fle.close() + + def loadFromFile(self): + print "Load cache" + try: + fle=file("uniq_cache.cache", "r") + for str in fle: + fl=File() + if fl.parseStr(str): + self.__cached_files.append(fl) + self.__cached_fnames=list(obj.name for obj in self.__cached_files) + #for s in self.__cached_fnames: + # print "cac=%s" %s + + except IOError: + log(DEBUG, "Cache not found") + + def checkSizes(self): + print "Check size" + cursize=self.files[0].size + curszlist=list() + append_prev=1 + + if progress_enabled: + pb=progress_bar.ProgressBar(0, len(self.files), 50, mode='fixed', char="#") + for i in range(1, len(self.files)): + if progress_enabled: + pb.increment_amount() + print pb, '\r', + if self.files[i].size==cursize: +# print "append" + if append_prev!=0: + append_prev=0 + curszlist.append(self.files[i-1]) + curszlist.append(self.files[i]) + else: +# print "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" +# print "len=%d" % len(curszlist) +# print "len2=%d" % len(self.samesizefiles) + if len(curszlist)!=0: + self.samesizefiles.append(curszlist) + if len(curszlist)==1: + print "EPIC_FAIL" + del curszlist + curszlist=list() + cursize=self.files[i].size + append_prev=1 + if len(curszlist)!=0: + self.samesizefiles.append(curszlist) + print "" + + def sortByMD5(self): + print "Sort by MD5" + curmd5group=1 + tmplist=list() + if progress_enabled: + pb=progress_bar.ProgressBar(0, len(self.samesizefiles), 50, mode='fixed', char="#") + for ss in self.samesizefiles: + #for i in (range(0,len(ss))): + # ss[i].calcMD5() + # print "sort=%s" % ss[i].name + ss=sorted(ss, key=lambda File: File.getMD5(self)) + tmplist.append(ss) + if progress_enabled: + pb.increment_amount() + print pb, '\r', + self.samesizefiles=tmplist + print "" + + def checkMD5(self): + print "Check MD5" + if progress_enabled: + pb=progress_bar.ProgressBar(0, len(self.samesizefiles), 50, mode='fixed', char="#") + for ss in self.samesizefiles: + if progress_enabled: + pb.increment_amount() + print pb, '\r', + curmd5list=list() + curmd5=ss[0].getMD5(self) + ap_pr=1 + for i in range(1, len(ss)): + log(DEBUG, "i=%d, ifn=%s, sz=%d" , (i, ss[i].name, ss[i].size)) + if curmd5==ss[i].getMD5(self): + log(DEBUG, "same") + if ap_pr!=0: + ap_pr=0 + curmd5list.append(ss[i-1]) + curmd5list.append(ss[i]) + else: + log(DEBUG, "diff") + if len(curmd5list)!=0: + log(DEBUG, "app") + self.samemd5files.append(curmd5list) + del curmd5list + curmd5list=list() + curmd5=ss[i].getMD5(self) + ap_pr=1 + #n=n+1 + log(DEBUG, "") + #n=0 + if len(curmd5list)!=0: + log(DEBUG, "app") + self.samemd5files.append(curmd5list) + del curmd5list + print "" + + def printResult(self, resfile): + fle=file(resfile, "w") + + for ss in fl.samemd5files: + for t in ss: + if short_print: + fle.write("%s\n" % (t.name)) + else: + fle.write("%s\t%d\t%s\n" % (t.name, t.size, t.getMD5_2())) + fle.write("\n") + + fle.close() + + + def printFiles(self): + for t in self.files: + print "file=%s, md5=%s" % (t.name, t.getMD5_2()) + + +resfile=sys.argv[1] + +pathes=list() +i=2 +if len(sys.argv): + while i +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# +# This class is an improvement from the original found at: +# +# http://code.activestate.com/recipes/168639/ +# + +import sys,os + +class ProgressBar: + def __init__(self, min_value = 0, max_value = 100, width=77,**kwargs): + self.char = kwargs.get('char', '#') + self.mode = kwargs.get('mode', 'dynamic') # fixed or dynamic + if not self.mode in ['fixed', 'dynamic']: + self.mode = 'fixed' + + self.bar = '' + self.min = min_value + self.max = max_value + self.span = max_value - min_value + self.width = width + self.amount = 0 # When amount == max, we are 100% done + self.update_amount(0) + + + def increment_amount(self, add_amount = 1): + """ + Increment self.amount by 'add_ammount' or default to incrementing + by 1, and then rebuild the bar string. + """ + new_amount = self.amount + add_amount + if new_amount < self.min: new_amount = self.min + if new_amount > self.max: new_amount = self.max + self.amount = new_amount + self.build_bar() + + + def update_amount(self, new_amount = None): + """ + Update self.amount with 'new_amount', and then rebuild the bar + string. + """ + if not new_amount: new_amount = self.amount + if new_amount < self.min: new_amount = self.min + if new_amount > self.max: new_amount = self.max + self.amount = new_amount + self.build_bar() + + + def build_bar(self): + """ + Figure new percent complete, and rebuild the bar string base on + self.amount. + """ + diff = float(self.amount - self.min) + percent_done = int(round((diff / float(self.span)) * 100.0)) + + # figure the proper number of 'character' make up the bar + all_full = self.width - 2 + num_hashes = int(round((percent_done * all_full) / 100)) + + if self.mode == 'dynamic': + # build a progress bar with self.char (to create a dynamic bar + # where the percent string moves along with the bar progress. + self.bar = self.char * num_hashes + else: + # build a progress bar with self.char and spaces (to create a + # fixe bar (the percent string doesn't move) + self.bar = self.char * num_hashes + ' ' * (all_full-num_hashes) + + percent_str = str(percent_done) + "%" + self.bar = '[ ' + self.bar + ' ] ' + percent_str + + + def __str__(self): + return str(self.bar) + + +def main(): + print + limit = 1000000 + + print 'Example 1: Fixed Bar' + prog = ProgressBar(0, limit, 77, mode='fixed') + oldprog = str(prog) + for i in xrange(limit+1): + prog.update_amount(i) + if oldprog != str(prog): + print prog, "\r", + sys.stdout.flush() + oldprog=str(prog) + + print '\n\n' + + print 'Example 2: Dynamic Bar' + prog = ProgressBar(0, limit, 77, mode='dynamic', char='-') + oldprog = str(prog) + for i in xrange(limit+1): + prog.increment_amount() + if oldprog != str(prog): + print prog, "\r", + sys.stdout.flush() + oldprog=str(prog) + + print '\n\n' + + +if __name__ == '__main__': + main() +""" + +

+Now add it to your app: + + +from progress_bar import ProgressBar +import sys + +count = 0 +total = 100000 + +prog = ProgressBar(count, total, 77, mode='fixed', char='#') +while count <= total: + count += 1 + prog.increment_amount() + print prog, '\r', + sys.stdout.flush() +print +""" diff --git a/remove_int.py b/remove_int.py new file mode 100644 index 0000000..284cb14 --- /dev/null +++ b/remove_int.py @@ -0,0 +1,132 @@ +#!/usr/bin/python + +import curses +import sys +import string +import locale +import os + +class Screen: + + def __init__(self): + locale.setlocale(locale.LC_ALL,"") + self.scr=curses.initscr() + curses.noecho() + curses.cbreak() + self.scr.keypad(1) + + def destroy(self): + curses.nocbreak() + self.scr.keypad(0) + curses.echo() + curses.endwin() + + def select(self, variants): + if len(variants)<2: + return + if len(variants)>9: + return + while 1: + self.scr.clear() + i=1 + for fn in variants: + self.scr.addstr("%d) " % i) + self.scr.addstr(fn) + self.scr.addstr("\n") + i+=1 + self.scr.addstr(25, 0, "(%d of %d) Which will stay (1..9, n, q):" %(self.curpos, self.allcount)) + self.scr.refresh() + c = self.scr.getch() + if c in(ord('q'),ord('Q')): + self.destroy() + sys.exit(0) + if c in(ord('n'), ord('N'), ord('S'), ord('s')): + #just skip + return + #n=string.atoi(c) + if c<=ord('9') and c>ord('0'): + n=c-ord('0')-1 + + if n1: + groups_tmp.append(gr2) + self.groups=groups_tmp + +s = Screen() + +s.load(sys.argv[1]) +s.parse_librusec() +#s.go() +s.destroy()