Category Archives: Python

Powered by Python: Rename files

I have too many files whose naming schemes I want to unify, by replacing all spaces (" ") with periods (".") and capitalizing each part, e.g. "this is a file.txt" -> "This.Is.A.File.txt".

Python comes to the rescue and makes it a breeze.

  1: from os import walk
  2: from os.path import join
  3: from os.path import basename
  4: from string import capwords
  5: import datetime
  6: import sys
  7: import os
  8: 
  9: def GoRename(path):
 10:     """ this function take a valid directory path as input
 11:     walk through all files and directories in the passed in path,
 12:     Raname the file by replacing all spaces with dots, and capitalize each word
 13:     path: path to a directory or file
 14:     Copyright @ Tomgee, 2007
 15:     """
 16: 
 17:     print sys.getdefaultencoding()
 18:     print sys.getfilesystemencoding()
 19:     logfile = path + r'\log.txt'
 20:     print "Going through %s to rename files, \nsubdirectory included..."%(path)
 21:     print "a log will be saved to %s"%logfile
 22:     f = open(logfile, 'a') # open a log file
 23:     sys.stdout = f # un-comment this line out if the output to a file is preferred
 24:     now = datetime.datetime.now()
 25:     print "\n***********************************************"
 26:     print "Timestamp: %s"%now.strftime("%y-%m-%d, %H:%M:%S")
 27: 
 28:     totalCount = 0
 29:     for root,dirs, files in walk(path):
 30:         for file in files:
 31:         if (file != os.path.basename(sys.argv[0])) and (file != "log.txt")and " " in file:
 32:             newfile = ".".join(capwords(file).split(" "))
 33:             os.rename(file, newfile)
 34:             print "File #%d: %s --> %s"%(totalCount,file, newfile)
 35:             totalCount += 1
 36:     print "Totally %d files/directories under %s have been searched"%(totalCount, path)
 37:     print r'********* The End *********'
 38: 
 39: def main():
 40:     argCurDir = os.getcwd()
 41:     GoRename(argCurDir)
 42: 
 43: if __name__ == '__main__': main()
 44: 

How to Write a Spelling Corrector

All you need is 20 lines of Python 2.5 code. 

  1: import re, string, collections
  2: 
  3: def words(text): return re.findall('[a-z]+', text.lower()) 
  4: 
  5: def train(features):
  6:     model = collections.defaultdict(lambda: 1)
  7:     for f in features:
  8:         model[f] += 1
  9:     return model
 10: 
 11: NWORDS = train(words(file('Documents/holmes.txt').read()))
 12: 
 13: def edits1(word):
 14:     n = len(word)
 15:     return set([word[0:i]+word[i+1:] for i in range(n)] + ## deletion
 16:                [word[0:i]+word[i+1]+word[i]+word[i+2:] for i in range(n-1)] + ## transposition
 17:                [word[0:i]+c+word[i+1:] for i in range(n) for c in string.lowercase] + ## alteration
 18:                [word[0:i]+c+word[i:] for i in range(n+1) for c in string.lowercase]) ## insertion
 19: 
 20: def known_edits2(word):
 21:     return set(e2 for e1 in edits1(word) for e2 in edits1(e1) if e2 in NWORDS)
 22: 
 23: def known(words): return set(w for w in words if w in NWORDS)
 24: 
 25: def correct(word):
 26:     return max(known([word]) or known(edits1(word)) or known_edits2(word) or [word],
 27:                key=lambda w: NWORDS[w])
 28: 
 29:  
 30: 
 31: 

A complete analysis is available here: http://norvig.com/spell-correct.html

Powered by Python: Easy File Finder

More often than not I dig through tens of gigabytes of files to search for one I only vaguely remember. I thought it would help to have shortcuts to the files I frequently use, grouped by keyword.

Here comes the solution, implemented in Python. This script takes a criterion substring and searches all file names; whenever one matches, a corresponding shortcut is created in a designated folder.

  1: from os import walk
  2: from os.path import join, getsize
  3: import datetime
  4: import sys
  5: import os
  6: import re
  7: import win32com.client
  8: 
  9: def CreateShortcut(WhereSrc, WhereShortcut, ShortcutName):
 10:     """ This function creates a Windows shortcut for a directory or file,
 11:         WhereSrc:      path to a directory or file
 12:         WhereShortcut: path to the shortcut being placed
 13:         ShortcutName:  name of the Shortcut
 14:         Copyright @ Tomgee, 2007
 15:     """
 16: 
 17:     shell = win32com.client.Dispatch("WScript.Shell")
 18:     shortcut = shell.CreateShortCut(WhereShortcut + "\\" + ShortcutName + ".lnk")
 19:     shortcut.Targetpath = WhereSrc
 20:     shortcut.save()
 21: 
 22: def GoFind(path, substr, WhereShortcut):
 23:     """ this function take a valid directory path as input
 24:         walk through all files and directories in the passed in path, 
 25:         create a shortcut for each matched item.
 26:         path:           path to a directory or file
 27:         WhereShortcut:  path to the shortcut being placed
 28:         substr:         criteria string to match
 29:         Copyright @ Tomgee, 2007
 30:     """
 31: 
 32:     logfile = WhereShortcut + r'\log.txt'
 33:     print "Going through %s to find \'%s\'(case insensitive), \nsubdirectory included..."%(path, substr)
 34:     print "a log will be saved to %s"%logfile
 35:     f = open(logfile, 'a') # open a log file
 36:     sys.stdout = f   # un-comment this line out if the output to a file is preferred 
 37: 
 38:     now = datetime.datetime.now()
 39:     print "\n***********************************************"
 40:     print "Timestamp: %s"%now.strftime("%y-%m-%d, %H:%M:%S")
 41: 
 42:     totalCount = 0
 43:     substr_upper = substr.upper()
 44:     for root,dirs, files in walk(path):
 45:         for file in files:
 46:             if substr_upper in file.upper():
 47:                 fullpath = join(root,file)
 48:                 CreateShortcut(fullpath, WhereShortcut, file)
 49:                 print "File #%d: %s"%(totalCount,fullpath)
 50:             totalCount += 1
 51:     print "Totally %d files/directories under %s have been searched"%(totalCount, path)
 52:     print r'********* The End *********'
 53: 
 54: def main():
 55:     argCount            = len(sys.argv)
 56:     argCurDir           = os.getcwd()
 57:     argMatchStr         = ""
 58:     argShortcutsDir     = os.getcwd() + r'\shortcuts_for_'
 59: 
 60:     print "usage: %s [StringToMatch] [DirectoryToSearch, e.g. c:\] [DirectoryToPutShortcuts, e.g. c:\]" %os.path.basename(sys.argv[0])
 61:     if argCount > 1:
 62:         argMatchStr = sys.argv[1]
 63:     if argCount > 2:
 64:         argCurDir = sys.argv[2]
 65:     if argCount > 3:
 66:         argShortcutsDir = sys.argv[3]
 67: 
 68:     argFinalShortcutsDir = argShortcutsDir + argMatchStr
 69:     if not os.path.lexists(argFinalShortcutsDir):
 70:         os.mkdir(argFinalShortcutsDir)
 71:     GoFind(argCurDir, argMatchStr, argFinalShortcutsDir)
 72: if __name__ == '__main__': main()
 73: 
 74: 

Powered by Python: How to create Windows Shortcuts

import sys
import win32com.client

def CreateShortcut(WhereSrc, WhereShortcut, ShortcutName):
    """ Create a Windows shortcut (.lnk) through the WScript.Shell COM object.

        The shortcut is written to WhereShortcut\\ShortcutName.lnk and its
        target is set to WhereSrc.

        WhereSrc:      path to a directory or file
        WhereShortcut: path to the shortcut being placed
        ShortcutName:  name of the Shortcut
        Copyright @ Tomgee, 2007
    """

    wsh = win32com.client.Dispatch("WScript.Shell")
    link_path = WhereShortcut + "\\" + ShortcutName + ".lnk"
    link = wsh.CreateShortCut(link_path)
    link.Targetpath = WhereSrc
    link.save()

Powered by Python: find files whose name contain a criteria string

from os import walk
from os.path import join, getsize
import datetime
import sys
import os
import re

def parse(path, substr="boost"):
    """ Walk a directory tree and log every file whose name contains substr
    (case insensitive).

    Matches are appended to log.txt inside path; sys.stdout is redirected to
    that log only while the function runs and is restored afterwards.

    path:   path to the directory to search
    substr: criteria string to match
    """

    logfile = os.path.join(path, 'log.txt')
    print("Going through %s to find \'%s\'(case insensitive), \nsubdirectory included..." % (path, substr))
    print("a log will be saved to %s" % logfile)

    savedStdout = sys.stdout
    f = open(logfile, 'a')  # open a log file
    try:
        sys.stdout = f  # everything printed below goes to the log

        now = datetime.datetime.now()
        print("\n***********************************************")
        print("\nTimestamp: %s" % now.strftime("%y-%m-%d, %H:%M:%S"))

        totalCount = 0
        substr_upper = substr.upper()
        for root, dirs, files in walk(path):
            for name in files:
                if substr_upper in name.upper():
                    fullpath = join(root, name)
                    print("File #%d: %s" % (totalCount, fullpath))
                totalCount += 1
        print("Totally %d files/directories under %s have been searched" % (totalCount, path))
        print(r'********* The End *********')
    finally:
        # Always hand the console back and release the log, even on error.
        sys.stdout = savedStdout
        f.close()

def main():
    """ Command-line entry point for the file-name search.

    With exactly one argument, that argument is the directory to search.
    Otherwise the usage line is printed and the current working directory is
    searched instead.
    """
    if len(sys.argv) != 2:
        print("usage: %s [directory, e.g. c:\\]" % os.path.basename(sys.argv[0]))
        parse(os.getcwd())
    else:
        parse(sys.argv[1])


if __name__ == '__main__':
    main()