Tuesday, December 28, 2010

Escaping HTML String using Python

Some time back I wrote a Groovy script for escaping HTML, which is especially useful for putting source code into Blogger. Here is the Python version :) I'm seriously in love with Python's "batteries included" motto.

escapehtml.py
import os, sys, cgi

def escapehtml(path):
    """Return the contents of the file at *path* with HTML characters escaped.

    ``&``, ``<`` and ``>`` are replaced by their HTML entities. Quote
    characters are left untouched, matching ``cgi.escape`` called with its
    default arguments.

    If *path* does not exist, an error message is printed and the process
    exits with status 1.
    """
    if not os.path.exists(path):
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("Error: %s does not exist" % path)
        sys.exit(1)
    # cgi.escape was removed in Python 3.8; html.escape is its replacement.
    # Fall back to cgi on interpreters without html.escape (Python 2).
    try:
        from html import escape
    except ImportError:
        from cgi import escape
    # The context manager closes the handle even if read() raises.
    with open(path) as source_file:
        text = source_file.read()
    # quote=False preserves the original output; html.escape would otherwise
    # also escape double and single quotes.
    return escape(text, quote=False)

if __name__ == "__main__":
    # Usage: python escapehtml.py <file>
    # Exactly one argument (the file to escape) is required.
    if len(sys.argv) != 2:
        print("Error: Invalid argument!")
        sys.exit(1)
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print(escapehtml(sys.argv[1]))

escapehtml_recurse.py
import os, sys, escapehtml

def escapehtml_recurse(source, destination):
    """Write an HTML-escaped copy of every file under *source* into *destination*.

    The *source* tree is walked recursively. Each file is escaped with
    ``escapehtml.escapehtml`` and written to ``<destination>/<name>.txt``.
    *destination* is created on demand, just before the first file is
    written.

    NOTE: the output is flat. Files that share a base name in different
    sub-directories of *source* are written to the same output file, so
    later ones overwrite earlier ones.

    If *source* does not exist, an error message is printed and the process
    exits with status 1.
    """
    if not os.path.exists(source):
        # Report the parameter that was actually checked; the previous code
        # referenced an undefined name here and raised NameError instead.
        print("Error: %s does not exist" % source)
        sys.exit(1)
    for root, _dirs, files in os.walk(source):
        for f in files:
            filename = os.path.join(root, f)
            content = escapehtml.escapehtml(filename)
            if not os.path.exists(destination):
                os.makedirs(destination)
            newfilename = os.path.join(destination, f + ".txt")
            # The context manager flushes and closes the output file
            # deterministically instead of relying on garbage collection.
            with open(newfilename, "w") as target:
                print("Creating %s" % newfilename)
                target.write(content)
            
if __name__ == "__main__":
    # Usage: python escapehtml_recurse.py <source-dir> <destination-dir>
    # Exactly two arguments are required.
    if len(sys.argv) != 3:
        print("Error: Invalid argument!")
        sys.exit(1)
    escapehtml_recurse(sys.argv[1], sys.argv[2])
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("*** Done ***")

No comments:

Post a Comment