#!/usr/bin/env python2.4
# $Id: detag,v 1.1 2006-03-22 10:15:36-06 annis Exp annis $
# $Source: /u/annis/talks_articles/pythontut/RCS/detag,v $
"""Remove HTML tags from a document.

Reads HTML on standard input and writes the same text to standard
output with everything that looks like a tag (``<...>``) deleted.

This is a regular-expression heuristic, not an HTML parser:

* A ``>`` inside a quoted attribute value does not end the tag.
* Tags may span several lines.
* Comments or scripts that contain a bare ``>`` are only removed up to
  that character.
* Character entities such as ``&amp;`` are left untouched.

Usage: detag < file.html
"""

import sys
import re

# NOTE(review): the pattern previously stored here had lost its leading
# portion (what remained had unbalanced parentheses and could not be
# compiled), so it was replaced rather than repaired.
#
# A tag is "<", then any run of
#   - characters that are neither ">" nor a quote,
#   - complete double-quoted strings, or
#   - complete single-quoted strings,
# then the closing ">".  The three alternatives start with different
# characters, so the match cannot backtrack between them.  The negated
# character classes already match newlines, so no flags are required.
detag = re.compile(r"""<(?:[^>"']|"[^"]*"|'[^']*')*>""")


def main():
    """Filter standard input to standard output, deleting HTML tags."""
    # Substitute over the whole input at once rather than line by line,
    # otherwise a tag broken across a newline would never match.
    sys.stdout.write(detag.sub("", sys.stdin.read()))


if __name__ == "__main__":
    main()

# EOF