#!/usr/bin/env python2.4
# $Id: find404,v 1.1 2006/03/22 18:31:47 annis Exp $
# $Source: /u/annis/talks_articles/pythontut/RCS/find404,v $
"""Find 404 errors in web logs.

    find404 < log

Reads access-log lines from standard input and prints one summary line
for every request whose status code is 404.
"""

import sys
import re

# Example of a log line this script is written against:
#
#   152.1.119.190 - - [22/Mar/2006:12:15:59 -0600]
#   "GET /favicon.ico HTTP/1.1" 404 283 "-" "Mozilla/5.0
#   (X11; U; Linux i686; en-US; rv:1.8.0.1)
#   Gecko/20060124 Firefox/1.5.0.1"
#
# Capture groups:
#   1 - the text between the square brackets (the timestamp above)
#   2 - the text between the double quotes preceding the status
#       (the request line above)
#   3 - the run of digits that follows that quoted text (the status)
#
# NOTE(review): the quoted group is greedy, so it ends at the LAST
# quote that is followed by whitespace and a digit.  That is the request
# for lines shaped like the example; confirm before feeding log formats
# that append numeric fields after the final quoted string.
err = re.compile(r"""[\d\.]*\s.+\s+.\s\[(.*)\]\s"(.*)"\s(\d+).*""")


def find_404s(lines):
    """Yield a report string for each log line whose status is 404.

    lines -- any iterable of log-line strings (a list, an open file,
             sys.stdin, ...).  It is consumed lazily, one line at a
             time, so the whole log never has to be held in memory.

    Each yielded string has the form

        404: <bracketed text> "<quoted request>"

    Lines that do not match the pattern, or whose status is anything
    other than 404, are skipped silently.
    """
    for line in lines:
        match = err.search(line)
        if match and match.group(3) == "404":
            yield '%s: %s "%s"' % (match.group(3),
                                   match.group(1),
                                   match.group(2))


def main():
    """Filter standard input and print every 404 report to stdout."""
    # Iterate the stream directly rather than calling readlines(), which
    # would load the entire log before the first line is examined.
    for report in find_404s(sys.stdin):
        # Single-argument parenthesised form prints the same text under
        # both the Python 2 print statement and the Python 3 function.
        print(report)


if __name__ == "__main__":
    main()

# EOF