#!/usr/bin/env python
# $Id$
# Author: Chris Green
# Purpose: helper script for htdig to parse html files
# Created: Sun Jun 03 16:39:39 CDT 2001
# split_msg adapted from a script by Paul Moore
# (gustav@morpheus.demon.co.uk)
#
#
# quote_body taken from the HyperText package
#
# The script reads one mail message from a file and writes an HTML
# rendering of it (subject, sender, plain-text body) to standard output.

import glob
import mimetools
import multifile
import os
import re
import string
import sys
from mimetools import Message


def quote_body(s):
    """Return *s* with the HTML metacharacters ``&``, ``<``, ``>`` escaped.

    *s* must be a string; callers are responsible for handling ``None``.
    """
    # '&' has to be replaced first, otherwise the ampersands introduced by
    # the '<' and '>' entities would themselves be escaped again.
    return s.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')


# Matches a boundary value that starts with a quoted string and captures
# the text between the quotes, dropping anything that follows them.
boundary_checkre = re.compile(r'^"(.*)".*', re.DOTALL)


def split_msg(msg):
    """Return the bodies of the ``text/plain`` parts of a multipart message.

    msg -- a ``mimetools.Message`` whose ``fp`` is positioned at the start
           of the message body.

    Returns a list of strings, one per ``text/plain`` part, in message
    order.  Raises ``TypeError`` when the message carries no usable
    ``boundary`` parameter.
    """
    boundary = msg.getparam("boundary")
    if not boundary:
        # A missing boundary already surfaced as TypeError (indexing None);
        # an empty one is reported the same way so callers need to handle
        # only one failure mode.
        raise TypeError("message has no MIME boundary parameter")
    if boundary[0] == '"':
        # fix broken sourceforge postings
        boundary = boundary_checkre.sub(r'\1', boundary)

    parts = []
    mfile = multifile.MultiFile(msg.fp)
    mfile.push(boundary)
    try:
        while mfile.next():
            submsg = mimetools.Message(mfile)
            if submsg.gettype() == "text/plain":
                parts.append(mfile.read())
    except multifile.Error:
        # Deliberate best effort: per the original author's note, the size
        # limit applied when parsing can cut the message short.  Keep the
        # parts that were read instead of failing the whole message.
        pass
    mfile.pop()
    return parts


def print_msg(file):
    """Write an HTML rendering of the mail message stored in *file*.

    file -- path of a file holding a single mail message.

    The subject becomes the page title and the text/plain content becomes
    the page body, both HTML-escaped.  Multipart messages that cannot be
    split are rendered with the body "Can't Parse Message.".

    NOTE(review): the HTML tag text inside the string literals below was
    missing from the copy of this file under review and has been
    reconstructed; confirm the exact tags (in particular the <meta> name)
    against the indexer configuration.
    """
    # print is always given one parenthesised argument, which the print
    # statement treats exactly like the bare expression.
    fp = open(file)
    try:
        msg = Message(multifile.MultiFile(fp, 1))

        print('<html>')
        print('<head>')

        # Check for a missing header before escaping: quote_body() cannot
        # handle None.
        subject = msg.getheader('Subject')
        if subject is None:
            subject = "No subject given"
        else:
            subject = quote_body(subject)
        print('<title>%s</title>' % subject)

        sender = msg.getheader('From')
        if sender is not None:
            # The value lands inside a double-quoted attribute, so double
            # quotes need escaping in addition to what quote_body() does.
            sender = quote_body(sender).replace('"', '&quot;')
            print('<meta name="author" content="%s">' % sender)

        if msg.gettype() == 'text/plain':
            content = msg.fp.read()
        else:
            try:
                content = "".join(split_msg(msg))
            except (TypeError, multifile.Error):
                # some things have broken mime headers
                # just construct something pseudo workable
                content = "Can't Parse Message."
    finally:
        fp.close()

    print('</head>')
    print('<body>')
    print(quote_body(content))
    print('</body></html>')
    
    

if __name__ == '__main__':
    # The usage string lists four arguments; only the first one (the path
    # of the message file) is used by this script.
    if len(sys.argv) != 5:
        # Report on stderr: stdout is where print_msg() writes the
        # rendered document, so diagnostics must not be mixed into it.
        sys.stderr.write(
            "usage: mailparse.py file content-type URL configuration_file\n")
        sys.exit(1)

    print_msg(sys.argv[1])