| 1 | n/a | #!/usr/bin/env python3 |
|---|
| 2 | n/a | """Classes to parse mailer-daemon messages.""" |
|---|
| 3 | n/a | |
|---|
| 4 | n/a | import calendar |
|---|
| 5 | n/a | import email.message |
|---|
| 6 | n/a | import re |
|---|
| 7 | n/a | import os |
|---|
| 8 | n/a | import sys |
|---|
| 9 | n/a | |
|---|
| 10 | n/a | |
|---|
class Unparseable(Exception):
    """Signal that no known pattern matched a mailer-daemon message.

    Raised by the individual parsers when they cannot find any failing
    address, and by ErrorMessage.get_errors() once every parser gave up.
    """
|---|
| 13 | n/a | |
|---|
| 14 | n/a | |
|---|
class ErrorMessage(email.message.Message):
    """A mailer-daemon (bounce) message that can classify and parse itself.

    Instances are normally created by the email parser, e.g.
    ``email.message_from_file(fp, _class=ErrorMessage)``.
    """

    def __init__(self, *args, **kwargs):
        # Forward whatever the email parser supplies (such as ``policy=``)
        # so the class works as a regular message factory.
        super().__init__(*args, **kwargs)
        # Lower-cased subject; filled in by is_warning() when the message
        # turns out not to be a warning, and handed to the parsers as the
        # fallback failure reason.
        self.sub = ''

    def is_warning(self):
        """Return 1 if the subject marks this as a warning-only message.

        Returns 0 otherwise (including when there is no subject).  As a side
        effect, a non-warning subject is stored lower-cased in ``self.sub``.
        """
        sub = self.get('Subject')
        if not sub:
            return 0
        # str(): the header may be returned as an email.header.Header
        # object, which has no lower() method.
        sub = str(sub).lower()
        if sub.startswith('waiting mail') or 'warning' in sub:
            return 1
        self.sub = sub
        return 0

    def _body_text(self):
        """Return the message body (everything after the headers) as text."""
        payload = self.get_payload()
        if isinstance(payload, str):
            # Simple, non-multipart message: the payload is the body.
            return payload
        # Multipart (or empty) message: flatten it and drop the top-level
        # header block so the parsers still see the sub-part headers such
        # as "Diagnostic-Code:" or "Original-Recipient:".
        flat = self.as_string()
        if flat.startswith('\n'):
            # No top-level headers at all.
            return flat[1:]
        return flat.partition('\n\n')[2]

    def get_errors(self):
        """Return the list of "address: reason" strings for this message.

        Each parser in EMPARSERS is tried in turn on the message body; the
        first one that succeeds wins.

        Raises:
            Unparseable: if no parser recognises the message.
        """
        # email.message.Message has neither rewindbody() nor an fp
        # attribute, so give each parser a fresh in-memory file instead.
        import io

        body = self._body_text()
        for p in EMPARSERS:
            try:
                return p(io.StringIO(body), self.sub)
            except Unparseable:
                pass
        raise Unparseable
|---|
| 40 | n/a | |
|---|
# Patterns that locate the failing address(es) in a bounce message.
# Each entry is either a single pattern or a 2-tuple of patterns.
#
# Single pattern: must define a group (?P<email>...) for the address and
# may define (?P<reason>...) for the error text.  When no reason is
# captured, emparse_list_reason is consulted instead.
#
# 2-tuple: the first pattern is searched for to find a starting point;
# the second is then matched (not searched) repeatedly, each time where
# the previous match ended, to collect several addresses.
#
# Entries are written as strings and compiled below with the re module.
emparse_list_list = [
    'error: (?P<reason>unresolvable): (?P<email>.+)',
    ('----- The following addresses had permanent fatal errors -----\n',
     '(?P<email>[^ \n].*)\n( .*\n)?'),
    'remote execution.*\n.*rmail (?P<email>.+)',
    ('The following recipients did not receive your message:\n\n',
     ' +(?P<email>.*)\n(The following recipients did not receive your message:\n\n)?'),
    '------- Failure Reasons --------\n\n(?P<reason>.*)\n(?P<email>.*)',
    '^<(?P<email>.*)>:\n(?P<reason>.*)',
    '^(?P<reason>User mailbox exceeds allowed size): (?P<email>.+)',
    '^5\\d{2} <(?P<email>[^\n>]+)>\\.\\.\\. (?P<reason>.+)',
    '^Original-Recipient: rfc822;(?P<email>.*)',
    '^did not reach the following recipient\\(s\\):\n\n(?P<email>.*) on .*\n +(?P<reason>.*)',
    '^ <(?P<email>[^\n>]+)> \\.\\.\\. (?P<reason>.*)',
    '^Report on your message to: (?P<email>.*)\nReason: (?P<reason>.*)',
    '^Your message was not delivered to +(?P<email>.*)\n +for the following reason:\n +(?P<reason>.*)',
    '^ was not +(?P<email>[^ \n].*?) *\n.*\n.*\n.*\n because:.*\n +(?P<reason>[^ \n].*?) *\n',
    ]
# Replace every entry by its compiled form, keeping the same list object.
# Comprehension variables stay local, so no helper names are left behind
# in the module namespace.
emparse_list_list[:] = [
    re.compile(entry, re.MULTILINE) if isinstance(entry, str)
    else tuple(re.compile(part, re.MULTILINE) for part in entry)
    for entry in emparse_list_list
]
|---|
| 84 | n/a | |
|---|
# Patterns used to find a failure reason when the address pattern did not
# capture one.  A plain string is a per-address template: every "<>" in it
# is replaced by a copy of the email address before it is compiled.
# The entries are tried in order and the search stops at the first
# compiled pattern that matches, so the order matters.
emparse_list_reason = [
    r'^5\d{2} <>\.\.\. (?P<reason>.*)',
    r'<>\.\.\. (?P<reason>.*)',
    re.compile(r'^<<< 5\d{2} (?P<reason>.*)', re.M),
    re.compile('===== stderr was =====\nrmail: (?P<reason>.*)'),
    re.compile('^Diagnostic-Code: (?P<reason>.*)', re.M),
    ]
# Start of a "From:" header line, matched case-insensitively.
emparse_list_from = re.compile('^From:', re.I | re.M)
|---|
def emparse_list(fp, sub):
    """Extract "address: reason" strings from a bounce message body.

    fp  -- file-like object; its read() must return the body text.
    sub -- fallback reason (the lower-cased subject) used when the body
           yields none.

    Returns a list of whitespace-normalised "address: reason" strings.
    Raises Unparseable when no address pattern matches.
    """
    data = fp.read()

    # Addresses are only looked for in the text before the first "From:"
    # line; without such a line the whole text is searched.
    from_match = emparse_list_from.search(data)
    from_index = len(data) if from_match is None else from_match.start(0)

    errors = []
    emails = []
    reason = None
    for pattern in emparse_list_list:
        if isinstance(pattern, tuple):
            locator = pattern[0]
            repeater = pattern[1]
            found = locator.search(data, 0, from_index)
            if found is None:
                continue
            try:
                reason = found.group('reason')
            except IndexError:
                # This pattern defines no "reason" group.
                pass
            # Collect consecutive addresses, each match starting exactly
            # where the previous one ended.
            while True:
                found = repeater.match(data, found.end(0), from_index)
                if found is None:
                    break
                emails.append(found.group('email'))
            break

        found = pattern.search(data, 0, from_index)
        if found is None:
            continue
        emails.append(found.group('email'))
        try:
            reason = found.group('reason')
        except IndexError:
            # This pattern defines no "reason" group.
            pass
        break

    if not emails:
        raise Unparseable

    if not reason:
        # Start from the subject, minus a leading "returned mail: ".
        prefix = 'returned mail: '
        reason = sub
        if reason[:len(prefix)] == prefix:
            reason = reason[len(prefix):]
        for pattern in emparse_list_reason:
            if isinstance(pattern, str):
                # Per-address template.  Walk the addresses backwards so
                # that resolved ones can be deleted while iterating.
                for idx in reversed(range(len(emails))):
                    address = emails[idx]
                    specific = re.compile(
                        pattern.replace('<>', re.escape(address)),
                        re.MULTILINE)
                    hit = specific.search(data)
                    if hit is not None:
                        line = address.strip() + ': ' + hit.group('reason')
                        errors.append(' '.join(line.split()))
                        del emails[idx]
            else:
                hit = pattern.search(data)
                if hit is not None:
                    reason = hit.group('reason')
                    break

    # Every address still unresolved gets the shared reason.
    errors.extend(
        ' '.join((address.strip() + ': ' + reason).split())
        for address in emails
    )
    return errors
|---|
| 153 | n/a | |
|---|
# Parsers tried in order by ErrorMessage.get_errors(); each one is called
# with (fp, sub) and either returns a list of errors or raises Unparseable.
EMPARSERS = [emparse_list]
|---|
| 155 | n/a | |
|---|
def sort_numeric(a, b):
    """Three-way compare a and b by their integer values.

    Both arguments are converted with int().  Returns -1, 0 or 1 when a is
    respectively smaller than, equal to or greater than b.
    """
    left, right = int(a), int(b)
    return (left > right) - (left < right)
|---|
| 165 | n/a | |
|---|
def parsedir(dir, modify):
    """Parse every numerically named message file in a mail folder.

    dir    -- folder to process; the process changes its working directory
              to it (so a later relative folder name is resolved from here).
    modify -- when true, each file that was handled (warning-only or
              successfully parsed) is renamed to ',' + name.

    Prints one status line per file, then totals, then one line per
    distinct error with its count and the first/last file it was seen in.
    """
    # Local import: these helpers replace the getaddr()/getdate() methods
    # that email.message.Message does not provide.
    from email.utils import parseaddr, parsedate

    os.chdir(dir)
    pat = re.compile('^[0-9]*$')
    errordict = {}
    errorfirst = {}
    errorlast = {}
    nok = nwarn = nbad = 0

    # find all numeric file names and sort them by numeric value
    files = [fn for fn in os.listdir('.') if pat.match(fn) is not None]
    files.sort(key=int)

    for fn in files:
        # Lets try to parse the file.  The with-block guarantees the file
        # is closed before it may be renamed below.
        with open(fn) as fp:
            m = email.message_from_file(fp, _class=ErrorMessage)
            sender = parseaddr(str(m.get('From', '')))
            print('%s\t%-40s\t'%(fn, sender[1]), end=' ')

            warning = m.is_warning()
            errors = None   # stays None when the message is unparseable
            if not warning:
                try:
                    errors = m.get_errors()
                except Unparseable:
                    pass
            raw_date = m.get('Date')

        if warning:
            print('warning only')
            nwarn = nwarn + 1
            if modify:
                os.rename(fn, ','+fn)
            continue

        if errors is None:
            print('** Not parseable')
            nbad = nbad + 1
            continue
        print(len(errors), 'errors')

        # The date is the same for every error of this message.
        parsed = parsedate(str(raw_date)) if raw_date else None
        if parsed:
            mm, dd = parsed[1:1+2]
            date = '%s %02d' % (calendar.month_abbr[mm], dd)
        else:
            date = '??????'

        # Remember them
        where = '%s (%s)' % (fn, date)
        for e in errors:
            if e not in errordict:
                errordict[e] = 1
                errorfirst[e] = where
            else:
                errordict[e] = errordict[e] + 1
            errorlast[e] = where

        nok = nok + 1
        if modify:
            os.rename(fn, ','+fn)

    print('--------------')
    print(nok, 'files parsed,',nwarn,'files warning-only,', end=' ')
    print(nbad,'files unparseable')
    print('--------------')
    # Not named "list": a local of that name would shadow the builtin for
    # the whole function body.
    summary = sorted(
        (errordict[e], errorfirst[e], errorlast[e], e) for e in errordict
    )
    for num, first, last, e in summary:
        print('%d %s - %s\t%s' % (num, first, last, e))
|---|
| 233 | n/a | |
|---|
def main():
    """Command-line entry point.

    An optional leading '-d' argument turns on modify mode and is removed
    from sys.argv.  Every remaining argument is processed as a folder with
    parsedir(); without any, the built-in default folder is used.
    """
    argv = sys.argv
    modify = 0
    if argv[1:2] == ['-d']:
        modify = 1
        del argv[1]
    folders = argv[1:] or ['/ufs/jack/Mail/errorsinbox']
    for folder in folders:
        parsedir(folder, modify)
|---|
| 244 | n/a | |
|---|
# Run main() when executed as a script; the second test also fires when
# sys.argv[0] is equal to this module's name.
if __name__ == '__main__' or sys.argv[0] == __name__:
    main()
|---|