Revision indexing in progress... (symbol navigation in revisions will be accurate after indexed)
-
skipped 23 lines 24 24 __program_name__ = 'Got Your Back: Gmail Backup' 25 25 __author__ = 'Jay Lee' 26 26 __email__ = '[email protected]' 27 - __version__ = '1.54' 27 + __version__ = '1.55' 28 28 __license__ = 'Apache License 2.0 (https://www.apache.org/licenses/LICENSE-2.0)' 29 29 __website__ = 'https://git.io/gyb' 30 30 __db_schema_version__ = '6' skipped 207 lines 238 238 dest='config_folder', 239 239 help='Optional: Alternate folder to store config and credentials', 240 240 default=getProgPath()) 241 + parser.add_argument('--cleanup', 242 + action='store_true', 243 + dest='cleanup', 244 + help='Attempt to cleanup Message-Id, From and Date headers on restore to avoid issues. MAKES PERMANENT CHANGES TO RESTORED MESSAGES.') 245 + now_date_header = email.utils.formatdate(localtime=True) 246 + parser.add_argument('--cleanup-date', 247 + dest='cleanup_date', 248 + help=f'Date header to use if --cleanup is specified and IF message date header is missing or is not parsable. Format should look like "{now_date_header}". Defaults to now.', 249 + default=now_date_header) 250 + default_cleanup_from = 'GYB Restore <[email protected]>' 251 + parser.add_argument('--cleanup-from', 252 + dest='cleanup_from', 253 + help=f'From header to use if --cleanup is specified and IF message from header is missing or not parasable. Default is "{default_cleanup_from}". 
def cleanup_from(old_from):
    '''Return a From header value that is safe to restore.

    Args:
      old_from: the message's existing From header value, or None/empty
        when the header is missing.

    Returns:
      options.cleanup_from when old_from is missing or cannot be parsed at
      all. Otherwise the value re-rendered by email.utils.formataddr(),
      with a placeholder address substituted when the parsed address does
      not contain exactly one "@".
    '''
    if not old_from:
        return options.cleanup_from
    real_name, address = email.utils.parseaddr(old_from)
    # parseaddr() reports a parse failure as two empty strings.
    if not real_name and not address:
        return options.cleanup_from
    if address.count('@') != 1:
        # No usable address, e.g. "From: Joe Schmo". parseaddr() hands back
        # only the first word of such a header, in the address slot
        # ('', 'Joe'), so rebuild the display name from the whole header
        # text (whitespace collapsed) instead of losing it.
        if not real_name:
            real_name = ' '.join(str(old_from).split())
        # NOTE(review): this literal looks like an email-obfuscation
        # artifact in the copy of the file under review - confirm the
        # intended placeholder address against the real source.
        address = '[email protected]'
    # An empty real_name is fine: formataddr() then returns the bare
    # address, so an address-only header is not changed needlessly.
    return email.utils.formataddr((real_name, address))


def _acceptable_date_forms(orig_date):
    '''Return the renderings of orig_date that count as already valid.

    The result is a tuple whose first item is the preferred replacement
    value, or None when orig_date cannot be parsed.
    '''
    try:
        parsed_datetime = email.utils.parsedate_to_datetime(orig_date)
        new_date = email.utils.format_datetime(parsed_datetime)
    except (TypeError, ValueError):
        # ValueError on current Pythons, TypeError on older ones.
        return None
    # Preserve timezone values in parenthesis at end of date header.
    # Python doesn't generate these but they seem to be valid and common.
    tz_suffix = re.search(r"(\s\(\w{1,6}\))$", orig_date.strip())
    if tz_suffix:
        new_date += tz_suffix.group(1)
    try:
        new_date_gmt = email.utils.format_datetime(parsed_datetime,
                                                   usegmt=True)
    except ValueError:
        # format_datetime() rejects usegmt for non-UTC datetimes; then
        # only the numeric-offset form is acceptable.
        return (new_date,)
    return (new_date, new_date_gmt)


def _swap_header(omsg, name, new_value, orig_value, backup_name, label,
                 changes):
    '''Set header `name` to new_value and record what was done in changes.

    An existing header is replaced in place and its old value is kept in
    the `backup_name` header; a missing header (orig_value is None) is
    simply added.
    '''
    if orig_value is None:
        omsg.add_header(name, new_value)
        changes.append(f'added {label}')
    else:
        omsg.replace_header(name, new_value)
        omsg.add_header(backup_name, orig_value)
        changes.append(f'replaced {label}')


def message_hygiene(msg):
    '''Ensure Message-Id, Date and From headers are valid. Replace if not.

    Args:
      msg: the raw RFC 822 message as bytes.

    Returns:
      msg itself when no header needed fixing. Otherwise the re-serialized
      message bytes with the fixed headers, the original values saved in
      X-GYB-Orig-Msg-* headers, and X-GYB-Changes / X-GYB-Changes-Made
      headers describing what was changed and when.
    '''
    omsg = email.message_from_bytes(msg)
    orig_id = omsg['message-id']
    orig_date = omsg['date']
    orig_from = omsg['from']
    gyb_changes = []

    # A Message-ID is accepted when parseaddr() finds an address in it.
    _, orig_id_email = email.utils.parseaddr(orig_id)
    if not orig_id_email:
        new_id = email.utils.make_msgid(domain='gyb-restore.local')
        _swap_header(omsg, 'Message-ID', new_id, orig_id,
                     'X-GYB-Orig-Msg-Id', 'msgid', gyb_changes)

    # A missing, empty or unparsable Date falls back to --cleanup-date.
    date_forms = _acceptable_date_forms(orig_date) if orig_date else None
    if date_forms is None:
        _swap_header(omsg, 'Date', options.cleanup_date, orig_date,
                     'X-GYB-Orig-Msg-Date', 'date', gyb_changes)
    elif orig_date not in date_forms:
        _swap_header(omsg, 'Date', date_forms[0], orig_date,
                     'X-GYB-Orig-Msg-Date', 'date', gyb_changes)

    new_from = cleanup_from(orig_from)
    if orig_from != new_from:
        _swap_header(omsg, 'From', new_from, orig_from,
                     'X-GYB-Orig-Msg-From', 'from', gyb_changes)

    if not gyb_changes:
        # Nothing to fix: hand back the caller's bytes untouched rather
        # than a re-serialized copy.
        return msg
    omsg.add_header('X-GYB-Changes', ', '.join(gyb_changes))
    omsg.add_header('X-GYB-Changes-Made',
                    email.utils.formatdate(localtime=True))
    return omsg.as_bytes()
restore_msg_to_group(gmig, full_message, message_num, sqlconn) 2229 2318 else: # mbox format 2230 2319 sqlcur.execute('ATTACH ? as resume', (resumedb,)) skipped 31 lines 2262 2351 message = mbox.next() 2263 2352 except StopIteration: 2264 2353 break 2265 - if not message.get_header(b'from', case_insensitive=True): 2266 - message.set_headers({b'From': b'Not Set <[email protected]>'}) 2267 2354 mbox_pct = percentage(mbox._mbox_position, mbox._mbox_size) 2268 2355 rewrite_line(" message %s - %s%%" % (current, mbox_pct)) 2269 2356 full_message = message.as_bytes() 2357 + if options.cleanup: 2358 + full_message = message_hygiene(full_message) 2270 2359 restore_msg_to_group(gmig, full_message, request_id, sqlconn) 2271 2360 sqlconn.commit() 2272 2361 sqlconn.execute('DETACH resume') skipped 191 lines