Projects STRLCPY got-your-back Commits da27ae7b
🤬
  • Add --cleanup options

    --cleanup - ensure restored messages have a valid Message-ID, From and Date header. Should help with #342.
    --cleanup-date - on --cleanup, use provided date instead of current date when we can't get a valid date on the message at all.
    --cleanup-from - on --cleanup, use provided from header value when we can't get a valid value on the message at all.
  • Loading...
  • Jay Lee committed with GitHub 2 years ago
    da27ae7b
    1 parent ef55ad07
Revision indexing in progress... (symbol navigation in revisions will be accurate after indexed)
  • ■ ■ ■ ■ ■
    gyb.py
    skipped 23 lines
    24 24  __program_name__ = 'Got Your Back: Gmail Backup'
    25 25  __author__ = 'Jay Lee'
    26 26  __email__ = '[email protected]'
    27  -__version__ = '1.54'
     27 +__version__ = '1.55'
    28 28  __license__ = 'Apache License 2.0 (https://www.apache.org/licenses/LICENSE-2.0)'
    29 29  __website__ = 'https://git.io/gyb'
    30 30  __db_schema_version__ = '6'
    skipped 207 lines
    238 238   dest='config_folder',
    239 239   help='Optional: Alternate folder to store config and credentials',
    240 240   default=getProgPath())
     241 + parser.add_argument('--cleanup',
     242 + action='store_true',
     243 + dest='cleanup',
     244 + help='Attempt to cleanup Message-Id, From and Date headers on restore to avoid issues. MAKES PERMANENT CHANGES TO RESTORED MESSAGES.')
     245 + now_date_header = email.utils.formatdate(localtime=True)
     246 + parser.add_argument('--cleanup-date',
     247 + dest='cleanup_date',
     248 + help=f'Date header to use if --cleanup is specified and IF message date header is missing or is not parsable. Format should look like "{now_date_header}". Defaults to now.',
     249 + default=now_date_header)
     250 + default_cleanup_from = 'GYB Restore <[email protected]>'
     251 + parser.add_argument('--cleanup-from',
     252 + dest='cleanup_from',
     253 + help=f'From header to use if --cleanup is specified and IF message from header is missing or not parasable. Default is "{default_cleanup_from}". Use a similar format.',
     254 + default=default_cleanup_from)
    241 255   parser.add_argument('--version',
    242 256   action='store_true',
    243 257   dest='version',
    skipped 1449 lines
    1693 1707   (message_num,))
    1694 1708   sqlconn.commit()
    1695 1709   
     1710 +def cleanup_from(old_from):
     1711 + if not old_from:
     1712 + return options.cleanup_from
     1713 + parsed_from = list(email.utils.parseaddr(old_from))
     1714 + # empty values mean error in parseaddr
     1715 + if not parsed_from[0] and not parsed_from[1]:
     1716 + return options.cleanup_from
     1717 + # no valid email address like:
     1718 + # From: Joe Schmo
     1719 + # Clean this up to:
     1720 + # From: Joe Schmo <[email protected]
     1721 + # so that we don't lose the real name.
     1722 + if not parsed_from[1] or parsed_from[1].count('@') != 1:
     1723 + parsed_from[1] = '[email protected]'
     1724 + # Note that parsed_from[0] == None is perfectly acceptable.
     1725 + # It means the from header is just an email address.
     1726 + # That's what we should land with here also so we don't
     1727 + # change it needlessly.
     1728 + return email.utils.formataddr(tuple(parsed_from))
     1729 + 
     1730 +def message_hygiene(msg):
     1731 + '''Ensure Message-Id, Date and From headers are valid. Replace if not.'''
     1732 + omsg = email.message_from_bytes(msg)
     1733 + orig_id = omsg['message-id']
     1734 + orig_date = omsg['date']
     1735 + orig_from = omsg['from']
     1736 + gyb_changes = []
     1737 + _, orig_id_email = email.utils.parseaddr(orig_id)
     1738 + if not orig_id_email:
     1739 + new_id = email.utils.make_msgid(domain='gyb-restore.local')
     1740 + try:
     1741 + omsg.replace_header('Message-ID', new_id)
     1742 + omsg.add_header('X-GYB-Orig-Msg-Id', orig_id)
     1743 + gyb_changes.append('replaced msgid')
     1744 + except KeyError:
     1745 + omsg.add_header('Message-ID', new_id)
     1746 + gyb_changes.append('added msgid')
     1747 + if not orig_date:
     1748 + new_date = options.cleanup_date
     1749 + else:
     1750 + parsed_datetime = email.utils.parsedate_to_datetime(orig_date)
     1751 + new_date = email.utils.format_datetime(parsed_datetime)
     1752 + # preserve timezone values in parenthesis at end of date header
     1753 + # Python doesn't generate these but they seem to be valid and common.
     1754 + tz_suffix = re.search(r"(\s\(\w{1,6}\))$", orig_date.strip())
     1755 + if tz_suffix:
     1756 + new_date += tz_suffix.group(1)
     1757 + try:
     1758 + new_date_gmt = email.utils.format_datetime(parsed_datetime, usegmt=True)
     1759 + except ValueError:
     1760 + new_date_gmt = 'not valid gmt'
     1761 + if not orig_date or (orig_date != new_date and orig_date != new_date_gmt):
     1762 + try:
     1763 + omsg.replace_header('Date', new_date)
     1764 + omsg.add_header('X-GYB-Orig-Msg-Date', orig_date)
     1765 + gyb_changes.append('replaced date')
     1766 + except KeyError:
     1767 + omsg.add_header('Date', new_date)
     1768 + gyb_changes.append('added date')
     1769 + new_from = cleanup_from(orig_from)
     1770 + if orig_from != new_from:
     1771 + try:
     1772 + omsg.replace_header('From', new_from)
     1773 + omsg.add_header('X-GYB-Orig-Msg-From', orig_from)
     1774 + gyb_changes.append('replaced from')
     1775 + except KeyError:
     1776 + omsg.add_header('From', new_from)
     1777 + gyb_changes.append('added from')
     1778 + if gyb_changes:
     1779 + omsg.add_header('X-GYB-Changes', ', '.join(gyb_changes))
     1780 + omsg.add_header('X-GYB-Changes-Made', email.utils.formatdate(localtime=True))
     1781 + return omsg.as_bytes()
     1782 + 
    1696 1783   
    1697 1784  def main(argv):
    1698 1785   global options, gmail
    skipped 215 lines
    1914 2001   message_num))
    1915 2002   print(' this message will be skipped.')
    1916 2003   continue
    1917  - f = open(os.path.join(options.local_folder, message_filename), 'rb')
    1918  - full_message = f.read()
    1919  - f.close()
     2004 + with open(os.path.join(options.local_folder, message_filename), 'rb') as f:
     2005 + full_message = f.read()
     2006 + if options.cleanup:
     2007 + full_message = message_hygiene(full_message)
    1920 2008   labels = []
    1921 2009   if not options.strip_labels:
    1922 2010   sqlcur.execute('SELECT DISTINCT label FROM labels WHERE message_num \
    skipped 147 lines
    2070 2158   message = mbox.next()
    2071 2159   except StopIteration:
    2072 2160   break
    2073  - if not message.get_header(b'from', case_insensitive=True):
    2074  - message.set_headers({b'From': b'Not Set <[email protected]>'})
    2075 2161   mbox_pct = percentage(mbox._mbox_position, mbox._mbox_size)
    2076 2162   deleted = options.vault
    2077 2163   labels = options.label_restored.copy()
    skipped 39 lines
    2117 2203   labelIds = labelsToLabelIds(cased_labels)
    2118 2204   rewrite_line(" message %s - %s%%" % (current, mbox_pct))
    2119 2205   full_message = message.as_bytes()
     2206 + if options.cleanup:
     2207 + full_message = message_hygiene(full_message)
    2120 2208   body = {}
    2121 2209   if labelIds:
    2122 2210   body['labelIds'] = labelIds
    skipped 99 lines
    2222 2310   (os.path.join(options.local_folder, message_filename), message_num))
    2223 2311   print(' this message will be skipped.')
    2224 2312   continue
    2225  - f = open(os.path.join(options.local_folder, message_filename), 'rb')
    2226  - full_message = f.read()
    2227  - f.close()
     2313 + with open(os.path.join(options.local_folder, message_filename), 'rb') as f:
     2314 + full_message = f.read()
     2315 + if options.cleanup:
     2316 + full_message = message_hygiene(full_message)
    2228 2317   restore_msg_to_group(gmig, full_message, message_num, sqlconn)
    2229 2318   else: # mbox format
    2230 2319   sqlcur.execute('ATTACH ? as resume', (resumedb,))
    skipped 31 lines
    2262 2351   message = mbox.next()
    2263 2352   except StopIteration:
    2264 2353   break
    2265  - if not message.get_header(b'from', case_insensitive=True):
    2266  - message.set_headers({b'From': b'Not Set <[email protected]>'})
    2267 2354   mbox_pct = percentage(mbox._mbox_position, mbox._mbox_size)
    2268 2355   rewrite_line(" message %s - %s%%" % (current, mbox_pct))
    2269 2356   full_message = message.as_bytes()
     2357 + if options.cleanup:
     2358 + full_message = message_hygiene(full_message)
    2270 2359   restore_msg_to_group(gmig, full_message, request_id, sqlconn)
    2271 2360   sqlconn.commit()
    2272 2361   sqlconn.execute('DETACH resume')
    skipped 191 lines
Please wait...
Page is in error, reload to recover