Projects STRLCPY got-your-back Commits 030494d1
🤬
  • ■ ■ ■ ■ ■
    .github/workflows/build.yml
    skipped 444 lines
    445 445   $gyb --action print-labels --email $gyb_user --service-account
    446 446   $gyb --action backup --email $gyb_user
    447 447   $gyb --action backup --email $gyb_user --service-account --local-folder sa-backup
     448 + $gyb --action backup-chat --email $gyb_user
    448 449   
    449 450   - name: Upload to Google Drive, build only.
    450 451   continue-on-error: true
    skipped 24 lines
  • ■ ■ ■ ■ ■
    gyb.py
    skipped 23 lines
    24 24  __program_name__ = 'Got Your Back: Gmail Backup'
    25 25  __author__ = 'Jay Lee'
    26 26  __email__ = '[email protected]'
    27  -__version__ = '1.63'
     27 +__version__ = '1.70'
    28 28  __license__ = 'Apache License 2.0 (https://www.apache.org/licenses/LICENSE-2.0)'
    29 29  __website__ = 'jaylee.us/gyb'
    30 30  __db_schema_version__ = '6'
    31 31  __db_schema_min_version__ = '6' #Minimum for restore
    32 32   
    33  -global extra_args, options, allLabelIds, allLabels, gmail, reserved_labels
     33 +global extra_args, options, allLabelIds, allLabels, gmail, reserved_labels, thread_msgid_map
    34 34  extra_args = {'prettyPrint': False}
    35 35  allLabelIds = dict()
    36 36  allLabels = dict()
    skipped 3 lines
    40 40  system_labels = ['INBOX', 'SPAM', 'TRASH', 'UNREAD', 'STARRED', 'IMPORTANT',
    41 41   'SENT', 'DRAFT', 'CATEGORY_PERSONAL', 'CATEGORY_SOCIAL',
    42 42   'CATEGORY_PROMOTIONS', 'CATEGORY_UPDATES', 'CATEGORY_FORUMS']
     43 +thread_msgid_map = {}
     44 + 
    43 45  import argparse
    44 46  import importlib
    45 47  from io import BytesIO
    skipped 15 lines
    61 63  import sqlite3
    62 64  import ssl
    63 65  import email
     66 +from email.mime.text import MIMEText
     67 +from email.mime.multipart import MIMEMultipart
     68 +from email.utils import (format_datetime,
     69 + make_msgid)
    64 70  import hashlib
    65 71  import pkg_resources
    66 72  import re
    skipped 57 lines
    124 130   parser.add_argument('--email',
    125 131   dest='email',
    126 132   help='Full email address of user or group to act against')
    127  - action_choices = ['backup','restore', 'restore-group', 'restore-mbox',
     133 + action_choices = ['backup','backup-chat', 'restore', 'restore-group', 'restore-mbox',
    128 134   'count', 'purge', 'purge-labels', 'print-labels', 'estimate', 'quota', 'reindex', 'revoke',
    129 135   'split-mbox', 'create-project', 'delete-projects', 'check-service-account', 'create-label']
    130 136   parser.add_argument('--action',
    skipped 542 lines
    673 679   except googleapiclient.errors.UnknownApiNameOrVersion:
    674 680   disc_file = os.path.join(options.config_folder, '%s-%s.json' % (api, version))
    675 681   if os.path.isfile(disc_file):
    676  - f = file(disc_file, 'r')
    677  - discovery = f.read()
    678  - f.close()
     682 + with open(disc_file, 'r') as f:
     683 + discovery = f.read()
    679 684   return googleapiclient.discovery.build_from_document(discovery,
    680 685   base='https://www.googleapis.com', http=httpc)
    681 686   else:
    skipped 838 lines
    1520 1525   WHERE rfc822_msgid IS NULL'''):
    1521 1526   message_full_filename = os.path.join(backup_folder, filename)
    1522 1527   if os.path.isfile(message_full_filename):
    1523  - f = open(message_full_filename, 'r')
    1524  - msgid = header_parser.parse(f, True).get('message-id') or '<DummyMsgID>'
    1525  - f.close()
     1528 + with open(message_full_filename, 'r') as f:
     1529 + msgid = header_parser.parse(f, True).get('message-id') or '<DummyMsgID>'
    1526 1530   sqlcur.execute(
    1527 1531   'UPDATE messages SET rfc822_msgid = ? WHERE message_num = ?',
    1528 1532   (msgid, message_num))
    skipped 167 lines
    1696 1700   if exception is not None:
    1697 1701   raise exception
    1698 1702   
     1703 +def backup_chat(request_id, response, exception):
     1704 + if exception is not None:
     1705 + print(exception)
     1706 + return
     1707 + labelIds = response.get('labelIds', [])
     1708 + labels = labelIdsToLabels(labelIds)
     1709 + message_file_name = "%s.eml" % (response['id'])
     1710 + message_time = int(response['internalDate'])/1000
     1711 + message_datetime = datetime.datetime.fromtimestamp(message_time)
     1712 + message_date = time.gmtime(message_time)
     1713 + message = MIMEMultipart("alternative")
     1714 + html = base64.urlsafe_b64decode(response['payload']['body'].get('data', '')).decode()
     1715 + part = MIMEText(html, "html")
     1716 + message.attach(part)
     1717 + for header in response['payload'].get('headers', []):
     1718 + if header['name'] == 'From':
     1719 + message['From'] = header['value']
     1720 + message['Date'] = format_datetime(message_datetime)
     1721 + thread_id = response.get('threadId')
     1722 + message['Message-ID'] = make_msgid(domain='gyb-chat-backup')
     1723 + if thread_id not in thread_msgid_map:
     1724 + # for each thread_id we create a fake msgid and use it for
     1725 + # In-Reply-To. The fake msgid isn't the Message-ID for any
     1726 + # real message but using it as In-Reply-To on all messages
     1727 + # in the thread lets Gmail properly thread the Chat conversation.
     1728 + thread_msgid_map[thread_id] = make_msgid(domain='gyb-chat-backup')
     1729 + message['In-Reply-To'] = thread_msgid_map[thread_id]
     1730 + try:
     1731 + time_for_sqlite = datetime.datetime.fromtimestamp(message_time)
     1732 + except (OSError, IOError, OverflowError):
     1733 + time_for_sqlite = datetime.datetime.fromtimestamp(86400) # minimal value Win accepts
     1734 + message_rel_path = os.path.join(str(message_date.tm_year),
     1735 + str(message_date.tm_mon),
     1736 + str(message_date.tm_mday))
     1737 + message_rel_filename = os.path.join(message_rel_path,
     1738 + message_file_name)
     1739 + message_full_path = os.path.join(options.local_folder,
     1740 + message_rel_path)
     1741 + message_full_filename = os.path.join(options.local_folder,
     1742 + message_rel_filename)
     1743 + if not os.path.isdir(message_full_path):
     1744 + os.makedirs(message_full_path)
     1745 + with open(message_full_filename, 'wb') as f:
     1746 + f.write(message.as_string().encode())
     1747 + sqlcur.execute("""
     1748 + INSERT INTO messages (
     1749 + message_filename,
     1750 + message_internaldate) VALUES (?, ?)""",
     1751 + (message_rel_filename,
     1752 + time_for_sqlite))
     1753 + message_num = sqlcur.lastrowid
     1754 + sqlcur.execute("""
     1755 + REPLACE INTO uids (message_num, uid) VALUES (?, ?)""",
     1756 + (message_num, response['id']))
     1757 + for label in labels:
     1758 + sqlcur.execute("""
     1759 + INSERT INTO labels (message_num, label) VALUES (?, ?)""",
     1760 + (message_num, label))
     1761 + 
    1699 1762  def backup_message(request_id, response, exception):
    1700 1763   if exception is not None:
    1701 1764   print(exception)
    1702 1765   else:
    1703  - if 'labelIds' in response:
    1704  - labelIds = response['labelIds']
    1705  - else:
    1706  - labelIds = list()
    1707  - if 'CHATS' in labelIds: # skip CHATS
     1766 + labelIds = response.get('labelIds', [])
     1767 + if 'CHATS' in labelIds or 'CHAT' in labelIds: # skip CHATS
    1708 1768   return
    1709 1769   labels = labelIdsToLabels(labelIds)
    1710 1770   message_file_name = "%s.eml" % (response['id'])
    skipped 14 lines
    1725 1785   message_rel_filename)
    1726 1786   if not os.path.isdir(message_full_path):
    1727 1787   os.makedirs(message_full_path)
    1728  - f = open(message_full_filename, 'wb')
    1729 1788   raw_message = str(response['raw'])
    1730 1789   full_message = base64.urlsafe_b64decode(raw_message)
    1731  - f.write(full_message)
    1732  - f.close()
     1790 + with open(message_full_filename, 'wb') as f:
     1791 + f.write(full_message)
    1733 1792   sqlcur.execute("""
    1734 1793   INSERT INTO messages (
    1735 1794   message_filename,
    skipped 218 lines
    1954 2013   else:
    1955 2014   gmail = buildGAPIServiceObject('gmail')
    1956 2015   if not os.path.isdir(options.local_folder):
    1957  - if options.action in ['backup',]:
     2016 + if options.action in ['backup', 'backup-chat']:
    1958 2017   os.mkdir(options.local_folder)
    1959 2018   elif options.action in ['restore', 'restore-group', 'restore-mbox']:
    1960 2019   print('ERROR: Folder %s does not exist. Cannot restore.'
    skipped 115 lines
    2076 2135   (refreshed_messages, refresh_count))
    2077 2136   print("\n")
    2078 2137   
     2138 + # BACKUP-CHAT
     2139 + elif options.action == 'backup-chat':
     2140 + if options.batch_size == 0:
     2141 + options.batch_size = 50
     2142 + if options.gmail_search == '-is:chat':
     2143 + options.gmail_search = 'is:chat'
     2144 + page_message = 'Got %%total_items%% Chat IDs'
     2145 + messages_to_process = callGAPIpages(gmail.users().messages(),
     2146 + 'list', items='messages', page_message=page_message, maxResults=500,
     2147 + userId='me', includeSpamTrash=options.spamtrash, q=options.gmail_search,
     2148 + fields='nextPageToken,messages/id')
     2149 + backup_path = options.local_folder
     2150 + if not os.path.isdir(backup_path):
     2151 + os.mkdir(backup_path)
     2152 + messages_to_backup = []
     2153 + # Determine which messages from the search we haven't processed before.
     2154 + print("GYB needs to examine %s Chats" % len(messages_to_process))
     2155 + for message_num in messages_to_process:
     2156 + if newDB or not message_is_backed_up(message_num['id'], sqlcur, sqlconn,
     2157 + options.local_folder):
     2158 + messages_to_backup.append(message_num['id'])
     2159 + print("GYB already has a backup of %s Chats" %
     2160 + (len(messages_to_process) - len(messages_to_backup)))
     2161 + backup_count = len(messages_to_backup)
     2162 + print("GYB needs to backup %s Chats" % backup_count)
     2163 + if options.memory_limit:
     2164 + memory_limit = options.memory_limit * 1024 * 1024
     2165 + message_sizes = getSizeOfMessages(messages_to_backup, gmail)
     2166 + request_size = 0
     2167 + backed_up_messages = 0
     2168 + gbatch = gmail.new_batch_http_request()
     2169 + for a_message in messages_to_backup:
     2170 + if options.memory_limit:
     2171 + request_size += message_sizes[a_message]
     2172 + if len(gbatch._order) == options.batch_size or (options.memory_limit and request_size >= memory_limit):
     2173 + callGAPI(gbatch, None, soft_errors=True)
     2174 + gbatch = gmail.new_batch_http_request()
     2175 + sqlconn.commit()
     2176 + if options.memory_limit:
     2177 + request_size = message_sizes[a_message]
     2178 + rewrite_line("backed up %s of %s Chats" %
     2179 + (backed_up_messages, backup_count))
     2180 + gbatch.add(gmail.users().messages().get(userId='me',
     2181 + id=a_message, format='full',
     2182 + fields='id,threadId,internalDate,labelIds,payload'),
     2183 + callback=backup_chat)
     2184 + backed_up_messages += 1
     2185 + if len(gbatch._order) > 0:
     2186 + callGAPI(gbatch, None, soft_errors=True)
     2187 + sqlconn.commit()
     2188 + rewrite_line("backed up %s of %s messages" %
     2189 + (backed_up_messages, backup_count))
     2190 + print("\n")
     2191 + 
    2079 2192   # RESTORE #
    2080 2193   elif options.action == 'restore':
    2081 2194   if options.batch_size == 0:
    skipped 48 lines
    2130 2243   labels_results = sqlcur.fetchall()
    2131 2244   for l in labels_results:
    2132 2245   if options.label_prefix:
    2133  - if l[0].lower()!="unread":
     2246 + if l[0].lower() != 'unread':
    2134 2247   labels.append(options.label_prefix[0] + "/" + l[0])
    2135 2248   else:
    2136 2249   labels.append(l[0])
    2137 2250   else:
     2251 + if l == ('CHAT',):
     2252 + l = ('Chats_restored',)
    2138 2253   labels.append(l[0])
    2139 2254   if options.label_restored:
    2140 2255   for restore_label in options.label_restored:
    skipped 532 lines
Please wait...
Page is in error, reload to recover