| skipped 23 lines |
24 | 24 | | __program_name__ = 'Got Your Back: Gmail Backup' |
25 | 25 | | __author__ = 'Jay Lee' |
26 | 26 | | __email__ = '[email protected]' |
27 | | - | __version__ = '1.63' |
| 27 | + | __version__ = '1.70' |
28 | 28 | | __license__ = 'Apache License 2.0 (https://www.apache.org/licenses/LICENSE-2.0)' |
29 | 29 | | __website__ = 'jaylee.us/gyb' |
30 | 30 | | __db_schema_version__ = '6' |
31 | 31 | | __db_schema_min_version__ = '6' #Minimum for restore |
32 | 32 | | |
33 | | - | global extra_args, options, allLabelIds, allLabels, gmail, reserved_labels |
| 33 | + | global extra_args, options, allLabelIds, allLabels, gmail, reserved_labels, thread_msgid_map |
34 | 34 | | extra_args = {'prettyPrint': False} |
35 | 35 | | allLabelIds = dict() |
36 | 36 | | allLabels = dict() |
| skipped 3 lines |
40 | 40 | | system_labels = ['INBOX', 'SPAM', 'TRASH', 'UNREAD', 'STARRED', 'IMPORTANT', |
41 | 41 | | 'SENT', 'DRAFT', 'CATEGORY_PERSONAL', 'CATEGORY_SOCIAL', |
42 | 42 | | 'CATEGORY_PROMOTIONS', 'CATEGORY_UPDATES', 'CATEGORY_FORUMS'] |
| 43 | + | thread_msgid_map = {} |
| 44 | + | |
43 | 45 | | import argparse |
44 | 46 | | import importlib |
45 | 47 | | from io import BytesIO |
| skipped 15 lines |
61 | 63 | | import sqlite3 |
62 | 64 | | import ssl |
63 | 65 | | import email |
| 66 | + | from email.mime.text import MIMEText |
| 67 | + | from email.mime.multipart import MIMEMultipart |
| 68 | + | from email.utils import (format_datetime, |
| 69 | + | make_msgid) |
64 | 70 | | import hashlib |
65 | 71 | | import pkg_resources |
66 | 72 | | import re |
| skipped 57 lines |
124 | 130 | | parser.add_argument('--email', |
125 | 131 | | dest='email', |
126 | 132 | | help='Full email address of user or group to act against') |
127 | | - | action_choices = ['backup','restore', 'restore-group', 'restore-mbox', |
| 133 | + | action_choices = ['backup','backup-chat', 'restore', 'restore-group', 'restore-mbox', |
128 | 134 | | 'count', 'purge', 'purge-labels', 'print-labels', 'estimate', 'quota', 'reindex', 'revoke', |
129 | 135 | | 'split-mbox', 'create-project', 'delete-projects', 'check-service-account', 'create-label'] |
130 | 136 | | parser.add_argument('--action', |
| skipped 542 lines |
673 | 679 | | except googleapiclient.errors.UnknownApiNameOrVersion: |
674 | 680 | | disc_file = os.path.join(options.config_folder, '%s-%s.json' % (api, version)) |
675 | 681 | | if os.path.isfile(disc_file): |
676 | | - | f = file(disc_file, 'r') |
677 | | - | discovery = f.read() |
678 | | - | f.close() |
| 682 | + | with open(disc_file, 'r') as f: |
| 683 | + | discovery = f.read() |
679 | 684 | | return googleapiclient.discovery.build_from_document(discovery, |
680 | 685 | | base='https://www.googleapis.com', http=httpc) |
681 | 686 | | else: |
| skipped 838 lines |
1520 | 1525 | | WHERE rfc822_msgid IS NULL'''): |
1521 | 1526 | | message_full_filename = os.path.join(backup_folder, filename) |
1522 | 1527 | | if os.path.isfile(message_full_filename): |
1523 | | - | f = open(message_full_filename, 'r') |
1524 | | - | msgid = header_parser.parse(f, True).get('message-id') or '<DummyMsgID>' |
1525 | | - | f.close() |
| 1528 | + | with open(message_full_filename, 'r') as f: |
| 1529 | + | msgid = header_parser.parse(f, True).get('message-id') or '<DummyMsgID>' |
1526 | 1530 | | sqlcur.execute( |
1527 | 1531 | | 'UPDATE messages SET rfc822_msgid = ? WHERE message_num = ?', |
1528 | 1532 | | (msgid, message_num)) |
| skipped 167 lines |
1696 | 1700 | | if exception is not None: |
1697 | 1701 | | raise exception |
1698 | 1702 | | |
def backup_chat(request_id, response, exception):
    """Batch callback: save one Gmail Chat message as an .eml file and index it.

    Builds a synthetic multipart/alternative email whose single HTML part is
    the decoded ``payload.body.data`` of the API response, writes it below
    ``options.local_folder`` in a ``<year>/<month>/<day>`` directory and
    records it in the ``messages``, ``uids`` and ``labels`` tables through the
    module-level ``sqlcur`` cursor.

    Args:
        request_id: Batch request identifier supplied by the caller (unused).
        response: Message resource dict. ``id``, ``internalDate`` and
            ``payload`` are read unconditionally; ``labelIds`` and
            ``threadId`` are optional.
        exception: Error reported for this batch item, or None on success.
            When set, it is printed and nothing is written.
    """
    if exception is not None:
        print(exception)
        return
    labels = labelIdsToLabels(response.get('labelIds', []))
    message_file_name = "%s.eml" % (response['id'])
    internal_date_ms = int(response['internalDate'])
    message_time = internal_date_ms / 1000
    # Derive an *aware* UTC datetime with pure integer arithmetic:
    #  - format_datetime() treats a naive datetime as UTC, so feeding it local
    #    time (as fromtimestamp() without tz returns) skews the Date header by
    #    the machine's UTC offset;
    #  - no platform time function is involved, so this cannot fail before the
    #    guarded local-time conversion below gets a chance to fall back.
    message_datetime = (
        datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
        + datetime.timedelta(milliseconds=internal_date_ms))

    message = MIMEMultipart("alternative")
    html = base64.urlsafe_b64decode(
        response['payload']['body'].get('data', '')).decode()
    message.attach(MIMEText(html, "html"))
    for header in response['payload'].get('headers', []):
        if header['name'] == 'From':
            message['From'] = header['value']
            # Assigning again would append a second From header.
            break
    message['Date'] = format_datetime(message_datetime)
    thread_id = response.get('threadId')
    message['Message-ID'] = make_msgid(domain='gyb-chat-backup')
    if thread_id not in thread_msgid_map:
        # for each thread_id we create a fake msgid and use it for
        # In-Reply-To. The fake msgid isn't the Message-ID for any
        # real message but using it as In-Reply-To on all messages
        # in the thread lets Gmail properly thread the Chat conversation.
        thread_msgid_map[thread_id] = make_msgid(domain='gyb-chat-backup')
    message['In-Reply-To'] = thread_msgid_map[thread_id]

    # The database keeps a naive local-time value; the conversion can be
    # rejected by the platform, hence the fallback.
    try:
        time_for_sqlite = datetime.datetime.fromtimestamp(message_time)
    except (OSError, IOError, OverflowError):
        time_for_sqlite = datetime.datetime.fromtimestamp(86400) # minimal value Win accepts

    # Folder layout is keyed on the UTC calendar date of the message.
    message_rel_path = os.path.join(str(message_datetime.year),
                                    str(message_datetime.month),
                                    str(message_datetime.day))
    message_rel_filename = os.path.join(message_rel_path,
                                        message_file_name)
    message_full_path = os.path.join(options.local_folder,
                                     message_rel_path)
    message_full_filename = os.path.join(options.local_folder,
                                         message_rel_filename)
    os.makedirs(message_full_path, exist_ok=True)
    with open(message_full_filename, 'wb') as f:
        f.write(message.as_string().encode())

    sqlcur.execute("""
             INSERT INTO messages (
                         message_filename,
                         message_internaldate) VALUES (?, ?)""",
                   (message_rel_filename,
                    time_for_sqlite))
    message_num = sqlcur.lastrowid
    sqlcur.execute("""
             REPLACE INTO uids (message_num, uid) VALUES (?, ?)""",
                   (message_num, response['id']))
    for label in labels:
        sqlcur.execute("""
           INSERT INTO labels (message_num, label) VALUES (?, ?)""",
                       (message_num, label))
| 1761 | + | |
1699 | 1762 | | def backup_message(request_id, response, exception): |
1700 | 1763 | | if exception is not None: |
1701 | 1764 | | print(exception) |
1702 | 1765 | | else: |
1703 | | - | if 'labelIds' in response: |
1704 | | - | labelIds = response['labelIds'] |
1705 | | - | else: |
1706 | | - | labelIds = list() |
1707 | | - | if 'CHATS' in labelIds: # skip CHATS |
| 1766 | + | labelIds = response.get('labelIds', []) |
| 1767 | + | if 'CHATS' in labelIds or 'CHAT' in labelIds: # skip CHATS |
1708 | 1768 | | return |
1709 | 1769 | | labels = labelIdsToLabels(labelIds) |
1710 | 1770 | | message_file_name = "%s.eml" % (response['id']) |
| skipped 14 lines |
1725 | 1785 | | message_rel_filename) |
1726 | 1786 | | if not os.path.isdir(message_full_path): |
1727 | 1787 | | os.makedirs(message_full_path) |
1728 | | - | f = open(message_full_filename, 'wb') |
1729 | 1788 | | raw_message = str(response['raw']) |
1730 | 1789 | | full_message = base64.urlsafe_b64decode(raw_message) |
1731 | | - | f.write(full_message) |
1732 | | - | f.close() |
| 1790 | + | with open(message_full_filename, 'wb') as f: |
| 1791 | + | f.write(full_message) |
1733 | 1792 | | sqlcur.execute(""" |
1734 | 1793 | | INSERT INTO messages ( |
1735 | 1794 | | message_filename, |
| skipped 218 lines |
1954 | 2013 | | else: |
1955 | 2014 | | gmail = buildGAPIServiceObject('gmail') |
1956 | 2015 | | if not os.path.isdir(options.local_folder): |
1957 | | - | if options.action in ['backup',]: |
| 2016 | + | if options.action in ['backup', 'backup-chat']: |
1958 | 2017 | | os.mkdir(options.local_folder) |
1959 | 2018 | | elif options.action in ['restore', 'restore-group', 'restore-mbox']: |
1960 | 2019 | | print('ERROR: Folder %s does not exist. Cannot restore.' |
| skipped 115 lines |
2076 | 2135 | | (refreshed_messages, refresh_count)) |
2077 | 2136 | | print("\n") |
2078 | 2137 | | |
| 2138 | + | # BACKUP-CHAT |
| 2139 | + | elif options.action == 'backup-chat': |
| 2140 | + | if options.batch_size == 0: |
| 2141 | + | options.batch_size = 50 |
| 2142 | + | if options.gmail_search == '-is:chat': |
| 2143 | + | options.gmail_search = 'is:chat' |
| 2144 | + | page_message = 'Got %%total_items%% Chat IDs' |
| 2145 | + | messages_to_process = callGAPIpages(gmail.users().messages(), |
| 2146 | + | 'list', items='messages', page_message=page_message, maxResults=500, |
| 2147 | + | userId='me', includeSpamTrash=options.spamtrash, q=options.gmail_search, |
| 2148 | + | fields='nextPageToken,messages/id') |
| 2149 | + | backup_path = options.local_folder |
| 2150 | + | if not os.path.isdir(backup_path): |
| 2151 | + | os.mkdir(backup_path) |
| 2152 | + | messages_to_backup = [] |
| 2153 | + | # Determine which messages from the search we haven't processed before. |
| 2154 | + | print("GYB needs to examine %s Chats" % len(messages_to_process)) |
| 2155 | + | for message_num in messages_to_process: |
| 2156 | + | if newDB or not message_is_backed_up(message_num['id'], sqlcur, sqlconn, |
| 2157 | + | options.local_folder): |
| 2158 | + | messages_to_backup.append(message_num['id']) |
| 2159 | + | print("GYB already has a backup of %s Chats" % |
| 2160 | + | (len(messages_to_process) - len(messages_to_backup))) |
| 2161 | + | backup_count = len(messages_to_backup) |
| 2162 | + | print("GYB needs to backup %s Chats" % backup_count) |
| 2163 | + | if options.memory_limit: |
| 2164 | + | memory_limit = options.memory_limit * 1024 * 1024 |
| 2165 | + | message_sizes = getSizeOfMessages(messages_to_backup, gmail) |
| 2166 | + | request_size = 0 |
| 2167 | + | backed_up_messages = 0 |
| 2168 | + | gbatch = gmail.new_batch_http_request() |
| 2169 | + | for a_message in messages_to_backup: |
| 2170 | + | if options.memory_limit: |
| 2171 | + | request_size += message_sizes[a_message] |
| 2172 | + | if len(gbatch._order) == options.batch_size or (options.memory_limit and request_size >= memory_limit): |
| 2173 | + | callGAPI(gbatch, None, soft_errors=True) |
| 2174 | + | gbatch = gmail.new_batch_http_request() |
| 2175 | + | sqlconn.commit() |
| 2176 | + | if options.memory_limit: |
| 2177 | + | request_size = message_sizes[a_message] |
| 2178 | + | rewrite_line("backed up %s of %s Chats" % |
| 2179 | + | (backed_up_messages, backup_count)) |
| 2180 | + | gbatch.add(gmail.users().messages().get(userId='me', |
| 2181 | + | id=a_message, format='full', |
| 2182 | + | fields='id,threadId,internalDate,labelIds,payload'), |
| 2183 | + | callback=backup_chat) |
| 2184 | + | backed_up_messages += 1 |
| 2185 | + | if len(gbatch._order) > 0: |
| 2186 | + | callGAPI(gbatch, None, soft_errors=True) |
| 2187 | + | sqlconn.commit() |
| 2188 | + | rewrite_line("backed up %s of %s messages" % |
| 2189 | + | (backed_up_messages, backup_count)) |
| 2190 | + | print("\n") |
| 2191 | + | |
2079 | 2192 | | # RESTORE # |
2080 | 2193 | | elif options.action == 'restore': |
2081 | 2194 | | if options.batch_size == 0: |
| skipped 48 lines |
2130 | 2243 | | labels_results = sqlcur.fetchall() |
2131 | 2244 | | for l in labels_results: |
2132 | 2245 | | if options.label_prefix: |
2133 | | - | if l[0].lower()!="unread": |
| 2246 | + | if l[0].lower() != 'unread': |
2134 | 2247 | | labels.append(options.label_prefix[0] + "/" + l[0]) |
2135 | 2248 | | else: |
2136 | 2249 | | labels.append(l[0]) |
2137 | 2250 | | else: |
| 2251 | + | if l == ('CHAT',): |
| 2252 | + | l = ('Chats_restored',) |
2138 | 2253 | | labels.append(l[0]) |
2139 | 2254 | | if options.label_restored: |
2140 | 2255 | | for restore_label in options.label_restored: |
| skipped 532 lines |