1 | | - | #!/usr/bin/python3 |
2 | | - | |
3 | | - | |
4 | | - | """Telepathy cli interface: |
5 | | - | An OSINT toolkit for investigating Telegram chats. |
6 | | - | """ |
7 | | - | |
8 | | - | from tokenize import group |
9 | | - | import pandas as pd |
10 | | - | import datetime |
11 | | - | import requests |
12 | | - | import json |
13 | | - | import random |
14 | | - | import glob |
15 | | - | import csv |
16 | | - | import os |
17 | | - | import getpass |
18 | | - | import click |
19 | | - | import re |
20 | | - | import textwrap |
21 | | - | import time |
22 | | - | import pprint |
23 | | - | |
24 | | - | from libs.utils import ( |
25 | | - | print_banner, |
26 | | - | color_print_green, |
27 | | - | populate_user, |
28 | | - | process_message, |
29 | | - | process_description, |
30 | | - | parse_tg_date, |
31 | | - | parse_html_page |
32 | | - | ) |
33 | | - | import libs.const as const |
34 | | - | |
35 | | - | from colorama import Fore, Back, Style |
36 | | - | |
37 | | - | from telethon.errors import SessionPasswordNeededError, ChannelPrivateError |
38 | | - | from telethon.tl.types import ( |
39 | | - | InputPeerEmpty, |
40 | | - | PeerUser, |
41 | | - | PeerChat, |
42 | | - | PeerChannel, |
43 | | - | PeerLocated, |
44 | | - | ChannelParticipantCreator, |
45 | | - | ChannelParticipantAdmin, |
46 | | - | ) |
47 | | - | from telethon.tl.functions.messages import GetDialogsRequest |
48 | | - | from telethon import TelegramClient, functions, types, utils |
49 | | - | from telethon.utils import get_display_name, get_message_id |
50 | | - | from alive_progress import alive_bar |
51 | | - | from bs4 import BeautifulSoup |
52 | | - | import pikepdf |
53 | | - | from hachoir.parser import createParser |
54 | | - | from hachoir.metadata import extractMetadata |
55 | | - | |
56 | | - | |
57 | | - | |
58 | | - | @click.command() |
59 | | - | @click.option( |
60 | | - | "--target", |
61 | | - | "-t", |
62 | | - | #default="", |
63 | | - | multiple=True, |
64 | | - | help="Specifies a chat to investigate.", |
65 | | - | ) |
66 | | - | @click.option( |
67 | | - | "--comprehensive", |
68 | | - | "-c", |
69 | | - | is_flag=True, |
70 | | - | help="Comprehensive scan, includes archiving.", |
71 | | - | ) |
72 | | - | @click.option( |
73 | | - | "--media", "-m", is_flag=True, help="Archives media in the specified chat." |
74 | | - | ) |
75 | | - | @click.option("--forwards", "-f", is_flag=True, help="Scrapes forwarded messages.") |
76 | | - | @click.option("--user", "-u", is_flag=True, help="Looks up a specified user ID.") |
77 | | - | @click.option( |
78 | | - | "--location", "-l", is_flag=True, help="Finds users near to specified coordinates." |
79 | | - | ) |
80 | | - | @click.option( |
81 | | - | "--alt", "-a", is_flag=True, default=False, help="Uses an alternative login." |
82 | | - | ) |
83 | | - | @click.option("--json", "-j", is_flag=True, default=False, help="Export to JSON.") |
84 | | - | @click.option( |
85 | | - | "--export", |
86 | | - | "-e", |
87 | | - | is_flag=True, |
88 | | - | default=False, |
89 | | - | help="Export a list of chats your account is part of.", |
90 | | - | ) |
91 | | - | @click.option( |
92 | | - | "--replies", |
93 | | - | "-r", |
94 | | - | is_flag=True, |
95 | | - | default=False, |
96 | | - | help="Enable replies analysis in channels.", |
97 | | - | ) |
98 | | - | def cli( |
99 | | - | target, comprehensive, media, forwards, user, location, alt, json, export, replies |
100 | | - | ): |
101 | | - | print_banner() |
102 | | - | telepathy_file = "./telepathy_files/" |
103 | | - | try: |
104 | | - | os.makedirs(telepathy_file) |
105 | | - | except FileExistsError: |
106 | | - | pass |
107 | | - | |
108 | | - | # Defining default values |
109 | | - | basic = False |
110 | | - | comp_check = False |
111 | | - | media_archive = False |
112 | | - | forwards_check = False |
113 | | - | forward_verify = False |
114 | | - | reply_analysis = False |
115 | | - | user_check = False |
116 | | - | location_check = False |
117 | | - | last_date = None |
118 | | - | chunk_size = 1000 |
119 | | - | filetime = datetime.datetime.now().strftime("%Y_%m_%d-%H_%M") |
120 | | - | filetime_clean = str(filetime) |
121 | | - | |
122 | | - | # Will add more languages later |
123 | | - | user_language = "en" |
124 | | - | |
125 | | - | if target: |
126 | | - | basic = True |
127 | | - | if replies: |
128 | | - | reply_analysis = True |
129 | | - | if forwards: |
130 | | - | forwards_check = True |
131 | | - | if user: |
132 | | - | user_check = True |
133 | | - | basic = False |
134 | | - | if location: |
135 | | - | location_check = True |
136 | | - | basic = False |
137 | | - | if comprehensive: |
138 | | - | comp_check = True |
139 | | - | if media: |
140 | | - | media_archive = True |
141 | | - | if export: |
142 | | - | t = " " |
143 | | - | if alt: |
144 | | - | alt_check = True |
145 | | - | else: |
146 | | - | alt_check = False |
147 | | - | |
148 | | - | if json: |
149 | | - | json_check = True |
150 | | - | json_file = telepathy_file + "json_files/" |
151 | | - | try: |
152 | | - | os.makedirs(json_file) |
153 | | - | except FileExistsError: |
154 | | - | pass |
155 | | - | else: |
156 | | - | json_check = False |
157 | | - | |
158 | | - | if alt_check == True: |
159 | | - | login = telepathy_file + "login_alt.txt" |
160 | | - | |
161 | | - | if os.path.isfile(login) == False: |
162 | | - | api_id = input(" Please enter your API ID:\n") |
163 | | - | api_hash = input(" Please enter your API Hash:\n") |
164 | | - | phone_number = input(" Please enter your phone number:\n") |
165 | | - | with open(login, "w+", encoding="utf-8") as f: |
166 | | - | f.write(api_id + "," + api_hash + "," + phone_number) |
167 | | - | else: |
168 | | - | with open(login, encoding="utf-8") as f: |
169 | | - | details = f.read() |
170 | | - | api_id, api_hash, phone_number = details.split(sep=",") |
171 | | - | else: |
172 | | - | login = telepathy_file + "login.txt" |
173 | | - | |
174 | | - | if os.path.isfile(login) == False: |
175 | | - | api_id = input(" Please enter your API ID:\n") |
176 | | - | api_hash = input(" Please enter your API Hash:\n") |
177 | | - | phone_number = input(" Please enter your phone number:\n") |
178 | | - | with open(login, "w+", encoding="utf-8") as f: |
179 | | - | f.write(api_id + "," + api_hash + "," + phone_number) |
180 | | - | else: |
181 | | - | with open(login, encoding="utf-8") as f: |
182 | | - | details = f.read() |
183 | | - | api_id, api_hash, phone_number = details.split(sep=",") |
184 | | - | |
185 | | - | client = TelegramClient(phone_number, api_id, api_hash) |
186 | | - | |
187 | | - | async def main(): |
188 | | - | |
189 | | - | await client.connect() |
190 | | - | if not await client.is_user_authorized(): |
191 | | - | await client.send_code_request(phone_number) |
192 | | - | await client.sign_in(phone_number) |
193 | | - | try: |
194 | | - | await client.sign_in(code=input(" Enter code: ")) |
195 | | - | except SessionPasswordNeededError: |
196 | | - | await client.sign_in( |
197 | | - | password=getpass.getpass(prompt="Password: ", stream=None) |
198 | | - | ) |
199 | | - | result = client( |
200 | | - | GetDialogsRequest( |
201 | | - | offset_date=last_date, |
202 | | - | offset_id=0, |
203 | | - | offset_peer=InputPeerEmpty(), |
204 | | - | limit=chunk_size, |
205 | | - | hash=0, |
206 | | - | ) |
207 | | - | ) |
208 | | - | else: |
209 | | - | |
210 | | - | if export == True: |
211 | | - | export_file = telepathy_file + "export.csv" |
212 | | - | exports = [] |
213 | | - | |
214 | | - | print("Exporting...") |
215 | | - | |
216 | | - | # progress bar |
217 | | - | |
218 | | - | for Dialog in await client.get_dialogs(): |
219 | | - | try: |
220 | | - | if Dialog.entity.username: |
221 | | - | group_url = "http://t.me/" + Dialog.entity.username |
222 | | - | group_username = Dialog.entity.username |
223 | | - | |
224 | | - | web_req = parse_html_page(group_url) |
225 | | - | group_description = web_req["group_description"] |
226 | | - | total_participants = web_req["total_participants"] |
227 | | - | |
228 | | - | _desc = process_description( |
229 | | - | group_description, user_language |
230 | | - | ) |
231 | | - | description_text = _desc["group_description"] |
232 | | - | original_language = _mess[ |
233 | | - | "original_language" |
234 | | - | ] |
235 | | - | translated_description = _desc["translated_text"] |
236 | | - | |
237 | | - | if Dialog.entity.broadcast is True: |
238 | | - | chat_type = "Channel" |
239 | | - | elif Dialog.entity.megagroup is True: |
240 | | - | chat_type = "Megagroup" |
241 | | - | elif Dialog.entity.gigagroup is True: |
242 | | - | chat_type = "Gigagroup" |
243 | | - | else: |
244 | | - | chat_type = "Chat" |
245 | | - | |
246 | | - | if Dialog.entity.restriction_reason is not None: |
247 | | - | ios_restriction = Dialog.entity.restriction_reason[ |
248 | | - | 0 |
249 | | - | ] |
250 | | - | if 1 in Dialog.entity.restriction_reason: |
251 | | - | android_restriction = ( |
252 | | - | Dialog.entity.restriction_reason[1] |
253 | | - | ) |
254 | | - | group_status = ( |
255 | | - | str(ios_restriction) |
256 | | - | + ", " |
257 | | - | + str(android_restriction) |
258 | | - | ) |
259 | | - | else: |
260 | | - | group_status = str(ios_restriction) |
261 | | - | else: |
262 | | - | group_status = "None" |
263 | | - | |
264 | | - | exports.append( |
265 | | - | [ |
266 | | - | filetime, |
267 | | - | Dialog.entity.title, |
268 | | - | group_description, |
269 | | - | translated_description, |
270 | | - | total_participants, |
271 | | - | group_username, |
272 | | - | group_url, |
273 | | - | chat_type, |
274 | | - | Dialog.entity.id, |
275 | | - | Dialog.entity.access_hash, |
276 | | - | group_status, |
277 | | - | ] |
278 | | - | ) |
279 | | - | |
280 | | - | export_df = pd.DataFrame( |
281 | | - | exports, |
282 | | - | columns=[ |
283 | | - | "Access Date", |
284 | | - | "Title", |
285 | | - | "Description", |
286 | | - | "Translated description", |
287 | | - | "Total participants", |
288 | | - | "Username", |
289 | | - | "URL", |
290 | | - | "Chat type", |
291 | | - | "Chat ID", |
292 | | - | "Access hash", |
293 | | - | "Restrictions", |
294 | | - | ], |
295 | | - | ) |
296 | | - | |
297 | | - | if not os.path.isfile(export_file): |
298 | | - | export_df.to_csv(export_file, sep=";", index=False) |
299 | | - | else: |
300 | | - | export_df.to_csv( |
301 | | - | export_file, sep=";", mode="w", index=False |
302 | | - | ) |
303 | | - | |
304 | | - | except AttributeError: |
305 | | - | pass |
306 | | - | |
307 | | - | else: |
308 | | - | |
309 | | - | for t in target: |
310 | | - | target_clean = t |
311 | | - | alphanumeric = "" |
312 | | - | |
313 | | - | |
314 | | - | for character in target_clean: |
315 | | - | if character.isalnum(): |
316 | | - | alphanumeric += character |
317 | | - | |
318 | | - | if "https://t.me/+" in t: |
319 | | - | t = t.replace('https://t.me/+', 'https://t.me/joinchat/') |
320 | | - | |
321 | | - | if basic is True or comp_check is True: |
322 | | - | save_directory = telepathy_file + alphanumeric |
323 | | - | try: |
324 | | - | os.makedirs(save_directory) |
325 | | - | except FileExistsError: |
326 | | - | pass |
327 | | - | |
328 | | - | # Creating logfile |
329 | | - | log_file = telepathy_file + "log.csv" |
330 | | - | |
331 | | - | if media_archive: |
332 | | - | media_directory = save_directory + "/media" |
333 | | - | try: |
334 | | - | os.makedirs(media_directory) |
335 | | - | except FileExistsError: |
336 | | - | pass |
337 | | - | |
338 | | - | if basic == True and comp_check == False: |
339 | | - | color_print_green(" [!] ", "Performing basic scan") |
340 | | - | elif comp_check == True: |
341 | | - | color_print_green(" [!] ", "Performing comprehensive scan") |
342 | | - | file_archive = ( |
343 | | - | save_directory |
344 | | - | + "/" |
345 | | - | + alphanumeric |
346 | | - | + "_" |
347 | | - | + filetime_clean |
348 | | - | + "_archive.csv" |
349 | | - | ) |
350 | | - | reply_file_archive = ( |
351 | | - | save_directory |
352 | | - | + "/" |
353 | | - | + alphanumeric |
354 | | - | + "_" |
355 | | - | + filetime_clean |
356 | | - | + "_reply_archive.csv" |
357 | | - | ) |
358 | | - | |
359 | | - | if forwards_check == True: |
360 | | - | color_print_green(" [!] ", "Forwards will be fetched") |
361 | | - | file_forwards = ( |
362 | | - | save_directory |
363 | | - | + "/edgelists/" |
364 | | - | + alphanumeric |
365 | | - | + "_" |
366 | | - | + filetime_clean |
367 | | - | + "_edgelist.csv" |
368 | | - | ) |
369 | | - | forward_directory = save_directory + "/edgelists/" |
370 | | - | |
371 | | - | try: |
372 | | - | os.makedirs(forward_directory) |
373 | | - | except FileExistsError: |
374 | | - | pass |
375 | | - | |
376 | | - | edgelist_file = ( |
377 | | - | forward_directory + "/" + alphanumeric + "_edgelist.csv" |
378 | | - | ) |
379 | | - | |
380 | | - | if basic is True or comp_check is True: |
381 | | - | |
382 | | - | color_print_green(" [-] ", "Fetching details for " + t + "...") |
383 | | - | memberlist_directory = save_directory + "/memberlists" |
384 | | - | |
385 | | - | try: |
386 | | - | os.makedirs(memberlist_directory) |
387 | | - | except FileExistsError: |
388 | | - | pass |
389 | | - | |
390 | | - | memberlist_filename = ( |
391 | | - | memberlist_directory + "/" + alphanumeric + "_members.csv" |
392 | | - | ) |
393 | | - | reply_memberlist_filename = ( |
394 | | - | memberlist_directory |
395 | | - | + "/" |
396 | | - | + alphanumeric |
397 | | - | + "_active_members.csv" |
398 | | - | ) |
399 | | - | |
400 | | - | entity = await client.get_entity(t) |
401 | | - | first_post = "Not found" |
402 | | - | |
403 | | - | async for message in client.iter_messages(t, reverse=True): |
404 | | - | datepost = parse_tg_date(message.date) |
405 | | - | date = datepost["date"] |
406 | | - | mtime = datepost["mtime"] |
407 | | - | first_post = datepost["timestamp"] |
408 | | - | break |
409 | | - | |
410 | | - | if entity.username: |
411 | | - | name = entity.title |
412 | | - | group_url = "http://t.me/" + entity.username |
413 | | - | group_username = entity.username |
414 | | - | web_req = parse_html_page(group_url) |
415 | | - | elif "https://t.me/" in t: |
416 | | - | group_url = t |
417 | | - | web_req = parse_html_page(group_url) |
418 | | - | group_username = "Private group" |
419 | | - | else: |
420 | | - | group_url = "Private group" |
421 | | - | group_username = "Private group" |
422 | | - | |
423 | | - | |
424 | | - | group_description = web_req["group_description"] |
425 | | - | total_participants = web_req["total_participants"] |
426 | | - | |
427 | | - | _desc = process_description( |
428 | | - | group_description, user_language |
429 | | - | ) |
430 | | - | description_text = _desc["description_text"] |
431 | | - | original_language = _desc[ |
432 | | - | "original_language" |
433 | | - | ] |
434 | | - | |
435 | | - | translated_description = _desc["translated_text"] |
436 | | - | |
437 | | - | preferredWidth = 70 |
438 | | - | descript = Fore.GREEN + "Description: " + Style.RESET_ALL |
439 | | - | prefix = descript |
440 | | - | wrapper_d = textwrap.TextWrapper( |
441 | | - | initial_indent=prefix, |
442 | | - | width=preferredWidth, |
443 | | - | subsequent_indent=" ", |
444 | | - | ) |
445 | | - | |
446 | | - | trans_descript = Fore.GREEN + "Translated: " + Style.RESET_ALL |
447 | | - | prefix = trans_descript |
448 | | - | wrapper_td = textwrap.TextWrapper( |
449 | | - | initial_indent=prefix, |
450 | | - | width=preferredWidth, |
451 | | - | subsequent_indent=" ", |
452 | | - | ) |
453 | | - | |
454 | | - | group_description = ('"' + group_description + '"') |
455 | | - | |
456 | | - | if entity.broadcast is True: |
457 | | - | chat_type = "Channel" |
458 | | - | elif entity.megagroup is True: |
459 | | - | chat_type = "Megagroup" |
460 | | - | elif entity.gigagroup is True: |
461 | | - | chat_type = "Gigagroup" |
462 | | - | else: |
463 | | - | chat_type = "Chat" |
464 | | - | |
465 | | - | if entity.restriction_reason is not None: |
466 | | - | ios_restriction = entity.restriction_reason[0] |
467 | | - | if 1 in entity.restriction_reason: |
468 | | - | android_restriction = entity.restriction_reason[1] |
469 | | - | group_status = ( |
470 | | - | str(ios_restriction) + ", " + str(android_restriction) |
471 | | - | ) |
472 | | - | else: |
473 | | - | group_status = str(ios_restriction) |
474 | | - | else: |
475 | | - | group_status = "None" |
476 | | - | |
477 | | - | restrict = Fore.GREEN + "Restrictions:" + Style.RESET_ALL |
478 | | - | prefix = restrict + " " |
479 | | - | preferredWidth = 70 |
480 | | - | wrapper_r = textwrap.TextWrapper( |
481 | | - | initial_indent=prefix, |
482 | | - | width=preferredWidth, |
483 | | - | subsequent_indent=" ", |
484 | | - | ) |
485 | | - | |
486 | | - | if chat_type != "Channel": |
487 | | - | members = [] |
488 | | - | all_participants = [] |
489 | | - | all_participants = await client.get_participants(t, limit=5000) |
490 | | - | |
491 | | - | members_df = None |
492 | | - | for user in all_participants: |
493 | | - | members_df = pd.DataFrame( |
494 | | - | members, |
495 | | - | columns=[ |
496 | | - | "Username", |
497 | | - | "Full name", |
498 | | - | "User ID", |
499 | | - | "Phone number", |
500 | | - | "Group name", |
501 | | - | ], |
502 | | - | ) |
503 | | - | members.append(populate_user(user, t)) |
504 | | - | |
505 | | - | if members_df is not None: |
506 | | - | with open( |
507 | | - | memberlist_filename, "w+", encoding="utf-8" |
508 | | - | ) as save_members: |
509 | | - | members_df.to_csv(save_members, sep=";") |
510 | | - | |
511 | | - | if json_check == True: |
512 | | - | members_df.to_json( |
513 | | - | json_file + alphanumeric + "_memberlist.json", |
514 | | - | orient="records", |
515 | | - | compression="infer", |
516 | | - | lines=True, |
517 | | - | index=True, |
518 | | - | ) |
519 | | - | else: |
520 | | - | pass |
521 | | - | |
522 | | - | found_participants = len(all_participants) |
523 | | - | found_participants = int(found_participants) |
524 | | - | found_percentage = ( |
525 | | - | int(found_participants) / int(total_participants) * 100 |
526 | | - | ) |
527 | | - | |
528 | | - | log = [] |
529 | | - | |
530 | | - | if chat_type != "Channel": |
531 | | - | print("\n") |
532 | | - | color_print_green(" [+] Memberlist fetched", "") |
533 | | - | else: |
534 | | - | pass |
535 | | - | |
536 | | - | color_print_green(" ⬠Chat details", "") |
537 | | - | color_print_green(" â Title: ", str(entity.title)) |
538 | | - | color_print_green(" â ", wrapper_d.fill(group_description)) |
539 | | - | if translated_description != group_description: |
540 | | - | color_print_green(" â ", wrapper_td.fill(translated_description)) |
541 | | - | color_print_green( |
542 | | - | " â Total participants: ", str(total_participants) |
543 | | - | ) |
544 | | - | |
545 | | - | if chat_type != "Channel": |
546 | | - | color_print_green( |
547 | | - | " â Participants found: ", |
548 | | - | str(found_participants) |
549 | | - | + " (" |
550 | | - | + str(format(found_percentage, ".2f")) |
551 | | - | + "%)", |
552 | | - | ) |
553 | | - | else: |
554 | | - | found_participants = "N/A" |
555 | | - | |
556 | | - | color_print_green(" â Username: ", str(group_username)) |
557 | | - | color_print_green(" â URL: ", str(group_url)) |
558 | | - | color_print_green(" â Chat type: ", str(chat_type)) |
559 | | - | color_print_green(" â Chat id: ", str(entity.id)) |
560 | | - | color_print_green(" â Access hash: ", str(entity.access_hash)) |
561 | | - | |
562 | | - | if chat_type == "Channel": |
563 | | - | scam_status = str(entity.scam) |
564 | | - | color_print_green(" â Scam: ", str(scam_status)) |
565 | | - | else: |
566 | | - | scam_status = "N/A" |
567 | | - | |
568 | | - | color_print_green(" â First post date: ", str(first_post)) |
569 | | - | |
570 | | - | if chat_type != "Channel": |
571 | | - | color_print_green( |
572 | | - | " â Memberlist saved to: ", memberlist_filename |
573 | | - | ) |
574 | | - | |
575 | | - | color_print_green( |
576 | | - | " â ", wrapper_r.fill(group_status) |
577 | | - | ) |
578 | | - | #print("\n") |
579 | | - | |
580 | | - | log.append( |
581 | | - | [ |
582 | | - | filetime, |
583 | | - | entity.title, |
584 | | - | group_description, |
585 | | - | translated_description, |
586 | | - | total_participants, |
587 | | - | found_participants, |
588 | | - | group_username, |
589 | | - | group_url, |
590 | | - | chat_type, |
591 | | - | entity.id, |
592 | | - | entity.access_hash, |
593 | | - | scam_status, |
594 | | - | date, |
595 | | - | mtime, |
596 | | - | group_status, |
597 | | - | ] |
598 | | - | ) |
599 | | - | |
600 | | - | log_df = pd.DataFrame( |
601 | | - | log, |
602 | | - | columns=[ |
603 | | - | "Access Date", |
604 | | - | "Title", |
605 | | - | "Description", |
606 | | - | "Translated description", |
607 | | - | "Total participants", |
608 | | - | "Participants found", |
609 | | - | "Username", |
610 | | - | "URL", |
611 | | - | "Chat type", |
612 | | - | "Chat ID", |
613 | | - | "Access hash", |
614 | | - | "Scam", |
615 | | - | "First post date", |
616 | | - | "First post time (UTC)", |
617 | | - | "Restrictions", |
618 | | - | ], |
619 | | - | ) |
620 | | - | |
621 | | - | if not os.path.isfile(log_file): |
622 | | - | log_df.to_csv(log_file, sep=";", index=False) |
623 | | - | else: |
624 | | - | log_df.to_csv( |
625 | | - | log_file, sep=";", mode="a", index=False, header=False |
626 | | - | ) |
627 | | - | |
628 | | - | if forwards_check is True and comp_check is False: |
629 | | - | color_print_green( |
630 | | - | " [-] ", "Calculating number of forwarded messages..." |
631 | | - | ) |
632 | | - | forwards_list = [] |
633 | | - | forward_count = 0 |
634 | | - | private_count = 0 |
635 | | - | to_ent = await client.get_entity(t) |
636 | | - | to_title = to_ent.title |
637 | | - | |
638 | | - | forwards_df = pd.DataFrame( |
639 | | - | forwards_list, |
640 | | - | columns=[ |
641 | | - | "To", |
642 | | - | "To_title", |
643 | | - | "From", |
644 | | - | "From_ID", |
645 | | - | "Username", |
646 | | - | "Timestamp", |
647 | | - | ], |
648 | | - | ) |
649 | | - | |
650 | | - | async for message in client.iter_messages(t): |
651 | | - | if message.forward is not None: |
652 | | - | forward_count += 1 |
653 | | - | |
654 | | - | #print("\n") |
655 | | - | color_print_green(" [-] ", "Fetching forwarded messages...") |
656 | | - | |
657 | | - | progress_bar = ( |
658 | | - | Fore.GREEN + " [-] " + Style.RESET_ALL + "Progress: " |
659 | | - | ) |
660 | | - | |
661 | | - | with alive_bar( |
662 | | - | forward_count, dual_line=True, title=progress_bar, length=20 |
663 | | - | ) as bar: |
664 | | - | |
665 | | - | async for message in client.iter_messages(t): |
666 | | - | if message.forward is not None: |
667 | | - | try: |
668 | | - | f_from_id = message.forward.original_fwd.from_id |
669 | | - | if f_from_id is not None: |
670 | | - | ent = await client.get_entity(f_from_id) |
671 | | - | username = ent.username |
672 | | - | timestamp = parse_tg_date(message.date)[ |
673 | | - | "timestamp" |
674 | | - | ] |
675 | | - | |
676 | | - | substring = "PeerUser" |
677 | | - | string = str(f_from_id) |
678 | | - | if substring in string: |
679 | | - | user_id = re.sub("[^0-9]", "", string) |
680 | | - | user_id = await client.get_entity( |
681 | | - | PeerUser(int(user_id)) |
682 | | - | ) |
683 | | - | user_id = str(user_id) |
684 | | - | result = ( |
685 | | - | "User: " |
686 | | - | + str(ent.first_name) |
687 | | - | + " / ID: " |
688 | | - | + str(user_id.id) |
689 | | - | ) |
690 | | - | else: |
691 | | - | result = str(ent.title) |
692 | | - | |
693 | | - | forwards_df = pd.DataFrame( |
694 | | - | forwards_list, |
695 | | - | columns=[ |
696 | | - | "To username", |
697 | | - | "To name", |
698 | | - | "From", |
699 | | - | "From ID", |
700 | | - | "From_username", |
701 | | - | "Timestamp", |
702 | | - | ], |
703 | | - | ) |
704 | | - | |
705 | | - | forwards_list.append( |
706 | | - | [ |
707 | | - | t, |
708 | | - | to_title, |
709 | | - | result, |
710 | | - | f_from_id, |
711 | | - | username, |
712 | | - | timestamp, |
713 | | - | ] |
714 | | - | ) |
715 | | - | |
716 | | - | except Exception as e: |
717 | | - | if e is ChannelPrivateError: |
718 | | - | print("Private channel") |
719 | | - | continue |
720 | | - | |
721 | | - | time.sleep(0.5) |
722 | | - | bar() |
723 | | - | |
724 | | - | with open( |
725 | | - | edgelist_file, "w+", encoding="utf-8" |
726 | | - | ) as save_forwards: |
727 | | - | forwards_df.to_csv(save_forwards, sep=";") |
728 | | - | |
729 | | - | if json_check == True: |
730 | | - | forwards_df.to_json( |
731 | | - | json_file + alphanumeric + "_edgelist.json", |
732 | | - | orient="records", |
733 | | - | compression="infer", |
734 | | - | lines=True, |
735 | | - | index=True, |
736 | | - | ) |
737 | | - | else: |
738 | | - | pass |
739 | | - | |
740 | | - | if forward_count >= 15: |
741 | | - | forwards_found = forwards_df.From.count() |
742 | | - | value_count = forwards_df["From"].value_counts() |
743 | | - | df01 = value_count.rename_axis("unique_values").reset_index( |
744 | | - | name="counts" |
745 | | - | ) |
746 | | - | |
747 | | - | top_forward_one = df01.iloc[0]["unique_values"] |
748 | | - | top_value_one = df01.iloc[0]["counts"] |
749 | | - | top_forward_two = df01.iloc[1]["unique_values"] |
750 | | - | top_value_two = df01.iloc[1]["counts"] |
751 | | - | top_forward_three = df01.iloc[2]["unique_values"] |
752 | | - | top_value_three = df01.iloc[2]["counts"] |
753 | | - | top_forward_four = df01.iloc[3]["unique_values"] |
754 | | - | top_value_four = df01.iloc[3]["counts"] |
755 | | - | top_forward_five = df01.iloc[4]["unique_values"] |
756 | | - | top_value_five = df01.iloc[4]["counts"] |
757 | | - | |
758 | | - | forward_one = ( |
759 | | - | str(top_forward_one) |
760 | | - | + ", " |
761 | | - | + str(top_value_one) |
762 | | - | + " forwarded messages" |
763 | | - | ) |
764 | | - | forward_two = ( |
765 | | - | str(top_forward_two) |
766 | | - | + ", " |
767 | | - | + str(top_value_two) |
768 | | - | + " forwarded messages" |
769 | | - | ) |
770 | | - | forward_three = ( |
771 | | - | str(top_forward_three) |
772 | | - | + ", " |
773 | | - | + str(top_value_three) |
774 | | - | + " forwarded messages" |
775 | | - | ) |
776 | | - | forward_four = ( |
777 | | - | str(top_forward_four) |
778 | | - | + ", " |
779 | | - | + str(top_value_four) |
780 | | - | + " forwarded messages" |
781 | | - | ) |
782 | | - | forward_five = ( |
783 | | - | str(top_forward_five) |
784 | | - | + ", " |
785 | | - | + str(top_value_five) |
786 | | - | + " forwarded messages" |
787 | | - | ) |
788 | | - | |
789 | | - | df02 = forwards_df.From.unique() |
790 | | - | unique_forwards = len(df02) |
791 | | - | |
792 | | - | #print("\n") |
793 | | - | color_print_green(" [+] Forward scrape complete", "") |
794 | | - | color_print_green(" ⬠Statistics", "") |
795 | | - | color_print_green( |
796 | | - | " â Forwarded messages found: ", str(forward_count) |
797 | | - | ) |
798 | | - | color_print_green( |
799 | | - | " â Forwards from active public chats: ", |
800 | | - | str(forwards_found), |
801 | | - | ) |
802 | | - | color_print_green( |
803 | | - | " â Unique forward sources: ", str(unique_forwards) |
804 | | - | ) |
805 | | - | color_print_green( |
806 | | - | " â Top forward source 1: ", str(forward_one) |
807 | | - | ) |
808 | | - | color_print_green( |
809 | | - | " â Top forward source 2: ", str(forward_two) |
810 | | - | ) |
811 | | - | color_print_green( |
812 | | - | " â Top forward source 3: ", str(forward_three) |
813 | | - | ) |
814 | | - | color_print_green( |
815 | | - | " â Top forward source 4: ", str(forward_four) |
816 | | - | ) |
817 | | - | color_print_green( |
818 | | - | " â Top forward source 5: ", str(forward_five) |
819 | | - | ) |
820 | | - | color_print_green(" â Edgelist saved to: ", edgelist_file) |
821 | | - | #print("\n") |
822 | | - | |
823 | | - | else: |
824 | | - | print( |
825 | | - | "\n" |
826 | | - | + Fore.GREEN |
827 | | - | + " [!] Insufficient forwarded messages found" |
828 | | - | + Style.RESET_ALL |
829 | | - | ) |
830 | | - | |
831 | | - | else: |
832 | | - | |
833 | | - | if comp_check is True: |
834 | | - | |
835 | | - | messages = client.iter_messages(t) |
836 | | - | |
837 | | - | message_list = [] |
838 | | - | forwards_list = [] |
839 | | - | |
840 | | - | user_reaction_list = [] |
841 | | - | |
842 | | - | replies_list = [] |
843 | | - | user_replier_list = [] |
844 | | - | |
845 | | - | timecount = [] |
846 | | - | |
847 | | - | forward_count = 0 |
848 | | - | private_count = 0 |
849 | | - | |
850 | | - | if media_archive is True: |
851 | | - | files = [] |
852 | | - | print("\n") |
853 | | - | color_print_green( |
854 | | - | " [!] ", "Media content will be archived" |
855 | | - | ) |
856 | | - | |
857 | | - | color_print_green( |
858 | | - | " [!] ", "Calculating number of messages..." |
859 | | - | ) |
860 | | - | |
861 | | - | message_count = 0 |
862 | | - | |
863 | | - | async for message in messages: |
864 | | - | if message is not None: |
865 | | - | message_count += 1 |
866 | | - | |
867 | | - | print("\n") |
868 | | - | color_print_green(" [-] ", "Fetching message archive...") |
869 | | - | progress_bar = ( |
870 | | - | Fore.GREEN + " [-] " + Style.RESET_ALL + "Progress: " |
871 | | - | ) |
872 | | - | |
873 | | - | with alive_bar( |
874 | | - | message_count, |
875 | | - | dual_line=True, |
876 | | - | title=progress_bar, |
877 | | - | length=20, |
878 | | - | ) as bar: |
879 | | - | |
880 | | - | to_ent = await client.get_entity(t) |
881 | | - | |
882 | | - | async for message in client.iter_messages( |
883 | | - | t, limit=None |
884 | | - | ): |
885 | | - | if message is not None: |
886 | | - | |
887 | | - | try: |
888 | | - | |
889 | | - | c_archive = pd.DataFrame( |
890 | | - | message_list, |
891 | | - | columns=[ |
892 | | - | "To", |
893 | | - | "Message ID", |
894 | | - | "Display_name", |
895 | | - | "ID", |
896 | | - | "Message_text", |
897 | | - | "Original_language", |
898 | | - | "Translated_text", |
899 | | - | "Translation_confidence", |
900 | | - | "Timestamp", |
901 | | - | "Reply", |
902 | | - | "Views", |
903 | | - | ], |
904 | | - | ) |
905 | | - | |
906 | | - | c_forwards = pd.DataFrame( |
907 | | - | forwards_list, |
908 | | - | columns=[ |
909 | | - | "To", |
910 | | - | "To_title", |
911 | | - | "From", |
912 | | - | "From_ID", |
913 | | - | "Username", |
914 | | - | "Timestamp", |
915 | | - | ], |
916 | | - | ) |
917 | | - | |
918 | | - | #if message.reactions: |
919 | | - | # if message.reactions.can_see_list: |
920 | | - | # c_reactioneer = pd.DataFrame( |
921 | | - | # user_reaction_list, |
922 | | - | # columns=[ |
923 | | - | # "Username", |
924 | | - | # "Full name", |
925 | | - | # "User ID", |
926 | | - | # "Phone number", |
927 | | - | # "Group name", |
928 | | - | # ], |
929 | | - | # ) |
930 | | - | |
931 | | - | if ( |
932 | | - | message.replies |
933 | | - | and reply_analysis |
934 | | - | and chat_type == "Channel" |
935 | | - | ): |
936 | | - | if message.replies.replies > 0: |
937 | | - | c_repliers = pd.DataFrame( |
938 | | - | user_replier_list, |
939 | | - | columns=[ |
940 | | - | "Username", |
941 | | - | "Full name", |
942 | | - | "User ID", |
943 | | - | "Phone number", |
944 | | - | "Group name", |
945 | | - | ], |
946 | | - | ) |
947 | | - | c_replies = pd.DataFrame( |
948 | | - | replies_list, |
949 | | - | columns=[ |
950 | | - | "To", |
951 | | - | "Message ID", |
952 | | - | "Reply ID", |
953 | | - | "Display_name", |
954 | | - | "ID", |
955 | | - | "Message_text", |
956 | | - | "Original_language", |
957 | | - | "Translated_text", |
958 | | - | "Translation_confidence", |
959 | | - | "Timestamp", |
960 | | - | ], |
961 | | - | ) |
962 | | - | |
963 | | - | if message.replies: |
964 | | - | if message.replies.replies > 0: |
965 | | - | async for repl in client.iter_messages( |
966 | | - | message.chat_id, |
967 | | - | reply_to=message.id, |
968 | | - | ): |
969 | | - | user = await client.get_entity( |
970 | | - | repl.from_id.user_id |
971 | | - | ) |
972 | | - | userdet = populate_user(user, t) |
973 | | - | user_replier_list.append( |
974 | | - | userdet |
975 | | - | ) |
976 | | - | mss_txt = process_message( |
977 | | - | repl.text, user_language |
978 | | - | ) |
979 | | - | replies_list.append( |
980 | | - | [ |
981 | | - | t, |
982 | | - | message.id, |
983 | | - | repl.id, |
984 | | - | userdet[1], |
985 | | - | userdet[2], |
986 | | - | mss_txt["message_text"], |
987 | | - | mss_txt[ |
988 | | - | "original_language" |
989 | | - | ], |
990 | | - | mss_txt[ |
991 | | - | "translated_text" |
992 | | - | ], |
993 | | - | mss_txt[ |
994 | | - | "translation_confidence" |
995 | | - | ], |
996 | | - | parse_tg_date( |
997 | | - | repl.date |
998 | | - | )["timestamp"], |
999 | | - | ] |
1000 | | - | ) |
1001 | | - | |
# Human-readable sender name for the current message.
display_name = get_display_name(
    message.sender
)
# Resolve a stable sender identifier: in groups, extract the numeric id
# from the "PeerUser(...)" repr; channels post as the channel itself, so
# fall back to the target entity's id.
if chat_type != "Channel":
    substring = "PeerUser"
    string = str(message.from_id)
    if substring in string:
        user_id = re.sub(
            "[^0-9]", "", string
        )
        nameID = str(user_id)
    else:
        nameID = str(message.from_id)
else:
    nameID = to_ent.id

timestamp = parse_tg_date(message.date)[
    "timestamp"
]
reply = message.reply_to_msg_id

# Normalise/translate the message text (libs.utils.process_message).
_mess = process_message(
    message.text, user_language
)
message_text = _mess["message_text"]
original_language = _mess[
    "original_language"
]
translated_text = _mess["translated_text"]
translation_confidence = _mess[
    "translation_confidence"
]

# NOTE(review): `forwards` is computed but never used in the archive row
# below — confirm whether it was meant to be exported.
if message.forwards is not None:
    forwards = int(message.forwards)
else:
    forwards = "None"

if message.views is not None:
    views = int(message.views)
else:
    views = "Not found"

#if message.reactions:
#if message.reactions.can_see_list:
#print(dir(message.reactions.results))
#print("#### TODO: REACTIONS")

# Optionally download attached media into the archive directory.
if media_archive == True:
    #add a progress bar for each file download
    if message.media:
        path = await message.download_media(
            file=media_directory
        )
        files.append(path)
    else:
        pass

# One archive row per message; order must match c_archive's columns.
message_list.append(
    [
        t,
        message.id,
        display_name,
        nameID,
        message_text,
        original_language,
        translated_text,
        translation_confidence,
        timestamp,
        reply,
        views,
    ]
)
1077 | | - | |
# Forwarded message: resolve the original source entity and append an
# edge (destination chat -> source) to forwards_list for the edgelist.
if message.forward is not None:
    forward_verify = True
    try:
        forward_count += 1
        to_title = to_ent.title
        f_from_id = (
            message.forward.original_fwd.from_id
        )

        if f_from_id is not None:
            ent = await client.get_entity(
                f_from_id
            )
            # Crude duck-typing on the entity's repr to classify it.
            user_string = "user_id"
            channel_string = "broadcast"

            if user_string in str(ent):
                ent_type = "User"
            else:
                if channel_string in str(
                    ent
                ):
                    if (
                        ent.broadcast
                        is True
                    ):
                        ent_type = "Channel"
                    elif (
                        ent.megagroup
                        is True
                    ):
                        ent_type = (
                            "Megagroup"
                        )
                    elif (
                        ent.gigagroup
                        is True
                    ):
                        ent_type = (
                            "Gigagroup"
                        )
                    else:
                        ent_type = "Chat"
                else:
                    # Unclassifiable source: skip this message entirely
                    # (continues the enclosing message loop).
                    continue

            if ent.username is not None:
                username = ent.username
            else:
                username = "none"

            if ent_type != "Chat":
                result = str(ent.title)
            else:
                result = "none"

            # For user sources, build "User: <name> / ID: <entity>".
            # NOTE(review): user_id is overwritten with the full entity
            # repr by the second assignment — confirm that is intended.
            if ent_type == "User":
                substring_1 = "PeerUser"
                string_1 = str(ent.user_id)
                if substring_1 in string_1:
                    user_id = re.sub(
                        "[^0-9]",
                        "",
                        string_1,
                    )
                    user_id = await client.get_entity(
                        PeerUser(
                            int(user_id)
                        )
                    )
                    user_id = str(user_id)
                    result = (
                        "User: "
                        + str(
                            ent.first_name
                        )
                        + " / ID: "
                        + str(user_id)
                    )
                else:
                    result = str(ent.title)
            else:
                result = str(ent.title)

            forwards_list.append(
                [
                    t,
                    to_title,
                    result,
                    f_from_id,
                    username,
                    timestamp,
                ]
            )

    except ChannelPrivateError:
        # Source chat is (now) private: count it and move on.
        private_count += 1
        continue

    except Exception as e:
        print("An exception occurred.", e)
        continue
1182 | | - | |
1183 | | - | except Exception as e: |
1184 | | - | print("An exception occurred.", e) |
1185 | | - | |
1186 | | - | else: |
1187 | | - | message_list.append( |
1188 | | - | [ |
1189 | | - | "None", |
1190 | | - | "None", |
1191 | | - | "None", |
1192 | | - | "None", |
1193 | | - | "None", |
1194 | | - | "None", |
1195 | | - | "None", |
1196 | | - | "None", |
1197 | | - | ] |
1198 | | - | ) |
1199 | | - | pass |
1200 | | - | |
1201 | | - | time.sleep(0.5) |
1202 | | - | bar() |
1203 | | - | |
# Persist collected data: reply archives (optional), the main message
# archive, and optional JSON copies of archive and edgelist.
if reply_analysis is True:
    if len(replies_list) > 0:
        with open(
            reply_file_archive, "w+", encoding="utf-8"
        ) as rep_file:
            c_replies.to_csv(rep_file, sep=";")

    if len(user_replier_list) > 0:
        with open(
            reply_memberlist_filename, "w+", encoding="utf-8"
        ) as repliers_file:
            c_repliers.to_csv(repliers_file, sep=";")

with open(
    file_archive, "w+", encoding="utf-8"
) as archive_file:
    c_archive.to_csv(archive_file, sep=";")

if json_check == True:
    c_archive.to_json(
        json_file + alphanumeric + "_archive.json",
        orient="records",
        compression="infer",
        lines=True,
        index=True,
    )
else:
    pass

if forwards_check is True:
    with open(
        file_forwards, "w+", encoding="utf-8"
    ) as forwards_file:
        c_forwards.to_csv(forwards_file, sep=";")

    if json_check == True:
        c_forwards.to_json(
            json_file + alphanumeric + "_edgelist.json",
            orient="records",
            compression="infer",
            lines=True,
            index=True,
        )
    else:
        pass
else:
    pass

# Headline count; the -1 presumably discounts a placeholder row —
# TODO(review): confirm against how message_list is seeded.
messages_found = int(c_archive.To.count()) - 1
message_frequency_count = {}
message_text = {}
word_count = {}
most_used_words = {}
most_used_words_filtered = {}
# message stats, top words
1258 | | - | # message stats, top words |
1259 | | - | |
# Top-poster statistics — only meaningful for groups, since broadcast
# channels post as a single entity.
# NOTE(review): iloc[0..4] assumes at least five distinct posters /
# repliers; smaller chats raise IndexError here (pre-existing).
if chat_type != "Channel":
    pcount = c_archive.Display_name.count()
    pvalue_count = c_archive["Display_name"].value_counts()
    df03 = pvalue_count.rename_axis(
        "unique_values"
    ).reset_index(name="counts")

    top_poster_one = str(df03.iloc[0]["unique_values"])
    top_pvalue_one = df03.iloc[0]["counts"]
    top_poster_two = str(df03.iloc[1]["unique_values"])
    top_pvalue_two = df03.iloc[1]["counts"]
    top_poster_three = str(df03.iloc[2]["unique_values"])
    top_pvalue_three = df03.iloc[2]["counts"]
    top_poster_four = str(df03.iloc[3]["unique_values"])
    top_pvalue_four = df03.iloc[3]["counts"]
    top_poster_five = str(df03.iloc[4]["unique_values"])
    top_pvalue_five = df03.iloc[4]["counts"]

    poster_one = (
        str(top_poster_one) + ", " + str(top_pvalue_one) + " messages"
    )
    poster_two = (
        str(top_poster_two) + ", " + str(top_pvalue_two) + " messages"
    )
    poster_three = (
        str(top_poster_three) + ", " + str(top_pvalue_three) + " messages"
    )
    poster_four = (
        str(top_poster_four) + ", " + str(top_pvalue_four) + " messages"
    )
    poster_five = (
        str(top_poster_five) + ", " + str(top_pvalue_five) + " messages"
    )

    df04 = c_archive.Display_name.unique()
    plength = len(df03)
    unique_active = len(df04)
    # one day this'll work out sleeping times
    # print(c_t_stats)

# Fix: this branch was `elif`, so replier statistics were never computed
# for non-channel chats even though the report section unconditionally
# prints replier_one..replier_five when reply_analysis is on (NameError).
# It must be an independent `if`.
if reply_analysis is True:
    if len(replies_list) > 0:
        # Fix: the c_repliers frame declares its column as "User ID"
        # (see its construction above); "User id" raised a KeyError.
        replier_count = c_repliers["User ID"].count()
        replier_value_count = c_repliers["User ID"].value_counts()
        replier_df = replier_value_count.rename_axis(
            "unique_values"
        ).reset_index(name="counts")

        top_replier_one = str(replier_df.iloc[0]["unique_values"])
        top_replier_value_one = replier_df.iloc[0]["counts"]
        top_replier_two = str(replier_df.iloc[1]["unique_values"])
        top_replier_value_two = replier_df.iloc[1]["counts"]
        top_replier_three = str(replier_df.iloc[2]["unique_values"])
        top_replier_value_three = replier_df.iloc[2]["counts"]
        top_replier_four = str(replier_df.iloc[3]["unique_values"])
        top_replier_value_four = replier_df.iloc[3]["counts"]
        top_replier_five = str(replier_df.iloc[4]["unique_values"])
        top_replier_value_five = replier_df.iloc[4]["counts"]

        replier_one = (
            str(top_replier_one)
            + ", "
            + str(top_replier_value_one)
            + " replies"
        )
        replier_two = (
            str(top_replier_two)
            + ", "
            + str(top_replier_value_two)
            + " replies"
        )
        replier_three = (
            str(top_replier_three)
            + ", "
            + str(top_replier_value_three)
            + " replies"
        )
        replier_four = (
            str(top_replier_four)
            + ", "
            + str(top_replier_value_four)
            + " replies"
        )
        replier_five = (
            str(top_replier_five)
            + ", "
            + str(top_replier_value_five)
            + " replies"
        )

        replier_count_df = c_repliers["User ID"].unique()
        replier_length = len(replier_df)
        replier_unique = len(replier_count_df)
1371 | | - | |
#print("\n")
# Terminal report.  NOTE(review): the odd glyphs in these strings look
# like mojibake of box-drawing characters ("┬", "├", "└"); they are
# runtime output and are preserved verbatim here — fix encoding at the
# source if confirmed.
color_print_green(" [+] Chat archive saved", "")
color_print_green(" ⬠Chat statistics", "")
color_print_green(
    " â Number of messages found: ", str(messages_found)
)

# Poster breakdown only exists for groups; channels have one poster.
if chat_type != "Channel":
    color_print_green(
        " â Top poster 1: ", str(poster_one)
    )
    color_print_green(
        " â Top poster 2: ", str(poster_two)
    )
    color_print_green(
        " â Top poster 3: ", str(poster_three)
    )
    color_print_green(
        " â Top poster 4: ", str(poster_four)
    )
    color_print_green(
        " â Top poster 5: ", str(poster_five)
    )
    color_print_green(
        " â Total unique posters: ", str(unique_active)
    )

else:
    pass
# timestamp analysis
# print(Fore.GREEN
# + ' â Number of messages: '
# + Style.RESET_ALL
# + str(message_count))

color_print_green(
    " â Archive saved to: ", str(file_archive)
)
1410 | | - | |
# Report the reply analysis (only when replies were actually collected).
if reply_analysis is True:
    if len(replies_list) > 0:
        middle_char = "â"
        # Fix: the original tested `user_replier_list == 0`, comparing
        # the list object itself against an int — always False.  The
        # intent is "no repliers collected", i.e. an empty list, which
        # switches the tree-connector glyph for the line below.
        if len(user_replier_list) == 0:
            middle_char = "â"

        #print("\n")
        color_print_green(" [+] Replies analysis ", "")
        color_print_green(" ⬠Chat statistics", "")
        color_print_green(
            f" {middle_char} Archive of replies saved to: ",
            str(reply_file_archive),
        )
        if len(user_replier_list) > 0:
            color_print_green(
                " â Active members list who replied to messages, saved to: ",
                str(reply_memberlist_filename),
            )

        color_print_green(
            " â Top replier 1: ", str(replier_one)
        )
        color_print_green(
            " â Top replier 2: ", str(replier_two)
        )
        color_print_green(
            " â Top replier 3: ", str(replier_three)
        )
        color_print_green(
            " â Top replier 4: ", str(replier_four)
        )
        color_print_green(
            " â Top replier 5: ", str(replier_five)
        )
        color_print_green(
            " â Total unique repliers: ", str(replier_unique)
        )
        # add a figure for unique current posters who are active
1449 | | - | |
# Forwarded-message statistics: only reported when at least 15 forwards
# were collected; otherwise the sample is treated as insufficient.
if forwards_check is True:
    if forward_count >= 15:
        forwards_found = c_forwards.From.count()
        value_count = c_forwards["From"].value_counts()
        c_f_stats = value_count.rename_axis(
            "unique_values"
        ).reset_index(name="counts")

        # NOTE(review): iloc[0..4] assumes at least five distinct
        # forward sources; fewer raises IndexError (pre-existing).
        top_forward_one = c_f_stats.iloc[0]["unique_values"]
        top_value_one = c_f_stats.iloc[0]["counts"]
        top_forward_two = c_f_stats.iloc[1]["unique_values"]
        top_value_two = c_f_stats.iloc[1]["counts"]
        top_forward_three = c_f_stats.iloc[2][
            "unique_values"
        ]
        top_value_three = c_f_stats.iloc[2]["counts"]
        top_forward_four = c_f_stats.iloc[3][
            "unique_values"
        ]
        top_value_four = c_f_stats.iloc[3]["counts"]
        top_forward_five = c_f_stats.iloc[4][
            "unique_values"
        ]
        top_value_five = c_f_stats.iloc[4]["counts"]

        forward_one = (
            str(top_forward_one)
            + ", "
            + str(top_value_one)
            + " forwarded messages"
        )
        forward_two = (
            str(top_forward_two)
            + ", "
            + str(top_value_two)
            + " forwarded messages"
        )
        forward_three = (
            str(top_forward_three)
            + ", "
            + str(top_value_three)
            + " forwarded messages"
        )
        forward_four = (
            str(top_forward_four)
            + ", "
            + str(top_value_four)
            + " forwarded messages"
        )
        forward_five = (
            str(top_forward_five)
            + ", "
            + str(top_value_five)
            + " forwarded messages"
        )

        c_f_unique = c_forwards.From.unique()
        unique_forwards = len(c_f_unique)

        #print("\n")
        color_print_green(" [+] Edgelist saved", "")
        color_print_green(
            " ⬠Forwarded message statistics", ""
        )
        color_print_green(
            " â Forwarded messages found: ",
            str(forward_count),
        )
        color_print_green(
            " â Forwards from active public chats: ",
            str(forwards_found),
        )
        color_print_green(
            " â Forwards from private (or now private) chats: ",
            str(private_count),
        )
        color_print_green(
            " â Unique forward sources: ",
            str(unique_forwards),
        )
        color_print_green(
            " â Top forward source 1: ", str(forward_one)
        )
        color_print_green(
            " â Top forward source 2: ", str(forward_two)
        )
        color_print_green(
            " â Top forward source 3: ",
            str(forward_three),
        )
        color_print_green(
            " â Top forward source 4: ", str(forward_four)
        )
        color_print_green(
            " â Top forward source 5: ", str(forward_five)
        )
        color_print_green(
            " â Edgelist saved to: ", edgelist_file
        )
        #print("\n")

    else:
        #print("\n")
        color_print_green(
            " [!] Insufficient forwarded messages found",
            edgelist_file,
        )
else:
    pass
1559 | | - | |
1560 | | - | if user_check == True: |
1561 | | - | my_user = None |
1562 | | - | try: |
1563 | | - | |
1564 | | - | user = int(t) |
1565 | | - | my_user = await client.get_entity(PeerUser(int(user))) |
1566 | | - | |
1567 | | - | user_first_name = my_user.first_name |
1568 | | - | user_last_name = my_user.last_name |
1569 | | - | if user_last_name is not None: |
1570 | | - | user_full_name = ( |
1571 | | - | str(user_first_name) + " " + str(user_last_name) |
1572 | | - | ) |
1573 | | - | else: |
1574 | | - | user_full_name = str(user_first_name) |
1575 | | - | |
1576 | | - | if my_user.photo is not None: |
1577 | | - | user_photo = my_user.photo.photo_id |
1578 | | - | else: |
1579 | | - | user_photo = "None" |
1580 | | - | |
1581 | | - | if my_user.restriction_reason is not None: |
1582 | | - | ios_restriction = entity.restriction_reason[0] |
1583 | | - | if 1 in entity.restriction_reason: |
1584 | | - | android_restriction = entity.restriction_reason[1] |
1585 | | - | user_restrictions = ( |
1586 | | - | str(ios_restriction) |
1587 | | - | + ", " |
1588 | | - | + str(android_restriction) |
1589 | | - | ) |
1590 | | - | else: |
1591 | | - | user_restrictions = str(ios_restriction) |
1592 | | - | else: |
1593 | | - | user_restrictions = "None" |
1594 | | - | |
1595 | | - | color_print_green(" [+] ", "User details for " + t) |
1596 | | - | color_print_green(" â Username: ", str(my_user.username)) |
1597 | | - | color_print_green(" â Name: ", str(user_full_name)) |
1598 | | - | color_print_green(" â Verification: ", str(my_user.verified)) |
1599 | | - | color_print_green(" â Photo ID: ", str(user_photo)) |
1600 | | - | color_print_green(" â Phone number: ", str(my_user.phone)) |
1601 | | - | color_print_green( |
1602 | | - | " â Access hash: ", str(my_user.access_hash) |
1603 | | - | ) |
1604 | | - | color_print_green(" â Language: ", str(my_user.lang_code)) |
1605 | | - | color_print_green(" â Bot: ", str(my_user.bot)) |
1606 | | - | color_print_green(" â Scam: ", str(my_user.scam)) |
1607 | | - | color_print_green(" â Restrictions: ", str(user_restrictions)) |
1608 | | - | |
1609 | | - | except ValueError: |
1610 | | - | pass |
1611 | | - | if my_user is None: |
1612 | | - | print( |
1613 | | - | Fore.GREEN |
1614 | | - | + " [!] " |
1615 | | - | + Style.RESET_ALL |
1616 | | - | + "User not found, this is likely because Telepathy has not encountered them yet." |
1617 | | - | ) |
1618 | | - | |
1619 | | - | if location_check == True: |
1620 | | - | |
1621 | | - | location = t |
1622 | | - | |
1623 | | - | print( |
1624 | | - | Fore.GREEN |
1625 | | - | + " [!] " |
1626 | | - | + Style.RESET_ALL |
1627 | | - | + "Searching for users near " |
1628 | | - | + location |
1629 | | - | + "\n" |
1630 | | - | ) |
1631 | | - | latitude, longitude = location.split(sep=",") |
1632 | | - | |
1633 | | - | locations_file = telepathy_file + "locations/" |
1634 | | - | |
1635 | | - | try: |
1636 | | - | os.makedirs(locations_file) |
1637 | | - | except FileExistsError: |
1638 | | - | pass |
1639 | | - | |
1640 | | - | save_file = ( |
1641 | | - | locations_file |
1642 | | - | + latitude |
1643 | | - | + "_" |
1644 | | - | + longitude |
1645 | | - | + "_" |
1646 | | - | + "locations_" |
1647 | | - | + filetime_clean |
1648 | | - | + ".csv" |
1649 | | - | ) |
1650 | | - | |
1651 | | - | locations_list = [] |
1652 | | - | result = await client( |
1653 | | - | functions.contacts.GetLocatedRequest( |
1654 | | - | geo_point=types.InputGeoPoint( |
1655 | | - | lat=float(latitude), |
1656 | | - | long=float(longitude), |
1657 | | - | accuracy_radius=42, |
1658 | | - | ), |
1659 | | - | self_expires=42, |
1660 | | - | ) |
1661 | | - | ) |
1662 | | - | |
1663 | | - | # progress bar? |
1664 | | - | |
1665 | | - | for user in result.updates[0].peers: |
1666 | | - | try: |
1667 | | - | user_df = pd.DataFrame( |
1668 | | - | locations_list, columns=["User_ID", "Distance"] |
1669 | | - | ) |
1670 | | - | if hasattr(user, "peer"): |
1671 | | - | ID = user.peer.user_id |
1672 | | - | else: |
1673 | | - | pass |
1674 | | - | if hasattr(user, "distance"): |
1675 | | - | distance = user.distance |
1676 | | - | else: |
1677 | | - | pass |
1678 | | - | |
1679 | | - | locations_list.append([ID, distance]) |
1680 | | - | |
1681 | | - | except: |
1682 | | - | pass |
1683 | | - | |
1684 | | - | d_500 = 0 |
1685 | | - | d_1000 = 0 |
1686 | | - | d_2000 = 0 |
1687 | | - | d_3000 = 0 |
1688 | | - | |
1689 | | - | for account, distance in user_df.itertuples(index=False): |
1690 | | - | account = int(account) |
1691 | | - | my_user = await client.get_entity(PeerUser(account)) |
1692 | | - | user_id = my_user.id |
1693 | | - | name = my_user.first_name |
1694 | | - | distance = int(distance) |
1695 | | - | |
1696 | | - | if distance == 500: |
1697 | | - | d_500 += 1 |
1698 | | - | elif distance == 1000: |
1699 | | - | d_1000 += 1 |
1700 | | - | elif distance == 2000: |
1701 | | - | d_2000 += 1 |
1702 | | - | elif distance == 3000: |
1703 | | - | d_3000 += 1 |
1704 | | - | |
1705 | | - | with open( |
1706 | | - | save_file, "w+", encoding="utf-8" |
1707 | | - | ) as f: # could one day append, including access time to differentiate |
1708 | | - | user_df.to_csv(f, sep=";", index=False) |
1709 | | - | |
1710 | | - | total = len(locations_list) |
1711 | | - | |
1712 | | - | color_print_green(" [+] Users located", "") |
1713 | | - | color_print_green(" â Users within 500m: ", str(d_500)) |
1714 | | - | color_print_green(" â Users within 1000m: ", str(d_1000)) |
1715 | | - | color_print_green(" â Users within 2000m: ", str(d_2000)) |
1716 | | - | color_print_green(" â Users within 3000m: ", str(d_3000)) |
1717 | | - | color_print_green(" â Total users found: ", str(total)) |
1718 | | - | color_print_green(" â Location list saved to: ", save_file) |
1719 | | - | |
1720 | | - | # can also do the same for channels with similar output file to users |
1721 | | - | # may one day add trilateration to find users closest to exact point |
1722 | | - | |
# Connect the Telethon client and drive the investigation coroutine to
# completion; the context manager guarantees disconnect on exit.
with client:
    client.loop.run_until_complete(main())
1725 | | - | |
# Standard entry guard so importing this module has no side effects.
if __name__ == "__main__":
    cli()
1728 | | - | |