STRLCPY/Maryam

Merge branch 'saeeddhqan:master' into TopicModeling_0.1.0
keamanansiber committed with GitHub 2 years ago

bd4089ee

2 parents
314a2fbc
d6168581

■ ■ ■ ■ ■ ■

maryam/core/util/engines/google.py

		skipped 24 lines
25	25		self.q = q
26	26		self.agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:86.0) Gecko/20100101 Firefox/86.0'
27	27		self.xpath_name_original = {
28		-	'results': '//div[@class="g"]|//div[@class="g tF2Cxc"]',
29		-	'results_content': './/div[@class="IsZvec"]',
	28	+	'results': '//div[@class="g"]|//div[@class="g tF2Cxc"]|//div[@class="g Ww4FFb tF2Cxc"]',
	29	+	'results_content': './/div[@data-content-feature="1"]|.//div[@class="VwiC3b yXK7lf MUxGbd yDYNvb lyLwlc lEBKkf"]',
30	30		'results_title': './/h3[1]',
31	31		'results_a': './/div[@class="yuRUbf"]/a',
32	32		'results_cite': './/div[@class="yuRUbf"]/a//cite'
		skipped 170 lines

■ ■ ■ ■ ■ ■

maryam/core/util/helpers/reglib.py

		skipped 36 lines
37	37		ip_s = r"^\d+\.[\d]+\.[\d]+\.[\d]+$"
38	38		ip_m = r"\d+\.[\d]+\.[\d]+\.[\d]+"
39	39		social_network_ulinks = {
40		-	'Instagram': r"instagram\.com/[A-z_0-9.\-]{1,30}",
41		-	'Facebook': [r"facebook\.com/[A-z_0-9\-]{2,50}", r"fb\.com/[A-z_0-9\-]{2,50}"],
42		-	'Twitter': r"twitter\.com/[A-z_0-9\-.]{2,40}",
43		-	'Github': r"github\.com/[A-z0-9_-]{1,39}",
44		-	'Github site': [r"[A-z0-9_-]{1,39}\.github\.io", r"[A-z0-9_-]{1,39}\.github\.com"],
45		-	'Telegram': r"telegram\.me/[A-z_0-9]{5,32}",
46		-	'Youtube user': r"youtube\.com/user/[A-z_0-9\-\.]{2,100}",
47		-	'Youtube channel': [r"youtube\.com/c/[A-z_0-9\-\.]{2,100}", \
48		-	r"youtube\.com/channel/[A-z_0-9\-\.]{2,100}"],
49		-	'Linkedin company': r"linkedin\.com/company/[A-z_0-9\.\-]{3,50}",
50		-	'Linkedin individual': r"linkedin\.com/in/[A-z_0-9\.\-]{3,50}",
51		-	'Googleplus': r"\.?plus\.google\.com/[A-z0-9_\-.+]{3,255}",
52		-	'WordPress': r"[A-z0-9\-]+\.wordpress\.com",
	40	+	'Instagram': r"instagram\.com/[\w.\-]{1,30}",
	41	+	'Facebook': [r"facebook\.com/[\w\-]{2,50}", r"fb\.com/[\w\-]{2,50}"],
	42	+	'Twitter': r"twitter\.com/[\w\-.]{2,40}",
	43	+	'Github': r"github\.com/[\w\-]{1,39}",
	44	+	'Github site': [r"[\w\-]{1,39}\.github\.io", r"[\w\-]{1,39}\.github\.com"],
	45	+	'Telegram': r"telegram\.me/[\w]{5,32}",
	46	+	'Youtube user': r"youtube\.com/user/[\w\-\.]{2,100}",
	47	+	'Youtube channel': [r"youtube\.com/c/[\w\-.]{2,100}", \
	48	+	r"youtube\.com/channel/[\w\-.]{2,100}"],
	49	+	'Linkedin company': r"linkedin\.com/company/[\w\.\-]{3,50}",
	50	+	'Linkedin individual': r"linkedin\.com/in/[\w\.\-]{3,50}",
	51	+	'Googleplus': r"\.?plus\.google\.com/[\w\-.+]{3,255}",
	52	+	'WordPress': r"[\w\-]+\.wordpress\.com",
53	53		'Reddit': r"reddit\.com/user/[A-z0-9_\-]{3,20}",
54		-	'Tumblr': r"[A-z0-9\-]{3,32}\.tumblr\.com",
55		-	'Blogger': r"[A-z0-9\-]{3,50}\.blogspot\.com"
	54	+	'Tumblr': r"[\w\-]{3,32}\.tumblr\.com",
	55	+	'Blogger': r"[\w\-]{3,50}\.blogspot\.com"
56	56		}
57	57
58	58		def search(self, regex, _type=list):
		skipped 29 lines

■ ■ ■ ■ ■ ■

maryam/modules/search/youtube.py

		skipped 18 lines
19	19		meta = {
20	20		'name': 'Youtube Search',
21	21		'author': 'Aman Rawat',
22		-	'version': '0.5',
	22	+	'version': '0.6',
23	23		'description': 'Search your query in the youtube.com and show the results.',
24	24		'sources': ('google', 'carrot2', 'bing', 'yahoo', 'millionshort', 'qwant', 'duckduckgo'),
25	25		'options': (
		skipped 24 lines
50	50		attr.run_crawl()
51	51		LINKS += attr.links
52	52		PAGES += attr.pages
53		-	if name == 'google':
54		-	attr.q = q_formats['ch_q']
55		-	attr.run_crawl()
56		-	PAGES += attr.pages
57	53
58	54		def module_api(self):
59	55		query = self.options['query']
		skipped 2 lines
62	58		engines = self.options['engine'].split(',')
63	59		output = {'videos': [], 'channels': [], 'usernames': []}
64	60		q_formats = {
65		-	'ch_q': f"site:youtube.com inurl:/c/ OR inurl:/user/ {query}",
66	61		'default_q': f"site:youtube.com {query}",
67	62		'qwant_q': f"site:www.youtube.com {query}",
68	63		'millionshort_q': f'site:www.youtube.com "{query}"',
69	64		}
70		-	# self.thread(search, self.options['thread'], engines, query, q_formats, limit, count, meta['sources'])
71		-	search(self, 'google', query, q_formats, limit, count)
72		-	# self.options['thread'], engines, query, q_formats, limit, count, meta['sources'])
	65	+	self.thread(search, self.options['thread'], engines, query, q_formats, limit, count, meta['sources'])
73	66
74	67		links = filter(lambda x: '/feed/' not in\
75	68		x and 'www.youtube.com' in x and ('/watch?' in x.lower() or '/playlist?' in x.lower()), list(set(LINKS)))
		skipped 11 lines

Merge branch 'saeeddhqan:master' into TopicModeling_0.1.0