Projects STRLCPY Maryam Commits 616a293a
🤬
Revision indexing in progress... (symbol navigation in revisions will be accurate after indexing completes)
  • ■ ■ ■ ■ ■
    README.md | 100755 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100644
    skipped 41 lines
    42 42   
    43 43  # Latest Updates
    44 44   
     45 + - Added image_search module
    45 46   - Clustering, Meta Search Engine, Dark-Web Search
    46 47   - Iris: the first beta version
    47 48   - Added famous_person
    48 49   - Core speedup optimizations
    49 50   - Added setup.py
    50  - - Web API: web command
     51 + 
    51 52   
    52 53   
    53 54   
    skipped 21 lines
  • maryam/__init__.py | 100644 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100755
    Content is identical
  • maryam/__main__.py | 100644 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100755
    Content is identical
  • maryam/core/__init__.py | 100644 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100755
    Content is identical
  • maryam/core/basedir.py | 100644 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100755
    Content is identical
  • maryam/core/core.py | 100644 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100755
    Content is identical
  • maryam/core/initial.py | 100644 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100755
    Content is identical
  • maryam/core/util/__init__.py | 100644 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100755
    Content is identical
  • ■ ■ ■ ■ ■ ■
    maryam/core/util/engines/bing_images.py
     1 +"""
     2 +OWASP Maryam!
     3 +This program is free software: you can redistribute it and/or modify
     4 +it under the terms of the GNU General Public License as published by
     5 +the Free Software Foundation, either version 3 of the License, or
     6 +any later version.
     7 +This program is distributed in the hope that it will be useful,
     8 +but WITHOUT ANY WARRANTY; without even the implied warranty of
     9 +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
     10 +GNU General Public License for more details.
     11 +You should have received a copy of the GNU General Public License
     12 +along with this program. If not, see <http://www.gnu.org/licenses/>.
     13 +"""
     14 + 
     15 +from lxml import html
     16 +from json import loads
     17 + 
     18 +class main:
     19 + def __init__(self, q, limit=3, count=28):
     20 + """ bing.com search engine
     21 + q : Query for search
     22 + limit : Number of pages
     23 + count : Number of results
     24 + """
     25 + self.framework = main.framework
     26 + self.q = q
     27 + self.agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:100.0) Gecko/20100101 Firefox/100.0'
     28 + self.url = 'https://www.bing.com/images/search'
     29 + self._pages = ''
     30 + self.count = 28 if count > 28 else count
     31 + self.limit = 10 if limit > 10 else limit
     32 + 
     33 + def run_crawl(self):
     34 + set_page = lambda x: (x*self.count)+1
     35 + payload = {'count': self.count, 'first': 1, 'form': 'IBASEP', 'q': self.q}
     36 + max_attempt = 0
     37 + for i in range(self.limit):
     38 + self.framework.verbose(f"[BING Image] Searching in {i} page...", end='\r')
     39 + try:
     40 + req = self.framework.request(
     41 + url=self.url,
     42 + params=payload,
     43 + headers={'user-agent': self.agent},
     44 + allow_redirects=True)
     45 + except Exception as e:
     46 + self.framework.error(f"ConnectionError: {e}", 'util/engines/bing_images', 'run_crawl')
     47 + max_attempt += 1
     48 + if max_attempt == self.limit:
     49 + self.framework.error('Bing is missed!', 'util/engines/bing_images', 'run_crawl')
     50 + break
     51 + else:
     52 + self._pages += req.text
     53 + payload['first'] = set_page(i+1)
     54 + 
     55 + @property
     56 + def results(self):
     57 + items = []
     58 + tree = html.fromstring(self._pages)
     59 + for item in tree.xpath('//div[@class="imgpt"]'):
     60 + try:
     61 + info = item.xpath('.//div[@class="img_info hon"]')[0].xpath("./span[@class='nowrap']")[0].text
     62 + m = loads(item.xpath('./a/@m')[0])
     63 + title = m.get('t', '').replace(u'\ue000', '').replace(u'\ue001', '')
     64 + items.append({'i': m['murl'],
     65 + 't': title,
     66 + 'a': m['purl'],
     67 + 'd': info
     68 + })
     69 + except:
     70 + continue
     71 + return items
     72 + 
     73 + @property
     74 + def pages(self):
     75 + return self._pages
     76 + 
     77 + @property
     78 + def links(self):
     79 + return [x['a'] for x in self.results]
     80 + 
     81 + 
  • ■ ■ ■ ■
    maryam/core/util/engines/google.py
    skipped 24 lines
    25 25   self.q = q
    26 26   self.agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:86.0) Gecko/20100101 Firefox/86.0'
    27 27   self.xpath_name_original = {
    28  - 'results': '//div[@class="g"]|//div[@class="g tF2Cxc"]|//div[@class="g Ww4FFb tF2Cxc"]',
     28 + 'results': '//div[@class="MjjYud"]|//div[@class="g"]|//div[@class="g tF2Cxc"]|//div[@class="g Ww4FFb tF2Cxc"]',
    29 29   'results_content': './/div[@data-content-feature="1"]|.//div[@class="VwiC3b yXK7lf MUxGbd yDYNvb lyLwlc lEBKkf"]',
    30 30   'results_title': './/h3[1]',
    31 31   'results_a': './/div[@class="yuRUbf"]/a',
    skipped 171 lines
  • ■ ■ ■ ■ ■ ■
    maryam/core/util/engines/google_images.py
     1 +"""
     2 +OWASP Maryam!
     3 +This program is free software: you can redistribute it and/or modify
     4 +it under the terms of the GNU General Public License as published by
     5 +the Free Software Foundation, either version 3 of the License, or
     6 +any later version.
     7 +This program is distributed in the hope that it will be useful,
     8 +but WITHOUT ANY WARRANTY; without even the implied warranty of
     9 +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
     10 +GNU General Public License for more details.
     11 +You should have received a copy of the GNU General Public License
     12 +along with this program. If not, see <http://www.gnu.org/licenses/>.
     13 +"""
     14 + 
     15 +from lxml import html
     16 + 
     17 + 
class main:
    def __init__(self, q):
        """ google.com image search engine
        q : Query for search
        """
        # framework is injected onto the class by the loader before instantiation
        self.framework = main.framework
        self.q = q
        self.agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:86.0) Gecko/20100101 Firefox/86.0'
        # XPaths tied to Google's current image-results markup; they break
        # whenever Google rotates its generated class names
        self.xpath_name_original = {
            'results': '//div[@class="isv-r PNCib MSM1fd BUooTd"]',
            'results_content': './/img',
            'results_title': './/h3',
            'results_a': './/a',
            'results_cite': './/div[@class="dmeZbb"]'
        }
        # Grouped form: result-container XPath -> the four sub-XPaths
        self.xpath_original = {
            self.xpath_name_original['results']: [
                self.xpath_name_original['results_content'],
                self.xpath_name_original['results_title'],
                self.xpath_name_original['results_a'],
                self.xpath_name_original['results_cite']
            ]
        }
        self.url = 'https://www.google.com/search'
        # Raw HTML accumulated by run_crawl
        self._pages = ''

    def run_crawl(self):
        # 'tbm': 'isch' selects Google image search; 'filter': '0' disables
        # the omitted-duplicates filter
        payload = {'ie': 'utf-8', 'oe': 'utf-8', 'q': self.q, 'filter': '0', 'tbm': 'isch'}
        self.framework.verbose(f"[GOOGLE Images] Searching...", end='\r')
        try:
            req = self.framework.request(
                url=self.url,
                params=payload,
                headers={'user-agent': self.agent},
                allow_redirects=True)
        except Exception as e:
            # Best-effort: report and leave self._pages unchanged
            self.framework.error(f"ConnectionError: {e}", 'util/google_images', 'run_crawl')
            self.framework.error('Google is missed!', 'util/google_images', 'run_crawl')
        else:
            # 503/429 are Google's rate-limit/CAPTCHA responses
            if req.status_code in (503, 429):
                self.framework.error('Google CAPTCHA triggered.', 'util/google_images', 'run_crawl')
                return
            # Follow a single explicit redirect manually (e.g. country-domain hop)
            if req.status_code in (301, 302):
                redirect = req.headers['location']
                req = self.framework.request(url=redirect, allow_redirects=False)

            self._pages += req.text

    @property
    def results(self):
        """Parse self._pages into a list of dicts with keys
        'i' (image url), 't' (title), 'a' (page url), 'd' (cite/description)."""
        tree = html.fromstring(self._pages)
        output = []
        results = []
        results = tree.xpath(self.xpath_name_original['results'])
        for i in results:
            a = i.xpath(self.xpath_name_original['results_content'])
            b = i.xpath(self.xpath_name_original['results_title'])
            c = i.xpath(self.xpath_name_original['results_a'])
            d = i.xpath(self.xpath_name_original['results_cite'])
            roler = {}
            # Only emit a result when all four parts were found
            if all([a, b, c, d]):
                # a[0].values()[0] is the first attribute of the <img> tag —
                # presumably src; relies on attribute order — TODO confirm
                if 'https://' not in a[0].values()[0]:
                    continue
                roler['i'] = a[0].values()[0]
                roler['t'] = b[0].text_content()
                # c[1]: the second <a> inside the tile carries the page link —
                # NOTE(review): depends on Google's markup order; verify
                roler['a'] = c[1].get('href')
                roler['d'] = d[0].text_content()
                output.append(roler)
        return output

    @property
    def pages(self):
        # Raw concatenated HTML of the fetched page(s)
        return self._pages

    @property
    def links(self):
        """Page URLs of all parsed results."""
        links = [x['a'] for x in self.results]
        return links

    @property
    def dns(self):
        # Delegates host extraction to the framework's page parser
        return self.framework.page_parse(self._pages).get_dns(self.q, self.links)

    @property
    def emails(self):
        return self.framework.page_parse(self._pages).get_emails(self.q)

    @property
    def docs(self):
        return self.framework.page_parse(self._pages).get_docs(self.q, self.links)
     108 + 
  • ■ ■ ■ ■ ■ ■
    maryam/core/util/engines/qwant.py
    skipped 14 lines
    15 15  along with this program. If not, see <http://www.gnu.org/licenses/>.
    16 16  """
    17 17   
    18  -import re
    19 18   
    20 19  class main:
    21 20   
    skipped 21 lines
    43 42   try:
    44 43   req = self.framework.request(url=self.qwant, params=payload)
    45 44   except Exception as e:
    46  - self.framework.error(f"ConnectionError {e}.", 'util/engines/qwant', 'name_crawl')
    47  - self.framework.error('Qwant is missed!', 'util/engines/qwant', 'name_crawl')
     45 + self.framework.error(f"ConnectionError {e}.", 'util/engines/qwant', 'util/engines/qwant', 'run_crawl')
     46 + self.framework.error('Qwant is missed!', 'util/engines/qwant', 'util/engines/qwant', 'run_crawl')
    48 47   break
    49 48   else:
    50 49   if req.status_code == 429 and "I can't let you do that..." in req.text and '<div class="error-code">' in req.text:
    51  - self.framework.error('429 Too Many Requests')
     50 + self.framework.error('429 Too Many Requests', 'util/engines/qwant', 'run_crawl')
    52 51   return
    53 52   self._pages += req.text
    54 53   try:
    55 54   self._json.append(req.json())
    56 55   except Exception as e:
    57  - self.framework.error('429 Too Many Requests')
     56 + self.framework.error('429 Too Many Requests', 'util/engines/qwant', 'run_crawl')
    58 57   return
    59 58   else:
    60 59   if req.json() == {'status': 'error', 'data': {'error_code': 22}}:
    61  - self.framework.error('429 Too Many Requests')
     60 + self.framework.error('429 Too Many Requests', 'util/engines/qwant', 'run_crawl')
    62 61   return
    63 62   else:
    64 63   if page == self.limit:
    skipped 53 lines
  • ■ ■ ■ ■ ■ ■
    maryam/core/util/engines/qwant_images.py
     1 +"""
     2 +OWASP Maryam!
     3 + 
     4 +This program is free software: you can redistribute it and/or modify
     5 +it under the terms of the GNU General Public License as published by
     6 +the Free Software Foundation, either version 3 of the License, or
     7 +any later version.
     8 + 
     9 +This program is distributed in the hope that it will be useful,
     10 +but WITHOUT ANY WARRANTY; without even the implied warranty of
     11 +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
     12 +GNU General Public License for more details.
     13 + 
     14 +You should have received a copy of the GNU General Public License
     15 +along with this program. If not, see <http://www.gnu.org/licenses/>.
     16 +"""
     17 + 
     18 + 
     19 +class main:
     20 + 
     21 + def __init__(self, q, limit=1, count=50):
     22 + """ qwant.com image search engine
     23 + 
     24 + q : Query for search
     25 + limit : Number of pages
     26 + count : Number of results
     27 + """
     28 + self.framework = main.framework
     29 + self.q = q
     30 + self.limit = 10 if limit > 10 else limit
     31 + self.count = 50 if count > 50 else count
     32 + self._pages = ''
     33 + self._json = []
     34 + self.qwant = 'https://api.qwant.com/v3/search/images'
     35 + 
     36 + def run_crawl(self):
     37 + page = 1
     38 + set_page = lambda x: (x-1)*10
     39 + payload = {'t': 'images', 'q': self.q, 'offset': set_page(page), 'count': '10', \
     40 + 'safesearch': '0', 'device': 'desktop', 'locale': 'en_GB', 'device': 'desktop'}
     41 + while True:
     42 + self.framework.verbose(f"[QWANT] Searching in {page+1} page...")
     43 + try:
     44 + req = self.framework.request(url=self.qwant, params=payload)
     45 + except Exception as e:
     46 + self.framework.error(f"ConnectionError {e}.", 'util/engines/qwant_images', 'run_crawl')
     47 + self.framework.error('Qwant is missed!', 'util/engines/qwant_images', 'run_crawl')
     48 + break
     49 + else:
     50 + if req.status_code == 429 and "I can't let you do that..." in req.text and '<div class="error-code">' in req.text:
     51 + self.framework.error('429 Too Many Requests', 'util/engines/qwant_images', 'run_crawl')
     52 + return
     53 + self._pages += req.text
     54 + try:
     55 + self._json.append(req.json())
     56 + except Exception as e:
     57 + self.framework.error('429 Too Many Requests', 'util/engines/qwant_images', 'run_crawl')
     58 + return
     59 + else:
     60 + if req.json() == {'status': 'error', 'data': {'error_code': 22}}:
     61 + self.framework.error('429 Too Many Requests', 'util/engines/qwant_images', 'run_crawl')
     62 + return
     63 + else:
     64 + if page == self.limit:
     65 + break
     66 + page += 1
     67 + payload['offset'] = set_page(page)
     68 + 
     69 + @property
     70 + def pages(self):
     71 + return self._pages
     72 + 
     73 + @property
     74 + def json(self):
     75 + return self._json
     76 + 
     77 + @property
     78 + def results(self):
     79 + results = []
     80 + for page in self._json:
     81 + items = page.get('data', {}).get('result', {})
     82 + if items:
     83 + items = items.get('items', [])
     84 + for item in items:
     85 + results.append({
     86 + "a": item.get("url"),
     87 + "i": item.get("media"),
     88 + "t": item.get("title"),
     89 + "d": f"{item.get('width')}*{item.get('height')} {item.get('size')}B"}
     90 + )
     91 + 
     92 + return results
     93 + 
     94 + @property
     95 + def dns(self):
     96 + return self.framework.page_parse(self._pages).get_dns(self.q, self.links)
     97 + 
     98 + @property
     99 + def emails(self):
     100 + return self.framework.page_parse(self._pages).get_emails(self.q)
     101 + 
     102 + @property
     103 + def docs(self):
     104 + return self.framework.page_parse(self._pages).get_docs(self.q, self.links)
     105 + 
     106 + 
  • ■ ■ ■ ■ ■ ■
    maryam/core/util/iris/meta_search_util.py
    skipped 39 lines
    40 40   cite = f"{host}{path}"
    41 41   return cite
    42 42   
    43  - def simple_merge(results):
     43 + def remove_dups(self, res):
     44 + urls = []
     45 + new = []
     46 + for i in res:
     47 + a = self.urlib(i['a'].lower()).sub_service()
     48 + if a not in urls:
     49 + urls.append(a)
     50 + new.append(i)
     51 + return new
     52 + 
     53 + def simple_merge(results) -> 'merging results based on quality of engines':
    44 54   engines_len = len(results)
    45 55   merged = []
    46 56   
    47 57   for i in range(len(min(results, key=len))):
    48 58   for e in range(engines_len):
    49 59   merged.append(results[e%engines_len].pop(0))
     60 + 
    50 61   
    51 62   for i in results:
    52 63   for j in i:
    skipped 4 lines
    57 68   def compute_count_consensus(
    58 69   e: dict(type=list, help='list of search engines sorted by quality'),
    59 70   l: dict(type=int, help='number of results')) -> 'a list of numbers':
    60  - x=len(e)
    61  - o={}
     71 + x = len(e)
     72 + o = {}
    62 73   for i in e:
    63  - o[i]=trunc(l/x)
    64  - l-=l-(l%x)
    65  - if l!=0:
    66  - if l<x:
     74 + o[i] = trunc(l/x)
     75 + l -= l - (l%x)
     76 + if l != 0:
     77 + if l < x:
    67 78   for i in range(l):
    68  - o[e[i]]+=1
     79 + o[e[i]] += 1
    69 80   return o
    70 81   
  • maryam/modules/__init__.py | 100644 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100755
    Content is identical
  • maryam/modules/iris/__init__.py | 100644 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100755
    Content is identical
  • maryam/modules/iris/cluster.py | 100644 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100755
    Content is identical
  • ■ ■ ■ ■ ■
    maryam/modules/iris/iris.py | 100644 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100755
    skipped 18 lines
    19 19   'author': 'Saeed, Kaushik',
    20 20   'version': '0.4',
    21 21   'description': 'Iris is a built-in meta search engine.',
    22  - 'comments': ('It should be note that this is a beta version and has lots of bugs!',),
     22 + 'comments': ('It should be note that this is a beta version and has many bugs!',),
    23 23   'contributors': 'Aman, Dimitris, Divya, Vikas, Kunal',
    24 24   'sources': ('google', 'bing', 'duckduckgo', 'millionshort', 'etools'),
    25 25   'options': (
    skipped 11 lines
    37 37   for _ in concurrent.futures.as_completed(futures):
    38 38   pass
    39 39   
    40  -def remove_dups(self, res):
    41  - urls = []
    42  - new = []
    43  - for i in res:
    44  - a = self.urlib(i['a'].lower()).sub_service()
    45  - if a not in urls:
    46  - urls.append(a)
    47  - new.append(i)
    48  - return new
    49  - 
    50 40  def search(self, name, q, limit):
    51 41   global RESULTS
    52 42   count = MAPPED[name]
    skipped 18 lines
    71 61   engines = MAPPED.keys()
    72 62   thread(self, search, query, 3, engines)
    73 63   simple_merge = self.meta_search_util.simple_merge([RESULTS[x] for x in engines if x in RESULTS])
    74  - final_results = remove_dups(self, simple_merge)
     64 + # TODO: Removing duplicates should be done during merging
     65 + final_results = self.meta_search_util.remove_dups(self, simple_merge)
    75 66   output = {'results': final_results}
    76 67   self.save_gather(output, 'iris/iris', query, output=self.options['output'])
    77 68   return output
    skipped 4 lines
  • maryam/modules/iris/iris_cluster.py | 100644 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100755
    Content is identical
  • maryam/modules/iris/sentiment.py | 100644 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100755
    Content is identical
  • maryam/modules/iris/topicmodeling.py | 100644 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100755
    Content is identical
  • ■ ■ ■ ■ ■ ■
    maryam/modules/osint/image_search.py
     1 +"""
     2 +OWASP Maryam!
     3 +This program is free software: you can redistribute it and/or modify
     4 +it under the terms of the GNU General Public License as published by
     5 +the Free Software Foundation, either version 3 of the License, or
     6 +any later version.
     7 +This program is distributed in the hope that it will be useful,
     8 +but WITHOUT ANY WARRANTY; without even the implied warranty of
     9 +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
     10 +GNU General Public License for more details.
     11 +You should have received a copy of the GNU General Public License
     12 +along with this program. If not, see <http://www.gnu.org/licenses/>.
     13 +"""
     14 + 
     15 +meta = {
     16 + 'name': 'Image Search',
     17 + 'author': 'Saeed',
     18 + 'version': '0.1',
     19 + 'description': 'Search in open-sources to find images.',
     20 + 'sources': ('bing', 'google', 'qwant'),
     21 + 'options': (
     22 + ('query', None, True, 'Query, host Name, company Name, keyword, , etc', '-q', 'store', str),
     23 + ('engines', 'google', True, 'Search engines with comma separator', '-e', 'store', str),
     24 + ('thread', 3, False, 'The number of engine that run per round(default=2)', '-t', 'store', int),
     25 + ),
     26 + 'examples': ('image_search -q amazon -e google,bing,qwant',)
     27 +}
     28 +
     29 +IMGS = []
     30 +def remove_dups(self):
     31 + urls = []
     32 + new = []
     33 + for i in IMGS:
     34 + a = self.urlib(i['i'].lower()).sub_service()
     35 + if a not in urls:
     36 + urls.append(a)
     37 + new.append(i)
     38 + return new
     39 + 
     40 +def search(self, name, q):
     41 + global IMGS
     42 + try:
     43 + engine = getattr(self, name + '_images')
     44 + q = q
     45 + varnames = engine.__init__.__code__.co_varnames
     46 + attr = engine(q)
     47 + 
     48 + attr.run_crawl()
     49 + IMGS += attr.results
     50 + except Exception as e:
     51 + print(e)
     52 + 
     53 +def module_api(self):
     54 + query = self.options['query']
     55 + engines = self.options['engines'].lower().split(',')
     56 + 
     57 + self.thread(search, self.options['thread'], engines, query, meta['sources'])
     58 + INGS = remove_dups(self)
     59 + output = {'results' : IMGS}
     60 + self.save_gather(output, 'osint/image_search', query, output=self.options['output'])
     61 + return output
     62 + 
     63 +def module_run(self):
     64 + for i in module_api(self)['results']:
     65 + self.output(i['a'])
     66 + self.output(i['i'])
     67 + self.output(i['t'])
     68 + self.output(i['d'])
     69 + print()
     70 + 
  • maryam.py | 100755 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100644
    Content is identical
  • requirements | 100755 /~icons-ver-BEF942F0F42935333EFA072090F4E956.svg#arrow3 100644
    Content is identical
Please wait...
Page is in error, reload to recover