forked from smicallef/spiderfoot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsfp_ahmia.py
174 lines (137 loc) · 5.8 KB
/
sfp_ahmia.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# -*- coding: utf-8 -*-
# -------------------------------------------------------------------------------
# Name: sfp_ahmia
# Purpose: Searches the Tor search engine 'Ahmia' for content related to the
# target.
#
# Author: Steve Micallef <[email protected]>
#
# Created: 20/06/2017
# Copyright: (c) Steve Micallef 2017
# Licence: MIT
# -------------------------------------------------------------------------------
import re
import urllib.error
import urllib.parse
import urllib.request
from spiderfoot import SpiderFootEvent, SpiderFootPlugin
class sfp_ahmia(SpiderFootPlugin):
    # SpiderFoot module that queries the Ahmia search engine (ahmia.fi) with
    # the data of each watched event and reports the .onion links found in
    # the results. When the 'fetchlinks' option is on, each link is fetched
    # and only reported if the page actually contains the searched data; a
    # short excerpt around the first occurrence is then reported as well.

    meta = {
        'name': "Ahmia",
        'flags': ["tor"],
        'summary': "Search Tor 'Ahmia' search engine for mentions of the target.",
        'useCases': ["Footprint", "Investigate"],
        'categories': ["Search Engines"],
        'dataSource': {
            'website': "https://ahmia.fi/",
            'model': "FREE_NOAUTH_UNLIMITED",
            'references': [
                "https://ahmia.fi/documentation/",
                "https://github.com/ahmia/",
                "http://msydqstlz2kzerdg.onion/",
                "https://ahmia.fi/stats"
            ],
            'favIcon': "https://ahmia.fi/static/images/favicon.ico",
            'logo': "https://ahmia.fi/static/images/ahmiafi_black.png",
            'description': "Ahmia searches hidden services on the Tor network. To access these hidden services,"
            "you need the Tor browser bundle. Abuse material is not allowed on Ahmia. "
            "See our service blacklist and report abuse material if you find it in the index. "
            "It will be removed as soon as possible.\n"
            "Contributors to Ahmia believe that the Tor network is an important and "
            "resilient distributed platform for anonymity and privacy worldwide. "
            "By providing a search engine for what many call the \"deep web\" or \"dark net\", "
            "Ahmia makes hidden services accessible to a wide range of people, not just Tor network early adopters."
        }
    }

    # Default options
    opts = {
        'fetchlinks': True,
        'fullnames': True
    }

    # Option descriptions
    optdescs = {
        'fetchlinks': "Fetch the darknet pages (via TOR, if enabled) to verify they mention your target.",
        'fullnames': "Search for human names?"
    }

    # Search terms and links already handled; replaced by a fresh store in setup()
    results = None

    def setup(self, sfc, userOpts=None):
        """Prepare the module for use.

        Args:
            sfc: SpiderFoot helper object; stored as ``self.sf`` and used for
                fetching URLs and parsing host names.
            userOpts (dict): option overrides, copied key by key into
                ``self.opts``. ``None`` (the default) means no overrides.
        """
        self.sf = sfc
        self.results = self.tempStorage()

        # A None default avoids sharing one dict object between calls.
        if userOpts is None:
            userOpts = {}

        for opt in list(userOpts.keys()):
            self.opts[opt] = userOpts[opt]

    # What events is this module interested in for input
    def watchedEvents(self):
        """Return the list of event types this module consumes."""
        return ["DOMAIN_NAME", "HUMAN_NAME", "EMAILADDR"]

    # What events this module produces
    def producedEvents(self):
        """Return the list of event types this module can emit."""
        return ["DARKNET_MENTION_URL", "DARKNET_MENTION_CONTENT"]

    @staticmethod
    def _extractContext(content, term, padding=120):
        """Return the text surrounding the first occurrence of ``term``.

        Args:
            content (str): text to search.
            term (str): string to locate.
            padding (int): maximum number of characters to keep on each side
                of the match.

        Returns:
            str: the match plus up to ``padding`` characters either side, or
            None if ``term`` does not occur in ``content``.
        """
        pos = content.find(term)
        if pos < 0:
            return None

        # Clamp to 0: a negative start would be interpreted by slicing as an
        # offset from the END of the text and yield an empty/wrong excerpt
        # whenever the match sits within the first `padding` characters.
        start = max(0, pos - padding)
        end = pos + len(term) + padding
        return content[start:end]

    def handleEvent(self, event):
        """Search Ahmia for the event data and report matching .onion links.

        Emits DARKNET_MENTION_URL for every new .onion link found. When the
        'fetchlinks' option is enabled the link is fetched first and only
        reported if its content contains the event data, in which case a
        DARKNET_MENTION_CONTENT event with the surrounding text follows.

        Args:
            event: the SpiderFootEvent to process.
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        self.debug(f"Received event, {eventName}, from {srcModuleName}")

        if not self.opts['fullnames'] and eventName == 'HUMAN_NAME':
            self.debug(f"Skipping HUMAN_NAME: {eventData}")
            return

        if eventData in self.results:
            self.debug(f"Skipping {eventData}, already checked.")
            return

        self.results[eventData] = True

        params = urllib.parse.urlencode({
            'q': eventData
        })

        data = self.sf.fetchUrl(
            f"https://ahmia.fi/search/?{params}",
            useragent=self.opts['_useragent'],
            timeout=15
        )

        # A missing response and an empty body are reported the same way.
        content = data.get('content') if data else None
        if not content:
            self.info(f"No results for {eventData} returned from Ahmia.fi.")
            return

        # We don't bother with pagination as Ahmia seems fairly limited in coverage
        # and displays hundreds of results per page
        links = re.findall(r'redirect_url=(.[^"]+)"', content, re.IGNORECASE | re.DOTALL)

        if not links:
            self.info(f"No results for {eventData} returned from Ahmia.fi.")
            return

        for link in links:
            if self.checkForStop():
                return

            if link in self.results:
                continue

            self.results[link] = True
            self.debug(f"Found a darknet mention: {link}")

            if not self.sf.urlFQDN(link).endswith(".onion"):
                continue

            # Without verification, report the link as-is.
            if not self.opts['fetchlinks']:
                evt = SpiderFootEvent("DARKNET_MENTION_URL", link, self.__name__, event)
                self.notifyListeners(evt)
                continue

            res = self.sf.fetchUrl(
                link,
                timeout=self.opts['_fetchtimeout'],
                useragent=self.opts['_useragent'],
                verify=False
            )

            # Guard against a missing response as well as a missing body,
            # mirroring the check done on the search request above.
            page = res.get('content') if res else None
            if page is None:
                self.debug(f"Ignoring {link} as no data returned")
                continue

            if eventData not in page:
                self.debug(f"Ignoring {link} as no mention of {eventData}")
                continue

            evt = SpiderFootEvent("DARKNET_MENTION_URL", link, self.__name__, event)
            self.notifyListeners(evt)

            wdata = self._extractContext(page, eventData)
            if wdata is None:
                # Not expected after the containment check above; kept as a
                # defensive fallback so no bogus content event is emitted.
                self.debug(f"String '{eventData}' not found in content.")
                continue

            # The content event is parented to the URL event it belongs to.
            evt = SpiderFootEvent("DARKNET_MENTION_CONTENT", f"...{wdata}...", self.__name__, evt)
            self.notifyListeners(evt)
# End of sfp_ahmia class