forked from swisskyrepo/Vulny-Code-Static-Analysis
-
Notifications
You must be signed in to change notification settings - Fork 0
/
detection.py
178 lines (143 loc) · 6.68 KB
/
detection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
import re
import math
from indicators import *
from functions import *
# Running total of reported findings (incremented by analysis() and add_vuln_var())
result_count = 0
# Number of files handed to analysis() so far (incremented once per call)
result_files = 0
# Shannon entropy of a string, measured over a given alphabet
def shannon_entropy(data, iterator):
    """
    Return the Shannon entropy (in bits) of `data`, counting only the
    symbols yielded by `iterator`.

    data     -- the string to measure; an empty/falsy value yields 0
    iterator -- iterable of symbols (e.g. an alphabet string); symbols of
                `data` that are not listed here do not contribute

    Borrowed from http://blog.dkbza.org/2007/05/scanning-data-for-entropy-anomalies.html
    """
    if not data:
        return 0

    total = len(data)
    bits = 0
    for symbol in iterator:
        hits = data.count(symbol)
        if hits == 0:
            # Absent symbols have probability 0 and add nothing
            continue
        probability = float(hits) / total
        bits += -probability * math.log(probability, 2)
    return bits
# Analyse the source code of a single page
def analysis(path, plain):
    """
    Scan one source file and report every finding.

    Three passes run over the cleaned source:
      1. Hardcoded credentials: a `$var = "..."` / `define("...")` match whose
         text contains one of the credential keywords.
      2. High entropy strings: an assignment of a quoted literal whose
         Shannon entropy reaches 4.1 over the base64 alphabet or 2.5 over
         the hex alphabet.
      3. Function based detection driven by `payloads` and
         `regex_indicators` (presumably provided by the star imports of
         this module -- confirm).

    path  -- file to read; decoded as UTF-8 with undecodable bytes replaced
    plain -- forwarded unchanged to add_vuln_var() / display()

    Side effects: increments result_files once per call; result_count is
    incremented once per finding displayed by pass 3 (passes 1 and 2 report
    through add_vuln_var()).
    """
    global result_count
    global result_files
    result_files += 1

    # Read everything up front so the file handle is released before scanning
    with open(path, 'r', encoding='utf-8', errors='replace') as content_file:
        content = content_file.read()

    # Clean source for a better detection
    content = clean_source_and_format(content)

    # Passes 1 and 2 both work on the source with every space removed
    content_pure = content.replace(' ', '')

    # Hardcoded credentials (work as an exception, it's not function based)
    credz = ['pass', 'secret', 'token', 'pwd']

    # Detect all variables. Raw literal: "\$", "\w", "\s" and "\(" are regex
    # escapes, not Python string escapes (a plain literal warns on 3.12+).
    regex_var_detect = r'''\$[\w\s]+\s?=\s?["|'].*["|']|define\(["|'].*["|']\)'''
    matches = re.compile(regex_var_detect, re.I).findall(content_pure)

    # If we find a variable with a constant for a given indicator.
    # The keyword loop stays outermost so findings keep their original order.
    for credential in credz:
        for vuln_content in matches:
            if credential in vuln_content.lower():
                payload = ["", "Hardcoded Credential", []]
                add_vuln_var(payload, plain, path, vuln_content, content, regex_var_detect)

    # High Entropy String
    regex_var_detect = r'''.*?=\s?["|'].*?["|'].*?'''
    matches = re.compile(regex_var_detect, re.I).findall(content_pure)
    BASE64_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
    HEX_CHARS = "1234567890abcdefABCDEF"
    for vuln_content in matches:
        payload = ["", "High Entropy String", []]
        if shannon_entropy(vuln_content, BASE64_CHARS) >= 4.1 or \
                shannon_entropy(vuln_content, HEX_CHARS) >= 2.5:
            add_vuln_var(payload, plain, path, vuln_content, content, regex_var_detect)

    # Detection of RCE/SQLI/LFI/RFI/RFU/XSS/...
    # Handle "require something" vs "require(something)":
    # dirty trick to force a parenthesis before the function's argument
    content_placeholder = content.replace(" ", "(PLACEHOLDER")
    # Same indicator pattern minus its first and last two characters,
    # used to pull each variable out of a matched call
    variable_regex = re.compile(regex_indicators[2:-2])

    for payload in payloads:
        regex = re.compile(payload[0] + regex_indicators)

        for vuln_content in regex.findall(content_placeholder):
            # Undo the placeholder trick on every captured part
            vuln_content = [
                part.replace("(PLACEHOLDER", " ").replace("PLACEHOLDER", "")
                for part in vuln_content
            ]

            occurence = 0

            # Security hole detected, is it protected ?
            if check_protection(payload[2], vuln_content):
                continue

            # These two persist across the variables of one match: they are
            # only overwritten when a declaration lookup is performed
            declaration_text, line = "", ""

            # Managing multiple variable in a single line/function
            sentence = "".join(vuln_content)
            for vulnerable_var in variable_regex.findall(sentence):
                false_positive = False
                occurence += 1

                # No declaration for $_GET, $_POST ...
                if not check_exception(vulnerable_var[1]):
                    # Look for the declaration of $something = xxxxx
                    false_positive, declaration_text, line = check_declaration(
                        content,
                        vulnerable_var[1],
                        path)

                    # Set false positive if protection is in the variable's declaration
                    is_protected = check_protection(payload[2], declaration_text)
                    false_positive = is_protected or false_positive

                # Display all the vuln
                line_vuln = find_line_vuln(payload, vuln_content, content)

                # Check for not $dest="constant"; $dest='cste'; $dest=XX;
                if "$_" not in vulnerable_var[1]:
                    if "$" not in declaration_text.replace(vulnerable_var[1], ''):
                        false_positive = True

                if not false_positive:
                    result_count += 1
                    display(path, payload, vuln_content, line_vuln, declaration_text, line, vulnerable_var[1], occurence, plain)
# Run through every file and subdirectory
def recursive(dir, progress, plain):
    """
    Walk a directory tree and run analysis() on every PHP file found.

    dir      -- directory to list (the name shadows the builtin but is kept
                so existing keyword callers keep working)
    progress -- counter incremented on entry; it sets how many progress
                glyphs are printed and is passed on to nested calls
    plain    -- selects the progress glyph and is forwarded to analysis()

    Raises SystemExit(-1) after printing an error message when an OSError
    occurs while listing the directory or analysing a file.
    """
    progress += 1
    progress_indicator = "█" if plain else '⬛'

    try:
        for name in os.listdir(dir):
            print('\tAnalyzing : ' + progress_indicator * progress + '\r', end="\r")

            entry = os.path.join(dir, name)
            if os.path.isfile(entry):
                # Targeting only PHP files: test the file name alone, so a
                # parent directory containing ".php" does not pull in every
                # file stored below it
                if ".php" in name:
                    analysis(dir + "/" + name, plain)
            elif os.path.isdir(entry):
                recursive(dir + "/" + name, progress, plain)
            # Anything else (dangling symlink, socket, FIFO, ...) is skipped:
            # listing it would raise and abort the whole scan

    except OSError:
        print("Error 404 - Not Found, maybe you need more right ?" + " " * 30)
        # SystemExit directly: the exit() helper is only installed by `site`
        raise SystemExit(-1)
# Print a short summary of the scan
def scanresults():
    """
    Print how many vulnerabilities were counted and how many files were
    analysed, read from the module-level result_count / result_files.
    """
    summary = "Found {} vulnerabilities in {} files"
    print(summary.format(result_count, result_files))
def add_vuln_var(payload, plain, path, vuln_content, page_content, regex_var_detect, occurence=1):
    """
    Report one literal based finding through display() and count it.

    payload          -- payload description forwarded to display()
    plain            -- forwarded to display()
    path             -- path of the analysed file, forwarded to display()
    vuln_content     -- the matched text; also passed to display() as the
                        declaration text and as the highlighted part
    page_content     -- full source used to locate the line of the finding
    regex_var_detect -- pattern (compiled case-insensitively) that produced
                        the match
    occurence        -- occurrence number forwarded to display()

    The reported line is the 0-based index of the first line that contains
    `vuln_content` once spaces are ignored (callers match against a
    space-stripped copy of the source). If no single line contains it, the
    last line matching `regex_var_detect` is used, and -1 if there is none.
    NOTE(review): whether display() expects a 0-based index is not visible
    here -- confirm.

    Side effect: increments the module-level result_count.
    """
    global result_count

    # Compile once instead of once per source line
    pattern = re.compile(regex_var_detect, re.I)
    needle = vuln_content.replace(' ', '')

    # Get the line of the vulnerability
    line_vuln = -1
    for index, line_text in enumerate(page_content.split('\n')):
        if needle and needle in line_text.replace(' ', ''):
            # This line holds the finding itself: stop here
            line_vuln = index
            break
        if pattern.search(line_text):
            # Fallback candidate: last line matching the generic pattern
            line_vuln = index

    # display the result
    display(
        path,            # path
        payload,         # payload
        vuln_content,    # vulnerability
        line_vuln,       # line
        vuln_content,    # declaration_text
        str(line_vuln),  # declaration_line
        vuln_content,    # colored
        occurence,       # occurence
        plain            # plain
    )

    # increment the global vulnerability count
    result_count += 1