3 # Description: Utility to scan a file path for encrypted and obfuscated files
4 # Authors: Ben Hagen (ben.hagen@neohapsis.com)
5 # Scott Behrens (scott.behrens@neohapsis.com)
9 # pep-0008 - Is stupid. TABS FO'EVER!
11 # Try catch regular expressions/bad path/bad filename/bad regex/
21 from collections
import defaultdict
22 from optparse
import OptionParser
# Smallest file size to check for, in bytes.
"""Class that calculates a file's Index of Coincidence as
well as a subset of files' average Index of Coincidence.
36 """Initialize results arrays as well as character counters."""
37 self
.char_count
= defaultdict(int)
38 self
.total_char_count
= 0
40 self
.ic_total_results
= ""
42 def calculate_char_count(self
,data
):
43 """Method to calculate character counts for a particular data file."""
48 charcount
= data
.count(char
)
49 self
.char_count
[char
] += charcount
50 self
.total_char_count
+= charcount
53 def calculate_IC(self
):
54 """Calculate the Index of Coincidence for the self variables"""
56 for val
in self
.char_count
.values():
60 total
+= val
* (val
-1)
63 ic_total
= float(total
)/(self
.total_char_count
* (self
.total_char_count
- 1))
66 self
.ic_total_results
= ic_total
69 def calculate(self
,data
,filename
):
70 """Calculate the Index of Coincidence for a file and append to self.ic_results array"""
78 charcount
= data
.count(char
)
79 char_count
+= charcount
* (charcount
- 1)
80 total_char_count
+= charcount
82 ic
= float(char_count
)/(total_char_count
* (total_char_count
- 1))
83 self
.results
.append({"filename":filename
, "value":ic
})
84 # Call method to calculate_char_count and append to total_char_count
85 self
.calculate_char_count(data
)
89 self
.results
.sort(key
=lambda item
: item
["value"])
90 self
.results
= resultsAddRank(self
.results
)
92 def printer(self
, count
):
93 """Print the top signature count match files for a given search"""
94 # Calculate the Total IC for a Search
96 print "\n[[ Average IC for Search ]]"
97 print self
.ic_total_results
98 print "\n[[ Top %i lowest IC files ]]" % (count
)
99 if (count
> len(self
.results
)): count
= len(self
.results
)
100 for x
in range(count
):
101 print ' {0:>7.4f} {1}'.format(self
.results
[x
]["value"], self
.results
[x
]["filename"])
105 """Class that calculates a file's Entropy."""
108 """Instantiate the entropy_results array."""
111 def calculate(self
,data
,filename
):
112 """Calculate the entropy for 'data' and append result to entropy_results array."""
117 self
.stripped_data
=data
.replace(' ', '')
119 p_x
= float(self
.stripped_data
.count(chr(x
)))/len(self
.stripped_data
)
121 entropy
+= - p_x
* math
.log(p_x
, 2)
122 self
.results
.append({"filename":filename
, "value":entropy
})
126 self
.results
.sort(key
=lambda item
: item
["value"])
127 self
.results
.reverse()
128 self
.results
= resultsAddRank(self
.results
)
def printer(self, count):
    """Print the leading entries of self.results under the entropy heading.

    Args:
        count: Maximum number of rows to print. The heading reports this
            number as given; the rows are limited to what self.results holds.

    Rows are printed in the order self.results currently has; no sorting
    is done here. Each row shows the entry's "value" (4 decimal places)
    followed by its "filename".
    """
    # A single-argument print(...) call behaves the same on Python 2 and
    # Python 3, whereas the print statement is a SyntaxError on Python 3.
    print("\n[[ Top %i entropic files for a given search ]]" % count)
    # Clamp to [0, len(results)]: a negative count prints no rows, exactly
    # as range(count) did, instead of slicing from the end.
    shown = max(0, min(count, len(self.results)))
    for entry in self.results[:shown]:
        print(' {0:>7.4f} {1}'.format(entry["value"], entry["filename"]))
139 """Class that determines the longest word for a particular file."""
141 """Instantiate the longestword_results array."""
144 def calculate(self
,data
,filename
):
145 """Find the longest word in a string and append to longestword_results array"""
150 words
= re
.split("[\s,\n,\r]", data
)
157 self
.results
.append({"filename":filename
, "value":longest
})
161 self
.results
.sort(key
=lambda item
: item
["value"])
162 self
.results
.reverse()
163 self
.results
= resultsAddRank(self
.results
)
def printer(self, count):
    """Print the leading entries of self.results under the longest-word heading.

    Args:
        count: Maximum number of rows to print. The heading reports this
            number as given; the rows are limited to what self.results holds.

    Rows are printed in the order self.results currently has; no sorting
    is done here. Each row shows the entry's "value" right-aligned in a
    7-character column, followed by its "filename".
    """
    # A single-argument print(...) call behaves the same on Python 2 and
    # Python 3, whereas the print statement is a SyntaxError on Python 3.
    print("\n[[ Top %i longest word files ]]" % count)
    # Clamp to [0, len(results)]: a negative count prints no rows, exactly
    # as range(count) did, instead of slicing from the end.
    shown = max(0, min(count, len(self.results)))
    for entry in self.results[:shown]:
        print(' {0:>7} {1}'.format(entry["value"], entry["filename"]))
173 class SignatureNasty
:
174 """Generator that searches a given file for nasty expressions"""
177 """Instantiate the results array."""
180 def calculate(self
, data
, filename
):
183 # Lots taken from the wonderful post at http://stackoverflow.com/questions/3115559/exploitable-php-functions
184 valid_regex
= re
.compile('(eval\(|file_put_contents|base64_decode|python_eval|exec\(|passthru|popen|proc_open|pcntl|assert\(|system\(|shell)', re
.I
)
185 matches
= re
.findall(valid_regex
, data
)
186 self
.results
.append({"filename":filename
, "value":len(matches
)})
190 self
.results
.sort(key
=lambda item
: item
["value"])
191 self
.results
.reverse()
192 self
.results
= resultsAddRank(self
.results
)
def printer(self, count):
    """Print the leading entries of self.results under the signature heading.

    Args:
        count: Maximum number of rows to print. The heading reports this
            number as given; the rows are limited to what self.results holds.

    Rows are printed in the order self.results currently has; no sorting
    is done here. Each row shows the entry's "value" right-aligned in a
    7-character column, followed by its "filename".
    """
    # A single-argument print(...) call behaves the same on Python 2 and
    # Python 3, whereas the print statement is a SyntaxError on Python 3.
    print("\n[[ Top %i signature match counts ]]" % count)
    # Clamp to [0, len(results)]: a negative count prints no rows, exactly
    # as range(count) did, instead of slicing from the end.
    shown = max(0, min(count, len(self.results)))
    for entry in self.results[:shown]:
        print(' {0:>7} {1}'.format(entry["value"], entry["filename"]))
202 class SignatureSuperNasty
:
203 """Generator that searches a given file for SUPER-nasty expressions (These are almost always bad!)"""
206 """Instantiate the results array."""
209 def calculate(self
, data
, filename
):
212 valid_regex
= re
.compile('(@\$_\[\]=|\$_=@\$_GET|\$_\[\+""\]=)', re
.I
)
213 matches
= re
.findall(valid_regex
, data
)
214 self
.results
.append({"filename":filename
, "value":len(matches
)})
218 self
.results
.sort(key
=lambda item
: item
["value"])
219 self
.results
.reverse()
220 self
.results
= resultsAddRank(self
.results
)
def printer(self, count):
    """Print the leading entries of self.results under the SUPER-signature heading.

    Args:
        count: Maximum number of rows to print. The heading reports this
            number as given; the rows are limited to what self.results holds.

    Rows are printed in the order self.results currently has; no sorting
    is done here. Each row shows the entry's "value" right-aligned in a
    7-character column, followed by its "filename".
    """
    # A single-argument print(...) call behaves the same on Python 2 and
    # Python 3, whereas the print statement is a SyntaxError on Python 3.
    print("\n[[ Top %i SUPER-signature match counts (These are usually bad!) ]]" % count)
    # Clamp to [0, len(results)]: a negative count prints no rows, exactly
    # as range(count) did, instead of slicing from the end.
    shown = max(0, min(count, len(self.results)))
    for entry in self.results[:shown]:
        print(' {0:>7} {1}'.format(entry["value"], entry["filename"]))
231 """Generator that searches a given file for nasty eval with variable"""
234 """Instantiate the eval_results array."""
237 def calculate(self
, data
, filename
):
240 # Lots taken from the wonderful post at http://stackoverflow.com/questions/3115559/exploitable-php-functions
241 valid_regex
= re
.compile('(eval\(\$(\w|\d))', re
.I
)
242 matches
= re
.findall(valid_regex
, data
)
243 self
.results
.append({"filename":filename
, "value":len(matches
)})
247 self
.results
.sort(key
=lambda item
: item
["value"])
248 self
.results
.reverse()
249 self
.results
= resultsAddRank(self
.results
)
def printer(self, count):
    """Print the leading entries of self.results under the eval heading.

    Args:
        count: Maximum number of rows to print. The heading reports this
            number as given; the rows are limited to what self.results holds.

    Rows are printed in the order self.results currently has; no sorting
    is done here. Each row shows the entry's "value" right-aligned in a
    7-character column, followed by its "filename".
    """
    # A single-argument print(...) call behaves the same on Python 2 and
    # Python 3, whereas the print statement is a SyntaxError on Python 3.
    print("\n[[ Top %i eval match counts ]]" % count)
    # Clamp to [0, len(results)]: a negative count prints no rows, exactly
    # as range(count) did, instead of slicing from the end.
    shown = max(0, min(count, len(self.results)))
    for entry in self.results[:shown]:
        print(' {0:>7} {1}'.format(entry["value"], entry["filename"]))
261 """Generator finds compression ratio"""
264 """Instantiate the results array."""
267 def calculate(self
, data
, filename
):
270 compressed
= zlib
.compress(data
)
271 ratio
= float(len(compressed
)) / float(len(data
))
272 self
.results
.append({"filename":filename
, "value":ratio
})
276 self
.results
.sort(key
=lambda item
: item
["value"])
277 self
.results
.reverse()
278 self
.results
= resultsAddRank(self
.results
)
def printer(self, count):
    """Print the leading entries of self.results under the compression heading.

    Args:
        count: Maximum number of rows to print. The heading reports this
            number as given; the rows are limited to what self.results holds.

    Rows are printed in the order self.results currently has; no sorting
    is done here. Each row shows the entry's "value" (4 decimal places)
    followed by its "filename".
    """
    # A single-argument print(...) call behaves the same on Python 2 and
    # Python 3, whereas the print statement is a SyntaxError on Python 3.
    print("\n[[ Top %i compression match counts ]]" % count)
    # Clamp to [0, len(results)]: a negative count prints no rows, exactly
    # as range(count) did, instead of slicing from the end.
    shown = max(0, min(count, len(self.results)))
    for entry in self.results[:shown]:
        print(' {0:>7.4f} {1}'.format(entry["value"], entry["filename"]))
288 def resultsAddRank(results
):
291 previousValue
= False
294 if (previousValue
and previousValue
!= file["value"]):
298 previousValue
= file["value"]
303 """Generator that searches a given filepath with an optional regular
304 expression and returns the filepath and filename"""
305 def search_file_path(self
, args
, valid_regex
):
306 for root
, dirs
, files
in os
.walk(args
[0]):
308 filename
= os
.path
.join(root
, file)
309 if (valid_regex
.search(file) and os
.path
.getsize(filename
) > SMALLEST
):
311 data
= open(root
+ "/" + file, 'rb').read()
314 print "Could not read file :: %s/%s" % (root
, file)
317 if __name__
== "__main__":
318 """Parse all the options"""
320 timeStart
= time
.clock()
326 ((_)\ ))\ ( /(_))(_))
328 | \| (_)) ((_) _ \_ _|
330 |_|\_\___\___/_| |___| Ver. *.USEGIT
333 parser
= OptionParser(usage
="usage: %prog [options] <start directory> <OPTIONAL: filename regex>",
335 parser
.add_option("-c", "--csv",
339 help="generate CSV outfile",
341 parser
.add_option("-a", "--all",
345 help="Run all (useful) tests [Entropy, Longest Word, IC, Signature]",)
346 parser
.add_option("-z", "--zlib",
350 help="Run compression Test",)
351 parser
.add_option("-e", "--entropy",
355 help="Run entropy Test",)
356 parser
.add_option("-E", "--eval",
360 help="Run signature test for the eval",)
361 parser
.add_option("-l", "--longestword",
365 help="Run longest word test",)
366 parser
.add_option("-i", "--ic",
371 parser
.add_option("-s", "--signature",
375 help="Run signature test",)
376 parser
.add_option("-S", "--supersignature",
378 dest
="is_supersignature",
380 help="Run SUPER-signature test",)
381 parser
.add_option("-A", "--auto",
385 help="Run auto file extension tests",)
386 parser
.add_option("-u", "--unicode",
388 dest
="ignore_unicode",
390 help="Skip over unicode-y/UTF'y files",)
392 (options
, args
) = parser
.parse_args()
# Error on invalid number of arguments
400 # Error on an invalid path
401 if os
.path
.exists(args
[0]) == False:
402 parser
.error("Invalid path")
405 if (len(args
) == 2 and options
.is_auto
is False):
407 valid_regex
= re
.compile(args
[1])
409 parser
.error("Invalid regular expression")
411 valid_regex
= re
.compile('.*')
415 valid_regex
= re
.compile('(\.php|\.asp|\.aspx|\.scath|\.bash|\.zsh|\.csh|\.tsch|\.pl|\.py|\.txt|\.cgi|\.cfm|\.htaccess)$')
418 tests
.append(LanguageIC())
419 tests
.append(Entropy())
420 tests
.append(LongestWord())
421 tests
.append(SignatureNasty())
422 tests
.append(SignatureSuperNasty())
424 if options
.is_entropy
:
425 tests
.append(Entropy())
426 if options
.is_longest
:
427 tests
.append(LongestWord())
429 tests
.append(LanguageIC())
430 if options
.is_signature
:
431 tests
.append(SignatureNasty())
432 if options
.is_supersignature
:
433 tests
.append(SignatureSuperNasty())
435 tests
.append(UsesEval())
437 tests
.append(Compression())
439 # Instantiate the Generator Class used for searching, opening, and reading files
440 locator
= SearchFile()
442 # CSV file output array
444 csv_header
= ["filename"]
446 # Grab the file and calculate each test against file
449 for data
, filename
in locator
.search_file_path(args
, valid_regex
):
451 # a row array for the CSV
453 csv_row
.append(filename
)
455 if options
.ignore_unicode
:
457 for character
in data
:
458 if ord(character
) > 127:
459 asciiHighCount
= asciiHighCount
+ 1
461 fileAsciiHighRatio
= float(asciiHighCount
) / float(len(data
))
463 if (options
.ignore_unicode
== False or fileAsciiHighRatio
< .1):
465 calculated_value
= test
.calculate(data
, filename
)
466 # Make the header row if it hasn't been fully populated, +1 here to account for filename column
467 if len(csv_header
) < len(tests
) + 1:
468 csv_header
.append(test
.__class
__.__name
__)
469 csv_row
.append(calculated_value
)
470 fileCount
= fileCount
+ 1
471 csv_array
.append(csv_row
)
473 fileIgnoreCount
= fileIgnoreCount
+ 1
476 csv_array
.insert(0,csv_header
)
477 fileOutput
= csv
.writer(open(options
.is_csv
, "wb"))
478 fileOutput
.writerows(csv_array
)
480 timeFinish
= time
.clock()
483 print "\n[[ Total files scanned: %i ]]" % (fileCount
)
484 print "[[ Total files ignored: %i ]]" % (fileIgnoreCount
)
485 print "[[ Scan Time: %f seconds ]]" % (timeFinish
- timeStart
)
487 # Print top rank lists
492 for file in test
.results
:
493 rank_list
[file["filename"]] = rank_list
.setdefault(file["filename"], 0) + file["rank"]
495 rank_sorted
= sorted(rank_list
.items(), key
=lambda x
: x
[1])
497 print "\n[[ Top cumulative ranked files ]]"
499 if (count
> len(rank_sorted
)): count
= len(rank_sorted
)
500 for x
in range(count
):
501 print ' {0:>7} {1}'.format(rank_sorted
[x
][1], rank_sorted
[x
][0])