3 # Description: Utility to scan a file path for encrypted and obfuscated files
4 # Authors: Ben Hagen (ben.hagen@neohapsis.com)
5 # Scott Behrens (scott.behrens@neohapsis.com)
9 # pep-0008 - Is stupid. TABS FO'EVER! too bad, spaces are back!
14 # Try catch regular expressions/bad path/bad filename/bad regex/
24 from collections
import defaultdict
25 from optparse
import OptionParser
28 """Class that calculates a file's Index of Coincidence as
29 well as a subset of files' average Index of Coincidence.
32 """Initialize results arrays as well as character counters."""
33 self
.char_count
= defaultdict(int)
34 self
.total_char_count
= 0
36 self
.ic_total_results
= ""
38 def calculate_char_count(self
,data
):
39 """Method to calculate character counts for a particular data file."""
44 charcount
= data
.count(char
)
45 self
.char_count
[char
] += charcount
46 self
.total_char_count
+= charcount
49 def calculate_IC(self
):
50 """Calculate the Index of Coincidence for the self variables"""
52 for val
in self
.char_count
.values():
56 total
+= val
* (val
-1)
59 ic_total
= float(total
)/(self
.total_char_count
* (self
.total_char_count
- 1))
62 self
.ic_total_results
= ic_total
65 def calculate(self
,data
,filename
):
66 """Calculate the Index of Coincidence for a file and append it to the self.results list"""
74 charcount
= data
.count(char
)
75 char_count
+= charcount
* (charcount
- 1)
76 total_char_count
+= charcount
78 ic
= float(char_count
)/(total_char_count
* (total_char_count
- 1))
79 self
.results
.append({"filename":filename
, "value":ic
})
80 # Call method to calculate_char_count and append to total_char_count
81 self
.calculate_char_count(data
)
85 self
.results
.sort(key
=lambda item
: item
["value"])
86 self
.results
= resultsAddRank(self
.results
)
88 def printer(self
, count
):
89 """Print the top signature count match files for a given search"""
90 # Calculate the Total IC for a Search
92 print "\n[[ Average IC for Search ]]"
93 print self
.ic_total_results
94 print "\n[[ Top %i lowest IC files ]]" % (count
)
95 if (count
> len(self
.results
)): count
= len(self
.results
)
96 for x
in range(count
):
97 print ' {0:>7.4f} {1}'.format(self
.results
[x
]["value"], self
.results
[x
]["filename"])
101 """Class that calculates a file's Entropy."""
104 """Instantiate the entropy_results array."""
107 def calculate(self
,data
,filename
):
108 """Calculate the entropy for 'data' and append the result to the self.results list."""
113 self
.stripped_data
=data
.replace(' ', '')
115 p_x
= float(self
.stripped_data
.count(chr(x
)))/len(self
.stripped_data
)
117 entropy
+= - p_x
* math
.log(p_x
, 2)
118 self
.results
.append({"filename":filename
, "value":entropy
})
122 self
.results
.sort(key
=lambda item
: item
["value"])
123 self
.results
.reverse()
124 self
.results
= resultsAddRank(self
.results
)
def printer(self, count):
    """Print a header and the first `count` entropy results.

    Each row shows an entry's "value" (right-aligned, four decimal
    places) followed by its "filename", in the order the entries are
    stored in self.results.

    count -- maximum number of rows to print. It is clamped to
             len(self.results); the header still reports the value
             that was requested.
    """
    # A parenthesised single expression prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print("\n[[ Top %i entropic files for a given search ]]" % (count))
    limit = min(count, len(self.results))
    # max() stops a negative count from becoming an "all but last" slice.
    for entry in self.results[:max(limit, 0)]:
        print(' {0:>7.4f} {1}'.format(entry["value"], entry["filename"]))
135 """Class that determines the longest word for a particular file."""
137 """Instantiate the longestword_results array."""
140 def calculate(self
,data
,filename
):
141 """Find the longest word in a string and append it to the self.results list"""
146 words
= re
.split("[\s,\n,\r]", data
)
153 self
.results
.append({"filename":filename
, "value":longest
})
157 self
.results
.sort(key
=lambda item
: item
["value"])
158 self
.results
.reverse()
159 self
.results
= resultsAddRank(self
.results
)
def printer(self, count):
    """Print a header and the first `count` longest-word results.

    Each row shows an entry's "value" (right-aligned to width 7)
    followed by its "filename", in the order the entries are stored in
    self.results.

    count -- maximum number of rows to print. It is clamped to
             len(self.results); the header still reports the value
             that was requested.
    """
    # A parenthesised single expression prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print("\n[[ Top %i longest word files ]]" % (count))
    limit = min(count, len(self.results))
    # max() stops a negative count from becoming an "all but last" slice.
    for entry in self.results[:max(limit, 0)]:
        print(' {0:>7} {1}'.format(entry["value"], entry["filename"]))
169 class SignatureNasty
:
170 """Generator that searches a given file for nasty expressions"""
173 """Instantiate the results array."""
176 def calculate(self
, data
, filename
):
179 # Lots taken from the wonderful post at http://stackoverflow.com/questions/3115559/exploitable-php-functions
180 valid_regex
= re
.compile('(eval\(|file_put_contents|base64_decode|python_eval|exec\(|passthru|popen|proc_open|pcntl|assert\(|system\(|shell)', re
.I
)
181 matches
= re
.findall(valid_regex
, data
)
182 self
.results
.append({"filename":filename
, "value":len(matches
)})
186 self
.results
.sort(key
=lambda item
: item
["value"])
187 self
.results
.reverse()
188 self
.results
= resultsAddRank(self
.results
)
def printer(self, count):
    """Print a header and the first `count` signature match counts.

    Each row shows an entry's "value" (right-aligned to width 7)
    followed by its "filename", in the order the entries are stored in
    self.results.

    count -- maximum number of rows to print. It is clamped to
             len(self.results); the header still reports the value
             that was requested.
    """
    # A parenthesised single expression prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print("\n[[ Top %i signature match counts ]]" % (count))
    limit = min(count, len(self.results))
    # max() stops a negative count from becoming an "all but last" slice.
    for entry in self.results[:max(limit, 0)]:
        print(' {0:>7} {1}'.format(entry["value"], entry["filename"]))
200 """Generator that searches a given file for nasty eval with variable"""
203 """Instantiate the eval_results array."""
206 def calculate(self
, data
, filename
):
209 # Lots taken from the wonderful post at http://stackoverflow.com/questions/3115559/exploitable-php-functions
210 valid_regex
= re
.compile('(eval\(\$(\w|\d))', re
.I
)
211 matches
= re
.findall(valid_regex
, data
)
212 self
.results
.append({"filename":filename
, "value":len(matches
)})
216 self
.results
.sort(key
=lambda item
: item
["value"])
217 self
.results
.reverse()
218 self
.results
= resultsAddRank(self
.results
)
def printer(self, count):
    """Print a header and the first `count` eval match counts.

    Each row shows an entry's "value" (right-aligned to width 7)
    followed by its "filename", in the order the entries are stored in
    self.results.

    count -- maximum number of rows to print. It is clamped to
             len(self.results); the header still reports the value
             that was requested.
    """
    # A parenthesised single expression prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print("\n[[ Top %i eval match counts ]]" % (count))
    limit = min(count, len(self.results))
    # max() stops a negative count from becoming an "all but last" slice.
    for entry in self.results[:max(limit, 0)]:
        print(' {0:>7} {1}'.format(entry["value"], entry["filename"]))
230 """Generator finds compression ratio"""
233 """Instantiate the results array."""
236 def calculate(self
, data
, filename
):
239 compressed
= zlib
.compress(data
)
240 ratio
= float(len(compressed
)) / float(len(data
))
241 self
.results
.append({"filename":filename
, "value":ratio
})
245 self
.results
.sort(key
=lambda item
: item
["value"])
246 self
.results
.reverse()
247 self
.results
= resultsAddRank(self
.results
)
def printer(self, count):
    """Print a header and the first `count` compression ratio results.

    Each row shows an entry's "value" (right-aligned, four decimal
    places) followed by its "filename", in the order the entries are
    stored in self.results.

    count -- maximum number of rows to print. It is clamped to
             len(self.results); the header still reports the value
             that was requested.
    """
    # A parenthesised single expression prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print("\n[[ Top %i compression match counts ]]" % (count))
    limit = min(count, len(self.results))
    # max() stops a negative count from becoming an "all but last" slice.
    for entry in self.results[:max(limit, 0)]:
        print(' {0:>7.4f} {1}'.format(entry["value"], entry["filename"]))
257 def resultsAddRank(results
):
260 previousValue
= False
263 if (previousValue
and previousValue
!= file["value"]):
267 previousValue
= file["value"]
272 """Generator that searches a given filepath with an optional regular
273 expression and returns the filepath and filename"""
274 def search_file_path(self
, args
, valid_regex
):
275 for root
, dirs
, files
in os
.walk(args
[0]):
277 filename
= os
.path
.join(root
, file)
278 if (valid_regex
.search(file) and os
.path
.getsize(filename
) > 60):
280 data
= open(root
+ "/" + file, 'rb').read()
283 print "Could not read file :: %s/%s" % (root
, file)
286 if __name__
== "__main__":
287 """Parse all the options"""
289 timeStart
= time
.clock()
295 ((_)\ ))\ ( /(_))(_))
297 | \| (_)) ((_) _ \_ _|
299 |_|\_\___\___/_| |___| Ver. *.USEGIT
302 parser
= OptionParser(usage
="usage: %prog [options] <start directory> <OPTIONAL: filename regex>",
304 parser
.add_option("-c", "--csv",
308 help="generate CSV outfile",
310 parser
.add_option("-a", "--all",
314 help="Run all (useful) tests [Entropy, Longest Word, IC, Signature]",)
315 parser
.add_option("-z", "--zlib",
319 help="Run compression Test",)
320 parser
.add_option("-e", "--entropy",
324 help="Run entropy Test",)
325 parser
.add_option("-E", "--eval",
329 help="Run signiture test for the eval",)
330 parser
.add_option("-l", "--longestword",
334 help="Run longest word test",)
335 parser
.add_option("-i", "--ic",
340 parser
.add_option("-s", "--signature",
344 help="Run signature test",)
345 parser
.add_option("-A", "--auto",
349 help="Run auto file extension tests",)
350 parser
.add_option("-u", "--unicode",
352 dest
="ignore_unicode",
354 help="Skip over unicode-y/UTF'y files",)
356 (options
, args
) = parser
.parse_args()
358 # Error on invalid number of arguments
364 # Error on an invalid path
365 if os
.path
.exists(args
[0]) == False:
366 parser
.error("Invalid path")
369 if (len(args
) == 2 and options
.is_auto
is False):
371 valid_regex
= re
.compile(args
[1])
373 parser
.error("Invalid regular expression")
375 valid_regex
= re
.compile('.*')
379 valid_regex
= re
.compile('(\.php|\.asp|\.aspx|\.scath|\.bash|\.zsh|\.csh|\.tsch|\.pl|\.py|\.txt|\.cgi|\.cfm|\.htaccess)$')
382 tests
.append(LanguageIC())
383 tests
.append(Entropy())
384 tests
.append(LongestWord())
385 tests
.append(SignatureNasty())
387 if options
.is_entropy
:
388 tests
.append(Entropy())
389 if options
.is_longest
:
390 tests
.append(LongestWord())
392 tests
.append(LanguageIC())
393 if options
.is_signature
:
394 tests
.append(SignatureNasty())
396 tests
.append(UsesEval())
398 tests
.append(Compression())
400 # Instantiate the Generator Class used for searching, opening, and reading files
401 locator
= SearchFile()
403 # CSV file output array
405 csv_header
= ["filename"]
407 # Grab the file and calculate each test against file
410 for data
, filename
in locator
.search_file_path(args
, valid_regex
):
412 # a row array for the CSV
414 csv_row
.append(filename
)
416 if options
.ignore_unicode
:
418 for character
in data
:
419 if ord(character
) > 127:
420 asciiHighCount
= asciiHighCount
+ 1
422 fileAsciiHighRatio
= float(asciiHighCount
) / float(len(data
))
424 if (options
.ignore_unicode
== False or fileAsciiHighRatio
< .1):
426 calculated_value
= test
.calculate(data
, filename
)
427 # Make the header row if it hasn't been fully populated, +1 here to account for filename column
428 if len(csv_header
) < len(tests
) + 1:
429 csv_header
.append(test
.__class
__.__name
__)
430 csv_row
.append(calculated_value
)
431 fileCount
= fileCount
+ 1
432 csv_array
.append(csv_row
)
434 fileIgnoreCount
= fileIgnoreCount
+ 1
437 csv_array
.insert(0,csv_header
)
438 fileOutput
= csv
.writer(open(options
.is_csv
, "wb"))
439 fileOutput
.writerows(csv_array
)
441 timeFinish
= time
.clock()
444 print "\n[[ Total files scanned: %i ]]" % (fileCount
)
445 print "[[ Total files ignored: %i ]]" % (fileIgnoreCount
)
446 print "[[ Scan Time: %f seconds ]]" % (timeFinish
- timeStart
)
448 # Print top rank lists
453 for file in test
.results
:
454 rank_list
[file["filename"]] = rank_list
.setdefault(file["filename"], 0) + file["rank"]
456 rank_sorted
= sorted(rank_list
.items(), key
=lambda x
: x
[1])
458 print "\n[[ Top cumulative ranked files ]]"
460 if (count
> len(rank_sorted
)): count
= len(rank_sorted
)
461 for x
in range(count
):
462 print ' {0:>7} {1}'.format(rank_sorted
[x
][1], rank_sorted
[x
][0])