2 Functions and classes which keep track of and use regexes to classify streams
5 By Ethan Sommer <sommere@users.sf.net> and Matthew Strait
6 <quadong@users.sf.net>, (C) 2006-2007
7 http://l7-filter.sf.net
9 This program is free software; you can redistribute it and/or
10 modify it under the terms of the GNU General Public License
11 as published by the Free Software Foundation; either version
12 2 of the License, or (at your option) any later version.
13 http://www.gnu.org/licenses/gpl.txt
15 This file is synced between the userspace source code and the test suite
16 source code. I don't think it's worth the effort to make it a proper library.
24 #include "l7-parse-patterns.h"
// Returns 1 if the line (from a pattern file) is a comment, 0 otherwise.
// Three kinds of line count as comments: empty lines, lines whose first
// character is '#', and lines that consist only of whitespace.
static int is_comment(std::string line)
{
  // blank lines are comments
  if(line.empty()) return 1;

  // lines starting with # are comments
  if(line[0] == '#') return 1;

  // lines with only whitespace are comments
  // NOTE(review): the loop body and the final return were missing from the
  // listing this was taken from; they are written to match the comment
  // above -- confirm against the upstream l7-filter source.
  for(std::string::size_type i = 0; i < line.size(); i++){
    // isspace() is only defined for unsigned char values (and EOF), so cast
    // before calling it; a plain char may be negative for bytes >= 0x80.
    if(!isspace(static_cast<unsigned char>(line[i]))) return 0;
  }

  return 1;
}
// Extracts the protocol name from a line.
// The line should be exactly the name of the file without the .pat
// extension, but junk after whitespace is tolerated: the name is everything
// up to (not including) the first whitespace character. A line that begins
// with whitespace therefore yields an empty name.
static std::string get_protocol_name(std::string line)
{
  std::string name;

  // NOTE(review): the loop body was missing from the listing this was taken
  // from; it is written to match the comment above -- confirm against the
  // upstream l7-filter source.
  for(std::string::size_type i = 0; i < line.size(); i++){
    // cast first: isspace() is undefined for negative char values
    if(isspace(static_cast<unsigned char>(line[i]))) break;
    name += line[i];
  }

  return name;
}
// Returns the part of the given file name that lies between the last slash
// and the next dot after it, e.g. "http" for "/some/dir/http.pat".
//
// If the name contains no slash, the result starts at the beginning of the
// string. If there is no dot after the last slash, the result runs to the
// end of the string.
std::string basename(std::string filename)
{
  const std::string::size_type lastslash = filename.find_last_of('/');

  // Keep the positions as size_type and test for npos explicitly. Storing
  // npos in an int turned it into -1, which made the dot search start past
  // the end of the string, so a bare "http.pat" came back unchanged.
  const std::string::size_type start =
    (lastslash == std::string::npos) ? 0 : lastslash + 1;

  const std::string::size_type nextdot = filename.find_first_of('.', start);

  if(nextdot == std::string::npos) return filename.substr(start);

  return filename.substr(start, nextdot - start);
}
// Returns the text before the first '=' in the line,
// e.g. "userspace pattern" if the line is "userspace pattern=.*foo".
// If the line contains no '=', the whole line is returned.
static std::string attribute(std::string line)
{
  // find() yields npos when there is no '='; substr(0, npos) is the full line
  return line.substr(0, line.find('='));
}
// Returns the text after the first '=' in the line,
// e.g. ".*foo" if the line is "userspace pattern=.*foo".
// Later '=' characters are part of the value. If the line contains no '=',
// an empty string is returned.
static std::string value(std::string line)
{
  const std::string::size_type eq = line.find('=');

  // No '=' means no value. Adding 1 to npos would wrap around to 0 and hand
  // back the entire line, so handle that case explicitly.
  if(eq == std::string::npos) return std::string();

  return line.substr(eq + 1);
}
// Parses a string of flag names for the POSIX regex calls.
// The visible code walks `line` one character at a time and compares the
// string `flag` against the known names whenever it reaches a whitespace
// character or the last character of `line`.
//   cflags: REG_EXTENDED, REG_ICASE, REG_NOSUB and REG_NEWLINE are OR-ed in.
//   eflags: REG_NOTBOL and REG_NOTEOL are OR-ed in.
// NOTE(review): this listing has lost lines -- the declaration of `flag`,
// the code that appends characters to it, the return statements and every
// brace-only line. It cannot be told from here whether cflags/eflags are
// cleared before the loop; confirm against the upstream l7-filter source.
78 // parse the regexec and regcomp flags
79 // Returns 1 on success, 0 if any unrecognized flags were encountered
80 static int parseflags(int & cflags
, int & eflags
, string line
)
85 for(unsigned int i
= 0; i
< line
.size(); i
++){
// a flag name is considered complete at whitespace or at the final character
89 if(isspace(line
[i
]) || i
== line
.size()-1){
// names that set bits in cflags
90 if(flag
== "REG_EXTENDED") cflags
|= REG_EXTENDED
;
91 else if(flag
== "REG_ICASE") cflags
|= REG_ICASE
;
92 else if(flag
== "REG_NOSUB") cflags
|= REG_NOSUB
;
93 else if(flag
== "REG_NEWLINE") cflags
|= REG_NEWLINE
;
// names that set bits in eflags
94 else if(flag
== "REG_NOTBOL") eflags
|= REG_NOTBOL
;
95 else if(flag
== "REG_NOTEOL") eflags
|= REG_NOTEOL
;
// error report naming the offending flag; presumably the final else branch,
// but the guarding `else` is not visible in this listing
97 cerr
<<"Error: encountered unknown flag in pattern file " <<flag
<<endl
;
106 // Returns 1 on success, 0 on failure.
107 // Takes a filename and "returns" the pattern and flags
108 int parse_pattern_file(int & cflags
, int & eflags
, string
& pattern
,
111 ifstream
the_file(filename
.c_str());
113 if(!the_file
.is_open()){
114 cerr
<< "couldn't read file.\n";
118 // What we're looking for. It's either the protocol name, the kernel pattern,
119 // which we'll use if no other is present, or any of various (ok, two)
120 // userspace config lines.
121 enum { protocol
, kpattern
, userspace
} state
= protocol
;
123 string name
= "", line
;
124 cflags
= REG_EXTENDED
| REG_ICASE
| REG_NOSUB
;
127 while (!the_file
.eof()){
128 getline(the_file
, line
);
130 if(is_comment(line
)) continue;
132 if(state
== protocol
){
133 name
= get_protocol_name(line
);
135 if(name
!= basename(filename
)){
136 cerr
<< "Error: Protocol declared in file does not match file name.\n"
137 << "File name is " << basename(filename
)
138 << ", but the file says " << name
<< endl
;
145 if(state
== kpattern
){
151 if(state
== userspace
){
153 if(line
.find_first_of('=') == string::npos
){
154 cerr
<<"Warning: ignored bad line in pattern file:\n\t"<<line
<<endl
;
158 if(attribute(line
) == "userspace pattern"){
159 pattern
= value(line
);
161 else if(attribute(line
) == "userspace flags"){
162 if(!parseflags(cflags
, eflags
, value(line
)))
166 cerr
<< "Warning: ignored unknown pattern file attribute \""
167 << attribute(line
) << "\"\n";