2 Copyright 2013 Karel Matas
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
28 #include <FL/fl_utf8.h>
39 // Endianness check ( little endian returns 0)
40 // echo -n I | od -to2 | head -n1 | cut -f2 -d" " | cut -c6
42 // http://scripts.sil.org/cms/scripts/page.php?site_id=nrsi&item_id=IWS-Appendix
43 // http://home.telfort.nl/~t876506/utf8tbl.html
// Upper bound on the length of a tag name. Sufficient for now because
// tags carry no arguments. Used in parse_markup.
constexpr int MAX_TAGNAME_LENGTH = 32;
54 class CantOpenFile
: public std::exception
{
58 CantOpenFile(const string
&filename
): msg_(filename
) {};
59 const char *what() const noexcept
{return msg_
.c_str();};
62 class ParsingError
: public std::exception
{
66 ParsingError(const string
&filename
): msg_(filename
) {};
67 const char *what() const noexcept
{return msg_
.c_str();};
82 inline void clear () { map_
.clear(); }
84 inline void add ( const T
&item
){
85 if ( map_
.find(item
) == map_
.end() )
91 inline void add ( const vector
<T
> &v
) {
96 inline std::map
<T
,int> map () { return map_
; }
98 inline std::multimap
<int,T
> sorted () {
99 std::multimap
<int,T
> mm
;
100 for ( auto mi
: map_
)
101 mm
.insert( {mi
.second
,mi
.first
} );
/**
 * Finds the first decimal digit (0-9) in the input string.
 * \param s input string (NUL-terminated)
 * \return position of the first digit in the string, or the length of the
 *         string if it contains no digit
 */
inline size_t find_first_digit ( const char *s )
{
    size_t pos = 0;
    // Walk to the terminator instead of calling strlen() on every iteration.
    while ( s[pos] != '\0' ) {
        // Inclusive bounds: '0' and '9' are digits as well.
        if ( s[pos] >= '0' && s[pos] <= '9' )
            break;
        ++pos;
    }
    return pos;
}
123 * Groups similar strings in the input vector. Similar strings are strings
124 * that differ only in the number they end with.
125 * <b>Example: </b><br>
126 * { "a 1", "d", "b 3", "ddd", "a 4", "b 2" }<br>-><br>
127 * { "a 1", "a 4" }, { "b 2", "b 3" }, { "d" }, { "ddd" }
128 * \param v input vector of strings
129 * \return alphabetically sorted vector of grouped strings
131 vector
<vector
<string
>> separate_groups ( vector
<string
> &v
);
134 * Checks ZLIB error code. In case of an error throws exception with
135 * description of the error.
136 * \exception std::runtime_error
137 * \param err ZLIB error code
139 void check_zlib_error ( int err
);
141 * Decompresses a gzip file; copies the file if it is not gzipped.
142 * Throws an exception if the file cannot be opened. Calls check_zlib_error().
143 * \exception std::runtime_error
144 * \param infilename input file name
145 * \param outfilename output file name
147 void gzip_decompress_file( const char *infilename
, const char *outfilename
);
150 * Parse string containing integer range and returns minimum and maximum.
151 * Range is separated by -.
155 * 4-8 between 4 and 8
156 * Minimal acceptable value is 0, maximal 99.
157 * \param s string containing integer range
158 * \return { minimal_value, maximal_value }
160 inline std::pair
<int,int> parse_range ( const string
&s
)
165 size_t pos
= s
.find("-");
166 if ( s
.back() == '+' )
167 min
= s
.substr( 0, s
.size()-1 );
168 else if ( s
.back() == '-' )
169 max
= s
.substr( 0, s
.size()-1 );
170 else if ( pos
!= string::npos
){
171 min
= s
.substr(0, pos
);
172 max
= s
.substr( pos
+1, s
.size() );
179 return {std::stoi(min
),std::stoi(max
)};
/**
 * Tries to guess a filename from a URL. The filename is the part of the URL
 * that follows the last '/'.
 * \param url URL to take the filename from
 * \return guessed filename, or "downloaded.file" when the guess failed
 *         (the URL contains no '/' or nothing follows the last one)
 */
inline std::string guess_fname_from_url ( const std::string &url )
{
    // Test against npos BEFORE adding 1: npos + 1 wraps around to 0, so a
    // check made after the addition can never detect a missing '/'.
    const size_t slash = url.rfind('/');
    if ( slash != std::string::npos && slash + 1 < url.size() )
        return url.substr( slash + 1 );

    return "downloaded.file";
}
199 * Convenient function for finding element in the vector.
201 * <pre>return std::find( v.begin(), v.end(), elt ) != v.end();</pre>
204 inline bool is_in ( const vector
<T
> &v
, const T
&elt
){
205 return std::find( v
.begin(), v
.end(), elt
) != v
.end();
/**
 * Checks whether the string consists solely of decimal digits, i.e. whether
 * it spells a non-negative integer.
 * \param s input string (NUL-terminated)
 * \return true if s contains only the characters 0-9 (an empty string also
 *         yields true), false otherwise
 */
inline bool isint ( const char *s )
{
    // Scan up to the terminator; avoids re-running strlen() per iteration.
    for ( const char *p = s; *p != '\0'; ++p )
        if ( *p < '0' || *p > '9' )
            return false;
    return true;
}
221 * Reads the string s until character end is found (or until \0 is reached).
222 * Throws an exception if the string is invalid.
223 * \exception ParsingError
224 * \param s [in] input string
225 * \param pos [in,out] starting position in the s, pos is modified in the process
226 * (and contains position of the <b>end</b>)
227 * \param end the input string is read until this character is reached
228 * \return substring starting at input pos and ending before character <b>end</b>
230 inline string
read_until ( const char *s
, size_t *pos
, const char end
)
232 char buff
[strlen(s
)]; // should be enough
235 while ( s
[i
] != end
&& s
[i
] != '\0' )
238 throw ParsingError("read_until(): Invalid string: " + string(s
));
251 // operator< for use with std::sort
252 bool operator< ( const TextTag
&rhs
) const { return (this->pos
< rhs
.pos
); };
255 std::pair
<string
,vector
<TextTag
>> parse_markup ( const char *s
);
257 //! Checks whether the file path exists (i.e. can be opened).
258 inline bool file_exists ( const char *path
)
262 bool ret
= f
.is_open();
//! Overload taking a std::string; forwards to file_exists(const char*).
inline bool file_exists ( const std::string &s )
{
    const char *path = s.c_str();
    return file_exists( path );
}
/**
 * Replaces every occurrence of match in s with repl. The input string is
 * modified in place.
 * \param s     string to be modified
 * \param match substring to search for; an empty match leaves s untouched
 * \param repl  replacement text
 */
inline void replace_all ( std::string &s, const std::string &match, const std::string &repl )
{
    // An empty pattern matches at every position and would never terminate.
    if ( match.empty() )
        return;

    size_t pos = 0;
    while ( (pos = s.find(match, pos)) != std::string::npos ) {
        s.replace( pos, match.size(), repl );
        // Resume after the inserted text so that a replacement containing
        // the pattern (e.g. "a" -> "aa") is not matched again.
        pos += repl.size();
    }
}
/**
 * Strips whitespace from the start and the end of the input string.
 * \param s string to be stripped (NUL-terminated)
 * \return copy of the input string without leading and trailing whitespace
 */
inline std::string strip ( const char *s )
{
    // isspace() is only defined for unsigned char values (and EOF), so bytes
    // >= 0x80 (e.g. parts of UTF-8 sequences) are converted before the call.
    const auto is_space = []( char c ) {
        return isspace( static_cast<unsigned char>(c) ) != 0;
    };

    size_t start = 0;
    size_t end = strlen(s);
    while ( start < end && is_space(s[start]) ) start++;
    // Bounds check first: for an empty string s[end-1] would be out of range.
    while ( end > start && is_space(s[end-1]) ) end--;
    return std::string( s + start, end - start );
}
292 * Joins elements of the input vector into string with separator sep.
294 * vector<string> v = { "a", "b", "c" }
295 * to_string(v, ":") returns "a:b:c"
296 * \param v input vector
297 * \param sep string to be used as separator between elements of v in resulting string
298 * \return joined string
300 template <class T
=string
>
301 inline string
to_string ( const vector
<T
> &v
, const char *sep
=", " )
303 if ( v
.empty() ) return string("");
304 std::stringstream ss
;
307 ss
<< s
<< ( ( i
++ < v
.size()-1 ) ? sep
:"" );
//! Joins the elements of the input set into a string, separated by sep.
//! The elements are copied into a vector and handed to the vector overload
//! of to_string().
template <class T = std::string>
inline std::string to_string ( const std::set<T> &s, const char *sep=", " )
{
    const std::vector<T> items( s.begin(), s.end() );
    return to_string<T>( items, sep );
}
316 vector
<string
> split_string ( const string
&s
, const char *delimiters
=" " );
318 vector
<int> split_string_int ( const string
&s
, const char *delimiters
=" " );
319 vector
<string
> str_to_chars ( const char *s
);
321 inline std::vector
<unsigned int> utf8_to_ints ( const char *s
)
323 std::vector
<unsigned int> v
;
325 while ( i
< strlen(s
) ){
326 int len
= fl_utf8len(s
[i
]);;
329 unsigned int dec
= fl_utf8decode( s
+i
, s
+i
+len
,&dlen
);
331 printf("UTF8 decoding error: i: %d, len: %d, dlen: %d", i
, len
, dlen
);
342 #endif // __UTILS_HXX