1 <?php
defined('SYSPATH') OR die('No direct access allowed.');
5 * $Id: text.php 3917 2009-01-21 03:06:22Z zombor $
9 * @copyright (c) 2007-2008 Kohana Team
10 * @license http://kohanaphp.com/license.html
/**
 * Limits a phrase to a given number of words.
 *
 * A word is a run of non-whitespace characters. Leading whitespace of the
 * phrase is kept; whitespace left at the cut is stripped before the end
 * character is attached.
 *
 * @param   string   phrase to limit words of
 * @param   integer  number of words to limit to
 * @param   string   end character or entity (NULL selects an ellipsis)
 * @return  string
 */
public static function limit_words($str, $limit = 100, $end_char = NULL)
{
    $limit = (int) $limit;
    $end_char = ($end_char === NULL) ? '…' : $end_char;

    // Blank input has no words to count, hand it back untouched
    if (trim($str) === '')
        return $str;

    // A non-positive limit would produce an invalid {1,N} quantifier below
    if ($limit <= 0)
        return $end_char;

    // preg_match() yields 0 (no match) or FALSE (e.g. invalid UTF-8 with /u);
    // in both cases there is nothing safe to cut, so return the input as-is
    if ( ! preg_match('/^\s*+(?:\S++\s*+){1,'.$limit.'}/u', $str, $matches))
        return $str;

    // Only attach the end character if the matched string is shorter
    // than the starting string.
    return rtrim($matches[0]).(strlen($matches[0]) === strlen($str) ? '' : $end_char);
}
/**
 * Limits a phrase to a given number of characters.
 *
 * When word preservation is enabled the cut is moved forward to the end of
 * the word that contains the character at the limit, so the result may be
 * longer than the limit.
 *
 * @param   string   phrase to limit characters of
 * @param   integer  number of characters to limit to
 * @param   string   end character or entity (NULL selects an ellipsis)
 * @param   boolean  enable or disable the preservation of words while limiting
 * @return  string
 */
public static function limit_chars($str, $limit = 100, $end_char = NULL, $preserve_words = FALSE)
{
    $end_char = ($end_char === NULL) ? '…' : $end_char;

    $limit = (int) $limit;

    // Blank or already short enough: nothing to cut
    if (trim($str) === '' OR utf8::strlen($str) <= $limit)
        return $str;

    // A non-positive limit leaves no room for text and would build an
    // invalid .{-1} quantifier further down
    if ($limit <= 0)
        return $end_char;

    if ($preserve_words == FALSE)
    {
        return rtrim(utf8::substr($str, 0, $limit)).$end_char;
    }

    // Take ($limit - 1) characters, then run on to the end of the current word.
    // On no match or a PCRE error (e.g. invalid UTF-8) return the input as-is.
    if ( ! preg_match('/^.{'.($limit - 1).'}\S*/us', $str, $matches))
        return $str;

    // Only attach the end character when something was actually cut off
    return rtrim($matches[0]).(strlen($matches[0]) == strlen($str) ? '' : $end_char);
}
/**
 * Alternates between two or more strings.
 *
 * Each call returns the next argument in turn. The position counter is a
 * single static value shared by every call site; call the method with no
 * arguments to reset it (that call returns an empty string).
 *
 * @param   string  strings to alternate between
 * @return  string
 */
public static function alternate()
{
    // Persists between calls so consecutive calls walk through the arguments
    static $i = 0;

    if (func_num_args() === 0)
    {
        // No arguments: restart the cycle
        $i = 0;
        return '';
    }

    $args = func_get_args();
    return $args[($i++ % count($args))];
}
/**
 * Generates a random string of a given type and length.
 *
 * Named pool types:
 *   alnum     alpha-numeric characters
 *   alpha     alphabetical characters
 *   hexdec    hexadecimal characters, 0-9 plus a-f
 *   numeric   digit characters, 0-9
 *   nozero    digit characters, 1-9
 *   distinct  clearly distinct alpha-numeric characters
 *
 * Any other value is used literally as the pool of characters.
 *
 * NOTE: characters are picked with mt_rand(), which is not suitable for
 * security-sensitive values such as tokens or passwords.
 *
 * @param   string   a type of pool, or a string of characters to use as the pool
 * @param   integer  length of string to return
 * @return  string
 */
public static function random($type = 'alnum', $length = 8)
{
    // Built-in pools, keyed by type name
    static $pools = array
    (
        'alnum'    => '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ',
        'alpha'    => 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ',
        'hexdec'   => '0123456789abcdef',
        'numeric'  => '0123456789',
        'nozero'   => '123456789',
        'distinct' => '2345679ACDEFHJKLMNPRSTUVWXYZ',
    );

    $length = (int) $length;
    $utf8 = FALSE;

    if (is_string($type) AND isset($pools[$type]))
    {
        $pool = $pools[$type];
    }
    else
    {
        // Custom pool: may contain multi-byte characters
        $pool = (string) $type;
        $utf8 = ! utf8::is_ascii($pool);
    }

    // Nothing to pick from, or nothing requested
    if ($pool === '' OR $length < 1)
        return '';

    // Split the pool into an array of characters
    $pool = ($utf8 === TRUE) ? utf8::str_split($pool, 1) : str_split($pool, 1);

    // Largest pool key
    $max = count($pool) - 1;

    $str = '';
    for ($i = 0; $i < $length; $i++)
    {
        // Select a random character from the pool and add it to the string
        $str .= $pool[mt_rand(0, $max)];
    }

    // Make sure alnum strings contain at least one letter and one digit
    if ($type === 'alnum' AND $length > 1)
    {
        if (ctype_alpha($str))
        {
            // Add a random digit
            $str[mt_rand(0, $length - 1)] = chr(mt_rand(48, 57));
        }
        elseif (ctype_digit($str))
        {
            // Add a random letter
            $str[mt_rand(0, $length - 1)] = chr(mt_rand(65, 90));
        }
    }

    return $str;
}
/**
 * Reduces multiple slashes in a string to single slashes.
 *
 * A run of slashes that directly follows a colon is not collapsed at the
 * colon, so the "//" of a scheme such as "http://" survives.
 *
 * @param   string  string to reduce slashes of
 * @return  string
 */
public static function reduce_slashes($str)
{
    // Two or more slashes, not immediately preceded by ":"
    $repeated_slashes = '#(?<!:)//+#';
    $single_slash = '/';

    $reduced = preg_replace($repeated_slashes, $single_slash, $str);

    return $reduced;
}
/**
 * Replaces the given words with a string.
 *
 * A "*" inside a bad word acts as a wildcard for any run of non-whitespace
 * characters. Matching is case-insensitive and UTF-8 aware.
 *
 * A replacement that is exactly one character long is repeated to the
 * length of each censored word; a longer replacement is inserted once per
 * match.
 *
 * NOTE(review): when $replace_partial_words is TRUE the pattern is wrapped
 * in boundary look-arounds, i.e. only matches delimited by a word boundary,
 * whitespace or the string edges are replaced; when FALSE, matches inside
 * longer words are replaced as well. Confirm this is the polarity callers
 * expect from the parameter name.
 *
 * @param   string   phrase to replace words in
 * @param   array    words to replace
 * @param   string   replacement string
 * @param   boolean  replace words across word boundaries (space, period, etc)
 * @return  string
 */
public static function censor($str, $badwords, $replacement = '#', $replace_partial_words = FALSE)
{
    // Build one alternative per bad word; accepts a single word as well as an array
    $patterns = array();
    foreach ((array) $badwords as $badword)
    {
        $badword = (string) $badword;

        // An empty alternative would match at every position
        if ($badword === '')
            continue;

        $patterns[] = str_replace('\*', '\S*?', preg_quote($badword, '!'));
    }

    if (count($patterns) === 0)
        return $str;

    $regex = '('.implode('|', $patterns).')';

    if ($replace_partial_words == TRUE)
    {
        // Just using \b isn't sufficient when we need to replace a badword that already contains word boundaries itself
        $regex = '(?<=\b|\s|^)'.$regex.'(?=\b|\s|$)';
    }

    $regex = '!'.$regex.'!ui';

    if (utf8::strlen($replacement) == 1)
    {
        // Split around the matches, keeping them: with a single capturing group
        // that spans the whole match, odd-indexed pieces are the censored words.
        // This replaces the /e (eval) modifier, which no longer exists in PHP 7+.
        $pieces = preg_split($regex, $str, -1, PREG_SPLIT_DELIM_CAPTURE);

        // PCRE failure (e.g. invalid UTF-8): leave the text untouched
        if ($pieces === FALSE)
            return $str;

        for ($i = 1, $total = count($pieces); $i < $total; $i += 2)
        {
            // Mask the word with one replacement character per original character
            $pieces[$i] = str_repeat($replacement, utf8::strlen($pieces[$i]));
        }

        return implode('', $pieces);
    }

    return preg_replace($regex, $replacement, $str);
}
/**
 * Finds the text that is similar between a set of words.
 *
 * Returns the longest prefix shared by every word. The comparison is made
 * byte by byte and is case-sensitive, so with multi-byte text the result
 * may end in the middle of a character.
 *
 * @param   array   words to find similar text of
 * @return  string  common prefix, or an empty string when there is none
 */
public static function similar(array $words)
{
    if (count($words) === 0)
        return '';

    // Offsets below only make sense on strings
    $words = array_map('strval', $words);

    // First word is the word to match against; reset() does not depend on
    // where the caller left the array's internal pointer
    $word = reset($words);

    for ($i = 0, $max = strlen($word); $i < $max; ++$i)
    {
        foreach ($words as $w)
        {
            // Once a difference is found, break out of the loops
            if ( ! isset($w[$i]) OR $w[$i] !== $word[$i])
                break 2;
        }
    }

    // $i is now the length of the shared prefix
    return ($i === 0) ? '' : substr($word, 0, $i);
}
/**
 * Converts text email addresses and anchors into links.
 *
 * @param   string  text to auto link
 * @return  string
 */
public static function auto_link($text)
{
    // Emails go first, otherwise "www.domain.com@example.com" would be
    // picked up as a URL before it can be linked as an address
    $with_mailtos = text::auto_link_emails($text);

    return text::auto_link_urls($with_mailtos);
}
/**
 * Converts text anchors into links.
 *
 * Two passes are made: first over http/https/ftp/ftps URLs, then over
 * "naked" www. host names, which are linked with an http:// prefix.
 *
 * Each match is replaced exactly where it was found. A global search and
 * replace per match would re-link text inside anchors that were just
 * generated whenever a URL occurs twice or is contained in a longer one.
 *
 * @param   string  text to auto link
 * @return  string
 */
public static function auto_link_urls($text)
{
    // Finds all http/https/ftp/ftps links that are not part of an existing html anchor.
    // The whole pattern is captured, so odd-indexed pieces are the URLs.
    $pieces = preg_split('~(\b(?<!href="|">)(?:ht|f)tps?://\S+(?:/|\b))~i', $text, -1, PREG_SPLIT_DELIM_CAPTURE);

    if ($pieces !== FALSE AND count($pieces) > 1)
    {
        for ($i = 1, $total = count($pieces); $i < $total; $i += 2)
        {
            // Replace each link with an anchor
            $pieces[$i] = html::anchor($pieces[$i]);
        }

        $text = implode('', $pieces);
    }

    // Find all naked www.links.com (without http://)
    $pieces = preg_split('~(\b(?<!://)www(?:\.[a-z0-9][-a-z0-9]*+)+\.[a-z]{2,6}\b)~i', $text, -1, PREG_SPLIT_DELIM_CAPTURE);

    if ($pieces !== FALSE AND count($pieces) > 1)
    {
        for ($i = 1, $total = count($pieces); $i < $total; $i += 2)
        {
            // Replace each link with an anchor, keeping the original text as the title
            $pieces[$i] = html::anchor('http://'.$pieces[$i], $pieces[$i]);
        }

        $text = implode('', $pieces);
    }

    return $text;
}
/**
 * Converts text email addresses into links.
 *
 * Each address is replaced exactly where it was matched, so an address
 * that is contained in a longer one (or appears more than once) cannot
 * corrupt markup generated for an earlier match.
 *
 * @param   string  text to auto link
 * @return  string
 */
public static function auto_link_emails($text)
{
    // Finds all email addresses that are not part of an existing html mailto anchor
    // Note: The "58;" negative lookbehind prevents matching of existing encoded html mailto anchors
    //       The html entity for a colon (:) is &#58; or &#058; or &#0058; etc.
    // The whole pattern is captured, so odd-indexed pieces are the addresses.
    $pieces = preg_split('~(\b(?<!href="mailto:|">|58;)(?!\.)[-+_a-z0-9.]++(?<!\.)@(?![-.])[-a-z0-9.]+(?<!\.)\.[a-z]{2,6}\b)~i', $text, -1, PREG_SPLIT_DELIM_CAPTURE);

    // PCRE failure or no address found: nothing to do
    if ($pieces === FALSE OR count($pieces) < 2)
        return $text;

    for ($i = 1, $total = count($pieces); $i < $total; $i += 2)
    {
        // Replace each email with an encoded mailto
        $pieces[$i] = html::mailto($pieces[$i]);
    }

    return implode('', $pieces);
}
/**
 * Automatically applies <p> and <br /> markup to text. Basically nl2br() on steroids.
 *
 * Blank-line separated chunks become paragraphs, remaining single line
 * breaks become <br />, and block-level elements are kept out of <p> tags.
 * The order of the replacements below matters; each step relies on the
 * newlines produced by the previous one.
 *
 * @param   string  subject
 * @return  string
 */
public static function auto_p($str)
{
    // Trim whitespace
    $str = trim($str);

    if ($str === '')
        return '';

    // Standardize newlines
    $str = str_replace(array("\r\n", "\r"), "\n", $str);

    // Trim whitespace on each line
    $str = preg_replace('~^[ \t]+~m', '', $str);
    $str = preg_replace('~[ \t]+$~m', '', $str);

    // The html specific regexes only need to be executed if the string contains html
    $html_found = (strpos($str, '<') !== FALSE);

    if ($html_found)
    {
        // Elements that should not be surrounded by p tags
        $no_p = '(?:p|div|h[1-6r]|ul|ol|li|blockquote|d[dlt]|pre|t[dhr]|t(?:able|body|foot|head)|c(?:aption|colgroup)|form|s(?:elect|tyle)|a(?:ddress|rea)|ma(?:p|th))';
        $no_p = '(?:p|div|h[1-6r]|ul|ol|li|blockquote|d[dlt]|pre|t[dhr]|t(?:able|body|foot|head)|c(?:aption|olgroup)|form|s(?:elect|tyle)|a(?:ddress|rea)|ma(?:p|th))';

        // Put at least two linebreaks before and after $no_p elements
        $str = preg_replace('~^<'.$no_p.'[^>]*+>~im', "\n$0", $str);
        $str = preg_replace('~</'.$no_p.'\s*+>$~im', "$0\n", $str);
    }

    // Wrap everything in one paragraph, then split it at every blank line
    $str = '<p>'.trim($str).'</p>';
    $str = preg_replace('~\n{2,}~', "</p>\n\n<p>", $str);

    // The following regexes only need to be executed if the string contains html
    if ($html_found)
    {
        // Remove p tags around $no_p elements
        $str = preg_replace('~<p>(?=</?'.$no_p.'[^>]*+>)~i', '', $str);
        $str = preg_replace('~(</?'.$no_p.'[^>]*+>)</p>~i', '$1', $str);
    }

    // Convert single linebreaks to <br />
    $str = preg_replace('~(?<!\n)\n(?!\n)~', "<br />\n", $str);

    return $str;
}
/**
 * Returns human readable sizes.
 *
 * @see  Based on original functions written by:
 * @see  Aidan Lister: http://aidanlister.com/repos/v/function.size_readable.php
 * @see  Quentin Zervaas: http://www.phpriot.com/d/code/strings/filesize-format/
 *
 * Without a forced unit the largest unit that keeps the value at or above
 * one is chosen; sizes beyond the largest known unit are expressed in that
 * largest unit. A forced unit containing an "i" (e.g. "MiB") selects the
 * IEC table regardless of $si.
 *
 * @param   integer  size in bytes
 * @param   string   a definitive unit
 * @param   string   the return string format (sprintf: number, unit)
 * @param   boolean  whether to use SI prefixes or IEC
 * @return  string
 */
public static function bytes($bytes, $force_unit = NULL, $format = NULL, $si = TRUE)
{
    // Format string
    $format = ($format === NULL) ? '%01.2f %s' : (string) $format;

    // NULL means "no forced unit"; as a string it simply matches nothing
    $force_unit = (string) $force_unit;

    // IEC prefixes (binary)
    if ($si == FALSE OR strpos($force_unit, 'i') !== FALSE)
    {
        $units = array('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB');
        $mod   = 1024;
    }
    // SI prefixes (decimal)
    else
    {
        $units = array('B', 'kB', 'MB', 'GB', 'TB', 'PB');
        $mod   = 1000;
    }

    // Determine unit to use
    $power = array_search($force_unit, $units, TRUE);

    if ($power === FALSE)
    {
        // Step up one unit at a time. Comparing against exact multiples avoids
        // the rounding errors of floor(log()) at unit boundaries, and stopping
        // at the last unit keeps the index inside the table.
        $power = 0;
        $last = count($units) - 1;
        $threshold = $mod;

        while ($power < $last AND $bytes >= $threshold)
        {
            $power++;
            $threshold *= $mod;
        }
    }

    return sprintf($format, $bytes / pow($mod, $power), $units[$power]);
}
/**
 * Prevents widow words by inserting a non-breaking space between the last two words.
 *
 * @see  http://www.shauninman.com/archive/2006/08/22/widont_wordpress_plugin
 *
 * Trailing whitespace is removed first, otherwise the "last space" would be
 * the one after the final word. Text without any space is returned as-is
 * (apart from that trimming).
 *
 * @param   string  string to remove widows from
 * @return  string
 */
public static function widont($str)
{
    $str = rtrim($str);
    $space = strrpos($str, ' ');

    if ($space !== FALSE)
    {
        // "\xC2\xA0" is U+00A0 NO-BREAK SPACE in UTF-8, written as an escape
        // so it cannot be mistaken for (or saved as) an ordinary space
        $str = substr($str, 0, $space)."\xC2\xA0".substr($str, $space + 1);
    }

    return $str;
}