7 * This source file is subject to the new BSD license that is bundled
8 * with this package in the file LICENSE.txt.
9 * It is also available through the world-wide-web at this URL:
10 * http://framework.zend.com/license/new-bsd
11 * If you did not receive a copy of the license and are unable to
12 * obtain it through the world-wide-web, please send an email
13 * to license@zend.com so we can send you a copy immediately.
16 * @package Zend_Search_Lucene
18 * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
19 * @license http://framework.zend.com/license/new-bsd New BSD License
24 /** Zend_Search_Lucene_Exception */
25 require_once $CFG->dirroot
.'/search/Zend/Search/Lucene/Exception.php';
30 * @package Zend_Search_Lucene
32 * @copyright Copyright (c) 2005-2007 Zend Technologies USA Inc. (http://www.zend.com)
33 * @license http://framework.zend.com/license/new-bsd New BSD License
35 abstract class Zend_Search_Lucene_Storage_File
/**
 * Reads $length bytes starting at the current position in the file
 * and moves the file pointer past them.
 *
 * The concrete read is supplied by the subclass; the readers in this
 * class treat the result as a byte string (they apply ord() and string
 * offsets to it).
 *
 * @param integer $length number of bytes to read (1 when omitted)
 * @return string
 */
abstract protected function _fread($length = 1);
/**
 * Sets the file position indicator.
 *
 * The new position, measured in bytes from the beginning of the file,
 * is obtained by adding $offset to the position selected by $whence:
 *   SEEK_SET - set position equal to $offset bytes.
 *   SEEK_CUR - set position to the current location plus $offset.
 *   SEEK_END - set position to end-of-file plus $offset (pass a negative
 *              $offset to land before the end of the file).
 *
 * Upon success, returns 0; otherwise, returns -1.
 *
 * @param integer $offset
 * @param integer $whence one of SEEK_SET (default), SEEK_CUR, SEEK_END
 * @return integer
 */
abstract public function seek($offset, $whence = SEEK_SET);
/**
 * Reports the position of the file pointer.
 *
 * NOTE(review): contract inferred from the method name and its pairing
 * with seek(); the exact return value is defined by the subclass.
 */
abstract public function tell();
/**
 * Flushes the file (presumably pushes buffered output to the underlying
 * storage - the concrete behaviour is defined by the subclass).
 *
 * Returns true on success or false on failure.
 *
 * @return boolean
 */
abstract public function flush();
/**
 * Writes $length bytes of $data (all of it, if $length === null) to the
 * end of the file.
 *
 * The concrete write is supplied by the subclass; the writers in this
 * class always hand it a byte string.
 *
 * @param string  $data
 * @param integer $length number of bytes to write, or null for everything
 */
abstract protected function _fwrite($data, $length = null);
/**
 * Locks the file.
 *
 * Lock type may be a LOCK_SH (shared lock) or a LOCK_EX (exclusive lock).
 *
 * @param integer $lockType        LOCK_SH or LOCK_EX
 * @param boolean $nonBlockinLock  false by default; presumably requests a
 *                                 non-blocking lock attempt when true
 *                                 (semantics are defined by the subclass)
 */
abstract public function lock($lockType, $nonBlockinLock = false);
/**
 * Unlocks the file.
 *
 * NOTE(review): presumably releases a lock previously taken with lock();
 * the concrete behaviour is defined by the subclass.
 */
abstract public function unlock();
/**
 * Reads one byte at the current position in the file and advances the
 * file pointer.
 *
 * @return integer ordinal value of the byte that was read
 */
public function readByte()
{
    $char = $this->_fread(1);

    return ord($char);
}
/**
 * Writes a single byte to the end of the file.
 *
 * @param integer $byte ordinal value of the byte to write
 * @return mixed whatever the subclass's _fwrite() returns
 */
public function writeByte($byte)
{
    $char = chr($byte);

    return $this->_fwrite($char, 1);
}
/**
 * Reads $num bytes from the current position in the file and advances
 * the file pointer.
 *
 * @param integer $num number of bytes to read
 * @return string
 */
public function readBytes($num)
{
    $bytes = $this->_fread($num);

    return $bytes;
}
/**
 * Writes $num bytes of $data (all of it, if $num === null) to the end
 * of the file.
 *
 * Thin public wrapper around _fwrite(); nothing is returned.
 *
 * @param string  $data
 * @param integer $num number of bytes to write, or null for everything
 */
public function writeBytes($data, $num = null)
{
    $this->_fwrite($data, $num);
}
/**
 * Reads a 4-byte big-endian integer from the current position in the
 * file and advances the file pointer.
 *
 * String offsets use square brackets: the curly-brace form ($str{0}) is
 * deprecated in PHP 7.4 and is a parse error from PHP 8.0 onwards.
 *
 * @return integer
 */
public function readInt()
{
    $str = $this->_fread(4);

    // Most significant byte first.
    return  ord($str[0]) << 24 |
            ord($str[1]) << 16 |
            ord($str[2]) << 8  |
            ord($str[3]);
}
/**
 * Writes an integer to the end of the file as 4 big-endian bytes.
 *
 * @param integer $value
 */
public function writeInt($value)
{
    $value = (int) $value;

    // Emit the bytes most significant first.
    $packed = '';
    foreach (array(24, 16, 8, 0) as $shift) {
        $packed .= chr(($value >> $shift) & 0xFF);
    }

    $this->_fwrite($packed, 4);
}
/**
 * Reads an 8-byte big-endian long integer from the current position in
 * the file and advances the file pointer.
 *
 * On builds where PHP_INT_SIZE is 4 only values that fit into a
 * non-negative 32-bit integer can be represented; anything larger is
 * rejected with an exception.
 *
 * String offsets use square brackets: the curly-brace form ($str{0}) is
 * deprecated in PHP 7.4 and is a parse error from PHP 8.0 onwards.
 *
 * @return integer
 * @throws Zend_Search_Lucene_Exception
 */
public function readLong()
{
    $str = $this->_fread(8);

    /**
     * Check, that we work in 64-bit mode.
     * fseek() uses long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
     */
    if (PHP_INT_SIZE > 4) {
        return  ord($str[0]) << 56 |
                ord($str[1]) << 48 |
                ord($str[2]) << 40 |
                ord($str[3]) << 32 |
                ord($str[4]) << 24 |
                ord($str[5]) << 16 |
                ord($str[6]) << 8  |
                ord($str[7]);
    }

    // 32-bit mode: the upper four bytes must be zero and the top bit of
    // the lower word must be clear, otherwise the value would overflow
    // into a negative integer. The sign bit lives in byte 4, not byte 0.
    if ((ord($str[0]) != 0) ||
        (ord($str[1]) != 0) ||
        (ord($str[2]) != 0) ||
        (ord($str[3]) != 0) ||
        ((ord($str[4]) & 0x80) != 0)) {
        throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
    }

    return  ord($str[4]) << 24 |
            ord($str[5]) << 16 |
            ord($str[6]) << 8  |
            ord($str[7]);
}
/**
 * Writes a long integer to the end of the file as 8 big-endian bytes.
 *
 * fseek() and ftell() use long for offsets, so in 32-bit mode the
 * largest index segment file size is 2Gb; larger values are rejected.
 *
 * @param integer $value
 * @throws Zend_Search_Lucene_Exception
 */
public function writeLong($value)
{
    if (PHP_INT_SIZE <= 4) {
        // 32-bit mode: only the lower word can carry data.
        if ($value > 0x7FFFFFFF) {
            throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
        }

        $lowWord = '';
        foreach (array(24, 16, 8, 0) as $shift) {
            $lowWord .= chr($value >> $shift & 0xFF);
        }

        $this->_fwrite("\x00\x00\x00\x00" . $lowWord, 8);
        return;
    }

    // 64-bit mode: emit all eight bytes, most significant first.
    $value = (int) $value;

    $packed = '';
    for ($shift = 56; $shift >= 0; $shift -= 8) {
        $packed .= chr($value >> $shift & 0xFF);
    }

    $this->_fwrite($packed, 8);
}
/**
 * Reads a variable-length integer from the current position in the
 * file and advances the file pointer.
 *
 * Each byte contributes its low 7 bits, least significant group first;
 * a set high bit (0x80) means another byte follows.
 *
 * @return integer
 */
public function readVInt()
{
    $byte   = ord($this->_fread(1));
    $result = $byte & 0x7F;
    $shift  = 7;

    while (($byte & 0x80) != 0) {
        $byte    = ord($this->_fread(1));
        $result |= ($byte & 0x7F) << $shift;
        $shift  += 7;
    }

    return $result;
}
/**
 * Writes a variable-length integer to the end of the file.
 *
 * The value is emitted 7 bits at a time, least significant group first;
 * every byte except the last has its high bit (0x80) set.
 *
 * @param integer $value
 */
public function writeVInt($value)
{
    $value = (int) $value;

    for (; $value > 0x7F; $value >>= 7) {
        $this->_fwrite(chr(0x80 | ($value & 0x7F)));
    }

    // Final group: high bit clear marks the end of the number.
    $this->_fwrite(chr($value));
}
/**
 * Reads a string from the current position in the file and advances
 * the file pointer.
 *
 * The stored form is a VInt character count followed by the characters
 * in Java 2 "modified UTF-8". The count is in characters, not bytes, so
 * that many bytes are read first and then, for every multi-byte lead
 * byte met while scanning, the continuation bytes still missing are
 * pulled from the file.
 *
 * This implementation supports only Basic Multilingual Plane (BMP)
 * characters (from 0x0000 to 0xFFFF) and doesn't support "supplementary
 * characters" (code points greater than 0xFFFF). Java 2 represents
 * those as a surrogate pair (0xD800-0xDBFF followed by 0xDC00-0xDFFF),
 * each encoded as a usual three-byte UTF-8 sequence (six bytes total),
 * whereas standard UTF-8 uses four bytes for them.
 *
 * The two-byte Java encoding of the null character (0xC0 0x80) is
 * converted back into a single "\x00" byte.
 *
 * @return string
 */
public function readString()
{
    $strlen = $this->readVInt();
    if ($strlen == 0) {
        return '';
    }

    $str_val = $this->_fread($strlen);

    for ($count = 0; $count < $strlen; $count++) {
        $lead = ord($str_val[$count]);

        if (($lead & 0xC0) != 0xC0) {
            // Single-byte character: nothing more to fetch.
            continue;
        }

        // Lead byte of a multi-byte sequence: work out how many
        // continuation bytes belong to it.
        $addBytes = 1;
        if ($lead & 0x20) {
            $addBytes++;

            // Never used. Java2 doesn't encode strings in four bytes
            if ($lead & 0x10) {
                $addBytes++;
            }
        }

        $str_val .= $this->_fread($addBytes);
        $strlen  += $addBytes;

        // Check for null character. Java2 encodes null character
        // in two bytes.
        if ($lead == 0xC0 && ord($str_val[$count + 1]) == 0x80) {
            // Collapse the pair into a real NUL byte. The buffer becomes
            // one byte shorter, so the length shrinks with it and the
            // cursor must not skip ahead: the byte after the NUL still
            // has to be examined on the next iteration.
            $str_val = substr($str_val, 0, $count)
                     . "\x00"
                     . substr($str_val, $count + 2);
            $strlen--;
        } else {
            // Skip over the continuation bytes just accounted for.
            $count += $addBytes;
        }
    }

    return $str_val;
}
/**
 * Writes a string to the end of the file.
 *
 * The stored form is a VInt character count followed by the characters
 * in Java 2 "modified UTF-8". The input is expected to be UTF-8 already,
 * so only two things are done here: the number of characters (as opposed
 * to bytes) is computed, and every "\x00" byte is replaced by the
 * two-byte Java encoding 0xC0 0x80.
 *
 * This implementation supports only Basic Multilingual Plane (BMP)
 * characters (from 0x0000 to 0xFFFF) and doesn't support "supplementary
 * characters" (code points greater than 0xFFFF). Java 2 represents
 * those as a surrogate pair (0xD800-0xDBFF followed by 0xDC00-0xDFFF),
 * each encoded as a usual three-byte UTF-8 sequence (six bytes total),
 * whereas standard UTF-8 uses four bytes for them.
 *
 * @param string $str
 * @throws Zend_Search_Lucene_Exception
 */
public function writeString($str)
{
    // convert input to a string before iterating string characters
    $str = (string) $str;

    $chars = $strlen = strlen($str);

    for ($count = 0; $count < $strlen; $count++) {
        $lead = ord($str[$count]);

        if (($lead & 0xC0) == 0xC0) {
            // Lead byte of a multi-byte sequence: its continuation bytes
            // do not count as separate characters.
            $addBytes = 1;
            if ($lead & 0x20) {
                $addBytes++;

                // Never used. Java2 doesn't encode strings in four bytes
                // and we don't support non-BMP characters
                if ($lead & 0x10) {
                    $addBytes++;
                }
            }
            $chars -= $addBytes;
            $count += $addBytes;
        }
    }

    if ($chars < 0) {
        throw new Zend_Search_Lucene_Exception('Invalid UTF-8 string');
    }

    $this->writeVInt($chars);

    // A null character still counts as one character but is stored as
    // 0xC0 0x80. str_replace() takes (search, replace, subject) and
    // returns the subject untouched when it contains no "\x00".
    $this->_fwrite(str_replace("\x00", "\xC0\x80", $str));
}
/**
 * Reads binary data from the current position in the file and advances
 * the file pointer.
 *
 * The data is stored as a VInt byte count followed by that many raw
 * bytes.
 *
 * @return string
 */
public function readBinary()
{
    $length = $this->readVInt();

    return $this->_fread($length);
}