1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
3 /* ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
16 * The Original Code is Mozilla.
18 * The Initial Developer of the Original Code is IBM Corporation.
19 * Portions created by IBM Corporation are Copyright (C) 2003
20 * IBM Corporation. All Rights Reserved.
23 * Darin Fisher <darin@meer.net>
25 * Alternatively, the contents of this file may be used under the terms of
26 * either the GNU General Public License Version 2 or later (the "GPL"), or
27 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 * in which case the provisions of the GPL or the LGPL are applicable instead
29 * of those above. If you wish to allow use of your version of this file only
30 * under the terms of either the GPL or the LGPL, and not to allow others to
31 * use your version of this file under the terms of the MPL, indicate your
32 * decision by deleting the provisions above and replace them with the notice
33 * and other provisions required by the GPL or the LGPL. If you do not delete
34 * the provisions above, a recipient may use your version of this file under
35 * the terms of any one of the MPL, the GPL or the LGPL.
37 * ***** END LICENSE BLOCK ***** */
40 #include "nsScannerString.h"
47 nsScannerBufferList::Buffer
*
48 nsScannerBufferList::AllocBufferFromString( const nsAString
& aString
)
50 PRUint32 len
= aString
.Length();
52 Buffer
* buf
= (Buffer
*) malloc(sizeof(Buffer
) + (len
+ 1) * sizeof(PRUnichar
));
55 // leave PRCList members of Buffer uninitialized
58 buf
->mDataEnd
= buf
->DataStart() + len
;
60 nsAString::const_iterator source
;
61 aString
.BeginReading(source
);
62 nsCharTraits
<PRUnichar
>::copy(buf
->DataStart(), source
.get(), len
);
64 // XXX null terminate. this shouldn't be required, but we do it because
65 // nsScanner erroneously thinks it can dereference DataEnd :-(
66 *buf
->mDataEnd
= PRUnichar(0);
71 nsScannerBufferList::Buffer
*
72 nsScannerBufferList::AllocBuffer( PRUint32 capacity
)
74 Buffer
* buf
= (Buffer
*) malloc(sizeof(Buffer
) + (capacity
+ 1) * sizeof(PRUnichar
));
77 // leave PRCList members of Buffer uninitialized
80 buf
->mDataEnd
= buf
->DataStart() + capacity
;
82 // XXX null terminate. this shouldn't be required, but we do it because
83 // nsScanner erroneously thinks it can dereference DataEnd :-(
84 *buf
->mDataEnd
= PRUnichar(0);
90 nsScannerBufferList::ReleaseAll()
92 while (!PR_CLIST_IS_EMPTY(&mBuffers
))
94 PRCList
* node
= PR_LIST_HEAD(&mBuffers
);
96 //printf(">>> freeing buffer @%p\n", node);
97 free(static_cast<Buffer
*>(node
));
102 nsScannerBufferList::SplitBuffer( const Position
& pos
)
104 // splitting to the right keeps the work string and any extant token
105 // pointing to and holding a reference count on the same buffer.
107 Buffer
* bufferToSplit
= pos
.mBuffer
;
108 NS_ASSERTION(bufferToSplit
, "null pointer");
110 PRUint32 splitOffset
= pos
.mPosition
- bufferToSplit
->DataStart();
111 NS_ASSERTION(pos
.mPosition
>= bufferToSplit
->DataStart() &&
112 splitOffset
<= bufferToSplit
->DataLength(),
113 "split offset is outside buffer");
115 PRUint32 len
= bufferToSplit
->DataLength() - splitOffset
;
116 Buffer
* new_buffer
= AllocBuffer(len
);
119 nsCharTraits
<PRUnichar
>::copy(new_buffer
->DataStart(),
120 bufferToSplit
->DataStart() + splitOffset
,
122 InsertAfter(new_buffer
, bufferToSplit
);
123 bufferToSplit
->SetDataLength(splitOffset
);
128 nsScannerBufferList::DiscardUnreferencedPrefix( Buffer
* aBuf
)
132 while (!PR_CLIST_IS_EMPTY(&mBuffers
) && !Head()->IsInUse())
134 Buffer
* buffer
= Head();
135 PR_REMOVE_LINK(buffer
);
142 nsScannerBufferList::Position::Distance( const Position
& aStart
, const Position
& aEnd
)
145 if (aStart
.mBuffer
== aEnd
.mBuffer
)
147 result
= aEnd
.mPosition
- aStart
.mPosition
;
151 result
= aStart
.mBuffer
->DataEnd() - aStart
.mPosition
;
152 for (Buffer
* b
= aStart
.mBuffer
->Next(); b
!= aEnd
.mBuffer
; b
= b
->Next())
153 result
+= b
->DataLength();
154 result
+= aEnd
.mPosition
- aEnd
.mBuffer
->DataStart();
164 nsScannerSubstring::nsScannerSubstring()
165 : mStart(nsnull
, nsnull
)
166 , mEnd(nsnull
, nsnull
)
167 , mBufferList(nsnull
)
173 nsScannerSubstring::nsScannerSubstring( const nsAString
& s
)
174 : mBufferList(nsnull
)
180 nsScannerSubstring::~nsScannerSubstring()
182 release_ownership_of_buffer_list();
186 nsScannerSubstring::CountChar( PRUnichar c
) const
189 re-write this to use a counting sink
192 size_type result
= 0;
193 size_type lengthToExamine
= Length();
195 nsScannerIterator iter
;
196 for ( BeginReading(iter
); ; )
198 PRInt32 lengthToExamineInThisFragment
= iter
.size_forward();
199 const PRUnichar
* fromBegin
= iter
.get();
200 result
+= size_type(NS_COUNT(fromBegin
, fromBegin
+lengthToExamineInThisFragment
, c
));
201 if ( !(lengthToExamine
-= lengthToExamineInThisFragment
) )
203 iter
.advance(lengthToExamineInThisFragment
);
205 // never reached; quiets warnings
210 nsScannerSubstring::Rebind( const nsScannerSubstring
& aString
,
211 const nsScannerIterator
& aStart
,
212 const nsScannerIterator
& aEnd
)
214 // allow for the case where &aString == this
216 aString
.acquire_ownership_of_buffer_list();
217 release_ownership_of_buffer_list();
221 mBufferList
= aString
.mBufferList
;
222 mLength
= Distance(aStart
, aEnd
);
227 nsScannerSubstring::Rebind( const nsAString
& aString
)
229 release_ownership_of_buffer_list();
231 mBufferList
= new nsScannerBufferList(AllocBufferFromString(aString
));
234 init_range_from_buffer_list();
235 acquire_ownership_of_buffer_list();
239 nsScannerSubstring::AsString() const
243 nsScannerSubstring
* mutable_this
= const_cast<nsScannerSubstring
*>(this);
245 if (mStart
.mBuffer
== mEnd
.mBuffer
) {
246 // We only have a single fragment to deal with, so just return it
248 mutable_this
->mFlattenedRep
.Rebind(mStart
.mPosition
, mEnd
.mPosition
);
250 // Otherwise, we need to copy the data into a flattened buffer.
251 nsScannerIterator start
, end
;
252 CopyUnicodeTo(BeginReading(start
), EndReading(end
), mutable_this
->mFlattenedRep
);
255 mutable_this
->mIsDirty
= PR_FALSE
;
258 return mFlattenedRep
;
262 nsScannerSubstring::BeginReading( nsScannerIterator
& iter
) const
266 iter
.mFragment
.mBuffer
= mStart
.mBuffer
;
267 iter
.mFragment
.mFragmentStart
= mStart
.mPosition
;
268 if (mStart
.mBuffer
== mEnd
.mBuffer
)
269 iter
.mFragment
.mFragmentEnd
= mEnd
.mPosition
;
271 iter
.mFragment
.mFragmentEnd
= mStart
.mBuffer
->DataEnd();
273 iter
.mPosition
= mStart
.mPosition
;
274 iter
.normalize_forward();
279 nsScannerSubstring::EndReading( nsScannerIterator
& iter
) const
283 iter
.mFragment
.mBuffer
= mEnd
.mBuffer
;
284 iter
.mFragment
.mFragmentEnd
= mEnd
.mPosition
;
285 if (mStart
.mBuffer
== mEnd
.mBuffer
)
286 iter
.mFragment
.mFragmentStart
= mStart
.mPosition
;
288 iter
.mFragment
.mFragmentStart
= mEnd
.mBuffer
->DataStart();
290 iter
.mPosition
= mEnd
.mPosition
;
291 // must not |normalize_backward| as that would likely invalidate tests like |while ( first != last )|
296 nsScannerSubstring::GetNextFragment( nsScannerFragment
& frag
) const
298 // check to see if we are at the end of the buffer list
299 if (frag
.mBuffer
== mEnd
.mBuffer
)
302 frag
.mBuffer
= static_cast<const Buffer
*>(PR_NEXT_LINK(frag
.mBuffer
));
304 if (frag
.mBuffer
== mStart
.mBuffer
)
305 frag
.mFragmentStart
= mStart
.mPosition
;
307 frag
.mFragmentStart
= frag
.mBuffer
->DataStart();
309 if (frag
.mBuffer
== mEnd
.mBuffer
)
310 frag
.mFragmentEnd
= mEnd
.mPosition
;
312 frag
.mFragmentEnd
= frag
.mBuffer
->DataEnd();
318 nsScannerSubstring::GetPrevFragment( nsScannerFragment
& frag
) const
320 // check to see if we are at the beginning of the buffer list
321 if (frag
.mBuffer
== mStart
.mBuffer
)
324 frag
.mBuffer
= static_cast<const Buffer
*>(PR_PREV_LINK(frag
.mBuffer
));
326 if (frag
.mBuffer
== mStart
.mBuffer
)
327 frag
.mFragmentStart
= mStart
.mPosition
;
329 frag
.mFragmentStart
= frag
.mBuffer
->DataStart();
331 if (frag
.mBuffer
== mEnd
.mBuffer
)
332 frag
.mFragmentEnd
= mEnd
.mPosition
;
334 frag
.mFragmentEnd
= frag
.mBuffer
->DataEnd();
344 nsScannerString::nsScannerString( Buffer
* aBuf
)
346 mBufferList
= new nsScannerBufferList(aBuf
);
348 init_range_from_buffer_list();
349 acquire_ownership_of_buffer_list();
353 nsScannerString::AppendBuffer( Buffer
* aBuf
)
355 mBufferList
->Append(aBuf
);
356 mLength
+= aBuf
->DataLength();
359 mEnd
.mPosition
= aBuf
->DataEnd();
365 nsScannerString::DiscardPrefix( const nsScannerIterator
& aIter
)
367 Position
old_start(mStart
);
369 mLength
-= Position::Distance(old_start
, mStart
);
371 mStart
.mBuffer
->IncrementUsageCount();
372 old_start
.mBuffer
->DecrementUsageCount();
374 mBufferList
->DiscardUnreferencedPrefix(old_start
.mBuffer
);
380 nsScannerString::UngetReadable( const nsAString
& aReadable
, const nsScannerIterator
& aInsertPoint
)
382 * Warning: this routine manipulates the shared buffer list in an unexpected way.
383 * The original design did not really allow for insertions, but this call promises
384 * that if called for a point after the end of all extant token strings, that no token string
385 * or the work string will be invalidated.
387 * This routine is protected because it is the responsibility of the derived class to keep those promises.
390 Position
insertPos(aInsertPoint
);
392 mBufferList
->SplitBuffer(insertPos
);
393 // splitting to the right keeps the work string and any extant token pointing to and
394 // holding a reference count on the same buffer
396 Buffer
* new_buffer
= AllocBufferFromString(aReadable
);
397 // make a new buffer with all the data to insert...
398 // BULLSHIT ALERT: we may have empty space to re-use in the split buffer, measure the cost
399 // of this and decide if we should do the work to fill it
401 Buffer
* buffer_to_split
= insertPos
.mBuffer
;
402 mBufferList
->InsertAfter(new_buffer
, buffer_to_split
);
403 mLength
+= aReadable
.Length();
405 mEnd
.mBuffer
= mBufferList
->Tail();
406 mEnd
.mPosition
= mEnd
.mBuffer
->DataEnd();
412 nsScannerString::ReplaceCharacter(nsScannerIterator
& aPosition
, PRUnichar aChar
)
414 // XXX Casting a const to non-const. Unless the base class
415 // provides support for writing iterators, this is the best
417 PRUnichar
* pos
= const_cast<PRUnichar
*>(aPosition
.get());
425 * nsScannerSharedSubstring
429 nsScannerSharedSubstring::Rebind(const nsScannerIterator
&aStart
,
430 const nsScannerIterator
&aEnd
)
432 // If the start and end positions are inside the same buffer, we must
433 // acquire ownership of the buffer. If not, we can optimize by not holding
436 Buffer
*buffer
= const_cast<Buffer
*>(aStart
.buffer());
437 PRBool sameBuffer
= buffer
== aEnd
.buffer();
439 nsScannerBufferList
*bufferList
;
442 bufferList
= aStart
.mOwner
->mBufferList
;
443 bufferList
->AddRef();
444 buffer
->IncrementUsageCount();
452 mBufferList
= bufferList
;
453 mString
.Rebind(aStart
.mPosition
, aEnd
.mPosition
);
456 mBufferList
= nsnull
;
457 CopyUnicodeTo(aStart
, aEnd
, mString
);
462 nsScannerSharedSubstring::ReleaseBuffer()
464 NS_ASSERTION(mBufferList
, "Should only be called with non-null mBufferList");
465 mBuffer
->DecrementUsageCount();
466 mBufferList
->DiscardUnreferencedPrefix(mBuffer
);
467 mBufferList
->Release();
471 nsScannerSharedSubstring::MakeMutable()
473 nsString
temp(mString
); // this will force a copy of the data
474 mString
.Assign(temp
); // mString will now share the just-allocated buffer
479 mBufferList
= nsnull
;
483 * utils -- based on code from nsReadableUtils.cpp
486 // private helper function
489 copy_multifragment_string( nsScannerIterator
& first
, const nsScannerIterator
& last
, nsAString::iterator
& result
)
491 typedef nsCharSourceTraits
<nsScannerIterator
> source_traits
;
492 typedef nsCharSinkTraits
<nsAString::iterator
> sink_traits
;
494 while ( first
!= last
)
496 PRUint32 distance
= source_traits::readable_distance(first
, last
);
497 sink_traits::write(result
, source_traits::read(first
), distance
);
498 NS_ASSERTION(distance
> 0, "|copy_multifragment_string| will never terminate");
499 source_traits::advance(first
, distance
);
506 CopyUnicodeTo( const nsScannerIterator
& aSrcStart
,
507 const nsScannerIterator
& aSrcEnd
,
510 nsAString::iterator writer
;
511 if (!EnsureStringLength(aDest
, Distance(aSrcStart
, aSrcEnd
))) {
513 return; // out of memory
515 aDest
.BeginWriting(writer
);
516 nsScannerIterator
fromBegin(aSrcStart
);
518 copy_multifragment_string(fromBegin
, aSrcEnd
, writer
);
522 AppendUnicodeTo( const nsScannerIterator
& aSrcStart
,
523 const nsScannerIterator
& aSrcEnd
,
524 nsScannerSharedSubstring
& aDest
)
526 // Check whether we can just create a dependent string.
527 if (aDest
.str().IsEmpty()) {
528 // We can just make |aDest| point to the buffer.
529 // This will take care of copying if the buffer spans fragments.
530 aDest
.Rebind(aSrcStart
, aSrcEnd
);
532 // The dest string is not empty, so it can't be a dependent substring.
533 AppendUnicodeTo(aSrcStart
, aSrcEnd
, aDest
.writable());
538 AppendUnicodeTo( const nsScannerIterator
& aSrcStart
,
539 const nsScannerIterator
& aSrcEnd
,
542 nsAString::iterator writer
;
543 PRUint32 oldLength
= aDest
.Length();
544 if (!EnsureStringLength(aDest
, oldLength
+ Distance(aSrcStart
, aSrcEnd
)))
545 return; // out of memory
546 aDest
.BeginWriting(writer
).advance(oldLength
);
547 nsScannerIterator
fromBegin(aSrcStart
);
549 copy_multifragment_string(fromBegin
, aSrcEnd
, writer
);
553 FindCharInReadable( PRUnichar aChar
,
554 nsScannerIterator
& aSearchStart
,
555 const nsScannerIterator
& aSearchEnd
)
557 while ( aSearchStart
!= aSearchEnd
)
559 PRInt32 fragmentLength
;
560 if ( SameFragment(aSearchStart
, aSearchEnd
) )
561 fragmentLength
= aSearchEnd
.get() - aSearchStart
.get();
563 fragmentLength
= aSearchStart
.size_forward();
565 const PRUnichar
* charFoundAt
= nsCharTraits
<PRUnichar
>::find(aSearchStart
.get(), fragmentLength
, aChar
);
567 aSearchStart
.advance( charFoundAt
- aSearchStart
.get() );
571 aSearchStart
.advance(fragmentLength
);
578 FindInReadable( const nsAString
& aPattern
,
579 nsScannerIterator
& aSearchStart
,
580 nsScannerIterator
& aSearchEnd
,
581 const nsStringComparator
& compare
)
583 PRBool found_it
= PR_FALSE
;
585 // only bother searching at all if we're given a non-empty range to search
586 if ( aSearchStart
!= aSearchEnd
)
588 nsAString::const_iterator aPatternStart
, aPatternEnd
;
589 aPattern
.BeginReading(aPatternStart
);
590 aPattern
.EndReading(aPatternEnd
);
592 // outer loop keeps searching till we find it or run out of string to search
595 // fast inner loop (that's what it's called, not what it is) looks for a potential match
596 while ( aSearchStart
!= aSearchEnd
&&
597 compare(*aPatternStart
, *aSearchStart
) )
600 // if we broke out of the `fast' loop because we're out of string ... we're done: no match
601 if ( aSearchStart
== aSearchEnd
)
604 // otherwise, we're at a potential match, let's see if we really hit one
605 nsAString::const_iterator
testPattern(aPatternStart
);
606 nsScannerIterator
testSearch(aSearchStart
);
608 // slow inner loop verifies the potential match (found by the `fast' loop) at the current position
611 // we already compared the first character in the outer loop,
612 // so we'll advance before the next comparison
616 // if we verified all the way to the end of the pattern, then we found it!
617 if ( testPattern
== aPatternEnd
)
620 aSearchEnd
= testSearch
; // return the exact found range through the parameters
624 // if we got to end of the string we're searching before we hit the end of the
625 // pattern, we'll never find what we're looking for
626 if ( testSearch
== aSearchEnd
)
628 aSearchStart
= aSearchEnd
;
632 // else if we mismatched ... it's time to advance to the next search position
633 // and get back into the `fast' loop
634 if ( compare(*testPattern
, *testSearch
) )
647 * This implementation is simple, but does too much work.
648 * It searches the entire string from left to right, and returns the last match found, if any.
649 * This implementation will be replaced when I get |reverse_iterator|s working.
652 RFindInReadable( const nsAString
& aPattern
,
653 nsScannerIterator
& aSearchStart
,
654 nsScannerIterator
& aSearchEnd
,
655 const nsStringComparator
& aComparator
)
657 PRBool found_it
= PR_FALSE
;
659 nsScannerIterator
savedSearchEnd(aSearchEnd
);
660 nsScannerIterator
searchStart(aSearchStart
), searchEnd(aSearchEnd
);
662 while ( searchStart
!= searchEnd
)
664 if ( FindInReadable(aPattern
, searchStart
, searchEnd
, aComparator
) )
668 // this is the best match so far, so remember it
669 aSearchStart
= searchStart
;
670 aSearchEnd
= searchEnd
;
672 // ...and get ready to search some more
673 // (it's tempting to set |searchStart=searchEnd| ... but that misses overlapping patterns)
675 searchEnd
= savedSearchEnd
;
679 // if we never found it, return an empty range
681 aSearchStart
= aSearchEnd
;