Bug 470455 - test_database_sync_embed_visits.js leaks, r=sdwilsh
[wine-gecko.git] / parser / htmlparser / src / nsScannerString.cpp
blobaa18df015fa9391446f123c3905a90c765e88fa1
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
3 /* ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
16 * The Original Code is Mozilla.
18 * The Initial Developer of the Original Code is IBM Corporation.
19 * Portions created by IBM Corporation are Copyright (C) 2003
20 * IBM Corporation. All Rights Reserved.
22 * Contributor(s):
23 * Darin Fisher <darin@meer.net>
25 * Alternatively, the contents of this file may be used under the terms of
26 * either the GNU General Public License Version 2 or later (the "GPL"), or
27 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 * in which case the provisions of the GPL or the LGPL are applicable instead
29 * of those above. If you wish to allow use of your version of this file only
30 * under the terms of either the GPL or the LGPL, and not to allow others to
31 * use your version of this file under the terms of the MPL, indicate your
32 * decision by deleting the provisions above and replace them with the notice
33 * and other provisions required by the GPL or the LGPL. If you do not delete
34 * the provisions above, a recipient may use your version of this file under
35 * the terms of any one of the MPL, the GPL or the LGPL.
37 * ***** END LICENSE BLOCK ***** */
39 #include <stdlib.h>
40 #include "nsScannerString.h"
43 /**
44 * nsScannerBufferList
47 nsScannerBufferList::Buffer*
48 nsScannerBufferList::AllocBufferFromString( const nsAString& aString )
50 PRUint32 len = aString.Length();
52 Buffer* buf = (Buffer*) malloc(sizeof(Buffer) + (len + 1) * sizeof(PRUnichar));
53 if (buf)
55 // leave PRCList members of Buffer uninitialized
57 buf->mUsageCount = 0;
58 buf->mDataEnd = buf->DataStart() + len;
60 nsAString::const_iterator source;
61 aString.BeginReading(source);
62 nsCharTraits<PRUnichar>::copy(buf->DataStart(), source.get(), len);
64 // XXX null terminate. this shouldn't be required, but we do it because
65 // nsScanner erroneously thinks it can dereference DataEnd :-(
66 *buf->mDataEnd = PRUnichar(0);
68 return buf;
71 nsScannerBufferList::Buffer*
72 nsScannerBufferList::AllocBuffer( PRUint32 capacity )
74 Buffer* buf = (Buffer*) malloc(sizeof(Buffer) + (capacity + 1) * sizeof(PRUnichar));
75 if (buf)
77 // leave PRCList members of Buffer uninitialized
79 buf->mUsageCount = 0;
80 buf->mDataEnd = buf->DataStart() + capacity;
82 // XXX null terminate. this shouldn't be required, but we do it because
83 // nsScanner erroneously thinks it can dereference DataEnd :-(
84 *buf->mDataEnd = PRUnichar(0);
86 return buf;
89 void
90 nsScannerBufferList::ReleaseAll()
92 while (!PR_CLIST_IS_EMPTY(&mBuffers))
94 PRCList* node = PR_LIST_HEAD(&mBuffers);
95 PR_REMOVE_LINK(node);
96 //printf(">>> freeing buffer @%p\n", node);
97 free(static_cast<Buffer*>(node));
101 void
102 nsScannerBufferList::SplitBuffer( const Position& pos )
104 // splitting to the right keeps the work string and any extant token
105 // pointing to and holding a reference count on the same buffer.
107 Buffer* bufferToSplit = pos.mBuffer;
108 NS_ASSERTION(bufferToSplit, "null pointer");
110 PRUint32 splitOffset = pos.mPosition - bufferToSplit->DataStart();
111 NS_ASSERTION(pos.mPosition >= bufferToSplit->DataStart() &&
112 splitOffset <= bufferToSplit->DataLength(),
113 "split offset is outside buffer");
115 PRUint32 len = bufferToSplit->DataLength() - splitOffset;
116 Buffer* new_buffer = AllocBuffer(len);
117 if (new_buffer)
119 nsCharTraits<PRUnichar>::copy(new_buffer->DataStart(),
120 bufferToSplit->DataStart() + splitOffset,
121 len);
122 InsertAfter(new_buffer, bufferToSplit);
123 bufferToSplit->SetDataLength(splitOffset);
127 void
128 nsScannerBufferList::DiscardUnreferencedPrefix( Buffer* aBuf )
130 if (aBuf == Head())
132 while (!PR_CLIST_IS_EMPTY(&mBuffers) && !Head()->IsInUse())
134 Buffer* buffer = Head();
135 PR_REMOVE_LINK(buffer);
136 free(buffer);
141 size_t
142 nsScannerBufferList::Position::Distance( const Position& aStart, const Position& aEnd )
144 size_t result = 0;
145 if (aStart.mBuffer == aEnd.mBuffer)
147 result = aEnd.mPosition - aStart.mPosition;
149 else
151 result = aStart.mBuffer->DataEnd() - aStart.mPosition;
152 for (Buffer* b = aStart.mBuffer->Next(); b != aEnd.mBuffer; b = b->Next())
153 result += b->DataLength();
154 result += aEnd.mPosition - aEnd.mBuffer->DataStart();
156 return result;
161 * nsScannerSubstring
164 nsScannerSubstring::nsScannerSubstring()
165 : mStart(nsnull, nsnull)
166 , mEnd(nsnull, nsnull)
167 , mBufferList(nsnull)
168 , mLength(0)
169 , mIsDirty(PR_TRUE)
173 nsScannerSubstring::nsScannerSubstring( const nsAString& s )
174 : mBufferList(nsnull)
175 , mIsDirty(PR_TRUE)
177 Rebind(s);
180 nsScannerSubstring::~nsScannerSubstring()
182 release_ownership_of_buffer_list();
185 PRInt32
186 nsScannerSubstring::CountChar( PRUnichar c ) const
189 re-write this to use a counting sink
192 size_type result = 0;
193 size_type lengthToExamine = Length();
195 nsScannerIterator iter;
196 for ( BeginReading(iter); ; )
198 PRInt32 lengthToExamineInThisFragment = iter.size_forward();
199 const PRUnichar* fromBegin = iter.get();
200 result += size_type(NS_COUNT(fromBegin, fromBegin+lengthToExamineInThisFragment, c));
201 if ( !(lengthToExamine -= lengthToExamineInThisFragment) )
202 return result;
203 iter.advance(lengthToExamineInThisFragment);
205 // never reached; quiets warnings
206 return 0;
209 void
210 nsScannerSubstring::Rebind( const nsScannerSubstring& aString,
211 const nsScannerIterator& aStart,
212 const nsScannerIterator& aEnd )
214 // allow for the case where &aString == this
216 aString.acquire_ownership_of_buffer_list();
217 release_ownership_of_buffer_list();
219 mStart = aStart;
220 mEnd = aEnd;
221 mBufferList = aString.mBufferList;
222 mLength = Distance(aStart, aEnd);
223 mIsDirty = PR_TRUE;
226 void
227 nsScannerSubstring::Rebind( const nsAString& aString )
229 release_ownership_of_buffer_list();
231 mBufferList = new nsScannerBufferList(AllocBufferFromString(aString));
232 mIsDirty = PR_TRUE;
234 init_range_from_buffer_list();
235 acquire_ownership_of_buffer_list();
238 const nsSubstring&
239 nsScannerSubstring::AsString() const
241 if (mIsDirty)
243 nsScannerSubstring* mutable_this = const_cast<nsScannerSubstring*>(this);
245 if (mStart.mBuffer == mEnd.mBuffer) {
246 // We only have a single fragment to deal with, so just return it
247 // as a substring.
248 mutable_this->mFlattenedRep.Rebind(mStart.mPosition, mEnd.mPosition);
249 } else {
250 // Otherwise, we need to copy the data into a flattened buffer.
251 nsScannerIterator start, end;
252 CopyUnicodeTo(BeginReading(start), EndReading(end), mutable_this->mFlattenedRep);
255 mutable_this->mIsDirty = PR_FALSE;
258 return mFlattenedRep;
261 nsScannerIterator&
262 nsScannerSubstring::BeginReading( nsScannerIterator& iter ) const
264 iter.mOwner = this;
266 iter.mFragment.mBuffer = mStart.mBuffer;
267 iter.mFragment.mFragmentStart = mStart.mPosition;
268 if (mStart.mBuffer == mEnd.mBuffer)
269 iter.mFragment.mFragmentEnd = mEnd.mPosition;
270 else
271 iter.mFragment.mFragmentEnd = mStart.mBuffer->DataEnd();
273 iter.mPosition = mStart.mPosition;
274 iter.normalize_forward();
275 return iter;
278 nsScannerIterator&
279 nsScannerSubstring::EndReading( nsScannerIterator& iter ) const
281 iter.mOwner = this;
283 iter.mFragment.mBuffer = mEnd.mBuffer;
284 iter.mFragment.mFragmentEnd = mEnd.mPosition;
285 if (mStart.mBuffer == mEnd.mBuffer)
286 iter.mFragment.mFragmentStart = mStart.mPosition;
287 else
288 iter.mFragment.mFragmentStart = mEnd.mBuffer->DataStart();
290 iter.mPosition = mEnd.mPosition;
291 // must not |normalize_backward| as that would likely invalidate tests like |while ( first != last )|
292 return iter;
295 PRBool
296 nsScannerSubstring::GetNextFragment( nsScannerFragment& frag ) const
298 // check to see if we are at the end of the buffer list
299 if (frag.mBuffer == mEnd.mBuffer)
300 return PR_FALSE;
302 frag.mBuffer = static_cast<const Buffer*>(PR_NEXT_LINK(frag.mBuffer));
304 if (frag.mBuffer == mStart.mBuffer)
305 frag.mFragmentStart = mStart.mPosition;
306 else
307 frag.mFragmentStart = frag.mBuffer->DataStart();
309 if (frag.mBuffer == mEnd.mBuffer)
310 frag.mFragmentEnd = mEnd.mPosition;
311 else
312 frag.mFragmentEnd = frag.mBuffer->DataEnd();
314 return PR_TRUE;
317 PRBool
318 nsScannerSubstring::GetPrevFragment( nsScannerFragment& frag ) const
320 // check to see if we are at the beginning of the buffer list
321 if (frag.mBuffer == mStart.mBuffer)
322 return PR_FALSE;
324 frag.mBuffer = static_cast<const Buffer*>(PR_PREV_LINK(frag.mBuffer));
326 if (frag.mBuffer == mStart.mBuffer)
327 frag.mFragmentStart = mStart.mPosition;
328 else
329 frag.mFragmentStart = frag.mBuffer->DataStart();
331 if (frag.mBuffer == mEnd.mBuffer)
332 frag.mFragmentEnd = mEnd.mPosition;
333 else
334 frag.mFragmentEnd = frag.mBuffer->DataEnd();
336 return PR_TRUE;
341 * nsScannerString
344 nsScannerString::nsScannerString( Buffer* aBuf )
346 mBufferList = new nsScannerBufferList(aBuf);
348 init_range_from_buffer_list();
349 acquire_ownership_of_buffer_list();
352 void
353 nsScannerString::AppendBuffer( Buffer* aBuf )
355 mBufferList->Append(aBuf);
356 mLength += aBuf->DataLength();
358 mEnd.mBuffer = aBuf;
359 mEnd.mPosition = aBuf->DataEnd();
361 mIsDirty = PR_TRUE;
364 void
365 nsScannerString::DiscardPrefix( const nsScannerIterator& aIter )
367 Position old_start(mStart);
368 mStart = aIter;
369 mLength -= Position::Distance(old_start, mStart);
371 mStart.mBuffer->IncrementUsageCount();
372 old_start.mBuffer->DecrementUsageCount();
374 mBufferList->DiscardUnreferencedPrefix(old_start.mBuffer);
376 mIsDirty = PR_TRUE;
379 void
380 nsScannerString::UngetReadable( const nsAString& aReadable, const nsScannerIterator& aInsertPoint )
382 * Warning: this routine manipulates the shared buffer list in an unexpected way.
383 * The original design did not really allow for insertions, but this call promises
384 * that if called for a point after the end of all extant token strings, that no token string
385 * or the work string will be invalidated.
387 * This routine is protected because it is the responsibility of the derived class to keep those promises.
390 Position insertPos(aInsertPoint);
392 mBufferList->SplitBuffer(insertPos);
393 // splitting to the right keeps the work string and any extant token pointing to and
394 // holding a reference count on the same buffer
396 Buffer* new_buffer = AllocBufferFromString(aReadable);
397 // make a new buffer with all the data to insert...
398 // BULLSHIT ALERT: we may have empty space to re-use in the split buffer, measure the cost
399 // of this and decide if we should do the work to fill it
401 Buffer* buffer_to_split = insertPos.mBuffer;
402 mBufferList->InsertAfter(new_buffer, buffer_to_split);
403 mLength += aReadable.Length();
405 mEnd.mBuffer = mBufferList->Tail();
406 mEnd.mPosition = mEnd.mBuffer->DataEnd();
408 mIsDirty = PR_TRUE;
411 void
412 nsScannerString::ReplaceCharacter(nsScannerIterator& aPosition, PRUnichar aChar)
414 // XXX Casting a const to non-const. Unless the base class
415 // provides support for writing iterators, this is the best
416 // that can be done.
417 PRUnichar* pos = const_cast<PRUnichar*>(aPosition.get());
418 *pos = aChar;
420 mIsDirty = PR_TRUE;
425 * nsScannerSharedSubstring
428 void
429 nsScannerSharedSubstring::Rebind(const nsScannerIterator &aStart,
430 const nsScannerIterator &aEnd)
432 // If the start and end positions are inside the same buffer, we must
433 // acquire ownership of the buffer. If not, we can optimize by not holding
434 // onto it.
436 Buffer *buffer = const_cast<Buffer*>(aStart.buffer());
437 PRBool sameBuffer = buffer == aEnd.buffer();
439 nsScannerBufferList *bufferList;
441 if (sameBuffer) {
442 bufferList = aStart.mOwner->mBufferList;
443 bufferList->AddRef();
444 buffer->IncrementUsageCount();
447 if (mBufferList)
448 ReleaseBuffer();
450 if (sameBuffer) {
451 mBuffer = buffer;
452 mBufferList = bufferList;
453 mString.Rebind(aStart.mPosition, aEnd.mPosition);
454 } else {
455 mBuffer = nsnull;
456 mBufferList = nsnull;
457 CopyUnicodeTo(aStart, aEnd, mString);
461 void
462 nsScannerSharedSubstring::ReleaseBuffer()
464 NS_ASSERTION(mBufferList, "Should only be called with non-null mBufferList");
465 mBuffer->DecrementUsageCount();
466 mBufferList->DiscardUnreferencedPrefix(mBuffer);
467 mBufferList->Release();
470 void
471 nsScannerSharedSubstring::MakeMutable()
473 nsString temp(mString); // this will force a copy of the data
474 mString.Assign(temp); // mString will now share the just-allocated buffer
476 ReleaseBuffer();
478 mBuffer = nsnull;
479 mBufferList = nsnull;
483 * utils -- based on code from nsReadableUtils.cpp
486 // private helper function
487 static inline
488 nsAString::iterator&
489 copy_multifragment_string( nsScannerIterator& first, const nsScannerIterator& last, nsAString::iterator& result )
491 typedef nsCharSourceTraits<nsScannerIterator> source_traits;
492 typedef nsCharSinkTraits<nsAString::iterator> sink_traits;
494 while ( first != last )
496 PRUint32 distance = source_traits::readable_distance(first, last);
497 sink_traits::write(result, source_traits::read(first), distance);
498 NS_ASSERTION(distance > 0, "|copy_multifragment_string| will never terminate");
499 source_traits::advance(first, distance);
502 return result;
505 void
506 CopyUnicodeTo( const nsScannerIterator& aSrcStart,
507 const nsScannerIterator& aSrcEnd,
508 nsAString& aDest )
510 nsAString::iterator writer;
511 if (!EnsureStringLength(aDest, Distance(aSrcStart, aSrcEnd))) {
512 aDest.Truncate();
513 return; // out of memory
515 aDest.BeginWriting(writer);
516 nsScannerIterator fromBegin(aSrcStart);
518 copy_multifragment_string(fromBegin, aSrcEnd, writer);
521 void
522 AppendUnicodeTo( const nsScannerIterator& aSrcStart,
523 const nsScannerIterator& aSrcEnd,
524 nsScannerSharedSubstring& aDest )
526 // Check whether we can just create a dependent string.
527 if (aDest.str().IsEmpty()) {
528 // We can just make |aDest| point to the buffer.
529 // This will take care of copying if the buffer spans fragments.
530 aDest.Rebind(aSrcStart, aSrcEnd);
531 } else {
532 // The dest string is not empty, so it can't be a dependent substring.
533 AppendUnicodeTo(aSrcStart, aSrcEnd, aDest.writable());
537 void
538 AppendUnicodeTo( const nsScannerIterator& aSrcStart,
539 const nsScannerIterator& aSrcEnd,
540 nsAString& aDest )
542 nsAString::iterator writer;
543 PRUint32 oldLength = aDest.Length();
544 if (!EnsureStringLength(aDest, oldLength + Distance(aSrcStart, aSrcEnd)))
545 return; // out of memory
546 aDest.BeginWriting(writer).advance(oldLength);
547 nsScannerIterator fromBegin(aSrcStart);
549 copy_multifragment_string(fromBegin, aSrcEnd, writer);
552 PRBool
553 FindCharInReadable( PRUnichar aChar,
554 nsScannerIterator& aSearchStart,
555 const nsScannerIterator& aSearchEnd )
557 while ( aSearchStart != aSearchEnd )
559 PRInt32 fragmentLength;
560 if ( SameFragment(aSearchStart, aSearchEnd) )
561 fragmentLength = aSearchEnd.get() - aSearchStart.get();
562 else
563 fragmentLength = aSearchStart.size_forward();
565 const PRUnichar* charFoundAt = nsCharTraits<PRUnichar>::find(aSearchStart.get(), fragmentLength, aChar);
566 if ( charFoundAt ) {
567 aSearchStart.advance( charFoundAt - aSearchStart.get() );
568 return PR_TRUE;
571 aSearchStart.advance(fragmentLength);
574 return PR_FALSE;
577 PRBool
578 FindInReadable( const nsAString& aPattern,
579 nsScannerIterator& aSearchStart,
580 nsScannerIterator& aSearchEnd,
581 const nsStringComparator& compare )
583 PRBool found_it = PR_FALSE;
585 // only bother searching at all if we're given a non-empty range to search
586 if ( aSearchStart != aSearchEnd )
588 nsAString::const_iterator aPatternStart, aPatternEnd;
589 aPattern.BeginReading(aPatternStart);
590 aPattern.EndReading(aPatternEnd);
592 // outer loop keeps searching till we find it or run out of string to search
593 while ( !found_it )
595 // fast inner loop (that's what it's called, not what it is) looks for a potential match
596 while ( aSearchStart != aSearchEnd &&
597 compare(*aPatternStart, *aSearchStart) )
598 ++aSearchStart;
600 // if we broke out of the `fast' loop because we're out of string ... we're done: no match
601 if ( aSearchStart == aSearchEnd )
602 break;
604 // otherwise, we're at a potential match, let's see if we really hit one
605 nsAString::const_iterator testPattern(aPatternStart);
606 nsScannerIterator testSearch(aSearchStart);
608 // slow inner loop verifies the potential match (found by the `fast' loop) at the current position
609 for(;;)
611 // we already compared the first character in the outer loop,
612 // so we'll advance before the next comparison
613 ++testPattern;
614 ++testSearch;
616 // if we verified all the way to the end of the pattern, then we found it!
617 if ( testPattern == aPatternEnd )
619 found_it = PR_TRUE;
620 aSearchEnd = testSearch; // return the exact found range through the parameters
621 break;
624 // if we got to end of the string we're searching before we hit the end of the
625 // pattern, we'll never find what we're looking for
626 if ( testSearch == aSearchEnd )
628 aSearchStart = aSearchEnd;
629 break;
632 // else if we mismatched ... it's time to advance to the next search position
633 // and get back into the `fast' loop
634 if ( compare(*testPattern, *testSearch) )
636 ++aSearchStart;
637 break;
643 return found_it;
647 * This implementation is simple, but does too much work.
648 * It searches the entire string from left to right, and returns the last match found, if any.
649 * This implementation will be replaced when I get |reverse_iterator|s working.
651 PRBool
652 RFindInReadable( const nsAString& aPattern,
653 nsScannerIterator& aSearchStart,
654 nsScannerIterator& aSearchEnd,
655 const nsStringComparator& aComparator )
657 PRBool found_it = PR_FALSE;
659 nsScannerIterator savedSearchEnd(aSearchEnd);
660 nsScannerIterator searchStart(aSearchStart), searchEnd(aSearchEnd);
662 while ( searchStart != searchEnd )
664 if ( FindInReadable(aPattern, searchStart, searchEnd, aComparator) )
666 found_it = PR_TRUE;
668 // this is the best match so far, so remember it
669 aSearchStart = searchStart;
670 aSearchEnd = searchEnd;
672 // ...and get ready to search some more
673 // (it's tempting to set |searchStart=searchEnd| ... but that misses overlapping patterns)
674 ++searchStart;
675 searchEnd = savedSearchEnd;
679 // if we never found it, return an empty range
680 if ( !found_it )
681 aSearchStart = aSearchEnd;
683 return found_it;