1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
3 /* ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
16 * The Original Code is Mozilla.
18 * The Initial Developer of the Original Code is IBM Corporation.
19 * Portions created by IBM Corporation are Copyright (C) 2003
20 * IBM Corporation. All Rights Reserved.
23 * Darin Fisher <darin@meer.net>
25 * Alternatively, the contents of this file may be used under the terms of
26 * either the GNU General Public License Version 2 or later (the "GPL"), or
27 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 * in which case the provisions of the GPL or the LGPL are applicable instead
29 * of those above. If you wish to allow use of your version of this file only
30 * under the terms of either the GPL or the LGPL, and not to allow others to
31 * use your version of this file under the terms of the MPL, indicate your
32 * decision by deleting the provisions above and replace them with the notice
33 * and other provisions required by the GPL or the LGPL. If you do not delete
34 * the provisions above, a recipient may use your version of this file under
35 * the terms of any one of the MPL, the GPL or the LGPL.
37 * ***** END LICENSE BLOCK ***** */
39 #ifndef nsScannerString_h___
40 #define nsScannerString_h___
43 #include "nsUnicharUtils.h" // for nsCaseInsensitiveStringComparator
48 * NOTE: nsScannerString (and the other classes defined in this file) are
49 * not related to nsAString or any of the other xpcom/string classes.
51 * nsScannerString is based on the nsSlidingString implementation that used
52 * to live in xpcom/string. Now that nsAString is limited to representing
53 * only single fragment strings, nsSlidingString can no longer be used.
55 * An advantage to this design is that it does not employ any virtual functions.
58 * This file uses SCC-style indenting in deference to the nsSlidingString
59 * code from which this code is derived ;-)
// Forward declarations: these classes are referenced by declarations that
// appear before their full definitions in this header.
62 class nsScannerIterator
;
63 class nsScannerSubstring
;
64 class nsScannerString
;
70 * This class maintains a list of heap-allocated Buffer objects. The buffers
71 * are maintained in a circular linked list. Each buffer has a usage count
72 * that is decremented by the owning nsScannerSubstring.
74 * The buffer list itself is reference counted. This allows the buffer list
75 * to be shared by multiple nsScannerSubstring objects. The reference
76 * counting is not threadsafe; thread safety is not a requirement here.
78 * When a nsScannerSubstring releases its reference to a buffer list, it
79 * decrements the usage count of the first buffer in the buffer list that it
80 * was referencing. It informs the buffer list that it can discard buffers
81 * starting at that prefix. The buffer list will do so if the usage count of
82 * that buffer is 0 and if it is the first buffer in the list. It will
83 * continue to prune buffers starting from the front of the buffer list until
84 * it finds a buffer that has a usage count that is non-zero.
86 class nsScannerBufferList
91 * Buffer objects are directly followed by a data segment. The start
92 * of the data segment is found by incrementing the |this| pointer by one (see DataStart()).
// One entry of the buffer list. Derives from PRCList; the |next| and |prev|
// names used below are presumably the PRCList link fields (confirm in prclist.h).
95 class Buffer
: public PRCList
// Usage-count bookkeeping: increment/decrement the counter.
99 void IncrementUsageCount() { ++mUsageCount
; }
100 void DecrementUsageCount() { --mUsageCount
; }
// True while the usage count is non-zero.
102 PRBool
IsInUse() const { return mUsageCount
!= 0; }
// The character data starts at the address immediately after this object
// (this+1), reinterpreted as PRUnichar*.
104 const PRUnichar
* DataStart() const { return (const PRUnichar
*) (this+1); }
105 PRUnichar
* DataStart() { return ( PRUnichar
*) (this+1); }
// End pointer of the stored data, as last set by SetDataLength().
107 const PRUnichar
* DataEnd() const { return mDataEnd
; }
108 PRUnichar
* DataEnd() { return mDataEnd
; }
// Following list entry, downcast from the |next| link to Buffer.
110 const Buffer
* Next() const { return static_cast<const Buffer
*>(next
); }
111 Buffer
* Next() { return static_cast<Buffer
*>(next
); }
// Preceding list entry, downcast from the |prev| link to Buffer.
113 const Buffer
* Prev() const { return static_cast<const Buffer
*>(prev
); }
114 Buffer
* Prev() { return static_cast<Buffer
*>(prev
); }
// Number of PRUnichar elements between DataStart() and mDataEnd; the pointer
// difference is implicitly converted to PRUint32.
116 PRUint32
DataLength() const { return mDataEnd
- DataStart(); }
// Moves mDataEnd to |len| elements past DataStart(). No capacity check is
// performed here.
117 void SetDataLength(PRUint32 len
) { mDataEnd
= DataStart() + len
; }
// The enclosing list class is granted access to Buffer's non-public members.
121 friend class nsScannerBufferList
;
128 * Position objects serve as lightweight pointers into a buffer list.
129 * The mPosition member must lie within the range from mBuffer->DataStart()
130 * to mBuffer->DataEnd().
// Builds a Position from a buffer and a character pointer; mPosition is
// initialised from |position|.
138 Position( Buffer
* buffer
, PRUnichar
* position
)
140 , mPosition(position
)
// Conversion from an iterator. Defined inline later in this header, where it
// takes the iterator's buffer() and get() values.
144 Position( const nsScannerIterator
& aIter
);
// Assignment from an iterator; also defined inline later in this header.
147 Position
& operator=( const nsScannerIterator
& aIter
);
// Distance between two positions as a size_t. Only declared in the visible
// part of this header.
149 static size_t Distance( const Position
& p1
, const Position
& p2
);
// Raw character pointer for this position.
152 PRUnichar
* mPosition
;
// Buffer factories. Only declared here; the definitions are not in the
// visible part of this header.
155 static Buffer
* AllocBufferFromString( const nsAString
& );
156 static Buffer
* AllocBuffer( PRUint32 capacity
); // capacity = number of chars
// Constructor: initialises the mBuffers list head and appends |buf| to it.
158 nsScannerBufferList( Buffer
* buf
)
161 PR_INIT_CLIST(&mBuffers
);
162 PR_APPEND_LINK(buf
, &mBuffers
);
// Plain (non-synchronised) reference counting; the object deletes itself when
// the count drops to zero.
165 void AddRef() { ++mRefCnt
; }
166 void Release() { if (--mRefCnt
== 0) delete this; }
// Adds |buf| to the list with PR_APPEND_LINK on the list head.
168 void Append( Buffer
* buf
) { PR_APPEND_LINK(buf
, &mBuffers
); }
// Links |buf| into the list directly after |prev| with PR_INSERT_AFTER.
169 void InsertAfter( Buffer
* buf
, Buffer
* prev
) { PR_INSERT_AFTER(buf
, prev
); }
// Declared only; implemented outside the visible part of this header.
170 void SplitBuffer( const Position
& );
171 void DiscardUnreferencedPrefix( Buffer
* );
// First entry of the list (PR_LIST_HEAD), downcast to Buffer.
173 Buffer
* Head() { return static_cast<Buffer
*>(PR_LIST_HEAD(&mBuffers
)); }
174 const Buffer
* Head() const { return static_cast<const Buffer
*>(PR_LIST_HEAD(&mBuffers
)); }
// Last entry of the list (PR_LIST_TAIL), downcast to Buffer.
176 Buffer
* Tail() { return static_cast<Buffer
*>(PR_LIST_TAIL(&mBuffers
)); }
177 const Buffer
* Tail() const { return static_cast<const Buffer
*>(PR_LIST_TAIL(&mBuffers
)); }
// nsScannerSubstring is granted access to this class's non-public members.
181 friend class nsScannerSubstring
;
// Destruction calls ReleaseAll(); its declaration is not in the visible lines.
183 ~nsScannerBufferList() { ReleaseAll(); }
192 * nsScannerFragment represents a "slice" of a Buffer object.
// Plain struct: a buffer pointer plus the two character pointers that delimit
// the fragment inside that buffer.
194 struct nsScannerFragment
196 typedef nsScannerBufferList::Buffer Buffer
;
// Buffer that the fragment pointers refer into.
198 const Buffer
* mBuffer
;
// Start and end of the fragment. The iterator code treats mFragmentEnd as the
// point at which it moves on to the next fragment.
199 const PRUnichar
* mFragmentStart
;
200 const PRUnichar
* mFragmentEnd
;
205 * nsScannerSubstring is the base class for nsScannerString. It provides
206 * access to iterators and methods to bind the substring to another
207 * substring or nsAString instance.
209 * This class owns the buffer list.
// Base class of nsScannerString: a range (mStart..mEnd, mLength) over a shared
// nsScannerBufferList.
211 class nsScannerSubstring
// Short names for the buffer-list helper types and the length type.
214 typedef nsScannerBufferList::Buffer Buffer
;
215 typedef nsScannerBufferList::Position Position
;
216 typedef PRUint32 size_type
;
// Construction/destruction; definitions are not in the visible part of this header.
218 nsScannerSubstring();
219 nsScannerSubstring( const nsAString
& s
);
221 ~nsScannerSubstring();
// Each takes an iterator by reference and returns an iterator reference; the
// helpers at the end of this header pass the result straight on as an argument.
223 nsScannerIterator
& BeginReading( nsScannerIterator
& iter
) const;
224 nsScannerIterator
& EndReading( nsScannerIterator
& iter
) const;
// Returns the stored mLength; nothing is recomputed here.
226 size_type
Length() const { return mLength
; }
228 PRInt32
CountChar( PRUnichar
) const;
// Rebind overloads: from another substring plus two iterators, or from an nsAString.
230 void Rebind( const nsScannerSubstring
&, const nsScannerIterator
&, const nsScannerIterator
& );
231 void Rebind( const nsAString
& );
233 const nsSubstring
& AsString() const;
// Fragment stepping; called by nsScannerIterator::normalize_forward/backward.
235 PRBool
GetNextFragment( nsScannerFragment
& ) const;
236 PRBool
GetPrevFragment( nsScannerFragment
& ) const;
// Convenience wrappers that forward to the nsScannerBufferList factories.
238 static inline Buffer
* AllocBufferFromString( const nsAString
& aStr
) { return nsScannerBufferList::AllocBufferFromString(aStr
); }
239 static inline Buffer
* AllocBuffer( size_type aCapacity
) { return nsScannerBufferList::AllocBuffer(aCapacity
); }
// Takes a reference on the buffer list and bumps the usage count of the
// buffer that mStart points into.
243 void acquire_ownership_of_buffer_list() const
245 mBufferList
->AddRef();
246 mStart
.mBuffer
->IncrementUsageCount();
// Reverse of the above: drop the usage count on the start buffer, let the
// list discard unreferenced buffers from that point, then release the list.
// NOTE(review): the lines between the signature and the first statement are
// not visible here; any guard that exists there is not shown.
249 void release_ownership_of_buffer_list()
253 mStart
.mBuffer
->DecrementUsageCount();
254 mBufferList
->DiscardUnreferencedPrefix(mStart
.mBuffer
);
255 mBufferList
->Release();
// Resets the range to cover the whole list: mStart at the head buffer's
// DataStart(), mEnd at the tail buffer's DataEnd(), mLength recomputed via
// Position::Distance.
259 void init_range_from_buffer_list()
261 mStart
.mBuffer
= mBufferList
->Head();
262 mStart
.mPosition
= mStart
.mBuffer
->DataStart();
264 mEnd
.mBuffer
= mBufferList
->Tail();
265 mEnd
.mPosition
= mEnd
.mBuffer
->DataEnd();
267 mLength
= Position::Distance(mStart
, mEnd
);
// The buffer list this substring refers to.
272 nsScannerBufferList
*mBufferList
;
275 // these fields are used to implement AsString
276 nsDependentSubstring mFlattenedRep
;
// nsScannerSharedSubstring is granted access to non-public members.
279 friend class nsScannerSharedSubstring
;
284 * nsScannerString provides methods to grow and modify a buffer list.
// Derived from nsScannerSubstring; adds the operations that grow or modify
// the underlying buffer list. All members below are declarations only.
286 class nsScannerString
: public nsScannerSubstring
// Construct from an initial Buffer.
290 nsScannerString( Buffer
* );
292 // you are giving ownership to the string, it takes and keeps your
293 // buffer, deleting it when done.
294 // Use AllocBuffer or AllocBufferFromString to create a Buffer object
295 // for use with this function.
296 void AppendBuffer( Buffer
* );
// Takes an iterator; judging by the name it drops the content before that
// position (the implementation is not visible here).
298 void DiscardPrefix( const nsScannerIterator
& );
299 // any other way you want to do this?
// Implementations are not visible in this header; see the corresponding .cpp.
301 void UngetReadable(const nsAString
& aReadable
, const nsScannerIterator
& aCurrentPosition
);
302 void ReplaceCharacter(nsScannerIterator
& aPosition
, PRUnichar aChar
);
307 * nsScannerSharedSubstring implements copy-on-write semantics for
308 * nsScannerSubstring. When you call .writable(), it will copy the data
309 * and return a mutable string object. This class also manages releasing
310 * the reference to the scanner buffer when it is no longer needed.
// Holder for a string view (mString) together with the buffer and buffer list
// it was bound from.
313 class nsScannerSharedSubstring
// Default state: no buffer and no buffer list.
316 nsScannerSharedSubstring()
317 : mBuffer(nsnull
), mBufferList(nsnull
) { }
// NOTE(review): the destructor body is not visible in this extract.
319 ~nsScannerSharedSubstring()
325 // Acquire a copy-on-write reference to the given substring.
326 NS_HIDDEN_(void) Rebind(const nsScannerIterator
& aStart
,
327 const nsScannerIterator
& aEnd
);
329 // Get a mutable reference to this string
// NOTE(review): the body of writable() is not visible in this extract.
330 nsSubstring
& writable()
338 // Get a const reference to this string
339 const nsSubstring
& str() const { return mString
; }
342 typedef nsScannerBufferList::Buffer Buffer
;
// Internal helpers; declared only.
344 NS_HIDDEN_(void) ReleaseBuffer();
345 NS_HIDDEN_(void) MakeMutable();
// The string returned by str().
347 nsDependentSubstring mString
;
// Buffer list associated with the current binding (nsnull when unbound).
349 nsScannerBufferList
*mBufferList
;
353 * nsScannerIterator works just like nsReadingIterator<CharT> except that
354 * it knows how to iterate over a list of scanner buffers.
// Iterator over the characters of an nsScannerSubstring; tracks the current
// fragment, a position inside it, and the owning substring.
356 class nsScannerIterator
// Iterator-style typedefs.
359 typedef nsScannerIterator self_type
;
360 typedef ptrdiff_t difference_type
;
361 typedef PRUnichar value_type
;
362 typedef const PRUnichar
* pointer
;
363 typedef const PRUnichar
& reference
;
364 typedef nsScannerSubstring::Buffer Buffer
;
// Current fragment, current position within it, and the substring that is
// asked for the next/previous fragment.
368 nsScannerFragment mFragment
;
369 const PRUnichar
* mPosition
;
370 const nsScannerSubstring
* mOwner
;
372 friend class nsScannerSubstring
;
373 friend class nsScannerSharedSubstring
;
// NOTE(review): the default constructor has an empty body and no initialisers,
// so mFragment, mPosition and mOwner are left uninitialised until the
// iterator is assigned or filled in by one of the friend classes.
376 nsScannerIterator() {}
377 // nsScannerIterator( const nsScannerIterator& ); // auto-generated copy-constructor OK
378 // nsScannerIterator& operator=( const nsScannerIterator& ); // auto-generated copy-assignment operator OK
// Fragment-boundary fix-ups; defined inline further down in this header.
380 inline void normalize_forward();
381 inline void normalize_backward();
// NOTE(review): the bodies of operator*() and fragment() are not visible in
// this extract.
388 PRUnichar
operator*() const
393 const nsScannerFragment
& fragment() const
// Buffer of the current fragment.
398 const Buffer
* buffer() const
400 return mFragment
.mBuffer
;
// NOTE(review): only parts of the increment/decrement operator bodies are
// visible. The visible statements show the postfix forms copying *this into
// |result| and the decrement forms calling normalize_backward().
403 self_type
& operator++()
410 self_type
operator++( int )
412 self_type
result(*this);
418 self_type
& operator--()
420 normalize_backward();
425 self_type
operator--( int )
427 self_type
result(*this);
428 normalize_backward();
// Characters remaining between the current position and the fragment end.
433 difference_type
size_forward() const
435 return mFragment
.mFragmentEnd
- mPosition
;
// Characters between the fragment start and the current position.
438 difference_type
size_backward() const
440 return mPosition
- mFragment
.mFragmentStart
;
// Moves the iterator by |n| characters. The visible statements show each step
// (one_hop) being limited to what is left in the current fragment: NS_MIN
// with size_forward() going forward, NS_MAX with -size_backward() going
// backward. The surrounding loop/branch lines are not visible in this extract.
443 self_type
& advance( difference_type n
)
447 difference_type one_hop
= NS_MIN(n
, size_forward());
449 NS_ASSERTION(one_hop
>0, "Infinite loop: can't advance a reading iterator beyond the end of a string");
450 // perhaps I should |break| if |!one_hop|?
452 mPosition
+= one_hop
;
459 normalize_backward();
460 difference_type one_hop
= NS_MAX(n
, -size_backward());
462 NS_ASSERTION(one_hop
<0, "Infinite loop: can't advance (backward) a reading iterator beyond the end of a string");
463 // perhaps I should |break| if |!one_hop|?
465 mPosition
+= one_hop
;
// Two iterators are treated as being in the same fragment when their
// fragments have the same mFragmentStart pointer.
476 SameFragment( const nsScannerIterator
& a
, const nsScannerIterator
& b
)
478 return a
.fragment().mFragmentStart
== b
.fragment().mFragmentStart
;
483 * this class is needed in order to make use of the methods in nsAlgorithm.h
// Specialisation of nsCharSourceTraits for nsScannerIterator.
485 NS_SPECIALIZE_TEMPLATE
486 struct nsCharSourceTraits
<nsScannerIterator
>
488 typedef nsScannerIterator::difference_type difference_type
;
// If |first| and |last| are in the same fragment, the result is the pointer
// distance between them; otherwise it is what remains of |first|'s current
// fragment. The value is converted to PRUint32.
492 readable_distance( const nsScannerIterator
& first
, const nsScannerIterator
& last
)
494 return PRUint32(SameFragment(first
, last
) ? last
.get() - first
.get() : first
.size_forward());
// NOTE(review): the bodies of read() and advance() are not visible in this extract.
498 const nsScannerIterator::value_type
*
499 read( const nsScannerIterator
& iter
)
506 advance( nsScannerIterator
& s
, difference_type n
)
514 * inline methods follow
// While the position sits at the end of the current fragment and the owner
// supplies a following fragment, move to the start of that fragment. The loop
// also skips over empty fragments.
519 nsScannerIterator::normalize_forward()
521 while (mPosition
== mFragment
.mFragmentEnd
&& mOwner
->GetNextFragment(mFragment
))
522 mPosition
= mFragment
.mFragmentStart
;
// Mirror of normalize_forward(): while the position sits at the start of the
// current fragment and the owner supplies a preceding fragment, move to the
// end of that fragment.
527 nsScannerIterator::normalize_backward()
529 while (mPosition
== mFragment
.mFragmentStart
&& mOwner
->GetPrevFragment(mFragment
))
530 mPosition
= mFragment
.mFragmentEnd
;
// Iterators compare equal when their get() pointers are equal; the fragment
// and owner are not compared.
535 operator==( const nsScannerIterator
& lhs
, const nsScannerIterator
& rhs
)
537 return lhs
.get() == rhs
.get();
// Negation of operator==: compares only the get() pointers.
542 operator!=( const nsScannerIterator
& lhs
, const nsScannerIterator
& rhs
)
544 return lhs
.get() != rhs
.get();
// Builds a Position from an iterator's buffer() and get(). Both are obtained
// as pointers-to-const and const_cast to the mutable pointer types that
// Position stores.
549 nsScannerBufferList::Position::Position(const nsScannerIterator
& aIter
)
550 : mBuffer(const_cast<Buffer
*>(aIter
.buffer()))
551 , mPosition(const_cast<PRUnichar
*>(aIter
.get()))
// Assignment counterpart of the converting constructor: copies the iterator's
// buffer() and get() into mBuffer / mPosition, casting away const.
555 nsScannerBufferList::Position
&
556 nsScannerBufferList::Position::operator=(const nsScannerIterator
& aIter
)
558 mBuffer
= const_cast<Buffer
*>(aIter
.buffer());
559 mPosition
= const_cast<PRUnichar
*>(aIter
.get());
565 * scanner string utils
567 * These methods mimic the API provided by nsReadableUtils in xpcom/string.
568 * Here we provide only the methods that the htmlparser module needs.
// Distance between two iterators: both are converted to Position objects and
// handed to Position::Distance.
573 Distance( const nsScannerIterator
& aStart
, const nsScannerIterator
& aEnd
)
575 typedef nsScannerBufferList::Position Position
;
576 return Position::Distance(Position(aStart
), Position(aEnd
));
// Iterator-range overload (declaration; its remaining parameter line is not
// visible in this extract).
580 CopyUnicodeTo( const nsScannerIterator
& aSrcStart
,
581 const nsScannerIterator
& aSrcEnd
,
// Whole-substring overload: obtains begin/end iterators from |aSrc| and
// forwards to the iterator-range overload with |aDest|.
586 CopyUnicodeTo( const nsScannerSubstring
& aSrc
, nsAString
& aDest
)
588 nsScannerIterator begin
, end
;
589 CopyUnicodeTo(aSrc
.BeginReading(begin
), aSrc
.EndReading(end
), aDest
);
// Iterator-range overload (declaration; its remaining parameter line is not
// visible in this extract).
593 AppendUnicodeTo( const nsScannerIterator
& aSrcStart
,
594 const nsScannerIterator
& aSrcEnd
,
// Whole-substring overload: obtains begin/end iterators from |aSrc| and
// forwards to the iterator-range overload with |aDest|.
599 AppendUnicodeTo( const nsScannerSubstring
& aSrc
, nsAString
& aDest
)
601 nsScannerIterator begin
, end
;
602 AppendUnicodeTo(aSrc
.BeginReading(begin
), aSrc
.EndReading(end
), aDest
);
// Overload whose destination is an nsScannerSharedSubstring (declaration only).
606 AppendUnicodeTo( const nsScannerIterator
& aSrcStart
,
607 const nsScannerIterator
& aSrcEnd
,
608 nsScannerSharedSubstring
& aDest
);
// Declaration only. |aStart| is taken by non-const reference and |aEnd| by
// const reference, so only |aStart| can be updated by the call.
611 FindCharInReadable( PRUnichar aChar
,
612 nsScannerIterator
& aStart
,
613 const nsScannerIterator
& aEnd
);
// Declaration only. Both iterators are non-const references; the comparator
// defaults to nsDefaultStringComparator().
616 FindInReadable( const nsAString
& aPattern
,
617 nsScannerIterator
& aStart
,
618 nsScannerIterator
& aEnd
,
619 const nsStringComparator
& = nsDefaultStringComparator() );
// Declaration only; same parameter list and default comparator as
// FindInReadable.
622 RFindInReadable( const nsAString
& aPattern
,
623 nsScannerIterator
& aStart
,
624 nsScannerIterator
& aEnd
,
625 const nsStringComparator
& = nsDefaultStringComparator() );
// Convenience wrapper: calls FindInReadable with an
// nsCaseInsensitiveStringComparator instead of the default comparator.
629 CaseInsensitiveFindInReadable( const nsAString
& aPattern
,
630 nsScannerIterator
& aStart
,
631 nsScannerIterator
& aEnd
)
633 return FindInReadable(aPattern
, aStart
, aEnd
,
634 nsCaseInsensitiveStringComparator());
637 #endif // !defined(nsScannerString_h___)