1 /***************************************************************************
2 * Copyright (C) 2005 by Piotr Szymanski <niedakh@gmail.com> *
4 * This program is free software; you can redistribute it and/or modify *
5 * it under the terms of the GNU General Public License as published by *
6 * the Free Software Foundation; either version 2 of the License, or *
7 * (at your option) any later version. *
8 ***************************************************************************/
11 #include "textpage_p.h"
23 using namespace Okular
;
29 : offset_begin( -1 ), offset_end( -1 )
33 TextList::ConstIterator it_begin
;
34 TextList::ConstIterator it_end
;
41 Rationale behind TinyTextEntity:
43 instead of storing directly a QString for the text of an entity,
44 we store the UTF-16 data and their length. This way, we save about
45 4 int's wrt a QString, and we can create a new string from that
46 raw data (that's the only penalty of that).
51 TinyTextEntity( const QString
&text
, const NormalizedRect
&rect
)
54 Q_ASSERT_X( !text
.isEmpty(), "TinyTextEntity", "empty string" );
55 length
= text
.length();
56 data
= new QChar
[ length
];
57 std::memcpy( data
, text
.constData(), length
* sizeof( QChar
) );
65 inline QString
text() const
67 return QString::fromRawData( data
, length
);
70 inline NormalizedRect
transformedArea( const QMatrix
&matrix
) const
72 NormalizedRect transformed_area
= area
;
73 transformed_area
.transform( matrix
);
74 return transformed_area
;
80 Q_DISABLE_COPY( TinyTextEntity
)
87 TextEntity::TextEntity( const QString
&text
, NormalizedRect
*area
)
88 : m_text( text
), m_area( area
), d( 0 )
92 TextEntity::~TextEntity()
97 QString
TextEntity::text() const
102 NormalizedRect
* TextEntity::area() const
107 NormalizedRect
TextEntity::transformedArea(const QMatrix
&matrix
) const
109 NormalizedRect transformed_area
= *m_area
;
110 transformed_area
.transform( matrix
);
111 return transformed_area
;
115 TextPagePrivate::TextPagePrivate()
120 TextPagePrivate::~TextPagePrivate()
122 qDeleteAll( m_searchPoints
);
123 qDeleteAll( m_words
);
128 : d( new TextPagePrivate() )
132 TextPage::TextPage( const TextEntity::List
&words
)
133 : d( new TextPagePrivate() )
135 TextEntity::List::ConstIterator it
= words
.constBegin(), itEnd
= words
.constEnd();
136 for ( ; it
!= itEnd
; ++it
)
139 if ( !e
->text().isEmpty() )
140 d
->m_words
.append( new TinyTextEntity( e
->text(), *e
->area() ) );
145 TextPage::~TextPage()
150 void TextPage::append( const QString
&text
, NormalizedRect
*area
)
152 if ( !text
.isEmpty() )
153 d
->m_words
.append( new TinyTextEntity( text
, *area
) );
157 RegularAreaRect
* TextPage::textArea ( TextSelection
* sel
) const
159 if ( d
->m_words
.isEmpty() )
160 return new RegularAreaRect();
164 There are two cursors, we need to select all the text between them. The coordinates are normalised, leftTop is (0,0)
165 rightBottom is (1,1), so for cursors start (sx,sy) and end (ex,ey) we start with finding text rectangles under those
166 points, if not we search for the first that is to the right to it in the same baseline, if none found, then we search
167 for the first rectangle with a baseline under the cursor, having two points that are the best rectangles to both
168 of the cursors: (rx,ry)x(tx,ty) for start and (ux,uy)x(vx,vy) for end, we do a
173 To find the closest rectangle to cursor (cx,cy) we search for a rectangle that either contains the cursor
174 or that has a left border >= cx and bottom border >= cy.
176 RegularAreaRect
* ret
= new RegularAreaRect
;
178 QMatrix matrix
= d
->m_page
? d
->m_page
->rotationMatrix() : QMatrix();
184 // ending cursor is higher than start cursor, we need to find positions in reverse
186 NormalizedRect start
;
189 NormalizedPoint startC
= sel
->start();
190 double startCx
= startC
.x
;
191 double startCy
= startC
.y
;
193 NormalizedPoint endC
= sel
->end();
194 double endCx
= endC
.x
;
195 double endCy
= endC
.y
;
197 if ( sel
->direction() == 1 || ( sel
->itB() == -1 && sel
->direction() == 0 ) )
199 #ifdef DEBUG_TEXTPAGE
200 kWarning() << "running first loop";
202 const int count
= d
->m_words
.count();
203 for ( it
= 0; it
< count
; it
++ )
205 tmp
= *d
->m_words
[ it
]->area();
206 if ( tmp
.contains( startCx
, startCy
)
207 || ( tmp
.top
<= startCy
&& tmp
.bottom
>= startCy
&& tmp
.left
>= startCx
)
208 || ( tmp
.top
>= startCy
))
210 /// we have found the (rx,ry)x(tx,ty)
212 #ifdef DEBUG_TEXTPAGE
213 kWarning() << "start is" << itB
<< "count is" << d
->m_words
.count();
221 #ifdef DEBUG_TEXTPAGE
222 kWarning() << "direction is" << sel
->direction();
223 kWarning() << "reloaded start is" << itB
<< "against" << sel
->itB();
225 if ( sel
->direction() == 0 || ( sel
->itE() == -1 && sel
->direction() == 1 ) )
227 #ifdef DEBUG_TEXTPAGE
228 kWarning() << "running second loop";
230 for ( it
= d
->m_words
.count() - 1; it
>= itB
; it
-- )
232 tmp
= *d
->m_words
[ it
]->area();
233 if ( tmp
.contains( endCx
, endCy
)
234 || ( tmp
.top
<= endCy
&& tmp
.bottom
>= endCy
&& tmp
.right
<= endCx
)
235 || ( tmp
.bottom
<= endCy
) )
237 /// we have found the (ux,uy)x(vx,vy)
239 #ifdef DEBUG_TEXTPAGE
240 kWarning() << "ending is" << itE
<< "count is" << d
->m_words
.count();
241 kWarning() << "conditions" << tmp
.contains( endCx
, endCy
) << " "
242 << ( tmp
.top
<= endCy
&& tmp
.bottom
>= endCy
&& tmp
.right
<= endCx
) << " " <<
250 #ifdef DEBUG_TEXTPAGE
251 kWarning() << "reloaded ending is" << itE
<< "against" << sel
->itE();
254 if ( sel
->itB() != -1 && sel
->itE() != -1 )
256 start
= *d
->m_words
[ sel
->itB() ]->area();
257 end
= *d
->m_words
[ sel
->itE() ]->area();
259 NormalizedRect first
, second
, third
;
260 /// finding out if there is more than one baseline between them is a hard and discussable task
261 /// we will create a rectangle (rx,0)x(tx,1) and will check how many times does it intersect the
262 /// areas, if more than one -> we have a three or over line selection
264 second
.top
= start
.bottom
;
265 first
.right
= second
.right
= 1;
267 third
.left
= second
.left
= 0;
268 second
.bottom
= end
.top
;
269 int selMax
= qMax( sel
->itB(), sel
->itE() );
270 for ( it
= qMin( sel
->itB(), sel
->itE() ); it
<= selMax
; ++it
)
272 tmp
= *d
->m_words
[ it
]->area();
273 if ( tmp
.intersects( &first
) || tmp
.intersects( &second
) || tmp
.intersects( &third
) )
274 ret
->appendShape( d
->m_words
.at( it
)->transformedArea( matrix
) );
280 NormalizedPoint startC
= sel
->start();
281 double startCx
= startC
.x
;
282 double startCy
= startC
.y
;
284 NormalizedPoint endC
= sel
->end();
285 double endCx
= endC
.x
;
286 double endCy
= endC
.y
;
288 TextList::ConstIterator it
= d
->m_words
.constBegin(), itEnd
= d
->m_words
.constEnd();
289 MergeSide side
= d
->m_page
? (MergeSide
)d
->m_page
->m_page
->totalOrientation() : MergeRight
;
290 for ( ; it
!= itEnd
; ++it
)
293 if ( ( tmp
.top
> startCy
|| ( tmp
.bottom
> startCy
&& tmp
.right
> startCx
) )
294 && ( tmp
.bottom
< endCy
|| ( tmp
.top
< endCy
&& tmp
.left
< endCx
) ) )
296 ret
->appendShape( (*it
)->transformedArea( matrix
), side
);
305 RegularAreaRect
* TextPage::findText( int searchID
, const QString
&query
, SearchDirection direct
,
306 Qt::CaseSensitivity caseSensitivity
, const RegularAreaRect
*area
)
308 SearchDirection dir
=direct
;
309 // invalid search request
310 if ( d
->m_words
.isEmpty() || query
.isEmpty() || ( area
&& area
->isNull() ) )
312 TextList::ConstIterator start
;
313 TextList::ConstIterator end
;
314 QMap
< int, SearchPoint
* >::const_iterator sIt
= d
->m_searchPoints
.constFind( searchID
);
315 if ( sIt
== d
->m_searchPoints
.constEnd() )
317 // if no previous run of this search is found, then set it to start
318 // from the beginning (respecting the search direction)
319 if ( dir
== NextResult
)
321 else if ( dir
== PreviousResult
)
328 start
= d
->m_words
.constBegin();
329 end
= d
->m_words
.constEnd();
332 start
= d
->m_words
.constEnd();
333 end
= d
->m_words
.constBegin();
334 Q_ASSERT( start
!= end
);
335 // we can safely go one step back, as we already checked
336 // that the list is not empty
341 start
= (*sIt
)->it_end
;
342 end
= d
->m_words
.constEnd();
343 if ( ( start
+ 1 ) != end
)
347 start
= (*sIt
)->it_begin
;
348 end
= d
->m_words
.constBegin();
354 RegularAreaRect
* ret
= 0;
357 ret
= d
->findTextInternalForward( searchID
, query
, caseSensitivity
, start
, end
);
361 ret
= d
->findTextInternalBackward( searchID
, query
, caseSensitivity
, start
, end
);
367 RegularAreaRect
* TextPagePrivate::findTextInternalForward( int searchID
, const QString
&_query
,
368 Qt::CaseSensitivity caseSensitivity
,
369 const TextList::ConstIterator
&start
,
370 const TextList::ConstIterator
&end
)
372 QMatrix matrix
= m_page
? m_page
->rotationMatrix() : QMatrix();
374 RegularAreaRect
* ret
=new RegularAreaRect
;
375 QString query
= (caseSensitivity
== Qt::CaseSensitive
) ? _query
: _query
.toLower();
377 // j is the current position in our query
378 // len is the length of the string in TextEntity
379 // queryLeft is the length of the query we have left
380 const TinyTextEntity
* curEntity
= 0;
381 int j
=0, len
=0, queryLeft
=query
.length();
383 bool haveMatch
=false;
384 bool offsetMoved
= false;
385 TextList::ConstIterator it
= start
;
386 TextList::ConstIterator it_begin
;
387 for ( ; it
!= end
; ++it
)
390 const QString
&str
= curEntity
->text();
391 if ( !offsetMoved
&& ( it
== start
) )
393 if ( m_searchPoints
.contains( searchID
) )
395 offset
= qMax( m_searchPoints
[ searchID
]->offset_end
, 0 );
401 int min
=qMin(queryLeft
,len
);
402 #ifdef DEBUG_TEXTPAGE
403 kDebug(OkularDebug
) << str
.mid(offset
,min
) << ":" << _query
.mid(j
,min
);
405 // we have equal (or less than) area of the query left as the length of the current
408 if ((caseSensitivity
== Qt::CaseSensitive
)
409 ? (str
.mid(offset
,min
) != query
.mid(j
,min
))
410 : (str
.mid(offset
,min
).toLower() != query
.mid(j
,min
))
413 // we not have matched
414 // this means we do not have a complete match
415 // we need to get back to query start
416 // and continue the search from this place
419 #ifdef DEBUG_TEXTPAGE
420 kDebug(OkularDebug
) << "\tnot matched";
424 queryLeft
=query
.length();
425 it_begin
= TextList::ConstIterator();
430 // move the current position in the query
431 // to the position after the length of this string
433 // substract the length of the current entity from
434 // the left length of the query
435 #ifdef DEBUG_TEXTPAGE
436 kDebug(OkularDebug
) << "\tmatched";
439 ret
->append( curEntity
->transformedArea( matrix
) );
442 if ( it_begin
== TextList::ConstIterator() )
449 if (haveMatch
&& queryLeft
==0 && j
==query
.length())
451 // save or update the search point for the current searchID
452 if ( !m_searchPoints
.contains( searchID
) )
454 SearchPoint
* newsp
= new SearchPoint
;
455 m_searchPoints
.insert( searchID
, newsp
);
457 SearchPoint
* sp
= m_searchPoints
[ searchID
];
458 sp
->it_begin
= it_begin
;
460 sp
->offset_begin
= j
;
461 sp
->offset_end
= j
+ qMin( queryLeft
, len
);
466 // end of loop - it means that we've ended the textentities
467 if ( m_searchPoints
.contains( searchID
) )
469 SearchPoint
* sp
= m_searchPoints
[ searchID
];
470 m_searchPoints
.remove( searchID
);
477 RegularAreaRect
* TextPagePrivate::findTextInternalBackward( int searchID
, const QString
&_query
,
478 Qt::CaseSensitivity caseSensitivity
,
479 const TextList::ConstIterator
&start
,
480 const TextList::ConstIterator
&end
)
482 QMatrix matrix
= m_page
? m_page
->rotationMatrix() : QMatrix();
484 RegularAreaRect
* ret
=new RegularAreaRect
;
485 const QString query
= (caseSensitivity
== Qt::CaseSensitive
) ? _query
: _query
.toLower();
487 // j is the current position in our query
488 // len is the length of the string in TextEntity
489 // queryLeft is the length of the query we have left
490 const TinyTextEntity
* curEntity
= 0;
491 int j
=query
.length() - 1, len
=0, queryLeft
=query
.length();
493 bool haveMatch
=false;
494 bool dontIncrement
=false;
495 bool offsetMoved
= false;
496 TextList::ConstIterator it
= start
;
497 TextList::ConstIterator it_begin
;
501 const QString
&str
= curEntity
->text();
502 if ( !offsetMoved
&& ( it
== start
) )
504 if ( m_searchPoints
.contains( searchID
) )
506 offset
= qMax( m_searchPoints
[ searchID
]->offset_begin
, 0 );
510 if ( query
.at(j
).isSpace() )
512 // lets match newline as a space
513 #ifdef DEBUG_TEXTPAGE
514 kDebug(OkularDebug
) << "newline or space";
518 // since we do not really need to increment this after this
519 // run of the loop finishes because we are not comparing it
520 // to any entity, rather we are deducing a situation in a document
527 int min
=qMin(queryLeft
,len
);
528 #ifdef DEBUG_TEXTPAGE
529 kDebug(OkularDebug
) << str
.right(min
) << " : " << _query
.mid(j
-min
+1,min
);
531 // we have equal (or less than) area of the query left as the length of the current
534 if ((caseSensitivity
== Qt::CaseSensitive
)
535 ? (str
.right(min
) != query
.mid(j
-min
+1,min
))
536 : (str
.right(min
).toLower() != query
.mid(j
-min
+1,min
))
539 // we not have matched
540 // this means we do not have a complete match
541 // we need to get back to query start
542 // and continue the search from this place
545 #ifdef DEBUG_TEXTPAGE
546 kDebug(OkularDebug
) << "\tnot matched";
548 j
=query
.length() - 1;
550 queryLeft
=query
.length();
551 it_begin
= TextList::ConstIterator();
556 // move the current position in the query
557 // to the position after the length of this string
559 // substract the length of the current entity from
560 // the left length of the query
561 #ifdef DEBUG_TEXTPAGE
562 kDebug(OkularDebug
) << "\tmatched";
565 ret
->append( curEntity
->transformedArea( matrix
) );
568 if ( it_begin
== TextList::ConstIterator() )
575 if (haveMatch
&& queryLeft
==0 && j
<0)
577 // save or update the search point for the current searchID
578 QMap
< int, SearchPoint
* >::iterator sIt
= m_searchPoints
.find( searchID
);
579 if ( sIt
== m_searchPoints
.end() )
581 sIt
= m_searchPoints
.insert( searchID
, new SearchPoint
);
583 SearchPoint
* sp
= *sIt
;
585 sp
->it_end
= it_begin
;
586 sp
->offset_begin
= j
;
587 sp
->offset_end
= j
+ qMin( queryLeft
, len
);
596 // end of loop - it means that we've ended the textentities
597 QMap
< int, SearchPoint
* >::iterator sIt
= m_searchPoints
.find( searchID
);
598 if ( sIt
!= m_searchPoints
.end() )
600 SearchPoint
* sp
= *sIt
;
601 m_searchPoints
.erase( sIt
);
608 QString
TextPage::text(const RegularAreaRect
*area
) const
610 if ( area
&& area
->isNull() )
613 TextList::ConstIterator it
= d
->m_words
.constBegin(), itEnd
= d
->m_words
.constEnd();
617 for ( ; it
!= itEnd
; ++it
)
619 if ( area
->intersects( (*it
)->area
) )
621 ret
+= (*it
)->text();
627 for ( ; it
!= itEnd
; ++it
)
628 ret
+= (*it
)->text();