1 /***************************************************************************
2 * Copyright (C) 2005 by Piotr Szymanski <niedakh@gmail.com> *
4 * This program is free software; you can redistribute it and/or modify *
5 * it under the terms of the GNU General Public License as published by *
6 * the Free Software Foundation; either version 2 of the License, or *
7 * (at your option) any later version. *
8 ***************************************************************************/
11 #include "textpage_p.h"
23 using namespace Okular
;
29 : offset_begin( -1 ), offset_end( -1 )
33 TextList::ConstIterator it_begin
;
34 TextList::ConstIterator it_end
;
41 Rationale behind TinyTextEntity:
43 instead of storing directly a QString for the text of an entity,
44 we store the UTF-16 data and their length. This way, we save about
45 4 int's wrt a QString, and we can create a new string from that
46 raw data (that's the only penalty of that).
51 TinyTextEntity( const QString
&text
, const NormalizedRect
&rect
)
54 Q_ASSERT_X( !text
.isEmpty(), "TinyTextEntity", "empty string" );
55 length
= text
.length();
56 data
= new QChar
[ length
];
57 std::memcpy( data
, text
.constData(), length
* sizeof( QChar
) );
65 inline QString
text() const
67 return QString::fromRawData( data
, length
);
70 inline NormalizedRect
transformedArea( const QMatrix
&matrix
) const
72 NormalizedRect transformed_area
= area
;
73 transformed_area
.transform( matrix
);
74 return transformed_area
;
80 Q_DISABLE_COPY( TinyTextEntity
)
87 TextEntity::TextEntity( const QString
&text
, NormalizedRect
*area
)
88 : m_text( text
), m_area( area
), d( 0 )
92 TextEntity::~TextEntity()
97 QString
TextEntity::text() const
102 NormalizedRect
* TextEntity::area() const
107 NormalizedRect
TextEntity::transformedArea(const QMatrix
&matrix
) const
109 NormalizedRect transformed_area
= *m_area
;
110 transformed_area
.transform( matrix
);
111 return transformed_area
;
115 TextPagePrivate::TextPagePrivate()
120 TextPagePrivate::~TextPagePrivate()
122 qDeleteAll( m_searchPoints
);
123 qDeleteAll( m_words
);
128 : d( new TextPagePrivate() )
132 TextPage::TextPage( const TextEntity::List
&words
)
133 : d( new TextPagePrivate() )
135 TextEntity::List::ConstIterator it
= words
.begin(), itEnd
= words
.end();
136 for ( ; it
!= itEnd
; ++it
)
139 if ( !e
->text().isEmpty() )
140 d
->m_words
.append( new TinyTextEntity( e
->text(), *e
->area() ) );
145 TextPage::~TextPage()
150 void TextPage::append( const QString
&text
, NormalizedRect
*area
)
152 if ( !text
.isEmpty() )
153 d
->m_words
.append( new TinyTextEntity( text
, *area
) );
157 RegularAreaRect
* TextPage::textArea ( TextSelection
* sel
) const
159 if ( d
->m_words
.isEmpty() )
160 return new RegularAreaRect();
164 There are two cursors, we need to select all the text between them. The coordinates are normalised, leftTop is (0,0)
165 rightBottom is (1,1), so for cursors start (sx,sy) and end (ex,ey) we start with finding text rectangles under those
166 points, if not we search for the first that is to the right to it in the same baseline, if none found, then we search
167 for the first rectangle with a baseline under the cursor, having two points that are the best rectangles to both
168 of the cursors: (rx,ry)x(tx,ty) for start and (ux,uy)x(vx,vy) for end, we do a
173 To find the closest rectangle to cursor (cx,cy) we search for a rectangle that either contains the cursor
174 or that has a left border >= cx and bottom border >= cy.
176 RegularAreaRect
* ret
= new RegularAreaRect
;
178 QMatrix matrix
= d
->m_page
? d
->m_page
->rotationMatrix() : QMatrix();
184 // ending cursor is higher than start cursor, we need to find positions in reverse
186 NormalizedRect start
;
189 NormalizedPoint startC
= sel
->start();
190 double startCx
= startC
.x
;
191 double startCy
= startC
.y
;
193 NormalizedPoint endC
= sel
->end();
194 double endCx
= endC
.x
;
195 double endCy
= endC
.y
;
197 if ( sel
->direction() == 1 || ( sel
->itB() == -1 && sel
->direction() == 0 ) )
199 #ifdef DEBUG_TEXTPAGE
200 kWarning() << "running first loop";
202 const int count
= d
->m_words
.count();
203 for ( it
= 0; it
< count
; it
++ )
205 tmp
= *d
->m_words
[ it
]->area();
206 if ( tmp
.contains( startCx
, startCy
)
207 || ( tmp
.top
<= startCy
&& tmp
.bottom
>= startCy
&& tmp
.left
>= startCx
)
208 || ( tmp
.top
>= startCy
))
210 /// we have found the (rx,ry)x(tx,ty)
212 #ifdef DEBUG_TEXTPAGE
213 kWarning() << "start is" << itB
<< "count is" << d
->m_words
.count();
221 #ifdef DEBUG_TEXTPAGE
222 kWarning() << "direction is" << sel
->direction();
223 kWarning() << "reloaded start is" << itB
<< "against" << sel
->itB();
225 if ( sel
->direction() == 0 || ( sel
->itE() == -1 && sel
->direction() == 1 ) )
227 #ifdef DEBUG_TEXTPAGE
228 kWarning() << "running second loop";
230 for ( it
= d
->m_words
.count() - 1; it
>= itB
; it
-- )
232 tmp
= *d
->m_words
[ it
]->area();
233 if ( tmp
.contains( endCx
, endCy
)
234 || ( tmp
.top
<= endCy
&& tmp
.bottom
>= endCy
&& tmp
.right
<= endCx
)
235 || ( tmp
.bottom
<= endCy
) )
237 /// we have found the (ux,uy)x(vx,vy)
239 #ifdef DEBUG_TEXTPAGE
240 kWarning() << "ending is" << itE
<< "count is" << d
->m_words
.count();
241 kWarning() << "conditions" << tmp
.contains( endCx
, endCy
) << " "
242 << ( tmp
.top
<= endCy
&& tmp
.bottom
>= endCy
&& tmp
.right
<= endCx
) << " " <<
250 #ifdef DEBUG_TEXTPAGE
251 kWarning() << "reloaded ending is" << itE
<< "against" << sel
->itE();
254 if ( sel
->itB() != -1 && sel
->itE() != -1 )
256 start
= *d
->m_words
[ sel
->itB() ]->area();
257 end
= *d
->m_words
[ sel
->itE() ]->area();
259 NormalizedRect first
, second
, third
;
260 /// finding out if there is more than one baseline between them is a hard and discussable task
261 /// we will create a rectangle (rx,0)x(tx,1) and will check how many times does it intersect the
262 /// areas, if more than one -> we have a three or over line selection
264 second
.top
= start
.bottom
;
265 first
.right
= second
.right
= 1;
267 third
.left
= second
.left
= 0;
268 second
.bottom
= end
.top
;
269 int selMax
= qMax( sel
->itB(), sel
->itE() );
270 for ( it
= qMin( sel
->itB(), sel
->itE() ); it
<= selMax
; ++it
)
272 tmp
= *d
->m_words
[ it
]->area();
273 if ( tmp
.intersects( &first
) || tmp
.intersects( &second
) || tmp
.intersects( &third
) )
274 ret
->appendShape( d
->m_words
.at( it
)->transformedArea( matrix
) );
280 NormalizedPoint startC
= sel
->start();
281 double startCx
= startC
.x
;
282 double startCy
= startC
.y
;
284 NormalizedPoint endC
= sel
->end();
285 double endCx
= endC
.x
;
286 double endCy
= endC
.y
;
288 TextList::ConstIterator it
= d
->m_words
.begin(), itEnd
= d
->m_words
.end();
289 MergeSide side
= d
->m_page
? (MergeSide
)d
->m_page
->m_page
->totalOrientation() : MergeRight
;
290 for ( ; it
!= itEnd
; ++it
)
293 if ( ( tmp
.top
> startCy
|| ( tmp
.bottom
> startCy
&& tmp
.right
> startCx
) )
294 && ( tmp
.bottom
< endCy
|| ( tmp
.top
< endCy
&& tmp
.left
< endCx
) ) )
296 ret
->appendShape( (*it
)->transformedArea( matrix
), side
);
305 RegularAreaRect
* TextPage::findText( int searchID
, const QString
&query
, SearchDirection direct
,
306 Qt::CaseSensitivity caseSensitivity
, const RegularAreaRect
*area
)
308 SearchDirection dir
=direct
;
309 // invalid search request
310 if ( d
->m_words
.isEmpty() || query
.isEmpty() || ( area
&& area
->isNull() ) )
312 TextList::ConstIterator start
;
313 TextList::ConstIterator end
;
314 QMap
< int, SearchPoint
* >::const_iterator sIt
= d
->m_searchPoints
.find( searchID
);
315 if ( sIt
== d
->m_searchPoints
.end() )
317 // if no previous run of this search is found, then set it to start
318 // from the beginning (respecting the search direction)
319 if ( dir
== NextResult
)
321 else if ( dir
== PreviousResult
)
328 start
= d
->m_words
.begin();
329 end
= d
->m_words
.end();
332 start
= d
->m_words
.end();
333 end
= d
->m_words
.begin();
334 Q_ASSERT( start
!= end
);
335 // we can safely go one step back, as we already checked
336 // that the list is not empty
341 start
= (*sIt
)->it_end
;
342 end
= d
->m_words
.end();
343 if ( ( start
+ 1 ) != end
)
347 start
= (*sIt
)->it_begin
;
348 end
= d
->m_words
.begin();
354 RegularAreaRect
* ret
= 0;
357 ret
= d
->findTextInternalForward( searchID
, query
, caseSensitivity
, start
, end
);
361 ret
= d
->findTextInternalBackward( searchID
, query
, caseSensitivity
, start
, end
);
367 RegularAreaRect
* TextPagePrivate::findTextInternalForward( int searchID
, const QString
&_query
,
368 Qt::CaseSensitivity caseSensitivity
,
369 const TextList::ConstIterator
&start
,
370 const TextList::ConstIterator
&end
)
372 QMatrix matrix
= m_page
? m_page
->rotationMatrix() : QMatrix();
374 RegularAreaRect
* ret
=new RegularAreaRect
;
375 QString query
= (caseSensitivity
== Qt::CaseSensitive
) ? _query
: _query
.toLower();
377 // j is the current position in our query
378 // len is the length of the string in TextEntity
379 // queryLeft is the length of the query we have left
380 const TinyTextEntity
* curEntity
= 0;
381 int j
=0, len
=0, queryLeft
=query
.length();
383 bool haveMatch
=false;
384 bool dontIncrement
=false;
385 bool offsetMoved
= false;
386 TextList::ConstIterator it
= start
;
387 TextList::ConstIterator it_begin
;
388 for ( ; it
!= end
; ++it
)
391 const QString
&str
= curEntity
->text();
392 if ( !offsetMoved
&& ( it
== start
) )
394 if ( m_searchPoints
.contains( searchID
) )
396 offset
= qMax( m_searchPoints
[ searchID
]->offset_end
, 0 );
400 if ( query
.at(j
).isSpace() )
402 // lets match newline as a space
403 #ifdef DEBUG_TEXTPAGE
404 kDebug(OkularDebug
) << "newline or space";
408 // since we do not really need to increment this after this
409 // run of the loop finishes because we are not comparing it
410 // to any entity, rather we are deducing a situation in a document
417 int min
=qMin(queryLeft
,len
);
418 #ifdef DEBUG_TEXTPAGE
419 kDebug(OkularDebug
) << str
.mid(offset
,min
) << ":" << _query
.mid(j
,min
);
421 // we have equal (or less than) area of the query left as the length of the current
424 if ((caseSensitivity
== Qt::CaseSensitive
)
425 ? (str
.mid(offset
,min
) != query
.mid(j
,min
))
426 : (str
.mid(offset
,min
).toLower() != query
.mid(j
,min
))
429 // we not have matched
430 // this means we do not have a complete match
431 // we need to get back to query start
432 // and continue the search from this place
435 #ifdef DEBUG_TEXTPAGE
436 kDebug(OkularDebug
) << "\tnot matched";
440 queryLeft
=query
.length();
441 it_begin
= TextList::ConstIterator();
446 // move the current position in the query
447 // to the position after the length of this string
449 // substract the length of the current entity from
450 // the left length of the query
451 #ifdef DEBUG_TEXTPAGE
452 kDebug(OkularDebug
) << "\tmatched";
455 ret
->append( curEntity
->transformedArea( matrix
) );
458 if ( it_begin
== TextList::ConstIterator() )
465 if (haveMatch
&& queryLeft
==0 && j
==query
.length())
467 // save or update the search point for the current searchID
468 if ( !m_searchPoints
.contains( searchID
) )
470 SearchPoint
* newsp
= new SearchPoint
;
471 m_searchPoints
.insert( searchID
, newsp
);
473 SearchPoint
* sp
= m_searchPoints
[ searchID
];
474 sp
->it_begin
= it_begin
;
476 sp
->offset_begin
= j
;
477 sp
->offset_end
= j
+ qMin( queryLeft
, len
);
482 // end of loop - it means that we've ended the textentities
483 if ( m_searchPoints
.contains( searchID
) )
485 SearchPoint
* sp
= m_searchPoints
[ searchID
];
486 m_searchPoints
.remove( searchID
);
493 RegularAreaRect
* TextPagePrivate::findTextInternalBackward( int searchID
, const QString
&_query
,
494 Qt::CaseSensitivity caseSensitivity
,
495 const TextList::ConstIterator
&start
,
496 const TextList::ConstIterator
&end
)
498 QMatrix matrix
= m_page
? m_page
->rotationMatrix() : QMatrix();
500 RegularAreaRect
* ret
=new RegularAreaRect
;
501 const QString query
= (caseSensitivity
== Qt::CaseSensitive
) ? _query
: _query
.toLower();
503 // j is the current position in our query
504 // len is the length of the string in TextEntity
505 // queryLeft is the length of the query we have left
506 const TinyTextEntity
* curEntity
= 0;
507 int j
=query
.length() - 1, len
=0, queryLeft
=query
.length();
509 bool haveMatch
=false;
510 bool dontIncrement
=false;
511 bool offsetMoved
= false;
512 TextList::ConstIterator it
= start
;
513 TextList::ConstIterator it_begin
;
517 const QString
&str
= curEntity
->text();
518 if ( !offsetMoved
&& ( it
== start
) )
520 if ( m_searchPoints
.contains( searchID
) )
522 offset
= qMax( m_searchPoints
[ searchID
]->offset_begin
, 0 );
526 if ( query
.at(j
).isSpace() )
528 // lets match newline as a space
529 #ifdef DEBUG_TEXTPAGE
530 kDebug(OkularDebug
) << "newline or space";
534 // since we do not really need to increment this after this
535 // run of the loop finishes because we are not comparing it
536 // to any entity, rather we are deducing a situation in a document
543 int min
=qMin(queryLeft
,len
);
544 #ifdef DEBUG_TEXTPAGE
545 kDebug(OkularDebug
) << str
.right(min
) << " : " << _query
.mid(j
-min
+1,min
);
547 // we have equal (or less than) area of the query left as the length of the current
550 if ((caseSensitivity
== Qt::CaseSensitive
)
551 ? (str
.right(min
) != query
.mid(j
-min
+1,min
))
552 : (str
.right(min
).toLower() != query
.mid(j
-min
+1,min
))
555 // we not have matched
556 // this means we do not have a complete match
557 // we need to get back to query start
558 // and continue the search from this place
561 #ifdef DEBUG_TEXTPAGE
562 kDebug(OkularDebug
) << "\tnot matched";
564 j
=query
.length() - 1;
566 queryLeft
=query
.length();
567 it_begin
= TextList::ConstIterator();
572 // move the current position in the query
573 // to the position after the length of this string
575 // substract the length of the current entity from
576 // the left length of the query
577 #ifdef DEBUG_TEXTPAGE
578 kDebug(OkularDebug
) << "\tmatched";
581 ret
->append( curEntity
->transformedArea( matrix
) );
584 if ( it_begin
== TextList::ConstIterator() )
591 if (haveMatch
&& queryLeft
==0 && j
<0)
593 // save or update the search point for the current searchID
594 QMap
< int, SearchPoint
* >::iterator sIt
= m_searchPoints
.find( searchID
);
595 if ( sIt
== m_searchPoints
.end() )
597 sIt
= m_searchPoints
.insert( searchID
, new SearchPoint
);
599 SearchPoint
* sp
= *sIt
;
601 sp
->it_end
= it_begin
;
602 sp
->offset_begin
= j
;
603 sp
->offset_end
= j
+ qMin( queryLeft
, len
);
612 // end of loop - it means that we've ended the textentities
613 QMap
< int, SearchPoint
* >::iterator sIt
= m_searchPoints
.find( searchID
);
614 if ( sIt
!= m_searchPoints
.end() )
616 SearchPoint
* sp
= *sIt
;
617 m_searchPoints
.erase( sIt
);
624 QString
TextPage::text(const RegularAreaRect
*area
) const
626 if ( area
&& area
->isNull() )
629 TextList::ConstIterator it
= d
->m_words
.begin(), itEnd
= d
->m_words
.end();
633 for ( ; it
!= itEnd
; ++it
)
635 if ( area
->intersects( (*it
)->area
) )
637 ret
+= (*it
)->text();
643 for ( ; it
!= itEnd
; ++it
)
644 ret
+= (*it
)->text();