compile
[kdegraphics.git] / okular / core / textpage.cpp
blobaf698a403b8b169151d751418d3ab43d136ec518
1 /***************************************************************************
2 * Copyright (C) 2005 by Piotr Szymanski <niedakh@gmail.com> *
3 * *
4 * This program is free software; you can redistribute it and/or modify *
5 * it under the terms of the GNU General Public License as published by *
6 * the Free Software Foundation; either version 2 of the License, or *
7 * (at your option) any later version. *
8 ***************************************************************************/
10 #include "textpage.h"
11 #include "textpage_p.h"
13 #include <kdebug.h>
15 #include "area.h"
16 #include "debug_p.h"
17 #include "misc.h"
18 #include "page.h"
19 #include "page_p.h"
21 #include <cstring>
23 using namespace Okular;
25 class SearchPoint
27 public:
28 SearchPoint()
29 : offset_begin( -1 ), offset_end( -1 )
33 TextList::ConstIterator it_begin;
34 TextList::ConstIterator it_end;
35 int offset_begin;
36 int offset_end;
41 Rationale behind TinyTextEntity:
43 instead of storing directly a QString for the text of an entity,
44 we store the UTF-16 data and their length. This way, we save about
45 4 int's wrt a QString, and we can create a new string from that
46 raw data (that's the only penalty of that).
48 class TinyTextEntity
50 public:
51 TinyTextEntity( const QString &text, const NormalizedRect &rect )
52 : area( rect )
54 Q_ASSERT_X( !text.isEmpty(), "TinyTextEntity", "empty string" );
55 length = text.length();
56 data = new QChar[ length ];
57 std::memcpy( data, text.constData(), length * sizeof( QChar ) );
60 ~TinyTextEntity()
62 delete [] data;
65 inline QString text() const
67 return QString::fromRawData( data, length );
70 inline NormalizedRect transformedArea( const QMatrix &matrix ) const
72 NormalizedRect transformed_area = area;
73 transformed_area.transform( matrix );
74 return transformed_area;
77 NormalizedRect area;
79 private:
80 Q_DISABLE_COPY( TinyTextEntity )
82 QChar *data;
83 int length;
87 TextEntity::TextEntity( const QString &text, NormalizedRect *area )
88 : m_text( text ), m_area( area ), d( 0 )
92 TextEntity::~TextEntity()
94 delete m_area;
97 QString TextEntity::text() const
99 return m_text;
102 NormalizedRect* TextEntity::area() const
104 return m_area;
107 NormalizedRect TextEntity::transformedArea(const QMatrix &matrix) const
109 NormalizedRect transformed_area = *m_area;
110 transformed_area.transform( matrix );
111 return transformed_area;
115 TextPagePrivate::TextPagePrivate()
116 : m_page( 0 )
120 TextPagePrivate::~TextPagePrivate()
122 qDeleteAll( m_searchPoints );
123 qDeleteAll( m_words );
127 TextPage::TextPage()
128 : d( new TextPagePrivate() )
132 TextPage::TextPage( const TextEntity::List &words )
133 : d( new TextPagePrivate() )
135 TextEntity::List::ConstIterator it = words.constBegin(), itEnd = words.constEnd();
136 for ( ; it != itEnd; ++it )
138 TextEntity *e = *it;
139 if ( !e->text().isEmpty() )
140 d->m_words.append( new TinyTextEntity( e->text(), *e->area() ) );
141 delete e;
145 TextPage::~TextPage()
147 delete d;
150 void TextPage::append( const QString &text, NormalizedRect *area )
152 if ( !text.isEmpty() )
153 d->m_words.append( new TinyTextEntity( text, *area ) );
154 delete area;
157 RegularAreaRect * TextPage::textArea ( TextSelection * sel) const
159 if ( d->m_words.isEmpty() )
160 return new RegularAreaRect();
163 It works like this:
164 There are two cursors, we need to select all the text between them. The coordinates are normalised, leftTop is (0,0)
165 rightBottom is (1,1), so for cursors start (sx,sy) and end (ex,ey) we start with finding text rectangles under those
166 points, if not we search for the first that is to the right to it in the same baseline, if none found, then we search
167 for the first rectangle with a baseline under the cursor, having two points that are the best rectangles to both
168 of the cursors: (rx,ry)x(tx,ty) for start and (ux,uy)x(vx,vy) for end, we do a
169 1. (rx,ry)x(1,ty)
170 2. (0,ty)x(1,uy)
171 3. (0,uy)x(vx,vy)
173 To find the closest rectangle to cursor (cx,cy) we search for a rectangle that either contains the cursor
174 or that has a left border >= cx and bottom border >= cy.
176 RegularAreaRect * ret= new RegularAreaRect;
178 QMatrix matrix = d->m_page ? d->m_page->rotationMatrix() : QMatrix();
179 #if 0
180 int it = -1;
181 int itB = -1;
182 int itE = -1;
184 // ending cursor is higher than start cursor, we need to find positions in reverse
185 NormalizedRect tmp;
186 NormalizedRect start;
187 NormalizedRect end;
189 NormalizedPoint startC = sel->start();
190 double startCx = startC.x;
191 double startCy = startC.y;
193 NormalizedPoint endC = sel->end();
194 double endCx = endC.x;
195 double endCy = endC.y;
197 if ( sel->direction() == 1 || ( sel->itB() == -1 && sel->direction() == 0 ) )
199 #ifdef DEBUG_TEXTPAGE
200 kWarning() << "running first loop";
201 #endif
202 const int count = d->m_words.count();
203 for ( it = 0; it < count; it++ )
205 tmp = *d->m_words[ it ]->area();
206 if ( tmp.contains( startCx, startCy )
207 || ( tmp.top <= startCy && tmp.bottom >= startCy && tmp.left >= startCx )
208 || ( tmp.top >= startCy))
210 /// we have found the (rx,ry)x(tx,ty)
211 itB = it;
212 #ifdef DEBUG_TEXTPAGE
213 kWarning() << "start is" << itB << "count is" << d->m_words.count();
214 #endif
215 break;
218 sel->itB( itB );
220 itB = sel->itB();
221 #ifdef DEBUG_TEXTPAGE
222 kWarning() << "direction is" << sel->direction();
223 kWarning() << "reloaded start is" << itB << "against" << sel->itB();
224 #endif
225 if ( sel->direction() == 0 || ( sel->itE() == -1 && sel->direction() == 1 ) )
227 #ifdef DEBUG_TEXTPAGE
228 kWarning() << "running second loop";
229 #endif
230 for ( it = d->m_words.count() - 1; it >= itB; it-- )
232 tmp = *d->m_words[ it ]->area();
233 if ( tmp.contains( endCx, endCy )
234 || ( tmp.top <= endCy && tmp.bottom >= endCy && tmp.right <= endCx )
235 || ( tmp.bottom <= endCy ) )
237 /// we have found the (ux,uy)x(vx,vy)
238 itE = it;
239 #ifdef DEBUG_TEXTPAGE
240 kWarning() << "ending is" << itE << "count is" << d->m_words.count();
241 kWarning() << "conditions" << tmp.contains( endCx, endCy ) << " "
242 << ( tmp.top <= endCy && tmp.bottom >= endCy && tmp.right <= endCx ) << " " <<
243 ( tmp.top >= endCy);
244 #endif
245 break;
248 sel->itE( itE );
250 #ifdef DEBUG_TEXTPAGE
251 kWarning() << "reloaded ending is" << itE << "against" << sel->itE();
252 #endif
254 if ( sel->itB() != -1 && sel->itE() != -1 )
256 start = *d->m_words[ sel->itB() ]->area();
257 end = *d->m_words[ sel->itE() ]->area();
259 NormalizedRect first, second, third;
260 /// finding out if there is more than one baseline between them is a hard and discussable task
261 /// we will create a rectangle (rx,0)x(tx,1) and will check how many times does it intersect the
262 /// areas, if more than one -> we have a three or over line selection
263 first = start;
264 second.top = start.bottom;
265 first.right = second.right = 1;
266 third = end;
267 third.left = second.left = 0;
268 second.bottom = end.top;
269 int selMax = qMax( sel->itB(), sel->itE() );
270 for ( it = qMin( sel->itB(), sel->itE() ); it <= selMax; ++it )
272 tmp = *d->m_words[ it ]->area();
273 if ( tmp.intersects( &first ) || tmp.intersects( &second ) || tmp.intersects( &third ) )
274 ret->appendShape( d->m_words.at( it )->transformedArea( matrix ) );
277 #else
278 NormalizedRect tmp;
280 NormalizedPoint startC = sel->start();
281 double startCx = startC.x;
282 double startCy = startC.y;
284 NormalizedPoint endC = sel->end();
285 double endCx = endC.x;
286 double endCy = endC.y;
288 TextList::ConstIterator it = d->m_words.constBegin(), itEnd = d->m_words.constEnd();
289 MergeSide side = d->m_page ? (MergeSide)d->m_page->m_page->totalOrientation() : MergeRight;
290 for ( ; it != itEnd; ++it )
292 tmp = (*it)->area;
293 if ( ( tmp.top > startCy || ( tmp.bottom > startCy && tmp.right > startCx ) )
294 && ( tmp.bottom < endCy || ( tmp.top < endCy && tmp.left < endCx ) ) )
296 ret->appendShape( (*it)->transformedArea( matrix ), side );
299 #endif
301 return ret;
305 RegularAreaRect* TextPage::findText( int searchID, const QString &query, SearchDirection direct,
306 Qt::CaseSensitivity caseSensitivity, const RegularAreaRect *area )
308 SearchDirection dir=direct;
309 // invalid search request
310 if ( d->m_words.isEmpty() || query.isEmpty() || ( area && area->isNull() ) )
311 return 0;
312 TextList::ConstIterator start;
313 TextList::ConstIterator end;
314 QMap< int, SearchPoint* >::const_iterator sIt = d->m_searchPoints.constFind( searchID );
315 if ( sIt == d->m_searchPoints.constEnd() )
317 // if no previous run of this search is found, then set it to start
318 // from the beginning (respecting the search direction)
319 if ( dir == NextResult )
320 dir = FromTop;
321 else if ( dir == PreviousResult )
322 dir = FromBottom;
324 bool forward = true;
325 switch ( dir )
327 case FromTop:
328 start = d->m_words.constBegin();
329 end = d->m_words.constEnd();
330 break;
331 case FromBottom:
332 start = d->m_words.constEnd();
333 end = d->m_words.constBegin();
334 Q_ASSERT( start != end );
335 // we can safely go one step back, as we already checked
336 // that the list is not empty
337 --start;
338 forward = false;
339 break;
340 case NextResult:
341 start = (*sIt)->it_end;
342 end = d->m_words.constEnd();
343 if ( ( start + 1 ) != end )
344 ++start;
345 break;
346 case PreviousResult:
347 start = (*sIt)->it_begin;
348 end = d->m_words.constBegin();
349 if ( start != end )
350 --start;
351 forward = false;
352 break;
354 RegularAreaRect* ret = 0;
355 if ( forward )
357 ret = d->findTextInternalForward( searchID, query, caseSensitivity, start, end );
359 else
361 ret = d->findTextInternalBackward( searchID, query, caseSensitivity, start, end );
363 return ret;
367 RegularAreaRect* TextPagePrivate::findTextInternalForward( int searchID, const QString &_query,
368 Qt::CaseSensitivity caseSensitivity,
369 const TextList::ConstIterator &start,
370 const TextList::ConstIterator &end )
372 QMatrix matrix = m_page ? m_page->rotationMatrix() : QMatrix();
374 RegularAreaRect* ret=new RegularAreaRect;
375 QString query = (caseSensitivity == Qt::CaseSensitive) ? _query : _query.toLower();
377 // j is the current position in our query
378 // len is the length of the string in TextEntity
379 // queryLeft is the length of the query we have left
380 const TinyTextEntity* curEntity = 0;
381 int j=0, len=0, queryLeft=query.length();
382 int offset = 0;
383 bool haveMatch=false;
384 bool offsetMoved = false;
385 TextList::ConstIterator it = start;
386 TextList::ConstIterator it_begin;
387 for ( ; it != end; ++it )
389 curEntity = *it;
390 const QString &str = curEntity->text();
391 if ( !offsetMoved && ( it == start ) )
393 if ( m_searchPoints.contains( searchID ) )
395 offset = qMax( m_searchPoints[ searchID ]->offset_end, 0 );
397 offsetMoved = true;
400 len=str.length();
401 int min=qMin(queryLeft,len);
402 #ifdef DEBUG_TEXTPAGE
403 kDebug(OkularDebug) << str.mid(offset,min) << ":" << _query.mid(j,min);
404 #endif
405 // we have equal (or less than) area of the query left as the length of the current
406 // entity
408 if ((caseSensitivity == Qt::CaseSensitive)
409 ? (str.mid(offset,min) != query.mid(j,min))
410 : (str.mid(offset,min).toLower() != query.mid(j,min))
413 // we not have matched
414 // this means we do not have a complete match
415 // we need to get back to query start
416 // and continue the search from this place
417 haveMatch=false;
418 ret->clear();
419 #ifdef DEBUG_TEXTPAGE
420 kDebug(OkularDebug) << "\tnot matched";
421 #endif
422 j=0;
423 offset = 0;
424 queryLeft=query.length();
425 it_begin = TextList::ConstIterator();
427 else
429 // we have a match
430 // move the current position in the query
431 // to the position after the length of this string
432 // we matched
433 // substract the length of the current entity from
434 // the left length of the query
435 #ifdef DEBUG_TEXTPAGE
436 kDebug(OkularDebug) << "\tmatched";
437 #endif
438 haveMatch=true;
439 ret->append( curEntity->transformedArea( matrix ) );
440 j+=min;
441 queryLeft-=min;
442 if ( it_begin == TextList::ConstIterator() )
444 it_begin = it;
449 if (haveMatch && queryLeft==0 && j==query.length())
451 // save or update the search point for the current searchID
452 if ( !m_searchPoints.contains( searchID ) )
454 SearchPoint* newsp = new SearchPoint;
455 m_searchPoints.insert( searchID, newsp );
457 SearchPoint* sp = m_searchPoints[ searchID ];
458 sp->it_begin = it_begin;
459 sp->it_end = it;
460 sp->offset_begin = j;
461 sp->offset_end = j + qMin( queryLeft, len );
462 ret->simplify();
463 return ret;
466 // end of loop - it means that we've ended the textentities
467 if ( m_searchPoints.contains( searchID ) )
469 SearchPoint* sp = m_searchPoints[ searchID ];
470 m_searchPoints.remove( searchID );
471 delete sp;
473 delete ret;
474 return 0;
477 RegularAreaRect* TextPagePrivate::findTextInternalBackward( int searchID, const QString &_query,
478 Qt::CaseSensitivity caseSensitivity,
479 const TextList::ConstIterator &start,
480 const TextList::ConstIterator &end )
482 QMatrix matrix = m_page ? m_page->rotationMatrix() : QMatrix();
484 RegularAreaRect* ret=new RegularAreaRect;
485 const QString query = (caseSensitivity == Qt::CaseSensitive) ? _query : _query.toLower();
487 // j is the current position in our query
488 // len is the length of the string in TextEntity
489 // queryLeft is the length of the query we have left
490 const TinyTextEntity* curEntity = 0;
491 int j=query.length() - 1, len=0, queryLeft=query.length();
492 int offset = 0;
493 bool haveMatch=false;
494 bool dontIncrement=false;
495 bool offsetMoved = false;
496 TextList::ConstIterator it = start;
497 TextList::ConstIterator it_begin;
498 while ( true )
500 curEntity = *it;
501 const QString &str = curEntity->text();
502 if ( !offsetMoved && ( it == start ) )
504 if ( m_searchPoints.contains( searchID ) )
506 offset = qMax( m_searchPoints[ searchID ]->offset_begin, 0 );
508 offsetMoved = true;
510 if ( query.at(j).isSpace() )
512 // lets match newline as a space
513 #ifdef DEBUG_TEXTPAGE
514 kDebug(OkularDebug) << "newline or space";
515 #endif
516 j--;
517 queryLeft--;
518 // since we do not really need to increment this after this
519 // run of the loop finishes because we are not comparing it
520 // to any entity, rather we are deducing a situation in a document
521 dontIncrement=true;
523 else
525 dontIncrement=false;
526 len=str.length();
527 int min=qMin(queryLeft,len);
528 #ifdef DEBUG_TEXTPAGE
529 kDebug(OkularDebug) << str.right(min) << " : " << _query.mid(j-min+1,min);
530 #endif
531 // we have equal (or less than) area of the query left as the length of the current
532 // entity
534 if ((caseSensitivity == Qt::CaseSensitive)
535 ? (str.right(min) != query.mid(j-min+1,min))
536 : (str.right(min).toLower() != query.mid(j-min+1,min))
539 // we not have matched
540 // this means we do not have a complete match
541 // we need to get back to query start
542 // and continue the search from this place
543 haveMatch=false;
544 ret->clear();
545 #ifdef DEBUG_TEXTPAGE
546 kDebug(OkularDebug) << "\tnot matched";
547 #endif
548 j=query.length() - 1;
549 offset = 0;
550 queryLeft=query.length();
551 it_begin = TextList::ConstIterator();
553 else
555 // we have a match
556 // move the current position in the query
557 // to the position after the length of this string
558 // we matched
559 // substract the length of the current entity from
560 // the left length of the query
561 #ifdef DEBUG_TEXTPAGE
562 kDebug(OkularDebug) << "\tmatched";
563 #endif
564 haveMatch=true;
565 ret->append( curEntity->transformedArea( matrix ) );
566 j-=min;
567 queryLeft-=min;
568 if ( it_begin == TextList::ConstIterator() )
570 it_begin = it;
575 if (haveMatch && queryLeft==0 && j<0)
577 // save or update the search point for the current searchID
578 QMap< int, SearchPoint* >::iterator sIt = m_searchPoints.find( searchID );
579 if ( sIt == m_searchPoints.end() )
581 sIt = m_searchPoints.insert( searchID, new SearchPoint );
583 SearchPoint* sp = *sIt;
584 sp->it_begin = it;
585 sp->it_end = it_begin;
586 sp->offset_begin = j;
587 sp->offset_end = j + qMin( queryLeft, len );
588 ret->simplify();
589 return ret;
591 if ( it == end )
592 break;
593 else
594 --it;
596 // end of loop - it means that we've ended the textentities
597 QMap< int, SearchPoint* >::iterator sIt = m_searchPoints.find( searchID );
598 if ( sIt != m_searchPoints.end() )
600 SearchPoint* sp = *sIt;
601 m_searchPoints.erase( sIt );
602 delete sp;
604 delete ret;
605 return 0;
608 QString TextPage::text(const RegularAreaRect *area) const
610 if ( area && area->isNull() )
611 return QString();
613 TextList::ConstIterator it = d->m_words.constBegin(), itEnd = d->m_words.constEnd();
614 QString ret;
615 if ( area )
617 for ( ; it != itEnd; ++it )
619 if ( area->intersects( (*it)->area ) )
621 ret += (*it)->text();
625 else
627 for ( ; it != itEnd; ++it )
628 ret += (*it)->text();
630 return ret;