typos
[kdegraphics.git] / okular / core / textpage.cpp
blobb4b923a1d00212f38fc930a15354f2f8a983262a
1 /***************************************************************************
2 * Copyright (C) 2005 by Piotr Szymanski <niedakh@gmail.com> *
3 * *
4 * This program is free software; you can redistribute it and/or modify *
5 * it under the terms of the GNU General Public License as published by *
6 * the Free Software Foundation; either version 2 of the License, or *
7 * (at your option) any later version. *
8 ***************************************************************************/
10 #include "textpage.h"
11 #include "textpage_p.h"
13 #include <kdebug.h>
15 #include "area.h"
16 #include "debug_p.h"
17 #include "misc.h"
18 #include "page.h"
19 #include "page_p.h"
21 #include <cstring>
23 using namespace Okular;
25 class SearchPoint
27 public:
28 SearchPoint()
29 : offset_begin( -1 ), offset_end( -1 )
33 TextList::ConstIterator it_begin;
34 TextList::ConstIterator it_end;
35 int offset_begin;
36 int offset_end;
41 Rationale behind TinyTextEntity:
43 instead of storing directly a QString for the text of an entity,
44 we store the UTF-16 data and their length. This way, we save about
45 4 int's wrt a QString, and we can create a new string from that
46 raw data (that's the only penalty of that).
48 class TinyTextEntity
50 public:
51 TinyTextEntity( const QString &text, const NormalizedRect &rect )
52 : area( rect )
54 Q_ASSERT_X( !text.isEmpty(), "TinyTextEntity", "empty string" );
55 length = text.length();
56 data = new QChar[ length ];
57 std::memcpy( data, text.constData(), length * sizeof( QChar ) );
60 ~TinyTextEntity()
62 delete [] data;
65 inline QString text() const
67 return QString::fromRawData( data, length );
70 inline NormalizedRect transformedArea( const QMatrix &matrix ) const
72 NormalizedRect transformed_area = area;
73 transformed_area.transform( matrix );
74 return transformed_area;
77 NormalizedRect area;
79 private:
80 Q_DISABLE_COPY( TinyTextEntity )
82 QChar *data;
83 int length;
87 TextEntity::TextEntity( const QString &text, NormalizedRect *area )
88 : m_text( text ), m_area( area ), d( 0 )
92 TextEntity::~TextEntity()
94 delete m_area;
97 QString TextEntity::text() const
99 return m_text;
102 NormalizedRect* TextEntity::area() const
104 return m_area;
107 NormalizedRect TextEntity::transformedArea(const QMatrix &matrix) const
109 NormalizedRect transformed_area = *m_area;
110 transformed_area.transform( matrix );
111 return transformed_area;
115 TextPagePrivate::TextPagePrivate()
116 : m_page( 0 )
120 TextPagePrivate::~TextPagePrivate()
122 qDeleteAll( m_searchPoints );
123 qDeleteAll( m_words );
127 TextPage::TextPage()
128 : d( new TextPagePrivate() )
132 TextPage::TextPage( const TextEntity::List &words )
133 : d( new TextPagePrivate() )
135 TextEntity::List::ConstIterator it = words.begin(), itEnd = words.end();
136 for ( ; it != itEnd; ++it )
138 TextEntity *e = *it;
139 if ( !e->text().isEmpty() )
140 d->m_words.append( new TinyTextEntity( e->text(), *e->area() ) );
141 delete e;
145 TextPage::~TextPage()
147 delete d;
150 void TextPage::append( const QString &text, NormalizedRect *area )
152 if ( !text.isEmpty() )
153 d->m_words.append( new TinyTextEntity( text, *area ) );
154 delete area;
157 RegularAreaRect * TextPage::textArea ( TextSelection * sel) const
159 if ( d->m_words.isEmpty() )
160 return new RegularAreaRect();
163 It works like this:
164 There are two cursors, we need to select all the text between them. The coordinates are normalised, leftTop is (0,0)
165 rightBottom is (1,1), so for cursors start (sx,sy) and end (ex,ey) we start with finding text rectangles under those
166 points, if not we search for the first that is to the right to it in the same baseline, if none found, then we search
167 for the first rectangle with a baseline under the cursor, having two points that are the best rectangles to both
168 of the cursors: (rx,ry)x(tx,ty) for start and (ux,uy)x(vx,vy) for end, we do a
169 1. (rx,ry)x(1,ty)
170 2. (0,ty)x(1,uy)
171 3. (0,uy)x(vx,vy)
173 To find the closest rectangle to cursor (cx,cy) we search for a rectangle that either contains the cursor
174 or that has a left border >= cx and bottom border >= cy.
176 RegularAreaRect * ret= new RegularAreaRect;
178 QMatrix matrix = d->m_page ? d->m_page->rotationMatrix() : QMatrix();
179 #if 0
180 int it = -1;
181 int itB = -1;
182 int itE = -1;
184 // ending cursor is higher than start cursor, we need to find positions in reverse
185 NormalizedRect tmp;
186 NormalizedRect start;
187 NormalizedRect end;
189 NormalizedPoint startC = sel->start();
190 double startCx = startC.x;
191 double startCy = startC.y;
193 NormalizedPoint endC = sel->end();
194 double endCx = endC.x;
195 double endCy = endC.y;
197 if ( sel->direction() == 1 || ( sel->itB() == -1 && sel->direction() == 0 ) )
199 #ifdef DEBUG_TEXTPAGE
200 kWarning() << "running first loop";
201 #endif
202 const int count = d->m_words.count();
203 for ( it = 0; it < count; it++ )
205 tmp = *d->m_words[ it ]->area();
206 if ( tmp.contains( startCx, startCy )
207 || ( tmp.top <= startCy && tmp.bottom >= startCy && tmp.left >= startCx )
208 || ( tmp.top >= startCy))
210 /// we have found the (rx,ry)x(tx,ty)
211 itB = it;
212 #ifdef DEBUG_TEXTPAGE
213 kWarning() << "start is" << itB << "count is" << d->m_words.count();
214 #endif
215 break;
218 sel->itB( itB );
220 itB = sel->itB();
221 #ifdef DEBUG_TEXTPAGE
222 kWarning() << "direction is" << sel->direction();
223 kWarning() << "reloaded start is" << itB << "against" << sel->itB();
224 #endif
225 if ( sel->direction() == 0 || ( sel->itE() == -1 && sel->direction() == 1 ) )
227 #ifdef DEBUG_TEXTPAGE
228 kWarning() << "running second loop";
229 #endif
230 for ( it = d->m_words.count() - 1; it >= itB; it-- )
232 tmp = *d->m_words[ it ]->area();
233 if ( tmp.contains( endCx, endCy )
234 || ( tmp.top <= endCy && tmp.bottom >= endCy && tmp.right <= endCx )
235 || ( tmp.bottom <= endCy ) )
237 /// we have found the (ux,uy)x(vx,vy)
238 itE = it;
239 #ifdef DEBUG_TEXTPAGE
240 kWarning() << "ending is" << itE << "count is" << d->m_words.count();
241 kWarning() << "conditions" << tmp.contains( endCx, endCy ) << " "
242 << ( tmp.top <= endCy && tmp.bottom >= endCy && tmp.right <= endCx ) << " " <<
243 ( tmp.top >= endCy);
244 #endif
245 break;
248 sel->itE( itE );
250 #ifdef DEBUG_TEXTPAGE
251 kWarning() << "reloaded ending is" << itE << "against" << sel->itE();
252 #endif
254 if ( sel->itB() != -1 && sel->itE() != -1 )
256 start = *d->m_words[ sel->itB() ]->area();
257 end = *d->m_words[ sel->itE() ]->area();
259 NormalizedRect first, second, third;
260 /// finding out if there is more than one baseline between them is a hard and discussable task
261 /// we will create a rectangle (rx,0)x(tx,1) and will check how many times does it intersect the
262 /// areas, if more than one -> we have a three or over line selection
263 first = start;
264 second.top = start.bottom;
265 first.right = second.right = 1;
266 third = end;
267 third.left = second.left = 0;
268 second.bottom = end.top;
269 int selMax = qMax( sel->itB(), sel->itE() );
270 for ( it = qMin( sel->itB(), sel->itE() ); it <= selMax; ++it )
272 tmp = *d->m_words[ it ]->area();
273 if ( tmp.intersects( &first ) || tmp.intersects( &second ) || tmp.intersects( &third ) )
274 ret->appendShape( d->m_words.at( it )->transformedArea( matrix ) );
277 #else
278 NormalizedRect tmp;
280 NormalizedPoint startC = sel->start();
281 double startCx = startC.x;
282 double startCy = startC.y;
284 NormalizedPoint endC = sel->end();
285 double endCx = endC.x;
286 double endCy = endC.y;
288 TextList::ConstIterator it = d->m_words.begin(), itEnd = d->m_words.end();
289 MergeSide side = d->m_page ? (MergeSide)d->m_page->m_page->totalOrientation() : MergeRight;
290 for ( ; it != itEnd; ++it )
292 tmp = (*it)->area;
293 if ( ( tmp.top > startCy || ( tmp.bottom > startCy && tmp.right > startCx ) )
294 && ( tmp.bottom < endCy || ( tmp.top < endCy && tmp.left < endCx ) ) )
296 ret->appendShape( (*it)->transformedArea( matrix ), side );
299 #endif
301 return ret;
305 RegularAreaRect* TextPage::findText( int searchID, const QString &query, SearchDirection direct,
306 Qt::CaseSensitivity caseSensitivity, const RegularAreaRect *area )
308 SearchDirection dir=direct;
309 // invalid search request
310 if ( d->m_words.isEmpty() || query.isEmpty() || ( area && area->isNull() ) )
311 return 0;
312 TextList::ConstIterator start;
313 TextList::ConstIterator end;
314 QMap< int, SearchPoint* >::const_iterator sIt = d->m_searchPoints.find( searchID );
315 if ( sIt == d->m_searchPoints.end() )
317 // if no previous run of this search is found, then set it to start
318 // from the beginning (respecting the search direction)
319 if ( dir == NextResult )
320 dir = FromTop;
321 else if ( dir == PreviousResult )
322 dir = FromBottom;
324 bool forward = true;
325 switch ( dir )
327 case FromTop:
328 start = d->m_words.begin();
329 end = d->m_words.end();
330 break;
331 case FromBottom:
332 start = d->m_words.end();
333 end = d->m_words.begin();
334 Q_ASSERT( start != end );
335 // we can safely go one step back, as we already checked
336 // that the list is not empty
337 --start;
338 forward = false;
339 break;
340 case NextResult:
341 start = (*sIt)->it_end;
342 end = d->m_words.end();
343 if ( ( start + 1 ) != end )
344 ++start;
345 break;
346 case PreviousResult:
347 start = (*sIt)->it_begin;
348 end = d->m_words.begin();
349 if ( start != end )
350 --start;
351 forward = false;
352 break;
354 RegularAreaRect* ret = 0;
355 if ( forward )
357 ret = d->findTextInternalForward( searchID, query, caseSensitivity, start, end );
359 else
361 ret = d->findTextInternalBackward( searchID, query, caseSensitivity, start, end );
363 return ret;
367 RegularAreaRect* TextPagePrivate::findTextInternalForward( int searchID, const QString &_query,
368 Qt::CaseSensitivity caseSensitivity,
369 const TextList::ConstIterator &start,
370 const TextList::ConstIterator &end )
372 QMatrix matrix = m_page ? m_page->rotationMatrix() : QMatrix();
374 RegularAreaRect* ret=new RegularAreaRect;
375 QString query = (caseSensitivity == Qt::CaseSensitive) ? _query : _query.toLower();
377 // j is the current position in our query
378 // len is the length of the string in TextEntity
379 // queryLeft is the length of the query we have left
380 const TinyTextEntity* curEntity = 0;
381 int j=0, len=0, queryLeft=query.length();
382 int offset = 0;
383 bool haveMatch=false;
384 bool dontIncrement=false;
385 bool offsetMoved = false;
386 TextList::ConstIterator it = start;
387 TextList::ConstIterator it_begin;
388 for ( ; it != end; ++it )
390 curEntity = *it;
391 const QString &str = curEntity->text();
392 if ( !offsetMoved && ( it == start ) )
394 if ( m_searchPoints.contains( searchID ) )
396 offset = qMax( m_searchPoints[ searchID ]->offset_end, 0 );
398 offsetMoved = true;
400 if ( query.at(j).isSpace() )
402 // lets match newline as a space
403 #ifdef DEBUG_TEXTPAGE
404 kDebug(OkularDebug) << "newline or space";
405 #endif
406 j++;
407 queryLeft--;
408 // since we do not really need to increment this after this
409 // run of the loop finishes because we are not comparing it
410 // to any entity, rather we are deducing a situation in a document
411 dontIncrement=true;
413 else
415 dontIncrement=false;
416 len=str.length();
417 int min=qMin(queryLeft,len);
418 #ifdef DEBUG_TEXTPAGE
419 kDebug(OkularDebug) << str.mid(offset,min) << ":" << _query.mid(j,min);
420 #endif
421 // we have equal (or less than) area of the query left as the length of the current
422 // entity
424 if ((caseSensitivity == Qt::CaseSensitive)
425 ? (str.mid(offset,min) != query.mid(j,min))
426 : (str.mid(offset,min).toLower() != query.mid(j,min))
429 // we not have matched
430 // this means we do not have a complete match
431 // we need to get back to query start
432 // and continue the search from this place
433 haveMatch=false;
434 ret->clear();
435 #ifdef DEBUG_TEXTPAGE
436 kDebug(OkularDebug) << "\tnot matched";
437 #endif
438 j=0;
439 offset = 0;
440 queryLeft=query.length();
441 it_begin = TextList::ConstIterator();
443 else
445 // we have a match
446 // move the current position in the query
447 // to the position after the length of this string
448 // we matched
449 // substract the length of the current entity from
450 // the left length of the query
451 #ifdef DEBUG_TEXTPAGE
452 kDebug(OkularDebug) << "\tmatched";
453 #endif
454 haveMatch=true;
455 ret->append( curEntity->transformedArea( matrix ) );
456 j+=min;
457 queryLeft-=min;
458 if ( it_begin == TextList::ConstIterator() )
460 it_begin = it;
465 if (haveMatch && queryLeft==0 && j==query.length())
467 // save or update the search point for the current searchID
468 if ( !m_searchPoints.contains( searchID ) )
470 SearchPoint* newsp = new SearchPoint;
471 m_searchPoints.insert( searchID, newsp );
473 SearchPoint* sp = m_searchPoints[ searchID ];
474 sp->it_begin = it_begin;
475 sp->it_end = it;
476 sp->offset_begin = j;
477 sp->offset_end = j + qMin( queryLeft, len );
478 ret->simplify();
479 return ret;
482 // end of loop - it means that we've ended the textentities
483 if ( m_searchPoints.contains( searchID ) )
485 SearchPoint* sp = m_searchPoints[ searchID ];
486 m_searchPoints.remove( searchID );
487 delete sp;
489 delete ret;
490 return 0;
493 RegularAreaRect* TextPagePrivate::findTextInternalBackward( int searchID, const QString &_query,
494 Qt::CaseSensitivity caseSensitivity,
495 const TextList::ConstIterator &start,
496 const TextList::ConstIterator &end )
498 QMatrix matrix = m_page ? m_page->rotationMatrix() : QMatrix();
500 RegularAreaRect* ret=new RegularAreaRect;
501 const QString query = (caseSensitivity == Qt::CaseSensitive) ? _query : _query.toLower();
503 // j is the current position in our query
504 // len is the length of the string in TextEntity
505 // queryLeft is the length of the query we have left
506 const TinyTextEntity* curEntity = 0;
507 int j=query.length() - 1, len=0, queryLeft=query.length();
508 int offset = 0;
509 bool haveMatch=false;
510 bool dontIncrement=false;
511 bool offsetMoved = false;
512 TextList::ConstIterator it = start;
513 TextList::ConstIterator it_begin;
514 while ( true )
516 curEntity = *it;
517 const QString &str = curEntity->text();
518 if ( !offsetMoved && ( it == start ) )
520 if ( m_searchPoints.contains( searchID ) )
522 offset = qMax( m_searchPoints[ searchID ]->offset_begin, 0 );
524 offsetMoved = true;
526 if ( query.at(j).isSpace() )
528 // lets match newline as a space
529 #ifdef DEBUG_TEXTPAGE
530 kDebug(OkularDebug) << "newline or space";
531 #endif
532 j--;
533 queryLeft--;
534 // since we do not really need to increment this after this
535 // run of the loop finishes because we are not comparing it
536 // to any entity, rather we are deducing a situation in a document
537 dontIncrement=true;
539 else
541 dontIncrement=false;
542 len=str.length();
543 int min=qMin(queryLeft,len);
544 #ifdef DEBUG_TEXTPAGE
545 kDebug(OkularDebug) << str.right(min) << " : " << _query.mid(j-min+1,min);
546 #endif
547 // we have equal (or less than) area of the query left as the length of the current
548 // entity
550 if ((caseSensitivity == Qt::CaseSensitive)
551 ? (str.right(min) != query.mid(j-min+1,min))
552 : (str.right(min).toLower() != query.mid(j-min+1,min))
555 // we not have matched
556 // this means we do not have a complete match
557 // we need to get back to query start
558 // and continue the search from this place
559 haveMatch=false;
560 ret->clear();
561 #ifdef DEBUG_TEXTPAGE
562 kDebug(OkularDebug) << "\tnot matched";
563 #endif
564 j=query.length() - 1;
565 offset = 0;
566 queryLeft=query.length();
567 it_begin = TextList::ConstIterator();
569 else
571 // we have a match
572 // move the current position in the query
573 // to the position after the length of this string
574 // we matched
575 // substract the length of the current entity from
576 // the left length of the query
577 #ifdef DEBUG_TEXTPAGE
578 kDebug(OkularDebug) << "\tmatched";
579 #endif
580 haveMatch=true;
581 ret->append( curEntity->transformedArea( matrix ) );
582 j-=min;
583 queryLeft-=min;
584 if ( it_begin == TextList::ConstIterator() )
586 it_begin = it;
591 if (haveMatch && queryLeft==0 && j<0)
593 // save or update the search point for the current searchID
594 QMap< int, SearchPoint* >::iterator sIt = m_searchPoints.find( searchID );
595 if ( sIt == m_searchPoints.end() )
597 sIt = m_searchPoints.insert( searchID, new SearchPoint );
599 SearchPoint* sp = *sIt;
600 sp->it_begin = it;
601 sp->it_end = it_begin;
602 sp->offset_begin = j;
603 sp->offset_end = j + qMin( queryLeft, len );
604 ret->simplify();
605 return ret;
607 if ( it == end )
608 break;
609 else
610 --it;
612 // end of loop - it means that we've ended the textentities
613 QMap< int, SearchPoint* >::iterator sIt = m_searchPoints.find( searchID );
614 if ( sIt != m_searchPoints.end() )
616 SearchPoint* sp = *sIt;
617 m_searchPoints.erase( sIt );
618 delete sp;
620 delete ret;
621 return 0;
624 QString TextPage::text(const RegularAreaRect *area) const
626 if ( area && area->isNull() )
627 return QString();
629 TextList::ConstIterator it = d->m_words.begin(), itEnd = d->m_words.end();
630 QString ret;
631 if ( area )
633 for ( ; it != itEnd; ++it )
635 if ( area->intersects( (*it)->area ) )
637 ret += (*it)->text();
641 else
643 for ( ; it != itEnd; ++it )
644 ret += (*it)->text();
646 return ret;