3 final class PhutilSearchStemmerTestCase
4 extends PhutilTestCase
{
/**
 * Check single-token stemming against a table of known inputs.
 *
 * Each visible table entry maps a raw token to the stem it is expected to
 * produce. In the entries shown here, different inflections of one word
 * ("component"/"components", "deleting"/"deletion", ...) share a single
 * stem, while a token containing "_" or "." is expected back unchanged.
 *
 * NOTE(review): the statement that compares $stem with $expect is not
 * visible in this excerpt; presumably an equality assertion that uses the
 * pht() message below as its failure label. Confirm against the full file.
 */
6 public function testStemTokens() {
8 // Various real-world cases collected from users before we implemented
16 'components' => 'compon',
17 'component' => 'compon',
19 'implementation' => 'implement',
20 'implements' => 'implement',
21 'implementing' => 'implement',
22 'implementer' => 'implement',
24 'deleting' => 'delet',
25 'deletion' => 'delet',
28 'erratically' => 'errat',
31 // Stems should be normalized.
34 // If stemming would bring a token under 3 characters, it should not
39 // Complex tokens with internal punctuation should be left untouched;
40 // these are usually things like domain names, API calls, informal tags,
43 'bananas' => 'banana',
44 'apples_bananas' => 'apples_bananas',
45 'apples_bananas.apples_bananas' => 'apples_bananas.apples_bananas',
48 $stemmer = new PhutilSearchStemmer();
// One stemmer instance is reused for every table entry; the array key is
// the raw token and the array value is the stem it should yield.
49 foreach ($tests as $input => $expect) {
50 $stem = $stemmer->stemToken($input);
54 pht('Token stem of "%s".', $input));
/**
 * Check whole-document stemming against a table of sample texts.
 *
 * Each key is a document string (two keys are built by concatenating
 * adjacent literals) and each value is the output expected from
 * PhutilSearchStemmer::stemCorpus() for that document.
 *
 * In the visible expectations the output is lowercase, sentence punctuation
 * is gone, words shorter than three characters ("me", "on", "is", "a") do
 * not appear, a repeated word appears once, hyphenated words are split into
 * separately stemmed words, and tokens joined by "_" or "." come back
 * unchanged.
 *
 * NOTE(review): the statement that compares $stem with $expect lies outside
 * this excerpt; presumably an equality assertion labelled by the pht()
 * message below. Confirm against the full file.
 */
58 public function testStemDocuments() {
60 'The wild boar meandered erratically.' =>
61 'the wild boar meander errat',
62 'Fool me onc, shame on you. Fool me twice, shame on me.' =>
63 'fool onc shame you twice',
64 'Fireball is a seventh-level spell which deals 2d16 points of damage '.
65 'in a 1-meter radius around a target.' =>
66 'firebal seventh level spell which deal 2d16 point damag meter '.
67 'radiu around target',
68 'apples-bananas' => 'appl banana',
69 'apples_bananas' => 'apples_bananas',
70 'apples.bananas' => 'apples.bananas',
71 'oddly-proportioned' => 'oddli proport',
74 $stemmer = new PhutilSearchStemmer();
// The array key is the source document and the array value is the stemmed
// corpus string it should produce.
75 foreach ($tests as $input => $expect) {
76 $stem = $stemmer->stemCorpus($input);
80 pht('Corpus stem of: %s', $input));