3 * WikiSearchThread is a background thread that does searches.
4 * See WikiFetchThread for more details on design.
7 package com
.piclab
.wikitest
;
8 import com
.meterware
.httpunit
.*;
10 public class WikiSearchThread
extends Thread
{
12 private WebConversation m_conv
;
13 private int m_totalsearches
;
14 private long m_totaltime
;
15 private volatile boolean m_running
;
17 public WikiSearchThread() {
18 m_conv
= new WebConversation();
23 public int getSearches() { return m_totalsearches
; }
24 public long getTime() { return m_totaltime
; }
25 public void requestStop() { m_running
= false; }
28 * First, a list of miscellaneous words that do appear in the
29 * titles and text of the testing articles.
32 public static String
[] searchterms
= {
33 "agriculture", "husbandry", "corn", "vegetable", "farming", "nitrogen",
34 "anthropology", "human", "culture", "language", "networks", "society",
35 "archaeology", "archeology", "history", "marxism", "ownership", "land",
36 "architecture", "building", "landscape", "furniture", "carpentry", "roman",
37 "astronomy", "astrophysics", "astrology", "star", "cosmology", "galaxy",
38 "biology", "life", "monet", "evolution", "species", "animal", "plant",
39 "business", "industry", "capitalism", "commerce", "company", "corporation",
40 "chemistry", "atom", "organic", "element", "alchemy", "polymer",
41 "classics", "greece", "rome", "latin", "literature", "mythology", "art",
42 "communication", "media", "television", "radio", "film", "mail",
43 "computer", "engineering", "linguistics", "algorithm", "graphics", "logic",
44 "cooking", "food", "heat", "cuisine", "ethnic", "nutrition", "flavor",
45 "critical", "theory", "frankfurt", "postmodernism", "weber",
46 "dance", "rhythm", "music", "recreation", "performance", "ballet",
47 "earth", "science", "geology", "weather", "fossil", "ocean", "environment",
48 "economic", "scarcity", "communism", "socialism", "utility", "money",
49 "education", "teaching", "knowledge", "reading", "testing", "school",
50 "technology", "civil", "mechanical", "nuclear", "process", "control",
51 "entertainment", "animation", "sport", "humor", "illusion", "theater",
52 "family", "consumer", "parenting", "sewing", "homemaker", "decoration",
53 "movie", "cinema", "director", "actor", "genre", "studio", "festival",
54 "game", "card", "board", "competition", "probability", "drinking", "dice",
55 "geography", "map", "projection", "continent", "island", "river", "sea",
56 "handicraft", "bead", "marquetry", "paper", "wood", "garden", "metal",
57 "history", "etymology", "orthodox", "controversy", "pasteur", "method",
58 "hobby", "pastime", "professional", "amateur", "collecting", "genealogy",
59 "language", "othography", "writing", "alphabet", "phonetic", "speech",
60 "law", "taboo", "more", "jurisdiction", "legislature", "judge", "police",
61 "library", "information", "book", "journal", "pediodical", "database",
62 "philology", "syntax", "semantic", "lexicology", "comparative", "cipher",
63 "letter", "rhetoric", "bible", "poem", "novel", "epic", "essay", "drama",
64 "math", "mathematics","statistics", "number", "algebra", "calculus",
65 "music", "melody", "instrument", "ensemble", "orchestra", "harmony",
66 "opera", "costume", "dialogue", "acting", "voice", "libretto", "stage",
67 "painting", "glaze", "acrylic", "mural", "portrait", "canvas", "fresco",
68 "philosophy", "concept", "dialectic", "beauty", "ethic", "aristotle",
69 "physics", "matter", "space", "energy", "quantum", "particle", "momentum",
70 "poker", "stud", "wager", "gambling", "joker", "flush", "chip", "deal",
71 "political", "politics", "government", "violence", "democracy", "fascism",
72 "psychology", "freud", "ethology", "medicine", "therapy", "drug", "health",
73 "public", "policy", "activism", "defense", "tax", "administration",
74 "recreation", "weekend", "holiday", "vacation", "leisure", "sex",
75 "religion", "christianity", "judaism", "islam", "deity", "faith", "priest",
76 "sculpture", "clay", "marble", "mobile", "kinetic", "statue", "bust",
77 "sociology", "kinship", "criminology", "race", "revolution", "gender",
78 "sport", "equipment", "injury", "spectator", "football", "baseball",
79 "invention", "recording", "cryptography", "metallurgy", "hydraulic",
80 "theatre", "mime", "tennessee", "lighting", "scenery", "improvisation",
81 "tourism", "travel", "sightseeing", "hotel", "camping", "cruise",
82 "transport", "vehicle", "airline", "train", "ferry", "subway", "car",
83 "visual", "design", "photography", "fashion", "tattoo", "textile"
87 * Then, a list of miscellaneous words that may or may not appear
88 * in the test articles.
91 public static String
[] randomterms
= {
92 "abatement", "acacia", "aerate", "allergy", "anvil", "ashtray", "auger",
93 "badger", "bakery", "benign", "biceps", "bookie", "brazen", "bulldog",
94 "caliber", "castigate", "centipede", "chemise", "clamor", "cupboard",
95 "dawn", "debris", "derrick", "dig", "divorce", "doublet", "drummer",
96 "ebony", "eclipse", "elitist", "emulator", "escrow", "euphoria", "evade",
97 "famine", "feedback", "fiefdom", "flax", "fox", "freckle", "funnel",
98 "gallop", "ghetto", "gingham", "gnat", "gossip", "grudge", "guitar",
99 "halogen", "hedgehog", "heuristic", "hillbilly", "hologram", "hyacinth",
100 "idealism", "illustrator", "impeach", "income", "injunction", "irony",
101 "janitor", "jellyfish", "jitterbug", "journalism", "juggling", "jury",
102 "kamikaze", "kerosene", "kindergarten", "kitten", "klaxon", "knuckle",
103 "lager", "leech", "lentil", "libido", "locust", "lox", "lullabye", "lyre",
104 "magenta", "marigold", "mediator", "mileage", "monarch", "municipal",
105 "navigation", "neurosis", "nicotine", "nostalgia", "nucleus", "nymph",
106 "oasis", "obscene", "oilcloth", "oratory", "osmosis", "ovary", "owl",
107 "parsley", "perplex", "phony", "pilgrim", "pliers", "pompadour", "prose",
108 "quahog", "quaver", "quench", "queue", "quilt", "quince", "quotient",
109 "railroad", "ravine", "recipe", "rescue", "rig", "roast", "ruthless",
110 "sarcasm", "sclerosis", "sellout", "shanty", "sigma", "skyhook", "synod",
111 "tantrum", "tenacious", "thorn", "tithe", "tonsil", "trauma", "tyranny",
112 "ulcer", "umpire", "unicorn", "unravel", "urinate", "upload", "utensil",
113 "valence", "veranda", "viewpoint", "volunteer", "vow", "vulnerable",
114 "wanton", "welcome", "wharf", "whiz", "wilderness", "woofer", "wretch",
115 "xanthate", "yeast", "yeoman", "yonder", "zealous", "zen", "zonal"
119 int tindex
= 0, rindex
= 0;
124 String prefix
= WikiSuite
.getServer() + WikiSuite
.getScript() + "?search=";
128 while ( m_running
) {
131 term
= searchterms
[tindex
] + " AND " + randomterms
[rindex
];
132 } else if ( r
< 0.3 ) {
133 term
= searchterms
[tindex
];
134 if ( ++tindex
>= searchterms
.length
) { tindex
= 0; }
135 term
+= " AND " + searchterms
[tindex
];
136 } else if ( r
< 0.4 ) {
137 term
= searchterms
[tindex
] + " OR " + randomterms
[rindex
];
138 } else if ( r
< 0.5 ) {
139 term
= randomterms
[rindex
];
140 if ( ++rindex
>= randomterms
.length
) { rindex
= 0; }
141 term
+= " OR " + randomterms
[rindex
];
142 } else if ( r
< 0.7 ) {
143 term
= randomterms
[rindex
];
145 term
= searchterms
[tindex
];
148 start
= System
.currentTimeMillis();
150 term
= java
.net
.URLEncoder
.encode( term
, "UTF-8" );
151 wr
= m_conv
.getResponse( prefix
+ term
);
152 } catch ( java
.io
.UnsupportedEncodingException e
) {
154 } catch (Exception e
) {
155 WikiSuite
.warning( "Error (" + e
+ ") searching for \"" + term
+ "\"" );
157 end
= System
.currentTimeMillis();
159 WikiSuite
.finer( "Searched for \"" + term
+ "\"" );
161 m_totaltime
+= ( end
- start
);
163 if ( ++tindex
>= searchterms
.length
) { tindex
= 0; }
164 if ( ++rindex
>= randomterms
.length
) { rindex
= 0; }
167 Thread
.sleep( 3000 );
168 } catch( InterruptedException e
) {
172 synchronized (this) { notify(); }