| 1 | /** |
| 2 | * |
| 3 | */ |
| 4 | package net.digitaltsunami.word.trie; |
| 5 | |
| 6 | import java.util.ArrayList; |
| 7 | import java.util.Arrays; |
| 8 | import java.util.Collection; |
| 9 | import java.util.Collections; |
| 10 | import java.util.LinkedList; |
| 11 | import java.util.List; |
| 12 | |
| 13 | import net.digitaltsunami.word.trie.event.NodeAddedEvent; |
| 14 | import net.digitaltsunami.word.trie.event.NodeAddedListener; |
| 15 | import net.digitaltsunami.word.trie.event.NodeEventListenerList; |
| 16 | import net.digitaltsunami.word.trie.event.TerminusNodeAddedEvent; |
| 17 | import net.digitaltsunami.word.trie.filter.CharFilter; |
| 18 | import net.digitaltsunami.word.trie.filter.TermFilter; |
| 19 | |
| 20 | /** |
| 21 | * <strong> JAVADOCS are a WORK IN PROGRESS</strong> |
| 22 | * <p> |
| 23 | * A dictionary of terms stored using a Trie structure. |
| 24 | * |
| 25 | * <p> |
| 26 | * case insensitive |
| 27 | * <p> |
| 28 | * ordered results |
| 29 | * <p> |
| 30 | * filters |
| 31 | * <p> |
| 32 | * filter term applied before char |
| 33 | * |
| 34 | * <p> |
| 35 | * To allow for additional processing as characters are added, a notification |
| 36 | * framework is provided. Event processing will be done using post filter |
| 37 | * processing, so excluded characters will not fire events. Events fired are |
| 38 | * listed below in descending order of frequency. |
| 39 | * <ul> |
| 40 | * <li><em>Character added</em> - Will be fired for each character as it is |
| 41 | * added. Will be fired whether or not the character had previously been added. |
| 42 | * <p> |
| 43 | * <strong>NOTE:</strong> This event also includes all other events (terminus |
| 44 | * characters, new nodes, new terminus nodes).</li> |
| 45 | * <li><em>Node added</em> - Will be fired for each node that is added. Will not |
| 46 | * fire if a node existed for the character being added. Note that this includes |
| 47 | * terminus nodes. |
| 48 | * <p> |
| 49 | * <strong>NOTE:</strong> This event also includes new terminus nodes.</li> |
| 50 | * <li><em>Terminus Character added</em> - Will be fired as each terminus |
| 51 | * character is added (i.e., fire only upon the last character in the term) |
| 52 | * <p> |
| 53 | * <strong>NOTE:</strong> This event also includes new terminus nodes.</li> |
| 54 | * <li><em>Terminus Node added</em> - Will be fired for each terminus node that |
| 55 | * is added (i.e., as each new term is added). This will fire for both new nodes |
| 56 | * and for existing non-terminus nodes that are converted see |
| 57 | * {@link #addTerminusNodeAddedListener(NodeAddedListener)} for more information |
| 58 | * regarding conversion. |
| 59 | * </ul> |
| 60 | * |
| 61 | * @author dhagberg |
| 62 | */ |
| 63 | public class CharTrie { |
| 64 | /** |
| 65 | * Default value used to indicate a wildcard character in position within a |
| 66 | * pattern. |
| 67 | */ |
| 68 | public static final char WILDCARD_CHAR = '~'; |
| 69 | /** |
| 70 | * Wildcard character used in pattern searches. Defaults to |
| 71 | * {@link #WILDCARD_CHAR}. |
| 72 | */ |
| 73 | private char wildcardChar = WILDCARD_CHAR; |
| 74 | |
| 75 | /** Empty node used as root of trie. */ |
| 76 | private final CharTrieNode root; |
| 77 | /** |
| 78 | * Optional character filter that will be applied to each input character if |
| 79 | * provided. |
| 80 | */ |
| 81 | private final CharFilter charFilter; |
| 82 | /** |
| 83 | * Optional term filter that will be applied to each input term if provided. |
| 84 | */ |
| 85 | private final TermFilter termFilter; |
| 86 | |
| 87 | /** Number of unique words contained within this dictionary. */ |
| 88 | private int wordCount; |
| 89 | /** |
| 90 | * Implementation of abstract factory used as delegate when adding nodes. |
| 91 | */ |
| 92 | private final CharTrieNodeFactory nodeFactory; |
| 93 | /** |
| 94 | * Handles event firing as new characters/nodes are added. |
| 95 | */ |
| 96 | private final NodeEventListenerList listenerList = new NodeEventListenerList(); |
| 97 | /** |
| 98 | * Strategy to use when querying for patterns. Defaults to recursive query |
| 99 | * if not set. |
| 100 | */ |
| 101 | private PatternSearchStrategy patternSearchStrategy; |
| 102 | |
| 103 | /** |
| 104 | * Create a default dictionary. All terms will be added without any |
| 105 | * filtering applied. |
| 106 | * <p> |
| 107 | * All nodes will be created using the {@link LexCharTrieNodeFactory}. |
| 108 | */ |
| 109 | public CharTrie() { |
| 110 | this(null, null); |
| 111 | } |
| 112 | |
| 113 | /** |
| 114 | * Create a dictionary using the provided character filter. All terms added |
| 115 | * to this dictionary will be passed through the filter prior to being |
| 116 | * added. |
| 117 | * <p> |
| 118 | * All nodes will be created using the {@link LexCharTrieNodeFactory}. |
| 119 | * |
| 120 | * @param charFilter |
| 121 | */ |
| 122 | public CharTrie(CharFilter charFilter) { |
| 123 | this(charFilter, null); |
| 124 | } |
| 125 | |
| 126 | /** |
| 127 | * Create a dictionary using the provided term filter. All terms added to |
| 128 | * this dictionary will be passed through the filter prior to being added. |
| 129 | * <p> |
| 130 | * All nodes will be created using the {@link LexCharTrieNodeFactory}. |
| 131 | * |
| 132 | * @param termFilter |
| 133 | */ |
| 134 | public CharTrie(TermFilter termFilter) { |
| 135 | this(null, termFilter); |
| 136 | } |
| 137 | |
| 138 | /** |
| 139 | * Create a dictionary using the provided term and character filters. All |
| 140 | * terms added to this dictionary will be passed through these filters prior |
| 141 | * to being added. The term filter will be applied first, followed by the |
| 142 | * character filter. |
| 143 | * <p> |
| 144 | * All nodes will be created using the {@link LexCharTrieNodeFactory}. |
| 145 | * |
| 146 | * @param charFilter |
| 147 | * @param termFilter |
| 148 | */ |
| 149 | public CharTrie(CharFilter charFilter, TermFilter termFilter) { |
| 150 | this(charFilter, termFilter, new LexCharTrieNodeFactory()); |
| 151 | } |
| 152 | |
| 153 | /** |
| 154 | * Create a dictionary using the provided term and character filters. All |
| 155 | * terms added to this dictionary will be passed through these filters prior |
| 156 | * to being added. The term filter will be applied first, followed by the |
| 157 | * character filter. |
| 158 | * <p> |
| 159 | * All nodes will be created using the provided {@link CharTrieNodeFactory}. |
| 160 | * |
| 161 | * @param charFilter |
| 162 | * @param termFilter |
| 163 | * @param nodeFactory |
| 164 | */ |
| 165 | public CharTrie(CharFilter charFilter, TermFilter termFilter, CharTrieNodeFactory nodeFactory) { |
| 166 | root = nodeFactory.createRootNode(); |
| 167 | this.charFilter = charFilter; |
| 168 | this.termFilter = termFilter; |
| 169 | this.nodeFactory = nodeFactory; |
| 170 | this.patternSearchStrategy = new PatternSearchRecursiveStrategy(); |
| 171 | } |
| 172 | |
| 173 | /** |
| 174 | * Add a term to the dictionary. The characters will be added, starting at |
| 175 | * the root of the dictionary. Both term and character filters will be |
| 176 | * applied if applicable, which may alter the term prior to entering the |
| 177 | * nodes. |
| 178 | * <p> |
| 179 | * All characters will be converted to lower case prior to placing in the |
| 180 | * dictionary. This will happen regardless of the filters applied as this |
| 181 | * operation is done after the filters have modified the term. |
| 182 | * <p> |
| 183 | * All nodes will be created using the current {@link CharTrieNodeFactory} |
| 184 | * set for this trie. Terminus nodes will be created using the same factory |
| 185 | * and will pass the original term to the node. |
| 186 | * |
| 187 | * @param term |
| 188 | * A string of characters to add. |
| 189 | */ |
| 190 | public void addTerm(String term) { |
| 191 | String originalTerm = term; |
| 192 | CharTrieNode currentNode = root; |
| 193 | if (termFilter != null) { |
| 194 | term = termFilter.apply(term); |
| 195 | if (term == TermFilter.SKIP_TERM) { |
| 196 | // Term Filter decided to skip this entry, just return. |
| 197 | return; |
| 198 | } |
| 199 | } |
| 200 | char[] termArray = term.toCharArray(); |
| 201 | termArray = applyCharFilter(termArray); |
| 202 | |
| 203 | // Don't count length until after filter applied as it may have changed. |
| 204 | int termLen = termArray.length; |
| 205 | if (termLen == 0) { |
| 206 | // Char Filter removed all characters, just return. |
| 207 | return; |
| 208 | } |
| 209 | |
| 210 | // Convert all remaining characters to lower case. |
| 211 | for (int i = 0; i < termLen; i++) { |
| 212 | termArray[i] = Character.toLowerCase(termArray[i]); |
| 213 | |
| 214 | } |
| 215 | |
| 216 | int lenUpToTerminus = termLen - 1; |
| 217 | int termPos = 0; |
| 218 | |
| 219 | /* Position node at last found */ |
| 220 | CharTrieNode node; |
| 221 | for (; termPos < termLen; termPos++) { |
| 222 | node = currentNode.getChild(termArray[termPos]); |
| 223 | // Did not reach the end of the array, will have to add the rest. |
| 224 | if (node == null) { |
| 225 | break; |
| 226 | } |
| 227 | // Terminus characters will be counted following this loop. |
| 228 | if (termPos < lenUpToTerminus) { |
| 229 | listenerList.dispatchCharacterAddedEvent(node); |
| 230 | } |
| 231 | currentNode = node; |
| 232 | } |
| 233 | // If at end, sequence already existed but may or may not have been a |
| 234 | // term. If node already a terminus, return, otherwise convert to |
| 235 | // terminus. |
| 236 | if (termPos == termLen) { |
| 237 | if (currentNode.isTerminus()) { |
| 238 | listenerList |
| 239 | .dispatchTerminusCharacterAddedEvent((CharTrieTerminusNode) currentNode); |
| 240 | return; |
| 241 | } else { |
| 242 | currentNode = nodeFactory.convertToTerminus(currentNode, originalTerm); |
| 243 | listenerList.dispatchTerminusNodeAddedEvent((CharTrieTerminusNode) currentNode); |
| 244 | wordCount++; |
| 245 | return; |
| 246 | } |
| 247 | |
| 248 | } |
| 249 | for (; termPos < lenUpToTerminus; termPos++) { |
| 250 | currentNode = nodeFactory.addChild(currentNode, termArray[termPos]); |
| 251 | listenerList.dispatchNodeAddedEvent(currentNode); |
| 252 | } |
| 253 | // Mark the last node as the end of a sequence if the sequence has not |
| 254 | // previously been added. |
| 255 | currentNode = nodeFactory.addChildTerminus(currentNode, termArray[termPos], originalTerm); |
| 256 | listenerList.dispatchTerminusNodeAddedEvent((CharTrieTerminusNode) currentNode); |
| 257 | wordCount++; |
| 258 | } |
| 259 | |
| 260 | /** |
| 261 | * Apply the char filter if applicable on all characters within the |
| 262 | * termArray. The results will be returned in a character array. If there is |
| 263 | * no applicable filter, the original results will be returned. If the |
| 264 | * filter is applied, the length of the results may be less than or equal to |
| 265 | * the original length. |
| 266 | * |
| 267 | * @param termArray |
| 268 | * @return |
| 269 | */ |
| 270 | private char[] applyCharFilter(char[] termArray) { |
| 271 | if (charFilter != null) { |
| 272 | int currentIndex = 0; |
| 273 | for (int i = 0; i < termArray.length; i++) { |
| 274 | char value = charFilter.apply(termArray[i]); |
| 275 | if (value != CharFilter.SKIP_CHAR) { |
| 276 | termArray[currentIndex] = charFilter.apply(termArray[i]); |
| 277 | currentIndex++; |
| 278 | } |
| 279 | } |
| 280 | // If any characters were skipped, then recreate array with new |
| 281 | // term. |
| 282 | if (currentIndex != termArray.length) { |
| 283 | termArray = Arrays.copyOf(termArray, currentIndex); |
| 284 | |
| 285 | } |
| 286 | } |
| 287 | return termArray; |
| 288 | } |
| 289 | |
| 290 | /** |
| 291 | * Find and return all terms within the dictionary beginning with the |
| 292 | * provided prefix. The query will start at the root of the dictionary. |
| 293 | * |
| 294 | * @param parent |
| 295 | * Node at which the query will begin |
| 296 | * @param prefix |
| 297 | * Common prefix to all terms to be returned. |
| 298 | * @return A set of all terms beginning with the provided prefix. If no |
| 299 | * terms found, an empty set will be returned. |
| 300 | */ |
| 301 | public Collection<String> findTerms(String prefix) { |
| 302 | return findTerms(root, prefix); |
| 303 | } |
| 304 | |
| 305 | /** |
| 306 | * Find and return all terms within the dictionary beginning with the |
| 307 | * provided prefix. The query will start with the children of the provided |
| 308 | * parent node, but will not include it. |
| 309 | * |
| 310 | * @param parent |
| 311 | * Node at which the query will begin |
| 312 | * @param prefix |
| 313 | * Common prefix to all terms to be returned. |
| 314 | * @return A set of all terms beginning with the provided prefix. If no |
| 315 | * terms found, an empty set will be returned. |
| 316 | */ |
| 317 | public Collection<String> findTerms(CharTrieNode parent, String prefix) { |
| 318 | String lcPrefix = prefix.toLowerCase(); |
| 319 | List<CharTrieNode> prefixNodes = findChildSequence(parent, lcPrefix); |
| 320 | if (prefixNodes.isEmpty()) { |
| 321 | return Collections.emptyList(); |
| 322 | } |
| 323 | |
| 324 | List<String> terms = new LinkedList<String>(); |
| 325 | StringBuilder termBuff = new StringBuilder(lcPrefix); |
| 326 | int prefixLen = lcPrefix.length(); |
| 327 | |
| 328 | /* |
| 329 | * Starting with the last node in the prefix node as a parent node, |
| 330 | * descend the trie looking for nodes with a terminus of true. |
| 331 | */ |
| 332 | prefixLen--; // Set the prefix to the last char in the prefix. |
| 333 | CharTrieNode current = prefixNodes.get(prefixLen); |
| 334 | if (current.isTerminus()) { |
| 335 | terms.add(termBuff.toString()); |
| 336 | } |
| 337 | // findTerms(terms, current, termBuff, prefixLen); |
| 338 | findTerms(terms, (LinkedCharTrieNode) current, termBuff, prefixLen); |
| 339 | |
| 340 | return terms; |
| 341 | } |
| 342 | |
| 343 | /** |
| 344 | * Find and return all terms within the dictionary matching the provided |
| 345 | * pattern. The pattern provided is currently limited to fixed and wildcard |
| 346 | * characters for a specific position. For example, with the default |
| 347 | * wildcard char of ~ (tilde), a~~m will return all terms that begin with a, |
| 348 | * end with m and are exactly four characters in length. Unlike |
| 349 | * {@link #findTerms(String)}, this method will return only the matches and |
| 350 | * not all terms beginning with the pattern. |
| 351 | * <p> |
| 352 | * The query will start at the root of the dictionary. |
| 353 | * |
| 354 | * @param pattern |
| 355 | * Mix of fixed and/or {@link #wildcardChar} characters to match. |
| 356 | * @return A set of all terms matching the provided pattern. If no terms |
| 357 | * found, an empty set will be returned. |
| 358 | * @see #WILDCARD_CHAR for default wildcard value. |
| 359 | */ |
| 360 | public Collection<String> findPattern(String pattern) { |
| 361 | return patternSearchStrategy.findPattern(pattern, root, wildcardChar); |
| 362 | } |
| 363 | |
| 364 | /** |
| 365 | * Returns true if the dictionary contains the provided term. |
| 366 | * |
| 367 | * @param term |
| 368 | * Term to query for. |
| 369 | * @return true if the term was found, false otherwise. |
| 370 | */ |
| 371 | public boolean contains(String term) { |
| 372 | String lcTerm = term.toLowerCase(); |
| 373 | List<CharTrieNode> prefixNodes = findChildSequence(root, lcTerm); |
| 374 | if (prefixNodes.isEmpty()) { |
| 375 | return false; |
| 376 | } else { |
| 377 | // If the last node is a terminus, then this is a complete term and |
| 378 | // was found, otherwise, it is a fragment and the query term was not |
| 379 | // found. |
| 380 | return prefixNodes.get(prefixNodes.size() - 1).isTerminus(); |
| 381 | } |
| 382 | } |
| 383 | |
| 384 | /** |
| 385 | * Return all terms within this dictionary. |
| 386 | * |
| 387 | * @return Return all terms within this dictionary. |
| 388 | */ |
| 389 | public Collection<String> getAllTerms() { |
| 390 | List<String> terms = new LinkedList<String>(); |
| 391 | int prefixLen = 0; |
| 392 | |
| 393 | /* |
| 394 | * Starting with the root node as the parent node, descend the trie |
| 395 | * looking for nodes with a terminus of true. |
| 396 | */ |
| 397 | for (CharTrieNode child : root) { |
| 398 | StringBuilder termBuff = new StringBuilder(); |
| 399 | termBuff.append(child.getValue()); |
| 400 | if (child.isTerminus()) { |
| 401 | terms.add(termBuff.toString()); |
| 402 | } |
| 403 | findTerms(terms, child, termBuff, prefixLen); |
| 404 | } |
| 405 | |
| 406 | return terms; |
| 407 | } |
| 408 | |
| 409 | /** |
| 410 | * Internal recursive method to walk the nodes searching for complete words |
| 411 | * within the trie. Each word will be added to the provided |
| 412 | * {@link Collection}. |
| 413 | * |
| 414 | * @param terms |
| 415 | * @param current |
| 416 | * @param termBuff |
| 417 | * @param currentLength |
| 418 | */ |
| 419 | private void findTerms(Collection<String> terms, CharTrieNode current, StringBuilder termBuff, |
| 420 | int currentLength) { |
| 421 | currentLength++; |
| 422 | for (CharTrieNode child : current) { |
| 423 | termBuff.setLength(currentLength); |
| 424 | termBuff.append(child.getValue()); |
| 425 | if (child.isTerminus()) { |
| 426 | terms.add(termBuff.toString()); |
| 427 | } |
| 428 | findTerms(terms, child, termBuff, currentLength); |
| 429 | } |
| 430 | } |
| 431 | |
| 432 | /** |
| 433 | * Internal recursive method to walk the nodes searching for complete words |
| 434 | * within the trie. Each word will be added to the provided |
| 435 | * {@link Collection}. |
| 436 | * |
| 437 | * @param terms |
| 438 | * @param current |
| 439 | * @param termBuff |
| 440 | * @param currentLength |
| 441 | */ |
| 442 | private void findTerms(Collection<String> terms, LinkedCharTrieNode current, |
| 443 | StringBuilder termBuff, int currentLength) { |
| 444 | currentLength++; |
| 445 | LinkedCharTrieNode child = current.getFirstChild(); |
| 446 | while (child != null) { |
| 447 | termBuff.setLength(currentLength); |
| 448 | termBuff.append(child.getValue()); |
| 449 | if (child.isTerminus()) { |
| 450 | terms.add(termBuff.toString()); |
| 451 | } |
| 452 | findTerms(terms, child, termBuff, currentLength); |
| 453 | child = child.getNextSibling(); |
| 454 | } |
| 455 | } |
| 456 | |
| 457 | /** |
| 458 | * Return a {@link List} of {@link CharTrieNode} objects representing the |
| 459 | * provided sequence. The search will begin by matching the first character |
| 460 | * in the sequence with the first child of the provided node. The sequence |
| 461 | * is a match if there is a corresponding child node at the each level |
| 462 | * indicated by the position in the sequence. |
| 463 | * <p> |
| 464 | * Note: Each of the {@link CharTrieNode}s in the returned list is the full |
| 465 | * node and may contain other children in addition to the requested node. |
| 466 | * |
| 467 | * @param parent |
| 468 | * {@link CharTrieNode} from which the query for children will |
| 469 | * start. This node will <strong>not</strong> be included in the |
| 470 | * returned list. |
| 471 | * @param sequence |
| 472 | * String of characters to search for. |
| 473 | * @return A {@link List} of {@link CharTrieNode} elements for the sequence. |
| 474 | * If an exact match is not found, then an empty list will be |
| 475 | * returned. |
| 476 | */ |
| 477 | protected List<CharTrieNode> findChildSequence(CharTrieNode parent, String sequence) { |
| 478 | List<CharTrieNode> nodes = new ArrayList<CharTrieNode>(sequence.length()); |
| 479 | for (char elem : sequence.toCharArray()) { |
| 480 | CharTrieNode node = parent.getChild(elem); |
| 481 | if (node == null) { |
| 482 | return Collections.emptyList(); // Use empty list so nodes can |
| 483 | // be GC'd |
| 484 | } |
| 485 | nodes.add(node); |
| 486 | parent = node; |
| 487 | } |
| 488 | return nodes; |
| 489 | } |
| 490 | |
| 491 | /** |
| 492 | * Return a {@link List} of {@link CharTrieNode} objects representing the |
| 493 | * provided sequence. The search will begin by matching the first character |
| 494 | * in the sequence beginning at the root of the dictionary. The sequence is |
| 495 | * a match if there is a corresponding child node at the each level |
| 496 | * indicated by the position in the sequence. |
| 497 | * <p> |
| 498 | * Note: Each of the {@link CharTrieNode}s in the returned list is the full |
| 499 | * node and may contain other children in addition to the requested node. |
| 500 | * |
| 501 | * @param sequence |
| 502 | * String of characters to search for. |
| 503 | * @return A {@link List} of {@link CharTrieNode} elements for the sequence. |
| 504 | * If an exact match is not found, then an empty list will be |
| 505 | * returned. |
| 506 | */ |
| 507 | protected List<CharTrieNode> findSequence(String sequence) { |
| 508 | return findChildSequence(root, sequence); |
| 509 | } |
| 510 | |
| 511 | /** |
| 512 | * Return the number of unique entry terms. |
| 513 | * |
| 514 | * @return the number of unique terms within this dictionary. |
| 515 | */ |
| 516 | public int getWordCount() { |
| 517 | return wordCount; |
| 518 | } |
| 519 | |
| 520 | /** |
| 521 | * Add a listener for events fired as each character is added. |
| 522 | * <p> |
| 523 | * Listeners on this event will be notified as each character that is added |
| 524 | * to the dictionary regardless of prior status. Notification will be |
| 525 | * through the invocation of |
| 526 | * {@link NodeAddedListener#characterAdded(NodeAddedEvent)}. |
| 527 | * <p> |
| 528 | * <strong>NOTE:</strong> Events fired for characters being added will |
| 529 | * include terminus characters as well. Adding the same listener for both |
| 530 | * character and terminus character events will result in two invocations on |
| 531 | * the listener for terminus characters. |
| 532 | * |
| 533 | * @param listener |
| 534 | * interested when a character is added. |
| 535 | */ |
| 536 | public void addCharacterAddedListener(NodeAddedListener listener) { |
| 537 | listenerList.addCharacterAddedListener(listener); |
| 538 | } |
| 539 | |
| 540 | /** |
| 541 | * Remove the specified listener for events fired as each character is |
| 542 | * added. |
| 543 | */ |
| 544 | public void removeCharacterAddedListener(NodeAddedListener listener) { |
| 545 | listenerList.removeCharacterAddedListener(listener); |
| 546 | } |
| 547 | |
| 548 | /** |
| 549 | * Add a listener for events fired when a new terminus character is added. |
| 550 | * <p> |
| 551 | * Listeners on this event will be invoked for each terminus character that |
| 552 | * is added to the dictionary regardless of prior status. Notification will |
| 553 | * be through the invocation of |
| 554 | * {@link NodeAddedListener#terminusCharacterAdded(net.digitaltsunami.word.trie.event.TerminusNodeAddedEvent)}. |
| 555 | * <p> |
| 556 | * <strong>NOTE:</strong> Events fired for characters being added will |
| 557 | * include terminus characters as well. Adding the same listener for both |
| 558 | * character and terminus character events will result in two invocations on |
| 559 | * the listener for terminus characters. |
| 560 | * |
| 561 | * @param listener |
| 562 | * interested when a terminus character is added. |
| 563 | */ |
| 564 | public void addCharacterTerminusAddedListener(NodeAddedListener listener) { |
| 565 | listenerList.addTerminusCharacterAddedListener(listener); |
| 566 | } |
| 567 | |
| 568 | /** |
| 569 | * Remove the specified listener for events fired when a terminus character |
| 570 | * is added. |
| 571 | */ |
| 572 | public void removeCharacterTerminusAddedListener(NodeAddedListener listener) { |
| 573 | listenerList.removeTerminusCharacterAddedListener(listener); |
| 574 | } |
| 575 | |
| 576 | /** |
| 577 | * Add a listener for events fired when a new node is added. |
| 578 | * <p> |
| 579 | * Listeners on this event will be invoked for each node that is |
| 580 | * <strong>added</strong> to the dictionary. This will occur only when the |
| 581 | * node did not already exist. Notification will be through the invocation |
| 582 | * of {@link NodeAddedListener#nodeAdded(NodeAddedEvent)}. |
| 583 | * <p> |
| 584 | * <strong>NOTE:</strong> Events fired for characters being added will |
| 585 | * include terminus characters as well. Adding the same listener for both |
| 586 | * character and terminus character events will result in two invocations on |
| 587 | * the listener for terminus characters. |
| 588 | * |
| 589 | * @param listener |
| 590 | * interested when a node is added. |
| 591 | */ |
| 592 | public void addNodeAddedListener(NodeAddedListener listener) { |
| 593 | listenerList.addNodeAddedListener(listener); |
| 594 | } |
| 595 | |
| 596 | /** |
| 597 | * Remove the specified listener for events fired when a node is added. |
| 598 | */ |
| 599 | public void removeNodeAddedListener(NodeAddedListener listener) { |
| 600 | listenerList.removeNodeAddedListener(listener); |
| 601 | } |
| 602 | |
| 603 | /** |
| 604 | * Add a listener for events fired when a new terminus node is added or an |
| 605 | * existing non-terminus node is converted to a terminus node. |
| 606 | * <p> |
| 607 | * Listeners on this event will be notified for each node that is |
| 608 | * <strong>added</strong> to the dictionary. This will occur only when the |
| 609 | * terminus node did not already exist. If the node existed, but was a |
| 610 | * non-terminus node and was converted, then the listener will be notified. |
| 611 | * Notification will be through the invocation of |
| 612 | * {@link NodeAddedListener#terminusNodeAdded(TerminusNodeAddedEvent)}. |
| 613 | * <p> |
| 614 | * <strong>NOTE:</strong> Events fired for characters being added will |
| 615 | * include terminus characters as well. Adding the same listener for both |
| 616 | * character and terminus character events will result in two invocations on |
| 617 | * the listener for terminus characters. |
| 618 | * |
| 619 | * @param listener |
| 620 | * interested when a terminus node is added. |
| 621 | */ |
| 622 | public void addTerminusNodeAddedListener(NodeAddedListener listener) { |
| 623 | listenerList.addTerminusNodeAddedListener(listener); |
| 624 | } |
| 625 | |
| 626 | /** |
| 627 | * Remove the specified listener for events fired when a terminus node is |
| 628 | * added. |
| 629 | */ |
| 630 | public void removeTerminusNodeAddedListener(NodeAddedListener listener) { |
| 631 | listenerList.removeTerminusNodeAddedListener(listener); |
| 632 | } |
| 633 | |
| 634 | /** |
| 635 | * Sets the pattern search strategy to use for this dictionary. The strategy |
| 636 | * must implement {@link PatternSearchStrategy}. |
| 637 | * |
| 638 | * @param patternSearchStrategy |
| 639 | * the patternSearchStrategy to set |
| 640 | */ |
| 641 | public void setPatternSearchStrategy(PatternSearchStrategy patternSearchStrategy) { |
| 642 | this.patternSearchStrategy = patternSearchStrategy; |
| 643 | } |
| 644 | |
| 645 | /** |
| 646 | * Current wildcard in use for pattern queries. |
| 647 | * |
| 648 | * @return the wildcardChar |
| 649 | */ |
| 650 | public char getWildcardChar() { |
| 651 | return wildcardChar; |
| 652 | } |
| 653 | |
| 654 | /** |
| 655 | * Set the wildcard to use for pattern queries. |
| 656 | * |
| 657 | * @param wildcardChar |
| 658 | * the wildcardChar to set |
| 659 | */ |
| 660 | public void setWildcardChar(char wildcardChar) { |
| 661 | this.wildcardChar = wildcardChar; |
| 662 | } |
| 663 | |
| 664 | /** |
| 665 | * Return all words within the dictionary with number of characters equal to |
| 666 | * the provided length. |
| 667 | * |
| 668 | * @param length |
| 669 | * exact size of words to be returned. |
| 670 | * @return |
| 671 | */ |
| 672 | public Collection<String> findAllTermsOfLength(int length) { |
| 673 | return findAllTermsOfLength(length, length); |
| 674 | } |
| 675 | |
| 676 | /** |
| 677 | * Return all words within the dictionary with number of characters within |
| 678 | * the range: |
| 679 | * <p> |
| 680 | * minLength <= termLen <= maxLength |
| 681 | * |
| 682 | * @param minLength |
| 683 | * minimum size of words to be returned. Must be >= 0 |
| 684 | * @param maxLength |
| 685 | * maximum size of words to be returned. Must be >= 0. If less |
| 686 | * than minLength, will be changed to minLength. |
| 687 | * @return a collection of strings matching size constraints. If none found, |
| 688 | * an empty collection will be returned. |
| 689 | */ |
| 690 | public Collection<String> findAllTermsOfLength(int minLength, int maxLength) { |
| 691 | CharTrieNode currentNode = root; |
| 692 | minLength = Math.max(0, minLength); |
| 693 | maxLength = Math.max(minLength, maxLength); |
| 694 | Collection<String> matchingTerms = new ArrayList<String>(); |
| 695 | findAllTermsOfLength(minLength, maxLength, matchingTerms, 0, currentNode); |
| 696 | return matchingTerms; |
| 697 | } |
| 698 | |
| 699 | /** |
| 700 | * Recursive method to find all terms of size within the range: |
| 701 | * <p> |
| 702 | * minLength <= termLen <= maxLength |
| 703 | * <p> |
| 704 | * |
| 705 | * Results will be stored in the provided list. |
| 706 | * |
| 707 | * @param minLength |
| 708 | * minimum size of words to be returned. |
| 709 | * @param maxLength |
| 710 | * maximum size of words to be returned. |
| 711 | * @param list |
| 712 | * Location to store all matching results. |
| 713 | * @param pos |
| 714 | * current character position with pattern. |
| 715 | * @param node |
| 716 | * current node from which the children will be compared against |
| 717 | * the current character in the pattern. |
| 718 | */ |
| 719 | private void findAllTermsOfLength(int minLength, int maxLength, Collection<String> list, |
| 720 | int pos, |
| 721 | CharTrieNode node) { |
| 722 | if (node == null || pos > maxLength) { |
| 723 | return; |
| 724 | } |
| 725 | if (node.isTerminus()) { |
| 726 | if (pos >= minLength) { |
| 727 | list.add(((CharTrieTerminusNode) node).getTerm()); |
| 728 | } |
| 729 | } |
| 730 | for (CharTrieNode child : node) { |
| 731 | findAllTermsOfLength(minLength, maxLength, list, pos + 1, child); |
| 732 | } |
| 733 | } |
| 734 | } |