EMMA Coverage Report

EMMA Coverage Report (generated Thu Jan 05 16:24:01 PST 2012)
[all classes][net.digitaltsunami.word.sequence]

COVERAGE SUMMARY FOR SOURCE FILE [TermLengthNormalization.java]

name	class, %	method, %	block, %	line, %
TermLengthNormalization.java	100% (1/1)	100% (3/3)	100% (33/33)	100% (5/5)

COVERAGE BREAKDOWN BY CLASS AND METHOD

name	class, %	method, %	block, %	line, %

class TermLengthNormalization	100% (1/1)	100% (3/3)	100% (33/33)	100% (5/5)
TermLengthNormalization (): void		100% (1/1)	100% (3/3)	100% (1/1)
getNormalizedEditDistance (double, String, String): double		100% (1/1)	100% (5/5)	100% (1/1)
normalizeOnTermLength (double, String, String): double		100% (1/1)	100% (25/25)	100% (3/3)

1	/**
2	*
3	*/
4	package net.digitaltsunami.word.sequence;
5
6	/**
7	* Provides edit distance normalization based on length of terms, with the ratio
8	* of edits to length. This results in the same number of edits returning a
9	* closer match for longer terms as opposed to shorter terms.
10	*
11	* @author dhagberg
12	*
13	*/
14	public class TermLengthNormalization implements EditDistanceNormalization {
15
16	/**
17	* Normalize edit distance between fromTerm and toTerm and return a value in
18	* the range [0,1] with a 0 being a complete mismatch (no characters in
19	* common) and 1 being an exact match.
20	* <p>
21	* Normalization is based on length of terms, with the ratio of edits to
22	* length. This results in the same number of edits returning a closer match
23	* for longer terms as opposed to shorter terms. See below for an example.
24	*
25	* <pre>
26	* From To Count Notes
27	* -------- ------- ----- ---------------------------------
28	* game g 0.25 Three deletions. Short word length
29	* gamelan game 0.57 Three deletions. Longer word length
30	* </pre>
31	*
32	* @param editCount
33	* number of edits to convert fromTerm to toTerm.
34	* @param fromTerm
35	* initial term used as baseline
36	* @param toTerm
37	* target term from which the edit count will be calculated.
38	*
39	* @return a value returned will be in the closed interval: [0, 1] with an
40	* identical term being 1 and decreasing towards zero as the
41	* difference in the terms increases.
42	* @see net.digitaltsunami.word.sequence.EditDistanceNormalization#getNormalizedEditDistance(double,
43	* java.lang.String, java.lang.String)
44	*/
45	@Override
46	public double getNormalizedEditDistance(double editCount, String fromTerm, String toTerm) {
47	return TermLengthNormalization.normalizeOnTermLength(editCount, fromTerm, toTerm);
48	}
49
50	/**
51	* Normalize edit distance based on term lengths. The provided edit count is
52	* normalized to return a value in the range [0,1] with a 0 being a complete
53	* mismatch (no characters in common) and 1 being an exact match.
54	* <p>
55	* Normalization is based on length of terms, with the ratio of edits to
56	* length. This results in the same number of edits returning a closer match
57	* for longer terms as opposed to shorter terms. See below for an example.
58	* <pre>
59	* From To Count Notes
60	* -------- ------- ----- ---------------------------------
61	* game g 0.25 Three deletions. Short word length
62	* gamelan game 0.57 Three deletions. Longer word length
63	* </pre>
64	*
65	*
66	* @param editCount
67	* number of edits required to convert fromTerm to toTerm.
68	* @param fromTerm
69	* initial term used as baseline
70	* @param toTerm
71	* target term from which the edit count will be calculated.
72	* @return a value returned will be in the closed interval: [0, 1] with an
73	* identical term being 1 and decreasing towards zero as the
74	* difference in the terms increases.
75	*/
76	protected static double normalizeOnTermLength(double editCount, String fromTerm, String toTerm) {
77	int minLen = Math.min(fromTerm.length(), toTerm.length());
78	int maxLen = Math.max(fromTerm.length(), toTerm.length());
79	return ((maxLen - editCount) * minLen) / (maxLen * minLen);
80	}
81	}

[all classes][net.digitaltsunami.word.sequence]

EMMA 2.1.5320 (stable) (C) Vladimir Roubtsov