Class: Bundler::SimilarityDetector
Relationships & Source Files | |
Namespace Children | |
Classes:
| |
Inherits: | Object |
Defined in: | lib/bundler/similarity_detector.rb |
Class Method Summary
-
.new(corpus) ⇒ SimilarityDetector
constructor
initialize with an array of words to be matched against.
Instance Method Summary
-
#similar_word_list(word, limit = 3)
return the result of ‘similar_words’, concatenated into a list (eg “a, b or c”).
-
#similar_words(word, limit = 3)
return an array of words similar to ‘word’ from the corpus.
- #levenshtein_distance(this, that, ins = 2, del = 2, sub = 1) protected
Constructor Details
.new(corpus) ⇒ SimilarityDetector
initialize with an array of words to be matched against
# File 'lib/bundler/similarity_detector.rb', line 8
# Builds a detector around the list of candidate words.
#
# The corpus is stored as given (no copy is made); it is read later when
# looking for words similar to a query.
#
# @param corpus [Array] the words to be matched against
def initialize(corpus)
  @corpus = corpus
end
Instance Method Details
#levenshtein_distance(this, that, ins = 2, del = 2, sub = 1) (protected)
[ GitHub ]# File 'lib/bundler/similarity_detector.rb', line 32
# Computes a weighted Levenshtein (edit) distance between two strings.
#
# The weights are oriented as follows: every character that `this` has in
# excess of `that` costs `ins`, every character that `that` has in excess of
# `this` costs `del`, and every mismatched pair of characters costs `sub`.
#
# Empty strings are supported: the distance to an empty string is the length
# of the other string multiplied by the matching weight.
#
# @param this [String, nil] first string
# @param that [String, nil] second string
# @param ins [Integer] cost per unmatched character of `this`
# @param del [Integer] cost per unmatched character of `that`
# @param sub [Integer] cost of replacing one character with another
# @return [Integer, nil] the weighted distance, or nil if either input is nil
def levenshtein_distance(this, that, ins = 2, del = 2, sub = 1)
  return nil if this.nil? || that.nil?

  # Only two rows of the distance matrix are ever needed at once.
  # previous[j] is the distance between the first j characters of `this`
  # and the prefix of `that` handled so far (initially the empty prefix).
  previous = (0..this.length).map {|j| j * ins }

  (1..that.length).each do |i|
    # First column: the first i characters of `that` against an empty `this`.
    current = [i * del]

    (1..this.length).each do |j|
      substitution = previous[j - 1] + (this[j - 1] == that[i - 1] ? 0 : sub)
      insertion = current[j - 1] + ins
      deletion = previous[j] + del
      current << [substitution, insertion, deletion].min
    end

    previous = current
  end

  # The last cell of the last row is the distance between the full strings.
  previous.last
end
#similar_word_list(word, limit = 3)
return the result of ‘similar_words’, concatenated into a list (eg “a, b or c”)
# File 'lib/bundler/similarity_detector.rb', line 20
# Formats the words similar to `word` as a human-readable alternative list.
#
# A single match is returned as-is; several matches are joined with ", "
# and the final one is attached with " or " (e.g. "a, b or c").
#
# @param word [String] the word to find alternatives for
# @param limit [Integer] passed through to `similar_words`
# @return the single match, the joined String, or nil when nothing matches
def similar_word_list(word, limit = 3)
  matches = similar_words(word, limit)

  case matches.length
  when 0
    nil
  when 1
    matches[0]
  else
    leading = matches[0..-2].join(", ")
    [leading, matches[-1]].join(" or ")
  end
end
#similar_words(word, limit = 3)
return an array of words similar to ‘word’ from the corpus
# File 'lib/bundler/similarity_detector.rb', line 13
# Finds the corpus words whose edit distance to `word` is at most `limit`.
#
# Each corpus entry is scored with `levenshtein_distance`, entries further
# away than `limit` are dropped, and the rest are ordered by distance
# (closest first).
#
# @param word [String] the word to compare against the corpus
# @param limit [Integer] maximum distance for a word to count as similar
# @return [Array] the matching corpus words, nearest first
def similar_words(word, limit = 3)
  scored = @corpus.map do |candidate|
    SimilarityScore.new(candidate, levenshtein_distance(word, candidate))
  end

  within_limit = scored.select {|score| score.distance <= limit }
  within_limit.sort_by(&:distance).map(&:string)
end