Skip to content

Commit

Permalink
Extracting term counting to a separate TextChunk class
Browse files Browse the repository at this point in the history
  • Loading branch information
digitalronin committed Mar 22, 2009
1 parent 6a3d4ce commit ba0b2a9
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 24 deletions.
28 changes: 4 additions & 24 deletions app/models/cloud.rb
Original file line number Diff line number Diff line change
@@ -1,17 +1,6 @@
class Cloud
attr_accessor :title, :words, :max_count, :min_count # need these to derive the right CSS class

BORING_TERMS = [
'secretary of state',
'secretary of state for the home department'
]

# Yahoo term extractor likes sending back 'developme'. I have no
# idea why. We replace it with 'development' later on.
BROKEN_TERMS = {
'developme' => 'development'
}

def initialize(params)
@mp = params[:mp]
@title = "#{@mp.full_name} Written Questions"
Expand All @@ -24,9 +13,11 @@ def count_words
@max_count = 0
@min_count = 999

tc = TextChunk.new text

rtn = []
terms.sort.each do |term|
value = text.downcase.split(%r[#{term}]).size
tc.terms.sort.each do |term|
value = tc.body.downcase.split(%r[#{term}]).size
RAILS_DEFAULT_LOGGER.debug "Counting: #{term}, #{value}"
@max_count = [ @max_count, value ].max
@min_count = [ @min_count, value ].min
Expand All @@ -39,17 +30,6 @@ def count_words
rtn.sort {|a,b| a.value <=> b.value}.reverse
end

# Returns the list of terms extracted from +text+, cleaned up for display.
#
# The terms come from TagExtractor.extract(text) (presumably an Array of
# Strings -- confirm against TagExtractor). Every entry listed in
# BORING_TERMS is removed, and every key of BROKEN_TERMS found in the list
# is dropped and its replacement appended to the end.
#
# The replacement is only appended when it is not already present, so a
# list containing both the broken and the correct spelling does not end up
# with the same term twice.
def terms
  list = TagExtractor.extract(text) - BORING_TERMS
  BROKEN_TERMS.each do |term, replacement|
    # Array#delete returns nil when the term was not in the list.
    next unless list.delete(term)
    list << replacement unless list.include?(replacement)
  end
  list
end

# Text the cloud is built from: the value of @mp.written_answer_text,
# fetched on first use and cached in @text for subsequent calls.
def text
  return @text if @text
  @text = @mp.written_answer_text
end
Expand Down
30 changes: 30 additions & 0 deletions app/models/text_chunk.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Wraps a piece of text (+body+) and derives a cleaned-up list of terms
# from it via TagExtractor.
class TextChunk
  attr_accessor :body

  # Terms that are removed from the extracted list before it is returned.
  BORING_TERMS = [
    'secretary of state',
    'secretary of state for the home department'
  ].freeze

  # Yahoo term extractor likes sending back 'developme'. I have no
  # idea why. We replace it with 'development' later on.
  BROKEN_TERMS = {
    'developme' => 'development'
  }.freeze

  # body - the text this chunk wraps; it is handed unchanged to
  #        TagExtractor.extract by #terms.
  def initialize(body)
    @body = body
  end

  # Returns the list of terms extracted from +body+.
  #
  # The terms come from TagExtractor.extract(body) (presumably an Array of
  # Strings -- confirm against TagExtractor). Every entry listed in
  # BORING_TERMS is removed, and every key of BROKEN_TERMS found in the
  # list is dropped and its replacement appended to the end.
  #
  # The replacement is only appended when it is not already present, so a
  # list containing both the broken and the correct spelling does not end
  # up with the same term twice.
  def terms
    list = TagExtractor.extract(body) - BORING_TERMS
    BROKEN_TERMS.each do |term, replacement|
      # Array#delete returns nil when the term was not in the list.
      next unless list.delete(term)
      list << replacement unless list.include?(replacement)
    end
    list
  end

end

0 comments on commit ba0b2a9

Please sign in to comment.