Skip to content

Commit

Permalink
Add Bags of words
Browse files Browse the repository at this point in the history
  • Loading branch information
jjroman committed Jan 31, 2013
1 parent a53be39 commit 3798db5
Show file tree
Hide file tree
Showing 7 changed files with 547 additions and 470 deletions.
40 changes: 20 additions & 20 deletions CategoriesUsers.txt
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
Forums @davos
Forums @Karen_DaviLa
Forums @PhotographyTalk
Forums @MobileSupport
Forums @blackhatsecrets
Forums @iGiveaway
Forums @LaidbackLuke
Forums @greensboro_nc
Forums @O_Sergun4ik
Forums @MadonnaNation
Forums @MLucija
Forums @ravad
Forums @matthewtoren
Forums @GGInsider
Forums @Autographz
Forums @TRIPEOMUSICNET
Forums @hoodbooty69
Forums @forexed
Forums @NewsletterGuy
Forums @Fishdogs
Dental @abroaderview
Dental @O_Sergun4ik
Dental @Thomas_Bishara
Dental @MegaloMani
Dental @therabreath
Dental @Implantopia
Dental @Flap
Dental @wellness4all
Dental @odontosocial
Dental @Cheryl_Pierce
Dental @DrLarryJamesDDS
Dental @Dr_Connelly
Dental @GVBDO
Dental @mgdonline
Dental @vitamin_store
Dental @dannpoetra
Dental @seoredhead
Dental @ppierodds
Dental @digitaldentist
Dental @21CenturyDental
2 changes: 1 addition & 1 deletion PlikDoNauki.BLA.dict
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
1
1 Forums
1 Dental
823 changes: 407 additions & 416 deletions UserTweets.txt

Large diffs are not rendered by default.

30 changes: 25 additions & 5 deletions src/main/java/sem2/iwi/gui/GettingTweetsPanel.form
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,6 @@
<Group type="102" attributes="0">
<EmptySpace max="-2" attributes="0"/>
<Group type="103" groupAlignment="0" attributes="0">
<Group type="102" attributes="0">
<Component id="jScrollPane1" max="32767" attributes="0"/>
<EmptySpace max="-2" attributes="0"/>
</Group>
<Group type="102" alignment="0" attributes="0">
<Component id="jLabel1" min="-2" max="-2" attributes="0"/>
<EmptySpace max="32767" attributes="0"/>
Expand All @@ -46,6 +42,13 @@
<Component id="jButton2" min="-2" max="-2" attributes="0"/>
<EmptySpace min="-2" pref="27" max="-2" attributes="0"/>
</Group>
<Group type="102" alignment="1" attributes="0">
<Group type="103" groupAlignment="1" attributes="0">
<Component id="jScrollPane2" alignment="0" max="32767" attributes="0"/>
<Component id="jScrollPane1" max="32767" attributes="0"/>
</Group>
<EmptySpace max="-2" attributes="0"/>
</Group>
</Group>
</Group>
</Group>
Expand All @@ -64,9 +67,11 @@
<Component id="jButton1" min="-2" max="-2" attributes="0"/>
<EmptySpace type="separate" max="-2" attributes="0"/>
<Component id="jScrollPane1" min="-2" pref="124" max="-2" attributes="0"/>
<EmptySpace max="32767" attributes="0"/>
<EmptySpace max="-2" attributes="0"/>
<Component id="jButton2" min="-2" max="-2" attributes="0"/>
<EmptySpace max="-2" attributes="0"/>
<Component id="jScrollPane2" max="32767" attributes="0"/>
<EmptySpace max="-2" attributes="0"/>
</Group>
</Group>
</DimensionLayout>
Expand Down Expand Up @@ -121,5 +126,20 @@
<EventHandler event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="jButton2ActionPerformed"/>
</Events>
</Component>
<Container class="javax.swing.JScrollPane" name="jScrollPane2">
<AuxValues>
<AuxValue name="autoScrollPane" type="java.lang.Boolean" value="true"/>
</AuxValues>

<Layout class="org.netbeans.modules.form.compat2.layouts.support.JScrollPaneSupportLayout"/>
<SubComponents>
<Component class="javax.swing.JTextArea" name="jTextArea2">
<Properties>
<Property name="columns" type="int" value="20"/>
<Property name="rows" type="int" value="5"/>
</Properties>
</Component>
</SubComponents>
</Container>
</SubComponents>
</Form>
36 changes: 29 additions & 7 deletions src/main/java/sem2/iwi/gui/GettingTweetsPanel.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import java.util.List;
import java.util.Map;
import javax.swing.JTextField;
import sem2.iwi.tweets.GettingTweets;

/**
*
Expand Down Expand Up @@ -39,6 +40,8 @@ private void initComponents() {
jScrollPane1 = new javax.swing.JScrollPane();
jTextArea1 = new javax.swing.JTextArea();
jButton2 = new javax.swing.JButton();
jScrollPane2 = new javax.swing.JScrollPane();
jTextArea2 = new javax.swing.JTextArea();

jLabel1.setText("Pobierz tweety");

Expand Down Expand Up @@ -69,19 +72,20 @@ public void actionPerformed(java.awt.event.ActionEvent evt) {
}
});

jTextArea2.setColumns(20);
jTextArea2.setRows(5);
jScrollPane2.setViewportView(jTextArea2);

javax.swing.GroupLayout layout = new javax.swing.GroupLayout(this);
this.setLayout(layout);
layout.setHorizontalGroup(
layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
.addGroup(layout.createSequentialGroup()
.addContainerGap()
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
.addGroup(layout.createSequentialGroup()
.addComponent(jScrollPane1)
.addContainerGap())
.addGroup(layout.createSequentialGroup()
.addComponent(jLabel1)
.addContainerGap())
.addContainerGap(javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE))
.addGroup(javax.swing.GroupLayout.Alignment.TRAILING, layout.createSequentialGroup()
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.TRAILING)
.addGroup(layout.createSequentialGroup()
Expand All @@ -95,7 +99,12 @@ public void actionPerformed(java.awt.event.ActionEvent evt) {
.addGroup(javax.swing.GroupLayout.Alignment.TRAILING, layout.createSequentialGroup()
.addGap(0, 0, Short.MAX_VALUE)
.addComponent(jButton2)
.addGap(27, 27, 27))))
.addGap(27, 27, 27))
.addGroup(javax.swing.GroupLayout.Alignment.TRAILING, layout.createSequentialGroup()
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.TRAILING)
.addComponent(jScrollPane2, javax.swing.GroupLayout.Alignment.LEADING)
.addComponent(jScrollPane1))
.addContainerGap())))
);
layout.setVerticalGroup(
layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
Expand All @@ -110,8 +119,10 @@ public void actionPerformed(java.awt.event.ActionEvent evt) {
.addComponent(jButton1)
.addGap(18, 18, 18)
.addComponent(jScrollPane1, javax.swing.GroupLayout.PREFERRED_SIZE, 124, javax.swing.GroupLayout.PREFERRED_SIZE)
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
.addComponent(jButton2)
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
.addComponent(jScrollPane2)
.addContainerGap())
);
}// </editor-fold>//GEN-END:initComponents
Expand All @@ -138,7 +149,16 @@ private void jButton2ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRS
Map<String,Integer> categoryMappings = new HashMap<String,Integer>();
categoryMappings = sem2.iwi.tweets.GettingTweets.readCategoriesDictionary();


ArrayList<ArrayList<Integer>> alali = GettingTweets.getForBayes();
StringBuilder sb = new StringBuilder();
sb.append(String.format("%s %s\n", alali.size(), alali.get(0).size() ));
for (ArrayList<Integer> arrayList : alali) {
for(Integer i : arrayList){
sb.append(i).append(" ");
}
sb.append("\n");
}
jTextArea2.setText(sb.toString());
}//GEN-LAST:event_jButton2ActionPerformed

// Variables declaration - do not modify//GEN-BEGIN:variables
Expand All @@ -147,7 +167,9 @@ private void jButton2ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRS
private javax.swing.JLabel jLabel1;
private javax.swing.JLabel jLabel2;
private javax.swing.JScrollPane jScrollPane1;
private javax.swing.JScrollPane jScrollPane2;
private javax.swing.JTextArea jTextArea1;
private javax.swing.JTextArea jTextArea2;
private javax.swing.JTextField jTextField1;
// End of variables declaration//GEN-END:variables

Expand Down
10 changes: 9 additions & 1 deletion src/main/java/sem2/iwi/nlp/Stemmer.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,20 @@ public static void main(String[] args) {
}

}

static MaxentTagger mxt = null;
/**
 * Returns the shared {@link MaxentTagger}, building it on the first call.
 *
 * The tagger is constructed from the model path
 * {@code "english-left3words-distsim.tagger"} and stored in the static
 * field {@code mxt}; later calls hand back that same instance. The method
 * is {@code static synchronized}, so the null check and the assignment
 * run while holding the class monitor.
 *
 * @return the cached tagger instance
 * @throws IOException declared for the tagger construction
 * @throws ClassNotFoundException declared for the tagger construction
 */
private static synchronized MaxentTagger getMaxentTagger() throws IOException, ClassNotFoundException{
    // Fast path: tagger was already created by an earlier call.
    if(mxt != null){
        return mxt;
    }
    mxt = new MaxentTagger("english-left3words-distsim.tagger");
    return mxt;
}

public static ArrayList<String> getWordsOnlyForTags(String forPosTagging, String... tags) {
ArrayList<String> retVal = new ArrayList<>();

try {
MaxentTagger tagger = new MaxentTagger("english-left3words-distsim.tagger");
MaxentTagger tagger = getMaxentTagger();
List<List<HasWord>> sentences = MaxentTagger.tokenizeText(new StringReader(forPosTagging));

for (List<HasWord> sentence : sentences) {
Expand Down
76 changes: 56 additions & 20 deletions src/main/java/sem2/iwi/tweets/GettingTweets.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,19 @@
*/
package sem2.iwi.tweets;

import com.sun.imageio.plugins.common.BogusColorSpace;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import sem2.iwi.nlp.BagOfWords;
import sem2.iwi.utils.IWIUtils;
import twitter4j.*;
import twitter4j.auth.AccessToken;
Expand All @@ -23,7 +26,8 @@
* @author kubeusz
*/
public class GettingTweets {

private final static BagOfWords bow = new BagOfWords();

public static void writeToFileTwellowCategories(String content){

FileWriter fw = null;
Expand Down Expand Up @@ -95,39 +99,41 @@ public static void createCategoriesDictionary(HashSet<String> categories){
}

}
static Map<String,Integer> categoryMappings = null;

public static Map<String,Integer> readCategoriesDictionary(){

Map<String,Integer> categoryMappings = new HashMap<String,Integer>();

Path path = Paths.get(IWIUtils.getPropertyValue("categoriesDictionaryFile"));
try {
Scanner scanner = new Scanner(path);

while (scanner.hasNextLine()){
if(categoryMappings == null){
categoryMappings = new HashMap<String,Integer>();

Path path = Paths.get(IWIUtils.getPropertyValue("categoriesDictionaryFile"));
try {
Scanner scanner = new Scanner(path);

while (scanner.hasNextLine()){

String[] result = scanner.nextLine().split(" ");

if(result.length!=1){
categoryMappings.put( result[1],Integer.parseInt(result[0]));
}

String[] result = scanner.nextLine().split("\\t");

if(result.length!=1){
categoryMappings.put(result[0], Integer.parseInt(result[1]));
}

scanner.close();
} catch (IOException ex) {
Logger.getLogger(GettingTweets.class.getName()).log(Level.SEVERE, null, ex);
}
scanner.close();
} catch (IOException ex) {
Logger.getLogger(GettingTweets.class.getName()).log(Level.SEVERE, null, ex);
}

return categoryMappings;
}

private static HashMap<String, ArrayList<String>> forBOW = null;
public static String gettingTweets(List<String> users, String noOfTweets){

Twitter twitter = getTwitterInstance();

FileWriter fw = null;
BufferedWriter bw =null;
String content = "";
forBOW = new HashMap<>();

try {
fw = new FileWriter(IWIUtils.getPropertyValue("usersTweetsFile"));
Expand Down Expand Up @@ -156,6 +162,13 @@ public static String gettingTweets(List<String> users, String noOfTweets){
List<Status> statuses = twitter.getUserTimeline(result[1], new Paging(1,Integer.parseInt(noOfTweets) ));

for (Status status : statuses) {
if(forBOW.containsKey(result[0])){
forBOW.get(result[0]).add(status.getText());
}else{
ArrayList<String> altmp = new ArrayList<>();
altmp.add(status.getText());
forBOW.put(result[0], altmp);
}
bw.write(result[0] + " || " + status.getText() +"\n");
content+=result[0] + " || " + status.getText() +"\n";
}
Expand All @@ -172,11 +185,27 @@ public static String gettingTweets(List<String> users, String noOfTweets){
Logger.getLogger(GettingTweets.class.getName()).log(Level.SEVERE, null, tex);
}

for (Map.Entry<String, ArrayList<String>> me : forBOW.entrySet()) {
bow.addNewBag(me.getValue());
}


return content;
}

/**
 * Builds one metrics row per stored tweet.
 *
 * Walks the static {@code forBOW} map (key -&gt; list of tweet texts) and,
 * for every tweet, appends the result of
 * {@link #getAllMetrics(String, String)} called with the tweet text and
 * the map key. When {@code forBOW} is still {@code null} an empty list
 * is returned.
 *
 * @return list of metric rows, possibly empty, never {@code null}
 */
public static ArrayList<ArrayList<Integer>> getForBayes(){

    ArrayList<ArrayList<Integer>> rows = new ArrayList<>();
    // Nothing collected yet: hand back the empty result.
    if(forBOW == null){
        return rows;
    }
    for (Map.Entry<String, ArrayList<String>> entry : forBOW.entrySet()) {
        String category = entry.getKey();
        for (String tweet : entry.getValue()) {
            rows.add(getAllMetrics(tweet, category));
        }
    }
    return rows;
}

public static Twitter getTwitterInstance(){
TwitterFactory factory = new TwitterFactory();

Expand Down Expand Up @@ -233,5 +262,12 @@ public static Integer[] getGenericMetrics(String twit){

return metrics;
}

/**
 * Assembles the full metrics row for a single tweet.
 *
 * The row is built in this order: the values returned by
 * {@code getGenericMetrics(twit)}, then the values returned by
 * {@code bow.scoreAgainsAll(twit)}, and finally the value that
 * {@code readCategoriesDictionary()} maps to {@code category}.
 * NOTE(review): if the dictionary has no entry for {@code category},
 * the last element is {@code null} - confirm callers tolerate that.
 *
 * @param twit     text of the tweet to score
 * @param category key looked up in the categories dictionary
 * @return the metric values followed by the category value
 */
public static ArrayList<Integer> getAllMetrics(String twit, String category){
    // Start the row with the generic metrics.
    ArrayList<Integer> metrics = new ArrayList<>(Arrays.asList(getGenericMetrics(twit)));
    metrics.addAll(bow.scoreAgainsAll(twit));
    // The category value always goes last.
    Integer categoryId = readCategoriesDictionary().get(category);
    metrics.add(categoryId);
    return metrics;
}

}

0 comments on commit 3798db5

Please sign in to comment.