now entities vector!
Amit223 committed Dec 30, 2018
1 parent c02ce8e commit 8ebc36e
Showing 5 changed files with 64 additions and 56 deletions.
8 changes: 5 additions & 3 deletions src/main/java/Document.java
@@ -1,13 +1,15 @@

import javafx.util.Pair;

import java.util.Map;
import java.util.Vector;

public class Document {
private int _docNum;
private String _name;
private Map<String, Integer> _entities;
private Vector<Pair<String, Integer>> _entities;

public Document(int docNum, String name, Map<String, Integer> entities) {
public Document(int docNum, String name, Vector<Pair<String, Integer>> entities) {
this._docNum = docNum;
this._name = name;
this._entities= entities;
@@ -29,7 +31,7 @@ public void set_name(String _name) {
this._name = _name;
}

public Map<String, Integer> get_entities() {
public Vector<Pair<String, Integer>> get_entities() {
return _entities;
}

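With _entities changed from Map<String, Integer> to Vector<Pair<String, Integer>>, a document's entities are now an ordered list of (entity, tf) pairs rather than a hash-ordered map. A minimal sketch of constructing a Document with the new type; the doc number, name and tf values below are made-up illustration values, not data from the project:

import javafx.util.Pair;
import java.util.Vector;

public class DocumentExample {
    public static void main(String[] args) {
        // Ordered (entity, tf) pairs; the Vector keeps whatever order they were added in.
        Vector<Pair<String, Integer>> entities = new Vector<>();
        entities.add(new Pair<>("UNITED STATES", 12));
        entities.add(new Pair<>("WASHINGTON", 7));
        entities.add(new Pair<>("CONGRESS", 3));

        Document doc = new Document(42, "FBIS3-1001", entities);

        for (Pair<String, Integer> e : doc.get_entities()) {
            System.out.println(e.getKey() + " -> " + e.getValue());
        }
    }
}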
2 changes: 2 additions & 0 deletions src/main/java/Indexer.java
@@ -916,6 +916,8 @@ private void writeToDocumentsAndEntitiesList(String nameOfDoc, String cityOfDoc,
//write entities
entitiesToWrite.add(entitiesLine);



}

/**
11 changes: 5 additions & 6 deletions src/main/java/Model_2.java
@@ -13,20 +13,19 @@ public class Model_2 {
private double avgldl;
private int numOfIndexedDocs;


public Vector<Pair<String,Collection<Document>>> Start(String path, Vector<String> cities, Path queriesPath, boolean toStem, boolean toTreatSemantic) throws IOException {
Vector<Pair<String,Collection<Document>>> id_docsCollection= new Vector<>();
HashSet<String> citieshash = new HashSet<>(cities);
readIndexerInfo(path,toStem);
Vector<Pair<String, String>> queries = ReadFile.readQueriesFile(queriesPath);
searcher=new Searcher(avgldl,numOfIndexedDocs,path,citieshash,toStem);
boolean flag=true;
searcher=new Searcher(avgldl,numOfIndexedDocs,path,citieshash,toStem,"");//todo !!!!!

for (int i = 0; i <queries.size() ; i++) {
Pair <String,String>id_query= queries.get(i);
String id = id_query.getKey();
String query = id_query.getValue();
Collection<Document> queryDocs= searcher.Search(id, query, toTreatSemantic,flag);
flag=false;//not first quey anymore!
Collection<Document> queryDocs= searcher.Search(id, query, toTreatSemantic);
id_docsCollection.add(new Pair<String,Collection<Document>>(id,queryDocs));
}
return id_docsCollection;
@@ -38,8 +37,8 @@ public Collection<Document> Start(String path, Vector<String> cities, String que
HashSet<String> citieshash = new HashSet<>(cities);
readIndexerInfo(path,toStem);

searcher=new Searcher(avgldl,numOfIndexedDocs,path,citieshash,toStem);
Collection<Document> docs = searcher.Search("1", query, toTreatSemantic,true);
searcher=new Searcher(avgldl,numOfIndexedDocs,path,citieshash,toStem,"");//todo !!!!!!!!!!!!
Collection<Document> docs = searcher.Search("1", query, toTreatSemantic);

return docs;
}
36 changes: 9 additions & 27 deletions src/main/java/Ranker.java
@@ -92,6 +92,15 @@ private Collection<Integer> test(HashMap<Integer, Vector<Pair<String, Integer>>>
return get50BestDocs();

}

/**
* Ranks the given doc and puts the result in the docs-and-ranks table.
* @param doc the doc's line number
* @param docsToRank the docs to rank and their term/tf info
* @param doc_size the size (length) of each doc
* @param term_docsNumber for each term, how many docs contain it
*/
private void rankDoc(int doc,HashMap<Integer, Vector<Pair<String, Integer>>> docsToRank,
ConcurrentHashMap<Integer, Integer> doc_size,
ConcurrentHashMap<String, Integer> term_docsNumber){
@@ -113,33 +122,6 @@ private void rankDoc(int doc,HashMap<Integer, Vector<Pair<String, Integer>>> doc
}
docsAndRanks.put(doc,rank);
}
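The values threaded into rankDoc (per-term tf for the doc, the doc's length from doc_size, per-term document counts from term_docsNumber, the corpus size and avgldl) are the usual ingredients of a BM25-style score. The method body is collapsed in this view, so the following is only a sketch of such a per-term contribution under that assumption, not the project's actual formula; k1 and b are the conventional tuning constants:

final class Bm25Sketch {
    // Hypothetical BM25-style contribution of one query term to one document.
    // tf           - term frequency of the term in the doc (from docsToRank)
    // docLength    - length of the doc (doc_size.get(doc))
    // avgdl        - average document length over the corpus (avgldl)
    // docsWithTerm - number of docs containing the term (term_docsNumber.get(term))
    // numOfDocs    - total number of indexed documents
    static double bm25TermScore(int tf, int docLength, double avgdl,
                                int docsWithTerm, int numOfDocs) {
        final double k1 = 1.2;   // conventional values; the project's own constants are not shown
        final double b = 0.75;
        double idf = Math.log((numOfDocs - docsWithTerm + 0.5) / (docsWithTerm + 0.5) + 1.0);
        double norm = (tf * (k1 + 1)) / (tf + k1 * (1 - b + b * docLength / avgdl));
        return idf * norm;
    }
}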
private void addItem(double rank, Integer doc) {
if (rank > 0) {
if (queueSize.get() > 49) {
_RankDocsMutex.lock();
Double lowest = _RankedDocs.peek().getValue();
if (rank > (lowest)) {
_RankedDocs.poll();
_RankedDocs.add(new Pair<Integer, Double>(doc, rank));
minQueue.set(_RankedDocs.peek().getValue());

//}
_RankDocsMutex.unlock();
} else {
_RankDocsMutex.lock();
_RankedDocs.add(new Pair<Integer, Double>(doc, rank));
_RankDocsMutex.unlock();
queueSize.getAndAdd(1);
if (rank < minQueue.get() || queueSize.get() == 0) {
minQueue.set(rank);
}

}

}
}
}


/**
* this function gives a grade to each doc and adds it to a priority queue of the 50 best.
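The Ranker keeps only the 50 best-scoring documents (get50BestDocs; the addItem method removed above maintained a lock-guarded priority queue for the same purpose). A minimal single-threaded sketch of that selection over the doc-to-rank table, using a min-heap of size 50; the class and method names here are illustrative, not the project's:

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;

final class Top50Sketch {
    // Returns the doc numbers of the 50 highest-ranked documents.
    static List<Integer> best50(Map<Integer, Double> docsAndRanks) {
        // Min-heap by rank: the head is always the weakest doc currently kept.
        PriorityQueue<Map.Entry<Integer, Double>> heap =
                new PriorityQueue<>((a, b) -> Double.compare(a.getValue(), b.getValue()));
        for (Map.Entry<Integer, Double> e : docsAndRanks.entrySet()) {
            if (e.getValue() <= 0) continue;      // the removed addItem also skipped rank <= 0
            heap.add(e);
            if (heap.size() > 50) heap.poll();    // evict the current minimum
        }
        List<Integer> best = new ArrayList<>();
        while (!heap.isEmpty()) best.add(heap.poll().getKey());
        return best;                              // ascending by rank
    }
}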
63 changes: 43 additions & 20 deletions src/main/java/Searcher.java
@@ -31,13 +31,24 @@ public class Searcher {
*/
private HashMap<Integer, Vector<Pair<String, Integer>>> _doc_termPlusTfs;
private static ConcurrentHashMap<Integer, Integer> _doc_size = new ConcurrentHashMap<>();
private ConcurrentHashMap<Integer, Map<String,Integer>> _doc_Entities = new ConcurrentHashMap<>();
private ConcurrentHashMap<Integer, Vector<Pair<String,Integer>>> _doc_Entities = new ConcurrentHashMap<>();
private static ConcurrentHashMap<String, Integer> _term_docsCounter = new ConcurrentHashMap<>();
private String _path = "";
private boolean isSemantic; //todo what default ?
private String _toSave="";


public Searcher(double avgldl, int numOfIndexedDocs, String path, HashSet<String> chosenCities,boolean toStem) {
/**
* Constructor.
* @param avgldl average document length of the indexed corpus
* @param numOfIndexedDocs number of indexed documents
* @param path path to the index files
* @param chosenCities cities to filter the documents by
* @param toStem whether the index was built with stemming
* @param pathToSave path to save the query results to
*/
public Searcher(double avgldl, int numOfIndexedDocs, String path, HashSet<String> chosenCities,boolean toStem,String pathToSave) {
_numOfIndexedDocs = numOfIndexedDocs;
_avgldl = avgldl;//_indexer.getAvgldl();no!
_doc_termPlusTfs = new HashMap<>();
@@ -46,6 +57,7 @@ public Searcher(double avgldl, int numOfIndexedDocs, String path, HashSet<String
_path=path;
this.toStem=toStem;
loadDictionaryToMemory(toStem); //using for "Entities"
_toSave=pathToSave;
}
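After this commit the constructor takes a sixth argument, the path to save results to, and Search (below) no longer takes the firstQuery flag. A usage sketch mirroring the calls in Model_2; the index path, save path, query id and query text are placeholders, not project defaults:

import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;

final class SearcherUsageSketch {
    // avgldl and numOfIndexedDocs would normally come from readIndexerInfo(path, toStem).
    static Collection<Document> runOneQuery(double avgldl, int numOfIndexedDocs) throws IOException {
        HashSet<String> chosenCities = new HashSet<>();   // city filter handed to FilterDocsByCitys
        Searcher searcher = new Searcher(avgldl, numOfIndexedDocs,
                "C:/index", chosenCities, false /*toStem*/, "C:/results" /*pathToSave*/);
        return searcher.Search("351", "falkland petroleum exploration", false /*toTreatSemantic*/);
    }
}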

/**Auxiliary functions for Search**/
@@ -113,6 +125,11 @@ private String treatSemantic(String query) {
return query;
}

/**
* @param query the query string
* @return the query with "+" between its words
*/
private String queryWithPluses(String query) {
String [] words=query.split("\\s+");
String queryWithPluses="";
@@ -266,10 +283,10 @@ public void getEntities(Collection<Integer> ans) {
byte[]line=new byte[120];
raf.seek(docNum*120);
raf.read(line);//120 bytes per doc!
Map<String,Integer> Entities = findEntities(line);
if(Entities.size()==1&&Entities.containsKey("X")){
Vector<Pair<String,Integer>> Entities = findEntities(line);
if(Entities.size()==1&&(Entities.get(0).getKey().equals("X"))){
if (!_doc_Entities.containsKey(docNum))
_doc_Entities.put(docNum, new HashMap<>());//no entities for doc
_doc_Entities.put(docNum, new Vector<>());//no entities for doc
}
else {
if (!_doc_Entities.containsKey(docNum))
@@ -290,7 +307,7 @@ public void getEntities(Collection<Integer> ans) {
* @param line byte[120] record - 5 entities and their tfs in it
* @return vector of the 5 entities in the line
*/
private Map<String,Integer> findEntities(byte[] line) {
private Vector<Pair<String,Integer>> findEntities(byte[] line) {
byte [] e1=new byte[20];
byte [] e2=new byte[20];
byte [] e3=new byte[20];
@@ -339,17 +356,17 @@ private Map<String,Integer> findEntities(byte[] line) {
int tf4=byteToInt(f4);
int tf5=byteToInt(f5);

Map<String,Integer> entities=new HashMap<>();
Vector<Pair<String,Integer>> entities=new Vector();
if(!entity1.equals(""))
entities.put(entity1,tf1);
entities.add(new Pair(entity1,tf1));
if(!entity2.equals(""))
entities.put(entity2,tf2);
entities.add(new Pair(entity2,tf2));
if(!entity3.equals(""))
entities.put(entity3,tf3);
entities.add(new Pair(entity3,tf3));
if(!entity4.equals(""))
entities.put(entity4,tf4);
entities.add(new Pair(entity4,tf4));
if(!entity5.equals(""))
entities.put(entity5,tf5);
entities.add(new Pair(entity5,tf5));
return entities;
}
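getEntities reads one fixed-width 120-byte record per document (raf.seek(docNum*120)) and findEntities slices it into five 20-byte entity names plus five term frequencies. A standalone sketch of that layout, assuming the names occupy the first 100 bytes and the five tfs follow as 4-byte big-endian ints (the project's byteToInt and padding scheme are not shown in this diff):

import javafx.util.Pair;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.Vector;

final class EntityRecordSketch {
    // Assumed record layout: 5 x 20-byte entity names, then 5 x 4-byte tf ints (= 120 bytes).
    static Vector<Pair<String, Integer>> parse(byte[] line) {
        Vector<Pair<String, Integer>> entities = new Vector<>();
        ByteBuffer buf = ByteBuffer.wrap(line);
        String[] names = new String[5];
        for (int i = 0; i < 5; i++) {
            byte[] name = new byte[20];
            buf.get(name);
            // Charset and padding are assumptions; empty slots become "".
            names[i] = new String(name, StandardCharsets.UTF_8).trim();
        }
        for (int i = 0; i < 5; i++) {
            int tf = buf.getInt();                // stands in for byteToInt(fN)
            if (!names[i].isEmpty()) {
                entities.add(new Pair<>(names[i], tf));
            }
        }
        return entities;
    }
}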

@@ -364,23 +381,23 @@ private Map<String,Integer> findEntities(byte[] line) {
* @param toTreatSemantic
* @return list of relevant docs.
*/
public Collection<Document> Search(String id, String query, boolean toTreatSemantic, boolean firstQuery) {
public Collection<Document> Search(String id, String query, boolean toTreatSemantic) {
Collection<Document> docs= new Vector<>();
isSemantic=toTreatSemantic;
build_doc_termPlusTfs(query, toStem);
FilterDocsByCitys();
Collection<Integer> docNums = _ranker.Rank(_doc_termPlusTfs, _doc_size, _numOfIndexedDocs, _term_docsCounter, _avgldl,_path); // return only 50 most relevant
getEntities(docNums);
Collection<String> docNames = docNumToNames(docNums,id,firstQuery);
Collection<String> docNames = docNumToNames(docNums,id);
Iterator<Integer> docNumIt= docNums.iterator();
Iterator<String> docNameIt= docNames.iterator();
while (docNumIt.hasNext()) {
Integer docNum= docNumIt.next();
Map<String, Integer> entities;
Vector<Pair<String, Integer>> entities;
if(_doc_Entities.containsKey(docNum)){
entities = _doc_Entities.get(docNum);
}
else entities = new ConcurrentHashMap<>();
else entities = new Vector();
docs.add(new Document(docNum,docNameIt.next(),entities));
}
return docs;
@@ -391,7 +408,7 @@
* @param ans- doclines
* @return name of documents
*/
private Collection<String> docNumToNames(Collection<Integer> ans,String id,boolean flag) {
private Collection<String> docNumToNames(Collection<Integer> ans,String id) {
//get doc names!
Set<String> documentsToReturn=new HashSet<>();
Map<Integer,Double> docLine_rank=_ranker.getDocsRanking();
@@ -411,14 +428,20 @@ private Collection<String> docNumToNames(Collection<Integer> ans,String id,boole
} catch (Exception e) {
e.printStackTrace();
}
WriteToQueryFile(docs_rank,id,flag);
WriteToQueryFile(docs_rank,id);
return documentsToReturn;
}

private void WriteToQueryFile(Map<String, Double> docs_rank,String id,boolean createNew) {
/**
* Writes the results for the given query to the results file.
* @param docs_rank the docs and their ranks
* @param id the id of the query
*/
private void WriteToQueryFile(Map<String, Double> docs_rank,String id) {
File file=new File("results.txt");
try {
if(createNew)
if(!file.exists())
file.createNewFile();
BufferedWriter writer=new BufferedWriter(new FileWriter(file));
Iterator<String> docs=docs_rank.keySet().iterator();
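One note on the method above: new FileWriter(file) opens results.txt in truncate mode, so every call to WriteToQueryFile wipes what earlier queries wrote, and the if(!file.exists()) guard does not change that. If the intent is to accumulate one block of results per query (Model_2 calls Search once per query id), the writer would need append mode. A sketch of that variant; the output line format here is a guess, since the rest of the method is collapsed in this diff:

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Map;

final class ResultsWriterSketch {
    // Appends one line per ranked document instead of overwriting the file.
    static void writeQueryResults(Map<String, Double> docs_rank, String id) throws IOException {
        File file = new File("results.txt");
        if (!file.exists()) {
            file.createNewFile();
        }
        // FileWriter(file, true) = append mode, so results of earlier queries are kept.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(file, true))) {
            for (Map.Entry<String, Double> entry : docs_rank.entrySet()) {
                writer.write(id + " " + entry.getKey() + " " + entry.getValue());
                writer.newLine();
            }
        }
    }
}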
