package org.kit.furia.index;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import org.ajmm.obsearch.Index;
import org.ajmm.obsearch.index.IndexShort;
import org.ajmm.obsearch.ob.OBShort;
import org.ajmm.obsearch.result.OBPriorityQueueShort;
import org.ajmm.obsearch.result.OBResultShort;
import org.apache.log4j.Logger;
import org.kit.furia.Document;
import org.kit.furia.ResultCandidate;
import org.kit.furia.Document.DocumentElement;
import org.kit.furia.exceptions.IRException;
import com.sleepycat.je.DatabaseException;
/*
Furia-chan: An Open Source software license violation detector.
Copyright (C) 2007 Kyushu Institute of Technology
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* FIRIndexShort uses IR techniques to match OB objects (OBSearch objects) when
* the objects extend from OBShort.
* @author Arnoldo Jose Muller Molina
* @since 0
*/
public class FIRIndexShort < O extends OBShort >
        extends AbstractIRIndex < O > implements
        org.kit.furia.IRIndexShort < O > {

    private static final Logger logger = Logger.getLogger(FIRIndexShort.class.getSimpleName());

    /**
     * The underlying OBSearch index that is queried for every object of a
     * document. Assigned once in the constructor.
     */
    private final IndexShort < O > index;

    /**
     * Creates a new IR Index that works on shorts.
     * @param index
     *                The OBSearch index used to match the objects of a
     *                document against the database.
     * @param dbFolder
     *                The folder in which Lucene's files will be stored
     *                (handed to the superclass constructor).
     * @throws IOException
     *                 If the given directory does not exist or if some other IO
     *                 error occurs
     */
    public FIRIndexShort(IndexShort < O > index, File dbFolder)
            throws IOException {
        super(dbFolder);
        this.index = index;
    }

    // TODO: re-write this as an iterator to lazily extract the results.
    /**
     * Re-expresses the given document in terms of the objects stored in the
     * underlying index and delegates the resulting query to
     * {@code processQueryResults}.
     * <p>
     * Every element of the document is searched in the index. For each match
     * returned, the element's count is added to the frequency accumulated
     * so far for the id of that match.
     * @param document
     *                The document whose elements will be matched.
     * @param k
     *                Size given to the result queue of each per-element
     *                search (presumably the maximum number of matches kept
     *                per element; confirm against OBSearch). It is also
     *                used to pre-size the frequency map.
     * @param r
     *                Range passed to {@code searchOB} for each element.
     * @param n
     *                Passed unchanged to {@code processQueryResults}
     *                (presumably the number of candidates to return).
     * @return The candidates produced by {@code processQueryResults}.
     * @throws IRException
     *                 If any exception is raised while searching the index;
     *                 the original exception is logged and wrapped.
     */
    public final List < ResultCandidate > search(Document < O > document, byte k,
            short r, short n) throws IRException {
        Iterator < Document < O >.DocumentElement < O >> it = document
                .iterator();
        // We transform the given document into a document expressed in
        // terms of the fragments available in the database.
        // We store term id -> term freq. This will be used to create the
        // query.
        Map < Integer, Integer > documentInTermsOfTheDatabase =
                new HashMap < Integer, Integer >(document.size() * k);
        while (it.hasNext()) {
            Document < O >.DocumentElement < O > elem = it.next();
            O toMatch = elem.getObject();
            OBPriorityQueueShort < O > result = new OBPriorityQueueShort < O >(
                    k);
            try {
                // match the object in the database.
                index.searchOB(toMatch, r, result);
                // for all the returned elements, we add their ids and the
                // initial count that came from "document".
                Iterator < OBResultShort < O > > itO = result.iterator();
                while (itO.hasNext()) {
                    OBResultShort < O > match = itO.next();
                    Integer exists = documentInTermsOfTheDatabase.get(match.getId());
                    if (exists == null) {
                        // first time this id is seen
                        documentInTermsOfTheDatabase.put(match.getId(), elem.getCount());
                    } else {
                        // accumulate on top of the previous frequency
                        documentInTermsOfTheDatabase.put(match.getId(), elem.getCount() + exists);
                    }
                }
            } catch (Exception e) {
                logger.fatal("Fatal error while searching" , e);
                throw new IRException(e);
            }
        }
        return processQueryResults(documentInTermsOfTheDatabase, n, document);
    }

    /**
     * Returns the underlying OBSearch index given at construction time.
     * @return The index used to match objects.
     */
    public Index < O > getIndex() {
        return index;
    }

    /**
     * Returns the value reported by {@code databaseSize()} on the
     * underlying index.
     * @return The size of the underlying index's database.
     * @throws DatabaseException
     *                 If the underlying index fails to report its size.
     */
    public int getWordsSize() throws DatabaseException {
        return this.index.databaseSize();
    }
}