package org.kit.furia;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.ajmm.obsearch.OB;
import org.kit.furia.misc.IntegerHolder;
/*
Furia-chan: An Open Source software license violation detector.
Copyright (C) 2007 Kyushu Institute of Technology
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
 * This class is a multi-set of OB objects. If we were to store natural
 * language, a document is just a multi-set of natural language words.
 * Relationships of the words within the document are not stored.
 * @param <O>
 *                The type of OB object that will be stored in this document.
 * @author Arnoldo Jose Muller Molina
 * @since 0
 */
public class Document<O extends OB> {

    /**
     * Contains each of the objects and the amount of times they appear in the
     * document.
     */
    private final Map<O, DocumentElement<O>> data;

    /**
     * The name (identification string) of this document.
     */
    private final String name;

    /**
     * The size of the multi-set of the words of this document, i.e. the sum
     * of the counts of every element held in {@link #data}.
     */
    private int wordCountMultiSet;

    /**
     * @return The name (identification string) given to this document when
     *         it was created.
     */
    public String getName() {
        return name;
    }

    /**
     * Creates a document with an initial estimate of 2000 elements.
     * @param id
     *                The id of the document.
     */
    public Document(String id) {
        this(id, 2000);
    }

    /**
     * @return The size of the set of words contained in this document (the
     *         number of distinct words).
     */
    public int size() {
        return data.size();
    }

    /**
     * @return The size of the multi-set of words contained in this document
     *         (every word weighted by its count).
     */
    public int multiSetSize() {
        return wordCountMultiSet;
    }

    /**
     * Creates a document.
     * @param id
     *                The id of the document.
     * @param initialCapacity
     *                The number of elements that we are expecting to hold.
     *                This is for efficiency reasons, as the Document will
     *                grow automatically if the number of elements exceeds
     *                this initial estimate.
     */
    public Document(String id, int initialCapacity) {
        this.data = new HashMap<O, DocumentElement<O>>(initialCapacity);
        this.name = id;
        this.wordCountMultiSet = 0;
    }

    /**
     * Adds one occurrence of a word to the document.
     * @param word
     *                The word that will be added.
     */
    public void addWord(O word) {
        DocumentElement<O> element = data.get(word);
        if (element == null) {
            // this is the first time we add this word, so
            // we should initialize the counter for "word"
            element = new DocumentElement<O>(word, new IntegerHolder(0));
            data.put(word, element);
        }
        // one more occurrence of this word, and one more word overall.
        element.inc();
        wordCountMultiSet++;
    }

    /**
     * Sets the multiplicity for the given word. The word is expected not to
     * be in the document yet.
     * @param word
     *                The word whose multiplicity is being set.
     * @param multiplicity
     *                The number of times the word appears in the document.
     */
    public void setWord(O word, int multiplicity) {
        // we cannot have an existing word here, because the Furia-chan file
        // format holds one item per line. All the items are different.
        assert data.get(word) == null;
        DocumentElement<O> replaced = data.put(word,
                new DocumentElement<O>(word, new IntegerHolder(multiplicity)));
        if (replaced != null) {
            // Only reachable when assertions are disabled: the old entry was
            // overwritten, so its occurrences must no longer be counted.
            wordCountMultiSet -= replaced.getCount();
        }
        wordCountMultiSet += multiplicity;
    }

    /**
     * @return An iterator with all the elements of this document. Removing
     *         an element through the iterator removes it from the document
     *         and updates {@link #multiSetSize()} accordingly.
     */
    public Iterator<DocumentElement<O>> iterator() {
        final Iterator<DocumentElement<O>> delegate = data.values().iterator();
        return new Iterator<DocumentElement<O>>() {
            /** The element returned by the latest call to next(). */
            private DocumentElement<O> last;

            public boolean hasNext() {
                return delegate.hasNext();
            }

            public DocumentElement<O> next() {
                last = delegate.next();
                return last;
            }

            public void remove() {
                // The delegate throws if there is nothing to remove, so the
                // multi-set size is only adjusted after a successful removal.
                delegate.remove();
                wordCountMultiSet -= last.getCount();
                last = null;
            }
        };
    }

    /**
     * This class is used by the iterator of the Document class. It holds the
     * O object and the number of times it appears in this document.
     * @param <T>
     *                The type of OB object that will be stored in this
     *                document.
     * @author Arnoldo Jose Muller Molina
     * @since 0
     */
    public class DocumentElement<T> {

        /** The object (word) this element stands for. */
        private final T object;

        /** Holder of the number of times the object appears. */
        private final IntegerHolder count;

        /**
         * Creates an element for the given object.
         * @param object
         *                The object that composes this element.
         * @param count
         *                Holder with the initial number of appearances of
         *                the object.
         */
        public DocumentElement(T object, IntegerHolder count) {
            this.object = object;
            this.count = count;
        }

        /**
         * @return The object that composes this element of the document.
         */
        public T getObject() {
            return object;
        }

        /**
         * @return The # of times this object has appeared in the document.
         */
        public int getCount() {
            return count.getValue();
        }

        /**
         * Increments the count for object.
         */
        protected void inc() {
            count.inc();
        }
    }
}