Index.java
| Index Score | ||
|---|---|---|
![]() |
![]() |
org.apache.slide.index.lucene |
![]() |
![]() |
Jakarta Slide |
View: Reasons, Metrics, Source Code
These are the metrics that contribute to the Enerjy Score for this file, ranked by impact. The metrics listed at the top therefore influence the score to a greater extent than the metrics listed at the bottom.
/*
* $Header$
* $Revision: 527546 $
* $Date: 2007-04-11 11:59:18 -0400 (Wed, 11 Apr 2007) $
*
* ====================================================================
*
* Copyright 1999-2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.slide.index.lucene;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.StringTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.slide.common.PropertyName;
import org.apache.slide.common.Uri;
import org.apache.slide.content.NodeProperty;
import org.apache.slide.content.NodeRevisionDescriptor;
import org.apache.slide.content.NodeRevisionNumber;
import org.apache.slide.extractor.ContentExtractor;
import org.apache.slide.extractor.ExtractorException;
import org.apache.slide.extractor.ExtractorManager;
import org.apache.slide.search.IndexException;
import org.apache.slide.util.logger.Logger;
/**
* Wrapper for Lucene index.
*/
public class Index {
/** Field holding the document key; filled from {@code configuration.generateKey(uri, revision)}. */
public static final String KEY_FIELD_NAME = "SLIDE_KEY";
/** Stored field holding the resource URI. */
public static final String URI_FIELD_NAME = "SLIDE_URI";
/** Stored field holding the revision number as a string. */
public static final String REVISION_FIELD_NAME = "SLIDE_REVISION";
/** Multi-valued field: "/" plus every path prefix of the resource URI. */
public static final String SCOPE_FIELD_NAME = "SLIDE_SCOPE";
/** Field holding the number of path segments of the resource URI. */
public static final String DEPTH_FIELD_NAME = "SLIDE_DEPTH";
/** Field name constant; not referenced elsewhere in this class. */
public static final String VERSION_FIELD_NAME = "SLIDE_VERSION";
/** Multi-valued field listing the generated field names of properties that support "is-defined" queries. */
public static final String IS_DEFINED_FIELD_NAME = "SLIDE_ISDEFINED";
/** Tokenized field receiving the text produced by the content extractors. */
public static final String CONTENT_FIELD_NAME = "SLIDE_CONTENT";
/** Field name constant; not referenced elsewhere in this class. */
public static final String NULL_FIELD_NAME = "SLIDE_NULL";
// Minute-resolution date pattern used to derive the date bounds below.
// NOTE(review): SimpleDateFormat is not safe for concurrent use and this
// instance is shared statically - confirm that callers synchronise on it.
protected static final SimpleDateFormat DATE_INDEX_FORMAT = new SimpleDateFormat(
"yyyy-MM-dd HH:mm", Locale.UK);
// 19 zero-padded digits; non-negative values get the prefix "b", negative
// values the prefix "a".
// NOTE(review): DecimalFormat is not safe for concurrent use either.
protected static final DecimalFormat INT_INDEX_FORMAT = new DecimalFormat(
"b0000000000000000000;a0000000000000000000");
/** Index representation of the epoch ({@code new Date(0)}). */
public static final String DATE_LOWER_BOUND = DATE_INDEX_FORMAT
.format(new Date(0));
/** Index representation of {@code new Date(Long.MAX_VALUE)}. */
public static final String DATE_UPPER_BOUND = DATE_INDEX_FORMAT
.format(new Date(Long.MAX_VALUE));
/** Index representation of {@code Long.MIN_VALUE}. */
public static final String INT_LOWER_BOUND = INT_INDEX_FORMAT
.format(Long.MIN_VALUE);
/** Index representation of {@code Long.MAX_VALUE}. */
public static final String INT_UPPER_BOUND = INT_INDEX_FORMAT
.format(Long.MAX_VALUE);
/** Upper bound constant for string values. */
public static final String STRING_UPPER_BOUND = "\uffff\uffff";
/** Lower bound constant for string values (the empty string). */
public static final String STRING_LOWER_BOUND = "";
/** Log channel used for all messages written by this class. */
protected static final String LOG_CHANNEL = Index.class.getName();
/** Settings of this index (path, analyzer, property types, thresholds). */
protected IndexConfiguration configuration;
/** Background runner for queued transactions; null until start() creates it. */
protected JobRunner indexThread;
/** Name of this index; used to label the indexing thread. */
protected String indexName;
protected Logger logger;
/**
* Queue of pending IndexTransaction objects. It also serves as the monitor
* for the wait/notify hand-over between scheduling and the indexing thread.
*/
protected LinkedList txnQueue = new LinkedList();
/**
* Counter for recently executed index jobs (insertions, deletions). Will be
* reset after optimization.
*/
private int jobCounter = 0;
/** Set by the constructor when no index existed and an empty one was created. */
private boolean needsInitialization = false;
/**
 * Opens the index described by the given configuration, creating the index
 * directory and an empty index when none exists yet.
 *
 * @param configuration settings of the index (path, analyzer, ...)
 * @param logger        logger used by this index
 * @param name          name of the index, used to label the indexing thread
 * @throws IndexException if the directory cannot be created or the index
 *                        cannot be opened or created
 */
public Index(IndexConfiguration configuration, Logger logger, String name)
        throws IndexException {
    this.logger = logger;
    this.configuration = configuration;
    this.indexName = name;

    final String indexPath = this.configuration.getIndexPath();
    final File indexDir = new File(indexPath);
    if (!indexDir.exists()) {
        if (!indexDir.mkdirs()) {
            throw new IndexException(
                    "Error can't find or create index directory: "
                            + indexPath);
        }
    }

    try {
        final Directory dir = getDirectory();
        if (!IndexReader.indexExists(dir)) {
            // No index yet: create an empty one and remember that it still
            // has to be populated.
            this.needsInitialization = true;
            IndexWriter creator = new IndexWriter(dir, configuration
                    .getAnalyzer(), true);
            creator.close();
        } else if (IndexReader.isLocked(dir)) {
            // Existing index is still locked; release the lock.
            IndexReader.unlock(dir);
        }
    } catch (IOException e) {
        throw new IndexException("Error while creating index: "
                + indexPath, e);
    }

    // TODO make configurable
    BooleanQuery.setMaxClauseCount(10000);
}
/**
 * @return the configuration this index was created with
 */
public IndexConfiguration getConfiguration() {
    return configuration;
}

/**
 * @return the logger this index writes to
 */
public Logger getLogger() {
    return logger;
}

/**
 * @return true if the constructor had to create a new, empty index
 */
public boolean needsInitialization() {
    return needsInitialization;
}
/**
 * Starts the background indexing thread if asynchronous indexing is
 * enabled. Does nothing when indexing is synchronous or when a thread has
 * already been created.
 */
public void start() {
    // Use the same accessor as scheduleIndexTransaction() so that both
    // methods always agree on whether transactions are queued.
    if (!configuration.isIndexAsynchron()) {
        return;
    }
    if (this.indexThread != null) {
        // Already started. This is deliberately not logged: start() can be
        // called very often and a warning would flood the log.
        return;
    }
    this.indexThread = new JobRunner();
    this.indexThread.setName("Indexing Thread (" + this.indexName + ")");
    this.indexThread.setPriority(configuration.getPriority());
    this.indexThread.start();
}
/**
 * Stops the background indexing thread, if one was started, and then
 * executes every transaction still waiting in the queue on the calling
 * thread.
 *
 * The thread reference is cleared afterwards so that start() can create a
 * new runner. A failing transaction is logged and does not prevent the
 * remaining transactions from being executed. If the calling thread is
 * interrupted while waiting for the runner, its interrupt status is
 * restored before this method returns.
 */
public void stop() {
    if (this.indexThread == null) {
        return;
    }

    // ask the indexing thread to terminate and wake it if it is waiting
    boolean interrupted = false;
    this.indexThread.interrupt();
    synchronized (this.txnQueue) {
        this.txnQueue.notify();
    }
    try {
        this.indexThread.join();
    } catch (InterruptedException e) {
        // remember the interrupt; it is re-asserted once the queue is drained
        interrupted = true;
    }
    this.indexThread = null;

    // execute remaining indexing jobs; each one is removed from the queue
    // under its lock so that it can never be executed twice
    while (true) {
        IndexTransaction txn;
        synchronized (this.txnQueue) {
            if (this.txnQueue.isEmpty()) {
                break;
            }
            txn = (IndexTransaction) this.txnQueue.removeFirst();
        }
        try {
            executeIndexTransaction(txn);
        } catch (IndexException e) {
            logger.log("Error while executing job", e, LOG_CHANNEL,
                    Logger.ERROR);
        }
    }

    if (interrupted) {
        Thread.currentThread().interrupt();
    }
}
/**
 * Creates a new searcher on the configured index path. A fresh instance
 * is created on every call.
 *
 * @return a new IndexSearcher
 * @throws IOException if the searcher cannot be opened
 */
public IndexSearcher getSearcher() throws IOException {
    // TODO can this be reused?
    IndexSearcher searcher = new IndexSearcher(configuration.getIndexPath());
    return searcher;
}

/**
 * Returns the file system based Lucene directory for the configured index
 * path, without creating a new index there.
 */
private Directory getDirectory() throws IOException {
    Directory fsDirectory = FSDirectory.getDirectory(
            configuration.getIndexPath(), false);
    return fsDirectory;
}
/** Builds an indexed, untokenized field whose value is not stored. */
private Field unstoredString(String fieldName, String value) {
    return new Field(fieldName, value, Field.Store.NO,
            Field.Index.UN_TOKENIZED);
}

/** Builds an indexed, untokenized field whose value is stored. */
private Field storedString(String fieldName, String value) {
    return new Field(fieldName, value, Field.Store.YES,
            Field.Index.UN_TOKENIZED);
}

/** Builds a tokenized field from a string; the value is not stored. */
private Field textField(String fieldName, String value) {
    return new Field(fieldName, value, Field.Store.NO,
            Field.Index.TOKENIZED);
}

/** Builds a field whose value is read from the given reader. */
private Field textField(String fieldName, Reader value) {
    return new Field(fieldName, value);
}
/**
 * Creates a lucene index document for a properties indexer.
 *
 * The document contains the key, URI and revision fields, one scope field
 * per path prefix of the URI, the depth of the URI, the matching known
 * resource types and one field per indexed property. A property can be
 * indexed under several types (string, date, int, text) when the
 * configuration says so. Properties with a null value are skipped.
 *
 * @param uri
 *            resource
 * @param descriptor
 *            properties to be indexed
 * @return the populated document
 */
private Document createLuceneDocument(String uri,
        NodeRevisionDescriptor descriptor) {
    Document doc = new Document();
    doc.add(unstoredString(Index.KEY_FIELD_NAME, configuration.generateKey(
            uri, descriptor.getRevisionNumber())));
    doc.add(storedString(Index.URI_FIELD_NAME, uri));
    doc.add(storedString(Index.REVISION_FIELD_NAME, descriptor
            .getRevisionNumber().toString()));

    // scopes: "/" plus every path prefix of the uri
    StringTokenizer tokenizer = new StringTokenizer(uri, "/");
    StringBuffer buffer = new StringBuffer(uri.length());
    doc.add(unstoredString(Index.SCOPE_FIELD_NAME, "/"));
    int depth = 0;
    while (tokenizer.hasMoreTokens()) {
        buffer.append("/").append(tokenizer.nextToken());
        doc.add(unstoredString(Index.SCOPE_FIELD_NAME, buffer.toString()));
        depth++;
    }
    doc.add(unstoredString(Index.DEPTH_FIELD_NAME, configuration
            .intToIndexString(depth)));

    // resource type; guarded because a missing value must not abort the
    // indexing of the remaining properties
    String rtype = descriptor.getResourceType();
    if (rtype != null) {
        for (Iterator i = configuration.knownResourceTypes(); i.hasNext();) {
            String name = (String) i.next();
            if (rtype.indexOf(name) != -1) {
                doc.add(unstoredString(configuration.generateFieldName(
                        NodeProperty.DEFAULT_NAMESPACE, "resourcetype"),
                        name));
            }
        }
    }

    // all other properties
    for (Enumeration e = descriptor.enumerateProperties(); e
            .hasMoreElements();) {
        NodeProperty property = (NodeProperty) e.nextElement();
        PropertyName propertyName = property.getPropertyName();
        Object value = property.getValue();
        if (value == null) {
            continue;
        }
        if (!configuration.isIndexedProperty(propertyName)) {
            continue;
        }

        // every typed variant of the property is indexed under this name
        String fieldName = configuration.generateFieldName(property
                .getNamespace(), property.getName());

        if (configuration.isStringProperty(propertyName)) {
            doc.add(unstoredString(fieldName, value.toString()));
        }
        if (configuration.isDateProperty(propertyName)) {
            Date date = IndexConfiguration.getDateValue(value);
            if (date != null) {
                doc.add(unstoredString(fieldName, configuration
                        .dateToIndexString(date)));
            }
        }
        if (configuration.isIntProperty(propertyName)) {
            try {
                doc.add(unstoredString(fieldName, configuration
                        .intToIndexString(Long.parseLong(value
                                .toString()))));
            } catch (NumberFormatException ex) {
                // the value is left out of the int index, but the
                // problem is reported instead of being silently ignored
                logger.log("Ignoring non-numeric value '" + value
                        + "' of int property " + fieldName + " on "
                        + uri, LOG_CHANNEL, Logger.WARNING);
            }
        }
        if (configuration.isTextProperty(propertyName)) {
            doc.add(textField(fieldName, value.toString()));
        }
        if (configuration.supportsIsDefined(propertyName)) {
            doc.add(unstoredString(Index.IS_DEFINED_FIELD_NAME, fieldName));
        }
    }
    return doc;
}
/**
 * Creates a lucene document for content indexing.
 *
 * Besides the key, URI, revision, scope and depth fields, the document
 * gets one content field per content extractor registered for the
 * resource.
 * NOTE(review): the same stream is handed to every extractor - confirm
 * that at most one extractor applies, or that the stream can be re-read.
 *
 * @param uri        resource
 * @param descriptor descriptor of the revision being indexed
 * @param content    content stream passed to the extractors
 * @return the populated document
 * @throws IndexException     declared for callers
 * @throws ExtractorException if an extractor fails
 */
private Document createLuceneDocument(String uri,
        NodeRevisionDescriptor descriptor, InputStream content)
        throws IndexException, ExtractorException {
    Document result = new Document();

    // identification fields
    result.add(unstoredString(Index.KEY_FIELD_NAME,
            configuration.generateKey(uri, descriptor.getRevisionNumber())));
    result.add(storedString(Index.URI_FIELD_NAME, uri));
    result.add(storedString(Index.REVISION_FIELD_NAME,
            descriptor.getRevisionNumber().toString()));

    // scopes
    StringTokenizer segments = new StringTokenizer(uri, "/");
    StringBuffer prefix = new StringBuffer(uri.length());
    result.add(unstoredString(Index.SCOPE_FIELD_NAME, "/"));
    int segmentCount = 0;
    while (segments.hasMoreTokens()) {
        prefix.append("/").append(segments.nextToken());
        result.add(unstoredString(Index.SCOPE_FIELD_NAME, prefix.toString()));
        segmentCount++;
    }
    result.add(unstoredString(Index.DEPTH_FIELD_NAME,
            configuration.intToIndexString(segmentCount)));

    // extracted content
    List extractors = ExtractorManager.getInstance().getContentExtractors(
            configuration.getNamespaceName(), uri, descriptor);
    Iterator it = extractors.iterator();
    while (it.hasNext()) {
        ContentExtractor extractor = (ContentExtractor) it.next();
        result.add(textField(Index.CONTENT_FIELD_NAME,
                extractor.extract(content)));
    }
    return result;
}
/**
 * Schedules an index transaction. If asynchronous indexing is enabled, the
 * transaction is appended to the indexing queue and the indexing thread is
 * notified; otherwise the transaction is executed immediately on the
 * calling thread.
 *
 * @param removeJobs
 *            Set of jobs for deleting objects from the index.
 * @param addJobs
 *            Set of jobs for adding new objects to the index.
 * @throws IndexException
 *             if the transaction is executed immediately and fails
 */
public void scheduleIndexTransaction(Set removeJobs, Set addJobs)
        throws IndexException {
    if (!configuration.isIndexAsynchron()) {
        // synchronous mode: run right away
        executeIndexTransaction(removeJobs, addJobs);
        return;
    }

    // asynchronous mode: hand over to the indexing thread
    IndexTransaction pending = new IndexTransaction(removeJobs, addJobs);
    synchronized (txnQueue) {
        txnQueue.addLast(pending);
        txnQueue.notify();
    }
}
/**
 * Executes the remove and add jobs of the given transaction.
 *
 * @param txn transaction to execute
 * @throws IndexException if the index cannot be updated
 */
synchronized void executeIndexTransaction(IndexTransaction txn)
        throws IndexException {
    executeIndexTransaction(txn.removeJobs, txn.addJobs);
}

/**
 * Applies one transaction to the index: first all deletions through an
 * IndexReader, then all additions through an IndexWriter. The index is
 * optimized once more jobs than the configured optimize threshold have
 * been executed since the last optimization.
 *
 * A content job whose extraction fails is logged as a warning and skipped;
 * any other failure aborts the transaction.
 *
 * @param removeJobs set of IndexJob objects to delete from the index
 * @param addJobs    set of IndexJob objects to add to the index
 * @throws IndexException if reading or writing the index fails
 */
synchronized void executeIndexTransaction(Set removeJobs, Set addJobs)
        throws IndexException {
    IndexWriter writer = null;
    IndexReader reader = null;
    try {
        // execute delete jobs
        if (removeJobs.size() > 0) {
            reader = IndexReader.open(getDirectory());
            for (Iterator i = removeJobs.iterator(); i.hasNext();) {
                IndexJob job = (IndexJob) i.next();
                if (logger.isEnabled(LOG_CHANNEL, Logger.DEBUG)) {
                    logger.log("remove: " + job.key, LOG_CHANNEL,
                            Logger.DEBUG);
                }
                reader.deleteDocuments(new Term(Index.KEY_FIELD_NAME, job
                        .getKey()));
                this.jobCounter++;
            }
            // the reader must be closed before a writer is opened
            reader.close();
            reader = null;
        }

        // execute index jobs; a writer is also opened when there is nothing
        // to add but an optimization is due. The test is the same one that
        // triggers optimize() below, so a writer is never opened for nothing.
        if (addJobs.size() > 0
                || this.jobCounter > configuration.getOptimizeThreshold()) {
            writer = new IndexWriter(getDirectory(), configuration
                    .getAnalyzer(), false);
            for (Iterator i = addJobs.iterator(); i.hasNext();) {
                IndexJob job = (IndexJob) i.next();
                Document doc;
                if (job.content != null) {
                    if (logger.isEnabled(LOG_CHANNEL, Logger.DEBUG)) {
                        logger.log("index content: " + job.key,
                                LOG_CHANNEL, Logger.DEBUG);
                    }
                    try {
                        doc = createLuceneDocument(job.uri, job.descriptor,
                                job.content);
                        writer.addDocument(doc);
                    } catch (ExtractorException e) {
                        logger.log("Error while extracting content: "
                                + job.uri + " (" + e.toString() + ")",
                                LOG_CHANNEL, Logger.WARNING);
                    }
                } else {
                    if (logger.isEnabled(LOG_CHANNEL, Logger.DEBUG)) {
                        logger.log("index properties: " + job.key,
                                LOG_CHANNEL, Logger.DEBUG);
                    }
                    doc = createLuceneDocument(job.uri, job.descriptor);
                    writer.addDocument(doc);
                }
                this.jobCounter++;
            }
            if (this.jobCounter > configuration.getOptimizeThreshold()) {
                writer.optimize();
                logger.log("optimize", LOG_CHANNEL, Logger.DEBUG);
                this.jobCounter = 0;
            }
            writer.close();
            writer = null;
        }
    } catch (IndexException e) {
        // already the right type; do not wrap it a second time
        throw e;
    } catch (Exception e) {
        throw new IndexException(e);
    } finally {
        // Close both resources independently: a failure while closing the
        // reader must not leave the writer (and its lock) open.
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                logger.log("Exception after executeIndexTransaction", e,
                        LOG_CHANNEL, Logger.ERROR);
            }
        }
        if (writer != null) {
            try {
                writer.close();
            } catch (IOException e) {
                logger.log("Exception after executeIndexTransaction", e,
                        LOG_CHANNEL, Logger.ERROR);
            }
        }
    }
}
/**
 * Creates an IndexJob for <em>property indexing</em>.
 *
 * @param uri        resource to index
 * @param descriptor revision descriptor whose properties are indexed
 * @return the new job
 */
public IndexJob createIndexJob(Uri uri, NodeRevisionDescriptor descriptor) {
    IndexJob propertyJob = new IndexJob(uri, descriptor);
    return propertyJob;
}

/**
 * Creates an IndexJob for <em>content indexing</em>.
 *
 * @param uri        resource to index
 * @param descriptor revision descriptor of the resource
 * @param content    stream with the content to index
 * @return the new job
 */
public IndexJob createIndexJob(Uri uri, NodeRevisionDescriptor descriptor,
        InputStream content) {
    IndexJob contentJob = new IndexJob(uri, descriptor, content);
    return contentJob;
}

/**
 * Creates an IndexJob for <em>removing</em> a resource revision.
 *
 * @param uri    resource to remove
 * @param number revision number to remove
 * @return the new job
 */
public IndexJob createDeleteJob(Uri uri, NodeRevisionNumber number) {
    IndexJob deleteJob = new IndexJob(uri, number);
    return deleteJob;
}
/**
 * A single unit of index work: the removal, property indexing or content
 * indexing of one resource revision. Two jobs are equal when their keys
 * are equal.
 */
public class IndexJob {
    protected String key;
    protected String uri;
    protected NodeRevisionDescriptor descriptor;
    protected InputStream content;

    /** Delete job: carries neither a descriptor nor content. */
    protected IndexJob(Uri uri, NodeRevisionNumber number) {
        this.uri = uri.toString();
        this.descriptor = null;
        this.content = null;
        this.key = configuration.generateKey(this.uri, number);
    }

    /** Property indexing job: carries a descriptor but no content. */
    protected IndexJob(Uri uri, NodeRevisionDescriptor descriptor) {
        this(uri, descriptor, null);
    }

    /** Content indexing job: carries a descriptor and the content stream. */
    protected IndexJob(Uri uri, NodeRevisionDescriptor descriptor,
            InputStream content) {
        this.uri = uri.toString();
        this.descriptor = descriptor;
        this.content = content;
        this.key = configuration.generateKey(this.uri, descriptor
                .getRevisionNumber());
    }

    /** @return the key generated from the URI and the revision number */
    protected String getKey() {
        return key;
    }

    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof IndexJob)) {
            return false;
        }
        IndexJob other = (IndexJob) obj;
        return this.key.equals(other.key);
    }

    public int hashCode() {
        return key.hashCode();
    }

    public String toString() {
        return key;
    }
}
/**
 * Pairs the delete jobs and the add jobs that belong to one transaction.
 */
private static class IndexTransaction {
    /** Jobs deleting documents from the index. */
    final Set removeJobs;
    /** Jobs adding documents to the index. */
    final Set addJobs;

    IndexTransaction(Set removeJobs, Set addJobs) {
        this.addJobs = addJobs;
        this.removeJobs = removeJobs;
    }
}
/**
 * Thread for executing index jobs scheduled in the queue.
 *
 * The thread takes transactions from the queue one at a time and executes
 * them. It terminates once it has been interrupted and the queue is empty.
 */
private class JobRunner extends Thread {
    public void run() {
        for (;;) {
            IndexTransaction next;
            synchronized (txnQueue) {
                while (txnQueue.isEmpty()) {
                    // Nothing to do: leave if the runner has been
                    // interrupted, otherwise wait for the next job.
                    if (isInterrupted()) {
                        return;
                    }
                    try {
                        txnQueue.wait();
                    } catch (InterruptedException e) {
                        return;
                    }
                }
                // the queue holds at least one transaction here
                next = (IndexTransaction) txnQueue.removeFirst();
            }
            // execute outside the queue lock so that new transactions can
            // be scheduled in the meantime
            try {
                executeIndexTransaction(next);
            } catch (Exception e) {
                logger.log("Error while executing job", e, LOG_CHANNEL,
                        Logger.ERROR);
            }
        }
    }
}
}
The table below shows all metrics for Index.java.




