Lucene检索文件(txt,jsp,html)
Lucene检索的文件包括txt、jsp、html格式(如果是word和pdf格式,需要先进行格式转换)。建立索引文件的代码如下:
import org.apache.lucene.index.*;
import org.apache.lucene.analysis.standard.*;
import org.apache.lucene.document.*;
import java.io.*;

/**
 * Builds a fresh Lucene index in the "myindex" directory from the regular
 * files located directly inside the "mydoc" directory.
 *
 * Each indexed file becomes one document with three fields: "path",
 * "modified" and "contents".
 *
 * @author Eric Zhang
 */
public class IndexFiles {

    /**
     * Indexes every regular file in "mydoc" and prints one line per file
     * plus a final total. Any failure is printed to standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            File docDir = new File("mydoc");
            String[] fileNames = docDir.list();
            // File.list() returns null when the path is missing or not a
            // directory; check before the writer overwrites the old index.
            if (fileNames == null) {
                throw new FileNotFoundException("Not a readable directory: " + docDir.getPath());
            }

            int added = 0;
            // The third argument 'true' creates a new index, replacing any existing one.
            IndexWriter writer = new IndexWriter("myindex", new StandardAnalyzer(), true);
            try {
                for (int i = 0; i < fileNames.length; i++) {
                    File file = new File(docDir, fileNames[i]);
                    // Sub-directories cannot be opened as a stream; skip them
                    // instead of aborting the whole run.
                    if (!file.isFile()) {
                        continue;
                    }
                    indexFile(writer, file);
                    added++;
                }
                writer.optimize();
            } finally {
                // Always close the writer, even when indexing fails part-way.
                writer.close();
            }
            System.out.println("Has Added Total: " + added);
        } catch (Exception e) {
            System.out.println(e);
        }
    }

    /**
     * Adds a single file to the index as one document.
     *
     * @param writer open index writer that receives the document
     * @param file   regular file to index
     * @throws IOException if the file cannot be read or the index cannot be written
     */
    private static void indexFile(IndexWriter writer, File file) throws IOException {
        Document doc = new Document();
        doc.add(Field.Text("path", file.getPath()));
        doc.add(Field.Keyword("modified", DateField.timeToString(file.lastModified())));

        // NOTE(review): InputStreamReader uses the platform default charset here;
        // confirm it matches the encoding of the files being indexed.
        Reader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
        try {
            doc.add(Field.Text("contents", reader));
            // The reader must stay open until the document has been added.
            writer.addDocument(doc);
        } finally {
            reader.close();
        }
        System.out.println("Added : " + doc.get("path"));
    }
}

检索索引的java代码如下:
import org.apache.lucene.analysis.*;import org.apache.lucene.analysis.standard.*;import org.apache.lucene.search.*;import org.apache.lucene.queryParser.*;import org.apache.lucene.document.*;//import com.augmentum.hrms.*;import java.util.Date;/** * @author Eric Zhang */public class SearchFile { public static void main(String[] args) { //XMap a = new XMap("");Analyzer anlzr = new StandardAnalyzer();try{ Query q = QueryParser.parse("数据库", "contents", anlzr); System.out.println("Searching for : " + q.toString("contents")); Searcher serch = new IndexSearcher("myindex"); Hits hts = serch.search(q); for(int i=0; i<hts.length(); i++){ Document doc = hts.doc(i); String path = doc.get("path"); System.out.println("Find: " +i+": "+ path); System.out.println("Find: " + doc.get("modified")); System.out.println("Find: " + doc.get("path")); } System.out.println("Find Total: " + hts.length());}catch(Exception e){ System.out.println(e);} }}
页:
[1]