001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.lucene.demo.facet; 018 019import java.io.IOException; 020import java.util.List; 021import java.util.Locale; 022import java.util.concurrent.ExecutorService; 023import java.util.concurrent.Executors; 024import org.apache.lucene.analysis.core.WhitespaceAnalyzer; 025import org.apache.lucene.document.Document; 026import org.apache.lucene.document.Field; 027import org.apache.lucene.document.NumericDocValuesField; 028import org.apache.lucene.document.StringField; 029import org.apache.lucene.facet.FacetsCollector; 030import org.apache.lucene.facet.FacetsCollectorManager; 031import org.apache.lucene.facet.FacetsConfig; 032import org.apache.lucene.facet.range.DynamicRangeUtil; 033import org.apache.lucene.index.DirectoryReader; 034import org.apache.lucene.index.IndexWriter; 035import org.apache.lucene.index.IndexWriterConfig; 036import org.apache.lucene.search.IndexSearcher; 037import org.apache.lucene.search.LongValuesSource; 038import org.apache.lucene.search.MatchAllDocsQuery; 039import org.apache.lucene.store.ByteBuffersDirectory; 040import org.apache.lucene.store.Directory; 041import org.apache.lucene.util.NamedThreadFactory; 042 043/** 044 * Demo dynamic range faceting. 045 * 046 * <p>The results look like so: min: 63 max: 75 centroid: 69.000000 count: 2 weight: 137 min: 79 047 * max: 96 centroid: 86.000000 count: 3 weight: 83 048 * 049 * <p>We've computed dynamic ranges over popularity weighted by number of books. We can read the 050 * results as so: There are 137 books written by authors in the 63 to 75 popularity range. 051 * 052 * <p>How it works: We collect all the values (popularity) and their weights (book counts). We sort 053 * the values and find the approximate weight per range. In this case the total weight is 220 (total 054 * books by all authors) and we want 2 ranges, so we're aiming for 110 books in each range. We add 055 * Chesterton to the first range, since he is the least popular author. He's written a lot of books, 056 * the range's weight is 90. We add Tolstoy to the first range, since he is next in line of 057 * popularity. He's written another 47 books, which brings the total weight to 137. We're over the 058 * 110 target weight, so we stop and add everyone left to the second range. 059 */ 060public class DynamicRangeFacetsExample { 061 062 private final Directory indexDir = new ByteBuffersDirectory(); 063 private final FacetsConfig config = new FacetsConfig(); 064 065 /** Empty constructor */ 066 public DynamicRangeFacetsExample() {} 067 068 /** Build the example index. */ 069 private void index() throws IOException { 070 IndexWriter indexWriter = 071 new IndexWriter( 072 indexDir, 073 new IndexWriterConfig(new WhitespaceAnalyzer()) 074 .setOpenMode(IndexWriterConfig.OpenMode.CREATE)); 075 076 Document doc = new Document(); 077 doc.add(new StringField("Author", "J. R. R. Tolkien", Field.Store.NO)); 078 doc.add(new NumericDocValuesField("Popularity", 96)); 079 doc.add(new NumericDocValuesField("Books", 24)); 080 indexWriter.addDocument(config.build(doc)); 081 082 doc = new Document(); 083 doc.add(new StringField("Author", "C. S. Lewis", Field.Store.NO)); 084 doc.add(new NumericDocValuesField("Popularity", 83)); 085 doc.add(new NumericDocValuesField("Books", 48)); 086 indexWriter.addDocument(config.build(doc)); 087 088 doc = new Document(); 089 doc.add(new StringField("Author", "G. K. Chesterton", Field.Store.NO)); 090 doc.add(new NumericDocValuesField("Popularity", 63)); 091 doc.add(new NumericDocValuesField("Books", 90)); 092 indexWriter.addDocument(config.build(doc)); 093 indexWriter.commit(); 094 095 doc = new Document(); 096 doc.add(new StringField("Author", "Fyodor Dostoevsky", Field.Store.NO)); 097 doc.add(new NumericDocValuesField("Popularity", 79)); 098 doc.add(new NumericDocValuesField("Books", 11)); 099 indexWriter.addDocument(config.build(doc)); 100 101 doc = new Document(); 102 doc.add(new StringField("Author", "Leo Tolstoy", Field.Store.NO)); 103 doc.add(new NumericDocValuesField("Popularity", 75)); 104 doc.add(new NumericDocValuesField("Books", 47)); 105 indexWriter.addDocument(config.build(doc)); 106 107 indexWriter.close(); 108 } 109 110 /** User runs a query and counts facets. */ 111 private List<DynamicRangeUtil.DynamicRangeInfo> search() throws IOException { 112 DirectoryReader indexReader = DirectoryReader.open(indexDir); 113 IndexSearcher searcher = new IndexSearcher(indexReader); 114 115 LongValuesSource valuesSource = LongValuesSource.fromLongField("Popularity"); 116 LongValuesSource weightsSource = LongValuesSource.fromLongField("Books"); 117 118 // Aggregates the facet counts 119 FacetsCollectorManager fcm = new FacetsCollectorManager(); 120 121 // MatchAllDocsQuery is for "browsing" (counts facets 122 // for all non-deleted docs in the index); normally 123 // you'd use a "normal" query: 124 FacetsCollector fc = 125 FacetsCollectorManager.search(searcher, new MatchAllDocsQuery(), 10, fcm).facetsCollector(); 126 127 try (ExecutorService executor = 128 Executors.newFixedThreadPool(2, new NamedThreadFactory("dynamic-ranges"))) { 129 // We ask for 2 ranges over popularity weighted by book count 130 return DynamicRangeUtil.computeDynamicRanges( 131 "Books", weightsSource, valuesSource, fc, 2, executor); 132 } 133 } 134 135 /** Runs the search example. */ 136 public List<DynamicRangeUtil.DynamicRangeInfo> runSearch() throws IOException { 137 index(); 138 return search(); 139 } 140 141 /** Runs the search example and prints the results. */ 142 public static void main(String[] args) throws Exception { 143 System.out.println("Dynamic range facets example:"); 144 System.out.println("-----------------------"); 145 DynamicRangeFacetsExample example = new DynamicRangeFacetsExample(); 146 List<DynamicRangeUtil.DynamicRangeInfo> results = example.runSearch(); 147 for (DynamicRangeUtil.DynamicRangeInfo range : results) { 148 System.out.printf( 149 Locale.ROOT, 150 "min: %d max: %d centroid: %f count: %d weight: %d%n", 151 range.min(), 152 range.max(), 153 range.centroid(), 154 range.count(), 155 range.weight()); 156 } 157 } 158}