/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.demo.facet;

import java.io.Closeable;
import java.io.IOException;
import java.util.Random;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.facet.DrillDownQuery;
import org.apache.lucene.facet.DrillSideways;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsCollectorManager;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.range.LongRange;
import org.apache.lucene.facet.range.LongRangeFacetCounts;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;

/**
 * Shows simple usage of dynamic range faceting.
 *
 * <p>{@link #index()} must be called before any of the search or drill-down methods, because it is
 * the only place where the searcher they rely on is created.
 */
public class RangeFacetsExample implements Closeable {

  private final Directory indexDir = new ByteBuffersDirectory();

  // Assigned by index(); stays null until then.
  private IndexSearcher searcher;

  // One range per hour for 7 days (24 * 7 = 168); the slots are filled in by index().
  private final LongRange[] logTimestampRanges = new LongRange[168];

  // "Now" in seconds, captured once so that every range and document uses the same reference.
  private final long nowSec = System.currentTimeMillis() / 1000L;

  final LongRange PAST_HOUR = new LongRange("Past hour", nowSec - 3600, true, nowSec, true);
  final LongRange PAST_SIX_HOURS =
      new LongRange("Past six hours", nowSec - 6 * 3600, true, nowSec, true);
  final LongRange PAST_DAY = new LongRange("Past day", nowSec - 24 * 3600, true, nowSec, true);

  /** Empty constructor */
  public RangeFacetsExample() {}

  /**
   * Build the example index.
   *
   * <p>Writes two groups of documents, fills {@code logTimestampRanges}, and opens the
   * near-real-time searcher used by the other methods.
   *
   * @throws IOException if writing the index or opening the reader fails
   */
  public void index() throws IOException {
    // try-with-resources so the writer is released even if adding a document fails.
    try (IndexWriter indexWriter =
        new IndexWriter(
            indexDir,
            new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE))) {

      // Add documents with a fake timestamp: "now", 1000 sec before
      // "now", 2000 sec before "now", ...:
      for (int i = 0; i < 100; i++) {
        Document doc = new Document();
        long then = nowSec - i * 1000L;
        // Add as doc values field, so we can compute range facets:
        doc.add(new NumericDocValuesField("timestamp", then));
        // Add as numeric field so we can drill-down:
        doc.add(new LongPoint("timestamp", then));
        indexWriter.addDocument(doc);
      }

      // Add "error" documents spread over 168 consecutive one-hour buckets (24 * 7 = 168 hours).
      // Bucket i covers the offsets (i * 3600, (i + 1) * 3600] in seconds, counted from 0:
      Random random = new Random();
      long startTime = 0;
      for (int i = 0; i < logTimestampRanges.length; i++) {
        long endTime = (i + 1) * 3600L;
        // Choose a relatively large number, e.g., "35", to create variation in count for
        // the top n children, so that calling getTopChildren(10) can return top 10 children with
        // different counts
        for (int j = 0; j < i % 35; j++) {
          Document doc = new Document();
          // Randomly generate a timestamp within the current range
          long randomTimestamp = random.nextLong(1, endTime - startTime) + startTime;
          // Add as doc values field, so we can compute range facets:
          doc.add(new NumericDocValuesField("error timestamp", randomTimestamp));
          doc.add(
              new StringField(
                  "error message",
                  "server encountered error at " + randomTimestamp,
                  Field.Store.NO));
          indexWriter.addDocument(doc);
        }
        logTimestampRanges[i] =
            new LongRange("Hour " + i + "-" + (i + 1), startTime, false, endTime, true);
        startTime = endTime;
      }

      // Open near-real-time searcher (must happen while the writer is still open)
      searcher = new IndexSearcher(DirectoryReader.open(indexWriter));
    }
  }

  /** Returns a fresh, default facets configuration. */
  private FacetsConfig getConfig() {
    return new FacetsConfig();
  }

  /**
   * User runs a query and counts facets.
   *
   * @return the counts of the "timestamp" field for the past hour, past six hours and past day
   *     ranges
   * @throws IOException if the search or the facet counting fails
   */
  public FacetResult search() throws IOException {

    // MatchAllDocsQuery is for "browsing" (counts facets
    // for all non-deleted docs in the index); normally
    // you'd use a "normal" query:
    FacetsCollector fc =
        FacetsCollectorManager.search(
                searcher, new MatchAllDocsQuery(), 10, new FacetsCollectorManager())
            .facetsCollector();

    Facets facets = new LongRangeFacetCounts("timestamp", fc, PAST_HOUR, PAST_SIX_HOURS, PAST_DAY);
    return facets.getAllChildren("timestamp");
  }

  /**
   * User runs a query and asks for the top 10 hourly ranges of the "error timestamp" field.
   *
   * @return the result of {@code getTopChildren(10, "error timestamp")} over the hourly ranges
   *     built by {@link #index()}
   * @throws IOException if the search or the facet counting fails
   */
  public FacetResult searchTopChildren() throws IOException {

    // Aggregates the facet counts
    FacetsCollectorManager fcm = new FacetsCollectorManager();

    // MatchAllDocsQuery is for "browsing" (counts facets
    // for all non-deleted docs in the index); normally
    // you'd use a "normal" query:
    FacetsCollector fc =
        FacetsCollectorManager.search(searcher, new MatchAllDocsQuery(), 10, fcm).facetsCollector();

    Facets facets = new LongRangeFacetCounts("error timestamp", fc, logTimestampRanges);
    return facets.getTopChildren(10, "error timestamp");
  }

  /**
   * User drills down on the specified range.
   *
   * @param range the range of the "timestamp" field to restrict the results to
   * @return the top 10 hits inside {@code range}
   * @throws IOException if the search fails
   */
  public TopDocs drillDown(LongRange range) throws IOException {

    // Passing no baseQuery means we drill down on all
    // documents ("browse only"):
    DrillDownQuery q = new DrillDownQuery(getConfig());

    q.add("timestamp", LongPoint.newRangeQuery("timestamp", range.min, range.max));
    return searcher.search(q, 10);
  }

  /**
   * User drills down on the specified range, and also computes drill sideways counts.
   *
   * @param range the range of the "timestamp" field to restrict the results to
   * @return the top 10 hits inside {@code range} together with the sideways range counts
   * @throws IOException if the search or the facet counting fails
   */
  public DrillSideways.DrillSidewaysResult drillSideways(LongRange range) throws IOException {
    // Passing no baseQuery means we drill down on all
    // documents ("browse only"):
    DrillDownQuery q = new DrillDownQuery(getConfig());
    q.add("timestamp", LongPoint.newRangeQuery("timestamp", range.min, range.max));

    // DrillSideways only handles taxonomy and sorted set drill facets by default; to do range
    // facets we must subclass and override the
    // buildFacetsResult method.
    DrillSideways.DrillSidewaysResult result =
        new DrillSideways(searcher, getConfig(), null, null) {
          @Override
          protected Facets buildFacetsResult(
              FacetsCollector drillDowns,
              FacetsCollector[] drillSideways,
              String[] drillSidewaysDims)
              throws IOException {
            // If we had other dims we would also compute their drill-down or drill-sideways facets
            // here:
            assert drillSidewaysDims[0].equals("timestamp");
            return new LongRangeFacetCounts(
                "timestamp", drillSideways[0], PAST_HOUR, PAST_SIX_HOURS, PAST_DAY);
          }
        }.search(q, 10);

    return result;
  }

  /**
   * Closes the index reader (if {@link #index()} created one) and the index directory.
   *
   * @throws IOException if closing either resource fails
   */
  @Override
  public void close() throws IOException {
    // The searcher is still null when index() was never called or failed part-way; IOUtils.close
    // skips null entries, so the directory is released in that case too.
    Closeable reader = searcher == null ? null : searcher.getIndexReader();
    IOUtils.close(reader, indexDir);
  }

  /**
   * Runs the search and drill-down examples and prints the results.
   *
   * @param args ignored
   * @throws Exception if indexing or any of the searches fails
   */
  public static void main(String[] args) throws Exception {
    // try-with-resources so the reader and directory are released even when an example throws.
    try (RangeFacetsExample example = new RangeFacetsExample()) {
      example.index();

      System.out.println("Facet counting example:");
      System.out.println("-----------------------");
      System.out.println(example.search());

      System.out.println("\n");
      System.out.println("Facet counting example:");
      System.out.println("-----------------------");
      System.out.println(example.searchTopChildren());

      System.out.println("\n");
      System.out.println("Facet drill-down example (timestamp/Past six hours):");
      System.out.println("---------------------------------------------");
      TopDocs hits = example.drillDown(example.PAST_SIX_HOURS);
      System.out.println(hits.totalHits + " totalHits");

      System.out.println("\n");
      System.out.println("Facet drill-sideways example (timestamp/Past six hours):");
      System.out.println("---------------------------------------------");
      DrillSideways.DrillSidewaysResult sideways = example.drillSideways(example.PAST_SIX_HOURS);
      System.out.println(sideways.hits.totalHits + " totalHits");
      System.out.println(sideways.facets.getTopChildren(10, "timestamp"));
    }
  }
}