001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.lucene.demo.facet;
018
019import java.io.Closeable;
020import java.io.IOException;
021import java.util.Random;
022import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
023import org.apache.lucene.document.Document;
024import org.apache.lucene.document.Field;
025import org.apache.lucene.document.LongPoint;
026import org.apache.lucene.document.NumericDocValuesField;
027import org.apache.lucene.document.StringField;
028import org.apache.lucene.facet.DrillDownQuery;
029import org.apache.lucene.facet.DrillSideways;
030import org.apache.lucene.facet.FacetResult;
031import org.apache.lucene.facet.Facets;
032import org.apache.lucene.facet.FacetsCollector;
033import org.apache.lucene.facet.FacetsCollectorManager;
034import org.apache.lucene.facet.FacetsConfig;
035import org.apache.lucene.facet.range.LongRange;
036import org.apache.lucene.facet.range.LongRangeFacetCounts;
037import org.apache.lucene.index.DirectoryReader;
038import org.apache.lucene.index.IndexWriter;
039import org.apache.lucene.index.IndexWriterConfig;
040import org.apache.lucene.index.IndexWriterConfig.OpenMode;
041import org.apache.lucene.search.IndexSearcher;
042import org.apache.lucene.search.MatchAllDocsQuery;
043import org.apache.lucene.search.TopDocs;
044import org.apache.lucene.store.ByteBuffersDirectory;
045import org.apache.lucene.store.Directory;
046import org.apache.lucene.util.IOUtils;
047
048/** Shows simple usage of dynamic range faceting. */
049public class RangeFacetsExample implements Closeable {
050
051  private final Directory indexDir = new ByteBuffersDirectory();
052  private IndexSearcher searcher;
053  private LongRange[] logTimestampRanges = new LongRange[168];
054  private final long nowSec = System.currentTimeMillis() / 1000L;
055
056  final LongRange PAST_HOUR = new LongRange("Past hour", nowSec - 3600, true, nowSec, true);
057  final LongRange PAST_SIX_HOURS =
058      new LongRange("Past six hours", nowSec - 6 * 3600, true, nowSec, true);
059  final LongRange PAST_DAY = new LongRange("Past day", nowSec - 24 * 3600, true, nowSec, true);
060
061  /** Empty constructor */
062  public RangeFacetsExample() {}
063
064  /** Build the example index. */
065  public void index() throws IOException {
066    IndexWriter indexWriter =
067        new IndexWriter(
068            indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));
069
070    // Add documents with a fake timestamp, 1000 sec before
071    // "now", 2000 sec before "now", ...:
072    for (int i = 0; i < 100; i++) {
073      Document doc = new Document();
074      long then = nowSec - i * 1000L;
075      // Add as doc values field, so we can compute range facets:
076      doc.add(new NumericDocValuesField("timestamp", then));
077      // Add as numeric field so we can drill-down:
078      doc.add(new LongPoint("timestamp", then));
079      indexWriter.addDocument(doc);
080    }
081
082    // Add documents with a fake timestamp for the past 7 days (24 * 7 = 168 hours), 3600 sec (1
083    // hour) from "now", 7200 sec (2 hours) from "now", ...:
084    long startTime = 0;
085    for (int i = 0; i < 168; i++) {
086      long endTime = (i + 1) * 3600L;
087      // Choose a relatively large number, e,g., "35", to create variation in count for
088      // the top n children, so that calling getTopChildren(10) can return top 10 children with
089      // different counts
090      for (int j = 0; j < i % 35; j++) {
091        Document doc = new Document();
092        Random r = new Random();
093        // Randomly generate a timestamp within the current range
094        long randomTimestamp = r.nextLong(1, endTime - startTime) + startTime;
095        // Add as doc values field, so we can compute range facets:
096        doc.add(new NumericDocValuesField("error timestamp", randomTimestamp));
097        doc.add(
098            new StringField(
099                "error message", "server encountered error at " + randomTimestamp, Field.Store.NO));
100        indexWriter.addDocument(doc);
101      }
102      logTimestampRanges[i] =
103          new LongRange("Hour " + i + "-" + (i + 1), startTime, false, endTime, true);
104      startTime = endTime;
105    }
106
107    // Open near-real-time searcher
108    searcher = new IndexSearcher(DirectoryReader.open(indexWriter));
109    indexWriter.close();
110  }
111
112  private FacetsConfig getConfig() {
113    return new FacetsConfig();
114  }
115
116  /** User runs a query and counts facets. */
117  public FacetResult search() throws IOException {
118
119    // MatchAllDocsQuery is for "browsing" (counts facets
120    // for all non-deleted docs in the index); normally
121    // you'd use a "normal" query:
122    FacetsCollector fc =
123        FacetsCollectorManager.search(
124                searcher, new MatchAllDocsQuery(), 10, new FacetsCollectorManager())
125            .facetsCollector();
126
127    Facets facets = new LongRangeFacetCounts("timestamp", fc, PAST_HOUR, PAST_SIX_HOURS, PAST_DAY);
128    return facets.getAllChildren("timestamp");
129  }
130
131  /** User runs a query and counts facets. */
132  public FacetResult searchTopChildren() throws IOException {
133
134    // Aggregates the facet counts
135    FacetsCollectorManager fcm = new FacetsCollectorManager();
136
137    // MatchAllDocsQuery is for "browsing" (counts facets
138    // for all non-deleted docs in the index); normally
139    // you'd use a "normal" query:
140    FacetsCollector fc =
141        FacetsCollectorManager.search(searcher, new MatchAllDocsQuery(), 10, fcm).facetsCollector();
142
143    Facets facets = new LongRangeFacetCounts("error timestamp", fc, logTimestampRanges);
144    return facets.getTopChildren(10, "error timestamp");
145  }
146
147  /** User drills down on the specified range. */
148  public TopDocs drillDown(LongRange range) throws IOException {
149
150    // Passing no baseQuery means we drill down on all
151    // documents ("browse only"):
152    DrillDownQuery q = new DrillDownQuery(getConfig());
153
154    q.add("timestamp", LongPoint.newRangeQuery("timestamp", range.min, range.max));
155    return searcher.search(q, 10);
156  }
157
158  /** User drills down on the specified range, and also computes drill sideways counts. */
159  public DrillSideways.DrillSidewaysResult drillSideways(LongRange range) throws IOException {
160    // Passing no baseQuery means we drill down on all
161    // documents ("browse only"):
162    DrillDownQuery q = new DrillDownQuery(getConfig());
163    q.add("timestamp", LongPoint.newRangeQuery("timestamp", range.min, range.max));
164
165    // DrillSideways only handles taxonomy and sorted set drill facets by default; to do range
166    // facets we must subclass and override the
167    // buildFacetsResult method.
168    DrillSideways.DrillSidewaysResult result =
169        new DrillSideways(searcher, getConfig(), null, null) {
170          @Override
171          protected Facets buildFacetsResult(
172              FacetsCollector drillDowns,
173              FacetsCollector[] drillSideways,
174              String[] drillSidewaysDims)
175              throws IOException {
176            // If we had other dims we would also compute their drill-down or drill-sideways facets
177            // here:
178            assert drillSidewaysDims[0].equals("timestamp");
179            return new LongRangeFacetCounts(
180                "timestamp", drillSideways[0], PAST_HOUR, PAST_SIX_HOURS, PAST_DAY);
181          }
182        }.search(q, 10);
183
184    return result;
185  }
186
187  @Override
188  public void close() throws IOException {
189    IOUtils.close(searcher.getIndexReader(), indexDir);
190  }
191
192  /** Runs the search and drill-down examples and prints the results. */
193  public static void main(String[] args) throws Exception {
194    RangeFacetsExample example = new RangeFacetsExample();
195    example.index();
196
197    System.out.println("Facet counting example:");
198    System.out.println("-----------------------");
199    System.out.println(example.search());
200
201    System.out.println("\n");
202    System.out.println("Facet counting example:");
203    System.out.println("-----------------------");
204    System.out.println(example.searchTopChildren());
205
206    System.out.println("\n");
207    System.out.println("Facet drill-down example (timestamp/Past six hours):");
208    System.out.println("---------------------------------------------");
209    TopDocs hits = example.drillDown(example.PAST_SIX_HOURS);
210    System.out.println(hits.totalHits + " totalHits");
211
212    System.out.println("\n");
213    System.out.println("Facet drill-sideways example (timestamp/Past six hours):");
214    System.out.println("---------------------------------------------");
215    DrillSideways.DrillSidewaysResult sideways = example.drillSideways(example.PAST_SIX_HOURS);
216    System.out.println(sideways.hits.totalHits + " totalHits");
217    System.out.println(sideways.facets.getTopChildren(10, "timestamp"));
218
219    example.close();
220  }
221}