001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.lucene.demo.facet;
018
019import java.io.IOException;
020import java.util.List;
021import java.util.Locale;
022import java.util.concurrent.ExecutorService;
023import java.util.concurrent.Executors;
024import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
025import org.apache.lucene.document.Document;
026import org.apache.lucene.document.Field;
027import org.apache.lucene.document.NumericDocValuesField;
028import org.apache.lucene.document.StringField;
029import org.apache.lucene.facet.FacetsCollector;
030import org.apache.lucene.facet.FacetsCollectorManager;
031import org.apache.lucene.facet.FacetsConfig;
032import org.apache.lucene.facet.range.DynamicRangeUtil;
033import org.apache.lucene.index.DirectoryReader;
034import org.apache.lucene.index.IndexWriter;
035import org.apache.lucene.index.IndexWriterConfig;
036import org.apache.lucene.search.IndexSearcher;
037import org.apache.lucene.search.LongValuesSource;
038import org.apache.lucene.search.MatchAllDocsQuery;
039import org.apache.lucene.store.ByteBuffersDirectory;
040import org.apache.lucene.store.Directory;
041import org.apache.lucene.util.NamedThreadFactory;
042
/**
 * Demo dynamic range faceting.
 *
 * <p>The results look like so:
 *
 * <pre>
 * min: 63 max: 75 centroid: 69.000000 count: 2 weight: 137
 * min: 79 max: 96 centroid: 86.000000 count: 3 weight: 83
 * </pre>
 *
 * <p>We've computed dynamic ranges over popularity weighted by number of books. We can read the
 * results as follows: There are 137 books written by authors in the 63 to 75 popularity range.
 *
 * <p>How it works: We collect all the values (popularity) and their weights (book counts). We sort
 * the values and find the approximate weight per range. In this case the total weight is 220 (total
 * books by all authors) and we want 2 ranges, so we're aiming for 110 books in each range. We add
 * Chesterton to the first range, since he is the least popular author. He's written a lot of books,
 * so the range's weight is now 90. We add Tolstoy to the first range, since he is next in line of
 * popularity. He's written another 47 books, which brings the total weight to 137. We're over the
 * 110 target weight, so we stop and add everyone left to the second range.
 */
060public class DynamicRangeFacetsExample {
061
062  private final Directory indexDir = new ByteBuffersDirectory();
063  private final FacetsConfig config = new FacetsConfig();
064
065  /** Empty constructor */
066  public DynamicRangeFacetsExample() {}
067
068  /** Build the example index. */
069  private void index() throws IOException {
070    IndexWriter indexWriter =
071        new IndexWriter(
072            indexDir,
073            new IndexWriterConfig(new WhitespaceAnalyzer())
074                .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
075
076    Document doc = new Document();
077    doc.add(new StringField("Author", "J. R. R. Tolkien", Field.Store.NO));
078    doc.add(new NumericDocValuesField("Popularity", 96));
079    doc.add(new NumericDocValuesField("Books", 24));
080    indexWriter.addDocument(config.build(doc));
081
082    doc = new Document();
083    doc.add(new StringField("Author", "C. S. Lewis", Field.Store.NO));
084    doc.add(new NumericDocValuesField("Popularity", 83));
085    doc.add(new NumericDocValuesField("Books", 48));
086    indexWriter.addDocument(config.build(doc));
087
088    doc = new Document();
089    doc.add(new StringField("Author", "G. K. Chesterton", Field.Store.NO));
090    doc.add(new NumericDocValuesField("Popularity", 63));
091    doc.add(new NumericDocValuesField("Books", 90));
092    indexWriter.addDocument(config.build(doc));
093    indexWriter.commit();
094
095    doc = new Document();
096    doc.add(new StringField("Author", "Fyodor Dostoevsky", Field.Store.NO));
097    doc.add(new NumericDocValuesField("Popularity", 79));
098    doc.add(new NumericDocValuesField("Books", 11));
099    indexWriter.addDocument(config.build(doc));
100
101    doc = new Document();
102    doc.add(new StringField("Author", "Leo Tolstoy", Field.Store.NO));
103    doc.add(new NumericDocValuesField("Popularity", 75));
104    doc.add(new NumericDocValuesField("Books", 47));
105    indexWriter.addDocument(config.build(doc));
106
107    indexWriter.close();
108  }
109
110  /** User runs a query and counts facets. */
111  private List<DynamicRangeUtil.DynamicRangeInfo> search() throws IOException {
112    DirectoryReader indexReader = DirectoryReader.open(indexDir);
113    IndexSearcher searcher = new IndexSearcher(indexReader);
114
115    LongValuesSource valuesSource = LongValuesSource.fromLongField("Popularity");
116    LongValuesSource weightsSource = LongValuesSource.fromLongField("Books");
117
118    // Aggregates the facet counts
119    FacetsCollectorManager fcm = new FacetsCollectorManager();
120
121    // MatchAllDocsQuery is for "browsing" (counts facets
122    // for all non-deleted docs in the index); normally
123    // you'd use a "normal" query:
124    FacetsCollector fc =
125        FacetsCollectorManager.search(searcher, new MatchAllDocsQuery(), 10, fcm).facetsCollector();
126
127    ExecutorService executor =
128        Executors.newFixedThreadPool(2, new NamedThreadFactory("dynamic-ranges"));
129    // We ask for 2 ranges over popularity weighted by book count
130    List<DynamicRangeUtil.DynamicRangeInfo> res =
131        DynamicRangeUtil.computeDynamicRanges(
132            "Books", weightsSource, valuesSource, fc, 2, executor);
133    executor.shutdown();
134    return res;
135  }
136
137  /** Runs the search example. */
138  public List<DynamicRangeUtil.DynamicRangeInfo> runSearch() throws IOException {
139    index();
140    return search();
141  }
142
143  /** Runs the search example and prints the results. */
144  public static void main(String[] args) throws Exception {
145    System.out.println("Dynamic range facets example:");
146    System.out.println("-----------------------");
147    DynamicRangeFacetsExample example = new DynamicRangeFacetsExample();
148    List<DynamicRangeUtil.DynamicRangeInfo> results = example.runSearch();
149    for (DynamicRangeUtil.DynamicRangeInfo range : results) {
150      System.out.printf(
151          Locale.ROOT,
152          "min: %d max: %d centroid: %f count: %d weight: %d%n",
153          range.min,
154          range.max,
155          range.centroid,
156          range.count,
157          range.weight);
158    }
159  }
160}