/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.cf.taste.impl.recommender;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.concurrent.Callable;
import com.google.common.collect.Lists;
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
import org.apache.mahout.cf.taste.impl.common.FastIDSet;
import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
import org.apache.mahout.cf.taste.impl.common.RunningAverage;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.recommender.ClusteringRecommender;
import org.apache.mahout.cf.taste.recommender.IDRescorer;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.common.RandomUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Preconditions;
/**
*
* A {@link org.apache.mahout.cf.taste.recommender.Recommender} that clusters users, then determines the
* clusters' top recommendations. This implementation builds clusters by repeatedly merging clusters until
* only a certain number remain, meaning that each cluster is sort of a tree of other clusters.
*
*
*
* This {@link org.apache.mahout.cf.taste.recommender.Recommender} therefore has a few properties to note:
*
*
*
For all users in a cluster, recommendations will be the same
*
{@link #estimatePreference(long, long)} may well return {@link Double#NaN}; it does so when asked to
* estimate preference for an item for which no preference is expressed in the users in the cluster.
*
*
*
* This is an experimental implementation which tries to gain a lot of speed at the cost of accuracy
* in building clusters, compared to {@link TreeClusteringRecommender}. It will sometimes cluster two other
* clusters together that may not be the exact closest two clusters in existence. This may not affect the
* recommendation quality much, but it potentially speeds up the clustering process dramatically.
*
*/
public final class TreeClusteringRecommender2 extends AbstractRecommender implements ClusteringRecommender {
private static final Logger log = LoggerFactory.getLogger(TreeClusteringRecommender2.class);
private static final int NUM_CLUSTER_RECS = 100;
private final ClusterSimilarity clusterSimilarity;
private final int numClusters;
private final double clusteringThreshold;
private final boolean clusteringByThreshold;
private FastByIDMap> topRecsByUserID;
private FastIDSet[] allClusters;
private FastByIDMap clustersByUserID;
private final RefreshHelper refreshHelper;
/**
* @param dataModel
* {@link DataModel} which provides users
* @param clusterSimilarity
* {@link ClusterSimilarity} used to compute cluster similarity
* @param numClusters
* desired number of clusters to create
* @throws IllegalArgumentException
* if arguments are {@code null}, or {@code numClusters} is less than 2
*/
public TreeClusteringRecommender2(DataModel dataModel, ClusterSimilarity clusterSimilarity, int numClusters)
throws TasteException {
super(dataModel);
Preconditions.checkArgument(numClusters >= 2, "numClusters must be at least 2");
this.clusterSimilarity = Preconditions.checkNotNull(clusterSimilarity);
this.numClusters = numClusters;
this.clusteringThreshold = Double.NaN;
this.clusteringByThreshold = false;
this.refreshHelper = new RefreshHelper(new Callable