/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.cf.taste.impl.recommender;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.Callable;
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.FastIDSet;
import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.IDRescorer;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Rescorer;
import org.apache.mahout.cf.taste.recommender.UserBasedRecommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import org.apache.mahout.common.LongPair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Preconditions;
/**
*
* A simple {@link org.apache.mahout.cf.taste.recommender.Recommender}
* which uses a given {@link DataModel} and {@link UserNeighborhood} to produce recommendations.
*
*/
public class GenericUserBasedRecommender extends AbstractRecommender implements UserBasedRecommender {
private static final Logger log = LoggerFactory.getLogger(GenericUserBasedRecommender.class);
private final UserNeighborhood neighborhood;
private final UserSimilarity similarity;
private final RefreshHelper refreshHelper;
private EstimatedPreferenceCapper capper;
public GenericUserBasedRecommender(DataModel dataModel,
UserNeighborhood neighborhood,
UserSimilarity similarity) {
super(dataModel);
Preconditions.checkArgument(neighborhood != null, "neighborhood is null");
this.neighborhood = neighborhood;
this.similarity = similarity;
this.refreshHelper = new RefreshHelper(new Callable() {
@Override
public Void call() {
capper = buildCapper();
return null;
}
});
refreshHelper.addDependency(dataModel);
refreshHelper.addDependency(similarity);
refreshHelper.addDependency(neighborhood);
capper = buildCapper();
}
public UserSimilarity getSimilarity() {
return similarity;
}
@Override
public List recommend(long userID, int howMany, IDRescorer rescorer) throws TasteException {
Preconditions.checkArgument(howMany >= 1, "howMany must be at least 1");
log.debug("Recommending items for user ID '{}'", userID);
long[] theNeighborhood = neighborhood.getUserNeighborhood(userID);
if (theNeighborhood.length == 0) {
return Collections.emptyList();
}
FastIDSet allItemIDs = getAllOtherItems(theNeighborhood, userID);
TopItems.Estimator estimator = new Estimator(userID, theNeighborhood);
List topItems = TopItems
.getTopItems(howMany, allItemIDs.iterator(), rescorer, estimator);
log.debug("Recommendations are: {}", topItems);
return topItems;
}
@Override
public float estimatePreference(long userID, long itemID) throws TasteException {
DataModel model = getDataModel();
Float actualPref = model.getPreferenceValue(userID, itemID);
if (actualPref != null) {
return actualPref;
}
long[] theNeighborhood = neighborhood.getUserNeighborhood(userID);
return doEstimatePreference(userID, theNeighborhood, itemID);
}
@Override
public long[] mostSimilarUserIDs(long userID, int howMany) throws TasteException {
return mostSimilarUserIDs(userID, howMany, null);
}
@Override
public long[] mostSimilarUserIDs(long userID, int howMany, Rescorer rescorer) throws TasteException {
TopItems.Estimator estimator = new MostSimilarEstimator(userID, similarity, rescorer);
return doMostSimilarUsers(howMany, estimator);
}
private long[] doMostSimilarUsers(int howMany, TopItems.Estimator estimator) throws TasteException {
DataModel model = getDataModel();
return TopItems.getTopUsers(howMany, model.getUserIDs(), null, estimator);
}
protected float doEstimatePreference(long theUserID, long[] theNeighborhood, long itemID) throws TasteException {
if (theNeighborhood.length == 0) {
return Float.NaN;
}
DataModel dataModel = getDataModel();
double preference = 0.0;
double totalSimilarity = 0.0;
int count = 0;
for (long userID : theNeighborhood) {
if (userID != theUserID) {
// See GenericItemBasedRecommender.doEstimatePreference() too
Float pref = dataModel.getPreferenceValue(userID, itemID);
if (pref != null) {
double theSimilarity = similarity.userSimilarity(theUserID, userID);
if (!Double.isNaN(theSimilarity)) {
preference += theSimilarity * pref;
totalSimilarity += theSimilarity;
count++;
}
}
}
}
// Throw out the estimate if it was based on no data points, of course, but also if based on
// just one. This is a bit of a band-aid on the 'stock' item-based algorithm for the moment.
// The reason is that in this case the estimate is, simply, the user's rating for one item
// that happened to have a defined similarity. The similarity score doesn't matter, and that
// seems like a bad situation.
if (count <= 1) {
return Float.NaN;
}
float estimate = (float) (preference / totalSimilarity);
if (capper != null) {
estimate = capper.capEstimate(estimate);
}
return estimate;
}
protected FastIDSet getAllOtherItems(long[] theNeighborhood, long theUserID) throws TasteException {
DataModel dataModel = getDataModel();
FastIDSet possibleItemIDs = new FastIDSet();
for (long userID : theNeighborhood) {
possibleItemIDs.addAll(dataModel.getItemIDsFromUser(userID));
}
possibleItemIDs.removeAll(dataModel.getItemIDsFromUser(theUserID));
return possibleItemIDs;
}
@Override
public void refresh(Collection alreadyRefreshed) {
refreshHelper.refresh(alreadyRefreshed);
}
@Override
public String toString() {
return "GenericUserBasedRecommender[neighborhood:" + neighborhood + ']';
}
private EstimatedPreferenceCapper buildCapper() {
DataModel dataModel = getDataModel();
if (Float.isNaN(dataModel.getMinPreference()) && Float.isNaN(dataModel.getMaxPreference())) {
return null;
} else {
return new EstimatedPreferenceCapper(dataModel);
}
}
private static final class MostSimilarEstimator implements TopItems.Estimator {
private final long toUserID;
private final UserSimilarity similarity;
private final Rescorer rescorer;
private MostSimilarEstimator(long toUserID, UserSimilarity similarity, Rescorer rescorer) {
this.toUserID = toUserID;
this.similarity = similarity;
this.rescorer = rescorer;
}
@Override
public double estimate(Long userID) throws TasteException {
// Don't consider the user itself as a possible most similar user
if (userID == toUserID) {
return Double.NaN;
}
if (rescorer == null) {
return similarity.userSimilarity(toUserID, userID);
} else {
LongPair pair = new LongPair(toUserID, userID);
if (rescorer.isFiltered(pair)) {
return Double.NaN;
}
double originalEstimate = similarity.userSimilarity(toUserID, userID);
return rescorer.rescore(pair, originalEstimate);
}
}
}
private final class Estimator implements TopItems.Estimator {
private final long theUserID;
private final long[] theNeighborhood;
Estimator(long theUserID, long[] theNeighborhood) {
this.theUserID = theUserID;
this.theNeighborhood = theNeighborhood;
}
@Override
public double estimate(Long itemID) throws TasteException {
return doEstimatePreference(theUserID, theNeighborhood, itemID);
}
}
}