/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.mahout.cf.taste.impl.eval; import java.util.List; import java.util.Random; import org.apache.mahout.cf.taste.common.NoSuchUserException; import org.apache.mahout.cf.taste.common.TasteException; import org.apache.mahout.cf.taste.eval.DataModelBuilder; import org.apache.mahout.cf.taste.eval.IRStatistics; import org.apache.mahout.cf.taste.eval.RecommenderBuilder; import org.apache.mahout.cf.taste.eval.RecommenderIRStatsEvaluator; import org.apache.mahout.cf.taste.eval.RelevantItemsDataSplitter; import org.apache.mahout.cf.taste.impl.common.FastByIDMap; import org.apache.mahout.cf.taste.impl.common.FastIDSet; import org.apache.mahout.cf.taste.impl.common.FullRunningAverage; import org.apache.mahout.cf.taste.impl.common.FullRunningAverageAndStdDev; import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator; import org.apache.mahout.cf.taste.impl.common.RunningAverage; import org.apache.mahout.cf.taste.impl.common.RunningAverageAndStdDev; import org.apache.mahout.cf.taste.impl.model.GenericDataModel; import org.apache.mahout.cf.taste.model.DataModel; import org.apache.mahout.cf.taste.model.PreferenceArray; import org.apache.mahout.cf.taste.recommender.IDRescorer; import org.apache.mahout.cf.taste.recommender.RecommendedItem; import org.apache.mahout.cf.taste.recommender.Recommender; import org.apache.mahout.common.RandomUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.base.Preconditions; /** *
* For each user, these implementation determine the top {@code n} preferences, then evaluate the IR * statistics based on a {@link DataModel} that does not have these values. This number {@code n} is the * "at" value, as in "precision at 5". For example, this would mean precision evaluated by removing the top 5 * preferences for a user and then finding the percentage of those 5 items included in the top 5 * recommendations for that user. *
*/ public final class GenericRecommenderIRStatsEvaluator implements RecommenderIRStatsEvaluator { private static final Logger log = LoggerFactory.getLogger(GenericRecommenderIRStatsEvaluator.class); private static final double LOG2 = Math.log(2.0); /** * Pass as "relevanceThreshold" argument to * {@link #evaluate(RecommenderBuilder, DataModelBuilder, DataModel, IDRescorer, int, double, double)} to * have it attempt to compute a reasonable threshold. Note that this will impact performance. */ public static final double CHOOSE_THRESHOLD = Double.NaN; private final Random random; private final RelevantItemsDataSplitter dataSplitter; public GenericRecommenderIRStatsEvaluator() { this(new GenericRelevantItemsDataSplitter()); } public GenericRecommenderIRStatsEvaluator(RelevantItemsDataSplitter dataSplitter) { Preconditions.checkNotNull(dataSplitter); random = RandomUtils.getRandom(); this.dataSplitter = dataSplitter; } @Override public IRStatistics evaluate(RecommenderBuilder recommenderBuilder, DataModelBuilder dataModelBuilder, DataModel dataModel, IDRescorer rescorer, int at, double relevanceThreshold, double evaluationPercentage) throws TasteException { Preconditions.checkArgument(recommenderBuilder != null, "recommenderBuilder is null"); Preconditions.checkArgument(dataModel != null, "dataModel is null"); Preconditions.checkArgument(at >= 1, "at must be at least 1"); Preconditions.checkArgument(evaluationPercentage > 0.0 && evaluationPercentage <= 1.0, "Invalid evaluationPercentage: %s", evaluationPercentage); int numItems = dataModel.getNumItems(); RunningAverage precision = new FullRunningAverage(); RunningAverage recall = new FullRunningAverage(); RunningAverage fallOut = new FullRunningAverage(); RunningAverage nDCG = new FullRunningAverage(); int numUsersRecommendedFor = 0; int numUsersWithRecommendations = 0; LongPrimitiveIterator it = dataModel.getUserIDs(); while (it.hasNext()) { long userID = it.nextLong(); if (random.nextDouble() >= evaluationPercentage) { // Skipped continue; } long start = System.currentTimeMillis(); PreferenceArray prefs = dataModel.getPreferencesFromUser(userID); // List some most-preferred items that would count as (most) "relevant" results double theRelevanceThreshold = Double.isNaN(relevanceThreshold) ? computeThreshold(prefs) : relevanceThreshold; FastIDSet relevantItemIDs = dataSplitter.getRelevantItemsIDs(userID, at, theRelevanceThreshold, dataModel); int numRelevantItems = relevantItemIDs.size(); if (numRelevantItems <= 0) { continue; } FastByIDMap