-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathGeniusAssociationEnumerator.m
More file actions
393 lines (330 loc) · 12.6 KB
/
GeniusAssociationEnumerator.m
File metadata and controls
393 lines (330 loc) · 12.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
/*
Genius
Copyright (C) 2003-2006 John R Chang
Copyright (C) 2007-2008 Chris Miner
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
http://www.gnu.org/licenses/gpl.txt
*/
#import "GeniusAssociationEnumerator.h"
#include <math.h> // pow
#import "GeniusPair.h"
#import "GeniusAssociation.h"
static unsigned long Factorial(int n)
{
return (n<=1) ? 1 : n * Factorial(n-1);
}
//! Calculates the probablity of x for a given m.
static float PoissonValue(int x, float m)
{
return (pow(m,x) / Factorial(x)) * pow(M_E, -m);
}
//! randomly returns NSOrderedAscending or NSOrderedDescending
int RandomSortFunction(id object1, id object2, void * context)
{
BOOL x = random() & 0x1;
return (x ? NSOrderedAscending : NSOrderedDescending);
}
//! Meant to be used like an NSEnumerator to iterate over a collection of GeniusAssociation items.
/*!
Supports various selection and sorting options.
*/
@implementation GeniusAssociationEnumerator
//! Default initializer.
/*!
Provided @a asociations is copied and used as the basis for later filtering and sorting.
*/
- (id) initWithAssociations:(NSArray *)associations
{
self = [super init];
_inputAssociations = [associations mutableCopy];
_count = [_inputAssociations count];
_minimumScore = -1;
_m_value = 1.0;
_hasPerformedChooseAssociations = NO;
_scheduledAssociations = [[NSMutableArray alloc] init];
return self;
}
//! Releases #_inputAssociations and #_scheduledAssociations and frees up memory.
- (void) dealloc
{
[_inputAssociations release];
[_scheduledAssociations release];
[super dealloc];
}
//! _count setter.
/*!
Parameter @a count is ignored if it is greater than the number of items in #_inputAssociations.
@todo Don't assume GeniusAssociationEnumerator#_inputAssociations is set before count if that isn't enforced.
*/
- (void) setCount:(unsigned int)count
{
_count = MIN([_inputAssociations count], count);
}
//! _minimumScore setter.
- (void) setMinimumScore:(int)score
{
_minimumScore = score;
}
//! _m_value setter.
/*! @todo Change the name of this variable to something practical. */
- (void) setProbabilityCenter:(float)value
{
_m_value = value;
}
//! Loops over #_inputAssociations to find relevent items.
/*!
Filters out disabled GeniusAssociation items and those with a score lower than
the #_minimumScore. As a side effect this method nullifies the GeniusAssociation#dueDate
for items that are past due.
*/
- (NSArray *) _getActiveAssociations
{
#if DEBUG
NSLog(@"_minimumScore=%d, [_inputAssociations count]=%d", _minimumScore, [_inputAssociations count]);
#endif
int requestedMinimumScore = _minimumScore;
_minimumScore = -2;
_maximumScore = _minimumScore;
NSMutableArray * outAssociations = [NSMutableArray array];
NSEnumerator * associationEnumerator = [_inputAssociations objectEnumerator];
GeniusAssociation * association;
while ((association = [associationEnumerator nextObject]))
{
// Filter out disabled pairs
GeniusPair * pair = [association parentPair];
if ([pair importance] == kGeniusPairDisabledImportance)
continue;
// Filter out minimum association scores
if ([association score] < requestedMinimumScore)
continue;
[(NSMutableArray *)outAssociations addObject:association];
// If the fire date has already expired, clear it
if ([[association dueDate] compare:[NSDate date]] == NSOrderedAscending)
[association setDueDate:nil];
// Calculate minimum and maximum scores
if (_minimumScore < -1)
_minimumScore = [association score];
else
_minimumScore = MIN(_minimumScore, [association score]);
_maximumScore = MAX(_maximumScore, [association score]);
}
return outAssociations;
}
//! Function used in sorting array of GeniusAssociation instances by importance attribute.
static NSComparisonResult CompareAssociationByImportance(GeniusAssociation * assoc1, GeniusAssociation * assoc2, void *context)
{
GeniusPair * pair1 = [assoc1 parentPair];
GeniusPair * pair2 = [assoc2 parentPair];
int importance1 = [pair1 importance];
int importance2 = [pair2 importance];
if (importance1 > importance2)
return NSOrderedAscending;
else if (importance1 < importance2)
return NSOrderedDescending;
else
return NSOrderedSame;
}
//! Selects items from @a associations based on GeniusAssociation#score and _m_value.
/*!
Sorts the associations into buckets based on score. Then calculates the Poisson value
for each bucket based on the established #_m_value. Finally generates a series of
random numbers to choose items from the buckets based on the probablility curve.
*/
- (NSArray *) _chooseCountAssociationsByScore:(NSArray *)associations
{
#if DEBUG
NSLog(@"_minimumScore=%d, _maximumScore=%d", _minimumScore, _maximumScore);
NSLog(@"[associations count]=%d, _count=%d", [associations count], _count);
#endif
if ([associations count] <= _count)
return associations;
// Count the number of buckets necessary.
NSMutableArray * buckets = [NSMutableArray array];
int bucketCount = (_maximumScore - _minimumScore + 1);
int b;
for (b=0; b<bucketCount; b++)
[buckets addObject:[NSMutableArray array]];
// Sort the associations into buckets.
NSEnumerator * associationEnumerator = [associations objectEnumerator];
GeniusAssociation * association;
while ((association = [associationEnumerator nextObject]))
{
b = [association score] - _minimumScore;
NSMutableArray * bucket = [buckets objectAtIndex:b];
[bucket addObject:association];
}
#if DEBUG
for (b=0; b<bucketCount; b++)
NSLog(@"bucket %d has %d associations", b, [[buckets objectAtIndex:b] count]);
#endif
// Calculate Poisson distribution curve using _m_value.
float * p = calloc(sizeof(float), bucketCount);
float max_p = 0.0;
for (b=0; b<bucketCount; b++)
{
p[b] = PoissonValue(b, _m_value);
max_p = MAX(max_p, p[b]);
#if DEBUG
NSLog(@"p[%d]=%f --> expect n=%.1f", b, p[b], _count * p[b]);
#endif
}
// Perform weighted random selection of _count objects
NSMutableArray * outAssociations = [NSMutableArray array];
while ([outAssociations count] < _count)
{
float x = random() / (float)LONG_MAX;
#if DEBUG
float origValue = x;
#endif
// Here we translate the random point x to the index of the corresponding weighted bucket.
// We assert that the sum of the probabilities (p[b] for all b) is 1.0.
for (b=0; b<bucketCount; b++)
{
if (x < p[b])
{
NSMutableArray * bucket = [buckets objectAtIndex:b];
if ([bucket count] > 0)
{
[outAssociations addObject:[bucket objectAtIndex:0]];
[bucket removeObjectAtIndex:0];
#if DEBUG
NSLog(@"%f\t--> pull from bucket %d, %d left", origValue, b, [bucket count]);
#endif
break; // done with this association
}
}
x -= p[b];
}
}
free(p);
return outAssociations;
}
//! Helper method to initialize the set of associations for enumeration.
/*!
The process of choosing involves:
@li Filtering out inactive associations
@li Randomizing the remaining ones
@li Sorting the results by importance
@li Finally choosing at least #_count items based on #_minimumScore.
*/
- (void) performChooseAssociations
{
// 1. First, filter out disabled pairs, minimum scores, and long-term dates.
NSArray * activeAssociations = [self _getActiveAssociations];
// 2. Randomize the remaining "active" associations
NSArray * randomActiveAssociations = [activeAssociations sortedArrayUsingFunction:RandomSortFunction context:NULL];
// 3. Weight the associations according to pair importance
NSArray * orderedAssociations = [randomActiveAssociations sortedArrayUsingFunction:CompareAssociationByImportance context:NULL];
// 4. Choose _count associations by score according to a probability curve
NSArray * chosenAssociations = [self _chooseCountAssociationsByScore:orderedAssociations];
// DEBUG
#if DEBUG
NSEnumerator * associationEnumerator = [chosenAssociations objectEnumerator];
GeniusAssociation * association;
while ((association = [associationEnumerator nextObject]))
{
GeniusPair * pair = [association parentPair];
NSLog(@"%@, date=%@, score=%d, importance=%d", [[pair itemA] stringValue], [[association dueDate] description], [association score], [pair importance]);
}
#endif
[_inputAssociations setArray:chosenAssociations]; // HACK
_hasPerformedChooseAssociations = YES;
}
//! Convenience method for returning the number of items in _inputAssociations.
/*!
@todo check if how this is used makes sense. Seems like it should return the count of scheduled associations.
*/
- (int) remainingCount
{
return [_inputAssociations count]; // + [_scheduledAssociations count];
}
//! Returns the next GeniusAssociation in the enumeration.
/*!
Looks for an association from the _scheduledAssociations with a passed dueDate. If none is found
one of the _inputAssociations is returned.
*/
- (GeniusAssociation *) nextAssociation
{
GeniusAssociation * association;
// First time
if (_hasPerformedChooseAssociations == NO)
[self performChooseAssociations];
// Try popping an association off the scheduled associations queue
#if DEBUG
//NSLog(@"_scheduledAssociations = %@", [_scheduledAssociations description]);
#endif
if ([_scheduledAssociations count])
{
association = [[_scheduledAssociations objectAtIndex:0] retain];
if ([[association dueDate] compare:[NSDate date]] == NSOrderedAscending)
{
[_scheduledAssociations removeObjectAtIndex:0];
return [association autorelease];
}
}
// Otherwise try popping an unscheduled association
#if DEBUG
//NSLog(@"_inputAssociations = %@", [_inputAssociations description]);
#endif
if ([_inputAssociations count] == 0)
return nil;
association = [[_inputAssociations objectAtIndex:0] retain];
[_inputAssociations removeObjectAtIndex:0];
return [association autorelease];
}
//! Updates GeniusAssociation#dueDate based on current GeniusAssociation#score provided @a association and inserts in _scheduledAssociations.
- (void) _scheduleAssociation:(GeniusAssociation *)association
{
unsigned int sec = pow(5, [association score]);
NSDate * dueDate = [[NSDate date] addTimeInterval:sec];
[association setDueDate:dueDate];
int i, n = [_scheduledAssociations count];
for (i=0; i<n; i++)
{
NSDate * dueDate = [association dueDate];
GeniusAssociation * currentAssoc = [_scheduledAssociations objectAtIndex:i];
NSDate * currentFireDate = [currentAssoc dueDate];
if ([dueDate compare:currentFireDate] == NSOrderedAscending)
{
[_scheduledAssociations insertObject:association atIndex:i];
return;
}
}
[_scheduledAssociations addObject:association];
}
//! Bumps score of @a association up by one.
- (void) associationRight:(GeniusAssociation *)association
{
// score++
int score = [association score];
[association setScore:score+1];
[self _scheduleAssociation:association];
}
//! Sets score for the @a association back to zero.
/*!
@todo This seems questionable.
*/
- (void) associationWrong:(GeniusAssociation *)association
{
// score = 0
[association setScore:0];
[self _scheduleAssociation:association];
}
//! Sets score for the @a association
/*!
@todo This seems unexpected. Why would skipping something mean nullify the score and due date?
*/
- (void) associationSkip:(GeniusAssociation *)association
{
// score = -1
[association setScoreNumber:nil];
[association setDueDate:nil];
}
@end