-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathmilesvector.m
More file actions
executable file
·108 lines (100 loc) · 3.29 KB
/
milesvector.m
File metadata and controls
executable file
·108 lines (100 loc) · 3.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
%MILESVECTOR MILES inspired vector representation of a bag
%
% W = MILESVECTOR(X,RTYPE,PAR,PROTOSEL)
%
% INPUT
% X MIL dataset
% RTYPE Method for obtaining a vector from a bag
% (default = 'rbf')
% PAR Parameter of the method (default = 10)
% PROTOSEL Reduce the nr of instances (default.type = 'all',
% default.N = [])
%
% OUTPUT
% W MIL Proximity mapping
%
% DESCRIPTION
% Compute a single feature vector from each bag of instances in X.
% It defines a (dis)similarity between all bags, and all (or a subset
% of) instances in X.
% The following measures are defined between bags {x_i} and instance z:
% RTYPE: DOES:
% 'rbf' max_i exp(-(x_i-z)^2/par^2 )
% 'mindist' min_i (x_i-z)^2
%
% For very large datasets X the number of (dis)similarities can become
% very large, and therefore some prototype selection may be useful. For
% this, define a structure PROTOSEL with two fields, 'type' and 'N':
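% PROTOSEL.type
% 'all' use all instances
% 'random' select randomly PROTOSEL.N instances
% 'kmeans' run k-means with PROTOSEL.N clusters and use, for each
% cluster mean, the instance that is closest to it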
%
% SEE ALSO
% MILCOMBINE, LABELSET
% Copyright: D.M.J. Tax, D.M.J.Tax@prtools.org
% Faculty EWI, Delft University of Technology
% P.O. Box 5031, 2600 GA Delft, The Netherlands
%function y = milproxm(x,rtype,par,protosel)
function y = milproxm(varargin)
% Dispatches on the calling pattern, as reported by mapping_task:
%   'definition'        return an untrained mapping holding the parameters
%   'training'          select prototype instances from X and store them,
%                       together with RTYPE and PAR, in a trained mapping
%   'trained execution' represent every bag of X by its (dis)similarity to
%                       each stored prototype instance
%
% NOTE(review): the declared function name is milproxm, while the mapping
% name and the error message below refer to milesvector -- confirm which
% name is intended.

    argin = shiftargin(varargin,'char');
    argin = setdefaults(argin,[],'rbf',10,'all');

    if mapping_task(argin,'definition')

        % The result has to end up in the output variable y; storing it in
        % another variable leaves the output of the function unassigned.
        y = define_mapping(argin,'untrained','MilesVector');
        y = setbatch(y,0); %NEVER use batches!!

    elseif mapping_task(argin,'training')

        [x,rtype,par,protosel] = deal(argin{:});
        [n,p] = size(x);

        % The default value of PROTOSEL is the string 'all'. Accept a bare
        % method name as shorthand for a structure with that type, so that
        % the field accesses below are valid.
        if ischar(protosel)
            protosel = struct('type',protosel,'N',[]);
        end
        if ~isstruct(protosel) || ~isfield(protosel,'type')
            error('PROTOSEL should be a method name or a structure with the fields ''type'' and ''N''.');
        end
        if ~isfield(protosel,'N')
            protosel.N = [];
        end

        % I holds the row indices of the instances that become prototypes
        switch protosel.type
            case 'all'
                I = 1:n;
            case 'random'
                if isempty(protosel.N)
                    error('PROTOSEL.N should be defined for prototype selection ''random''.');
                end
                I = randperm(n);
                % never request more prototypes than there are instances
                I = I(1:min(protosel.N,n));
            case 'kmeans'
                if isempty(protosel.N)
                    error('PROTOSEL.N should be defined for prototype selection ''kmeans''.');
                end
                [~,means] = mykmeans(+x,protosel.N);
                D = sqeucldistm(+x,means);
                % per cluster mean (column of D) the nearest instance; the
                % dimension is explicit so that a single-row D is handled
                % correctly as well
                [~,I] = min(D,[],1);
            otherwise
                error('I do not know this prototype selection method');
        end

        % store everything that is needed to apply the mapping later
        W.X = +x(I,:);
        W.type = rtype;
        W.par = par;
        y = prmapping(mfilename,'trained',W,[],p,length(I));
        y = setbatch(y,0); % NEVER do batches!

    elseif mapping_task(argin,'trained execution')

        % the second argument is the trained mapping
        [x,w] = deal(argin{1:2});
        x = genmil(x); % I need a MIL dataset to derive the bag labels
        % now we have data, and we *apply* the mapping:
        W = getdata(w);
        % now we only have one feature type to take care of:
        [bags,lab,~] = getbags(x);
        n = length(bags);
        % one row per bag, one column per prototype instance
        out = zeros(n,size(W.X,1));
        switch W.type
            case {'r','rbf'}
                for i=1:n
                    % the largest RBF response over the instances in the
                    % bag belongs to the smallest squared distance
                    mind = min(sqeucldistm(bags{i},W.X),[],1);
                    out(i,:) = exp(-mind/(W.par*W.par));
                end
            case 'mindist'
                for i=1:n
                    out(i,:) = min(sqeucldistm(bags{i},W.X),[],1);
                end
            otherwise
                error('Type %s is not defined.',W.type);
        end

        % we have the new features, and the feature labels, so go:
        y = prdataset(out,lab,'prior',0);
        [nlab,~] = renumlab(lab,getlablist(x));
        y = setlablist(y,getlablist(x));
        y = setnlab(y,nlab);
        y = setident(y,(1:n)','milbag');
        y = setname(y,getname(x));
        y = setprior(y,getprior(x,0)); %DXD well, is this a good idea? What
                                       % alternative do we have?
    else
        error('Illegal call to milesvector.');
    end