Browse Source

did rm on removed files to stop tracking

Nicolas Zimmermann 2 years ago
parent
commit
2d24586889
5 changed files with 0 additions and 328 deletions
  1. BIN
      doc/Notice.pdf
  2. BIN
      doc/article_pbexplore.pdf
  3. 0 1
      doc/citations
  4. 0 327
      doc/doc_kmedoids.py
  5. BIN
      doc/sujet.pdf

BIN
doc/Notice.pdf View File


BIN
doc/article_pbexplore.pdf View File


+ 0 - 1
doc/citations View File

@@ -1 +0,0 @@
1
-mlpy for use of k-medoid : D. Albanese, R. Visintainer, S. Merler, S. Riccadonna, G. Jurman, C. Furlanello. mlpy: Machine Learning Python, 2012. arXiv:1202.6548 [bib] 

+ 0 - 327
doc/doc_kmedoids.py View File

@@ -1,327 +0,0 @@
1
-"""!
2
-
3
-@brief Cluster analysis algorithm: K-Medoids.
4
-@details Implementation based on papers @cite book::algorithms_for_clustering_data, @cite book::finding_groups_in_data.
5
-
6
-@authors Andrei Novikov (pyclustering@yandex.ru)
7
-@date 2014-2019
8
-@copyright GNU Public License
9
-
10
-@cond GNU_PUBLIC_LICENSE
11
-    PyClustering is free software: you can redistribute it and/or modify
12
-    it under the terms of the GNU General Public License as published by
13
-    the Free Software Foundation, either version 3 of the License, or
14
-    (at your option) any later version.
15
-    
16
-    PyClustering is distributed in the hope that it will be useful,
17
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
18
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19
-    GNU General Public License for more details.
20
-    
21
-    You should have received a copy of the GNU General Public License
22
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
23
-@endcond
24
-
25
-"""
26
-
27
-
28
-import numpy
29
-
30
-from pyclustering.cluster.encoder import type_encoding
31
-
32
-from pyclustering.utils import medoid
33
-from pyclustering.utils.metric import distance_metric, type_metric
34
-
35
-import pyclustering.core.kmedoids_wrapper as wrapper
36
-
37
-from pyclustering.core.wrapper import ccore_library
38
-from pyclustering.core.metric_wrapper import metric_wrapper
39
-
40
-
41
-class kmedoids:
42
-    """!
43
-    @brief Class represents clustering algorithm K-Medoids.
44
-    @details The algorithm is less sensitive to outliers than K-Means. The principal difference between K-Medoids and K-Medians is that
45
-             K-Medoids uses existing points from input data space as medoids, but the median in K-Medians can be an unreal object (not from
46
-             input data space).
47
-    
48
-    Clustering example:
49
-    @code
50
-        from pyclustering.cluster.kmedoids import kmedoids
51
-        from pyclustering.cluster import cluster_visualizer
52
-        from pyclustering.utils import read_sample
53
-        from pyclustering.samples.definitions import FCPS_SAMPLES
54
-
55
-        # Load list of points for cluster analysis.
56
-        sample = read_sample(FCPS_SAMPLES.SAMPLE_TWO_DIAMONDS)
57
-
58
-        # Set random initial medoids.
59
-        initial_medoids = [1, 500]
60
-
61
-        # Create instance of K-Medoids algorithm.
62
-        kmedoids_instance = kmedoids(sample, initial_medoids)
63
-
64
-        # Run cluster analysis and obtain results.
65
-        kmedoids_instance.process()
66
-        clusters = kmedoids_instance.get_clusters()
67
-
68
-        # Show allocated clusters.
69
-        print(clusters)
70
-
71
-        # Display clusters.
72
-        visualizer = cluster_visualizer()
73
-        visualizer.append_clusters(clusters, sample)
74
-        visualizer.show()
75
-    @endcode
76
-
77
-    Metric for calculating distance between points can be specified by the additional parameter 'metric':
78
-    @code
79
-        # create Minkowski distance metric with degree equals to '2'
80
-        metric = distance_metric(type_metric.MINKOWSKI, degree=2)
81
-
82
-        # create K-Medoids algorithm with specific distance metric
83
-        kmedoids_instance = kmedoids(sample, initial_medoids, metric=metric)
84
-
85
-        # run cluster analysis and obtain results
86
-        kmedoids_instance.process()
87
-        clusters = kmedoids_instance.get_clusters()
88
-    @endcode
89
-
90
-    Distance matrix can be used instead of sequence of points to increase performance and for that purpose parameter 'data_type' should be used:
91
-    @code
92
-        # calculate distance matrix for sample
93
-        sample = read_sample(path_to_sample)
94
-        matrix = calculate_distance_matrix(sample)
95
-
96
-        # create K-Medoids algorithm for processing distance matrix instead of points
97
-        kmedoids_instance = kmedoids(matrix, initial_medoids, data_type='distance_matrix')
98
-
99
-        # run cluster analysis and obtain results
100
-        kmedoids_instance.process()
101
-
102
-        clusters = kmedoids_instance.get_clusters()
103
-        medoids = kmedoids_instance.get_medoids()
104
-    @endcode
105
-
106
-    """
107
-    
108
-    
109
-    def __init__(self, data, initial_index_medoids, tolerance=0.001, ccore=True, **kwargs):
110
-        """!
111
-        @brief Constructor of clustering algorithm K-Medoids.
112
-        
113
-        @param[in] data (list): Input data that is presented as list of points (objects), each point should be represented by list or tuple.
114
-        @param[in] initial_index_medoids (list): Indexes of initial medoids (indexes of points in input data).
115
-        @param[in] tolerance (double): Stop condition: if maximum value of distance change of medoids of clusters is less than tolerance then the algorithm will stop processing.
116
-        @param[in] ccore (bool): If specified then the CCORE library (C++ pyclustering library) is used for clustering instead of Python code.
117
-        @param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'metric', 'data_type', 'itermax').
118
-
119
-        <b>Keyword Args:</b><br>
120
-            - metric (distance_metric): Metric that is used for distance calculation between two points.
121
-            - data_type (string): Data type of input sample 'data' that is processed by the algorithm ('points', 'distance_matrix').
122
-            - itermax (uint): Maximum number of iterations for cluster analysis.
123
-
124
-        """
125
-        self.__pointer_data = data
126
-        self.__clusters = []
127
-        self.__medoid_indexes = initial_index_medoids
128
-        self.__tolerance = tolerance
129
-
130
-        self.__metric = kwargs.get('metric', distance_metric(type_metric.EUCLIDEAN_SQUARE))
131
-        self.__data_type = kwargs.get('data_type', 'points')
132
-        self.__itermax = kwargs.get('itermax', 200)
133
-
134
-        self.__distance_calculator = self.__create_distance_calculator()
135
-
136
-        self.__ccore = ccore and self.__metric.get_type() != type_metric.USER_DEFINED
137
-        if self.__ccore:
138
-            self.__ccore = ccore_library.workable()
139
-
140
-        #self.__verify_instance()
141
-
142
-
143
-    def process(self):
144
-        """!
145
-        @brief Performs cluster analysis in line with rules of K-Medoids algorithm.
146
-
147
-        @return (kmedoids) Returns itself (K-Medoids instance).
148
-
149
-        @remark Results of clustering can be obtained using corresponding get methods.
150
-        
151
-        @see get_clusters()
152
-        @see get_medoids()
153
-        
154
-        """
155
-        
156
-        if self.__ccore is True:
157
-            ccore_metric = metric_wrapper.create_instance(self.__metric)
158
-            self.__clusters, self.__medoid_indexes = wrapper.kmedoids(self.__pointer_data, self.__medoid_indexes, self.__tolerance, self.__itermax, ccore_metric.get_pointer(), self.__data_type)
159
-        
160
-        else:
161
-            changes = float('inf')
162
-            iterations = 0
163
-
164
-            while changes > self.__tolerance and iterations < self.__itermax:
165
-                self.__clusters = self.__update_clusters()
166
-                update_medoid_indexes = self.__update_medoids()
167
-
168
-                changes = max([self.__distance_calculator(self.__medoid_indexes[index], update_medoid_indexes[index]) for index in range(len(update_medoid_indexes))])
169
-
170
-                self.__medoid_indexes = update_medoid_indexes
171
-
172
-                iterations += 1
173
-
174
-        return self
175
-
176
-
177
-    def predict(self, points):
178
-        """!
179
-        @brief Calculates the closest cluster to each point.
180
-
181
-        @param[in] points (array_like): Points for which closest clusters are calculated.
182
-
183
-        @return (list) List of closest clusters for each point. Each cluster is denoted by index. Return empty
184
-                 collection if 'process()' method was not called.
185
-
186
-        An example how to calculate (or predict) the closest cluster to specified points.
187
-        @code
188
-            from pyclustering.cluster.kmedoids import kmedoids
189
-            from pyclustering.samples.definitions import SIMPLE_SAMPLES
190
-            from pyclustering.utils import read_sample
191
-
192
-            # Load list of points for cluster analysis.
193
-            sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE3)
194
-
195
-            # Initial medoids for sample 'Simple3'.
196
-            initial_medoids = [4, 12, 25, 37]
197
-
198
-            # Create instance of K-Medoids algorithm with prepared centers.
199
-            kmedoids_instance = kmedoids(sample, initial_medoids)
200
-
201
-            # Run cluster analysis.
202
-            kmedoids_instance.process()
203
-
204
-            # Calculate the closest cluster to following two points.
205
-            points = [[0.35, 0.5], [2.5, 2.0]]
206
-            closest_clusters = kmedoids_instance.predict(points)
207
-            print(closest_clusters)
208
-        @endcode
209
-
210
-        """
211
-
212
-        if len(self.__clusters) == 0:
213
-            return []
214
-
215
-        medoids = [ self.__pointer_data[index] for index in self.__medoid_indexes ]
216
-        differences = numpy.zeros((len(points), len(medoids)))
217
-        for index_point in range(len(points)):
218
-            differences[index_point] = [ self.__metric(points[index_point], center) for center in medoids ]
219
-
220
-        return numpy.argmin(differences, axis=1)
221
-
222
-
223
-    def get_clusters(self):
224
-        """!
225
-        @brief Returns list of allocated clusters, each cluster contains indexes of objects in list of data.
226
-        
227
-        @see process()
228
-        @see get_medoids()
229
-        
230
-        """
231
-        
232
-        return self.__clusters
233
-    
234
-    
235
-    def get_medoids(self):
236
-        """!
237
-        @brief Returns list of medoids of allocated clusters represented by indexes from the input data.
238
-        
239
-        @see process()
240
-        @see get_clusters()
241
-        
242
-        """
243
-
244
-        return self.__medoid_indexes
245
-
246
-
247
-    def get_cluster_encoding(self):
248
-        """!
249
-        @brief Returns clustering result representation type that indicates how clusters are encoded.
250
-        
251
-        @return (type_encoding) Clustering result representation.
252
-        
253
-        @see get_clusters()
254
-        
255
-        """
256
-        
257
-        return type_encoding.CLUSTER_INDEX_LIST_SEPARATION
258
-
259
-
260
-    def __verify_instance(self):
261
-        pass
262
-
263
-
264
-    def __create_distance_calculator(self):
265
-        """!
266
-        @brief Creates distance calculator in line with algorithms parameters.
267
-
268
-        @return (callable) Distance calculator.
269
-
270
-        """
271
-        if self.__data_type == 'points':
272
-            return lambda index1, index2: self.__metric(self.__pointer_data[index1], self.__pointer_data[index2])
273
-
274
-        elif self.__data_type == 'distance_matrix':
275
-            if isinstance(self.__pointer_data, numpy.matrix):
276
-                return lambda index1, index2: self.__pointer_data.item((index1, index2))
277
-
278
-            return lambda index1, index2: self.__pointer_data[index1][index2]
279
-
280
-        else:
281
-            raise TypeError("Unknown type of data is specified '%s'" % self.__data_type)
282
-
283
-
284
-    def __update_clusters(self):
285
-        """!
286
-        @brief Calculates distance to each point from each cluster.
287
-        @details Nearest points are captured by the corresponding clusters and as a result clusters are updated.
288
-        
289
-        @return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.
290
-        
291
-        """
292
-        
293
-        clusters = [[self.__medoid_indexes[i]] for i in range(len(self.__medoid_indexes))]
294
-        for index_point in range(len(self.__pointer_data)):
295
-            if index_point in self.__medoid_indexes:
296
-                continue
297
-
298
-            index_optim = -1
299
-            dist_optim = float('Inf')
300
-            
301
-            for index in range(len(self.__medoid_indexes)):
302
-                dist = self.__distance_calculator(index_point, self.__medoid_indexes[index])
303
-                
304
-                if dist < dist_optim:
305
-                    index_optim = index
306
-                    dist_optim = dist
307
-            
308
-            clusters[index_optim].append(index_point)
309
-        
310
-        return clusters
311
-    
312
-    
313
-    def __update_medoids(self):
314
-        """!
315
-        @brief Find medoids of clusters in line with contained objects.
316
-        
317
-        @return (list) list of medoids for current number of clusters.
318
-        
319
-        """
320
-
321
-        medoid_indexes = [-1] * len(self.__clusters)
322
-        
323
-        for index in range(len(self.__clusters)):
324
-            medoid_index = medoid(self.__pointer_data, self.__clusters[index], metric=self.__metric, data_type=self.__data_type)
325
-            medoid_indexes[index] = medoid_index
326
-             
327
-        return medoid_indexes

BIN
doc/sujet.pdf View File