Finding K-nearest Neighbors For A Given Vector?
Given that I have the following in my knowledge-database: 1 0 6 20 0 0 6 20 1 0 3 6 0 0 3 6 1 0 15 45 0 0 15 45 1 0 17 44 0 0 17 44 1 0 2 5 0 0 2 5 I want
Solution 1:
In Python (from www.comp.mq.edu.au/):
defcount_different_values(k_v1s, k_v2s):
"""kv1s and kv2s should be dictionaries mapping keys to
values. count_different_values() returns the number of keys in
k_v1s and k_v2s that don't have the same value"""
ks = set(k_v1s.iterkeys()) | set(k_v2s.iterkeys())
returnsum(1for k in ks if k_v1s.get(k) != k_v2s.get(k))
defsum_square_diffs(x0s, x1s):
"""x1s and x2s should be equal-lengthed sequences of numbers.
sum_square_differences() returns the sum of the squared differences
of x1s and x2s."""sum((pow(x1-x2,2) for x1,x2 inzip(x1s,x2s)))
defincr(x_c, x, inc=1):
"""increments the value associated with key x in dictionary x_c
by inc, or sets it to inc if key x is not in dictionary x_c."""
x_c[x] = x_c.get(x, 0) + inc
defcount_items(xs, x_c=None):
"""returns a dictionary x_c whose keys are the items in xs, and
whose values are the number of times each item occurs in xs."""if x_c == None:
x_c = {}
for x in xs:
incr(x_c, x)
return x_c
defsecond(xy):
"""returns the second element in a sequence"""return xy[1]
defmost_frequent(xs):
"""returns the most frequent item in xs"""
x_c = count_items(xs)
returnsorted(x_c.iteritems(), key=second, reverse=True)[0][0]
classkNN_classifier:
"""This is a k-nearest-neighbour classifer."""def__init__(self, train_data, k, distf):
self.train_data = train_data
self.k = min(k, len(train_data))
self.distf = distf
defclassify(self, x):
Ns = sorted(self.train_data,
key=lambda xy: self.distf(xy[0], x))
return most_frequent((y for x,y in Ns[:self.k]))
defbatch_classify(self, xs):
return [self.classify(x) for x in xs]
deftrain(train_data, k=1, distf=count_different_values):
"""Returns a kNN_classifer that contains the data, the number of
nearest neighbours k and the distance function"""return kNN_classifier(train_data, k, distf)
also another implementation of www.umanitoba.ca/
#!/usr/bin/env python# This code is part of the Biopython distribution and governed by its# license. Please see the LICENSE file that should have been included# as part of this package."""
This module provides code for doing k-nearest-neighbors classification.
k Nearest Neighbors is a supervised learning algorithm that classifies
a new observation based the classes in its surrounding neighborhood.
Glossary:
distance The distance between two points in the feature space.
weight The importance given to each point for classification.
Classes:
kNN Holds information for a nearest neighbors classifier.
Functions:
train Train a new kNN classifier.
calculate Calculate the probabilities of each class, given an observation.
classify Classify an observation into a class.
Weighting Functions:
equal_weight Every example is given a weight of 1.
"""import numpy
classkNN:
"""Holds information necessary to do nearest neighbors classification.
Members:
classes Set of the possible classes.
xs List of the neighbors.
ys List of the classes that the neighbors belong to.
k Number of neighbors to look at.
"""def__init__(self):
"""kNN()"""
self.classes = set()
self.xs = []
self.ys = []
self.k = Nonedefequal_weight(x, y):
"""equal_weight(x, y) -> 1"""# everything gets 1 votereturn1deftrain(xs, ys, k, typecode=None):
"""train(xs, ys, k) -> kNN
Train a k nearest neighbors classifier on a training set. xs is a
list of observations and ys is a list of the class assignments.
Thus, xs and ys should contain the same number of elements. k is
the number of neighbors that should be examined when doing the
classification.
"""
knn = kNN()
knn.classes = set(ys)
knn.xs = numpy.asarray(xs, typecode)
knn.ys = ys
knn.k = k
return knn
defcalculate(knn, x, weight_fn=equal_weight, distance_fn=None):
"""calculate(knn, x[, weight_fn][, distance_fn]) -> weight dict
Calculate the probability for each class. knn is a kNN object. x
is the observed data. weight_fn is an optional function that
takes x and a training example, and returns a weight. distance_fn
is an optional function that takes two points and returns the
distance between them. If distance_fn is None (the default), the
Euclidean distance is used. Returns a dictionary of the class to
the weight given to the class.
"""
x = numpy.asarray(x)
order = [] # list of (distance, index)if distance_fn:
for i inrange(len(knn.xs)):
dist = distance_fn(x, knn.xs[i])
order.append((dist, i))
else:
# Default: Use a fast implementation of the Euclidean distance
temp = numpy.zeros(len(x))
# Predefining temp allows reuse of this array, making this# function about twice as fast.for i inrange(len(knn.xs)):
temp[:] = x - knn.xs[i]
dist = numpy.sqrt(numpy.dot(temp,temp))
order.append((dist, i))
order.sort()
# first 'k' are the ones I want.
weights = {} # class -> number of votesfor k in knn.classes:
weights[k] = 0.0for dist, i in order[:knn.k]:
klass = knn.ys[i]
weights[klass] = weights[klass] + weight_fn(x, knn.xs[i])
return weights
defclassify(knn, x, weight_fn=equal_weight, distance_fn=None):
"""classify(knn, x[, weight_fn][, distance_fn]) -> class
Classify an observation into a class. If not specified, weight_fn will
give all neighbors equal weight. distance_fn is an optional function
that takes two points and returns the distance between them. If
distance_fn is None (the default), the Euclidean distance is used.
"""
weights = calculate(
knn, x, weight_fn=weight_fn, distance_fn=distance_fn)
most_class = None
most_weight = Nonefor klass, weight in weights.items():
if most_classisNoneor weight > most_weight:
most_class = klass
most_weight = weight
return most_class
Post a Comment for "Finding K-nearest Neighbors For A Given Vector?"