Skip to content Skip to sidebar Skip to footer

Finding K-nearest Neighbors For A Given Vector?

Given that I have the following in my knowledge-database: 1 0 6 20 0 0 6 20 1 0 3 6 0 0 3 6 1 0 15 45 0 0 15 45 1 0 17 44 0 0 17 44 1 0 2 5 0 0 2 5 I want

Solution 1:

In Python (from www.comp.mq.edu.au/):

defcount_different_values(k_v1s, k_v2s):
    """kv1s and kv2s should be dictionaries mapping keys to 
    values.  count_different_values() returns the number of keys in 
    k_v1s and k_v2s that don't have the same value"""
    ks = set(k_v1s.iterkeys()) | set(k_v2s.iterkeys())
    returnsum(1for k in ks if k_v1s.get(k) != k_v2s.get(k))


defsum_square_diffs(x0s, x1s):
    """x1s and x2s should be equal-lengthed sequences of numbers.
    sum_square_differences() returns the sum of the squared differences 
    of x1s and x2s."""sum((pow(x1-x2,2) for x1,x2 inzip(x1s,x2s)))

defincr(x_c, x, inc=1):
    """increments the value associated with key x in dictionary x_c
    by inc, or sets it to inc if key x is not in dictionary x_c."""
    x_c[x] = x_c.get(x, 0) + inc

defcount_items(xs, x_c=None):
    """returns a dictionary x_c whose keys are the items in xs, and 
    whose values are the number of times each item occurs in xs."""if x_c == None:
        x_c = {}
    for x in xs:
        incr(x_c, x)
    return x_c

defsecond(xy):
    """returns the second element in a sequence"""return xy[1]

defmost_frequent(xs):
    """returns the most frequent item in xs"""
    x_c = count_items(xs)
    returnsorted(x_c.iteritems(), key=second, reverse=True)[0][0]


classkNN_classifier:
    """This is a k-nearest-neighbour classifer."""def__init__(self, train_data, k, distf):
        self.train_data = train_data
        self.k = min(k, len(train_data))
        self.distf = distf

    defclassify(self, x):
        Ns = sorted(self.train_data, 
                    key=lambda xy: self.distf(xy[0], x))
        return most_frequent((y for x,y in Ns[:self.k]))

    defbatch_classify(self, xs):
        return [self.classify(x) for x in xs]

deftrain(train_data, k=1, distf=count_different_values):
    """Returns a kNN_classifer that contains the data, the number of
    nearest neighbours k and the distance function"""return kNN_classifier(train_data, k, distf)

also another implementation of www.umanitoba.ca/

#!/usr/bin/env python# This code is part of the Biopython distribution and governed by its# license.  Please see the LICENSE file that should have been included# as part of this package."""
This module provides code for doing k-nearest-neighbors classification.

k Nearest Neighbors is a supervised learning algorithm that classifies
a new observation based the classes in its surrounding neighborhood.

Glossary:
distance   The distance between two points in the feature space.
weight     The importance given to each point for classification. 


Classes:
kNN           Holds information for a nearest neighbors classifier.


Functions:
train        Train a new kNN classifier.
calculate    Calculate the probabilities of each class, given an observation.
classify     Classify an observation into a class.

    Weighting Functions:
equal_weight    Every example is given a weight of 1.

"""import numpy

classkNN:
    """Holds information necessary to do nearest neighbors classification.

    Members:
    classes  Set of the possible classes.
    xs       List of the neighbors.
    ys       List of the classes that the neighbors belong to.
    k        Number of neighbors to look at.

    """def__init__(self):
        """kNN()"""
        self.classes = set()
        self.xs = []
        self.ys = []
        self.k = Nonedefequal_weight(x, y):
    """equal_weight(x, y) -> 1"""# everything gets 1 votereturn1deftrain(xs, ys, k, typecode=None):
    """train(xs, ys, k) -> kNN

    Train a k nearest neighbors classifier on a training set.  xs is a
    list of observations and ys is a list of the class assignments.
    Thus, xs and ys should contain the same number of elements.  k is
    the number of neighbors that should be examined when doing the
    classification.

    """
    knn = kNN()
    knn.classes = set(ys)
    knn.xs = numpy.asarray(xs, typecode)
    knn.ys = ys
    knn.k = k
    return knn

defcalculate(knn, x, weight_fn=equal_weight, distance_fn=None):
    """calculate(knn, x[, weight_fn][, distance_fn]) -> weight dict

    Calculate the probability for each class.  knn is a kNN object.  x
    is the observed data.  weight_fn is an optional function that
    takes x and a training example, and returns a weight.  distance_fn
    is an optional function that takes two points and returns the
    distance between them.  If distance_fn is None (the default), the
    Euclidean distance is used.  Returns a dictionary of the class to
    the weight given to the class.

    """
    x = numpy.asarray(x)

    order = []  # list of (distance, index)if distance_fn:
        for i inrange(len(knn.xs)):
            dist = distance_fn(x, knn.xs[i])
            order.append((dist, i))
    else:
        # Default: Use a fast implementation of the Euclidean distance
        temp = numpy.zeros(len(x))
        # Predefining temp allows reuse of this array, making this# function about twice as fast.for i inrange(len(knn.xs)):
            temp[:] = x - knn.xs[i]
            dist = numpy.sqrt(numpy.dot(temp,temp))
            order.append((dist, i))
    order.sort()

    # first 'k' are the ones I want.
    weights = {}  # class -> number of votesfor k in knn.classes:
        weights[k] = 0.0for dist, i in order[:knn.k]:
        klass = knn.ys[i]
        weights[klass] = weights[klass] + weight_fn(x, knn.xs[i])

    return weights

defclassify(knn, x, weight_fn=equal_weight, distance_fn=None):
    """classify(knn, x[, weight_fn][, distance_fn]) -> class

    Classify an observation into a class.  If not specified, weight_fn will
    give all neighbors equal weight.  distance_fn is an optional function
    that takes two points and returns the distance between them.  If
    distance_fn is None (the default), the Euclidean distance is used.
    """
    weights = calculate(
        knn, x, weight_fn=weight_fn, distance_fn=distance_fn)

    most_class = None
    most_weight = Nonefor klass, weight in weights.items():
        if most_classisNoneor weight > most_weight:
            most_class = klass
            most_weight = weight
    return most_class

Post a Comment for "Finding K-nearest Neighbors For A Given Vector?"