<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta name="description" content="K Nearest Neighbour Algorithm - Data Mining" />
<meta name="keywords" content="k nearest neighbour, data mining, php, computer science, james hamilton" />
<meta http-equiv="Content-Language" content="en-gb" />
<meta http-equiv="content-type" content="text/html; charset=UTF-8" />
<meta name="robots" content="all" />
<meta name="revisit-after" content="1 days" />

<title>K Nearest Neighbour Algorithm - Data Mining</title>
<?php
// Site-specific helpers for whoyouknow.co.uk (e.g. page logging); the original
// author notes these are not needed when running the demo on your own site.
include '../ants/lib_functions.php';
// Swap this for your own database connection script.
include 'dbconnect.inc';
// NOTE(review): logAccess() is presumably defined in lib_functions.php - confirm.
logAccess();
?>
<link rel="stylesheet" media="all" href="style.css" type="text/css" />
<link rel="stylesheet" media="all" href="nearestneighbour.css" type="text/css" />
</head>

<body>

<?php
// Page mode switch: "?source" in the query string prints this file's own
// highlighted source; otherwise the interactive demo is rendered.
// The else-branch opened here stays open past the closing tag of this block.
if (isset($_GET['source'])) {
    highlight_file(__FILE__);
} else {
    // Array keys are quoted: bareword keys are undefined constants, which is
    // a fatal Error on PHP 8.
    // Default to K = 3 until the form supplies a value.
    if (!isset($_POST['k'])) {
        $_POST['k'] = 3;
    }
?>

    <p>
        The K-Nearest Neighbour algorithm is similar to the <a href="nearest_neighbour.php" title="Nearest Neighbour">Nearest Neighbour algorithm</a>, except that it looks at the closest <em>K</em> instances to the unclassified instance. The class of the new instance is then given by the class with the highest frequency of those <em>K</em> instances. This is useful because the influence of anomalous instances is reduced.
    </p>
    <p>
    Try it out below. If you answer 'No' to all five symptoms, the diagnosis will be 'Strepthroat', whereas the standard <a href="nearest_neighbour.php" title="Nearest Neighbour">Nearest Neighbour algorithm</a> gives 'Allergy'.
    </p>
    <p>
    <strong>Choosing <em>K</em>:</strong> With K = 1 the result is the same as nearest neighbour, because only the single closest instance is considered. K = N (where N is the number of training instances) would be a bad choice, because the classification would then depend on the class frequency of all the instances, not just the closest ones. So there must be an optimal value of <em>K</em> somewhere between these extremes. Try changing <em>K</em> to see what happens.
    </p>

    <form method="post" action="<?=$_SERVER['PHP_SELF']?>">
    K:&nbsp;
    <select name="k">
        <?php foreach(range(110) as $k) { ?>
            <option value="<?=$k?><?php if($k == $_POST[k]) { ?> selected="selected"<?php ?>><?=$k?></option>
        <?php ?>
    </select>


    <table id="patients" cellpadding="3" cellspacing="0">
        <tr>
            <th>Patient ID</th>
            <th>Sore Throat</th>
            <th>Fever</th>
            <th>Swollen Glands</th>
            <th>Congestion</th>
            <th>Headache</th>
            <th>Diagnosis</th>
            <th>Distance</th>
        </tr>

    <?php

        
/*
            --
            -- Table structure for table `datamining_diagnoses`
            --

            CREATE TABLE IF NOT EXISTS `datamining_diagnoses` (
              `id` int(11) NOT NULL auto_increment,
              `sorethroat` tinyint(4) NOT NULL default '0',
              `fever` tinyint(4) NOT NULL default '0',
              `swollenglands` tinyint(4) NOT NULL default '0',
              `congestion` tinyint(4) NOT NULL default '0',
              `headache` tinyint(4) NOT NULL default '0',
              `diagnosis` varchar(50) NOT NULL default '',
              PRIMARY KEY  (`id`)
            ) ENGINE=MyISAM  DEFAULT CHARSET=latin1 AUTO_INCREMENT=11 ;

            --
            -- Dumping data for table `datamining_diagnoses`
            --

            INSERT INTO `datamining_diagnoses` (`id`, `sorethroat`, `fever`, `swollenglands`, `congestion`, `headache`, `diagnosis`) VALUES
            (1, 1, 1, 1, 1, 1, 'Strepthroat'),
            (2, 0, 0, 0, 1, 1, 'Allergy'),
            (3, 1, 1, 0, 1, 0, 'Cold'),
            (4, 1, 0, 1, 0, 0, 'Strepthroat'),
            (5, 0, 1, 0, 1, 0, 'Cold'),
            (6, 0, 0, 0, 1, 0, 'Allergy'),
            (7, 0, 0, 1, 0, 0, 'Strepthroat'),
            (8, 1, 0, 0, 1, 1, 'Allergy'),
            (9, 0, 1, 0, 1, 1, 'Cold'),
            (10, 1, 1, 0, 1, 1, 'Cold');
        */

        /*
            function getRows takes an SQL query string (and an optional array key type - NUM or ASSOC)
            and returns an array containing all the rows of the query result.

                define(ASSOC, 1);
                define(NUM, 2);

                function getRows($sql, $type=ASSOC) {
                    $result = mysql_query($sql);
                    if($type == ASSOC) {
                        while($row = mysql_fetch_array($result))
                            $ret[] = $row;
                    }else if($type == NUM) {
                        while($row = mysql_fetch_row($result))
                            $ret[] = $row;
                    }else{
                        die("type should be NUM or ASSOC");
                    }
                    return $ret;
                }
        */


        
// Load every training instance as a numerically indexed row:
// [0] id, [1]-[5] the five symptom flags, [6] diagnosis.
$rows = getRows("SELECT * FROM datamining_diagnoses;", NUM);
if (!is_array($rows)) {
    // Treat "no rows returned" as an empty training set.
    $rows = array();
}

$closest = -1;

// Defaults so the page renders cleanly before the form has been submitted.
$distance    = array();  // row index => distance to the submitted instance
$newdistance = array();  // the same, restricted to the K nearest rows
$diagnosis   = '';

if (isset($_POST['diagnose'])) {
    // instance[] arrives as: [0] the hidden patient id, [1]-[5] the symptom answers.
    $instance = (isset($_POST['instance']) && is_array($_POST['instance']))
        ? $_POST['instance']
        : array();

    // Number of neighbours to consult; at least one, 3 when none was posted.
    $neighbours = isset($_POST['k']) ? max(1, (int) $_POST['k']) : 3;

    $total    = count($rows);
    $distance = $total > 0 ? array_fill(0, $total, 0) : array();

    // Distance = number of symptom columns (1-5) on which a training row
    // disagrees with the submitted instance.
    for ($i = 0; $i < $total; $i++) {
        for ($j = 1; $j < 6; $j++) {
            // A missing answer counts as a mismatch.
            if (!isset($instance[$j]) || $rows[$i][$j] != $instance[$j]) {
                $distance[$i]++;
            }
        }
    }

    // Sort ascending by distance, keeping the row indices as keys.
    asort($distance);

    // Tally the diagnoses of the K closest rows and remember their distances
    // so those rows can be highlighted in the table.
    $kclosest = array();
    $x = 0;
    foreach ($distance as $key => $d) {
        if (++$x > $neighbours) {
            break;
        }
        $label = $rows[$key][6];
        if (!isset($kclosest[$label])) {
            $kclosest[$label] = 0;
        }
        $kclosest[$label]++;
        $newdistance[$key] = $d;
    }

    // Most frequent diagnosis first; the first key is the predicted class.
    arsort($kclosest);
    reset($kclosest);
    $diagnosis = (string) key($kclosest);
}

        
// Emit one table row per training instance. $a tracks the row index so the
// K nearest rows can be highlighted and each row's distance shown.
$a = 0;
foreach ((is_array($rows) ? $rows : array()) as $row) {
    $isNeighbour = isset($newdistance[$a]);
    // No distance is available until a diagnosis has been requested.
    $rowDistance = isset($distance[$a]) ? $distance[$a] : '';
    $a++;
?>
            <tr<?php if ($isNeighbour) { ?> class="selected"<?php } ?>>
                <td><?=htmlspecialchars((string) $row[0])?></td>
                <td><?=$row[1] == 0 ? "No" : "Yes"?></td>
                <td><?=$row[2] == 0 ? "No" : "Yes"?></td>
                <td><?=$row[3] == 0 ? "No" : "Yes"?></td>
                <td><?=$row[4] == 0 ? "No" : "Yes"?></td>
                <td><?=$row[5] == 0 ? "No" : "Yes"?></td>
                <td><?=htmlspecialchars((string) $row[6])?></td>
                <td><?=$rowDistance?></td>
            </tr>

        <?php
}
?>
        <tr>

                <td><input type="hidden" name="instance[]" value="11" />11</td>
                <td><select name="instance[]">
                        <option value="0"<?php if($_POST[instance][1] == "0") echo "selected=\"selected\"";?>>No</option>
                        <option value="1"<?php if($_POST[instance][1] == "1") echo "selected=\"selected\"";?>>Yes</option>
                    </select>
                </td>
                <td><select name="instance[]">
                        <option value="0"<?php if($_POST[instance][2] == "0") echo "selected=\"selected\"";?>>No</option>
                        <option value="1"<?php if($_POST[instance][2] == "1") echo "selected=\"selected\"";?>>Yes</option>
                    </select>
                </td>
                <td><select name="instance[]">
                        <option value="0"<?php if($_POST[instance][3] == "0") echo "selected=\"selected\"";?>>No</option>
                        <option value="1"<?php if($_POST[instance][3] == "1") echo "selected=\"selected\"";?>>Yes</option>
                    </select>
                </td>
                <td><select name="instance[]">
                        <option value="0"<?php if($_POST[instance][4] == "0") echo "selected=\"selected\"";?>>No</option>
                        <option value="1"<?php if($_POST[instance][4] == "1") echo "selected=\"selected\"";?>>Yes</option>
                    </select>
                </td>
                <td><select name="instance[]">
                        <option value="0"<?php if($_POST[instance][5] == "0") echo "selected=\"selected\"";?>>No</option>
                        <option value="1"<?php if($_POST[instance][5] == "1") echo "selected=\"selected\"";?>>Yes</option>
                    </select>
                </td>
                <td<?php if($diagnosis) { ?> class="selected"<?php ?>><?=$diagnosis?></td>
                <td><input type="submit" name="diagnose" value="Diagnose" /></td>

        </tr>

    </table>
        </form>
    <p>
        <a href="<?=$_SERVER[PHP_SELF]?>?source">View Page Source</a>
    </p>
<?php ?>

    <p>
    Back to <a href="index.php">Data Mining</a>
    </p>
    <p>
        It's not what you know, it's <a href="/">whoyouknow.co.uk</a>
    </p>

<script src="http://www.google-analytics.com/urchin.js" type="text/javascript">
</script>

<script type="text/javascript">
// Set the analytics account identifier, then invoke the tracker.
// NOTE(review): urchinTracker() is presumably defined by the urchin.js script
// included just above - confirm it still loads.
_uacct = "UA-16562388-1";
urchinTracker();
</script>
</body>
</html>

Back to Data Mining

It's not what you know, it's whoyouknow.co.uk