DBSCAN clustering algorithm is implemented by C++

  • 2020-04-01 23:42:05
  • OfStack

Because of the need of work these days, the DBSCAN clustering algorithm was implemented C++. Order n^2, and we're going to spend a lot of time on each point in the domain. The algorithm is very simple, now share everyone's reference, also hope to have more communication.
  The data point type is described as follows:

#include <vector>

 using namespace std;

 const int DIME_NUM=2;        //Data dimension is 2, global constant

 //Data point type
 class DataPoint
 {
 private:
     unsigned long dpID;                //Data point ID
     double dimension[DIME_NUM];        //Dimensional data
     long clusterId;                    //Belongs to the cluster ID
     bool isKey;                        //Core object
     bool visited;                    //Whether or not visited
     vector<unsigned long> arrivalPoints;    //List of domain data point ids
 public:
     DataPoint();                                                    //Default constructor
     DataPoint(unsigned long dpID,double* dimension , bool isKey);    //The constructor

     unsigned long GetDpId();                //GetDpId method
     void SetDpId(unsigned long dpID);        //SetDpId method
     double* GetDimension();                    //Method GetDimension
     void SetDimension(double* dimension);    //SetDimension method
     bool IsKey();                            //GetIsKey method
     void SetKey(bool isKey);                //The SetKey method
     bool isVisited();                        //GetIsVisited method
     void SetVisited(bool visited);            //SetIsVisited method
     long GetClusterId();                    //GetClusterId method
     void SetClusterId(long classId);        //SetClusterId method
     vector<unsigned long>& GetArrivalPoints();    //GetArrivalPoints method
 };

This is the implementation:

#include "DataPoint.h"

 //Default constructor
 DataPoint::DataPoint()
 {
 }

 //The constructor
 DataPoint::DataPoint(unsigned long dpID,double* dimension , bool isKey):isKey(isKey),dpID(dpID)
 {
     //Pass the dimension data for each dimension
     for(int i=0; i<DIME_NUM;i++)
     {
         this->dimension[i]=dimension[i];
     }
 }

 //Setting dimensional data
 void DataPoint::SetDimension(double* dimension)
 {
     for(int i=0; i<DIME_NUM;i++)
     {
         this->dimension[i]=dimension[i];
     }
 }

 //Retrieve dimensional data
 double* DataPoint::GetDimension()
 {
     return this->dimension;
 }

 //Gets whether it is a core object
 bool DataPoint::IsKey()
 {
     return this->isKey;
 }

 //Set the core object flag
 void DataPoint::SetKey(bool isKey)
 {
     this->isKey = isKey;
 }

 //Gets the DpId method
 unsigned long DataPoint::GetDpId()
 {
     return this->dpID;
 }

 //Set the DpId method
 void DataPoint::SetDpId(unsigned long dpID)
 {
     this->dpID = dpID;
 }

 //GetIsVisited method
 bool DataPoint::isVisited()
 {
     return this->visited;
 }

 
 //SetIsVisited method
 void DataPoint::SetVisited( bool visited )
 {
     this->visited = visited;
 }

 //GetClusterId method
 long DataPoint::GetClusterId()
 {
     return this->clusterId;
 }

 //GetClusterId method
 void DataPoint::SetClusterId( long clusterId )
 {
     this->clusterId = clusterId;
 }

 //GetArrivalPoints method
 vector<unsigned long>& DataPoint::GetArrivalPoints()
 {
     return arrivalPoints;
 }

DBSCAN algorithm type description:

#include <iostream>
 #include <cmath>

 using namespace std;

 //Cluster analysis type
 class ClusterAnalysis
 {
 private:
     vector<DataPoint> dadaSets;        //The data set
     unsigned int dimNum;            //The dimension
     double radius;                    //The radius of
     unsigned int dataNum;            //Number of data
     unsigned int minPTs;            //Minimum number of neighborhood data

     double GetDistance(DataPoint& dp1, DataPoint& dp2);                    //Distance functions
     void SetArrivalPoints(DataPoint& dp);                                //Sets the list of domain points for the data points
     void KeyPointCluster( unsigned long i, unsigned long clusterId );    //Perform clustering operations on points in the field of data points
 public:

     ClusterAnalysis(){}                    //Default constructor
     bool Init(char* fileName, double radius, int minPTs);    //Initialization operation
     bool DoDBSCANRecursive();            //DBSCAN recursive algorithm
     bool WriteToFile(char* fileName);    //Writes the clustering results to a file
 };

  Clustering realization:

#include "ClusterAnalysis.h"
 #include <fstream>
 #include <iosfwd>
 #include <math.h>

 /*
  Function: cluster initialization operation 
  Will the data The file name . The radius of , the information of the minimum number of data in the domain is written into the clustering algorithm class, the file is read, and the data information is read and written into the data set of the algorithm class 
  Parameters: 
 char* fileName;    //The file name
 double radius;    //The radius of
 int minPTs;        //Domain minimum number of data & NBSP;
 return Value:  true;    */
 bool ClusterAnalysis::Init(char* fileName, double radius, int minPTs)
 {
     this->radius = radius;        // Set up the The radius of
     this->minPTs = minPTs;        //Sets the minimum number of data in the domain
     this->dimNum = DIME_NUM;    //Setting data dimensions
     ifstream ifs(fileName);        //Open the file
     if (! ifs.is_open())                //If the file has been opened, an error message is reported
     {
         cout << "Error opening file";    //Output error message
         exit (-1);                        //Program exits
     }

     unsigned long i=0;            //Statistics of data number
     while (! ifs.eof() )                //Read the POI information from the file and write the POI information to the POI list
     {
         DataPoint tempDP;                //Temporary data point object
         double tempDimData[DIME_NUM];    //Temporary data point dimension information
         for(int j=0; j<DIME_NUM; j++)    //Read the file, read each dimension
         {
             ifs>>tempDimData[j];
         }
         tempDP.SetDimension(tempDimData);    //Store the dimension information in the data point object

 //char date[20]="";
 //char time[20]="";
         ////double type;    // Useless information 
         //ifs >> date;
 //ifs >> time;    // Useless information read in 

         tempDP.SetDpId(i);                    //Set the data point object ID to I
         tempDP.SetVisited(false);            //The data point object isVisited is set to false
         tempDP.SetClusterId(-1);            //Set the default cluster ID to -1
         dadaSets.push_back(tempDP);            //Presses the object into the data collection container
         i++;        //Count + 1
     }
     ifs.close();        //Close file stream
     dataNum =i;            //Set the data object collection size to I
     for(unsigned long i=0; i<dataNum;i++)
     {
         SetArrivalPoints(dadaSets[i]);            //Computes objects in the data point domain
     }
     return true;    //return
 }

 /*
  Function: writes the data set processed by the clustering algorithm back to the file 
  Description: write the clustering results back to the file 
  Parameters: 
 char* fileName;    // Want to write The file name
 return Value:  true    */
 bool ClusterAnalysis::WriteToFile(char* fileName )
 {
     ofstream of1(fileName);                                //Initializes the file output stream
     for(unsigned long i=0; i<dataNum;i++)                //Writes to a file for each data point processed
     {
         for(int d=0; d<DIME_NUM ; d++)                    //Writes the dimension information to a file
             of1<<dadaSets[i].GetDimension()[d]<<'t';
         of1 << dadaSets[i].GetClusterId() <<endl;        //Writes the cluster ID to a file
     }
     of1.close();    //Close the output file stream
     return true;    //return
 }

 
 void ClusterAnalysis::SetArrivalPoints(DataPoint& dp)
 {
     for(unsigned long i=0; i<dataNum; i++)                //Execute for each data point
     {
         double distance =GetDistance(dadaSets[i], dp);    //Gets the distance from a particular point
         if(distance <= radius && i!=dp.GetDpId())        // If the distance is less than The radius of And point specific id with dp the id Different execution 
             dp.GetArrivalPoints().push_back(i);            //Place specific points in the id pressure dp domain list
     }
     if(dp.GetArrivalPoints().size() >= minPTs)            //If data point data volume in dp field> MinPTs perform
     {
         dp.SetKey(true);    //Set the dp core object flag bit to true
         return;                //return
     }
     dp.SetKey(false);    //Set the dp core object flag bit to false if it is not a core object
 }

 
 
 bool ClusterAnalysis::DoDBSCANRecursive()
 {
     unsigned long clusterId=0;                        //Cluster id count, initialized to 0
     for(unsigned long i=0; i<dataNum;i++)            //Execute for each data point
     {
         DataPoint& dp=dadaSets[i];                    //Take the ith data point object
         if(!dp.isVisited() && dp.IsKey())            //If the object has not been accessed and is executed by the core object
         {
             dp.SetClusterId(clusterId);                //Set the clusterId of the object to clusterId
             dp.SetVisited(true);                    //Sets that the object has been accessed
             KeyPointCluster(i,clusterId);            //Cluster the points in the object domain
             clusterId++;                            //ClusterId from 1-6
         }
         //Cout <<"Isolated dot T" <<I <<Endl;
     }

     cout <<" Copolymerization class " <<clusterId<<" a "<< endl;        //After the algorithm is completed, the number of clusters is output
     return true;    //return
 }

 /*
  Function: performs clustering operations on points in the logpoint domain 
  Note: using recursive method, depth-first clustering data 
  Parameters: 
 unsigned long dpID;            //Data point id
 unsigned long clusterId;    //The cluster id of the data point
 return Value:  void;    */
 void ClusterAnalysis::KeyPointCluster(unsigned long dpID, unsigned long clusterId )
 {
     DataPoint& srcDp = dadaSets[dpID];        //Gets the data point object
     if(!srcDp.IsKey())    return;
     vector<unsigned long>& arrvalPoints = srcDp.GetArrivalPoints();        //Gets a list of point ids in the object domain
     for(unsigned long i=0; i<arrvalPoints.size(); i++)
     {
         DataPoint& desDp = dadaSets[arrvalPoints[i]];    //Gets point points in the domain
         if(!desDp.isVisited())                            //If the object has not been accessed for execution
         {
             //Cout <<"Data point t"<<DesDp. GetDpId () <<" Cluster ID is t" <<ClusterId <<Endl;
             desDp.SetClusterId(clusterId);        //Set the clusterId of the object to clusterId to suck the object into the cluster
             desDp.SetVisited(true);                //Sets that the object is accessed
             if(desDp.IsKey())                    //If the object is the core object
             {
                 KeyPointCluster(desDp.GetDpId(),clusterId);    //Recursively cluster the points in the domain of the domain point data using depth-first method
             }
         }
     }
 }

 //Distance between two data points
 /*
  Function: get Distance between two data points
  Get the Euclidean distance between two data points 
  Parameters: 
 DataPoint& dp1;        //Data point 1
 DataPoint& dp2;        //Data point 2
 return Value:  double;    //The distance between two points & NBSP; & have spent & have spent & have spent & have spent & have spent & have spent * /
 double ClusterAnalysis::GetDistance(DataPoint& dp1, DataPoint& dp2)
 {
     double distance =0;        //The initialization distance is 0
     for(int i=0; i<DIME_NUM;i++)    //Execute on each dimension of the data
     {
         distance += pow(dp1.GetDimension()[i] - dp2.GetDimension()[i],2);    //Distance + difference per dimension squared
     }
     return pow(distance,0.5);        // Prescribing and return distance 
 }

Algorithm call is simple:

#include "ClusterAnalysis.h"
 #include <cstdio>

 using namespace std;

 int main()
 {
     ClusterAnalysis myClusterAnalysis;                        //Clustering algorithm object declaration
     myClusterAnalysis.Init("D:\1108\XY.txt",500,9);        //Algorithm initialization operation, the radius is specified as 15, the minimum number of data points in the field is 3, (the data dimension is specified as 2 in the program)
     myClusterAnalysis.DoDBSCANRecursive();                    //Perform the clustering algorithm
     myClusterAnalysis.WriteToFile("D:\1108\XYResult.txt");//The result of the write execution is written to a file

     system("pause");    //According to the results
     return 0;            //return
 }

Related articles: