DBSCAN clustering algorithm is implemented by C++
- 2020-04-01 23:42:05
- OfStack
Because of the need of work these days, the DBSCAN clustering algorithm was implemented C++. Order n^2, and we're going to spend a lot of time on each point in the domain. The algorithm is very simple, now share everyone's reference, also hope to have more communication.
The data point type is described as follows:
This is the implementation:
DBSCAN algorithm type description:
Clustering realization:
Algorithm call is simple:
The data point type is described as follows:
#include <vector>
using namespace std;
const int DIME_NUM=2; //Data dimension is 2, global constant
//Data point type
class DataPoint
{
private:
unsigned long dpID; //Data point ID
double dimension[DIME_NUM]; //Dimensional data
long clusterId; //Belongs to the cluster ID
bool isKey; //Core object
bool visited; //Whether or not visited
vector<unsigned long> arrivalPoints; //List of domain data point ids
public:
DataPoint(); //Default constructor
DataPoint(unsigned long dpID,double* dimension , bool isKey); //The constructor
unsigned long GetDpId(); //GetDpId method
void SetDpId(unsigned long dpID); //SetDpId method
double* GetDimension(); //Method GetDimension
void SetDimension(double* dimension); //SetDimension method
bool IsKey(); //GetIsKey method
void SetKey(bool isKey); //The SetKey method
bool isVisited(); //GetIsVisited method
void SetVisited(bool visited); //SetIsVisited method
long GetClusterId(); //GetClusterId method
void SetClusterId(long classId); //SetClusterId method
vector<unsigned long>& GetArrivalPoints(); //GetArrivalPoints method
};
This is the implementation:
#include "DataPoint.h"
//Default constructor
DataPoint::DataPoint()
{
}
//The constructor
DataPoint::DataPoint(unsigned long dpID,double* dimension , bool isKey):isKey(isKey),dpID(dpID)
{
//Pass the dimension data for each dimension
for(int i=0; i<DIME_NUM;i++)
{
this->dimension[i]=dimension[i];
}
}
//Setting dimensional data
void DataPoint::SetDimension(double* dimension)
{
for(int i=0; i<DIME_NUM;i++)
{
this->dimension[i]=dimension[i];
}
}
//Retrieve dimensional data
double* DataPoint::GetDimension()
{
return this->dimension;
}
//Gets whether it is a core object
bool DataPoint::IsKey()
{
return this->isKey;
}
//Set the core object flag
void DataPoint::SetKey(bool isKey)
{
this->isKey = isKey;
}
//Gets the DpId method
unsigned long DataPoint::GetDpId()
{
return this->dpID;
}
//Set the DpId method
void DataPoint::SetDpId(unsigned long dpID)
{
this->dpID = dpID;
}
//GetIsVisited method
bool DataPoint::isVisited()
{
return this->visited;
}
//SetIsVisited method
void DataPoint::SetVisited( bool visited )
{
this->visited = visited;
}
//GetClusterId method
long DataPoint::GetClusterId()
{
return this->clusterId;
}
//GetClusterId method
void DataPoint::SetClusterId( long clusterId )
{
this->clusterId = clusterId;
}
//GetArrivalPoints method
vector<unsigned long>& DataPoint::GetArrivalPoints()
{
return arrivalPoints;
}
DBSCAN algorithm type description:
#include <iostream>
#include <cmath>
using namespace std;
//Cluster analysis type
class ClusterAnalysis
{
private:
vector<DataPoint> dadaSets; //The data set
unsigned int dimNum; //The dimension
double radius; //The radius of
unsigned int dataNum; //Number of data
unsigned int minPTs; //Minimum number of neighborhood data
double GetDistance(DataPoint& dp1, DataPoint& dp2); //Distance functions
void SetArrivalPoints(DataPoint& dp); //Sets the list of domain points for the data points
void KeyPointCluster( unsigned long i, unsigned long clusterId ); //Perform clustering operations on points in the field of data points
public:
ClusterAnalysis(){} //Default constructor
bool Init(char* fileName, double radius, int minPTs); //Initialization operation
bool DoDBSCANRecursive(); //DBSCAN recursive algorithm
bool WriteToFile(char* fileName); //Writes the clustering results to a file
};
Clustering realization:
#include "ClusterAnalysis.h"
#include <fstream>
#include <iosfwd>
#include <math.h>
/*
Function: cluster initialization operation
Will the data The file name . The radius of , the information of the minimum number of data in the domain is written into the clustering algorithm class, the file is read, and the data information is read and written into the data set of the algorithm class
Parameters:
char* fileName; //The file name
double radius; //The radius of
int minPTs; //Domain minimum number of data & NBSP;
return Value: true; */
bool ClusterAnalysis::Init(char* fileName, double radius, int minPTs)
{
this->radius = radius; // Set up the The radius of
this->minPTs = minPTs; //Sets the minimum number of data in the domain
this->dimNum = DIME_NUM; //Setting data dimensions
ifstream ifs(fileName); //Open the file
if (! ifs.is_open()) //If the file has been opened, an error message is reported
{
cout << "Error opening file"; //Output error message
exit (-1); //Program exits
}
unsigned long i=0; //Statistics of data number
while (! ifs.eof() ) //Read the POI information from the file and write the POI information to the POI list
{
DataPoint tempDP; //Temporary data point object
double tempDimData[DIME_NUM]; //Temporary data point dimension information
for(int j=0; j<DIME_NUM; j++) //Read the file, read each dimension
{
ifs>>tempDimData[j];
}
tempDP.SetDimension(tempDimData); //Store the dimension information in the data point object
//char date[20]="";
//char time[20]="";
////double type; // Useless information
//ifs >> date;
//ifs >> time; // Useless information read in
tempDP.SetDpId(i); //Set the data point object ID to I
tempDP.SetVisited(false); //The data point object isVisited is set to false
tempDP.SetClusterId(-1); //Set the default cluster ID to -1
dadaSets.push_back(tempDP); //Presses the object into the data collection container
i++; //Count + 1
}
ifs.close(); //Close file stream
dataNum =i; //Set the data object collection size to I
for(unsigned long i=0; i<dataNum;i++)
{
SetArrivalPoints(dadaSets[i]); //Computes objects in the data point domain
}
return true; //return
}
/*
Function: writes the data set processed by the clustering algorithm back to the file
Description: write the clustering results back to the file
Parameters:
char* fileName; // Want to write The file name
return Value: true */
bool ClusterAnalysis::WriteToFile(char* fileName )
{
ofstream of1(fileName); //Initializes the file output stream
for(unsigned long i=0; i<dataNum;i++) //Writes to a file for each data point processed
{
for(int d=0; d<DIME_NUM ; d++) //Writes the dimension information to a file
of1<<dadaSets[i].GetDimension()[d]<<'t';
of1 << dadaSets[i].GetClusterId() <<endl; //Writes the cluster ID to a file
}
of1.close(); //Close the output file stream
return true; //return
}
void ClusterAnalysis::SetArrivalPoints(DataPoint& dp)
{
for(unsigned long i=0; i<dataNum; i++) //Execute for each data point
{
double distance =GetDistance(dadaSets[i], dp); //Gets the distance from a particular point
if(distance <= radius && i!=dp.GetDpId()) // If the distance is less than The radius of And point specific id with dp the id Different execution
dp.GetArrivalPoints().push_back(i); //Place specific points in the id pressure dp domain list
}
if(dp.GetArrivalPoints().size() >= minPTs) //If data point data volume in dp field> MinPTs perform
{
dp.SetKey(true); //Set the dp core object flag bit to true
return; //return
}
dp.SetKey(false); //Set the dp core object flag bit to false if it is not a core object
}
bool ClusterAnalysis::DoDBSCANRecursive()
{
unsigned long clusterId=0; //Cluster id count, initialized to 0
for(unsigned long i=0; i<dataNum;i++) //Execute for each data point
{
DataPoint& dp=dadaSets[i]; //Take the ith data point object
if(!dp.isVisited() && dp.IsKey()) //If the object has not been accessed and is executed by the core object
{
dp.SetClusterId(clusterId); //Set the clusterId of the object to clusterId
dp.SetVisited(true); //Sets that the object has been accessed
KeyPointCluster(i,clusterId); //Cluster the points in the object domain
clusterId++; //ClusterId from 1-6
}
//Cout <<"Isolated dot T" <<I <<Endl;
}
cout <<" Copolymerization class " <<clusterId<<" a "<< endl; //After the algorithm is completed, the number of clusters is output
return true; //return
}
/*
Function: performs clustering operations on points in the logpoint domain
Note: using recursive method, depth-first clustering data
Parameters:
unsigned long dpID; //Data point id
unsigned long clusterId; //The cluster id of the data point
return Value: void; */
void ClusterAnalysis::KeyPointCluster(unsigned long dpID, unsigned long clusterId )
{
DataPoint& srcDp = dadaSets[dpID]; //Gets the data point object
if(!srcDp.IsKey()) return;
vector<unsigned long>& arrvalPoints = srcDp.GetArrivalPoints(); //Gets a list of point ids in the object domain
for(unsigned long i=0; i<arrvalPoints.size(); i++)
{
DataPoint& desDp = dadaSets[arrvalPoints[i]]; //Gets point points in the domain
if(!desDp.isVisited()) //If the object has not been accessed for execution
{
//Cout <<"Data point t"<<DesDp. GetDpId () <<" Cluster ID is t" <<ClusterId <<Endl;
desDp.SetClusterId(clusterId); //Set the clusterId of the object to clusterId to suck the object into the cluster
desDp.SetVisited(true); //Sets that the object is accessed
if(desDp.IsKey()) //If the object is the core object
{
KeyPointCluster(desDp.GetDpId(),clusterId); //Recursively cluster the points in the domain of the domain point data using depth-first method
}
}
}
}
//Distance between two data points
/*
Function: get Distance between two data points
Get the Euclidean distance between two data points
Parameters:
DataPoint& dp1; //Data point 1
DataPoint& dp2; //Data point 2
return Value: double; //The distance between two points & NBSP; & have spent & have spent & have spent & have spent & have spent & have spent * /
double ClusterAnalysis::GetDistance(DataPoint& dp1, DataPoint& dp2)
{
double distance =0; //The initialization distance is 0
for(int i=0; i<DIME_NUM;i++) //Execute on each dimension of the data
{
distance += pow(dp1.GetDimension()[i] - dp2.GetDimension()[i],2); //Distance + difference per dimension squared
}
return pow(distance,0.5); // Prescribing and return distance
}
Algorithm call is simple:
#include "ClusterAnalysis.h"
#include <cstdio>
using namespace std;
int main()
{
ClusterAnalysis myClusterAnalysis; //Clustering algorithm object declaration
myClusterAnalysis.Init("D:\1108\XY.txt",500,9); //Algorithm initialization operation, the radius is specified as 15, the minimum number of data points in the field is 3, (the data dimension is specified as 2 in the program)
myClusterAnalysis.DoDBSCANRecursive(); //Perform the clustering algorithm
myClusterAnalysis.WriteToFile("D:\1108\XYResult.txt");//The result of the write execution is written to a file
system("pause"); //According to the results
return 0; //return
}