A detailed look at the K-means clustering algorithm implemented in Java

  • 2020-12-21 18:02:19
  • OfStack

Requirements

The K-means algorithm is run on one column of a table in a MySQL database, and the clustered data is written to a new table.

Source code and Drivers

kmeans_jb51.rar

The source code


import java.sql.*;
import java.util.*;

/**
 * One-dimensional K-means clustering over an integer column of a MySQL table.
 *
 * <p>{@link #main} reads every value of the configured column, repeatedly assigns each
 * value to its nearest centroid until the centroid list stops changing, and writes each
 * value together with the centroid of its cluster to the table {@code k_means}.
 *
 * @author tianshl
 * @version 2018/1/13 11:13 AM
 */
public class Kmeans {
  // Connection settings.
  // NOTE(review): credentials are hard-coded; move them to configuration before real use.
  private static final String URL = "jdbc:mysql://localhost:3306/data_analysis_dev";
  private static final String USER = "root";
  private static final String PASSWORD = "root";

  // Source values to cluster.
  private final List<Integer> origins = new ArrayList<>();

  // Latest grouping: centroid -> values assigned to it.
  private Map<Double, List<Integer>> grouped = new HashMap<>();

  // Current centroids; replaced (never mutated) after every pass that moves them.
  private List<Double> cores;

  // Source table and column.
  private final String tableName;
  private final String colName;

  /**
   * Creates a clustering job.
   *
   * @param tableName name of the source table
   * @param colName   name of the source column
   * @param cores     initial centroids; must contain at least one value
   * @throws IllegalArgumentException if {@code cores} is null or empty
   */
  private Kmeans(String tableName, String colName, List<Double> cores) {
    if (cores == null || cores.isEmpty()) {
      throw new IllegalArgumentException("cores must contain at least one initial centroid");
    }
    // Copy so later changes to the caller's list cannot affect the run.
    this.cores = new ArrayList<>(cores);
    this.tableName = tableName;
    this.colName = colName;
  }

  /**
   * Clusters {@code values} in memory, without touching the database.
   *
   * @param values values to cluster
   * @param cores  initial centroids; must contain at least one value
   * @return final grouping, keyed by centroid; empty when {@code values} is empty
   * @throws IllegalArgumentException if {@code cores} is null or empty
   */
  static Map<Double, List<Integer>> cluster(List<Integer> values, List<Double> cores) {
    // Table and column names are only used by the database methods, which are not called here.
    Kmeans kmeans = new Kmeans(null, null, cores);
    kmeans.origins.addAll(values);
    kmeans.group();
    return kmeans.grouped;
  }

  /**
   * Recomputes the centroids as the mean of each current group.
   *
   * @return new centroids in ascending order, one per non-empty group
   */
  private List<Double> newCores() {
    List<Double> means = new ArrayList<>(grouped.size());

    for (List<Integer> members : grouped.values()) {
      // Accumulate in a long: an int sum silently overflows for large clusters.
      long sum = 0L;
      for (int member : members) {
        sum += member;
      }
      means.add(sum / (double) members.size());
    }

    Collections.sort(means);
    return means;
  }

  /**
   * Checks whether the centroids have stopped moving; if not, adopts the new ones.
   *
   * @return {@code true} when the recomputed centroids equal the current ones
   */
  private boolean isOver() {
    List<Double> updated = newCores();
    // List.equals also compares sizes, so a centroid that attracted no values
    // (and is therefore missing from the new list) counts as a change instead of
    // causing an out-of-range lookup.
    if (cores.equals(updated)) {
      return true;
    }
    cores = updated;
    return false;
  }

  /**
   * Assigns every source value to its nearest current centroid.
   */
  private void setGrouped() {
    Map<Double, List<Integer>> groups = new HashMap<>();
    for (Integer origin : origins) {
      groups.computeIfAbsent(getCore(origin), key -> new ArrayList<>()).add(origin);
    }
    grouped = groups;
  }

  /**
   * Finds the centroid closest to a value.
   *
   * @param num value to assign
   * @return the nearest centroid; on a tie, the one that comes first in the list
   */
  private Double getCore(Integer num) {
    Double nearest = cores.get(0);
    double smallest = Math.abs(num - nearest);

    for (Double core : cores) {
      double diff = Math.abs(num - core);
      // Strict comparison keeps the earliest centroid on ties.
      if (diff < smallest) {
        smallest = diff;
        nearest = core;
      }
    }
    return nearest;
  }

  /**
   * Repeats assignment and centroid update until the centroids stop changing.
   */
  private void group() {
    do {
      setGrouped();
    } while (!isOver());
  }

  /**
   * Opens a database connection.
   *
   * @return an open connection, or {@code null} if connecting failed
   */
  private Connection getConn() {
    try {
      // Load the driver.
      Class.forName("com.mysql.jdbc.Driver");

      Connection conn = DriverManager.getConnection(URL, USER, PASSWORD);

      if (conn.isClosed()) {
        System.out.println(" Failed to connect to database !");
        return null;
      }
      System.out.println(" Database connection successful !");

      return conn;

    } catch (ClassNotFoundException | SQLException e) {
      System.out.println(" Connection to database failed! ");
      e.printStackTrace();
    }

    return null;
  }

  /**
   * Loads the source values from the configured table and column.
   *
   * <p>If the read fails, any partially loaded values are discarded so that an
   * incomplete data set is never clustered.
   */
  private void getOrigins() {
    // Identifiers cannot be bound as statement parameters, so the names are formatted in;
    // they must come from trusted code, never from user input.
    String query = String.format("select %s from %s", colName, tableName);

    // try-with-resources skips a null resource and closes the rest in reverse order.
    try (Connection conn = getConn()) {
      if (conn == null) {
        return;
      }

      try (Statement statement = conn.createStatement();
          ResultSet rs = statement.executeQuery(query)) {
        while (rs.next()) {
          // NOTE(review): getInt returns 0 for SQL NULL, so NULL rows are clustered as 0.
          origins.add(rs.getInt(1));
        }
      }
    } catch (SQLException e) {
      origins.clear();
      e.printStackTrace();
    }
  }

  /**
   * Recreates the {@code k_means} table and writes every value with its centroid.
   */
  private void write() {
    try (Connection conn = getConn()) {
      if (conn == null) {
        return;
      }

      try (Statement statement = conn.createStatement()) {
        // Drop the old table.
        statement.execute("DROP TABLE IF EXISTS k_means; ");
        // Create the new table.
        statement.execute("CREATE TABLE IF NOT EXISTS k_means(`core` DECIMAL(11, 7), `col` INTEGER(11));");
      }

      // Insert all rows in a single transaction.
      conn.setAutoCommit(false);

      try (PreparedStatement ps = conn.prepareStatement("INSERT INTO k_means VALUES (?, ?)")) {
        for (Map.Entry<Double, List<Integer>> entry : grouped.entrySet()) {
          Double core = entry.getKey();
          for (Integer value : entry.getValue()) {
            ps.setDouble(1, core);
            ps.setInt(2, value);
            ps.addBatch();
          }
        }

        ps.executeBatch();
        conn.commit();
      } catch (SQLException e) {
        // Do not leave a half-written batch pending on the connection.
        try {
          conn.rollback();
        } catch (SQLException rollbackFailure) {
          e.addSuppressed(rollbackFailure);
        }
        throw e;
      }
    } catch (SQLException e) {
      e.printStackTrace();
    }
  }

  /**
   * Runs the whole job: load the values, cluster them, write the result.
   */
  private void run() {
    System.out.println(" Get source data ");
    getOrigins();

    // Without data there is nothing to cluster; leave any existing k_means table untouched.
    if (origins.isEmpty()) {
      System.out.println(" No source data to process ");
      return;
    }

    System.out.println(" Data grouping processing ");
    group();

    System.out.println(" Writes the processed data to the database ");
    write();

    System.out.println(" Finished writing the data ");
  }

  public static void main(String[] args) {
    List<Double> cores = new ArrayList<>();
    cores.add(260.0);
    cores.add(600.0);
    // Table name, column name, initial centroids.
    new Kmeans("attributes", "attr_length", cores).run();
  }
}

The source file


Kmeans.java

compile


javac Kmeans.java 

run


#  Specify dependency libraries (java.ext.dirs only works on Java 8 and earlier; on Java 9+ use: java -cp ".:./lib/*" Kmeans)
java -Djava.ext.dirs=./lib Kmeans

Related articles: