Spark GraphX: source code for computing the N-degree related nodes of specified nodes

  • 2020-06-23 02:25:15
Direct code:

package horizon.graphx.util
import java.security.InvalidParameterException

import horizon.graphx.util.CollectionUtil.CollectionHelper
import org.apache.spark.graphx._
import org.apache.spark.rdd.RDD
import org.apache.spark.storage.StorageLevel

import scala.collection.mutable.ArrayBuffer
import scala.reflect.ClassTag
/**
 * Created on 2017/1/19.
 * Description: For a set of specified vertices in a graph, computes their N-degree related vertices and outputs,
 * for each source vertex, the path length to every related vertex together with that vertex's id.
 */
object GraphNdegUtil {
 // Cap on the number of (vertexId, distance) entries a vertex may hold: sendMsg only considers an edge
 // while both of its endpoints hold fewer entries than this.
 val maxNDegVerticesCount = 10000
 // Vertices whose degree is greater than this are removed from the working graph by the subgraph
 // filter in aggNdegreedVertices.
 val maxDegree = 1000
 /**
  * Computes the N-degree related vertices of the chosen vertices, starting from a plain edge list.
  *
  * A graph is built from the edge tuples (every vertex gets the attribute 0, edges are partitioned with
  * EdgePartition2D and stored as MEMORY_AND_DISK_SER) and the work is delegated to the graph-based
  * overload of aggNdegreedVertices with its default send filter.
  *
  * @param edges         (srcId, dstId) pairs describing the graph
  * @param choosedVertex ids of the vertices whose related vertices are requested
  * @param degree        number of degrees requested; forwarded unchanged to the graph-based overload
  * @tparam ED not used by this overload; kept so existing call sites keep compiling
  * @return whatever the graph-based overload returns: per vertex id, a map from an Int key to a set of vertex ids
  */
 def aggNdegreedVertices[ED: ClassTag](edges: RDD[(VertexId, VertexId)], choosedVertex: RDD[VertexId], degree: Int): VertexRDD[Map[Int, Set[VertexId]]] = {
  val simpleGraph = Graph.fromEdgeTuples(edges, 0, Option(PartitionStrategy.EdgePartition2D), StorageLevel.MEMORY_AND_DISK_SER, StorageLevel.MEMORY_AND_DISK_SER)
  aggNdegreedVertices(simpleGraph, choosedVertex, degree)
 }
 /**
  * Variant of aggNdegreedVertices that reports the attributes of the related vertices instead of their ids.
  *
  * Steps:
  *  1. run the id-based traversal;
  *  2. flatten its result into one Ver record per (source vertex, related vertex, degree), with a null
  *     attribute placeholder;
  *  3. join those records with graph.vertices on the related vertex id to fill in the attribute;
  *  4. regroup by source vertex into a map from degree to the set of attributes.
  *
  * NOTE(review): the listing this was taken from had lost the receivers of three `map` calls, the join key,
  * the final result expression and the closing brace. They are restored here from what the declared types
  * force (RDD[Ver[VD]], the join against graph.vertices, the VertexRDD return type) -- verify against the
  * original project before relying on it.
  *
  * @param graph         graph whose vertex attributes are reported
  * @param choosedVertex ids of the vertices whose related vertices are requested
  * @param degree        number of degrees requested; forwarded to aggNdegreedVertices
  * @param sendFilter    predicate forwarded to aggNdegreedVertices; defaults to always true
  * @return for each source vertex, a map from degree to the attributes of the vertices found at that degree
  */
 def aggNdegreedVerticesWithAttr[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED], choosedVertex: RDD[VertexId], degree: Int, sendFilter: (VD, VD) => Boolean = (_: VD, _: VD) => true): VertexRDD[Map[Int, Set[VD]]] = {
  val ndegs: VertexRDD[Map[Int, Set[VertexId]]] = aggNdegreedVertices(graph, choosedVertex, degree, sendFilter)
  // One record per (source, related vertex id, degree); attr is filled in by the join below.
  val flated: RDD[Ver[VD]] = ndegs.flatMap(e => e._2.flatMap(t => t._2.map(s => Ver(e._1, s, t._1, null.asInstanceOf[VD])))).persist(StorageLevel.MEMORY_AND_DISK_SER)
  // Key by the related vertex id so the join picks up that vertex's attribute.
  val matched: RDD[Ver[VD]] = flated.map(e => (e.id, e)).join(graph.vertices).map(e => e._2._1.copy(attr = e._2._2)).persist(StorageLevel.MEMORY_AND_DISK_SER)
  flated.unpersist(blocking = false)
  ndegs.unpersist(blocking = false)
  // Outer reduceByKey is the RDD one (per source vertex); inner reduceByKey is CollectionHelper's (per degree).
  val grouped: RDD[(VertexId, Map[Int, Set[VD]])] = matched.map(e => (e.source, ArrayBuffer(e))).reduceByKey(_ ++= _).map(e => (e._1, e._2.map(t => (t.degree, Set(t.attr))).reduceByKey(_ ++ _).toMap))
  matched.unpersist(blocking = false)
  VertexRDD(grouped)
 }
 /**
  * Graph-based traversal: repeatedly exchanges (vertexId, distance) lists along edges, starting from the
  * chosen vertices, and collects the result for the chosen vertices.
  *
  * NOTE(review): this listing appears to have lost text in extraction -- the receiver of the `map` that
  * builds initVertex, the closing braces of the `if`, the `while` loop and the method, and the final result
  * expression are not present. The code tokens below are left exactly as found.
  *
  * @param graph         input graph; vertices with degree above maxDegree are removed before the traversal
  * @param choosedVertex ids of the start vertices; note that this method calls unpersist on this RDD
  * @param degree        must be at least 1; the loop below runs degree + 1 message rounds
  * @param sendFilter    predicate over two vertex attributes handed to sendMsg for every edge; defaults to always true
  * @throws InvalidParameterException when degree is less than 1
  */
 def aggNdegreedVertices[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED],
              choosedVertex: RDD[VertexId],
              degree: Int,
              sendFilter: (VD, VD) => Boolean = (_: VD, _: VD) => true
              ): VertexRDD[Map[Int, Set[VertexId]]] = {
 // Reject non-positive degrees up front.
 if (degree < 1) {
  throw new InvalidParameterException(" Degree parameter error :" + degree)
 // Pairs every element with `true`; joined below to mark start vertices.
 // NOTE(review): the expression is truncated -- presumably choosedVertex.map(e => (e, true)); confirm.
 val initVertex = => (e, true)).persist(StorageLevel.MEMORY_AND_DISK_SER)
 // Attach each vertex's degree (0 when absent) next to its original attribute.
 var g: Graph[DegVertex[VD], Int] = graph.outerJoinVertices(graph.degrees)((_, old, deg) => (deg.getOrElse(0), old))
  .subgraph(vpred = (_, a) => a._1 <= maxDegree)
  // The subgraph call above keeps only vertices whose degree is at most maxDegree.
  .outerJoinVertices(initVertex)((id, old, hasReceivedMsg) => {
  DegVertex(old._2, hasReceivedMsg.getOrElse(false), ArrayBuffer((id, 0))) // init flag is true only for vertices present in initVertex; every vertex starts with itself at distance 0
 }).mapEdges(_ => 0).cache() // every edge attribute is replaced by 0
 choosedVertex.unpersist(blocking = false)
 var i = 0
 var prevG: Graph[DegVertex[VD], Int] = null
 var newVertexRdd: VertexRDD[ArrayBuffer[(VertexId, Int)]] = null
 while (i < degree + 1) {
  prevG = g
  // Message round i + 1: sendMsg emits per edge, messages to the same vertex are concatenated and de-duplicated.
  newVertexRdd = prevG.aggregateMessages[ArrayBuffer[(VertexId, Int)]](sendMsg(_, sendFilter), (a, b) => reduceVertexIds(a ++ b)).persist(StorageLevel.MEMORY_AND_DISK_SER)
  // Vertices that received a message are updated; the others have their init flag cleared.
  g = g.outerJoinVertices(newVertexRdd)((vid, old, msg) => if (msg.isDefined) updateVertexByMsg(vid, old, msg.get) else old.copy(init = false)).cache()
  // Release the previous round's graph and message RDD.
  prevG.unpersistVertices(blocking = false)
  prevG.edges.unpersist(blocking = false)
  newVertexRdd.unpersist(blocking = false)
  i += 1
 newVertexRdd.unpersist(blocking = false)
 // Keep only vertices present in initVertex and turn their collected list into a distance -> ids map.
 val maped = g.vertices.join(initVertex).mapValues(e => sortResult(e._1)).persist(StorageLevel.MEMORY_AND_DISK_SER)
 g.unpersist(blocking = false)
 // NOTE(review): no result expression follows in this listing although the declared return type is
 // VertexRDD[Map[Int, Set[VertexId]]] -- presumably VertexRDD(maped); confirm against the original.
 /**
  * Flat record used by aggNdegreedVerticesWithAttr while attaching attributes to traversal results.
  *
  * @param source id of the vertex the result row belongs to
  * @param id     id of the related vertex
  * @param degree the Int key under which `id` was reported for `source`
  * @param attr   attribute of the related vertex; defaults to null until it is filled in via copy
  */
 private case class Ver[VD: ClassTag](
  source: VertexId,
  id: VertexId,
  degree: Int,
  attr: VD = null.asInstanceOf[VD]
 )
 /**
  * Merges a received message into a vertex's state.
  *
  * Every distance in the message is increased by one, the result is concatenated with the entries the
  * vertex already holds, and duplicates are collapsed by reduceVertexIds.
  *
  * NOTE(review): the listing had lost the receiver of the `map` call and the closing brace; `msg.map` is
  * restored because `msg` is the only (VertexId, Int) collection that is not already merged on the next line.
  *
  * @param vertexId id of the vertex being updated (not used in the computation)
  * @param oldAttr  current state of the vertex
  * @param msg      (vertexId, distance) entries received this round
  * @return a copy of oldAttr with the merged entries and init set to whether msg was non-empty
  */
 private def updateVertexByMsg[VD: ClassTag](vertexId: VertexId, oldAttr: DegVertex[VD], msg: ArrayBuffer[(VertexId, Int)]): DegVertex[VD] = {
  val addOne = msg.map(e => (e._1, e._2 + 1))
  val newMsg = reduceVertexIds(oldAttr.degVertices ++ addOne)
  oldAttr.copy(init = msg.nonEmpty, degVertices = newMsg)
 }
 /**
  * Regroups a vertex's (vertexId, distance) entries into a map from distance to the set of vertex ids
  * recorded at that distance.
  *
  * NOTE(review): the listing had lost the receiver of the `map` call; `degs.degVertices` is restored
  * because it is the only (VertexId, Int) collection reachable from the parameter.
  *
  * @param degs vertex state whose degVertices are regrouped
  * @return distance -> vertex ids
  */
 private def sortResult[VD: ClassTag](degs: DegVertex[VD]): Map[Int, Set[VertexId]] =
  degs.degVertices.map(e => (e._2, Set(e._1))).reduceByKey(_ ++ _).toMap
 /**
  * Per-vertex state carried through the traversal.
  *
  * @param attr        the vertex's original attribute
  * @param init        flag checked by sendMsg (an edge is considered only when one endpoint has it set);
  *                    it is set for the chosen vertices and for vertices that received a non-empty message
  * @param degVertices (vertexId, distance) entries collected so far; starts as the vertex itself at distance 0
  */
 case class DegVertex[VD: ClassTag](
  var attr: VD,
  init: Boolean = false,
  degVertices: ArrayBuffer[(VertexId, Int)]
 )
 /**
  * Has the same three fields as DegVertex (attribute, init flag, (vertexId, distance) entries).
  * It is not referenced by any other code visible in this listing.
  */
 case class VertexDegInfo[VD: ClassTag](
  var attr: VD,
  init: Boolean = false,
  degVertices: ArrayBuffer[(VertexId, Int)]
 )
 /**
  * Per-edge send function passed to aggregateMessages by aggNdegreedVertices.
  *
  * An edge is considered only when both endpoints hold fewer than maxNDegVerticesCount entries, at least
  * one endpoint has its init flag set, and isAttrSame reports the two endpoints as different. Any exception
  * is logged with println and rethrown.
  *
  * NOTE(review): this listing appears to have lost text in extraction -- the bodies of both sendFilter
  * branches, several closing braces and the tail of the log message are not present, so what is actually
  * sent cannot be read from here. The code tokens below are left exactly as found.
  *
  * @param e          edge context giving access to both endpoint states
  * @param sendFilter predicate over two vertex attributes that gates each branch
  */
 private def sendMsg[VD: ClassTag](e: EdgeContext[DegVertex[VD], Int, ArrayBuffer[(VertexId, Int)]], sendFilter: (VD, VD) => Boolean): Unit = {
 try {
  val src = e.srcAttr
  val dst = e.dstAttr
  // NOTE(review): the original comment says only a dst in the ready state receives messages, but the
  // condition below accepts the edge when either src.init or dst.init is set.
  if (src.degVertices.size < maxNDegVerticesCount && (src.init || dst.init) && dst.degVertices.size < maxNDegVerticesCount && !isAttrSame(src, dst)) {
  // Branch gated on the filter applied to (src, dst); its body is missing from this listing.
  if (sendFilter(src.attr, dst.attr)) {
  // NOTE(review): both arguments are dst.attr here -- possibly meant to be (dst.attr, src.attr); confirm.
  // The body of this branch is missing from this listing as well.
  if (sendFilter(dst.attr, dst.attr)) {
 } catch {
  case ex: Exception =>
  // Log the failing edge's source side, then rethrow so the failure is not swallowed.
  println(s"==========error found: exception:${ex.getMessage}," +
   s"edgeTriplet:(srcId:${e.srcId},srcAttr:(${e.srcAttr.attr},${e.srcAttr.init},${e.srcAttr.degVertices.size}))," +
  throw ex
 /**
  * Collapses entries that share a vertex id, keeping the smallest Int recorded for each id.
  * Uses the reduceByKey extension brought in by the CollectionHelper import.
  *
  * @param ids (vertexId, distance) entries, possibly with repeated vertex ids
  * @return a new buffer with one entry per vertex id
  */
 private def reduceVertexIds(ids: ArrayBuffer[(VertexId, Int)]): ArrayBuffer[(VertexId, Int)] = {
  val smallestPerVertex = ids.reduceByKey((left, right) => Math.min(left, right))
  val deduplicated = ArrayBuffer.empty[(VertexId, Int)]
  deduplicated ++= smallestPerVertex
  deduplicated
 }
 /**
  * Two vertex states count as the same when their init flags agree and allKeysAreSame accepts their
  * entry lists. The entry lists are only compared when the flags agree.
  *
  * @param a first vertex state
  * @param b second vertex state
  * @return true when both conditions hold
  */
 private def isAttrSame[VD: ClassTag](a: DegVertex[VD], b: DegVertex[VD]): Boolean = {
  if (a.init != b.init) false
  else allKeysAreSame(a.degVertices, b.degVertices)
 }
 /**
  * Checks whether two entry lists mention exactly the same, non-empty set of vertex ids; the distances
  * are ignored.
  *
  * NOTE(review): the listing had lost the receivers of both `map` calls and the closing brace; `a.map` and
  * `b.map` are restored from the variable names aKeys / bKeys. The early `return` and the two `diff` calls
  * are replaced by the equivalent "non-empty and equal" set comparison.
  *
  * @param a first (vertexId, distance) list
  * @param b second (vertexId, distance) list
  * @return false when `a` has no ids or the id sets differ, true otherwise
  */
 private def allKeysAreSame(a: ArrayBuffer[(VertexId, Int)], b: ArrayBuffer[(VertexId, Int)]): Boolean = {
  val aKeys = a.map(e => e._1).toSet
  val bKeys = b.map(e => e._1).toSet
  // Equal sets have equal sizes and empty differences in both directions, which is what the original tested.
  aKeys.nonEmpty && aKeys == bKeys
 }

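The sortResult method calls reduceByKey on a Traversable[(K, V)] collection. reduceByKey is not part of the Scala standard library; it is provided by a custom implicit class, CollectionHelper, which must be imported (import horizon.graphx.util.CollectionUtil.CollectionHelper) before it can be used. Its code is as follows: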

/**
 * Created on 2016/12/21.
 * Description: (none given in the original)
 */
object CollectionUtil {

  /**
   * Adds reduceByKey-style methods to any Traversable[(K, V)].
   *
   * @param collection the key/value pairs to operate on
   * @param kt         class tag of the key type
   * @param vt         class tag of the value type
   * @tparam K key type
   * @tparam V value type
   */
  implicit class CollectionHelper[K, V](collection: Traversable[(K, V)])(implicit kt: ClassTag[K], vt: ClassTag[V]) {

    /**
     * Groups the pairs by key and combines the values of each key with `f`.
     *
     * The group is destructured with a plain tuple pattern: a type pattern on K goes through the class
     * tag, which rejects null keys and made the match fail with a MatchError.
     *
     * @param f combines two values that share a key
     * @return one pair per distinct key
     */
    def reduceByKey(f: (V, V) => V): Traversable[(K, V)] =
      collection.groupBy(_._1).map { case (_, values) =>
        values.reduce((a, b) => (a._1, f(a._2, b._2)))
      }

    /**
     * Like reduceByKey, but also returns the values that were dropped while reducing.
     *
     * For every combination step, the dropped value is the right operand when `f` returned something
     * equal to the left operand, and the left operand otherwise. This is only meaningful for reducers
     * that pick one of their two arguments (min/max style).
     *
     * @param f combines two values that share a key
     * @return (reduced pairs, pairs that were dropped during the reduction)
     */
    def reduceByKeyWithReduced(f: (V, V) => V)(implicit kt: ClassTag[K], vt: ClassTag[V]): (Traversable[(K, V)], Traversable[(K, V)]) = {
      // Filled as a side effect of the reduction below.
      val reduced: ArrayBuffer[(K, V)] = ArrayBuffer()
      val newSeq = collection.groupBy(_._1).map { case (_, values) =>
        values.reduce((a, b) => {
          val newValue: V = f(a._2, b._2)
          val reducedValue: V = if (newValue == a._2) b._2 else a._2
          val reducedPair: (K, V) = (a._1, reducedValue)
          reduced += reducedPair
          (a._1, newValue)
        })
      }
      (newSeq, reduced.toTraversable)
    }
  }
}


