Using the k-nearest neighbors (kNN) algorithm in Go to detect pornographic novels
- 2020-05-27 05:52:29
- OfStack
Usage:
go run kNN.go --file="data.txt"
The key lies in choosing the feature vector and deciding on the threshold.
The sample data came from the list of 40 pornographic online novels released by the General Administration of Press and Publication.
package main
import (
"bufio"
"flag"
"fmt"
"io"
"log"
"math"
"os"
"path"
"path/filepath"
)
// Inclusive code-point range that load treats as Chinese ideographs; runes
// outside this range are not counted.
const (
	MIN_HANZI rune = 0x3400
	MAX_HANZI rune = 0x9fbb
)

// Package-level settings shared by the loader, the classifier and the walker.
var (
	// debug turns on the per-label diagnostic line printed by classify.
	debug = false

	// data_dir is the directory that is walked for .txt files when no
	// --file flag is supplied.
	data_dir = "./moyan"

	// limen is the decision threshold: check reports a file as pornographic
	// when its feature-vector length is greater than or equal to this value.
	limen = 0.1159203888322267

	// labels holds the characters whose frequencies form the feature vector
	// computed by classify, one vector component per entry.
	labels = []rune{
		0x817f, // 腿
		0x80f8, // 胸
		0x4e73, // 乳
		0x81c0, // 臀
		0x5c41, // 屁
		0x80a1, // 股
		0x88f8, // 裸
		0x6deb, // 淫
	}
)
// errHandle is the program's fail-fast helper: a nil err is a no-op, any
// other value is passed to log.Fatal, which logs it and exits the process.
func errHandle(err error) {
	if err == nil {
		return
	}
	log.Fatal(err)
}
// load reads the file called name rune by rune and counts how many times each
// rune inside the inclusive range [MIN_HANZI, MAX_HANZI] occurs.
//
// It returns the rune-to-count map on success. If the file cannot be opened,
// or a read fails with anything other than io.EOF, it returns a nil map and
// that error.
func load(name string) (m map[rune]int, err error) {
	src, err := os.Open(name)
	if err != nil {
		return nil, err
	}
	defer src.Close()

	counts := make(map[rune]int)
	reader := bufio.NewReader(src)
	for {
		r, _, readErr := reader.ReadRune()
		switch {
		case readErr == io.EOF:
			// End of input: everything has been counted.
			return counts, nil
		case readErr != nil:
			return nil, readErr
		}
		if r < MIN_HANZI || r > MAX_HANZI {
			continue
		}
		counts[r]++
	}
}
// classify converts a rune-count map into a feature vector and its length.
//
// idv has one component per entry of labels, in the same order. Each component
// is that label's count in m divided by the number of distinct runes in m
// (len(m), not the total number of characters). dis is the Euclidean norm of
// idv.
//
// An empty (or nil) map yields an all-zero vector and dis == 0. Without that
// guard every component would be 0/0 = NaN, and a NaN distance compares false
// against every threshold.
//
// When the package-level debug flag is set, one diagnostic line per label is
// printed to standard output.
func classify(m map[rune]int) (idv []float64, dis float64) {
	idv = make([]float64, 0, len(labels))
	distinct := float64(len(m))

	for i, v := range labels {
		var ratio float64
		if distinct > 0 {
			ratio = float64(m[v]) / distinct
		}
		if debug {
			fmt.Println(i, m[v], string(v), ratio)
		}
		idv = append(idv, ratio)
		dis += math.Pow(ratio, 2)
	}
	return idv, math.Sqrt(dis)
}
// check prints a one-line verdict for the file fp based on its feature-vector
// length dis:
//
//   - dis == 1.0           -> " Are you cheating "
//   - dis >= limen         -> " A pornographic "
//   - dis == 0 or dis NaN  -> the "not utf8" hint
//   - anything else        -> " normal "
//
// The exact-1.0 case is tested before the threshold comparison; in the other
// order it could never be reached, because 1.0 always satisfies dis >= limen
// for the configured threshold. A NaN distance (no countable characters at
// all) is reported with the encoding hint instead of falling through to
// " normal ".
func check(fp string, dis float64) {
	switch {
	case dis == 1.0:
		fmt.Println(fp, dis, " Are you cheating ")
	case dis >= limen:
		fmt.Println(fp, dis, " A pornographic ")
	case dis == 0 || math.IsNaN(dis):
		fmt.Println(fp, dis, " check 1 The following file character encoding is not utf8 Format! ")
	default:
		fmt.Println(fp, dis, " normal ")
	}
}
// walkFunc is the filepath.Walk callback: it classifies every regular entry
// whose name ends in ".txt" and prints the verdict via check.
//
// An error handed in by the walker is returned unchanged before the entry is
// touched, since the entry could not be inspected. Directories are skipped
// even when their name ends in ".txt", so they are never passed to load; the
// walk still descends into them. A failure to load a matching file is fatal
// (errHandle).
func walkFunc(fp string, info os.FileInfo, err error) error {
	if err != nil {
		return err
	}
	if info.IsDir() || path.Ext(fp) != ".txt" {
		return nil
	}

	m, err := load(fp)
	errHandle(err)
	_, dis := classify(m)
	check(fp, dis)
	return nil
}
// file receives the value of the --file command-line flag; it stays empty
// when the flag is not given.
var file string

// init prepares the program before main runs: if data_dir cannot be stat'ed
// it tries to create it (any failure to do so is fatal), and it registers the
// --file flag.
func init() {
	if _, statErr := os.Stat(data_dir); statErr != nil {
		errHandle(os.Mkdir(data_dir, os.ModePerm))
	}

	const usage = "file read in,if you don't give the file read in," +
		"it will create a data dictionary,just pust your files in it"
	flag.StringVar(&file, "file", "", usage)
}
// main parses the command line and classifies text files.
//
// Without --file every .txt file under data_dir is classified via walkFunc.
// With --file only that one file is loaded, classified and reported. Any
// error, including one returned by the directory walk itself, is fatal via
// errHandle.
func main() {
	flag.Parse()

	if file == "" {
		// The walk's error used to be discarded, so an unreadable data
		// directory made the program exit silently with no output.
		errHandle(filepath.Walk(data_dir, walkFunc))
		return
	}

	m, err := load(file)
	errHandle(err)
	_, dis := classify(m)
	check(file, dis)
}
That's all for this article; I hope you enjoyed it.