数据存储
[goroutine-traffic-statistics.git] / analysis / processor / parse.go
blob01425d79db6f770830246687e11327ea3e7f1a5e
1 package processor
3 import (
4 "crypto/md5"
5 "encoding/hex"
6 "net/url"
7 "strconv"
8 "strings"
10 "github.com/mgutz/str"
11 "repo.or.cz/goroutine-traffic-statistics/analysis/defs"
// Substring markers used to locate fields inside log lines and page URLs.
const (
	// handleDig precedes the tracking query string inside a log line.
	// The leading space is part of the marker.
	handleDig = " /dig?"

	// handleMovie precedes the resource ID in URLs that formatUrl classifies
	// as defs.UrlNodeTypeDetail.
	handleMovie = "/movie/"

	// handleList precedes the resource ID in URLs that formatUrl classifies
	// as defs.UrlNodeTypeList.
	handleList = "/list/"

	// handleHTML marks the end of the resource ID inside a page URL.
	handleHTML = ".html"
)
21 func Parse(logChan defs.LogDataChan, pvChan defs.UrlDataChan, uvChan defs.UrlDataChan) error {
22 for logStr := range logChan {
23 data := cutLogFetchData(logStr)
24 if data == nil {
25 continue
27 hasher := md5.Sum([]byte(data.UserAgnet + data.Refer))
28 uid := hex.EncodeToString(hasher[:])
30 uData := &defs.UrlData{Data: data, UserID: uid, Node: formatUrl(data.Url, data.Time)}
31 pvChan <- uData
32 uvChan <- uData
34 return nil
37 func cutLogFetchData(logStr string) *defs.DigData {
38 logStr = strings.TrimSpace(logStr)
39 pos1 := str.IndexOf(logStr, handleDig, 0)
40 if pos1 < 0 {
41 return nil
43 pos1 += len(handleDig)
44 pos2 := str.IndexOf(logStr, " HTTP/", pos1)
45 d := str.Substr(logStr, pos1, pos2-pos1)
47 urlInfo, err := url.Parse("http://localhost/?" + d)
48 if err != nil {
49 return nil
51 data := urlInfo.Query()
52 return &defs.DigData{
53 Time: data.Get("time"),
54 Url: data.Get("url"),
55 Refer: data.Get("refer"),
56 UserAgnet: data.Get("ua"),
60 func formatUrl(url, time string) *defs.UrlNode {
61 startMovie := str.IndexOf(url, handleMovie, 0)
62 startList := str.IndexOf(url, handleList, 0)
63 end := str.IndexOf(url, handleHTML, 0)
64 var resourceID int
65 var types defs.UrlNodeType
67 if startMovie >= 0 {
68 start := startMovie + len(handleMovie)
69 resourceID = getResourceID(url, start, end)
70 types = defs.UrlNodeTypeDetail
71 } else if startList >= 0 {
72 start := startList + len(handleList)
73 resourceID = getResourceID(url, start, end)
74 types = defs.UrlNodeTypeList
75 } else {
76 resourceID = 1
77 types = defs.UrlNodeTypeIndex
80 return &defs.UrlNode{
81 Type: types,
82 ResourceID: resourceID,
83 Url: url,
84 Time: time,
// getResourceID parses the base-10 integer stored in url[start:end].
//
// start is the byte offset of the first character of the ID and end is the
// byte offset just past its last character. An end beyond the length of url
// is clamped to the end of the string.
//
// It returns 0 when the span is empty or inverted, when start lies outside
// url (including a negative start or end), or when the span is not a valid
// integer.
func getResourceID(url string, start, end int) int {
	if end > len(url) {
		end = len(url)
	}
	// Validate explicitly so that out-of-range offsets (for example an end of
	// -1 from a failed search) can never select unrelated text.
	if start < 0 || start >= end {
		return 0
	}

	id, err := strconv.Atoi(url[start:end])
	if err != nil {
		return 0
	}
	return id
}