// Copyright 2017 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// fileslist.py replacement written in Go, which utilizes multiple cores.
//
// Every non-directory entry found under the directories given on the command
// line is hashed with SHA-256 by a pool of goroutines, and the resulting list
// is printed to stdout as JSON.

package main

import (
	"crypto/sha256"
	"encoding/json"
	"flag"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"runtime"
	"sort"
	"strings"
	"sync"
)

const (
	// MAX_DEFAULT_PARA caps the default value of the -para flag.
	MAX_DEFAULT_PARA = 24
)

// defaultPara returns the default number of worker goroutines: the number of
// CPUs reported by the runtime, capped at MAX_DEFAULT_PARA.
func defaultPara() int {
	ret := runtime.NumCPU()
	if ret > MAX_DEFAULT_PARA {
		return MAX_DEFAULT_PARA
	}
	return ret
}

var (
	para = flag.Int("para", defaultPara(), "Number of goroutines")
)

// Node represents each file. Only the exported fields are part of the JSON
// output; path and stat are working state used while scanning.
type Node struct {
	SHA256 string
	Name   string // device side path.
	Size   int64
	path   string // host side path.
	stat   os.FileInfo
}

// newNode builds a Node for the file at hostPath that will be reported under
// devicePath. SHA256 and Size stay empty until scan is called.
func newNode(hostPath string, devicePath string, stat os.FileInfo) Node {
	return Node{Name: devicePath, path: hostPath, stat: stat}
}

// scan fills in Size and SHA256 and returns true if the node should be added
// to the result (currently always). It panics on any I/O error.
func (n *Node) scan() bool {
	n.Size = n.stat.Size()

	// Calculate SHA256.
	h := sha256.New()
	if n.stat.Mode()&os.ModeSymlink == 0 {
		f, err := os.Open(n.path)
		if err != nil {
			panic(err)
		}
		defer f.Close()

		if _, err := io.Copy(h, f); err != nil {
			panic(err)
		}
	} else {
		// Hash the content of symlink, not the file it points to.
		s, err := os.Readlink(n.path)
		if err != nil {
			panic(err)
		}
		if _, err := io.WriteString(h, s); err != nil {
			panic(err)
		}
	}
	n.SHA256 = fmt.Sprintf("%x", h.Sum(nil))
	return true
}

// sortNodes orders nodes in place: largest Size first, ties broken by Name in
// descending order.
func sortNodes(nodes []Node) {
	sort.Slice(nodes, func(i, j int) bool {
		a, b := &nodes[i], &nodes[j]
		if a.Size != b.Size {
			return a.Size > b.Size
		}
		return strings.Compare(a.Name, b.Name) > 0
	})
}

func main() {
	flag.Parse()

	// A negative count would make WaitGroup.Add panic, and zero workers would
	// leave the first channel send below blocked forever, so reject both.
	if *para < 1 {
		fmt.Fprintf(os.Stderr, "-para must be at least 1, got %d\n", *para)
		os.Exit(2)
	}

	allOutput := make([]Node, 0, 1024) // Store all outputs.
	mutex := &sync.Mutex{}             // Guard allOutput

	ch := make(chan Node) // Pass nodes to goroutines.

	var wg sync.WaitGroup // To wait for all goroutines.
	wg.Add(*para)

	// Scan files in multiple goroutines.
	for i := 0; i < *para; i++ {
		go func() {
			defer wg.Done()

			output := make([]Node, 0, 1024) // Local output list.
			for node := range ch {
				if node.scan() {
					output = append(output, node)
				}
			}
			// Add to the global output list.
			mutex.Lock()
			allOutput = append(allOutput, output...)
			mutex.Unlock()
		}()
	}

	// Walk the directories and find files to scan.
	for _, dir := range flag.Args() {
		absDir, err := filepath.Abs(dir)
		if err != nil {
			panic(err)
		}
		// Device paths are expressed relative to the parent of each argument,
		// so the argument's own base name is the first path component.
		deviceRoot := filepath.Dir(absDir)
		err = filepath.Walk(dir, func(path string, stat os.FileInfo, err error) error {
			if err != nil {
				panic(err)
			}
			if stat.IsDir() {
				return nil
			}
			absPath, err := filepath.Abs(path)
			if err != nil {
				panic(err)
			}
			devicePath, err := filepath.Rel(deviceRoot, absPath)
			if err != nil {
				panic(err)
			}
			devicePath = "/" + devicePath
			ch <- newNode(absPath, devicePath, stat)
			return nil
		})
		if err != nil {
			panic(err)
		}
	}

	// Wait until all the goroutines finish.
	close(ch)
	wg.Wait()

	// Sort the entries and dump as json.
	sortNodes(allOutput)

	j, err := json.MarshalIndent(allOutput, "", " ")
	if err != nil {
		// Report the actual marshalling error rather than a nil panic value.
		panic(err)
	}

	fmt.Printf("%s\n", j)
}