Notes from learning the fundamentals of the Go programming language from this amazing tutorial. It is a fantastic video tutorial on YouTube that explains Go concepts from the ground up and offers some great insight into the language design.
(From https://www.youtube.com/watch?v=ynoY2xz-F8s)
Variables are declared with the var keyword or the shorthand := (only inside of functions / methods, to simplify parsing!):
var a int
// or
a := 2 // in functions or methods
The format %d %[1]v will reuse the first passed-in argument (e.g. if we want to print a single variable twice in a Printf, you'd normally do fmt.Printf("%d %d", a, a), but with this you just need to do fmt.Printf("%d %[1]v", a) and that parameter a will be reused).
Constants can be grouped in a const block:
const (
a = 1
b = 3 * 100
s = "hello"
)
byte is a synonym for uint8.
rune is a synonym for int32, used for characters.
string is an immutable sequence of "characters" (runes); string literals support escape sequences (e.g. \n), and raw string literals use backquotes (e.g. `string with "quotes"`).
The length of a string is the number of UTF-8 bytes required to encode it, NOT THE NUMBER OF LOGICAL CHARACTERS.
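For example (a small sketch; RuneCountInString comes from the standard unicode/utf8 package):
s := "héllo"
fmt.Println(len(s))                    // 6 - the é takes 2 bytes in UTF-8
fmt.Println(utf8.RuneCountInString(s)) // 5 - the number of logical characters (runes)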
The strings package contains useful string functions.
Arrays are comparable with ==, slices are obviously not. Arrays can be used as map keys, slices cannot.
Slices have the copy and append helper operators.
Reading a missing key from a map returns the zero value of the map's value type (e.g. for int you'd get 0):
var m map[string]int // nil map (reading any key will return the default value of the map value type)
_m := make(map[string]int) // empty non-nil map
make creates the underlying hash table and allocates memory etc. It is required to instantiate and write to a map.
var m = map[string]int{
"hello": 1,
}
p := map[string]int{} // Empty non nil map
a, ok := p["hello"] // Returns 0, false since the key "hello" doesn't exist
p["hello"]++
b, ok := p["hello"] // Returns 1, true
if w, ok := p["the"]; ok {
// Useful if we want to do something if an entry is / isn't in the map
}
Map entries are not addressable - you can't take the address of a map entry (e.g. &myMap["Hello"] is not allowed).
nil
(From https://www.youtube.com/watch?v=ynoY2xz-F8s)
nil indicates the absence of something, with part of the Go philosophy being to make the zero value useful.
The nil value has no type; it is defined for the following constructs:
var s fmt.Stringer // This is a nil interface with no concrete type and no value (nil, nil)
fmt.Println(s == nil) // Will print true since (nil, nil) == nil
//---
var p *Person // a nil *Person pointer (*Person implements fmt.Stringer)
var s fmt.Stringer = p // Now we have (*Person, nil) - a concrete type (*Person) but still no value. This is now no longer equal to nil
//---
func do() error { // This will return the nil pointer wrapped in the error interface (*doError, nil)
var err *doError
return err // This is a nil pointer of type *doError
}
fmt.Println(do() == nil) // Will be FALSE because of the above example - (*doError, nil) != nil!!!
// It is good practice to not define or return concrete error variables
if x, err := doSomething(); err != nil {
return err
}
for i := range someArr {
// i is an index here. Remember this - this mistake can happen often. i is the INDEX NOT THE VALUE.
// If you want to range over the values you can use the blank identifier like for _, v := range someArray
}
for i, v := range someArr {
// i is an index, v is the value at that index
// The value v is COPIED - don't modify. If the values are some large struct, it might be better to use the explicit indexing for loop
}
for k := range someMap {
// Looping over all keys in a map
}
for k, v := range someMap {
// Getting the keys and values in the loop
}
for {
// Infinite loop
}
switch someVal {
case 0,1,2:
fmt.Println("Low")
case 3,4,5:
// Noop
default:
fmt.Println("Other")
}
a := 3
switch {
case a <= 2:
case a == 8:
default:
// Do something
}
Every program starts in the main function of the main package.
:= can only be used inside functions.
You can define an init() function for a package, however using this isn't really recommended.
Variables are declared with the var keyword:
var a int
var a int = 1
var c = 1 // Type inference
var d = 1.0
// Declaration block for simplicity
var (
x, y int
z float64
s string
)
:=
:= is used to declare and assign to a variable:
err := doSomething()
err := doSomethingElse() // This is wrong, you can't re-declare err
x, err := doSomethingOther() // This is fine since you are declaring the new var x, and just reassigning err from the original declaration above
If you use := inside a control structure (e.g. if _, err := do(); err != nil), that err declaration is local to the control structure scope (that if block's scope):
func do() error {
var err error
for {
n, err := f.Read(buf) // := declares new n AND err here, shadowing the outer err
if err != nil {
break
}
doSomething(buf[:n])
}
return err // always nil: the outer err was never assigned because of the shadowing above
}
New named types are declared with the type keyword. type x int means that you can't assign something with type x to int or vice versa; you would have to use a type conversion like var thing x = x(12).
Pointers (taken with &x), strings (although they're immutable), slices, maps and channels are all passed by reference, meaning that the data they refer to can be updated inside a function.
Functions can return multiple values, commonly (value, error) (e.g. (int, error)), where error != nil indicates some error has occurred:
func main() {
f := os.Stdin
if len(os.Args) > 1 {
var err error
if f, err = os.Open(os.Args[1]); err != nil { // note: = not :=, so the outer f is assigned rather than shadowed
...
}
defer f.Close()
}
// At this point we can do something with the file and only if it is a file passed in the params will it be closed at function exit
}
Note the deferred f.Close() runs at function exit, not at the end of the enclosing block.
Deferred arguments are evaluated at the point of the defer statement:
func thing() {
a := 10
defer fmt.Println(a)
a = 11
fmt.Println(a)
// Will print 11,10
}
func fib() func() int {
a, b := 0, 1
return func() int {
a, b = b, a+b
return b
}
}
func main() {
f := fib()
for x := f(); x < 100; x = f() {
fmt.Println(x) // Prints fibonacci numbers less than 100
}
}
The closure returned by fib() above is a function that returns an int alongside the environment containing references to the values a and b.
// The following shows some different slices, with information on them given below
var s []int
t := []int{}
u := make([]int, 5)
v := make([]int, 0, 5)
w := []int{1,2,3,4,5}
Internally a slice descriptor holds a length, a capacity and arrAddr, which is a pointer to the underlying array.
s is an uninitialised (nil) slice - its arrAddr is nil.
t is an initialised but empty slice - its arrAddr points to a special sentinel struct{} value (again an internal thing that is basically a nothing value but not nil).
u is an initialised slice with length 5 and capacity 5.
v is an initialised slice with length 0 and capacity 5.
w is a slice literal with length 5 and capacity 5.
You can slice a slice, e.g. a[0:2], which takes elements 0 and 1 of a (the "to" index is exclusive).
If a has length 5 and you slice 0:2, you get back a slice descriptor with length 2 but capacity 5 (since the underlying array is the same and has length 5). Re-slice that result with 0:3 and you'll get back a slice descriptor of length 3 - which will contain the value at index 2 of the original slice!!!
The full slice expression a[0:2:2] will create a slice descriptor of length 2 AND CAPACITY 2.
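A small sketch of these length/capacity rules:
a := []int{1, 2, 3, 4, 5}
b := a[0:2]
fmt.Println(len(b), cap(b)) // 2 5 - b shares a's underlying array
b = b[0:3]
fmt.Println(b) // [1 2 3] - index 2 of a becomes visible again
c := a[0:2:2]
fmt.Println(len(c), cap(c)) // 2 2 - appending to c will allocate a new array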
Arrays are declared like b := [2]string{"Hello", "world"}, and you can do b := [...]string{...} to let Go determine the size of the array for you based on the literal that follows.
Slices can be created with the make function (func make([]T, len, cap) []T).
The len and cap functions can be used to retrieve the length and capacity of a slice.
You can take an array arr and create a slice referencing (or providing a view of) the storage of arr using s := arr[:].
A slice can be extended up to its capacity with s = s[:cap(s)].
s := make([]int, 5)
// This is basically the internal implementation of slice growing that Go uses when appending to a slice that has reached its max capacity
t := make([]int, len(s), (cap(s)+1)*2)
copy(t, s)
s = t
The built-in append has the signature append(s []T, x ...T) []T. You can use the ... operator to expand the second arg into a list of args: append(s, x...) for s []T and x []T.
func filter(s []int, fn func(int) bool) []int {
var res []int // == nil
for _, v := range s {
if fn(v) {
res = append(res, v)
}
}
return res
}
type Employee struct {
Name string
Number int
Boss *Employee
Hired time.Time
}
You can use the Printf verb %+v to pretty print a struct and its fields.
You can store structs as map values (map[string]MyStruct), however it is really bad practice to do this because a map's internal structure is dynamic - store pointers instead (map[string]*MyStruct). With pointers you can also use operators like increment (++) on fields of structs by direct access (e.g. myMap["thing"].IntField++), which doesn't compile for struct values since map entries aren't addressable.
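A quick sketch of the difference (MyStruct is a hypothetical type for illustration):
type MyStruct struct{ IntField int }

func bump(m map[string]*MyStruct) {
	m["thing"].IntField++ // fine: we modify the struct through the stored pointer
	// with map[string]MyStruct the same line would not compile,
	// because map values are not addressable
}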
Struct types with identical fields are assignment compatible, but once you name them with type blah struct{...}, that no longer is the case - structs with different names will always be different types even if they have the same field names and types (though explicit conversion is allowed):
type thing1 struct {
field int
}
type thing2 struct {
field int
}
func main() {
a := thing1{field: 1}
b := thing2{field: 1}
a = thing1(b) // Valid
}
For a pointer thing *myStruct, thing.field is equivalent to dereferencing with (*thing).field.
The empty struct struct{} occupies no space, so it is useful for sets (map[int]struct{}) or for creating a chan struct{} to be a "complete" notifier without the need to pass any data if that isn't needed.
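A quick sketch of both uses of the empty struct:
seen := map[string]struct{}{}
seen["a"] = struct{}{}
if _, ok := seen["a"]; ok {
	// "a" is in the set
}

done := make(chan struct{})
go func() {
	// ... do some work ...
	close(done) // closing signals completion without sending any data
}()
<-done // blocks until the goroutine closes done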
Structs can be (de)serialised to JSON with the encoding/json package; struct tags control the field names:
type Response struct {
Data string `json:"data"` // Only exported fields are included in a marshalled JSON string
Status int `json:"status"`
}
func main() {
// Serializing
r := Response{"Some data", 200}
j, _ := json.Marshal(r)
// j will be []byte containing "{"data":"Some data","status":200}"
// Deserializing
var r2 Response
_ = json.Unmarshal(j, &r2)
}
for _, thing := range things {
// thing is always a copy - mutating it doesn't mutate the thing in things
}
// You have to use an index if you want to mutate the element
for i := range things {
things[i].field = value
}
func update(things []thing) []thing {
things = append(things, x) // the slice header is a copy, and append may reallocate, so we must return the result
return things
}
Use go build -gcflags -m=2 to see the results of escape analysis.
net/http is the standard library package for HTTP networking:
type Handler interface {
ServeHTTP(http.ResponseWriter, *http.Request)
}
type HandlerFunc func(ResponseWriter, *Request)
// This is a method declaration on a function type
func (f HandlerFunc) ServeHTTP(w ResponseWriter, r *Request) {
f(w, r)
}
// Then we can define a function that conforms to that interface without
// requiring explicit implementation of ServeHTTP
func handler(w http.ResponseWriter, r *http.Request) {
fmt.Fprintf(w, "Hello, world")
}
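Such a handler function can then be registered like this (a minimal sketch, assuming net/http and log are imported):
func main() {
	// HandleFunc wraps handler in http.HandlerFunc so it satisfies http.Handler
	http.HandleFunc("/", handler)
	log.Fatal(http.ListenAndServe("localhost:8080", nil))
}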
The html/template package is used for HTML templating:
var form = `
<h1>Todo #{{.ID}}</h1>
<div></div>`
This is a form for the html/template library to populate. It uses double curly brace syntax for templating and has directives like printf to do formatting. It will pull values from the fields specified in the template, e.g. pulling the ID from the .ID field of some struct.
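A minimal sketch of parsing and executing such a template (Todo is a hypothetical struct with an ID field):
type Todo struct{ ID int }

func todoHandler(w http.ResponseWriter, r *http.Request) {
	t := template.Must(template.New("form").Parse(form))
	t.Execute(w, Todo{ID: 42}) // {{.ID}} is filled from the Todo's ID field
}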
“An interface, which is something that can do something. Including the empty interface, which is something that can do nothing, which is everything, because everything can do nothing, or at least nothing.” - Brad Fitzpatrick
You can declare type IntSlice []int and attach a method to this named user-declared type, but you can't attach a method to []int directly.
The fmt package defines the Stringer interface - this defines a method String() that can be used to stringify the receiving thing:
type Stringer interface {
String() string
}
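For example (a small sketch using a hypothetical Celsius type):
type Celsius float64

func (c Celsius) String() string {
	return fmt.Sprintf("%.1f°C", float64(c))
}

func main() {
	fmt.Println(Celsius(21.5)) // prints 21.5°C via the String() method
}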
This is used by fmt.Printf - it will check if the thing it needs to print satisfies the Stringer interface (is a Stringer), and if so just copies the output of the String() method to its output.
Interfaces let you write e.g. an OutputTo function that accepts any type that implements the Write([]byte) method, meaning we can use anything that has that method rather than a specific implementation.
Interfaces can be composed:
type ReadWriter interface {
Reader
Writer
}
Anything implementing ReadWriter must implement the Read and Write methods.
You can't define new methods on a type from another package directly, but you can embed it in a new struct and add methods to that:
type Bigger struct {
otherpackage.Big // Struct composition to be explored later
}
func (b Bigger) SomeMethod() {
}
type Host struct {
Hostname string
Port int
}
type SimpleURI struct {
Host
Scheme string
Path string
}
func main() {
s := SimpleURI{
Host: Host{Hostname: "google.com", Port: 8080},
Scheme: "https",
Path: "/search",
}
fmt.Println(s.Hostname, s.Scheme) // See how the Host has been promoted
}
The SimpleURI structure has the fields of the Host struct promoted to its level.
Methods defined on the Host type are also promoted to the SimpleURI type; this is the most powerful part of composition:
type Thing struct {
Field string
}
func (t *Thing) bruh() {
fmt.Println(t.Field)
}
// Would also be valid with a value receiver method
// func (t Thing) bruh() {
// fmt.Println(t.Field)
// }
type Thing2 struct {
*Thing
Field2 string
}
func main() {
t := Thing2{&Thing{"Hello"}, "world"}
t.bruh() // Method call here is valid
}
type Interface interface {
// The length of the collection
Len() int
// Says whether the element at index i is less than the element at index j
Less(i, j int) bool
// Swaps the element at index i with the element at index j in the collection
Swap(i, j int)
}
The sort.Sort function can take in any Interface-conforming collection type and sort it in place:
type Component struct {
Name string
Weight int
}
type Components []Component
func (c Components) Len() int { return len(c) }
func (c Components) Swap(i, j int) { c[i], c[j] = c[j], c[i] }
Say we have a collection of Component values and we want to make Components sortable. We complete sort.Interface for Components with the Less function as follows:
func (c Components) Less(i, j int) bool {
return c[i].Weight < c[j].Weight
}
Now Components can be sorted by weight by default.
type ByName struct{ Components }
func (bn ByName) Less(i, j int) bool {
return bn.Components[i].Name < bn.Components[j].Name
}
type ByWeight struct{ Components }
func (bw ByWeight) Less(i, j int) bool {
return bw.Components[i].Weight < bw.Components[j].Weight
}
ByName and ByWeight conform to sort.Interface through composition (since Components has the Len and Swap methods defined for it), but they then specialise the Less method to be a specific sorting strategy.
The reverse unexported struct in sort is used to sort something in reverse order:
type reverse struct {
Interface // It just embeds sort.Interface
}
func (r reverse) Less(i, j int) bool {
return r.Interface.Less(j, i) // Note swapped arguments for reverse sorting
}
func Reverse(data Interface) Interface {
return &reverse{data}
}
The Less method on reverse is flipped. The exported function sort.Reverse is defined to return a sort.Interface that has the reverse implementation of Less.
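Putting the pieces together (a small usage sketch built on the types above):
components := Components{
	{Name: "bolt", Weight: 5},
	{Name: "anvil", Weight: 100},
}
sort.Sort(ByWeight{components})             // lightest first
sort.Sort(sort.Reverse(ByName{components})) // reverse alphabetical by name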
Making nil useful
// The nil / zero value of this struct is ready to use since a nil slice can be appended to
type StringStack struct {
data []string
}
func (s *StringStack) Push(x string) {
s.data = append(s.data, x)
}
func (s *StringStack) Pop() string {
l := len(s.data)
if l == 0 {
panic("pop from empty stack")
}
t := s.data[l-1]
s.data = s.data[:l-1]
return t
}
Note the unexported data field inside the StringStack struct, so that a client can't see the implementation details.
Methods can also be called on nil receivers, which can make the nil value of a type useful:
type IntList struct {
Value int
Tail *IntList
}
func (list *IntList) Sum() int {
if list == nil {
return 0
}
return list.Value + list.Tail.Sum()
}
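A quick sketch of the nil receiver in action:
var list *IntList       // nil - the zero value works
fmt.Println(list.Sum()) // 0: calling Sum on a nil *IntList is fine
list = &IntList{Value: 1, Tail: &IntList{Value: 2}}
fmt.Println(list.Sum()) // 3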
Given v := T{} you can of course call value receiver methods on it directly, however you can also call pointer receiver methods on it (as long as v is addressable) - the compiler implicitly takes the address: (&v).PointerMethod().
Given v := &T{}, you can of course call pointer receiver methods on it directly, however Go will also implicitly add a dereference when you call a value receiver method: (*v).ValueMethod().
The method set of T is all the value receiver methods of T.
The method set of *T is all the value and pointer receiver methods of T:
type Thing struct{}
func (t Thing) ValMethod() {}
func (t *Thing) PointerMethod() {}
type IVal interface { ValMethod() }
type IPtr interface { PointerMethod() }
func main() {
var t Thing
var iVal IVal
var iPtr IPtr
iVal = t // Valid
iVal = &t // Valid
iPtr = t // Not valid, since the value t doesn't have the pointer method PointerMethod in its method set
iPtr = &t // Valid
}
An interface variable is nil until initialised. Internally an interface value is a pair (type, ptr), and it only equals nil when its value is (nil, nil):
var r io.Reader // nil interface here
var b *bytes.Buffer // nil value here
r = b // at this point r is no longer nil itself, but it has a nil pointer to a buffer
After r = b, the new value of r is (*bytes.Buffer, nil) - a non-nil interface holding a nil pointer.
type error interface {
Error() string
}
error is an interface that has one method, Error(). Check err == nil to see if the err return value was assigned - this is the idiomatic error checking mechanism in Go.
type someErr struct {
err error
someField string
}
func (e someErr) Error() string {
return "this is some error"
}
func someFunc(a int) *someErr { // We should NEVER return a concrete error type
return nil
}
func main() {
var err error = someFunc(123456)
if err != nil {
// Even though we logically didn't want to throw an error, returning a concrete error type
// meant that the err variable was initialised and looks like (*someErr, nil) which in the
// semantics of interfaces ISN'T NIL
fmt.Println("Oops")
} else {
// If we'd done err := someFunc(123456), the above check would have worked although again we
// should never return a concrete error implementation from a function
}
}
This makes err != nil return true, which is logically incorrect.
Some types should not be copied, e.g. bytes.Buffer, which has an embedded []byte which isn't safe to copy since the underlying array is shared, and any type that embeds any sort of mutex or other synchronisation primitives that should never be copied.
When returning values, a concrete type like *os.File is less restrictive (for the caller) than io.ReadWriteCloser, because files have other useful methods that a caller would want access to. The error interface however is an exception to this rule.
interface{} has no methods, therefore it is satisfied by anything. The alias type any = interface{} is defined by the standard library for ease of use. It is used by fmt for printing any type, and by other packages requiring similar behaviour.
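A small sketch of accepting any and recovering the concrete type with a type switch (describe is a hypothetical helper):
func describe(v any) string {
	switch x := v.(type) {
	case int:
		return fmt.Sprintf("int: %d", x)
	case string:
		return fmt.Sprintf("string: %q", x)
	default:
		return fmt.Sprintf("some %T value", x)
	}
}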
nil is the zero value for pointers, slices, maps, channels, functions and interfaces.
nil is not a keyword in Go, it is a predeclared identifier.
nil means something slightly different for:
Pointers
Slices
Maps, Channels and Functions
Interfaces are the most interesting use of nil in Go. An interface value is internally (Concrete Type, Value): a nil interface is (nil, nil), while assigning a typed nil pointer gives (some concrete type, nil) internally for whatever the assigned type is, and this is no longer ==nil:
func bad1() error {
var err *someConcreteError
return err // We are returning (*someConcreteError, nil) which !=nil
}
func bad2() *someConcreteError {
// We are returning a concrete pointer to an error which will pass ==nil, however
// it is very bad practice because the second you wrap this pointer in the error
// interface you will have the same problem as above
return nil
}
Reading from a nil map with v, ok := m[i] gives zeroVal(type of map value), false for any key i. That means nil maps are perfectly valid read-only empty maps.
Reading from a closed channel of type t gives zero(t), false, with that false ok flag indicating the channel is closed.
Functions are first-class values:
func Add(a, b int) int {
return a+b
}
func main() {
var addTo5 func(int) int = func (a int) int {
return Add(5, a)
}
}
func (p Point) Distance(q Point) float64 {
return math.Hypot(q.X-p.X, q.Y-p.Y)
}
func main() {
p := Point{1,2}
q := Point{4,6}
distanceFromP := p.Distance // Here we close over the receiver value p, returning a curried function
}
Since Distance is a value receiver method, the value of p is closed over when defining distanceFromP; this means that if you update p, these changes won't be reflected in the distanceFromP calls; it will always return the distance to the point (1,2) because that value was captured when the method value was created. If you change Distance to be a pointer receiver method, any changes to p will be reflected in the method.
type dollars float32
func (d dollars) String() string {
return fmt.Sprintf("$%.2f", d)
}
type database map[string]dollars
func (db database) list(w http.ResponseWriter, req *http.Request) {
for item, price := range db {
fmt.Fprintf(w, "%s: %s\n", item, price)
}
}
func main() {
db := database{
"shoes": 50,
"socks": 5,
}
http.HandleFunc("/list", db.list)
log.Fatal(http.ListenAndServe("localhost:8080", nil))
}
With an initial step 1 and a final step 4, and two concurrent sequences (2a, 2b) and (3a, 3b) running in between, the possible interleavings are:
{1,2a,2b,3a,3b,4}
{1,2a,3a,2b,3b,4}
{1,2a,3a,3b,2b,4}
{1,3a,3b,2a,2b,4}
{1,3a,2a,2b,3b,4}
{1,3a,2a,3b,2b,4}
Goroutines are started by putting the go keyword in front of a function call.
Channels return (zeroVal, false) if they are closed and you try to read.
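A quick sketch of the closed-channel behaviour:
ch := make(chan int, 1)
ch <- 42
close(ch)
v, ok := <-ch
fmt.Println(v, ok) // 42 true - buffered values can still be drained
v, ok = <-ch
fmt.Println(v, ok) // 0 false - the channel is closed and empty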
The http.HandlerFunc Channel Pattern: to get data into a function matching http.HandlerFunc's signature, we could use a global variable, however this isn't a great idea since global variables aren't great. Instead, declare a channel type and make the handler a method on it:
type intCh chan int
func (ch intCh) handler(w http.ResponseWriter, r *http.Request) {
fmt.Fprintf(w, "Received %d from channel", <-ch)
}
http.HandleFunc("/", someIntCh.handler)
// generates numbers up to the given limit and writes them to a channel, closing the channel when finished
func generate(limit int, ch chan<- int) {
defer close(ch)
for i := 2; i < limit; i++ {
ch <- i
}
}
// receives numbers from a channel and filters for only those not divisible by the given
// divisor, writing to a destination channel and closing the destination when the src is closed
func filter(src <-chan int, dst chan<- int, divisor int) {
defer close(dst)
for i := range src { // Will block until a value is added to src, and break when src is closed
if i%divisor != 0 {
dst <- i
}
}
}
// prime sieving function
func sieve(limit int) {
ch := make(chan int)
go generate(limit, ch) // kicks off generator
for {
prime, ok := <-ch
if !ok {
break // we are done
}
// makes a new filter for the prime that was just seen, then adds it to the chain of running filters
newFilterChan := make(chan int)
go filter(ch, newFilterChan, prime)
ch = newFilterChan
fmt.Print(prime, " ")
}
}
func main() {
sieve(1000)
}
The select statement is used to multiplex channels, allowing any ready alternative among its cases to proceed:
func main() {
chans := []chan int{
make(chan int),
make(chan int),
}
for i := range chans {
go func(i int, ch chan<- int) {
for {
time.Sleep(time.Duration(i)*time.Second)
ch<- i
}
}(i+1, chans[i])
}
for i := 0; i < 12; i++ {
// Select allows us to listen to both channels at the same time, and whichever
// one is ready first will be read
select {
case m0 := <-chans[0]:
fmt.Println("received", m0)
case m1 := <-chans[1]:
fmt.Println("received", m1)
}
}
}
A useful companion to select is the time.After function. time.After(5*time.Second) will create a new timer; this timer has a channel which is returned by the function, and the timer will send the current time on this channel when it elapses. There is also time.Ticker, which is similar but will tick indefinitely with a given tick rate.
A default case is always ready and is chosen if no other case is ready:
func sendOrDrop(data []byte) {
select {
case ch <- data:
// sent ok; do nothing
default:
log.Printf("overflow, dropped %d bytes", len(data))
}
}
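As mentioned above, time.After combines naturally with select to implement timeouts - a minimal sketch, assuming some channel ch:
select {
case v := <-ch:
	fmt.Println("received", v)
case <-time.After(2 * time.Second):
	fmt.Println("timed out waiting for ch") // runs if nothing arrives within 2 seconds
}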
A context carries a Done channel that closes when the cancellation occurs. The Done() channel is often used in select blocks. Derive from context.Background (the top level context) for any timeout contexts:
ctx := context.Background()
ctx = context.WithValue(ctx, "traceId", "abc123")
ctx, cancel := context.WithTimeout(ctx, 3 * time.Second)
defer cancel() // it is common to defer cancel
req, _ := http.NewRequest(http.MethodGet, url, nil)
req = req.WithContext(ctx)
resp, err := http.DefaultClient.Do(req)
If the context is cancelled or times out (closing Done()) while you are waiting, e.g. in a select, then you can see the error in ctx.Err().
type contextKey int
// Make sure the keys are exported (but not the type itself), then clients have a single source of truth for requesting context values without the risk of collision
const (
TraceIdContextKey contextKey = iota
StartTimeContextKey
AuthContextKey
)
func AddTrace(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()
if traceID := r.Header.Get("X-Trace-Context"); traceID != "" {
ctx = context.WithValue(ctx, TraceIdContextKey, traceID)
}
next.ServeHTTP(w, r.WithContext(ctx))
})
}
func LogWithContext(ctx context.Context, f string, args ...any) {
// reflection is required because the context values "map" can contain any. We need
// to downcast the any to a string (this two argument cast will return ok=true if
// the conversion was a success). More on reflection later ;)
traceID, ok := ctx.Value(TraceIdContextKey).(string)
// adding the trace ID to the log message if it is in the context
if ok && traceID != "" {
f = traceID + ": " + f
}
log.Printf(f, args...)
}
Channel direction can be restricted in function signatures: chan<- is send-only and <-chan is receive-only, e.g.
func get(url string, ch chan<- result) { }
func collect(ch <-chan result) map[string]int { }
Never modify a value after sending a pointer to it on a channel - that is a data race, as in this example:
type T struct {
i byte
b bool
}
func send(i int, ch chan<- *T) {
t := &T{i: byte(i)}
ch<- t
t.b = true // NEVER DO THIS
}
func main() {
vs := make([]T, 5)
ch := make(chan *T)
for i := range vs {
go send(i, ch)
}
time.Sleep(1*time.Second)
// This quick copy will read and copy the values written into the channel by
// the 5 running goroutines. But there is a race condition so the value of t.b
// for all the values is false since it is likely (but not guaranteed) that this
// read and copy will finish before the t.b is updated in the goroutine. If the
// channel was buffered, it would be likely (but again not a guarantee) that
// the value is true for all. The time.Sleep() will almost guarantee that this
// is the case but again this is a race condition so it should never be relied upon
for i := range vs {
vs[i] = *<-ch
}
for _, v := range vs {
fmt.Println(v)
}
}
type pair struct {
hash, path string
}
type fileList []string
type results map[string]fileList
// calculate the hash of a specific file path, returning a pair of
// (hash, path)
func hashFile(path string) pair {
file, err := os.Open(path)
if err != nil {
log.Fatal(err)
}
defer file.Close()
hash := md5.New()
if _, err := io.Copy(hash, file); err != nil {
log.Fatal(err)
}
return pair{fmt.Sprintf("%x", hash.Sum(nil)), path}
}
// this is a sequential implementation, could be quite slow on a large directory
func walk(dir string) (results, error) {
hashes := make(results)
err := filepath.Walk(dir, func(path string, fi os.FileInfo, err error) error {
if fi.Mode().IsRegular() && fi.Size() > 0 {
h := hashFile(path)
hashes[h.hash] = append(hashes[h.hash], h.path) // add the new file path to its corresponding hash entry in the map
}
return nil
})
return hashes, err
}
A concurrent version splits the work between worker goroutines, which hash files from a paths channel, and a collector goroutine, which gathers the (hash, path) pairs into the results map:
func collector(pairs <-chan pair, result chan<- results) {
hashes := make(results)
// loop will only stop when the channel closes
for p := range pairs {
hashes[p.hash] = append(hashes[p.hash], p.path)
}
result <- hashes
}
func worker(paths <-chan string, pairs chan<- pair, done chan<- bool) {
// process files until the paths channel is closed
for path := range paths {
pairs <- hashFile(path)
}
done <- true
}
func main() {
numWorkers := 2 * runtime.GOMAXPROCS(0)
// the first model has unbuffered channels
paths := make(chan string)
pairs := make(chan pair)
done := make(chan bool)
result := make(chan results)
for i := 0; i < numWorkers; i++ {
go worker(paths, pairs, done)
}
go collector(pairs, result)
err := filepath.Walk(dir, func(path string, fi os.FileInfo, err error) error {
if fi.Mode().IsRegular() && fi.Size() > 0 {
paths <- path
}
return nil
})
if err != nil {
log.Fatal(err)
}
// so the workers stop
close(paths)
for i := 0; i < numWorkers; i++ {
// we then read from the done channel until all workers are done
<-done
}
// after all the workers are done we can close the pairs channel
close(pairs)
// finally we can read the hashes from the result channel
hashes := <-result
fmt.Println(hashes)
}
A sync.WaitGroup keeps a counter: wg.Add(n) increments it before goroutines start, each goroutine calls wg.Done() when it finishes, and wg.Wait() blocks until every goroutine has called wg.Done() and the counter is 0.
func searchTree(dir string, paths chan<- string, wg *sync.WaitGroup) error {
defer wg.Done()
visit := func(p string, fi os.FileInfo, err error) error {
if err != nil && err != os.ErrNotExist {
return err
}
// ignore dir itself to avoid an infinite loop
if fi.Mode().IsDir() && p != dir {
wg.Add(1)
go searchTree(p, paths, wg) // we recursively search the tree in new goroutines to speed up listing
return filepath.SkipDir
}
if fi.Mode().IsRegular() && fi.Size() > 0 {
paths <- p
}
return nil
}
return filepath.Walk(dir, visit)
}
func run(dir string) results {
workers := 2 * runtime.GOMAXPROCS(0)
paths := make(chan string)
pairs := make(chan pair)
done := make(chan bool)
result := make(chan results)
wg := new(sync.WaitGroup)
for i := 0; i < workers; i++ {
go worker(paths, pairs, done)
}
go collectHashes(pairs, result)
// multi-threaded walk of the directory tree
wg.Add(1)
err := searchTree(dir, paths, wg)
if err != nil {
log.Fatal(err)
}
// wg.Wait() will block until all the directory listing work is done
wg.Wait()
close(paths)
for i := 0; i < workers; i++ {
<-done
}
close(pairs)
return <-result
}
func processFile(path string, pairs chan<- pair, wg *sync.WaitGroup, limits chan bool) {
defer wg.Done()
// writing to limits will block until another processFile goroutine finishes
limits <- true
// this runs when the goroutine is finished (deferred); reading from the channel frees up a slot for another goroutine
defer func() {
<-limits
}()
pairs <- hashFile(path)
}
func collectHashes(pairs <-chan pair, result chan<- results) {
hashes := make(results)
for p := range pairs {
hashes[p.hash] = append(hashes[p.hash], p.path)
}
result <- hashes
}
func walkDir(dir string, pairs chan<- pair, wg *sync.WaitGroup, limits chan bool) error {
defer wg.Done()
visit := func(p string, fi os.FileInfo, err error) error {
if err != nil && err != os.ErrNotExist {
return err
}
// ignore dir itself to avoid an infinite loop!
if fi.Mode().IsDir() && p != dir {
wg.Add(1)
go walkDir(p, pairs, wg, limits)
return filepath.SkipDir
}
if fi.Mode().IsRegular() && fi.Size() > 0 {
wg.Add(1)
go processFile(p, pairs, wg, limits)
}
return nil
}
// again since this walkDir is also IO bound, we have this functionality to wait on the limits channel
// until a slot opens
limits <- true
defer func() {
<-limits
}()
return filepath.Walk(dir, visit)
}
func run(dir string) results {
workers := 2 * runtime.GOMAXPROCS(0)
limits := make(chan bool, workers)
pairs := make(chan pair)
result := make(chan results)
wg := new(sync.WaitGroup)
// we need another goroutine so we don't block here
go collectHashes(pairs, result)
// multi-threaded walk of the directory tree; we need a
// waitGroup because we don't know how many to wait for
wg.Add(1)
err := walkDir(dir, pairs, wg, limits)
if err != nil {
log.Fatal(err)
}
// wait until every walkDir and processFile goroutine has called wg.Done()
wg.Wait()
// by closing pairs we signal that all the hashes
// have been collected; we have to do it here AFTER
// all the workers are done
close(pairs)
return <-result
}
func main() {
if len(os.Args) < 2 {
log.Fatal("Missing parameter, provide dir name!")
}
if hashes := run(os.Args[1]); hashes != nil {
for hash, files := range hashes {
if len(files) > 1 {
// we will use just 7 chars like git
fmt.Println(hash[len(hash)-7:], len(files))
for _, file := range files {
fmt.Println(" ", file)
}
}
}
}
}