golang 计算大文件md5

golist 760 2

golang 计算大文件md5

package main

import (
    "crypto/md5"
    "flag"
    "fmt"
    "io"
    "math"
    "os"
    "time"
)

// filechunk is the number of bytes read from the file per iteration (8 KiB).
const filechunk = 8192

// file_src is the -file command-line flag: the path of the file to hash.
var file_src *string = flag.String("file", "", "The file to hash")

// main hashes the file named by the -file flag, prints its MD5 checksum in
// hexadecimal, and reports the elapsed time. On failure it writes the error
// to standard error and exits with status 1.
func main() {
    flag.Parse()
    fmt.Println("Opening file:" + *file_src)

    t1 := time.Now()

    sum, err := md5File(*file_src)
    if err != nil {
        fmt.Fprintln(os.Stderr, "md5:", err)
        os.Exit(1)
    }
    fmt.Printf("%s checksum is %x\n", *file_src, sum)

    t2 := time.Now()
    fmt.Println("消耗时间:", t2.Sub(t1), "秒")
}

// md5File returns the raw MD5 digest of the file at path. The file is read
// in pieces of at most filechunk bytes so memory use stays constant
// regardless of the file size.
func md5File(path string) ([]byte, error) {
    file, err := os.Open(path)
    if err != nil {
        return nil, err
    }
    defer file.Close()

    info, err := file.Stat()
    if err != nil {
        return nil, err
    }

    hash := md5.New()
    buf := make([]byte, filechunk) // allocated once and reused for every chunk
    for remaining := info.Size(); remaining > 0; {
        n := int(math.Min(filechunk, float64(remaining)))
        // ReadFull retries short reads and reports an error if fewer than n
        // bytes are available, so stale buffer contents are never hashed.
        if _, err := io.ReadFull(file, buf[:n]); err != nil {
            return nil, fmt.Errorf("reading %s: %w", path, err)
        }
        hash.Write(buf[:n]) // hash.Hash documents that Write never returns an error
        remaining -= int64(n)
    }
    return hash.Sum(nil), nil
}

该主题到 2016-02-26 14:56:22 共 2 条回复
g
golist #

可以把它单独做成一个包:

// Package md5 computes MD5 checksum for large files
package md5

import (
    "bufio"
    "crypto/md5"
    "fmt"
    "io"
    "os"
)

// bufferSize is the size in bytes of the buffer used for each read (64 KiB).
const bufferSize = 65536

// MD5sum returns the MD5 checksum of the file at filename as a lowercase
// hexadecimal string.
//
// If filename names a directory, MD5sum returns an empty string and a nil
// error. Any error from inspecting, opening, or reading the file is returned
// unchanged together with an empty string.
func MD5sum(filename string) (string, error) {
    info, err := os.Stat(filename)
    if err != nil {
        return "", err
    }
    if info.IsDir() {
        return "", nil
    }

    file, err := os.Open(filename)
    if err != nil {
        return "", err
    }
    defer file.Close()

    hash := md5.New()
    reader := bufio.NewReader(file)
    buf := make([]byte, bufferSize)
    for {
        n, err := reader.Read(buf)
        // A reader may return n > 0 together with an error (including
        // io.EOF), so hash the bytes before looking at err.
        hash.Write(buf[:n])
        if err == io.EOF {
            break
        }
        if err != nil {
            return "", err
        }
    }

    return fmt.Sprintf("%x", hash.Sum(nil)), nil
}

使用示例:

package main

import (
    "fmt"
    "./md5"
)

// main prints the MD5 checksum of a hard-coded example file, or the error
// returned by md5.MD5sum if the checksum could not be computed.
func main() {
    file := "/Users/wei/Downloads/Win8pro.iso"
    md5sum, err := md5.MD5sum(file)
    if err != nil {
        fmt.Println("md5sum failed:", err)
        return
    }
    fmt.Println(md5sum)
}
g
golist #
package main

import (
    "bufio"
    "crypto/md5"
    "flag"
    "fmt"
    "io"
    "os"
    "time"
)

// file_src is the -file command-line flag: the path of the file to hash.
var file_src *string = flag.String("file", "", "The file to hash")

// bufferSize is the size in bytes of the buffer used for each read (64 KiB).
const bufferSize = 65536

// MD5sum returns the MD5 checksum of the file at filename as a lowercase
// hexadecimal string.
//
// If filename names a directory, MD5sum returns an empty string and a nil
// error. Any error from inspecting, opening, or reading the file is returned
// unchanged together with an empty string.
func MD5sum(filename string) (string, error) {
    info, err := os.Stat(filename)
    if err != nil {
        return "", err
    }
    if info.IsDir() {
        return "", nil
    }

    file, err := os.Open(filename)
    if err != nil {
        return "", err
    }
    defer file.Close()

    hash := md5.New()
    reader := bufio.NewReader(file)
    buf := make([]byte, bufferSize)
    for {
        n, err := reader.Read(buf)
        // A reader may return n > 0 together with an error (including
        // io.EOF), so hash the bytes before looking at err.
        hash.Write(buf[:n])
        if err == io.EOF {
            break
        }
        if err != nil {
            return "", err
        }
    }

    return fmt.Sprintf("%x", hash.Sum(nil)), nil
}

// main hashes the file named by the -file flag with MD5sum, prints the
// checksum, and reports the elapsed time. If MD5sum fails, the error is
// written to standard error and the program exits with status 1 instead of
// printing an empty checksum.
func main() {
    flag.Parse()
    fmt.Println("Opening file:" + *file_src)

    t1 := time.Now()

    md5sum, err := MD5sum(*file_src)
    if err != nil {
        fmt.Fprintln(os.Stderr, "md5sum:", err)
        os.Exit(1)
    }
    fmt.Println(md5sum)

    t2 := time.Now()
    fmt.Println("消耗时间:", t2.Sub(t1), "秒")
}

3.8 GB 的文件,Go 比 Python 快约 4 秒:Go 用时 8.491195742s,Python 用时 12.5568759441s

登录发表评论

桂公网安备 45122402000014号