Merge "Reimplement ioutil.ReadDir with a version that avoids calling lstat"

This commit is contained in:
Colin Cross
2017-12-27 23:04:28 +00:00
committed by Gerrit Code Review
4 changed files with 547 additions and 1 deletions

View File

@@ -21,6 +21,10 @@ bootstrap_go_package {
pkgPath: "android/soong/finder/fs",
srcs: [
"fs.go",
"readdir.go",
],
testSrcs: [
"readdir_test.go",
],
darwin: {
srcs: [

View File

@@ -75,8 +75,19 @@ type DirEntryInfo interface {
IsDir() bool
}
// dirEntryInfo is the DirEntryInfo implementation produced when raw directory
// records are parsed. Field order matters: tests build values positionally.
type dirEntryInfo struct {
// name is the entry's file name, as returned by Name.
name string
// mode holds the entry's file-type bits, as returned by Mode.
mode os.FileMode
// modeExists is false when the directory record carried no usable type
// (DT_UNKNOWN or an unrecognized code); readdir then fills mode in via lstat.
modeExists bool
}
// Compile-time assertion that every os.FileInfo also satisfies DirEntryInfo.
var _ DirEntryInfo = os.FileInfo(nil)
// Name returns the file name stored for this directory entry.
func (d *dirEntryInfo) Name() string {
	return d.name
}
// Mode returns the mode bits stored for this directory entry.
func (d *dirEntryInfo) Mode() os.FileMode {
	return d.mode
}
// IsDir reports whether the stored mode has the directory bit set.
func (d *dirEntryInfo) IsDir() bool {
	return d.mode&os.ModeDir != 0
}
// String renders the entry as "<name>: <mode>" for debugging and test output.
func (d *dirEntryInfo) String() string {
	modeText := d.mode.String()
	return d.name + ": " + modeText
}
// osFs implements FileSystem using the local disk.
// It is an empty struct and carries no state of its own.
type osFs struct{}
@@ -89,7 +100,7 @@ func (osFs) Lstat(path string) (stats os.FileInfo, err error) {
}
func (osFs) ReadDir(path string) (contents []DirEntryInfo, err error) {
entries, err := ioutil.ReadDir(path)
entries, err := readdir(path)
if err != nil {
return nil, err
}

219
finder/fs/readdir.go Normal file
View File

@@ -0,0 +1,219 @@
// Copyright 2017 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fs
// This is based on the readdir implementation from Go 1.9:
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
import (
"os"
"syscall"
"unsafe"
)
const (
// blockSize is the size in bytes of the buffer handed to each
// syscall.ReadDirent call made by readdir.
blockSize = 4096
)
// readdir lists the entries of the directory at path, omitting "." and "..".
//
// Entry types are taken from the raw directory records whenever the record
// carries one; only entries whose type is missing are resolved with an lstat
// call. Entries that vanish between the directory read and that lstat are
// silently dropped.
//
// If reading the directory fails part-way through, the entries decoded up to
// that point are still returned together with the error. If an lstat fails
// for any reason other than the file no longer existing, the entries
// collected so far are returned together with that error.
func readdir(path string) ([]DirEntryInfo, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	// Register the Close only once Open is known to have succeeded, so it is
	// never invoked on the nil *os.File that accompanies an Open error.
	defer f.Close()

	// Fd hands back the raw descriptor for direct use with
	// syscall.ReadDirent. This is less efficient than the os package's own
	// directory reading since it will keep a thread blocked and not just a
	// goroutine.
	// NOTE(review): an earlier version of this comment said Fd switches the
	// descriptor to non-blocking mode; the os package is understood to return
	// it in blocking mode, which matches the thread-blocking cost described
	// above — confirm against the os.File.Fd documentation.
	fd := int(f.Fd())
	buf := make([]byte, blockSize)

	entries := make([]*dirEntryInfo, 0, 100)
	for {
		n, errno := syscall.ReadDirent(fd, buf)
		if errno != nil {
			// Remember the failure, but still post-process and return the
			// entries that were decoded before it happened.
			err = os.NewSyscallError("readdirent", errno)
			break
		}
		if n <= 0 {
			break // EOF
		}
		entries = parseDirent(buf[:n], entries)
	}

	ret := make([]DirEntryInfo, 0, len(entries))
	for _, entry := range entries {
		if !entry.modeExists {
			// The directory record did not say what kind of file this is.
			mode, lerr := lstatFileMode(path + "/" + entry.name)
			if os.IsNotExist(lerr) {
				// File disappeared between readdir + stat.
				// Just treat it as if it didn't exist.
				continue
			}
			if lerr != nil {
				return ret, lerr
			}
			entry.mode = mode
			entry.modeExists = true
		}
		ret = append(ret, entry)
	}
	return ret, err
}
// parseDirent decodes the raw directory records packed into buf and appends
// one dirEntryInfo per usable record to entries, returning the extended slice.
//
// Records with a zero inode and the names "." and ".." are skipped. Decoding
// stops at the first record that is truncated or otherwise cannot be read;
// everything decoded before that point is kept.
func parseDirent(buf []byte, entries []*dirEntryInfo) []*dirEntryInfo {
	// Offset of the name bytes within a raw record.
	const nameOffset = uint64(unsafe.Offsetof(syscall.Dirent{}.Name))

	for len(buf) > 0 {
		recLen, ok := direntReclen(buf)
		if !ok || recLen > uint64(len(buf)) {
			return entries
		}
		// Carve the current record off the front of the buffer.
		record := buf[:recLen]
		buf = buf[recLen:]

		inode, ok := direntIno(record)
		if !ok {
			return entries
		}
		if inode == 0 {
			// File absent in directory.
			continue
		}

		entryType, ok := direntType(record)
		if !ok {
			return entries
		}

		nameLen, ok := direntNamlen(record)
		if !ok || nameOffset+nameLen > uint64(len(record)) {
			return entries
		}

		// The name is null-terminated (and possibly padded) inside the
		// record; cut it at the first zero byte if there is one.
		rawName := record[nameOffset : nameOffset+nameLen]
		for i := 0; i < len(rawName); i++ {
			if rawName[i] == 0 {
				rawName = rawName[:i]
				break
			}
		}

		// Check for useless names before allocating a string.
		switch string(rawName) {
		case ".", "..":
			continue
		}

		mode, known := direntTypeToFileMode(entryType)
		entries = append(entries, &dirEntryInfo{
			name:       string(rawName),
			mode:       mode,
			modeExists: known,
		})
	}
	return entries
}
// direntIno reads the Ino field of the raw directory record in buf.
// The boolean is false when buf is too short to contain the field.
func direntIno(buf []byte) (uint64, bool) {
	var d syscall.Dirent
	offset, width := unsafe.Offsetof(d.Ino), unsafe.Sizeof(d.Ino)
	return readInt(buf, offset, width)
}
// direntType reads the Type field of the raw directory record in buf.
// The boolean is false when buf is too short to contain the field.
func direntType(buf []byte) (uint64, bool) {
	var d syscall.Dirent
	offset, width := unsafe.Offsetof(d.Type), unsafe.Sizeof(d.Type)
	return readInt(buf, offset, width)
}
// direntReclen reads the Reclen field (total record length in bytes) of the
// raw directory record in buf. The boolean is false when buf is too short to
// contain the field.
func direntReclen(buf []byte) (uint64, bool) {
	var d syscall.Dirent
	offset, width := unsafe.Offsetof(d.Reclen), unsafe.Sizeof(d.Reclen)
	return readInt(buf, offset, width)
}
// direntNamlen returns the number of bytes available for the name in the raw
// directory record buf: the record length minus the offset of the Name field
// in syscall.Dirent. The result may include the null terminator and padding.
//
// The boolean is false when the record length cannot be read from buf, or
// when the record length is smaller than the fixed header that precedes the
// name.
func direntNamlen(buf []byte) (uint64, bool) {
	const nameOffset = uint64(unsafe.Offsetof(syscall.Dirent{}.Name))

	reclen, ok := direntReclen(buf)
	if !ok {
		return 0, false
	}
	if reclen < nameOffset {
		// A record shorter than its own header cannot hold a name. Without
		// this guard the unsigned subtraction below would wrap around to a
		// huge value instead of signalling failure.
		return 0, false
	}
	return reclen - nameOffset, true
}
// readInt decodes the size-byte unsigned integer stored at offset off in b.
// The bytes are interpreted by readIntLE, i.e. as little-endian.
// ok is false when b is too short to hold the requested bytes.
func readInt(b []byte, off, size uintptr) (u uint64, ok bool) {
	end := off + size
	if int(end) > len(b) {
		return 0, false
	}
	field := b[off:]
	return readIntLE(field, size), true
}
// readIntLE decodes the first size bytes of b as a little-endian unsigned
// integer. Only sizes 1, 2, 4 and 8 are supported; any other size panics.
// b must hold at least size bytes.
func readIntLE(b []byte, size uintptr) uint64 {
	switch size {
	case 1, 2, 4, 8:
		// supported widths
	default:
		panic("syscall: readInt with unsupported size")
	}

	_ = b[size-1] // bounds check hint to compiler; see golang.org/issue/14808
	var value uint64
	for i := uintptr(0); i < size; i++ {
		// Byte i contributes bits [8*i, 8*i+8).
		value |= uint64(b[i]) << (8 * i)
	}
	return value
}
// lstatFileMode returns the file-type bits of name as reported by os.Lstat,
// with everything outside os.ModeType and os.ModeCharDevice masked off.
// It is the fallback used when a directory entry doesn't specify its type.
// On failure it returns a zero mode together with the os.Lstat error.
func lstatFileMode(name string) (os.FileMode, error) {
	const typeBits = os.ModeType | os.ModeCharDevice

	info, err := os.Lstat(name)
	if err != nil {
		return 0, err
	}
	return info.Mode() & typeBits, nil
}
// Directory entry type codes, from Linux and Darwin dirent.h.
const (
	DT_UNKNOWN = 0  // type not reported by the directory entry
	DT_FIFO    = 1  // named pipe
	DT_CHR     = 2  // character device
	DT_DIR     = 4  // directory
	DT_BLK     = 6  // block device
	DT_REG     = 8  // regular file
	DT_LNK     = 10 // symbolic link
	DT_SOCK    = 12 // socket
)

// direntTypeToFileMode translates a directory entry type code into the
// matching os.FileMode type bits. The boolean is false, and the mode zero,
// when typ is DT_UNKNOWN or any code not listed above, meaning the caller
// has to determine the type some other way.
func direntTypeToFileMode(typ uint64) (os.FileMode, bool) {
	switch typ {
	case DT_REG:
		// Regular files have no type bits set.
		return 0, true
	case DT_DIR:
		return os.ModeDir, true
	case DT_LNK:
		return os.ModeSymlink, true
	case DT_FIFO:
		return os.ModeNamedPipe, true
	case DT_SOCK:
		return os.ModeSocket, true
	case DT_BLK:
		return os.ModeDevice, true
	case DT_CHR:
		return os.ModeDevice | os.ModeCharDevice, true
	}
	// DT_UNKNOWN and unrecognized codes.
	return 0, false
}

312
finder/fs/readdir_test.go Normal file
View File

@@ -0,0 +1,312 @@
// Copyright 2017 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fs
import (
"os"
"reflect"
"runtime"
"testing"
)
// TestParseDirent feeds hand-built raw directory records to parseDirent and
// compares the decoded entries against the expected dirEntryInfo values.
// Each record is laid out as d_ino (8 bytes), d_off (8 bytes), d_reclen
// (2 bytes), d_type (1 byte) and then d_name, as annotated inline.
func TestParseDirent(t *testing.T) {
testCases := []struct {
name string
in []byte
out []*dirEntryInfo
}{
{
// Test that type DT_DIR is translated to os.ModeDir
name: "dir",
in: []byte{
// __ino64_t d_ino;
0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
// __off64_t d_off;
0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
// unsigned short int d_reclen;
0x28, 0x00,
// unsigned char d_type;
0x04,
// char d_name[];
0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
},
out: []*dirEntryInfo{
{".module_paths", os.ModeDir, true},
},
},
{
// Test that type DT_REG is translated to a regular file
name: "file",
in: []byte{
// __ino64_t d_ino;
0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
// __off64_t d_off;
0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
// unsigned short int d_reclen;
0x28, 0x00,
// unsigned char d_type;
0x08,
// char d_name[];
0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
},
out: []*dirEntryInfo{
{".module_paths", 0, true},
},
},
{
// Test that type DT_LNK is translated to os.ModeSymlink
name: "symlink",
in: []byte{
// __ino64_t d_ino;
0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
// __off64_t d_off;
0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
// unsigned short int d_reclen;
0x28, 0x00,
// unsigned char d_type;
0x0a,
// char d_name[];
0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
},
out: []*dirEntryInfo{
{".module_paths", os.ModeSymlink, true},
},
},
{
// Test that type DT_UNKNOWN sets modeExists: false
name: "unknown",
in: []byte{
// __ino64_t d_ino;
0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
// __off64_t d_off;
0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
// unsigned short int d_reclen;
0x28, 0x00,
// unsigned char d_type;
0x00,
// char d_name[];
0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
},
out: []*dirEntryInfo{
{".module_paths", 0, false},
},
},
{
// Test a name with no padding after the null terminator
name: "no padding",
in: []byte{
// __ino64_t d_ino;
0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
// __off64_t d_off;
0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
// unsigned short int d_reclen;
0x20, 0x00,
// unsigned char d_type;
0x04,
// char d_name[];
0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x00,
},
out: []*dirEntryInfo{
{".module_path", os.ModeDir, true},
},
},
{
// Test two sequential entries
name: "two entries",
in: []byte{
// __ino64_t d_ino;
0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
// __off64_t d_off;
0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
// unsigned short int d_reclen;
0x28, 0x00,
// unsigned char d_type;
0x04,
// char d_name[];
0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// __ino64_t d_ino;
0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
// __off64_t d_off;
0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
// unsigned short int d_reclen;
0x28, 0x00,
// unsigned char d_type;
0x04,
// char d_name[];
0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x74,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
},
out: []*dirEntryInfo{
{".module_paths", os.ModeDir, true},
{".module_patht", os.ModeDir, true},
},
},
{
// Test two sequential entries with no padding between them
name: "two entries no padding",
in: []byte{
// __ino64_t d_ino;
0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
// __off64_t d_off;
0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
// unsigned short int d_reclen;
0x20, 0x00,
// unsigned char d_type;
0x04,
// char d_name[];
0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x00,
// __ino64_t d_ino;
0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
// __off64_t d_off;
0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
// unsigned short int d_reclen;
0x28, 0x00,
// unsigned char d_type;
0x04,
// char d_name[];
0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
},
out: []*dirEntryInfo{
{".module_path", os.ModeDir, true},
{".module_paths", os.ModeDir, true},
},
},
{
// Test an empty buffer. This shouldn't happen in practice because
// readdir doesn't call parseDirent if no bytes were returned.
name: "empty",
in: []byte{},
out: nil,
},
{
// Test a name that runs to the end of the record with no null
// terminator; the whole remainder of the record is taken as the name.
name: "missing null terminator",
in: []byte{
// __ino64_t d_ino;
0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
// __off64_t d_off;
0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
// unsigned short int d_reclen;
0x20, 0x00,
// unsigned char d_type;
0x04,
// char d_name[];
0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73,
},
out: []*dirEntryInfo{
{".module_paths", os.ModeDir, true},
},
},
{
// Test two sequential entries where the first has an incorrect d_reclen.
// Should return with no entries.
name: "two entries first malformed",
in: []byte{
// __ino64_t d_ino;
0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
// __off64_t d_off;
0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
// unsigned short int d_reclen;
0x10, 0x00,
// unsigned char d_type;
0x04,
// char d_name[];
0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x00,
// __ino64_t d_ino;
0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
// __off64_t d_off;
0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
// unsigned short int d_reclen;
0x28, 0x00,
// unsigned char d_type;
0x04,
// char d_name[];
0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
},
out: nil,
},
{
// Test two sequential entries where the second has an incorrect d_reclen.
// Should return the first entry.
name: "two entries second malformed",
in: []byte{
// __ino64_t d_ino;
0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
// __off64_t d_off;
0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
// unsigned short int d_reclen;
0x28, 0x00,
// unsigned char d_type;
0x04,
// char d_name[];
0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x00,
// __ino64_t d_ino;
0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
// __off64_t d_off;
0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
// unsigned short int d_reclen;
0x10, 0x00,
// unsigned char d_type;
0x04,
// char d_name[];
0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
},
out: []*dirEntryInfo{
{".module_path", os.ModeDir, true},
},
},
{
// Test a reclen that goes past the end of the buffer.
name: "overrun",
in: []byte{
// __ino64_t d_ino;
0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
// __off64_t d_off;
0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
// unsigned short int d_reclen;
0x30, 0x00,
// unsigned char d_type;
0x04,
// char d_name[];
0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x00,
},
out: nil,
},
}
// The raw records above follow the Linux definition of syscall.Dirent, so
// the comparison is only meaningful there.
if runtime.GOOS != "linux" {
t.Skip("depends on Linux definitions of syscall.Dirent")
}
// Run every case as its own subtest, starting from a nil entries slice.
for _, testCase := range testCases {
t.Run(testCase.name, func(t *testing.T) {
entries := parseDirent(testCase.in, nil)
if !reflect.DeepEqual(testCase.out, entries) {
t.Fatalf("expected:\n %v\ngot:\n %v\n", testCase.out, entries)
}
})
}
}