diff --git a/finder/fs/Android.bp b/finder/fs/Android.bp index fe0a0d36c..27e3c7d71 100644 --- a/finder/fs/Android.bp +++ b/finder/fs/Android.bp @@ -21,6 +21,10 @@ bootstrap_go_package { pkgPath: "android/soong/finder/fs", srcs: [ "fs.go", + "readdir.go", + ], + testSrcs: [ + "readdir_test.go", ], darwin: { srcs: [ diff --git a/finder/fs/fs.go b/finder/fs/fs.go index 3de548659..9c138cd10 100644 --- a/finder/fs/fs.go +++ b/finder/fs/fs.go @@ -75,8 +75,19 @@ type DirEntryInfo interface { IsDir() bool } +type dirEntryInfo struct { + name string + mode os.FileMode + modeExists bool +} + var _ DirEntryInfo = os.FileInfo(nil) +func (d *dirEntryInfo) Name() string { return d.name } +func (d *dirEntryInfo) Mode() os.FileMode { return d.mode } +func (d *dirEntryInfo) IsDir() bool { return d.mode.IsDir() } +func (d *dirEntryInfo) String() string { return d.name + ": " + d.mode.String() } + // osFs implements FileSystem using the local disk. type osFs struct{} @@ -89,7 +100,7 @@ func (osFs) Lstat(path string) (stats os.FileInfo, err error) { } func (osFs) ReadDir(path string) (contents []DirEntryInfo, err error) { - entries, err := ioutil.ReadDir(path) + entries, err := readdir(path) if err != nil { return nil, err } diff --git a/finder/fs/readdir.go b/finder/fs/readdir.go new file mode 100644 index 000000000..f6d7813e1 --- /dev/null +++ b/finder/fs/readdir.go @@ -0,0 +1,219 @@ +// Copyright 2017 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fs + +// This is based on the readdir implementation from Go 1.9: +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +import ( + "os" + "syscall" + "unsafe" +) + +const ( + blockSize = 4096 +) + +func readdir(path string) ([]DirEntryInfo, error) { + f, err := os.Open(path) + defer f.Close() + + if err != nil { + return nil, err + } + // This implicitly switches the fd to non-blocking mode, which is less efficient than what + // file.ReadDir does since it will keep a thread blocked and not just a goroutine. + fd := int(f.Fd()) + + buf := make([]byte, blockSize) + entries := make([]*dirEntryInfo, 0, 100) + + for { + n, errno := syscall.ReadDirent(fd, buf) + if errno != nil { + err = os.NewSyscallError("readdirent", errno) + break + } + if n <= 0 { + break // EOF + } + + entries = parseDirent(buf[:n], entries) + } + + ret := make([]DirEntryInfo, 0, len(entries)) + + for _, entry := range entries { + if !entry.modeExists { + mode, lerr := lstatFileMode(path + "/" + entry.name) + if os.IsNotExist(lerr) { + // File disappeared between readdir + stat. + // Just treat it as if it didn't exist. + continue + } + if lerr != nil { + return ret, lerr + } + entry.mode = mode + entry.modeExists = true + } + ret = append(ret, entry) + } + + return ret, err +} + +func parseDirent(buf []byte, entries []*dirEntryInfo) []*dirEntryInfo { + for len(buf) > 0 { + reclen, ok := direntReclen(buf) + if !ok || reclen > uint64(len(buf)) { + return entries + } + rec := buf[:reclen] + buf = buf[reclen:] + ino, ok := direntIno(rec) + if !ok { + break + } + if ino == 0 { // File absent in directory. + continue + } + typ, ok := direntType(rec) + if !ok { + break + } + const namoff = uint64(unsafe.Offsetof(syscall.Dirent{}.Name)) + namlen, ok := direntNamlen(rec) + if !ok || namoff+namlen > uint64(len(rec)) { + break + } + name := rec[namoff : namoff+namlen] + + for i, c := range name { + if c == 0 { + name = name[:i] + break + } + } + // Check for useless names before allocating a string. + if string(name) == "." || string(name) == ".." { + continue + } + + mode, modeExists := direntTypeToFileMode(typ) + + entries = append(entries, &dirEntryInfo{string(name), mode, modeExists}) + } + return entries +} + +func direntIno(buf []byte) (uint64, bool) { + return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Ino), unsafe.Sizeof(syscall.Dirent{}.Ino)) +} + +func direntType(buf []byte) (uint64, bool) { + return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Type), unsafe.Sizeof(syscall.Dirent{}.Type)) +} + +func direntReclen(buf []byte) (uint64, bool) { + return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Reclen), unsafe.Sizeof(syscall.Dirent{}.Reclen)) +} + +func direntNamlen(buf []byte) (uint64, bool) { + reclen, ok := direntReclen(buf) + if !ok { + return 0, false + } + return reclen - uint64(unsafe.Offsetof(syscall.Dirent{}.Name)), true +} + +// readInt returns the size-bytes unsigned integer in native byte order at offset off. +func readInt(b []byte, off, size uintptr) (u uint64, ok bool) { + if len(b) < int(off+size) { + return 0, false + } + return readIntLE(b[off:], size), true +} + +func readIntLE(b []byte, size uintptr) uint64 { + switch size { + case 1: + return uint64(b[0]) + case 2: + _ = b[1] // bounds check hint to compiler; see golang.org/issue/14808 + return uint64(b[0]) | uint64(b[1])<<8 + case 4: + _ = b[3] // bounds check hint to compiler; see golang.org/issue/14808 + return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 + case 8: + _ = b[7] // bounds check hint to compiler; see golang.org/issue/14808 + return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | + uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 + default: + panic("syscall: readInt with unsupported size") + } +} + +// If the directory entry doesn't specify the type, fall back to using lstat to get the type. +func lstatFileMode(name string) (os.FileMode, error) { + stat, err := os.Lstat(name) + if err != nil { + return 0, err + } + + return stat.Mode() & (os.ModeType | os.ModeCharDevice), nil +} + +// from Linux and Darwin dirent.h +const ( + DT_UNKNOWN = 0 + DT_FIFO = 1 + DT_CHR = 2 + DT_DIR = 4 + DT_BLK = 6 + DT_REG = 8 + DT_LNK = 10 + DT_SOCK = 12 +) + +func direntTypeToFileMode(typ uint64) (os.FileMode, bool) { + exists := true + var mode os.FileMode + switch typ { + case DT_UNKNOWN: + exists = false + case DT_FIFO: + mode = os.ModeNamedPipe + case DT_CHR: + mode = os.ModeDevice | os.ModeCharDevice + case DT_DIR: + mode = os.ModeDir + case DT_BLK: + mode = os.ModeDevice + case DT_REG: + mode = 0 + case DT_LNK: + mode = os.ModeSymlink + case DT_SOCK: + mode = os.ModeSocket + default: + exists = false + } + + return mode, exists +} diff --git a/finder/fs/readdir_test.go b/finder/fs/readdir_test.go new file mode 100644 index 000000000..24a6d1884 --- /dev/null +++ b/finder/fs/readdir_test.go @@ -0,0 +1,312 @@ +// Copyright 2017 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fs + +import ( + "os" + "reflect" + "runtime" + "testing" +) + +func TestParseDirent(t *testing.T) { + testCases := []struct { + name string + in []byte + out []*dirEntryInfo + }{ + { + // Test that type DT_DIR is translated to os.ModeDir + name: "dir", + in: []byte{ + // __ino64_t d_ino; + 0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, + // __off64_t d_off; + 0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03, + // unsigned short int d_reclen; + 0x28, 0x00, + // unsigned char d_type; + 0x04, + // char d_name[]; + 0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, + out: []*dirEntryInfo{ + {".module_paths", os.ModeDir, true}, + }, + }, + { + // Test that type DT_REG is translated to a regular file + name: "file", + in: []byte{ + // __ino64_t d_ino; + 0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, + // __off64_t d_off; + 0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03, + // unsigned short int d_reclen; + 0x28, 0x00, + // unsigned char d_type; + 0x08, + // char d_name[]; + 0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, + out: []*dirEntryInfo{ + {".module_paths", 0, true}, + }, + }, + { + // Test that type DT_LNK is translated to a regular os.ModeSymlink + name: "symlink", + in: []byte{ + // __ino64_t d_ino; + 0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, + // __off64_t d_off; + 0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03, + // unsigned short int d_reclen; + 0x28, 0x00, + // unsigned char d_type; + 0x0a, + // char d_name[]; + 0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, + out: []*dirEntryInfo{ + {".module_paths", os.ModeSymlink, true}, + }, + }, + { + // Test that type DT_UNKNOWN sets modeExists: false + name: "unknown", + in: []byte{ + // __ino64_t d_ino; + 0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, + // __off64_t d_off; + 0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03, + // unsigned short int d_reclen; + 0x28, 0x00, + // unsigned char d_type; + 0x00, + // char d_name[]; + 0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, + out: []*dirEntryInfo{ + {".module_paths", 0, false}, + }, + }, + { + // Test a name with no padding after the null terminator + name: "no padding", + in: []byte{ + // __ino64_t d_ino; + 0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, + // __off64_t d_off; + 0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03, + // unsigned short int d_reclen; + 0x20, 0x00, + // unsigned char d_type; + 0x04, + // char d_name[]; + 0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x00, + }, + out: []*dirEntryInfo{ + {".module_path", os.ModeDir, true}, + }, + }, + { + // Test two sequential entries + name: "two entries", + in: []byte{ + // __ino64_t d_ino; + 0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, + // __off64_t d_off; + 0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03, + // unsigned short int d_reclen; + 0x28, 0x00, + // unsigned char d_type; + 0x04, + // char d_name[]; + 0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + // __ino64_t d_ino; + 0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, + // __off64_t d_off; + 0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03, + // unsigned short int d_reclen; + 0x28, 0x00, + // unsigned char d_type; + 0x04, + // char d_name[]; + 0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x74, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, + out: []*dirEntryInfo{ + {".module_paths", os.ModeDir, true}, + {".module_patht", os.ModeDir, true}, + }, + }, + { + // Test two sequential entries with no padding between them + name: "two entries no padding", + in: []byte{ + // __ino64_t d_ino; + 0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, + // __off64_t d_off; + 0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03, + // unsigned short int d_reclen; + 0x20, 0x00, + // unsigned char d_type; + 0x04, + // char d_name[]; + 0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x00, + + // __ino64_t d_ino; + 0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, + // __off64_t d_off; + 0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03, + // unsigned short int d_reclen; + 0x28, 0x00, + // unsigned char d_type; + 0x04, + // char d_name[]; + 0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, + out: []*dirEntryInfo{ + {".module_path", os.ModeDir, true}, + {".module_paths", os.ModeDir, true}, + }, + }, + { + // Test an empty buffer. This shouldn't happen in practice because + // readdir doesn't call parseDirent if no bytes were returned. + name: "empty", + in: []byte{}, + out: nil, + }, + { + name: "missing null terminator", + in: []byte{ + // __ino64_t d_ino; + 0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, + // __off64_t d_off; + 0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03, + // unsigned short int d_reclen; + 0x20, 0x00, + // unsigned char d_type; + 0x04, + // char d_name[]; + 0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73, + }, + out: []*dirEntryInfo{ + {".module_paths", os.ModeDir, true}, + }, + }, + { + // Test two sequential entries where the first has an incorrect d_reclen. + // Should return with no entries. + name: "two entries first malformed", + in: []byte{ + // __ino64_t d_ino; + 0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, + // __off64_t d_off; + 0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03, + // unsigned short int d_reclen; + 0x10, 0x00, + // unsigned char d_type; + 0x04, + // char d_name[]; + 0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x00, + + // __ino64_t d_ino; + 0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, + // __off64_t d_off; + 0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03, + // unsigned short int d_reclen; + 0x28, 0x00, + // unsigned char d_type; + 0x04, + // char d_name[]; + 0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, + out: nil, + }, + { + // Test two sequential entries where the second has an incorrect d_reclen. + // Should return the first entry. + name: "two entries second malformed", + in: []byte{ + // __ino64_t d_ino; + 0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, + // __off64_t d_off; + 0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03, + // unsigned short int d_reclen; + 0x28, 0x00, + // unsigned char d_type; + 0x04, + // char d_name[]; + 0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x00, + + // __ino64_t d_ino; + 0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, + // __off64_t d_off; + 0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03, + // unsigned short int d_reclen; + 0x10, 0x00, + // unsigned char d_type; + 0x04, + // char d_name[]; + 0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, + out: []*dirEntryInfo{ + {".module_path", os.ModeDir, true}, + }, + }, + { + // Test a reclen that goes past the end of the buffer. + name: "overrun", + in: []byte{ + // __ino64_t d_ino; + 0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, + // __off64_t d_off; + 0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03, + // unsigned short int d_reclen; + 0x30, 0x00, + // unsigned char d_type; + 0x04, + // char d_name[]; + 0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x00, + }, + out: nil, + }, + } + + if runtime.GOOS != "linux" { + t.Skip("depends on Linux definitions of syscall.Dirent") + } + + for _, testCase := range testCases { + t.Run(testCase.name, func(t *testing.T) { + entries := parseDirent(testCase.in, nil) + if !reflect.DeepEqual(testCase.out, entries) { + t.Fatalf("expected:\n %v\ngot:\n %v\n", testCase.out, entries) + } + }) + } +}