A tool to facilitate large ninja files comparison.
Bug: 201713929 Test: internal Change-Id: Ifd976eed1e58b7409e3deacf99917206f0149ade
This commit is contained in:
151
tools/canoninja/README.md
Normal file
151
tools/canoninja/README.md
Normal file
@@ -0,0 +1,151 @@
|
|||||||
|
# Ninja File Canonicalizer
|
||||||
|
|
||||||
|
Suppose we have a tool that generates a Ninja file from some other description (think Kati and makefiles), and during
|
||||||
|
the testing we discovered a regression. Furthermore, suppose that the generated Ninja file is large (think millions of
|
||||||
|
lines). And, the new Ninja file has build statements and rules in a slightly different order. As the tool generates the
|
||||||
|
rule names, the real differences in the output of the `diff` command are drowned in noise. Enter Canoninja.
|
||||||
|
|
||||||
|
Canoninja renames each Ninja rule to the hash of its contents. After that, we can just sort the build statements, and a
|
||||||
|
simple `comm` command immediately reveal the essential difference between the files.
|
||||||
|
|
||||||
|
## Example
|
||||||
|
|
||||||
|
Consider the following makefile
|
||||||
|
|
||||||
|
```makefile
|
||||||
|
second :=
|
||||||
|
first: foo
|
||||||
|
foo:
|
||||||
|
@echo foo
|
||||||
|
second: bar
|
||||||
|
bar:
|
||||||
|
@echo bar
|
||||||
|
```
|
||||||
|
|
||||||
|
Depending on Kati version converting it to Ninja file will yield either:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ cat /tmp/1.ninja
|
||||||
|
# Generated by kati 06f2569b2d16628608c000a76e3d495a5a5528cb
|
||||||
|
|
||||||
|
pool local_pool
|
||||||
|
depth = 72
|
||||||
|
|
||||||
|
build _kati_always_build_: phony
|
||||||
|
|
||||||
|
build first: phony foo
|
||||||
|
rule rule0
|
||||||
|
description = build $out
|
||||||
|
command = /bin/sh -c "echo foo"
|
||||||
|
build foo: rule0
|
||||||
|
build second: phony bar
|
||||||
|
rule rule1
|
||||||
|
description = build $out
|
||||||
|
command = /bin/sh -c "echo bar"
|
||||||
|
build bar: rule1
|
||||||
|
|
||||||
|
default first
|
||||||
|
```
|
||||||
|
|
||||||
|
or
|
||||||
|
|
||||||
|
```
|
||||||
|
$ cat 2.ninja
|
||||||
|
# Generated by kati 371194da71b3e191fea6f2ccceb7b061bd0de310
|
||||||
|
|
||||||
|
pool local_pool
|
||||||
|
depth = 72
|
||||||
|
|
||||||
|
build _kati_always_build_: phony
|
||||||
|
|
||||||
|
build second: phony bar
|
||||||
|
rule rule0
|
||||||
|
description = build $out
|
||||||
|
command = /bin/sh -c "echo bar"
|
||||||
|
build bar: rule0
|
||||||
|
build first: phony foo
|
||||||
|
rule rule1
|
||||||
|
description = build $out
|
||||||
|
command = /bin/sh -c "echo foo"
|
||||||
|
build foo: rule1
|
||||||
|
|
||||||
|
default first
|
||||||
|
```
|
||||||
|
|
||||||
|
This is a quirk in Kati, see https://github.com/google/kati/issues/238
|
||||||
|
|
||||||
|
Trying to find out the difference between the targets even after sorting them isn't too helpful:
|
||||||
|
|
||||||
|
```
|
||||||
|
diff <(grep '^build' /tmp/1.ninja|sort) <(grep '^build' /tmp/2.ninja | sort)
|
||||||
|
1c1
|
||||||
|
< build bar: rule1
|
||||||
|
---
|
||||||
|
> build bar: rule0
|
||||||
|
3c3
|
||||||
|
< build foo: rule0
|
||||||
|
---
|
||||||
|
> build foo: rule1
|
||||||
|
```
|
||||||
|
|
||||||
|
However, running these files through `canoninja` yields
|
||||||
|
|
||||||
|
```
|
||||||
|
$ canoninja /tmp/1.ninja
|
||||||
|
# Generated by kati 06f2569b2d16628608c000a76e3d495a5a5528cb
|
||||||
|
|
||||||
|
pool local_pool
|
||||||
|
depth = 72
|
||||||
|
|
||||||
|
build _kati_always_build_: phony
|
||||||
|
|
||||||
|
build first: phony foo
|
||||||
|
rule R2f9981d3c152fc255370dc67028244f7bed72a03
|
||||||
|
description = build $out
|
||||||
|
command = /bin/sh -c "echo foo"
|
||||||
|
build foo: R2f9981d3c152fc255370dc67028244f7bed72a03
|
||||||
|
build second: phony bar
|
||||||
|
rule R62640f3f9095cf2da5b9d9e2a82f746cc710c94c
|
||||||
|
description = build $out
|
||||||
|
command = /bin/sh -c "echo bar"
|
||||||
|
build bar: R62640f3f9095cf2da5b9d9e2a82f746cc710c94c
|
||||||
|
|
||||||
|
default first
|
||||||
|
```
|
||||||
|
|
||||||
|
and
|
||||||
|
|
||||||
|
```
|
||||||
|
~/go/bin/canoninja /tmp/2.ninja
|
||||||
|
# Generated by kati 371194da71b3e191fea6f2ccceb7b061bd0de310
|
||||||
|
|
||||||
|
pool local_pool
|
||||||
|
depth = 72
|
||||||
|
|
||||||
|
build _kati_always_build_: phony
|
||||||
|
|
||||||
|
build second: phony bar
|
||||||
|
rule R62640f3f9095cf2da5b9d9e2a82f746cc710c94c
|
||||||
|
description = build $out
|
||||||
|
command = /bin/sh -c "echo bar"
|
||||||
|
build bar: R62640f3f9095cf2da5b9d9e2a82f746cc710c94c
|
||||||
|
build first: phony foo
|
||||||
|
rule R2f9981d3c152fc255370dc67028244f7bed72a03
|
||||||
|
description = build $out
|
||||||
|
command = /bin/sh -c "echo foo"
|
||||||
|
build foo: R2f9981d3c152fc255370dc67028244f7bed72a03
|
||||||
|
|
||||||
|
default first
|
||||||
|
```
|
||||||
|
|
||||||
|
and when we extract only build statements and sort them, we see that both Ninja files define the same graph:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
$ diff <(~/go/bin/canoninja /tmp/1.ninja | grep '^build' | sort) \
|
||||||
|
<(~/go/bin/canoninja /tmp/2.ninja | grep '^build' | sort)
|
||||||
|
```
|
||||||
|
|
||||||
|
# Todo
|
||||||
|
|
||||||
|
* Optionally output only the build statements, optionally sorted
|
||||||
|
* Handle continuation lines correctly
|
130
tools/canoninja/canoninja.go
Normal file
130
tools/canoninja/canoninja.go
Normal file
@@ -0,0 +1,130 @@
|
|||||||
|
package canoninja
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"crypto/sha1"
|
||||||
|
"encoding/hex"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
rulePrefix = []byte("rule ")
|
||||||
|
buildPrefix = []byte("build ")
|
||||||
|
phonyRule = []byte("phony")
|
||||||
|
)
|
||||||
|
|
||||||
|
func Generate(path string, buffer []byte, sink io.Writer) error {
|
||||||
|
// Break file into lines
|
||||||
|
from := 0
|
||||||
|
var lines [][]byte
|
||||||
|
for from < len(buffer) {
|
||||||
|
line := getLine(buffer[from:])
|
||||||
|
lines = append(lines, line)
|
||||||
|
from += len(line)
|
||||||
|
}
|
||||||
|
|
||||||
|
// FOr each rule, calculate and remember its digest
|
||||||
|
ruleDigest := make(map[string]string)
|
||||||
|
for i := 0; i < len(lines); {
|
||||||
|
if bytes.HasPrefix(lines[i], rulePrefix) {
|
||||||
|
// Find ruleName
|
||||||
|
rn := ruleName(lines[i])
|
||||||
|
if len(rn) == 0 {
|
||||||
|
return fmt.Errorf("%s:%d: rule name is missing or on the next line", path, i+1)
|
||||||
|
}
|
||||||
|
sRuleName := string(rn)
|
||||||
|
if _, ok := ruleDigest[sRuleName]; ok {
|
||||||
|
return fmt.Errorf("%s:%d: the rule %s has been already defined", path, i+1, sRuleName)
|
||||||
|
}
|
||||||
|
// Calculate rule text digest as a digests of line digests.
|
||||||
|
var digests []byte
|
||||||
|
doDigest := func(b []byte) {
|
||||||
|
h := sha1.New()
|
||||||
|
h.Write(b)
|
||||||
|
digests = h.Sum(digests)
|
||||||
|
|
||||||
|
}
|
||||||
|
// For the first line, digest everything after rule's name
|
||||||
|
doDigest(lines[i][cap(lines[i])+len(rn)-cap(rn):])
|
||||||
|
for i++; i < len(lines) && lines[i][0] == ' '; i++ {
|
||||||
|
doDigest(lines[i])
|
||||||
|
}
|
||||||
|
h := sha1.New()
|
||||||
|
h.Write(digests)
|
||||||
|
ruleDigest[sRuleName] = "R" + hex.EncodeToString(h.Sum(nil))
|
||||||
|
|
||||||
|
} else {
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Rewrite rule names.
|
||||||
|
for i, line := range lines {
|
||||||
|
if bytes.HasPrefix(line, buildPrefix) {
|
||||||
|
brn := getBuildRuleName(line)
|
||||||
|
if bytes.Equal(brn, phonyRule) {
|
||||||
|
sink.Write(line)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if len(brn) == 0 {
|
||||||
|
return fmt.Errorf("%s:%d: build statement lacks rule name", path, i+1)
|
||||||
|
}
|
||||||
|
sink.Write(line[0 : cap(line)-cap(brn)])
|
||||||
|
if digest, ok := ruleDigest[string(brn)]; ok {
|
||||||
|
sink.Write([]byte(digest))
|
||||||
|
} else {
|
||||||
|
return fmt.Errorf("%s:%d: no rule for this build target", path, i+1)
|
||||||
|
}
|
||||||
|
sink.Write(line[cap(line)+len(brn)-cap(brn):])
|
||||||
|
} else if bytes.HasPrefix(line, rulePrefix) {
|
||||||
|
rn := ruleName(line)
|
||||||
|
// Write everything before it
|
||||||
|
sink.Write(line[0 : cap(line)-cap(rn)])
|
||||||
|
sink.Write([]byte(ruleDigest[string(rn)]))
|
||||||
|
sink.Write(line[cap(line)+len(rn)-cap(rn):])
|
||||||
|
} else {
|
||||||
|
//goland:noinspection GoUnhandledErrorResult
|
||||||
|
sink.Write(line)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func getLine(b []byte) []byte {
|
||||||
|
if n := bytes.IndexByte(b, '\n'); n >= 0 {
|
||||||
|
return b[:n+1]
|
||||||
|
}
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns build statement's rule name
|
||||||
|
func getBuildRuleName(line []byte) []byte {
|
||||||
|
n := bytes.IndexByte(line, ':')
|
||||||
|
if n <= 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
ruleName := line[n+1:]
|
||||||
|
if ruleName[0] == ' ' {
|
||||||
|
ruleName = bytes.TrimLeft(ruleName, " ")
|
||||||
|
}
|
||||||
|
if n := bytes.IndexAny(ruleName, " \t\r\n"); n >= 0 {
|
||||||
|
ruleName = ruleName[0:n]
|
||||||
|
}
|
||||||
|
return ruleName
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns rule statement's rule name
|
||||||
|
func ruleName(lineAfterRule []byte) []byte {
|
||||||
|
ruleName := lineAfterRule[len(rulePrefix):]
|
||||||
|
if len(ruleName) == 0 {
|
||||||
|
return ruleName
|
||||||
|
}
|
||||||
|
if ruleName[0] == ' ' {
|
||||||
|
ruleName = bytes.TrimLeft(ruleName, " ")
|
||||||
|
}
|
||||||
|
if n := bytes.IndexAny(ruleName, " \t\r\n"); n >= 0 {
|
||||||
|
ruleName = ruleName[0:n]
|
||||||
|
}
|
||||||
|
return ruleName
|
||||||
|
}
|
47
tools/canoninja/canoninja_test.go
Normal file
47
tools/canoninja/canoninja_test.go
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
package canoninja
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestGenerate(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
in []byte
|
||||||
|
wantSink string
|
||||||
|
wantErr bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "1",
|
||||||
|
in: []byte(`
|
||||||
|
rule rule1
|
||||||
|
abcd
|
||||||
|
rule rule2
|
||||||
|
abcd
|
||||||
|
build x: rule1
|
||||||
|
`),
|
||||||
|
wantSink: `
|
||||||
|
rule R9c97aba7f61994be6862f5ea9a62d26130c7f48b
|
||||||
|
abcd
|
||||||
|
rule R9c97aba7f61994be6862f5ea9a62d26130c7f48b
|
||||||
|
abcd
|
||||||
|
build x: R9c97aba7f61994be6862f5ea9a62d26130c7f48b
|
||||||
|
`,
|
||||||
|
wantErr: false,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
sink := &bytes.Buffer{}
|
||||||
|
err := Generate("<file>", tt.in, sink)
|
||||||
|
if (err != nil) != tt.wantErr {
|
||||||
|
t.Errorf("Generate() error = %v, wantErr %v", err, tt.wantErr)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if gotSink := sink.String(); gotSink != tt.wantSink {
|
||||||
|
t.Errorf("Generate() gotSink = %v, want %v", gotSink, tt.wantSink)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
36
tools/canoninja/cmd/main.go
Normal file
36
tools/canoninja/cmd/main.go
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
/*
|
||||||
|
Canoninja reads a Ninja file and changes the rule names to be the digest of the rule contents.
|
||||||
|
Feed it to a filter that extracts only build statements, sort them, and you will have a crude
|
||||||
|
but effective tool to find small differences between two Ninja files.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import (
|
||||||
|
"canoninja"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
flag.Parse()
|
||||||
|
files := flag.Args()
|
||||||
|
if len(files) == 0 {
|
||||||
|
files = []string{"/dev/stdin"}
|
||||||
|
}
|
||||||
|
rc := 0
|
||||||
|
for _, f := range files {
|
||||||
|
if buffer, err := os.ReadFile(f); err == nil {
|
||||||
|
err = canoninja.Generate(f, buffer, os.Stdout)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintln(os.Stderr, err)
|
||||||
|
rc = 1
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
fmt.Fprintf(os.Stderr, "%s: %s\n", f, err)
|
||||||
|
rc = 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
os.Exit(rc)
|
||||||
|
}
|
1
tools/canoninja/go.mod
Normal file
1
tools/canoninja/go.mod
Normal file
@@ -0,0 +1 @@
|
|||||||
|
module canoninja
|
Reference in New Issue
Block a user