feat: add whitelist char support to nonascii check #100
4
.gitmodules
vendored
4
.gitmodules
vendored
|
|
@ -70,3 +70,7 @@
|
||||||
path = examples/cpplint/simple
|
path = examples/cpplint/simple
|
||||||
url = ssh://git@focs.ji.sjtu.edu.cn:2222/JOJ/JOJ3-examples.git
|
url = ssh://git@focs.ji.sjtu.edu.cn:2222/JOJ/JOJ3-examples.git
|
||||||
branch = cpplint/simple
|
branch = cpplint/simple
|
||||||
|
[submodule "examples/healthcheck/whitelistedchars-success"]
|
||||||
|
path = examples/healthcheck/whitelistedchars-success
|
||||||
|
url = ssh://git@focs.ji.sjtu.edu.cn:2222/JOJ/JOJ3-examples.git
|
||||||
|
branch = healthcheck/whitelistedchars-success
|
||||||
|
|
|
||||||
|
|
@ -45,6 +45,7 @@ var (
|
||||||
checkFileNameList string
|
checkFileNameList string
|
||||||
checkFileSumList string
|
checkFileSumList string
|
||||||
metaFile []string
|
metaFile []string
|
||||||
|
whitelistedChars string
|
||||||
allowedDomainList string
|
allowedDomainList string
|
||||||
actorCsvPath string
|
actorCsvPath string
|
||||||
showVersion *bool
|
showVersion *bool
|
||||||
|
|
@ -57,6 +58,7 @@ func init() {
|
||||||
flag.Float64Var(&repoSize, "repoSize", 2, "maximum size of the repo in MiB")
|
flag.Float64Var(&repoSize, "repoSize", 2, "maximum size of the repo in MiB")
|
||||||
flag.StringVar(&checkFileNameList, "checkFileNameList", "", "comma-separated list of files to check")
|
flag.StringVar(&checkFileNameList, "checkFileNameList", "", "comma-separated list of files to check")
|
||||||
flag.StringVar(&checkFileSumList, "checkFileSumList", "", "comma-separated list of expected checksums")
|
flag.StringVar(&checkFileSumList, "checkFileSumList", "", "comma-separated list of expected checksums")
|
||||||
|
flag.StringVar(&whitelistedChars, "whitelistedChars", "", "comma-separated list of non-ASCII characters allowed in files")
|
||||||
flag.StringVar(&allowedDomainList, "allowedDomainList", "sjtu.edu.cn", "comma-separated list of allowed domains for commit author email")
|
flag.StringVar(&allowedDomainList, "allowedDomainList", "sjtu.edu.cn", "comma-separated list of allowed domains for commit author email")
|
||||||
flag.StringVar(&actorCsvPath, "actorCsvPath", "/home/tt/.config/joj/students.csv", "path to actor csv file")
|
flag.StringVar(&actorCsvPath, "actorCsvPath", "/home/tt/.config/joj/students.csv", "path to actor csv file")
|
||||||
parseMultiValueFlag(&metaFile, "meta", "meta files to check")
|
parseMultiValueFlag(&metaFile, "meta", "meta files to check")
|
||||||
|
|
@ -74,12 +76,14 @@ func main() {
|
||||||
"repoSize", repoSize,
|
"repoSize", repoSize,
|
||||||
"checkFileNameList", checkFileNameList,
|
"checkFileNameList", checkFileNameList,
|
||||||
"checkFileSumList", checkFileSumList,
|
"checkFileSumList", checkFileSumList,
|
||||||
|
"whitelistedChars", whitelistedChars,
|
||||||
"meta", metaFile,
|
"meta", metaFile,
|
||||||
)
|
)
|
||||||
res := healthcheck.All(
|
res := healthcheck.All(
|
||||||
rootDir,
|
rootDir,
|
||||||
checkFileNameList,
|
checkFileNameList,
|
||||||
checkFileSumList,
|
checkFileSumList,
|
||||||
|
whitelistedChars,
|
||||||
allowedDomainList,
|
allowedDomainList,
|
||||||
actorCsvPath,
|
actorCsvPath,
|
||||||
metaFile,
|
metaFile,
|
||||||
|
|
|
||||||
1
examples/healthcheck/whitelistedchars-success
Submodule
1
examples/healthcheck/whitelistedchars-success
Submodule
|
|
@ -0,0 +1 @@
|
||||||
|
Subproject commit bb9bc06fd5753e7338e9b3230b2fc3e3ce971a05
|
||||||
|
|
@ -12,7 +12,8 @@ type Result struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func All(
|
func All(
|
||||||
rootDir, checkFileNameList, checkFileSumList, allowedDomainList, actorCsvPath string,
|
rootDir, checkFileNameList, checkFileSumList, whitelistedChars,
|
||||||
|
allowedDomainList, actorCsvPath string,
|
||||||
metaFile []string, repoSize float64,
|
metaFile []string, repoSize float64,
|
||||||
) (res Result) {
|
) (res Result) {
|
||||||
var err error
|
var err error
|
||||||
|
|
@ -44,7 +45,7 @@ func All(
|
||||||
} else {
|
} else {
|
||||||
res.Msg += "### Meta File Check Passed\n"
|
res.Msg += "### Meta File Check Passed\n"
|
||||||
}
|
}
|
||||||
err = NonASCIIFiles(rootDir)
|
err = NonASCIIFiles(rootDir, whitelistedChars)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
res.Msg += fmt.Sprintf("### Non-ASCII Characters File Check Failed:\n%s\n", err.Error())
|
res.Msg += fmt.Sprintf("### Non-ASCII Characters File Check Failed:\n%s\n", err.Error())
|
||||||
res.Failed = true
|
res.Failed = true
|
||||||
|
|
|
||||||
|
|
@ -8,11 +8,47 @@ import (
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
"unicode"
|
"unicode"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
"github.com/go-git/go-git/v5"
|
"github.com/go-git/go-git/v5"
|
||||||
"github.com/go-git/go-git/v5/plumbing/format/gitattributes"
|
"github.com/go-git/go-git/v5/plumbing/format/gitattributes"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// parseWhitelistedChars converts the comma-separated value of the
// whitelistedChars flag into a set of runes.
//
// Each element must be exactly one non-ASCII character, optionally padded with
// whitespace. Elements that are empty, longer than one character, not valid
// UTF-8, or plain ASCII are skipped with a warning instead of aborting, so one
// bad entry does not disable the rest of the list. An empty or blank csv
// yields an empty (non-nil) set.
func parseWhitelistedChars(csv string) map[rune]struct{} {
	// Only ASCII whitespace counts as padding around an element.
	// strings.TrimSpace also strips non-ASCII spaces such as U+00A0 or U+3000,
	// which would make those characters impossible to whitelist.
	const asciiSpace = " \t\n\v\f\r"

	whitelist := make(map[rune]struct{})
	if strings.Trim(csv, asciiSpace) == "" {
		return whitelist
	}

	for _, raw := range strings.Split(csv, ",") {
		elem := strings.Trim(raw, asciiSpace)
		if utf8.RuneCountInString(elem) > 1 {
			// Several runes left: fall back to Unicode-aware trimming so an
			// element padded with a non-ASCII space (e.g. typed with a CJK
			// input method) is still accepted.
			if trimmed := strings.TrimSpace(elem); trimmed != "" {
				elem = trimmed
			}
		}

		if elem == "" {
			slog.Warn("ignoring invalid whitelisted-chars element", "element", raw, "reason", "empty element")
			continue
		}
		if utf8.RuneCountInString(elem) != 1 {
			slog.Warn("ignoring invalid whitelisted-chars element", "element", elem, "reason", "element must be exactly one character")
			continue
		}

		ch, _ := utf8.DecodeRuneInString(elem)
		switch {
		case ch == utf8.RuneError:
			// Decoding malformed UTF-8 yields U+FFFD; a literal U+FFFD is
			// rejected as well.
			slog.Warn("ignoring invalid whitelisted-chars element", "element", elem, "reason", "invalid utf-8 rune")
		case ch <= unicode.MaxASCII:
			slog.Warn("ignoring invalid whitelisted-chars element", "element", elem, "reason", "ASCII characters are not allowed")
		default:
			whitelist[ch] = struct{}{}
		}
	}

	return whitelist
}
|
||||||
|
|
||||||
// getSubmodulePathsFromGoGit uses the go-git library to open the repository
|
// getSubmodulePathsFromGoGit uses the go-git library to open the repository
|
||||||
// at the given root path and retrieve a list of all submodule paths.
|
// at the given root path and retrieve a list of all submodule paths.
|
||||||
// It returns a set of submodule paths for efficient lookup.
|
// It returns a set of submodule paths for efficient lookup.
|
||||||
|
|
@ -48,7 +84,7 @@ func getSubmodulePathsFromGoGit(root string) (map[string]struct{}, error) {
|
||||||
|
|
||||||
// getNonASCII retrieves a list of files in the specified root directory that contain non-ASCII characters.
|
// getNonASCII retrieves a list of files in the specified root directory that contain non-ASCII characters.
|
||||||
// It searches for non-ASCII characters in each file's content and returns a list of paths to files containing non-ASCII characters.
|
// It searches for non-ASCII characters in each file's content and returns a list of paths to files containing non-ASCII characters.
|
||||||
func getNonASCII(root string) ([]string, error) {
|
func getNonASCII(root string, whitelist map[rune]struct{}) ([]string, error) {
|
||||||
var nonASCII []string
|
var nonASCII []string
|
||||||
gitattrExist := true
|
gitattrExist := true
|
||||||
var matcher gitattributes.Matcher
|
var matcher gitattributes.Matcher
|
||||||
|
|
@ -113,6 +149,9 @@ func getNonASCII(root string) ([]string, error) {
|
||||||
for scanner.Scan() {
|
for scanner.Scan() {
|
||||||
cont := true
|
cont := true
|
||||||
for _, c := range scanner.Text() {
|
for _, c := range scanner.Text() {
|
||||||
|
if _, ok := whitelist[c]; ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
if c > unicode.MaxASCII {
|
if c > unicode.MaxASCII {
|
||||||
nonASCII = append(nonASCII, "\t"+path)
|
nonASCII = append(nonASCII, "\t"+path)
|
||||||
cont = false
|
cont = false
|
||||||
|
|
@ -132,8 +171,10 @@ func getNonASCII(root string) ([]string, error) {
|
||||||
|
|
||||||
// NonASCIIFiles checks for non-ASCII characters in files within the specified root directory.
|
// NonASCIIFiles checks for non-ASCII characters in files within the specified root directory.
|
||||||
// It prints a message with the paths to files containing non-ASCII characters, if any.
|
// It prints a message with the paths to files containing non-ASCII characters, if any.
|
||||||
func NonASCIIFiles(root string) error {
|
// Additionally, it accepts a comma-separated list of whitelisted characters that are allowed repo-wide.
|
||||||
nonASCII, err := getNonASCII(root)
|
func NonASCIIFiles(root, whitelistedChars string) error {
|
||||||
|
whitelist := parseWhitelistedChars(whitelistedChars)
|
||||||
|
nonASCII, err := getNonASCII(root, whitelist)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Error("getting non-ascii", "err", err)
|
slog.Error("getting non-ascii", "err", err)
|
||||||
return fmt.Errorf("error getting non-ascii: %w", err)
|
return fmt.Errorf("error getting non-ascii: %w", err)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user