Exclude generated files from language statistics (#11653)
* Update go-enry to v2.5.2
parent e8955173a9
commit bd2335671f
@@ -0,0 +1,823 @@
package data

import (
	"bytes"
	"strings"

	"github.com/go-enry/go-enry/v2/regex"
)

// GeneratedCodeExtensions contains all extensions that belong to generated
// files for sure.
var GeneratedCodeExtensions = map[string]struct{}{
	// XCode files
	".nib":             {},
	".xcworkspacedata": {},
	".xcuserstate":     {},
}

// GeneratedCodeNameMatcher is a function that tells whether the file with the
// given name is generated.
type GeneratedCodeNameMatcher func(string) bool

func nameMatches(pattern string) GeneratedCodeNameMatcher {
	r := regex.MustCompile(pattern)
	return func(name string) bool {
		return r.MatchString(name)
	}
}

func nameContains(pattern string) GeneratedCodeNameMatcher {
	return func(name string) bool {
		return strings.Contains(name, pattern)
	}
}

func nameEndsWith(pattern string) GeneratedCodeNameMatcher {
	return func(name string) bool {
		return strings.HasSuffix(name, pattern)
	}
}

// GeneratedCodeNameMatchers are all the matchers that check whether the code
// is generated based only on the file name.
var GeneratedCodeNameMatchers = []GeneratedCodeNameMatcher{
	// Cocoa pods
	nameMatches(`(^Pods|\/Pods)\/`),

	// Carthage build
	nameMatches(`(^|\/)Carthage\/Build\/`),

	// NET designer file
	nameMatches(`(?i)\.designer\.(cs|vb)$`),

	// Generated NET specflow feature file
	nameEndsWith(".feature.cs"),

	// Node modules
	nameContains("node_modules/"),

	// Go vendor
	nameMatches(`vendor\/([-0-9A-Za-z]+\.)+(com|edu|gov|in|me|net|org|fm|io)`),

	// Go lock
	nameEndsWith("Gopkg.lock"),
	nameEndsWith("glide.lock"),

	// Esy lock
	nameMatches(`(^|\/)(\w+\.)?esy.lock$`),

	// NPM shrinkwrap
	nameEndsWith("npm-shrinkwrap.json"),

	// NPM package lock
	nameEndsWith("package-lock.json"),

	// Yarn plugnplay
	nameMatches(`(^|\/)\.pnp\.(c|m)?js$`),

	// Godeps
	nameContains("Godeps/"),

	// Composer lock
	nameEndsWith("composer.lock"),

	// Generated by zephir
	nameMatches(`.\.zep\.(?:c|h|php)$`),

	// Cargo lock
	nameEndsWith("Cargo.lock"),

	// Pipenv lock
	nameEndsWith("Pipfile.lock"),

	// GraphQL relay
	nameContains("__generated__/"),
}

// GeneratedCodeMatcher checks whether the file with the given data is
// generated code.
type GeneratedCodeMatcher func(path, ext string, content []byte) bool

// GeneratedCodeMatchers is the list of all generated code matchers that
// rely on checking the content of the file to make the guess.
var GeneratedCodeMatchers = []GeneratedCodeMatcher{
	isMinifiedFile,
	hasSourceMapReference,
	isSourceMap,
	isCompiledCoffeeScript,
	isGeneratedNetDocfile,
	isGeneratedJavaScriptPEGParser,
	isGeneratedPostScript,
	isGeneratedGo,
	isGeneratedProtobuf,
	isGeneratedJavaScriptProtocolBuffer,
	isGeneratedApacheThrift,
	isGeneratedJNIHeader,
	isVCRCassette,
	isCompiledCythonFile,
	isGeneratedModule,
	isGeneratedUnity3DMeta,
	isGeneratedRacc,
	isGeneratedJFlex,
	isGeneratedGrammarKit,
	isGeneratedRoxygen2,
	isGeneratedJison,
	isGeneratedGRPCCpp,
	isGeneratedDart,
	isGeneratedPerlPPPortHeader,
	isGeneratedGameMakerStudio,
	isGeneratedGimp,
	isGeneratedVisualStudio6,
	isGeneratedHaxe,
	isGeneratedHTML,
	isGeneratedJooq,
}

func canBeMinified(ext string) bool {
	return ext == ".js" || ext == ".css"
}

// isMinifiedFile returns whether the file may be minified.
// We consider a minified file any css or js file whose average number of chars
// per line is more than 110.
func isMinifiedFile(path, ext string, content []byte) bool {
	if !canBeMinified(ext) {
		return false
	}

	var chars, lines uint64
	forEachLine(content, func(line []byte) {
		chars += uint64(len(line))
		lines++
	})

	if lines == 0 {
		return false
	}

	return chars/lines > 110
}

var sourceMapRegex = regex.MustCompile(`^\/[*\/][\#@] source(?:Mapping)?URL|sourceURL=`)

// hasSourceMapReference returns whether the file contains a reference to a
// source-map file.
func hasSourceMapReference(_ string, ext string, content []byte) bool {
	if !canBeMinified(ext) {
		return false
	}

	for _, line := range getLines(content, -2) {
		if sourceMapRegex.Match(line) {
			return true
		}
	}

	return false
}

var sourceMapRegexps = []regex.EnryRegexp{
	regex.MustCompile(`^{"version":\d+,`),
	regex.MustCompile(`^\/\*\* Begin line maps\. \*\*\/{`),
}

// isSourceMap returns whether the file itself is a source map.
func isSourceMap(path, _ string, content []byte) bool {
	if strings.HasSuffix(path, ".js.map") || strings.HasSuffix(path, ".css.map") {
		return true
	}

	firstLine := getFirstLine(content)
	if len(firstLine) == 0 {
		return false
	}

	for _, r := range sourceMapRegexps {
		if r.Match(firstLine) {
			return true
		}
	}

	return false
}

func isCompiledCoffeeScript(path, ext string, content []byte) bool {
	if ext != ".js" {
		return false
	}

	firstLine := getFirstLine(content)
	lastLines := getLines(content, -2)
	if len(lastLines) < 2 {
		return false
	}

	if string(firstLine) == "(function() {" &&
		string(lastLines[1]) == "}).call(this);" &&
		string(lastLines[0]) == "" {
		score := 0

		forEachLine(content, func(line []byte) {
			if bytes.Contains(line, []byte("var ")) {
				// Underscored temp vars are likely to be Coffee
				score += 1 * countAppearancesInLine(line, "_fn", "_i", "_len", "_ref", "_results")

				// bind and extend functions are very Coffee specific
				score += 3 * countAppearancesInLine(line, "__bind", "__extends", "__hasProp", "__indexOf", "__slice")
			}
		})

		// Require a score of 3. This is fairly arbitrary. Consider tweaking later.
		// See: https://github.com/github/linguist/blob/master/lib/linguist/generated.rb#L176-L213
		return score >= 3
	}

	return false
}

func isGeneratedNetDocfile(_, ext string, content []byte) bool {
	if ext != ".xml" {
		return false
	}

	lines := bytes.Split(content, []byte{'\n'})
	if len(lines) <= 3 {
		return false
	}

	return bytes.Contains(lines[1], []byte("<doc>")) &&
		bytes.Contains(lines[2], []byte("<assembly>")) &&
		bytes.Contains(lines[len(lines)-2], []byte("</doc>"))
}

var pegJavaScriptGeneratedRegex = regex.MustCompile(`^(?:[^\/]|\/[^\*])*\/\*(?:[^\*]|\*[^\/])*Generated by PEG.js`)

func isGeneratedJavaScriptPEGParser(_, ext string, content []byte) bool {
	if ext != ".js" {
		return false
	}

	// PEG.js-generated parsers include a comment near the top of the file
	// that marks them as such.
	return pegJavaScriptGeneratedRegex.Match(bytes.Join(getLines(content, 5), []byte("")))
}

var postScriptType1And42Regex = regex.MustCompile(`(\n|\r\n|\r)\s*(?:currentfile eexec\s+|\/sfnts\s+\[)`)

var postScriptRegexes = []regex.EnryRegexp{
	regex.MustCompile(`[0-9]|draw|mpage|ImageMagick|inkscape|MATLAB`),
	regex.MustCompile(`PCBNEW|pnmtops|\(Unknown\)|Serif Affinity|Filterimage -tops`),
}

func isGeneratedPostScript(_, ext string, content []byte) bool {
	if ext != ".ps" && ext != ".eps" && ext != ".pfa" {
		return false
	}

	// Type 1 and Type 42 fonts converted to PostScript are stored as hex-encoded byte streams; these
	// streams are always preceded by the `eexec` operator (if Type 1), or the `/sfnts` key (if Type 42).
	if postScriptType1And42Regex.Match(content) {
		return true
	}

	// We analyze the "%%Creator:" comment, which contains the author/generator
	// of the file. If there is one, it should be in one of the first few lines.
	var creator []byte
	for _, line := range getLines(content, 10) {
		if bytes.HasPrefix(line, []byte("%%Creator: ")) {
			creator = line
			break
		}
	}

	if len(creator) == 0 {
		return false
	}

	// EAGLE doesn't include a version number when it generates PostScript.
	// However, it does prepend its name to the document's "%%Title" field.
	if bytes.Contains(creator, []byte("EAGLE")) {
		for _, line := range getLines(content, 5) {
			if bytes.HasPrefix(line, []byte("%%Title: EAGLE Drawing ")) {
				return true
			}
		}
	}

	// Most generators write their version number, while human authors' or companies'
	// names don't contain numbers. So look if the line contains digits. Also
	// look for some special cases without version numbers.
	for _, r := range postScriptRegexes {
		if r.Match(creator) {
			return true
		}
	}

	return false
}

func isGeneratedGo(_, ext string, content []byte) bool {
	if ext != ".go" {
		return false
	}

	lines := getLines(content, 40)
	if len(lines) <= 1 {
		return false
	}

	for _, line := range lines {
		if bytes.Contains(line, []byte("Code generated by")) {
			return true
		}
	}

	return false
}

var protoExtensions = map[string]struct{}{
	".py":   {},
	".java": {},
	".h":    {},
	".cc":   {},
	".cpp":  {},
	".m":    {},
	".rb":   {},
	".php":  {},
}

func isGeneratedProtobuf(_, ext string, content []byte) bool {
	if _, ok := protoExtensions[ext]; !ok {
		return false
	}

	lines := getLines(content, 3)
	if len(lines) <= 1 {
		return false
	}

	for _, line := range lines {
		if bytes.Contains(line, []byte("Generated by the protocol buffer compiler. DO NOT EDIT!")) {
			return true
		}
	}

	return false
}

func isGeneratedJavaScriptProtocolBuffer(_, ext string, content []byte) bool {
	if ext != ".js" {
		return false
	}

	lines := getLines(content, 6)
	if len(lines) < 6 {
		return false
	}

	return bytes.Contains(lines[5], []byte("GENERATED CODE -- DO NOT EDIT!"))
}

var apacheThriftExtensions = map[string]struct{}{
	".rb":   {},
	".py":   {},
	".go":   {},
	".js":   {},
	".m":    {},
	".java": {},
	".h":    {},
	".cc":   {},
	".cpp":  {},
	".php":  {},
}

func isGeneratedApacheThrift(_, ext string, content []byte) bool {
	if _, ok := apacheThriftExtensions[ext]; !ok {
		return false
	}

	for _, line := range getLines(content, 6) {
		if bytes.Contains(line, []byte("Autogenerated by Thrift Compiler")) {
			return true
		}
	}

	return false
}

func isGeneratedJNIHeader(_, ext string, content []byte) bool {
	if ext != ".h" {
		return false
	}

	lines := getLines(content, 2)
	if len(lines) < 2 {
		return false
	}

	return bytes.Contains(lines[0], []byte("/* DO NOT EDIT THIS FILE - it is machine generated */")) &&
		bytes.Contains(lines[1], []byte("#include <jni.h>"))
}

func isVCRCassette(_, ext string, content []byte) bool {
	if ext != ".yml" {
		return false
	}

	lines := getLines(content, -2)
	if len(lines) < 2 {
		return false
	}

	return bytes.Contains(lines[1], []byte("recorded_with: VCR"))
}

func isCompiledCythonFile(_, ext string, content []byte) bool {
	if ext != ".c" && ext != ".cpp" {
		return false
	}

	lines := getLines(content, 1)
	if len(lines) < 1 {
		return false
	}

	return bytes.Contains(lines[0], []byte("Generated by Cython"))
}

func isGeneratedModule(_, ext string, content []byte) bool {
	if ext != ".mod" {
		return false
	}

	lines := getLines(content, 1)
	if len(lines) < 1 {
		return false
	}

	return bytes.Contains(lines[0], []byte("PCBNEW-LibModule-V")) ||
		bytes.Contains(lines[0], []byte("GFORTRAN module version '"))
}

func isGeneratedUnity3DMeta(_, ext string, content []byte) bool {
	if ext != ".meta" {
		return false
	}

	lines := getLines(content, 1)
	if len(lines) < 1 {
		return false
	}

	return bytes.Contains(lines[0], []byte("fileFormatVersion: "))
}

func isGeneratedRacc(_, ext string, content []byte) bool {
	if ext != ".rb" {
		return false
	}

	lines := getLines(content, 3)
	if len(lines) < 3 {
		return false
	}

	return bytes.HasPrefix(lines[2], []byte("# This file is automatically generated by Racc"))
}

func isGeneratedJFlex(_, ext string, content []byte) bool {
	if ext != ".java" {
		return false
	}

	lines := getLines(content, 1)
	if len(lines) < 1 {
		return false
	}

	return bytes.HasPrefix(lines[0], []byte("/* The following code was generated by JFlex "))
}

func isGeneratedGrammarKit(_, ext string, content []byte) bool {
	if ext != ".java" {
		return false
	}

	lines := getLines(content, 1)
	if len(lines) < 1 {
		return false
	}

	return bytes.Contains(lines[0], []byte("// This is a generated file. Not intended for manual editing."))
}

func isGeneratedRoxygen2(_, ext string, content []byte) bool {
	if ext != ".rd" {
		return false
	}

	lines := getLines(content, 1)
	if len(lines) < 1 {
		return false
	}

	return bytes.Contains(lines[0], []byte("% Generated by roxygen2: do not edit by hand"))
}

func isGeneratedJison(_, ext string, content []byte) bool {
	if ext != ".js" {
		return false
	}

	lines := getLines(content, 1)
	if len(lines) < 1 {
		return false
	}

	return bytes.Contains(lines[0], []byte("/* parser generated by jison ")) ||
		bytes.Contains(lines[0], []byte("/* generated by jison-lex "))
}

func isGeneratedGRPCCpp(_, ext string, content []byte) bool {
	switch ext {
	case ".cpp", ".hpp", ".h", ".cc":
		lines := getLines(content, 1)
		if len(lines) < 1 {
			return false
		}

		return bytes.Contains(lines[0], []byte("// Generated by the gRPC"))
	default:
		return false
	}
}

var dartRegex = regex.MustCompile(`generated code\W{2,3}do not modify`)

func isGeneratedDart(_, ext string, content []byte) bool {
	if ext != ".dart" {
		return false
	}

	lines := getLines(content, 1)
	if len(lines) < 1 {
		return false
	}

	return dartRegex.Match(bytes.ToLower(lines[0]))
}

func isGeneratedPerlPPPortHeader(name, _ string, content []byte) bool {
	if !strings.HasSuffix(name, "ppport.h") {
		return false
	}

	lines := getLines(content, 10)
	if len(lines) < 10 {
		return false
	}

	return bytes.Contains(lines[8], []byte("Automatically created by Devel::PPPort"))
}

var (
	gameMakerStudioFirstLineRegex = regex.MustCompile(`^\d\.\d\.\d.+\|\{`)
	gameMakerStudioThirdLineRegex = regex.MustCompile(`\"modelName\"\:\s*\"GM`)
)

func isGeneratedGameMakerStudio(_, ext string, content []byte) bool {
	if ext != ".yy" && ext != ".yyp" {
		return false
	}

	lines := getLines(content, 3)
	if len(lines) < 3 {
		return false
	}

	return gameMakerStudioThirdLineRegex.Match(lines[2]) ||
		gameMakerStudioFirstLineRegex.Match(lines[0])
}

var gimpRegexes = []regex.EnryRegexp{
	regex.MustCompile(`\/\* GIMP [a-zA-Z0-9\- ]+ C\-Source image dump \(.+?\.c\) \*\/`),
	regex.MustCompile(`\/\* GIMP header image file format \([a-zA-Z0-9\- ]+\)\: .+?\.h \*\/`),
}

func isGeneratedGimp(_, ext string, content []byte) bool {
	if ext != ".c" && ext != ".h" {
		return false
	}

	lines := getLines(content, 1)
	if len(lines) < 1 {
		return false
	}

	for _, r := range gimpRegexes {
		if r.Match(lines[0]) {
			return true
		}
	}

	return false
}

func isGeneratedVisualStudio6(_, ext string, content []byte) bool {
	if ext != ".dsp" {
		return false
	}

	for _, l := range getLines(content, 3) {
		if bytes.Contains(l, []byte("# Microsoft Developer Studio Generated Build File")) {
			return true
		}
	}

	return false
}

var haxeExtensions = map[string]struct{}{
	".js":   {},
	".py":   {},
	".lua":  {},
	".cpp":  {},
	".h":    {},
	".java": {},
	".cs":   {},
	".php":  {},
}

func isGeneratedHaxe(_, ext string, content []byte) bool {
	if _, ok := haxeExtensions[ext]; !ok {
		return false
	}

	for _, l := range getLines(content, 3) {
		if bytes.Contains(l, []byte("Generated by Haxe")) {
			return true
		}
	}

	return false
}

var (
	doxygenRegex         = regex.MustCompile(`<!--\s+Generated by Doxygen\s+[.0-9]+\s*-->`)
	htmlMetaRegex        = regex.MustCompile(`<meta(\s+[^>]+)>`)
	htmlMetaContentRegex = regex.MustCompile(`\s+(name|content|value)\s*=\s*("[^"]+"|'[^']+'|[^\s"']+)`)
	orgModeMetaRegex     = regex.MustCompile(`org\s+mode`)
)

func isGeneratedHTML(_, ext string, content []byte) bool {
	if ext != ".html" && ext != ".htm" && ext != ".xhtml" {
		return false
	}

	lines := getLines(content, 30)

	// Pkgdown
	if len(lines) >= 2 {
		for _, l := range lines[:2] {
			if bytes.Contains(l, []byte("<!-- Generated by pkgdown: do not edit by hand -->")) {
				return true
			}
		}
	}

	// Mandoc
	if len(lines) > 2 &&
		bytes.HasPrefix(lines[2], []byte("<!-- This is an automatically generated file.")) {
		return true
	}

	// Doxygen
	for _, l := range lines {
		if doxygenRegex.Match(l) {
			return true
		}
	}

	// HTML tag: <meta name="generator" content="" />
	part := bytes.ToLower(bytes.Join(lines, []byte{' '}))
	part = bytes.ReplaceAll(part, []byte{'\n'}, []byte{})
	part = bytes.ReplaceAll(part, []byte{'\r'}, []byte{})
	matches := htmlMetaRegex.FindAll(part, -1)
	if len(matches) == 0 {
		return false
	}

	for _, m := range matches {
		var name, value, content string
		ms := htmlMetaContentRegex.FindAllStringSubmatch(string(m), -1)
		for _, m := range ms {
			switch m[1] {
			case "name":
				name = m[2]
			case "value":
				value = m[2]
			case "content":
				content = m[2]
			}
		}

		var val = value
		if val == "" {
			val = content
		}

		name = strings.Trim(name, `"'`)
		val = strings.Trim(val, `"'`)

		if name != "generator" || val == "" {
			continue
		}

		if strings.Contains(val, "jlatex2html") ||
			strings.Contains(val, "latex2html") ||
			strings.Contains(val, "groff") ||
			strings.Contains(val, "makeinfo") ||
			strings.Contains(val, "texi2html") ||
			strings.Contains(val, "ronn") ||
			orgModeMetaRegex.MatchString(val) {
			return true
		}
	}

	return false
}

func isGeneratedJooq(_, ext string, content []byte) bool {
	if ext != ".java" {
		return false
	}

	for _, l := range getLines(content, 2) {
		if bytes.Contains(l, []byte("This file is generated by jOOQ.")) {
			return true
		}
	}

	return false
}

func getFirstLine(content []byte) []byte {
	lines := getLines(content, 1)
	if len(lines) > 0 {
		return lines[0]
	}
	return nil
}

// getLines returns up to the first n lines. A negative index will return up to
// the last n lines in reverse order.
func getLines(content []byte, n int) [][]byte {
	var result [][]byte
	if n < 0 {
		for pos := len(content); pos > 0 && len(result) < -n; {
			nlpos := bytes.LastIndexByte(content[:pos], '\n')
			if nlpos+1 < len(content)-1 {
				result = append(result, content[nlpos+1:pos])
			}
			pos = nlpos
		}
	} else {
		for pos := 0; pos < len(content) && len(result) < n; {
			nlpos := bytes.IndexByte(content[pos:], '\n')
			if nlpos < 0 && pos < len(content) {
				nlpos = len(content)
			} else if nlpos >= 0 {
				nlpos += pos
			}

			result = append(result, content[pos:nlpos])
			pos = nlpos + 1
		}
	}

	return result
}

func forEachLine(content []byte, cb func([]byte)) {
	var pos int
	for pos < len(content) {
		nlpos := bytes.IndexByte(content[pos:], '\n')
		if nlpos < 0 && pos < len(content) {
			nlpos = len(content)
		} else if nlpos >= 0 {
			nlpos += pos
		}

		cb(content[pos:nlpos])
		pos = nlpos + 1
	}
}

func countAppearancesInLine(line []byte, targets ...string) int {
	var count int
	for _, t := range targets {
		count += bytes.Count(line, []byte(t))
	}
	return count
}
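Taken together, the file provides three layers of detection: a set of always-generated extensions, cheap name-based matchers, and content-based matchers. The sketch below shows one way they can be combined into a single check. It assumes the file above ships in go-enry's `data` package and is illustrative only; it is not the exact wiring go-enry or Gitea uses.

```go
package main

import (
	"fmt"
	"path/filepath"
	"strings"

	"github.com/go-enry/go-enry/v2/data"
)

// isGeneratedSketch applies the three layers in order of cost:
// extension set first, then name matchers, then content matchers.
func isGeneratedSketch(path string, content []byte) bool {
	ext := strings.ToLower(filepath.Ext(path))
	if _, ok := data.GeneratedCodeExtensions[ext]; ok {
		return true
	}
	for _, match := range data.GeneratedCodeNameMatchers {
		if match(path) {
			return true
		}
	}
	for _, match := range data.GeneratedCodeMatchers {
		if match(path, ext, content) {
			return true
		}
	}
	return false
}

func main() {
	// Matches nameContains("node_modules/"), so the content is never inspected.
	fmt.Println(isGeneratedSketch("node_modules/react/index.js", nil)) // true
	// No name matcher applies and the content carries no generator marker.
	fmt.Println(isGeneratedSketch("routers/repo/view.go", []byte("package repo\n"))) // false
}
```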
@@ -0,0 +1,17 @@
package data

import "github.com/go-enry/go-enry/v2/regex"

// TestMatchers is a hand-made collection of regexps used by the function `enry.IsTest`
// to identify test files in different languages.
var TestMatchers = []regex.EnryRegexp{
	regex.MustCompile(`(^|/)tests/.*Test\.php$`),
	regex.MustCompile(`(^|/)test/.*Test(s?)\.java$`),
	regex.MustCompile(`(^|/)test(/|/.*/)Test.*\.java$`),
	regex.MustCompile(`(^|/)test/.*(Test(s?)|Spec(s?))\.scala$`),
	regex.MustCompile(`(^|/)test_.*\.py$`),
	regex.MustCompile(`(^|/).*_test\.go$`),
	regex.MustCompile(`(^|/).*_(test|spec)\.rb$`),
	regex.MustCompile(`(^|/).*Test(s?)\.cs$`),
	regex.MustCompile(`(^|/).*\.(test|spec)\.(ts|tsx|js)$`),
}
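As a quick illustration of how these path regexps are meant to be applied (a sketch only; the exact signature of `enry.IsTest` is not shown in this diff):

```go
package main

import (
	"fmt"

	"github.com/go-enry/go-enry/v2/data"
)

// isTestPath reports whether any of the TestMatchers above matches the path.
func isTestPath(path string) bool {
	for _, re := range data.TestMatchers {
		if re.MatchString(path) {
			return true
		}
	}
	return false
}

func main() {
	fmt.Println(isTestPath("modules/git/repo_test.go")) // true, via `(^|/).*_test\.go$`
	fmt.Println(isTestPath("modules/git/repo.go"))      // false
}
```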
@@ -1,14 +1,14 @@
#include <oniguruma.h>

extern int NewOnigRegex( char *pattern, int pattern_length, int option,
-                        OnigRegex *regex, OnigRegion **region, OnigEncoding *encoding, OnigErrorInfo **error_info, char **error_buffer);
+                        OnigRegex *regex, OnigEncoding *encoding, OnigErrorInfo **error_info, char **error_buffer);

extern int SearchOnigRegex( void *str, int str_length, int offset, int option,
-                        OnigRegex regex, OnigRegion *region, OnigErrorInfo *error_info, char *error_buffer, int *captures, int *numCaptures);
+                        OnigRegex regex, OnigErrorInfo *error_info, char *error_buffer, int *captures, int *numCaptures);

extern int MatchOnigRegex( void *str, int str_length, int offset, int option,
-                        OnigRegex regex, OnigRegion *region);
+                        OnigRegex regex);

-extern int LookupOnigCaptureByName(char *name, int name_length, OnigRegex regex, OnigRegion *region);
+extern int LookupOnigCaptureByName(char *name, int name_length, OnigRegex regex);

extern int GetCaptureNames(OnigRegex regex, void *buffer, int bufferSize, int* groupNumbers);
@@ -1,22 +0,0 @@
Copyright (c) 2013 Caleb Spare

MIT License

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -1,7 +0,0 @@
# Trie

[![GoDoc](http://godoc.org/github.com/toqueteos/trie?status.png)](http://godoc.org/github.com/toqueteos/trie)

This is a fork of https://github.com/cespare/go-trie that adds the `PrefixIndex` method.

It's required for https://github.com/toqueteos/substring.
@@ -1 +0,0 @@
module github.com/toqueteos/trie
@@ -1,102 +0,0 @@
// Package trie is an implementation of a trie (prefix tree) data structure over byte slices. It provides a
// small and simple API for usage as a set as well as a 'Node' API for walking the trie.
package trie

// A Trie is a prefix tree.
type Trie struct {
	root *Node
}

// New constructs a new, empty Trie ready for use.
func New() *Trie {
	return &Trie{
		root: &Node{},
	}
}

// Insert puts b into the Trie. It returns true if the element was not previously in t.
func (t *Trie) Insert(b []byte) bool {
	n := t.root
	for _, c := range b {
		next, ok := n.Walk(c)
		if !ok {
			next = &Node{}
			n.branches[c] = next
			n.hasChildren = true
		}
		n = next
	}
	if n.terminal {
		return false
	}
	n.terminal = true
	return true
}

// Contains checks t for membership of b.
func (t *Trie) Contains(b []byte) bool {
	n := t.root
	for _, c := range b {
		next, ok := n.Walk(c)
		if !ok {
			return false
		}
		n = next
	}
	return n.terminal
}

// PrefixIndex walks through `b` until a prefix is found (terminal node) or it is exhausted.
func (t *Trie) PrefixIndex(b []byte) int {
	var idx int
	n := t.root
	for _, c := range b {
		next, ok := n.Walk(c)
		if !ok {
			return -1
		}
		if next.terminal {
			return idx
		}
		n = next
		idx++
	}
	if !n.terminal {
		idx = -1
	}
	return idx
}

// Root returns the root node of a Trie. A valid Trie (i.e., constructed with New), always has a non-nil root
// node.
func (t *Trie) Root() *Node {
	return t.root
}

// A Node represents a logical vertex in the trie structure.
type Node struct {
	branches    [256]*Node
	terminal    bool
	hasChildren bool
}

// Walk returns the node reached along edge c, if one exists. The ok value indicates whether such a node
// exists.
func (n *Node) Walk(c byte) (next *Node, ok bool) {
	next = n.branches[int(c)]
	return next, (next != nil)
}

// Terminal indicates whether n is terminal in the trie (that is, whether the path from the root to n
// represents an element in the set). For instance, if the root node is terminal, then []byte{} is in the
// trie.
func (n *Node) Terminal() bool {
	return n.terminal
}

// Leaf indicates whether n is a leaf node in the trie (that is, whether it has no children). A leaf node must
// be terminal (else it would not exist). Logically, if n is a leaf node then the []byte represented by the
// path from the root to n is not a proper prefix of any element of the trie.
func (n *Node) Leaf() bool {
	return !n.hasChildren
}
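For context on the dependency being dropped here, a brief, illustrative use of this trie as a prefix set; the return values follow the `Contains` and `PrefixIndex` implementations shown above.

```go
package main

import (
	"fmt"

	"github.com/toqueteos/trie"
)

func main() {
	t := trie.New()
	t.Insert([]byte("Gopkg.lock"))
	t.Insert([]byte("go."))

	fmt.Println(t.Contains([]byte("Gopkg.lock"))) // true: exact element of the set
	fmt.Println(t.PrefixIndex([]byte("go.mod")))  // 2: "go." is a prefix; index of its last byte
	fmt.Println(t.PrefixIndex([]byte("main.go"))) // -1: no inserted element is a prefix
}
```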
@@ -1,24 +0,0 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so

# Folders
_obj
_test

# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out

*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*

_testmain.go

*.exe
*.test
*.prof
@@ -1,11 +0,0 @@
language: go

go:
- 1.2
- 1.3
- 1.4
- tip

script:
- go get launchpad.net/gocheck
- go test
@@ -1,22 +0,0 @@
The MIT License (MIT)

Copyright (c) 2015 Carlos Cobo

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
@@ -1,80 +0,0 @@
# substring [![Build Status](https://travis-ci.org/toqueteos/substring.png?branch=master)](https://travis-ci.org/toqueteos/substring) [![GoDoc](http://godoc.org/github.com/toqueteos/substring?status.png)](http://godoc.org/github.com/toqueteos/substring) [![GitHub release](https://img.shields.io/github/release/toqueteos/substring.svg)](https://github.com/toqueteos/substring/releases)

A simple and composable alternative to the [regexp](http://golang.org/pkg/regexp/) package for fast substring searches.

## Installation

The recommended way to install substring is:

```
go get -t gopkg.in/toqueteos/substring.v1
```

The `-t` flag is for fetching [gocheck](https://gopkg.in/check.v1), required for tests and benchmarks.

## Examples

A basic example with two matchers:

```go
package main

import (
	"fmt"
	"regexp"

	"gopkg.in/toqueteos/substring.v1"
)

func main() {
	m1 := substring.After("assets/", substring.Or(
		substring.Has("jquery"),
		substring.Has("angular"),
		substring.Suffixes(".js", ".css", ".html"),
	))
	fmt.Println(m1.Match("assets/angular/foo/bar")) //Prints: true
	fmt.Println(m1.Match("assets/js/file.js"))      //Prints: true
	fmt.Println(m1.Match("assets/style/bar.css"))   //Prints: true
	fmt.Println(m1.Match("assets/foo/bar.html"))    //Prints: false
	fmt.Println(m1.Match("assets/js/qux.json"))     //Prints: false
	fmt.Println(m1.Match("core/file.html"))         //Prints: false
	fmt.Println(m1.Match("foobar/that.jsx"))        //Prints: false

	m2 := substring.After("vendor/", substring.Suffixes(".css", ".js", ".less"))

	fmt.Println(m2.Match("foo/vendor/bar/qux.css")) //Prints: true
	fmt.Println(m2.Match("foo/var/qux.less"))       //Prints: false

	re := regexp.MustCompile(`vendor\/.*\.(css|js|less)$`)
	fmt.Println(re.MatchString("foo/vendor/bar/qux.css")) //Prints: true
	fmt.Println(re.MatchString("foo/var/qux.less"))       //Prints: false
}
```

## How fast?

It may vary depending on your use case, but 1~2 orders of magnitude faster than `regexp` is pretty common.

Test it out for yourself by running `go test -check.b`!

```
$ go test -check.b
PASS: lib_test.go:18: LibSuite.BenchmarkExample1   10000000  221 ns/op
PASS: lib_test.go:23: LibSuite.BenchmarkExample2   10000000  229 ns/op
PASS: lib_test.go:28: LibSuite.BenchmarkExample3   10000000  216 ns/op
PASS: lib_test.go:33: LibSuite.BenchmarkExample4   10000000  208 ns/op
PASS: lib_test.go:38: LibSuite.BenchmarkExample5   20000000  82.1 ns/op
PASS: lib_test.go:48: LibSuite.BenchmarkExampleRe1 500000    4136 ns/op
PASS: lib_test.go:53: LibSuite.BenchmarkExampleRe2 500000    5222 ns/op
PASS: lib_test.go:58: LibSuite.BenchmarkExampleRe3 500000    5116 ns/op
PASS: lib_test.go:63: LibSuite.BenchmarkExampleRe4 500000    4020 ns/op
PASS: lib_test.go:68: LibSuite.BenchmarkExampleRe5 10000000  226 ns/op
OK: 10 passed
PASS
ok  gopkg.in/toqueteos/substring.v1 23.471s
```

License
-------

MIT, see [LICENSE](LICENSE)
@@ -1,229 +0,0 @@
package substring

import (
	"bytes"
	"regexp"

	"github.com/toqueteos/trie"
)

type BytesMatcher interface {
	Match(b []byte) bool
	MatchIndex(b []byte) int
}

// regexp
type regexpBytes struct{ re *regexp.Regexp }

func BytesRegexp(pat string) *regexpBytes { return &regexpBytes{regexp.MustCompile(pat)} }
func (m *regexpBytes) Match(b []byte) bool { return m.re.Match(b) }
func (m *regexpBytes) MatchIndex(b []byte) int {
	found := m.re.FindIndex(b)
	if found != nil {
		return found[1]
	}
	return -1
}

// exact
type exactBytes struct{ pat []byte }

func BytesExact(pat string) *exactBytes { return &exactBytes{[]byte(pat)} }
func (m *exactBytes) Match(b []byte) bool {
	l, r := len(m.pat), len(b)
	if l != r {
		return false
	}
	for i := 0; i < l; i++ {
		if b[i] != m.pat[i] {
			return false
		}
	}
	return true
}
func (m *exactBytes) MatchIndex(b []byte) int {
	if m.Match(b) {
		return len(b)
	}
	return -1
}

// any, search `s` in `.Match(pat)`
type anyBytes struct {
	pat []byte
}

func BytesAny(pat string) *anyBytes { return &anyBytes{[]byte(pat)} }
func (m *anyBytes) Match(b []byte) bool { return bytes.Index(m.pat, b) >= 0 }
func (m *anyBytes) MatchIndex(b []byte) int {
	if idx := bytes.Index(m.pat, b); idx >= 0 {
		return idx + len(b)
	}
	return -1
}

// has, search `pat` in `.Match(s)`
type hasBytes struct {
	pat []byte
}

func BytesHas(pat string) *hasBytes { return &hasBytes{[]byte(pat)} }
func (m *hasBytes) Match(b []byte) bool { return bytes.Index(b, m.pat) >= 0 }
func (m *hasBytes) MatchIndex(b []byte) int {
	if idx := bytes.Index(b, m.pat); idx >= 0 {
		return idx + len(m.pat)
	}
	return -1
}

// prefix
type prefixBytes struct{ pat []byte }

func BytesPrefix(pat string) *prefixBytes { return &prefixBytes{[]byte(pat)} }
func (m *prefixBytes) Match(b []byte) bool { return bytes.HasPrefix(b, m.pat) }
func (m *prefixBytes) MatchIndex(b []byte) int {
	if bytes.HasPrefix(b, m.pat) {
		return len(m.pat)
	}
	return -1
}

// prefixes
type prefixesBytes struct {
	t *trie.Trie
}

func BytesPrefixes(pats ...string) *prefixesBytes {
	t := trie.New()
	for _, pat := range pats {
		t.Insert([]byte(pat))
	}
	return &prefixesBytes{t}
}
func (m *prefixesBytes) Match(b []byte) bool { return m.t.PrefixIndex(b) >= 0 }
func (m *prefixesBytes) MatchIndex(b []byte) int {
	if idx := m.t.PrefixIndex(b); idx >= 0 {
		return idx
	}
	return -1
}

// suffix
type suffixBytes struct{ pat []byte }

func BytesSuffix(pat string) *suffixBytes { return &suffixBytes{[]byte(pat)} }
func (m *suffixBytes) Match(b []byte) bool { return bytes.HasSuffix(b, m.pat) }
func (m *suffixBytes) MatchIndex(b []byte) int {
	if bytes.HasSuffix(b, m.pat) {
		return len(m.pat)
	}
	return -1
}

// suffixes
type suffixesBytes struct {
	t *trie.Trie
}

func BytesSuffixes(pats ...string) *suffixesBytes {
	t := trie.New()
	for _, pat := range pats {
		t.Insert(reverse([]byte(pat)))
	}
	return &suffixesBytes{t}
}
func (m *suffixesBytes) Match(b []byte) bool {
	return m.t.PrefixIndex(reverse(b)) >= 0
}
func (m *suffixesBytes) MatchIndex(b []byte) int {
	if idx := m.t.PrefixIndex(reverse(b)); idx >= 0 {
		return idx
	}
	return -1
}

// after
type afterBytes struct {
	first   []byte
	matcher BytesMatcher
}

func BytesAfter(first string, m BytesMatcher) *afterBytes { return &afterBytes{[]byte(first), m} }
func (a *afterBytes) Match(b []byte) bool {
	if idx := bytes.Index(b, a.first); idx >= 0 {
		return a.matcher.Match(b[idx+len(a.first):])
	}
	return false
}
func (a *afterBytes) MatchIndex(b []byte) int {
	if idx := bytes.Index(b, a.first); idx >= 0 {
		return idx + a.matcher.MatchIndex(b[idx:])
	}
	return -1
}

// and, returns true iff all matchers return true
type andBytes struct{ matchers []BytesMatcher }

func BytesAnd(m ...BytesMatcher) *andBytes { return &andBytes{m} }
func (a *andBytes) Match(b []byte) bool {
	for _, m := range a.matchers {
		if !m.Match(b) {
			return false
		}
	}
	return true
}
func (a *andBytes) MatchIndex(b []byte) int {
	longest := 0
	for _, m := range a.matchers {
		if idx := m.MatchIndex(b); idx < 0 {
			return -1
		} else if idx > longest {
			longest = idx
		}
	}
	return longest
}

// or, returns true iff any matcher returns true
type orBytes struct{ matchers []BytesMatcher }

func BytesOr(m ...BytesMatcher) *orBytes { return &orBytes{m} }
func (o *orBytes) Match(b []byte) bool {
	for _, m := range o.matchers {
		if m.Match(b) {
			return true
		}
	}
	return false
}
func (o *orBytes) MatchIndex(b []byte) int {
	for _, m := range o.matchers {
		if idx := m.MatchIndex(b); idx >= 0 {
			return idx
		}
	}
	return -1
}

type suffixGroupBytes struct {
	suffix   BytesMatcher
	matchers []BytesMatcher
}

func BytesSuffixGroup(s string, m ...BytesMatcher) *suffixGroupBytes {
	return &suffixGroupBytes{BytesSuffix(s), m}
}
func (sg *suffixGroupBytes) Match(b []byte) bool {
	if sg.suffix.Match(b) {
		return BytesOr(sg.matchers...).Match(b)
	}
	return false
}
func (sg *suffixGroupBytes) MatchIndex(b []byte) int {
	if sg.suffix.MatchIndex(b) >= 0 {
		return BytesOr(sg.matchers...).MatchIndex(b)
	}
	return -1
}
@@ -1,10 +0,0 @@
package substring

// reverse is a helper fn for Suffixes
func reverse(b []byte) []byte {
	n := len(b)
	for i := 0; i < n/2; i++ {
		b[i], b[n-1-i] = b[n-1-i], b[i]
	}
	return b
}
@@ -1,216 +0,0 @@
package substring

import (
	"regexp"
	"strings"

	"github.com/toqueteos/trie"
)

type StringsMatcher interface {
	Match(s string) bool
	MatchIndex(s string) int
}

// regexp
type regexpString struct{ re *regexp.Regexp }

func Regexp(pat string) *regexpString { return &regexpString{regexp.MustCompile(pat)} }
func (m *regexpString) Match(s string) bool { return m.re.MatchString(s) }
func (m *regexpString) MatchIndex(s string) int {
	found := m.re.FindStringIndex(s)
	if found != nil {
		return found[1]
	}
	return -1
}

// exact
type exactString struct{ pat string }

func Exact(pat string) *exactString { return &exactString{pat} }
func (m *exactString) Match(s string) bool { return m.pat == s }
func (m *exactString) MatchIndex(s string) int {
	if m.pat == s {
		return len(s)
	}
	return -1
}

// any, search `s` in `.Match(pat)`
type anyString struct{ pat string }

func Any(pat string) *anyString { return &anyString{pat} }
func (m *anyString) Match(s string) bool {
	return strings.Index(m.pat, s) >= 0
}
func (m *anyString) MatchIndex(s string) int {
	if idx := strings.Index(m.pat, s); idx >= 0 {
		return idx + len(s)
	}
	return -1
}

// has, search `pat` in `.Match(s)`
type hasString struct{ pat string }

func Has(pat string) *hasString { return &hasString{pat} }
func (m *hasString) Match(s string) bool {
	return strings.Index(s, m.pat) >= 0
}
func (m *hasString) MatchIndex(s string) int {
	if idx := strings.Index(s, m.pat); idx >= 0 {
		return idx + len(m.pat)
	}
	return -1
}

// prefix
type prefixString struct{ pat string }

func Prefix(pat string) *prefixString { return &prefixString{pat} }
func (m *prefixString) Match(s string) bool { return strings.HasPrefix(s, m.pat) }
func (m *prefixString) MatchIndex(s string) int {
	if strings.HasPrefix(s, m.pat) {
		return len(m.pat)
	}
	return -1
}

// prefixes
type prefixesString struct{ t *trie.Trie }

func Prefixes(pats ...string) *prefixesString {
	t := trie.New()
	for _, pat := range pats {
		t.Insert([]byte(pat))
	}
	return &prefixesString{t}
}
func (m *prefixesString) Match(s string) bool { return m.t.PrefixIndex([]byte(s)) >= 0 }
func (m *prefixesString) MatchIndex(s string) int {
	if idx := m.t.PrefixIndex([]byte(s)); idx >= 0 {
		return idx
	}
	return -1
}

// suffix
type suffixString struct{ pat string }

func Suffix(pat string) *suffixString { return &suffixString{pat} }
func (m *suffixString) Match(s string) bool { return strings.HasSuffix(s, m.pat) }
func (m *suffixString) MatchIndex(s string) int {
	if strings.HasSuffix(s, m.pat) {
		return len(m.pat)
	}
	return -1
}

// suffixes
type suffixesString struct{ t *trie.Trie }

func Suffixes(pats ...string) *suffixesString {
	t := trie.New()
	for _, pat := range pats {
		t.Insert(reverse([]byte(pat)))
	}
	return &suffixesString{t}
}
func (m *suffixesString) Match(s string) bool {
	return m.t.PrefixIndex(reverse([]byte(s))) >= 0
}
func (m *suffixesString) MatchIndex(s string) int {
	if idx := m.t.PrefixIndex(reverse([]byte(s))); idx >= 0 {
		return idx
	}
	return -1
}

// after
type afterString struct {
	first   string
	matcher StringsMatcher
}

func After(first string, m StringsMatcher) *afterString {
	return &afterString{first, m}
}
func (a *afterString) Match(s string) bool {
	if idx := strings.Index(s, a.first); idx >= 0 {
		return a.matcher.Match(s[idx+len(a.first):])
	}
	return false
}
func (a *afterString) MatchIndex(s string) int {
	if idx := strings.Index(s, a.first); idx >= 0 {
		return idx + a.matcher.MatchIndex(s[idx+len(a.first):])
	}
	return -1
}

// and, returns true iff all matchers return true
type andString struct{ matchers []StringsMatcher }

func And(m ...StringsMatcher) *andString { return &andString{m} }
func (a *andString) Match(s string) bool {
	for _, m := range a.matchers {
		if !m.Match(s) {
			return false
		}
	}
	return true
}
func (a *andString) MatchIndex(s string) int {
	longest := 0
	for _, m := range a.matchers {
		if idx := m.MatchIndex(s); idx < 0 {
			return -1
		} else if idx > longest {
			longest = idx
		}
	}
	return longest
}

// or, returns true iff any matcher returns true
type orString struct{ matchers []StringsMatcher }

func Or(m ...StringsMatcher) *orString { return &orString{m} }
func (o *orString) Match(s string) bool {
	for _, m := range o.matchers {
		if m.Match(s) {
			return true
		}
	}
	return false
}
func (o *orString) MatchIndex(s string) int {
	for _, m := range o.matchers {
		if idx := m.MatchIndex(s); idx >= 0 {
			return idx
		}
	}
	return -1
}

type suffixGroupString struct {
	suffix   StringsMatcher
	matchers []StringsMatcher
}

func SuffixGroup(s string, m ...StringsMatcher) *suffixGroupString {
	return &suffixGroupString{Suffix(s), m}
}
func (sg *suffixGroupString) Match(s string) bool {
	if sg.suffix.Match(s) {
		return Or(sg.matchers...).Match(s)
	}
	return false
}
func (sg *suffixGroupString) MatchIndex(s string) int {
	if sg.suffix.MatchIndex(s) >= 0 {
		return Or(sg.matchers...).MatchIndex(s)
	}
	return -1
}