// Copyright (c) 2012-2016 The go-diff authors. All rights reserved. // https://github.com/sergi/go-diff // See the included LICENSE file for license details. // // go-diff is a Go implementation of Google's Diff, Match, and Patch library // Original library is Copyright (c) 2006 Google Inc. // http://code.google.com/p/google-diff-match-patch/ package diffmatchpatch import ( "math" ) // MatchMain locates the best instance of 'pattern' in 'text' near 'loc'. // Returns -1 if no match found. func (dmp *DiffMatchPatch) MatchMain(text, pattern string, loc int) int { // Check for null inputs not needed since null can't be passed in C#. loc = int(math.Max(0, math.Min(float64(loc), float64(len(text))))) if text == pattern { // Shortcut (potentially not guaranteed by the algorithm) return 0 } else if len(text) == 0 { // Nothing to match. return -1 } else if loc+len(pattern) <= len(text) && text[loc:loc+len(pattern)] == pattern { // Perfect match at the perfect spot! (Includes case of null pattern) return loc } // Do a fuzzy compare. return dmp.MatchBitap(text, pattern, loc) } // MatchBitap locates the best instance of 'pattern' in 'text' near 'loc' using the Bitap algorithm. // Returns -1 if no match was found. func (dmp *DiffMatchPatch) MatchBitap(text, pattern string, loc int) int { // Initialise the alphabet. s := dmp.MatchAlphabet(pattern) // Highest score beyond which we give up. scoreThreshold := dmp.MatchThreshold // Is there a nearby exact match? (speedup) bestLoc := indexOf(text, pattern, loc) if bestLoc != -1 { scoreThreshold = math.Min(dmp.matchBitapScore(0, bestLoc, loc, pattern), scoreThreshold) // What about in the other direction? (speedup) bestLoc = lastIndexOf(text, pattern, loc+len(pattern)) if bestLoc != -1 { scoreThreshold = math.Min(dmp.matchBitapScore(0, bestLoc, loc, pattern), scoreThreshold) } } // Initialise the bit arrays. matchmask := 1 << uint((len(pattern) - 1)) bestLoc = -1 var binMin, binMid int binMax := len(pattern) + len(text) lastRd := []int{} for d := 0; d < len(pattern); d++ { // Scan for the best match; each iteration allows for one more error. Run a binary search to determine how far from 'loc' we can stray at this error level. binMin = 0 binMid = binMax for binMin < binMid { if dmp.matchBitapScore(d, loc+binMid, loc, pattern) <= scoreThreshold { binMin = binMid } else { binMax = binMid } binMid = (binMax-binMin)/2 + binMin } // Use the result from this iteration as the maximum for the next. binMax = binMid start := int(math.Max(1, float64(loc-binMid+1))) finish := int(math.Min(float64(loc+binMid), float64(len(text))) + float64(len(pattern))) rd := make([]int, finish+2) rd[finish+1] = (1 << uint(d)) - 1 for j := finish; j >= start; j-- { var charMatch int if len(text) <= j-1 { // Out of range. charMatch = 0 } else if _, ok := s[text[j-1]]; !ok { charMatch = 0 } else { charMatch = s[text[j-1]] } if d == 0 { // First pass: exact match. rd[j] = ((rd[j+1] << 1) | 1) & charMatch } else { // Subsequent passes: fuzzy match. rd[j] = ((rd[j+1]<<1)|1)&charMatch | (((lastRd[j+1] | lastRd[j]) << 1) | 1) | lastRd[j+1] } if (rd[j] & matchmask) != 0 { score := dmp.matchBitapScore(d, j-1, loc, pattern) // This match will almost certainly be better than any existing match. But check anyway. if score <= scoreThreshold { // Told you so. scoreThreshold = score bestLoc = j - 1 if bestLoc > loc { // When passing loc, don't exceed our current distance from loc. start = int(math.Max(1, float64(2*loc-bestLoc))) } else { // Already passed loc, downhill from here on in. break } } } } if dmp.matchBitapScore(d+1, loc, loc, pattern) > scoreThreshold { // No hope for a (better) match at greater error levels. break } lastRd = rd } return bestLoc } // matchBitapScore computes and returns the score for a match with e errors and x location. func (dmp *DiffMatchPatch) matchBitapScore(e, x, loc int, pattern string) float64 { accuracy := float64(e) / float64(len(pattern)) proximity := math.Abs(float64(loc - x)) if dmp.MatchDistance == 0 { // Dodge divide by zero error. if proximity == 0 { return accuracy } return 1.0 } return accuracy + (proximity / float64(dmp.MatchDistance)) } // MatchAlphabet initialises the alphabet for the Bitap algorithm. func (dmp *DiffMatchPatch) MatchAlphabet(pattern string) map[byte]int { s := map[byte]int{} charPattern := []byte(pattern) for _, c := range charPattern { _, ok := s[c] if !ok { s[c] = 0 } } i := 0 for _, c := range charPattern { value := s[c] | int(uint(1)<<uint((len(pattern)-i-1))) s[c] = value i++ } return s }