Improve issue search (#2387)

* Improve issue indexer

* Fix new issue sqlite bug

* Different test indexer paths for each db

* Add integration indexer paths to make clean
tokarchuk/v1.17
Ethan Koenig 7 years ago committed by Lauris BH
parent 52e11b24bf
commit b0f7457d9e
  1. 3
      .gitignore
  2. 6
      Makefile
  3. 9
      integrations/integration_test.go
  4. 43
      integrations/issue_test.go
  5. 3
      integrations/mysql.ini.tmpl
  6. 3
      integrations/pgsql.ini.tmpl
  7. 3
      integrations/sqlite.ini
  8. 10
      models/fixtures/issue.yml
  9. 29
      models/issue.go
  10. 24
      models/issue_comment.go
  11. 164
      models/issue_indexer.go
  12. 2
      models/pull.go
  13. 37
      modules/indexer/indexer.go
  14. 143
      modules/indexer/issue.go
  15. 3
      routers/init.go
  16. 3
      routers/repo/issue.go
  17. 45
      vendor/github.com/blevesearch/bleve/README.md
  18. 145
      vendor/github.com/blevesearch/bleve/analysis/analyzer/custom/custom.go
  19. 46
      vendor/github.com/blevesearch/bleve/analysis/analyzer/simple/simple.go
  20. 79
      vendor/github.com/blevesearch/bleve/analysis/token/unicodenorm/unicodenorm.go
  21. 76
      vendor/github.com/blevesearch/bleve/analysis/tokenizer/character/character.go
  22. 33
      vendor/github.com/blevesearch/bleve/analysis/tokenizer/letter/letter.go
  23. 23
      vendor/github.com/blevesearch/bleve/config_app.go
  24. 137
      vendor/github.com/blevesearch/bleve/document/field_geopoint.go
  25. 9
      vendor/github.com/blevesearch/bleve/geo/README.md
  26. 170
      vendor/github.com/blevesearch/bleve/geo/geo.go
  27. 98
      vendor/github.com/blevesearch/bleve/geo/geo_dist.go
  28. 140
      vendor/github.com/blevesearch/bleve/geo/parse.go
  29. 212
      vendor/github.com/blevesearch/bleve/geo/sloppy.go
  30. 22
      vendor/github.com/blevesearch/bleve/index.go
  31. 4
      vendor/github.com/blevesearch/bleve/index/index.go
  32. 6
      vendor/github.com/blevesearch/bleve/index/upsidedown/analysis.go
  33. 10
      vendor/github.com/blevesearch/bleve/index/upsidedown/dump.go
  34. 42
      vendor/github.com/blevesearch/bleve/index/upsidedown/index_reader.go
  35. 63
      vendor/github.com/blevesearch/bleve/index/upsidedown/reader.go
  36. 309
      vendor/github.com/blevesearch/bleve/index/upsidedown/row.go
  37. 121
      vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go
  38. 142
      vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.pb.go
  39. 8
      vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.proto
  40. 17
      vendor/github.com/blevesearch/bleve/index_alias_impl.go
  41. 31
      vendor/github.com/blevesearch/bleve/index_impl.go
  42. 4
      vendor/github.com/blevesearch/bleve/mapping.go
  43. 51
      vendor/github.com/blevesearch/bleve/mapping/document.go
  44. 25
      vendor/github.com/blevesearch/bleve/mapping/field.go
  45. 7
      vendor/github.com/blevesearch/bleve/mapping/index.go
  46. 13
      vendor/github.com/blevesearch/bleve/mapping/mapping.go
  47. 43
      vendor/github.com/blevesearch/bleve/numeric/bin.go
  48. 32
      vendor/github.com/blevesearch/bleve/query.go
  49. 92
      vendor/github.com/blevesearch/bleve/search.go
  50. 30
      vendor/github.com/blevesearch/bleve/search/collector/heap.go
  51. 15
      vendor/github.com/blevesearch/bleve/search/collector/list.go
  52. 15
      vendor/github.com/blevesearch/bleve/search/collector/slice.go
  53. 83
      vendor/github.com/blevesearch/bleve/search/collector/topn.go
  54. 53
      vendor/github.com/blevesearch/bleve/search/facet/facet_builder_datetime.go
  55. 53
      vendor/github.com/blevesearch/bleve/search/facet/facet_builder_numeric.go
  56. 29
      vendor/github.com/blevesearch/bleve/search/facet/facet_builder_terms.go
  57. 41
      vendor/github.com/blevesearch/bleve/search/facets_builder.go
  58. 2
      vendor/github.com/blevesearch/bleve/search/highlight/format/html/html.go
  59. 2
      vendor/github.com/blevesearch/bleve/search/highlight/highlighter/simple/fragment_scorer_simple.go
  60. 2
      vendor/github.com/blevesearch/bleve/search/highlight/highlighter/simple/highlighter_simple.go
  61. 14
      vendor/github.com/blevesearch/bleve/search/highlight/term_locations.go
  62. 12
      vendor/github.com/blevesearch/bleve/search/pool.go
  63. 13
      vendor/github.com/blevesearch/bleve/search/query/bool_field.go
  64. 69
      vendor/github.com/blevesearch/bleve/search/query/boolean.go
  65. 26
      vendor/github.com/blevesearch/bleve/search/query/conjunction.go
  66. 9
      vendor/github.com/blevesearch/bleve/search/query/date_range.go
  67. 29
      vendor/github.com/blevesearch/bleve/search/query/disjunction.go
  68. 6
      vendor/github.com/blevesearch/bleve/search/query/docid.go
  69. 8
      vendor/github.com/blevesearch/bleve/search/query/fuzzy.go
  70. 113
      vendor/github.com/blevesearch/bleve/search/query/geo_boundingbox.go
  71. 100
      vendor/github.com/blevesearch/bleve/search/query/geo_distance.go
  72. 12
      vendor/github.com/blevesearch/bleve/search/query/match.go
  73. 8
      vendor/github.com/blevesearch/bleve/search/query/match_all.go
  74. 4
      vendor/github.com/blevesearch/bleve/search/query/match_none.go
  75. 21
      vendor/github.com/blevesearch/bleve/search/query/match_phrase.go
  76. 80
      vendor/github.com/blevesearch/bleve/search/query/multi_phrase.go
  77. 8
      vendor/github.com/blevesearch/bleve/search/query/numeric_range.go
  78. 38
      vendor/github.com/blevesearch/bleve/search/query/phrase.go
  79. 8
      vendor/github.com/blevesearch/bleve/search/query/prefix.go
  80. 52
      vendor/github.com/blevesearch/bleve/search/query/query.go
  81. 10
      vendor/github.com/blevesearch/bleve/search/query/query_string.go
  82. 63
      vendor/github.com/blevesearch/bleve/search/query/query_string.y
  83. 148
      vendor/github.com/blevesearch/bleve/search/query/query_string.y.go
  84. 10
      vendor/github.com/blevesearch/bleve/search/query/query_string_parser.go
  85. 24
      vendor/github.com/blevesearch/bleve/search/query/regexp.go
  86. 8
      vendor/github.com/blevesearch/bleve/search/query/term.go
  87. 95
      vendor/github.com/blevesearch/bleve/search/query/term_range.go
  88. 10
      vendor/github.com/blevesearch/bleve/search/query/wildcard.go
  89. 12
      vendor/github.com/blevesearch/bleve/search/scorer/scorer_conjunction.go
  90. 14
      vendor/github.com/blevesearch/bleve/search/scorer/scorer_constant.go
  91. 14
      vendor/github.com/blevesearch/bleve/search/scorer/scorer_disjunction.go
  92. 56
      vendor/github.com/blevesearch/bleve/search/scorer/scorer_term.go
  93. 38
      vendor/github.com/blevesearch/bleve/search/search.go
  94. 4
      vendor/github.com/blevesearch/bleve/search/searcher/search_boolean.go
  95. 8
      vendor/github.com/blevesearch/bleve/search/searcher/search_conjunction.go
  96. 25
      vendor/github.com/blevesearch/bleve/search/searcher/search_disjunction.go
  97. 4
      vendor/github.com/blevesearch/bleve/search/searcher/search_docid.go
  98. 88
      vendor/github.com/blevesearch/bleve/search/searcher/search_filter.go
  99. 87
      vendor/github.com/blevesearch/bleve/search/searcher/search_fuzzy.go
  100. 173
      vendor/github.com/blevesearch/bleve/search/searcher/search_geoboundingbox.go
  101. Some files were not shown because too many files have changed in this diff Show More

3
.gitignore vendored

@ -53,5 +53,8 @@ coverage.all
/integrations/gitea-integration-mysql /integrations/gitea-integration-mysql
/integrations/gitea-integration-pgsql /integrations/gitea-integration-pgsql
/integrations/gitea-integration-sqlite /integrations/gitea-integration-sqlite
/integrations/indexers-mysql
/integrations/indexers-pgsql
/integrations/indexers-sqlite
/integrations/mysql.ini /integrations/mysql.ini
/integrations/pgsql.ini /integrations/pgsql.ini

@ -63,7 +63,11 @@ all: build
.PHONY: clean .PHONY: clean
clean: clean:
$(GO) clean -i ./... $(GO) clean -i ./...
rm -rf $(EXECUTABLE) $(DIST) $(BINDATA) integrations*.test integrations/gitea-integration-pgsql/ integrations/gitea-integration-mysql/ integrations/gitea-integration-sqlite/ integrations/mysql.ini integrations/pgsql.ini rm -rf $(EXECUTABLE) $(DIST) $(BINDATA) \
integrations*.test \
integrations/gitea-integration-pgsql/ integrations/gitea-integration-mysql/ integrations/gitea-integration-sqlite/ \
integrations/indexers-mysql/ integrations/indexers-pgsql integrations/indexers-sqlite \
integrations/mysql.ini integrations/pgsql.ini
required-gofmt-version: required-gofmt-version:
@$(GO) version | grep -q '\(1.7\|1.8\)' || { echo "We require go version 1.7 or 1.8 to format code" >&2 && exit 1; } @$(GO) version | grep -q '\(1.7\|1.8\)' || { echo "We require go version 1.7 or 1.8 to format code" >&2 && exit 1; }

@ -57,7 +57,14 @@ func TestMain(m *testing.M) {
fmt.Printf("Error initializing test database: %v\n", err) fmt.Printf("Error initializing test database: %v\n", err)
os.Exit(1) os.Exit(1)
} }
os.Exit(m.Run()) exitCode := m.Run()
if err = os.RemoveAll(setting.Indexer.IssuePath); err != nil {
fmt.Printf("os.RemoveAll: %v\n", err)
os.Exit(1)
}
os.Exit(exitCode)
} }
func initIntegrationTest() { func initIntegrationTest() {

@ -18,8 +18,10 @@ import (
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
) )
func getIssuesSelection(htmlDoc *HTMLDoc) *goquery.Selection { func getIssuesSelection(t testing.TB, htmlDoc *HTMLDoc) *goquery.Selection {
return htmlDoc.doc.Find(".issue.list").Find("li").Find(".title") issueList := htmlDoc.doc.Find(".issue.list")
assert.EqualValues(t, 1, issueList.Length())
return issueList.Find("li").Find(".title")
} }
func getIssue(t *testing.T, repoID int64, issueSelection *goquery.Selection) *models.Issue { func getIssue(t *testing.T, repoID int64, issueSelection *goquery.Selection) *models.Issue {
@ -31,6 +33,18 @@ func getIssue(t *testing.T, repoID int64, issueSelection *goquery.Selection) *mo
return models.AssertExistsAndLoadBean(t, &models.Issue{RepoID: repoID, Index: int64(index)}).(*models.Issue) return models.AssertExistsAndLoadBean(t, &models.Issue{RepoID: repoID, Index: int64(index)}).(*models.Issue)
} }
// assertMatch asserts that keyword occurs, ignoring case, in the issue's
// title, in its content, or in the content of at least one of the comments
// currently loaded on the issue.
//
// Every candidate text is lower-cased before the comparison, so the keyword
// is lower-cased as well; otherwise a keyword containing an upper-case letter
// could never match and the assertion would fail spuriously.
func assertMatch(t testing.TB, issue *models.Issue, keyword string) {
	needle := strings.ToLower(keyword)
	matches := strings.Contains(strings.ToLower(issue.Title), needle) ||
		strings.Contains(strings.ToLower(issue.Content), needle)
	for _, comment := range issue.Comments {
		if matches {
			// One hit is enough; no need to scan the remaining comments.
			break
		}
		matches = strings.Contains(strings.ToLower(comment.Content), needle)
	}
	assert.True(t, matches, "issue %d does not contain keyword %q", issue.ID, keyword)
}
func TestNoLoginViewIssues(t *testing.T) { func TestNoLoginViewIssues(t *testing.T) {
prepareTestEnv(t) prepareTestEnv(t)
@ -38,19 +52,18 @@ func TestNoLoginViewIssues(t *testing.T) {
MakeRequest(t, req, http.StatusOK) MakeRequest(t, req, http.StatusOK)
} }
func TestNoLoginViewIssuesSortByType(t *testing.T) { func TestViewIssuesSortByType(t *testing.T) {
prepareTestEnv(t) prepareTestEnv(t)
user := models.AssertExistsAndLoadBean(t, &models.User{ID: 1}).(*models.User) user := models.AssertExistsAndLoadBean(t, &models.User{ID: 1}).(*models.User)
repo := models.AssertExistsAndLoadBean(t, &models.Repository{ID: 1}).(*models.Repository) repo := models.AssertExistsAndLoadBean(t, &models.Repository{ID: 1}).(*models.Repository)
repo.Owner = models.AssertExistsAndLoadBean(t, &models.User{ID: repo.OwnerID}).(*models.User)
session := loginUser(t, user.Name) session := loginUser(t, user.Name)
req := NewRequest(t, "GET", repo.RelLink()+"/issues?type=created_by") req := NewRequest(t, "GET", repo.RelLink()+"/issues?type=created_by")
resp := session.MakeRequest(t, req, http.StatusOK) resp := session.MakeRequest(t, req, http.StatusOK)
htmlDoc := NewHTMLParser(t, resp.Body) htmlDoc := NewHTMLParser(t, resp.Body)
issuesSelection := getIssuesSelection(htmlDoc) issuesSelection := getIssuesSelection(t, htmlDoc)
expectedNumIssues := models.GetCount(t, expectedNumIssues := models.GetCount(t,
&models.Issue{RepoID: repo.ID, PosterID: user.ID}, &models.Issue{RepoID: repo.ID, PosterID: user.ID},
models.Cond("is_closed=?", false), models.Cond("is_closed=?", false),
@ -67,6 +80,26 @@ func TestNoLoginViewIssuesSortByType(t *testing.T) {
}) })
} }
// TestViewIssuesKeyword requests the issue list of repository 1 filtered by a
// keyword and checks that exactly one issue is listed, and that the listed
// issue is open, is not a pull request, and contains the keyword.
func TestViewIssuesKeyword(t *testing.T) {
	prepareTestEnv(t)

	const keyword = "first"

	repo := models.AssertExistsAndLoadBean(t, &models.Repository{ID: 1}).(*models.Repository)

	// The request is made without a logged-in session.
	resp := MakeRequest(
		t,
		NewRequestf(t, "GET", "%s/issues?q=%s", repo.RelLink(), keyword),
		http.StatusOK,
	)
	titles := getIssuesSelection(t, NewHTMLParser(t, resp.Body))
	assert.EqualValues(t, 1, titles.Length())

	checkListedIssue := func(_ int, title *goquery.Selection) {
		listed := getIssue(t, repo.ID, title)
		assert.False(t, listed.IsClosed)
		assert.False(t, listed.IsPull)
		assertMatch(t, listed, keyword)
	}
	titles.Each(checkListedIssue)
}
func TestNoLoginViewIssue(t *testing.T) { func TestNoLoginViewIssue(t *testing.T) {
prepareTestEnv(t) prepareTestEnv(t)

@ -10,6 +10,9 @@ PASSWD = {{TEST_MYSQL_PASSWORD}}
SSL_MODE = disable SSL_MODE = disable
PATH = data/gitea.db PATH = data/gitea.db
[indexer]
ISSUE_INDEXER_PATH = integrations/indexers-mysql/issues.bleve
[repository] [repository]
ROOT = integrations/gitea-integration-mysql/gitea-repositories ROOT = integrations/gitea-integration-mysql/gitea-repositories

@ -10,6 +10,9 @@ PASSWD = {{TEST_PGSQL_PASSWORD}}
SSL_MODE = disable SSL_MODE = disable
PATH = data/gitea.db PATH = data/gitea.db
[indexer]
ISSUE_INDEXER_PATH = integrations/indexers-pgsql/issues.bleve
[repository] [repository]
ROOT = integrations/gitea-integration-pgsql/gitea-repositories ROOT = integrations/gitea-integration-pgsql/gitea-repositories

@ -5,6 +5,9 @@ RUN_MODE = prod
DB_TYPE = sqlite3 DB_TYPE = sqlite3
PATH = :memory: PATH = :memory:
[indexer]
ISSUE_INDEXER_PATH = integrations/indexers-sqlite/issues.bleve
[repository] [repository]
ROOT = integrations/gitea-integration-sqlite/gitea-repositories ROOT = integrations/gitea-integration-sqlite/gitea-repositories

@ -5,7 +5,7 @@
poster_id: 1 poster_id: 1
assignee_id: 1 assignee_id: 1
name: issue1 name: issue1
content: content1 content: content for the first issue
is_closed: false is_closed: false
is_pull: false is_pull: false
num_comments: 2 num_comments: 2
@ -18,7 +18,7 @@
index: 2 index: 2
poster_id: 1 poster_id: 1
name: issue2 name: issue2
content: content2 content: content for the second issue
milestone_id: 1 milestone_id: 1
is_closed: false is_closed: false
is_pull: true is_pull: true
@ -32,7 +32,7 @@
index: 3 index: 3
poster_id: 1 poster_id: 1
name: issue3 name: issue3
content: content4 content: content for the third issue
is_closed: false is_closed: false
is_pull: true is_pull: true
created_unix: 946684820 created_unix: 946684820
@ -44,7 +44,7 @@
index: 1 index: 1
poster_id: 2 poster_id: 2
name: issue4 name: issue4
content: content4 content: content for the fourth issue
is_closed: true is_closed: true
is_pull: false is_pull: false
@ -54,7 +54,7 @@
index: 4 index: 4
poster_id: 2 poster_id: 2
name: issue5 name: issue5
content: content5 content: content for the fifth issue
is_closed: true is_closed: true
is_pull: false is_pull: false
- -

@ -155,6 +155,17 @@ func (issue *Issue) loadPullRequest(e Engine) (err error) {
return nil return nil
} }
// loadComments fills issue.Comments through findComments, filtering by the
// issue's ID and CommentTypeUnknown. It does nothing when issue.Comments is
// already non-nil, and returns whatever error findComments reports.
func (issue *Issue) loadComments(e Engine) (err error) {
	if issue.Comments == nil {
		opts := FindCommentsOptions{
			IssueID: issue.ID,
			Type:    CommentTypeUnknown,
		}
		issue.Comments, err = findComments(e, opts)
	}
	return err
}
func (issue *Issue) loadAttributes(e Engine) (err error) { func (issue *Issue) loadAttributes(e Engine) (err error) {
if err = issue.loadRepo(e); err != nil { if err = issue.loadRepo(e); err != nil {
return return
@ -191,14 +202,8 @@ func (issue *Issue) loadAttributes(e Engine) (err error) {
} }
} }
if issue.Comments == nil { if err = issue.loadComments(e); err != nil {
issue.Comments, err = findComments(e, FindCommentsOptions{ return
IssueID: issue.ID,
Type: CommentTypeUnknown,
})
if err != nil {
return fmt.Errorf("getCommentsByIssueID [%d]: %v", issue.ID, err)
}
} }
return nil return nil
@ -577,7 +582,7 @@ func updateIssueCols(e Engine, issue *Issue, cols ...string) error {
if _, err := e.Id(issue.ID).Cols(cols...).Update(issue); err != nil { if _, err := e.Id(issue.ID).Cols(cols...).Update(issue); err != nil {
return err return err
} }
UpdateIssueIndexer(issue) UpdateIssueIndexer(issue.ID)
return nil return nil
} }
@ -907,8 +912,6 @@ func newIssue(e *xorm.Session, doer *User, opts NewIssueOptions) (err error) {
return err return err
} }
UpdateIssueIndexer(opts.Issue)
if len(opts.Attachments) > 0 { if len(opts.Attachments) > 0 {
attachments, err := getAttachmentsByUUIDs(e, opts.Attachments) attachments, err := getAttachmentsByUUIDs(e, opts.Attachments)
if err != nil { if err != nil {
@ -947,6 +950,8 @@ func NewIssue(repo *Repository, issue *Issue, labelIDs []int64, uuids []string)
return fmt.Errorf("Commit: %v", err) return fmt.Errorf("Commit: %v", err)
} }
UpdateIssueIndexer(issue.ID)
if err = NotifyWatchers(&Action{ if err = NotifyWatchers(&Action{
ActUserID: issue.Poster.ID, ActUserID: issue.Poster.ID,
ActUser: issue.Poster, ActUser: issue.Poster,
@ -1448,7 +1453,7 @@ func updateIssue(e Engine, issue *Issue) error {
if err != nil { if err != nil {
return err return err
} }
UpdateIssueIndexer(issue) UpdateIssueIndexer(issue.ID)
return nil return nil
} }

@ -520,7 +520,14 @@ func CreateComment(opts *CreateCommentOptions) (comment *Comment, err error) {
return nil, err return nil, err
} }
return comment, sess.Commit() if err = sess.Commit(); err != nil {
return nil, err
}
if opts.Type == CommentTypeComment {
UpdateIssueIndexer(opts.Issue.ID)
}
return comment, nil
} }
// CreateIssueComment creates a plain issue comment. // CreateIssueComment creates a plain issue comment.
@ -645,8 +652,12 @@ func GetCommentsByRepoIDSince(repoID, since int64) ([]*Comment, error) {
// UpdateComment updates information of comment. // UpdateComment updates information of comment.
func UpdateComment(c *Comment) error { func UpdateComment(c *Comment) error {
_, err := x.Id(c.ID).AllCols().Update(c) if _, err := x.Id(c.ID).AllCols().Update(c); err != nil {
return err return err
} else if c.Type == CommentTypeComment {
UpdateIssueIndexer(c.IssueID)
}
return nil
} }
// DeleteComment deletes the comment // DeleteComment deletes the comment
@ -672,5 +683,10 @@ func DeleteComment(comment *Comment) error {
return err return err
} }
return sess.Commit() if err := sess.Commit(); err != nil {
return err
} else if comment.Type == CommentTypeComment {
UpdateIssueIndexer(comment.IssueID)
}
return nil
} }

@ -6,112 +6,21 @@ package models
import ( import (
"fmt" "fmt"
"os"
"strconv"
"strings"
"code.gitea.io/gitea/modules/indexer"
"code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/util" "code.gitea.io/gitea/modules/util"
"github.com/blevesearch/bleve"
"github.com/blevesearch/bleve/analysis/analyzer/simple"
"github.com/blevesearch/bleve/search/query"
) )
// issueIndexerUpdateQueue queue of issues that need to be updated in the issues // issueIndexerUpdateQueue queue of issue ids to be updated
// indexer var issueIndexerUpdateQueue chan int64
var issueIndexerUpdateQueue chan *Issue
// issueIndexer (thread-safe) index for searching issues
var issueIndexer bleve.Index
// issueIndexerData data stored in the issue indexer
type issueIndexerData struct {
ID int64
RepoID int64
Title string
Content string
}
// numericQuery an numeric-equality query for the given value and field
func numericQuery(value int64, field string) *query.NumericRangeQuery {
f := float64(value)
tru := true
q := bleve.NewNumericRangeInclusiveQuery(&f, &f, &tru, &tru)
q.SetField(field)
return q
}
// SearchIssuesByKeyword searches for issues by given conditions.
// Returns the matching issue IDs
func SearchIssuesByKeyword(repoID int64, keyword string) ([]int64, error) {
terms := strings.Fields(strings.ToLower(keyword))
indexerQuery := bleve.NewConjunctionQuery(
numericQuery(repoID, "RepoID"),
bleve.NewDisjunctionQuery(
bleve.NewPhraseQuery(terms, "Title"),
bleve.NewPhraseQuery(terms, "Content"),
))
search := bleve.NewSearchRequestOptions(indexerQuery, 2147483647, 0, false)
search.Fields = []string{"ID"}
result, err := issueIndexer.Search(search)
if err != nil {
return nil, err
}
issueIDs := make([]int64, len(result.Hits))
for i, hit := range result.Hits {
issueIDs[i] = int64(hit.Fields["ID"].(float64))
}
return issueIDs, nil
}
// InitIssueIndexer initialize issue indexer // InitIssueIndexer initialize issue indexer
func InitIssueIndexer() { func InitIssueIndexer() {
_, err := os.Stat(setting.Indexer.IssuePath) indexer.InitIssueIndexer(populateIssueIndexer)
if err != nil { issueIndexerUpdateQueue = make(chan int64, setting.Indexer.UpdateQueueLength)
if os.IsNotExist(err) {
if err = createIssueIndexer(); err != nil {
log.Fatal(4, "CreateIssuesIndexer: %v", err)
}
if err = populateIssueIndexer(); err != nil {
log.Fatal(4, "PopulateIssuesIndex: %v", err)
}
} else {
log.Fatal(4, "InitIssuesIndexer: %v", err)
}
} else {
issueIndexer, err = bleve.Open(setting.Indexer.IssuePath)
if err != nil {
log.Fatal(4, "InitIssuesIndexer, open index: %v", err)
}
}
issueIndexerUpdateQueue = make(chan *Issue, setting.Indexer.UpdateQueueLength)
go processIssueIndexerUpdateQueue() go processIssueIndexerUpdateQueue()
// TODO close issueIndexer when Gitea closes
}
// createIssueIndexer create an issue indexer if one does not already exist
func createIssueIndexer() error {
mapping := bleve.NewIndexMapping()
docMapping := bleve.NewDocumentMapping()
docMapping.AddFieldMappingsAt("ID", bleve.NewNumericFieldMapping())
docMapping.AddFieldMappingsAt("RepoID", bleve.NewNumericFieldMapping())
textFieldMapping := bleve.NewTextFieldMapping()
textFieldMapping.Analyzer = simple.Name
docMapping.AddFieldMappingsAt("Title", textFieldMapping)
docMapping.AddFieldMappingsAt("Content", textFieldMapping)
mapping.AddDocumentMapping("issues", docMapping)
var err error
issueIndexer, err = bleve.New(setting.Indexer.IssuePath, mapping)
return err
} }
// populateIssueIndexer populate the issue indexer with issue data // populateIssueIndexer populate the issue indexer with issue data
@ -127,57 +36,64 @@ func populateIssueIndexer() error {
if len(repos) == 0 { if len(repos) == 0 {
return nil return nil
} }
batch := issueIndexer.NewBatch()
for _, repo := range repos { for _, repo := range repos {
issues, err := Issues(&IssuesOptions{ issues, err := Issues(&IssuesOptions{
RepoID: repo.ID, RepoID: repo.ID,
IsClosed: util.OptionalBoolNone, IsClosed: util.OptionalBoolNone,
IsPull: util.OptionalBoolNone, IsPull: util.OptionalBoolNone,
}) })
if err != nil { updates := make([]indexer.IssueIndexerUpdate, len(issues))
return fmt.Errorf("Issues: %v", err) for i, issue := range issues {
updates[i] = issue.update()
} }
for _, issue := range issues { if err = indexer.BatchUpdateIssues(updates...); err != nil {
err = batch.Index(issue.indexUID(), issue.issueData()) return fmt.Errorf("BatchUpdate: %v", err)
if err != nil {
return fmt.Errorf("batch.Index: %v", err)
}
} }
} }
if err = issueIndexer.Batch(batch); err != nil {
return fmt.Errorf("index.Batch: %v", err)
}
} }
} }
func processIssueIndexerUpdateQueue() { func processIssueIndexerUpdateQueue() {
for { for {
select { select {
case issue := <-issueIndexerUpdateQueue: case issueID := <-issueIndexerUpdateQueue:
if err := issueIndexer.Index(issue.indexUID(), issue.issueData()); err != nil { issue, err := GetIssueByID(issueID)
if err != nil {
log.Error(4, "issuesIndexer.Index: %v", err)
continue
}
if err = indexer.UpdateIssue(issue.update()); err != nil {
log.Error(4, "issuesIndexer.Index: %v", err) log.Error(4, "issuesIndexer.Index: %v", err)
} }
} }
} }
} }
// indexUID a unique identifier for an issue used in full-text indices func (issue *Issue) update() indexer.IssueIndexerUpdate {
func (issue *Issue) indexUID() string { comments := make([]string, 0, 5)
return strconv.FormatInt(issue.ID, 36) for _, comment := range issue.Comments {
} if comment.Type == CommentTypeComment {
comments = append(comments, comment.Content)
func (issue *Issue) issueData() *issueIndexerData { }
return &issueIndexerData{ }
ID: issue.ID, return indexer.IssueIndexerUpdate{
RepoID: issue.RepoID, IssueID: issue.ID,
Title: issue.Title, Data: &indexer.IssueIndexerData{
Content: issue.Content, RepoID: issue.RepoID,
Title: issue.Title,
Content: issue.Content,
Comments: comments,
},
} }
} }
// UpdateIssueIndexer add/update an issue to the issue indexer // UpdateIssueIndexer add/update an issue to the issue indexer
func UpdateIssueIndexer(issue *Issue) { func UpdateIssueIndexer(issueID int64) {
go func() { select {
issueIndexerUpdateQueue <- issue case issueIndexerUpdateQueue <- issueID:
}() default:
go func() {
issueIndexerUpdateQueue <- issueID
}()
}
} }

@ -640,6 +640,8 @@ func NewPullRequest(repo *Repository, pull *Issue, labelIDs []int64, uuids []str
return fmt.Errorf("Commit: %v", err) return fmt.Errorf("Commit: %v", err)
} }
UpdateIssueIndexer(pull.ID)
if err = NotifyWatchers(&Action{ if err = NotifyWatchers(&Action{
ActUserID: pull.Poster.ID, ActUserID: pull.Poster.ID,
ActUser: pull.Poster, ActUser: pull.Poster,

@ -5,10 +5,39 @@
package indexer package indexer
import ( import (
"code.gitea.io/gitea/models" "fmt"
"strconv"
"github.com/blevesearch/bleve"
"github.com/blevesearch/bleve/search/query"
) )
// NewContext start indexer service // indexerID a bleve-compatible unique identifier for an integer id
func NewContext() { func indexerID(id int64) string {
models.InitIssueIndexer() return strconv.FormatInt(id, 36)
}
// idOfIndexerID decodes an indexer ID, a base-36 string, back into the
// integer ID it stands for. It returns 0 and a descriptive error when the
// string is not a valid base-36 representation of a 64-bit integer.
func idOfIndexerID(indexerID string) (int64, error) {
	decoded, parseErr := strconv.ParseInt(indexerID, 36, 64)
	if parseErr == nil {
		return decoded, nil
	}
	return 0, fmt.Errorf("Unexpected indexer ID %s: %v", indexerID, parseErr)
}
// numericEqualityQuery builds a query matching documents whose numeric field
// equals value. It is expressed as a range query whose lower and upper bounds
// are both value and both inclusive. The value is converted to float64 before
// it is handed to bleve.
func numericEqualityQuery(value int64, field string) *query.NumericRangeQuery {
	bound, inclusive := float64(value), true
	rangeQuery := bleve.NewNumericRangeInclusiveQuery(&bound, &bound, &inclusive, &inclusive)
	rangeQuery.SetField(field)
	return rangeQuery
}
func newMatchPhraseQuery(matchPhrase, field, analyzer string) *query.MatchPhraseQuery {
q := bleve.NewMatchPhraseQuery(matchPhrase)
q.FieldVal = field
q.Analyzer = analyzer
return q
} }

@ -0,0 +1,143 @@
// Copyright 2017 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package indexer
import (
"os"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"github.com/blevesearch/bleve"
"github.com/blevesearch/bleve/analysis/analyzer/custom"
"github.com/blevesearch/bleve/analysis/token/lowercase"
"github.com/blevesearch/bleve/analysis/token/unicodenorm"
"github.com/blevesearch/bleve/analysis/tokenizer/unicode"
)
// issueIndexer (thread-safe) index for searching issues.
// It is assigned by InitIssueIndexer (either opened from
// setting.Indexer.IssuePath or newly created by createIssueIndexer) and is
// used by UpdateIssue, BatchUpdateIssues and SearchIssuesByKeyword.
var issueIndexer bleve.Index
// IssueIndexerData is the document stored in the issue indexer for one issue.
// The issue's own ID is not part of the document; it is carried separately by
// IssueIndexerUpdate and used as the document ID.
type IssueIndexerData struct {
// RepoID is matched with a numeric equality query to restrict a search to
// one repository.
RepoID int64
// Title, Content and Comments are the text fields searched by
// SearchIssuesByKeyword.
Title string
Content string
Comments []string
}
// IssueIndexerUpdate is a single update to the issue indexer: the document
// Data to store for the issue identified by IssueID.
type IssueIndexerUpdate struct {
// IssueID is encoded with indexerID to form the document ID in the index.
IssueID int64
// Data is the document indexed under that ID.
Data *IssueIndexerData
}
// issueIndexerAnalyzer is the name under which createIssueIndexer registers
// its custom analyzer; SearchIssuesByKeyword passes the same name to its
// match-phrase queries.
const issueIndexerAnalyzer = "issueIndexer"
// InitIssueIndexer initializes the package-level issue indexer.
//
// If nothing exists at setting.Indexer.IssuePath, a new index is created and
// populateIndexer is called to fill it. If the path exists, the index stored
// there is opened instead and populateIndexer is not called. Every failure is
// reported through log.Fatal.
func InitIssueIndexer(populateIndexer func() error) {
	_, statErr := os.Stat(setting.Indexer.IssuePath)
	switch {
	case statErr == nil:
		// Something already lives at the index path: open it.
		var openErr error
		issueIndexer, openErr = bleve.Open(setting.Indexer.IssuePath)
		if openErr != nil {
			log.Error(4, "Unable to open issues indexer (%s)."+
				" If the error is due to incompatible versions, try deleting the indexer files;"+
				" gitea will recreate them with the appropriate version the next time it runs."+
				" Deleting the indexer files will not result in loss of data.",
				setting.Indexer.IssuePath)
			log.Fatal(4, "InitIssuesIndexer, open index: %v", openErr)
		}
	case os.IsNotExist(statErr):
		// No index yet: create an empty one, then fill it.
		if createErr := createIssueIndexer(); createErr != nil {
			log.Fatal(4, "CreateIssuesIndexer: %v", createErr)
		}
		if populateErr := populateIndexer(); populateErr != nil {
			log.Fatal(4, "PopulateIssuesIndex: %v", populateErr)
		}
	default:
		log.Fatal(4, "InitIssuesIndexer: %v", statErr)
	}
}
// createIssueIndexer creates a new bleve index at setting.Indexer.IssuePath
// and stores it in issueIndexer.
//
// The index mapping declares RepoID as a numeric field and Title, Content and
// Comments as text fields. It registers a custom analyzer (unicode tokenizer
// followed by NFC normalization and lower-casing) under issueIndexerAnalyzer
// and makes it the mapping's default analyzer.
//
// NOTE(review): the document mapping is registered under the type name
// "issues", but nothing visible here assigns that type to IssueIndexerData.
// Confirm that bleve applies this mapping rather than its default one.
func createIssueIndexer() error {
	indexMapping := bleve.NewIndexMapping()

	issueMapping := bleve.NewDocumentMapping()
	issueMapping.AddFieldMappingsAt("RepoID", bleve.NewNumericFieldMapping())

	// All text fields share a single field mapping.
	sharedTextMapping := bleve.NewTextFieldMapping()
	for _, textField := range []string{"Title", "Content", "Comments"} {
		issueMapping.AddFieldMappingsAt(textField, sharedTextMapping)
	}

	const unicodeNormNFC = "unicodeNormNFC"
	filterConfig := map[string]interface{}{
		"type": unicodenorm.Name,
		"form": unicodenorm.NFC,
	}
	if err := indexMapping.AddCustomTokenFilter(unicodeNormNFC, filterConfig); err != nil {
		return err
	}

	analyzerConfig := map[string]interface{}{
		"type":          custom.Name,
		"char_filters":  []string{},
		"tokenizer":     unicode.Name,
		"token_filters": []string{unicodeNormNFC, lowercase.Name},
	}
	if err := indexMapping.AddCustomAnalyzer(issueIndexerAnalyzer, analyzerConfig); err != nil {
		return err
	}

	indexMapping.DefaultAnalyzer = issueIndexerAnalyzer
	indexMapping.AddDocumentMapping("issues", issueMapping)

	created, err := bleve.New(setting.Indexer.IssuePath, indexMapping)
	issueIndexer = created
	return err
}
// UpdateIssue indexes update.Data in the issue indexer under the document ID
// derived from update.IssueID, and returns the indexer's error, if any.
func UpdateIssue(update IssueIndexerUpdate) error {
	docID := indexerID(update.IssueID)
	return issueIndexer.Index(docID, update.Data)
}
// BatchUpdateIssues adds all given updates to one batch and then submits that
// batch to the issue indexer. If adding any update to the batch fails, that
// error is returned and the batch is not submitted.
func BatchUpdateIssues(updates ...IssueIndexerUpdate) error {
	pending := issueIndexer.NewBatch()
	for i := range updates {
		docID := indexerID(updates[i].IssueID)
		if err := pending.Index(docID, updates[i].Data); err != nil {
			return err
		}
	}
	return issueIndexer.Batch(pending)
}
// SearchIssuesByKeyword searches for issues by given conditions.
// Returns the matching issue IDs
func SearchIssuesByKeyword(repoID int64, keyword string) ([]int64, error) {
indexerQuery := bleve.NewConjunctionQuery(
numericEqualityQuery(repoID, "RepoID"),
bleve.NewDisjunctionQuery(
newMatchPhraseQuery(keyword, "Title", issueIndexerAnalyzer),
newMatchPhraseQuery(keyword, "Content", issueIndexerAnalyzer),
newMatchPhraseQuery(keyword, "Comments", issueIndexerAnalyzer),
))
search := bleve.NewSearchRequestOptions(indexerQuery, 2147483647, 0, false)
result, err := issueIndexer.Search(search)
if err != nil {
return nil, err
}
issueIDs := make([]int64, len(result.Hits))
for i, hit := range result.Hits {
issueIDs[i], err = idOfIndexerID(hit.ID)
if err != nil {
return nil, err
}
}
return issueIDs, nil
}

@ -13,7 +13,6 @@ import (
"code.gitea.io/gitea/models/migrations" "code.gitea.io/gitea/models/migrations"
"code.gitea.io/gitea/modules/cron" "code.gitea.io/gitea/modules/cron"
"code.gitea.io/gitea/modules/highlight" "code.gitea.io/gitea/modules/highlight"
"code.gitea.io/gitea/modules/indexer"
"code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/mailer" "code.gitea.io/gitea/modules/mailer"
"code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/markup"
@ -63,7 +62,7 @@ func GlobalInit() {
// Booting long running goroutines. // Booting long running goroutines.
cron.NewContext() cron.NewContext()
indexer.NewContext() models.InitIssueIndexer()
models.InitSyncMirrors() models.InitSyncMirrors()
models.InitDeliverHooks() models.InitDeliverHooks()
models.InitTestPullRequests() models.InitTestPullRequests()

@ -22,6 +22,7 @@ import (
"code.gitea.io/gitea/modules/auth" "code.gitea.io/gitea/modules/auth"
"code.gitea.io/gitea/modules/base" "code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/context" "code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/indexer"
"code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/markdown" "code.gitea.io/gitea/modules/markdown"
"code.gitea.io/gitea/modules/notification" "code.gitea.io/gitea/modules/notification"
@ -142,7 +143,7 @@ func Issues(ctx *context.Context) {
var issueIDs []int64 var issueIDs []int64
var err error var err error
if len(keyword) > 0 { if len(keyword) > 0 {
issueIDs, err = models.SearchIssuesByKeyword(repo.ID, keyword) issueIDs, err = indexer.SearchIssuesByKeyword(repo.ID, keyword)
if len(issueIDs) == 0 { if len(issueIDs) == 0 {
forceEmpty = true forceEmpty = true
} }

@ -4,6 +4,7 @@
[![Join the chat at https://gitter.im/blevesearch/bleve](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/blevesearch/bleve?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [![Join the chat at https://gitter.im/blevesearch/bleve](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/blevesearch/bleve?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![codebeat](https://codebeat.co/badges/38a7cbc9-9cf5-41c0-a315-0746178230f4)](https://codebeat.co/projects/github-com-blevesearch-bleve) [![codebeat](https://codebeat.co/badges/38a7cbc9-9cf5-41c0-a315-0746178230f4)](https://codebeat.co/projects/github-com-blevesearch-bleve)
[![Go Report Card](https://goreportcard.com/badge/blevesearch/bleve)](https://goreportcard.com/report/blevesearch/bleve) [![Go Report Card](https://goreportcard.com/badge/blevesearch/bleve)](https://goreportcard.com/report/blevesearch/bleve)
[![Sourcegraph](https://sourcegraph.com/github.com/blevesearch/bleve/-/badge.svg)](https://sourcegraph.com/github.com/blevesearch/bleve?badge) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
modern text indexing in go - [blevesearch.com](http://www.blevesearch.com/) modern text indexing in go - [blevesearch.com](http://www.blevesearch.com/)
@ -33,29 +34,33 @@ Discuss usage and development of bleve in the [google group](https://groups.goog
## Indexing ## Indexing
message := struct{ ```go
Id string message := struct{
From string Id string
Body string From string
}{ Body string
Id: "example", }{
From: "marty.schoch@gmail.com", Id: "example",
Body: "bleve indexing is easy", From: "marty.schoch@gmail.com",
} Body: "bleve indexing is easy",
}
mapping := bleve.NewIndexMapping()
index, err := bleve.New("example.bleve", mapping) mapping := bleve.NewIndexMapping()
if err != nil { index, err := bleve.New("example.bleve", mapping)
panic(err) if err != nil {
} panic(err)
index.Index(message.Id, message) }
index.Index(message.Id, message)
```
## Querying ## Querying
index, _ := bleve.Open("example.bleve") ```go
query := bleve.NewQueryStringQuery("bleve") index, _ := bleve.Open("example.bleve")
searchRequest := bleve.NewSearchRequest(query) query := bleve.NewQueryStringQuery("bleve")
searchResult, _ := index.Search(searchRequest) searchRequest := bleve.NewSearchRequest(query)
searchResult, _ := index.Search(searchRequest)
```
## License ## License

@ -0,0 +1,145 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package custom
import (
"fmt"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/registry"
)
// Name is the name under which AnalyzerConstructor is registered.
const Name = "custom"
// AnalyzerConstructor assembles an analyzer from a configuration map.
//
// Recognised keys:
//   - "char_filters"  (optional) []string or []interface{} of char filter names
//   - "tokenizer"     (required) name of the tokenizer, as a string
//   - "token_filters" (optional) []string or []interface{} of token filter names
//
// Every name is resolved through cache. The first problem encountered
// (wrong type, missing tokenizer, failed lookup) is returned as the error.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	// Character filters.
	var charFilters []analysis.CharFilter
	if rawCharFilters, present := config["char_filters"]; present {
		var names []string
		switch typed := rawCharFilters.(type) {
		case []string:
			names = typed
		case []interface{}:
			converted, err := convertInterfaceSliceToStringSlice(typed, "char filter")
			if err != nil {
				return nil, err
			}
			names = converted
		default:
			return nil, fmt.Errorf("unsupported type for char_filters, must be slice")
		}
		resolved, err := getCharFilters(names, cache)
		if err != nil {
			return nil, err
		}
		charFilters = resolved
	}

	// Tokenizer (mandatory).
	rawTokenizer, present := config["tokenizer"]
	if !present {
		return nil, fmt.Errorf("must specify tokenizer")
	}
	tokenizerName, isString := rawTokenizer.(string)
	if !isString {
		return nil, fmt.Errorf("must specify tokenizer as string")
	}
	tokenizer, err := cache.TokenizerNamed(tokenizerName)
	if err != nil {
		return nil, err
	}

	// Token filters.
	var tokenFilters []analysis.TokenFilter
	if rawTokenFilters, present := config["token_filters"]; present {
		var names []string
		switch typed := rawTokenFilters.(type) {
		case []string:
			names = typed
		case []interface{}:
			converted, err := convertInterfaceSliceToStringSlice(typed, "token filter")
			if err != nil {
				return nil, err
			}
			names = converted
		default:
			return nil, fmt.Errorf("unsupported type for token_filters, must be slice")
		}
		resolved, err := getTokenFilters(names, cache)
		if err != nil {
			return nil, err
		}
		tokenFilters = resolved
	}

	analyzer := &analysis.Analyzer{Tokenizer: tokenizer}
	if charFilters != nil {
		analyzer.CharFilters = charFilters
	}
	if tokenFilters != nil {
		analyzer.TokenFilters = tokenFilters
	}
	return analyzer, nil
}
// init registers AnalyzerConstructor with the registry under Name.
func init() {
registry.RegisterAnalyzer(Name, AnalyzerConstructor)
}
// getCharFilters looks up every name in charFilterNames through cache and
// returns the char filters in the same order. The first failed lookup
// aborts the operation and its error is returned.
func getCharFilters(charFilterNames []string, cache *registry.Cache) ([]analysis.CharFilter, error) {
	resolved := make([]analysis.CharFilter, 0, len(charFilterNames))
	for _, name := range charFilterNames {
		filter, err := cache.CharFilterNamed(name)
		if err != nil {
			return nil, err
		}
		resolved = append(resolved, filter)
	}
	return resolved, nil
}
// getTokenFilters looks up every name in tokenFilterNames through cache and
// returns the token filters in the same order. The first failed lookup
// aborts the operation and its error is returned.
func getTokenFilters(tokenFilterNames []string, cache *registry.Cache) ([]analysis.TokenFilter, error) {
	resolved := make([]analysis.TokenFilter, 0, len(tokenFilterNames))
	for _, name := range tokenFilterNames {
		filter, err := cache.TokenFilterNamed(name)
		if err != nil {
			return nil, err
		}
		resolved = append(resolved, filter)
	}
	return resolved, nil
}
// convertInterfaceSliceToStringSlice converts a slice of arbitrary values
// into a slice of strings.
//
// objType names the kind of object being converted (for example
// "char filter") and is used only in the error message. An error is returned
// as soon as an element that is not a string is encountered.
func convertInterfaceSliceToStringSlice(interfaceSlice []interface{}, objType string) ([]string, error) {
	stringSlice := make([]string, len(interfaceSlice))
	for i, interfaceObj := range interfaceSlice {
		stringObj, ok := interfaceObj.(string)
		if !ok {
			// objType is passed as an argument rather than concatenated into
			// the format string, so a '%' inside it is never treated as a verb.
			return nil, fmt.Errorf("%s name must be a string", objType)
		}
		stringSlice[i] = stringObj
	}
	return stringSlice, nil
}

@ -1,46 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package simple
import (
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/analysis/token/lowercase"
"github.com/blevesearch/bleve/analysis/tokenizer/letter"
"github.com/blevesearch/bleve/registry"
)
// Name is the name under which AnalyzerConstructor is registered.
const Name = "simple"
// AnalyzerConstructor builds the "simple" analyzer: the tokenizer registered
// under letter.Name followed by the token filter registered under
// lowercase.Name. config is not consulted. A failed lookup in cache is
// returned as the error.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	letterTokenizer, err := cache.TokenizerNamed(letter.Name)
	if err != nil {
		return nil, err
	}

	lowercaseFilter, err := cache.TokenFilterNamed(lowercase.Name)
	if err != nil {
		return nil, err
	}

	return &analysis.Analyzer{
		Tokenizer:    letterTokenizer,
		TokenFilters: []analysis.TokenFilter{lowercaseFilter},
	}, nil
}
// init registers AnalyzerConstructor with the registry under Name.
func init() {
registry.RegisterAnalyzer(Name, AnalyzerConstructor)
}

@ -0,0 +1,79 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package unicodenorm
import (
"fmt"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/registry"
"golang.org/x/text/unicode/norm"
)
// Name is the name under which UnicodeNormalizeFilterConstructor is registered.
const Name = "normalize_unicode"
// NFC, NFD, NFKC and NFKD are the form names accepted by
// NewUnicodeNormalizeFilter; each is a key of the forms map below.
const NFC = "nfc"
const NFD = "nfd"
const NFKC = "nfkc"
const NFKD = "nfkd"
// forms maps each supported form name to the corresponding norm.Form.
var forms = map[string]norm.Form{
NFC: norm.NFC,
NFD: norm.NFD,
NFKC: norm.NFKC,
NFKD: norm.NFKD,
}
// UnicodeNormalizeFilter is a token filter that rewrites every token's term
// using a fixed Unicode normalization form.
type UnicodeNormalizeFilter struct {
// form is the normalization form applied to each term.
form norm.Form
}
// NewUnicodeNormalizeFilter returns a filter for the normalization form
// registered under formName in forms. An unknown name yields an error.
func NewUnicodeNormalizeFilter(formName string) (*UnicodeNormalizeFilter, error) {
	if normForm, known := forms[formName]; known {
		return &UnicodeNormalizeFilter{form: normForm}, nil
	}
	return nil, fmt.Errorf("no form named %s", formName)
}
// MustNewUnicodeNormalizeFilter is like NewUnicodeNormalizeFilter but panics
// with the returned error instead of handing it back to the caller.
func MustNewUnicodeNormalizeFilter(formName string) *UnicodeNormalizeFilter {
	normalizer, err := NewUnicodeNormalizeFilter(formName)
	if err == nil {
		return normalizer
	}
	panic(err)
}
// Filter normalizes the term of every token in input using the filter's
// form. Tokens are modified in place and the same stream is returned.
func (s *UnicodeNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		input[i].Term = s.form.Bytes(input[i].Term)
	}
	return input
}
// UnicodeNormalizeFilterConstructor builds a UnicodeNormalizeFilter from a
// configuration map. The "form" key is required and must hold a string form
// name; it is passed on to NewUnicodeNormalizeFilter. cache is not used.
func UnicodeNormalizeFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	formName, isString := config["form"].(string)
	if !isString {
		return nil, fmt.Errorf("must specify form")
	}
	return NewUnicodeNormalizeFilter(formName)
}
// init registers UnicodeNormalizeFilterConstructor with the registry under Name.
func init() {
registry.RegisterTokenFilter(Name, UnicodeNormalizeFilterConstructor)
}

@ -1,76 +0,0 @@
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package character
import (
"unicode/utf8"
"github.com/blevesearch/bleve/analysis"
)
// IsTokenRune reports whether the rune r belongs inside a token.
type IsTokenRune func(r rune) bool
// CharacterTokenizer splits input into tokens made of consecutive runes
// accepted by its predicate.
type CharacterTokenizer struct {
// isTokenRun is the predicate deciding whether a rune is part of a token.
isTokenRun IsTokenRune
}
// NewCharacterTokenizer returns a CharacterTokenizer that uses f to decide
// which runes belong to a token.
func NewCharacterTokenizer(f IsTokenRune) *CharacterTokenizer {
return &CharacterTokenizer{
isTokenRun: f,
}
}
// Tokenize splits input into tokens, each being a maximal run of consecutive
// runes accepted by the tokenizer's predicate. Token terms are sub-slices of
// input, Start/End are byte offsets into input and Position counts tokens
// starting from 1. Every token has type analysis.AlphaNumeric.
//
// Scanning stops at the first rune decoded as utf8.RuneError, which is also
// what utf8.DecodeRune yields for empty input; a token that is pending at
// that point is still emitted.
func (c *CharacterTokenizer) Tokenize(input []byte) analysis.TokenStream {
	tokens := make(analysis.TokenStream, 0, 1024)
	tokenStart, tokenEnd := 0, 0
	emitted := 0

	// flush appends the pending token, if there is one, and reports whether
	// it did so.
	flush := func() bool {
		if tokenEnd-tokenStart <= 0 {
			return false
		}
		tokens = append(tokens, &analysis.Token{
			Term:     input[tokenStart:tokenEnd],
			Start:    tokenStart,
			End:      tokenEnd,
			Position: emitted + 1,
			Type:     analysis.AlphaNumeric,
		})
		return true
	}

	pos := 0
	for {
		r, width := utf8.DecodeRune(input[pos:])
		if r == utf8.RuneError {
			break
		}
		if c.isTokenRun(r) {
			// extend the pending token over this rune
			tokenEnd = pos + width
		} else {
			// a separator closes the pending token and restarts after itself
			if flush() {
				emitted++
			}
			tokenStart = pos + width
			tokenEnd = tokenStart
		}
		pos += width
	}

	// input ended (or decoding stopped) in the middle of a token
	flush()
	return tokens
}

@ -1,33 +0,0 @@
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package letter
import (
"unicode"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/analysis/tokenizer/character"
"github.com/blevesearch/bleve/registry"
)
// Name is the name under which TokenizerConstructor is registered.
const Name = "letter"
// TokenizerConstructor returns a character tokenizer whose tokens are runs
// of runes for which unicode.IsLetter is true. config and cache are unused
// and the returned error is always nil.
func TokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
return character.NewCharacterTokenizer(unicode.IsLetter), nil
}
// init registers TokenizerConstructor with the registry under Name.
func init() {
registry.RegisterTokenizer(Name, TokenizerConstructor)
}

@ -1,23 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// +build appengine appenginevm
package bleve
// initDisk is intentionally a no-op in this build: in the appengine
// environment disk based indexes cannot be supported, so no extra
// configuration is done here.
func initDisk() {
}

@ -0,0 +1,137 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"fmt"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/geo"
"github.com/blevesearch/bleve/numeric"
)
// GeoPrecisionStep is the shift increment used by GeoPointField.Analyze: one
// extra token is produced for every multiple of this value below 64.
var GeoPrecisionStep uint = 9
// GeoPointField is a document field holding a geo point as a prefix coded
// integer value.
type GeoPointField struct {
// name is the field name.
name string
// arrayPositions is returned by ArrayPositions and passed to
// analysis.TokenFrequency by Analyze.
arrayPositions []uint64
// options are the indexing options of the field.
options IndexingOptions
// value is the prefix coded representation of the point.
value numeric.PrefixCoded
// numPlainTextBytes is the value reported by NumPlainTextBytes.
numPlainTextBytes uint64
}
// Name returns the name of the field.
func (n *GeoPointField) Name() string {
return n.name
}
// ArrayPositions returns the array positions stored for the field.
func (n *GeoPointField) ArrayPositions() []uint64 {
return n.arrayPositions
}
// Options returns the indexing options of the field.
func (n *GeoPointField) Options() IndexingOptions {
return n.options
}
// Analyze produces the tokens for the field and returns their count together
// with the token frequencies computed by analysis.TokenFrequency.
//
// The first token is the stored value itself. If the value decodes to an
// int64, one more token is added for each shift GeoPrecisionStep,
// 2*GeoPrecisionStep, ... below 64, holding the value prefix coded with that
// shift. Generation stops early if encoding fails. All tokens are numeric
// and share position 1.
func (n *GeoPointField) Analyze() (int, analysis.TokenFrequencies) {
	tokens := analysis.TokenStream{
		&analysis.Token{
			Start:    0,
			End:      len(n.value),
			Term:     n.value,
			Position: 1,
			Type:     analysis.Numeric,
		},
	}

	if original, decodeErr := n.value.Int64(); decodeErr == nil {
		for shift := GeoPrecisionStep; shift < 64; shift += GeoPrecisionStep {
			shiftEncoded, err := numeric.NewPrefixCodedInt64(original, shift)
			if err != nil {
				break
			}
			tokens = append(tokens, &analysis.Token{
				Start:    0,
				End:      len(shiftEncoded),
				Term:     shiftEncoded,
				Position: 1,
				Type:     analysis.Numeric,
			})
		}
	}

	fieldLength := len(tokens)
	frequencies := analysis.TokenFrequency(tokens, n.arrayPositions, n.options.IncludeTermVectors())
	return fieldLength, frequencies
}
// Value returns the prefix coded value of the field as raw bytes.
func (n *GeoPointField) Value() []byte {
return n.value
}
// Lon decodes the stored value and returns the longitude extracted from it
// with geo.MortonUnhashLon. A decoding failure is returned as the error.
func (n *GeoPointField) Lon() (float64, error) {
	hash, err := n.value.Int64()
	if err == nil {
		return geo.MortonUnhashLon(uint64(hash)), nil
	}
	return 0.0, err
}
// Lat decodes the stored value and returns the latitude extracted from it
// with geo.MortonUnhashLat. A decoding failure is returned as the error.
func (n *GeoPointField) Lat() (float64, error) {
	hash, err := n.value.Int64()
	if err == nil {
		return geo.MortonUnhashLat(uint64(hash)), nil
	}
	return 0.0, err
}
// GoString returns a textual description of the field listing its name,
// options and value.
func (n *GeoPointField) GoString() string {
return fmt.Sprintf("&document.GeoPointField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
}
// NumPlainTextBytes returns the plain text byte count recorded when the
// field was constructed.
func (n *GeoPointField) NumPlainTextBytes() uint64 {
return n.numPlainTextBytes
}
// NewGeoPointFieldFromBytes creates a field from an already encoded value.
// The field uses DefaultNumericIndexingOptions and reports len(value) as its
// number of plain text bytes.
func NewGeoPointFieldFromBytes(name string, arrayPositions []uint64, value []byte) *GeoPointField {
	field := new(GeoPointField)
	field.name = name
	field.arrayPositions = arrayPositions
	field.value = value
	field.options = DefaultNumericIndexingOptions
	field.numPlainTextBytes = uint64(len(value))
	return field
}
// NewGeoPointField creates a field for the point (lon, lat) using
// DefaultNumericIndexingOptions.
func NewGeoPointField(name string, arrayPositions []uint64, lon, lat float64) *GeoPointField {
return NewGeoPointFieldWithIndexingOptions(name, arrayPositions, lon, lat, DefaultNumericIndexingOptions)
}
// NewGeoPointFieldWithIndexingOptions creates a field for the point
// (lon, lat) with the given indexing options. The point is stored as the
// morton hash of lon/lat, prefix coded with shift 0.
func NewGeoPointFieldWithIndexingOptions(name string, arrayPositions []uint64, lon, lat float64, options IndexingOptions) *GeoPointField {
	encoded := numeric.MustNewPrefixCodedInt64(int64(geo.MortonHash(lon, lat)), 0)
	return &GeoPointField{
		name:           name,
		arrayPositions: arrayPositions,
		value:          encoded,
		options:        options,
		// not correct, just a place holder until we revisit how fields are
		// represented and can fix this better
		numPlainTextBytes: uint64(8),
	}
}

@ -0,0 +1,9 @@
# geo support in bleve
First, all of this geo code is a Go adaptation of the [Lucene 5.3.2 sandbox geo support](https://lucene.apache.org/core/5_3_2/sandbox/org/apache/lucene/util/package-summary.html).
## Notes
- All of the APIs will use float64 for lon/lat values.
- When describing a point in function arguments or return values, we always use the order lon, lat.
- High level APIs will use TopLeft and BottomRight to describe bounding boxes. This may not map cleanly to min/max lon/lat when crossing the dateline. The lower level APIs will use min/max lon/lat and require the higher-level code to split boxes accordingly.

@ -0,0 +1,170 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package geo
import (
"fmt"
"math"
"github.com/blevesearch/bleve/numeric"
)
// GeoBits is the number of bits used for a single geo point
// Currently this is 32bits for lon and 32bits for lat
var GeoBits uint = 32
// Valid longitude and latitude ranges, in degrees.
var minLon = -180.0
var minLat = -90.0
var maxLon = 180.0
var maxLat = 90.0
// The same bounds converted to radians.
var minLonRad = minLon * degreesToRadian
var minLatRad = minLat * degreesToRadian
var maxLonRad = maxLon * degreesToRadian
var maxLatRad = maxLat * degreesToRadian
// geoTolerance is the largest difference that compareGeo still treats as equal.
var geoTolerance = 1E-6
// lonScale and latScale map the 360 degree longitude range and the 180
// degree latitude range onto the integers 0 .. 2^GeoBits-1.
var lonScale = float64((uint64(0x1)<<GeoBits)-1) / 360.0
var latScale = float64((uint64(0x1)<<GeoBits)-1) / 180.0
// MortonHash computes the morton hash value for the provided geo point
// This point is ordered as lon, lat.
// The hash is built by interleaving the scaled longitude with the scaled
// latitude.
func MortonHash(lon, lat float64) uint64 {
return numeric.Interleave(scaleLon(lon), scaleLat(lat))
}
// scaleLon converts a longitude in degrees to its integer representation by
// offsetting it by minLon and multiplying by lonScale.
func scaleLon(lon float64) uint64 {
	return uint64((lon - minLon) * lonScale)
}
// scaleLat converts a latitude in degrees to its integer representation by
// offsetting it by minLat and multiplying by latScale.
func scaleLat(lat float64) uint64 {
	return uint64((lat - minLat) * latScale)
}
// MortonUnhashLon extracts the longitude value from the provided morton hash.
// The hash is deinterleaved as-is and the result is unscaled to degrees.
func MortonUnhashLon(hash uint64) float64 {
return unscaleLon(numeric.Deinterleave(hash))
}
// MortonUnhashLat extracts the latitude value from the provided morton hash.
// The hash is shifted right by one bit before deinterleaving, then the
// result is unscaled to degrees.
func MortonUnhashLat(hash uint64) float64 {
return unscaleLat(numeric.Deinterleave(hash >> 1))
}
// unscaleLon is the inverse of scaleLon: it divides by lonScale and adds
// minLon to obtain degrees.
func unscaleLon(lon uint64) float64 {
return (float64(lon) / lonScale) + minLon
}
// unscaleLat is the inverse of scaleLat: it divides by latScale and adds
// minLat to obtain degrees.
func unscaleLat(lat uint64) float64 {
return (float64(lat) / latScale) + minLat
}
// compareGeo returns 0 when a and b differ by no more than geoTolerance and
// the signed difference a-b otherwise.
func compareGeo(a, b float64) float64 {
	diff := a - b
	withinTolerance := math.Abs(diff) <= geoTolerance
	if withinTolerance {
		return 0
	}
	return diff
}
// RectIntersects reports whether rectangles a and b overlap. Rectangles that
// merely touch along an edge or at a corner count as intersecting.
func RectIntersects(aMinX, aMinY, aMaxX, aMaxY, bMinX, bMinY, bMaxX, bMaxY float64) bool {
	// disjoint along the X axis
	if aMaxX < bMinX || aMinX > bMaxX {
		return false
	}
	// disjoint along the Y axis
	if aMaxY < bMinY || aMinY > bMaxY {
		return false
	}
	return true
}
// RectWithin reports whether box a lies entirely inside box b. Boxes sharing
// an edge, including identical boxes, count as within.
func RectWithin(aMinX, aMinY, aMaxX, aMaxY, bMinX, bMinY, bMaxX, bMaxY float64) bool {
	// a sticks out below b's minimum corner
	if aMinX < bMinX || aMinY < bMinY {
		return false
	}
	// a sticks out beyond b's maximum corner
	if aMaxX > bMaxX || aMaxY > bMaxY {
		return false
	}
	return true
}
// BoundingBoxContains reports whether the point (lon, lat) lies inside the
// box [minLon, maxLon] x [minLat, maxLat]. Comparisons go through compareGeo,
// so points within its tolerance of an edge are treated as being on it.
func BoundingBoxContains(lon, lat, minLon, minLat, maxLon, maxLat float64) bool {
	lonInside := compareGeo(lon, minLon) >= 0 && compareGeo(lon, maxLon) <= 0
	latInside := compareGeo(lat, minLat) >= 0 && compareGeo(lat, maxLat) <= 0
	return lonInside && latInside
}
// degreesToRadian is the factor that converts degrees to radians.
const degreesToRadian = math.Pi / 180
// radiansToDegrees is the factor that converts radians to degrees.
const radiansToDegrees = 180 / math.Pi
// DegreesToRadians converts an angle in degrees to radians by multiplying it
// with degreesToRadian.
func DegreesToRadians(d float64) float64 {
return d * degreesToRadian
}
// RadiansToDegrees converts an angle in radians to degrees by multiplying it
// with radiansToDegrees.
func RadiansToDegrees(r float64) float64 {
return r * radiansToDegrees
}
// earthMeanRadiusMeters is the divisor RectFromPointDistance uses to turn a
// distance into an angle in radians; per its name it is the mean radius of
// the earth in meters.
var earthMeanRadiusMeters = 6371008.7714
// RectFromPointDistance computes a bounding rectangle around the point
// (lon, lat), given in degrees, that reaches dist away from it.
// dist is divided by earthMeanRadiusMeters, so it is presumably expressed in
// meters - TODO confirm against callers.
//
// The four values are returned in the order min lon, max lat, max lon,
// min lat, all in degrees. An error is returned, with all values zero, when
// lon or lat is rejected by checkLongitude or checkLatitude.
func RectFromPointDistance(lon, lat, dist float64) (float64, float64, float64, float64, error) {
err := checkLongitude(lon)
if err != nil {
return 0, 0, 0, 0, err
}
err = checkLatitude(lat)
if err != nil {
return 0, 0, 0, 0, err
}
radLon := DegreesToRadians(lon)
radLat := DegreesToRadians(lat)
// angular distance in radians; 7e-2 is added to dist before the division
// (NOTE(review): looks like a small safety margin - confirm)
radDistance := (dist + 7e-2) / earthMeanRadiusMeters
minLatL := radLat - radDistance
maxLatL := radLat + radDistance
var minLonL, maxLonL float64
if minLatL > minLatRad && maxLatL < maxLatRad {
// neither latitude bound reaches a pole: derive the longitude extent
deltaLon := asin(sin(radDistance) / cos(radLat))
minLonL = radLon - deltaLon
// shift a longitude that fell below the minimum by a full turn
if minLonL < minLonRad {
minLonL += 2 * math.Pi
}
maxLonL = radLon + deltaLon
// shift a longitude that exceeded the maximum by a full turn
if maxLonL > maxLonRad {
maxLonL -= 2 * math.Pi
}
} else {
// pole is inside distance
// clamp the latitudes and cover the full longitude range
minLatL = math.Max(minLatL, minLatRad)
maxLatL = math.Min(maxLatL, maxLatRad)
minLonL = minLonRad
maxLonL = maxLonRad
}
return RadiansToDegrees(minLonL),
RadiansToDegrees(maxLatL),
RadiansToDegrees(maxLonL),
RadiansToDegrees(minLatL),
nil
}
// checkLatitude returns an error when latitude is NaN or lies outside
// [minLat, maxLat], and nil otherwise.
func checkLatitude(latitude float64) error {
	switch {
	case math.IsNaN(latitude), latitude < minLat, latitude > maxLat:
		return fmt.Errorf("invalid latitude %f; must be between %f and %f", latitude, minLat, maxLat)
	default:
		return nil
	}
}
// checkLongitude returns an error when longitude is NaN or lies outside
// [minLon, maxLon], and nil otherwise.
func checkLongitude(longitude float64) error {
	switch {
	case math.IsNaN(longitude), longitude < minLon, longitude > maxLon:
		return fmt.Errorf("invalid longitude %f; must be between %f and %f", longitude, minLon, maxLon)
	default:
		return nil
	}
}

@ -0,0 +1,98 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package geo
import (
"fmt"
"math"
"strconv"
"strings"
)
// distanceUnit describes one unit of distance.
type distanceUnit struct {
// conv is the factor that converts a value in this unit to meters.
conv float64
// suffixes are the spellings of the unit recognised at the end of a
// distance string.
suffixes []string
}
// Supported units, each with its conversion factor to meters and the
// suffixes it is recognised by.
var inch = distanceUnit{0.0254, []string{"in", "inch"}}
var yard = distanceUnit{0.9144, []string{"yd", "yards"}}
var feet = distanceUnit{0.3048, []string{"ft", "feet"}}
var kilom = distanceUnit{1000, []string{"km", "kilometers"}}
var nauticalm = distanceUnit{1852.0, []string{"nm", "nauticalmiles"}}
var millim = distanceUnit{0.001, []string{"mm", "millimeters"}}
var centim = distanceUnit{0.01, []string{"cm", "centimeters"}}
var miles = distanceUnit{1609.344, []string{"mi", "miles"}}
var meters = distanceUnit{1, []string{"m", "meters"}}
// distanceUnits lists the units in the order ParseDistance tries them. The
// order matters because matching is by suffix: units whose suffix ends in
// another unit's suffix (for example "km" vs "m", "nauticalmiles" vs
// "miles") must come first.
var distanceUnits = []*distanceUnit{
&inch, &yard, &feet, &kilom, &nauticalm, &millim, &centim, &miles, &meters,
}
// ParseDistance parses a distance string and returns the distance in meters.
// Supported formats include:
// "5in" "5inch" "7yd" "7yards" "9ft" "9feet" "11km" "11kilometers"
// "3nm" "3nauticalmiles" "13mm" "13millimeters" "15cm" "15centimeters"
// "17mi" "17miles" "19m" "19meters"
// The units in distanceUnits are tried in order and the first matching
// suffix decides the unit. When no suffix matches, the whole string is
// parsed as a number of meters.
// If the numeric part cannot be parsed, 0 and the parse error are returned.
func ParseDistance(d string) (float64, error) {
	for _, unit := range distanceUnits {
		for _, suffix := range unit.suffixes {
			if !strings.HasSuffix(d, suffix) {
				continue
			}
			amount, err := strconv.ParseFloat(d[:len(d)-len(suffix)], 64)
			if err != nil {
				return 0, err
			}
			return amount * unit.conv, nil
		}
	}

	// no unit matched, try assuming meters?
	amount, err := strconv.ParseFloat(d, 64)
	if err != nil {
		return 0, err
	}
	return amount, nil
}
// ParseDistanceUnit looks up the distance unit u, which must equal one of
// the known suffixes exactly, and returns the multiplier that converts a
// value in that unit to meters. An unknown unit yields 0 and an error.
func ParseDistanceUnit(u string) (float64, error) {
	for _, unit := range distanceUnits {
		for i := range unit.suffixes {
			if unit.suffixes[i] == u {
				return unit.conv, nil
			}
		}
	}
	return 0, fmt.Errorf("unknown distance unit: %s", u)
}
// Haversin computes the distance between two points given as lon/lat pairs
// in degrees.
// This implementation uses the sloppy math implementations which trade off
// accuracy for performance. The distance returned is in kilometers.
func Haversin(lon1, lat1, lon2, lat2 float64) float64 {
	lat1Rad := lat1 * degreesToRadian
	lat2Rad := lat2 * degreesToRadian

	latTerm := 1 - cos(lat1Rad-lat2Rad)
	lonTerm := 1 - cos((lon1-lon2)*degreesToRadian)
	h := (latTerm + cos(lat1Rad)*cos(lat2Rad)*lonTerm) / 2

	// the diameter is looked up at the mean of the two latitudes
	meanLat := (lat1Rad + lat2Rad) / 2
	diameter := earthDiameter(meanLat)

	// the square root is capped at 1 before being handed to asin
	return diameter * asin(math.Min(1, math.Sqrt(h)))
}

@ -0,0 +1,140 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package geo
import (
"reflect"
"strings"
)
// ExtractGeoPoint takes an arbitrary interface{} and tries its best to
// interpret it as a geo point. Supported formats:
// Container:
// slice length 2 (GeoJSON)
// first element lon, second element lat
// map[string]interface{}
// exact keys lat and lon or lng
// struct
// w/exported fields whose lower-cased name starts with lat and lon or lng
// any value
// satisfying the later and loner or lnger interfaces
//
// in all cases values must be some sort of numeric-like thing: int/uint/float
//
// The formats are tried in the order listed and a later match overwrites an
// earlier one. success is true only when both a longitude and a latitude
// were found. A nil thing is never a geo point: it yields 0, 0, false.
func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
	thingVal := reflect.ValueOf(thing)
	if !thingVal.IsValid() {
		// reflect.ValueOf(nil) is the zero Value; calling Type on it panics,
		// so bail out before touching it.
		return 0, 0, false
	}
	thingTyp := thingVal.Type()

	var foundLon, foundLat bool

	// is it a slice
	if thingVal.Kind() == reflect.Slice {
		// must be length 2
		if thingVal.Len() == 2 {
			first := thingVal.Index(0)
			if first.CanInterface() {
				lon, foundLon = extractNumericVal(first.Interface())
			}
			second := thingVal.Index(1)
			if second.CanInterface() {
				lat, foundLat = extractNumericVal(second.Interface())
			}
		}
	}

	// is it a map
	if l, ok := thing.(map[string]interface{}); ok {
		if lval, ok := l["lon"]; ok {
			lon, foundLon = extractNumericVal(lval)
		} else if lval, ok := l["lng"]; ok {
			lon, foundLon = extractNumericVal(lval)
		}
		if lval, ok := l["lat"]; ok {
			lat, foundLat = extractNumericVal(lval)
		}
	}

	// now try reflection on struct fields
	if thingVal.Kind() == reflect.Struct {
		for i := 0; i < thingVal.NumField(); i++ {
			field := thingVal.Field(i)
			// unexported fields cannot be read through reflection
			if !field.CanInterface() {
				continue
			}
			fieldName := strings.ToLower(thingTyp.Field(i).Name)
			switch {
			case strings.HasPrefix(fieldName, "lon"), strings.HasPrefix(fieldName, "lng"):
				lon, foundLon = extractNumericVal(field.Interface())
			case strings.HasPrefix(fieldName, "lat"):
				lat, foundLat = extractNumericVal(field.Interface())
			}
		}
	}

	// last hope, some interfaces
	// lon
	if l, ok := thing.(loner); ok {
		lon = l.Lon()
		foundLon = true
	} else if l, ok := thing.(lnger); ok {
		lon = l.Lng()
		foundLon = true
	}
	// lat
	if l, ok := thing.(later); ok {
		lat = l.Lat()
		foundLat = true
	}

	return lon, lat, foundLon && foundLat
}
// extractNumericVal converts v to a float64 when it holds a signed integer,
// unsigned integer or floating point value of any size. The boolean result
// reports whether the conversion was possible; for a nil v or any other
// kind of value it returns 0, false.
func extractNumericVal(v interface{}) (float64, bool) {
	val := reflect.ValueOf(v)
	if !val.IsValid() {
		// v is a nil interface: there is no type to inspect, and asking the
		// zero Value for its Type would panic.
		return 0, false
	}
	switch val.Kind() {
	case reflect.Float32, reflect.Float64:
		return val.Float(), true
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		return float64(val.Int()), true
	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
		return float64(val.Uint()), true
	}
	return 0, false
}
// various support interfaces which can be used to find lat/lon

// loner is satisfied by values exposing their longitude through Lon.
type loner interface {
Lon() float64
}
// later is satisfied by values exposing their latitude through Lat.
type later interface {
Lat() float64
}
// lnger is satisfied by values exposing their longitude through Lng.
type lnger interface {
Lng() float64
}

@ -0,0 +1,212 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package geo
import (
"math"
)
// Lookup tables for the sloppy math functions. sinTab, cosTab and the asin
// tables are allocated and filled in init.
// NOTE(review): earthDiameterPerLatitude is presumably filled in init as
// well - its initialization is not visible here, confirm.
var earthDiameterPerLatitude []float64
var sinTab []float64
var cosTab []float64
var asinTab []float64
// asinDerNDivFNTab holds, per table entry, the N-th derivative of asin
// divided by N factorial.
var asinDer1DivF1Tab []float64
var asinDer2DivF2Tab []float64
var asinDer3DivF3Tab []float64
var asinDer4DivF4Tab []float64
// radiusTabsSize is 2^10 + 1; radiusDelta splits pi/2 into
// radiusTabsSize-1 equal steps and radiusIndexer is its reciprocal.
const radiusTabsSize = (1 << 10) + 1
const radiusDelta = (math.Pi / 2) / (radiusTabsSize - 1)
const radiusIndexer = 1 / radiusDelta
// sinCosTabsSize is the length of sinTab and cosTab (2^11 + 1).
const sinCosTabsSize = (1 << 11) + 1
// asinTabsSize is the length of the asin tables (2^13 + 1).
const asinTabsSize = (1 << 13) + 1
// Reciprocals of 2!, 3! and 4!, used when filling the asin derivative tables.
const oneDivF2 = 1 / 2.0
const oneDivF3 = 1 / 6.0
const oneDivF4 = 1 / 24.0
// 1.57079632673412561417e+00 first 33 bits of pi/2
var pio2Hi = math.Float64frombits(0x3FF921FB54400000)
// 6.07710050650619224932e-11 pi/2 - PIO2_HI
var pio2Lo = math.Float64frombits(0x3DD0B4611A626331)
var asinPio2Hi = math.Float64frombits(0x3FF921FB54442D18) // 1.57079632679489655800e+00
var asinPio2Lo = math.Float64frombits(0x3C91A62633145C07) // 6.12323399573676603587e-17
var asinPs0 = math.Float64frombits(0x3fc5555555555555) // 1.66666666666666657415e-01
var asinPs1 = math.Float64frombits(0xbfd4d61203eb6f7d) // -3.25565818622400915405e-01
var asinPs2 = math.Float64frombits(0x3fc9c1550e884455) // 2.01212532134862925881e-01
var asinPs3 = math.Float64frombits(0xbfa48228b5688f3b) // -4.00555345006794114027e-02
var asinPs4 = math.Float64frombits(0x3f49efe07501b288) // 7.91534994289814532176e-04
var asinPs5 = math.Float64frombits(0x3f023de10dfdf709) // 3.47933107596021167570e-05
var asinQs1 = math.Float64frombits(0xc0033a271c8a2d4b) // -2.40339491173441421878e+00
var asinQs2 = math.Float64frombits(0x40002ae59c598ac8) // 2.02094576023350569471e+00
var asinQs3 = math.Float64frombits(0xbfe6066c1b8d0159) // -6.88283971605453293030e-01
var asinQs4 = math.Float64frombits(0x3fb3b8c5b12e9282) // 7.70381505559019352791e-02
// twoPiHi and twoPiLo are four times the high and low parts of pi/2.
var twoPiHi = 4 * pio2Hi
var twoPiLo = 4 * pio2Lo
// NOTE(review): Go parses the next two expressions as
// (twoPiHi/sinCosTabsSize) - 1 and (twoPiLo/sinCosTabsSize) - 1, which makes
// both deltas, and with them sinCosIndexer and sinCosMaxValueForIntModulo,
// negative. radiusDelta divides by (radiusTabsSize - 1) and init describes
// the table angle as lying in [0,2*PI], so a step of
// twoPiHi / (sinCosTabsSize - 1) may have been intended - confirm against
// the sin/cos helpers that consume these values before changing anything.
var sinCosDeltaHi = twoPiHi/sinCosTabsSize - 1
var sinCosDeltaLo = twoPiLo/sinCosTabsSize - 1
// sinCosIndexer is the reciprocal of the combined sin/cos table step.
var sinCosIndexer = 1 / (sinCosDeltaHi + sinCosDeltaLo)
var sinCosMaxValueForIntModulo = ((math.MaxInt64 >> 9) / sinCosIndexer) * 0.99
// asinMaxValueForTabs is the largest argument covered by the asin tables
// (the sine of 73 degrees); asinDelta is the spacing between table entries
// and asinIndexer its reciprocal.
var asinMaxValueForTabs = math.Sin(73.0 * degreesToRadian)
var asinDelta = asinMaxValueForTabs / (asinTabsSize - 1)
var asinIndexer = 1 / asinDelta
func init() {
// initializes the tables used for the sloppy math functions
// sin and cos
sinTab = make([]float64, sinCosTabsSize)
cosTab = make([]float64, sinCosTabsSize)
sinCosPiIndex := (sinCosTabsSize - 1) / 2
sinCosPiMul2Index := 2 * sinCosPiIndex
sinCosPiMul05Index := sinCosPiIndex / 2
sinCosPiMul15Index := 3 * sinCosPiIndex / 2
for i := 0; i < sinCosTabsSize; i++ {
// angle: in [0,2*PI].
angle := float64(i)*sinCosDeltaHi + float64(i)*sinCosDeltaLo
sinAngle := math.Sin(angle)
cosAngle := math.Cos(angle)
// For indexes corresponding to null cosine or sine, we make sure the value is zero
// and not an epsilon. This allows for a much better accuracy for results close to zero.
if i == sinCosPiIndex {
sinAngle = 0.0
} else if i == sinCosPiMul2Index {
sinAngle = 0.0
} else if i == sinCosPiMul05Index {
sinAngle = 0.0
} else if i == sinCosPiMul15Index {
sinAngle = 0.0
}
sinTab[i] = sinAngle
cosTab[i] = cosAngle
}
// asin
asinTab = make([]float64, asinTabsSize)
asinDer1DivF1Tab = make([]float64, asinTabsSize)
asinDer2DivF2Tab = make([]float64, asinTabsSize)
asinDer3DivF3Tab = make([]float64, asinTabsSize)
asinDer4DivF4Tab = make([]float64, asinTabsSize)
for i := 0; i < asinTabsSize; i++ {
// x: in [0,ASIN_MAX_VALUE_FOR_TABS].
x := float64(i) * asinDelta
asinTab[i] = math.Asin(x)
oneMinusXSqInv := 1.0 / (1 - x*x)
oneMinusXSqInv05 := math.Sqrt(oneMinusXSqInv)
oneMinusXSqInv15 := oneMinusXSqInv05 * oneMinusXSqInv
oneMinusXSqInv25 := oneMinusXSqInv15 * oneMinusXSqInv
oneMinusXSqInv35 := oneMinusXSqInv25 * oneMinusXSqInv
asinDer1DivF1Tab[i] = oneMinusXSqInv05
asinDer2DivF2Tab[i] = (x * oneMinusXSqInv15) * oneDivF2
asinDer3DivF3Tab[i] = ((1 + 2*x*x) * oneMinusXSqInv25) * oneDivF3
asinDer4DivF4Tab[i] = ((5 + 2*x*(2+x*(5-2*x))) * oneMinusXSqInv35) * oneDivF4
}
// earth radius
a := 6378137.0
b := 6356752.31420
a2 := a * a
b2 := b * b
earthDiameterPerLatitude = make([]float64, radiusTabsSize)
earthDiameterPerLatitude[0] = 2.0 * a / 1000
earthDiameterPerLatitude[radiusTabsSize-1] = 2.0 * b / 1000
for i := 1; i < radiusTabsSize-1; i++ {
lat := math.Pi * float64(i) / (2*radiusTabsSize - 1)
one := math.Pow(a2*math.Cos(lat), 2)
two := math.Pow(b2*math.Sin(lat), 2)
three := math.Pow(float64(a)*math.Cos(lat), 2)
four := math.Pow(b*math.Sin(lat), 2)
radius := math.Sqrt((one + two) / (three + four))
earthDiameterPerLatitude[i] = 2 * radius / 1000
}
}
// earthDiameter returns an estimation of the earth's diameter at the specified
// latitude in kilometers
func earthDiameter(lat float64) float64 {
index := math.Mod(math.Abs(lat)*radiusIndexer+0.5, float64(len(earthDiameterPerLatitude)))
if math.IsNaN(index) {
return 0
}
return earthDiameterPerLatitude[int(index)]
}
// pio2 is pi/2, the phase shift applied by sin.
var pio2 = math.Pi / 2

// sin is a sloppy math implementation of sine, computed as the sloppy cosine
// of the argument shifted by pi/2.
func sin(a float64) float64 {
	shifted := a - pio2
	return cos(shifted)
}
// cos is a sloppy math (faster) implementation of math.Cos.
//
// The argument is folded to its absolute value. Values above
// sinCosMaxValueForIntModulo are delegated to math.Cos; everything else is
// evaluated as a Taylor expansion around the nearest entry of the sin/cos
// tables.
func cos(a float64) float64 {
	if a < 0.0 {
		a = -a
	}
	if a > sinCosMaxValueForIntModulo {
		return math.Cos(a)
	}
	// Nearest table slot; may still lie outside the table range here.
	idx := int(a*sinCosIndexer + 0.5)
	fidx := float64(idx)
	// Distance from the slot's angle, using the hi/lo split of the step.
	d := (a - fidx*sinCosDeltaHi) - fidx*sinCosDeltaLo
	// Wrap into the table. The last entry duplicates the first, so the
	// modulus is the table size minus one (a power of two, hence the mask).
	idx &= sinCosTabsSize - 2
	c, s := cosTab[idx], sinTab[idx]
	// Horner evaluation, innermost term first.
	poly := s*oneDivF3 + d*c*oneDivF4
	poly = -c*oneDivF2 + d*poly
	poly = -s + d*poly
	return c + d*poly
}
// asin is a sloppy math (faster) implementation of math.Asin.
//
// The sign of the argument is stripped and re-applied to the result.
// Magnitudes up to asinMaxValueForTabs use a Taylor expansion around the
// nearest table entry; magnitudes between that bound and 1 use the rational
// approximation derived from fdlibm. A magnitude of exactly 1 returns
// +/-pi/2, and anything else (magnitude > 1 or NaN) returns NaN.
func asin(a float64) float64 {
	var negateResult bool
	if a < 0 {
		a = -a
		negateResult = true
	}
	if a <= asinMaxValueForTabs {
		index := int(a*asinIndexer + 0.5)
		delta := a - float64(index)*asinDelta
		result := asinTab[index] + delta*(asinDer1DivF1Tab[index]+delta*(asinDer2DivF2Tab[index]+delta*(asinDer3DivF3Tab[index]+delta*asinDer4DivF4Tab[index])))
		if negateResult {
			return -result
		}
		return result
	}
	// value > ASIN_MAX_VALUE_FOR_TABS, or value is NaN
	// This part is derived from fdlibm.
	if a < 1 {
		t := (1.0 - a) * 0.5
		// Horner form of the numerator polynomial; the innermost term is
		// asinPs4 + t*asinPs5 (a product, not a sum).
		p := t * (asinPs0 + t*(asinPs1+t*(asinPs2+t*(asinPs3+t*(asinPs4+t*asinPs5)))))
		q := 1.0 + t*(asinQs1+t*(asinQs2+t*(asinQs3+t*asinQs4)))
		s := math.Sqrt(t)
		z := s + s*(p/q)
		result := asinPio2Hi - ((z + z) - asinPio2Lo)
		if negateResult {
			return -result
		}
		return result
	}
	// value >= 1.0, or value is NaN
	if a == 1.0 {
		if negateResult {
			return -math.Pi / 2
		}
		return math.Pi / 2
	}
	return math.NaN()
}

@ -49,6 +49,17 @@ func (b *Batch) Index(id string, data interface{}) error {
return nil return nil
} }
// IndexAdvanced queues an already-built document on the batch, bypassing
// the mapping. A document with an empty ID is rejected with ErrorEmptyID.
// NOTE: the bleve Index is not updated until the batch is executed.
func (b *Batch) IndexAdvanced(doc *document.Document) (err error) {
	if len(doc.ID) == 0 {
		return ErrorEmptyID
	}
	b.internal.Update(doc)
	return nil
}
// Delete adds the specified delete operation to the // Delete adds the specified delete operation to the
// batch. NOTE: the bleve Index is not updated until // batch. NOTE: the bleve Index is not updated until
// the batch is executed. // the batch is executed.
@ -99,12 +110,15 @@ func (b *Batch) Reset() {
// them. // them.
// //
// The DocumentMapping used to index a value is deduced by the following rules: // The DocumentMapping used to index a value is deduced by the following rules:
// 1) If value implements Classifier interface, resolve the mapping from Type(). // 1) If value implements mapping.bleveClassifier interface, resolve the mapping
// 2) If value has a string field or value at IndexMapping.TypeField. // from BleveType().
// 2) If value implements mapping.Classifier interface, resolve the mapping
// from Type().
// 3) If value has a string field or value at IndexMapping.TypeField.
// (defaulting to "_type"), use it to resolve the mapping. Fields addressing // (defaulting to "_type"), use it to resolve the mapping. Fields addressing
// is described below. // is described below.
// 3) If IndexMapping.DefaultType is registered, return it. // 4) If IndexMapping.DefaultType is registered, return it.
// 4) Return IndexMapping.DefaultMapping. // 5) Return IndexMapping.DefaultMapping.
// //
// Each field or nested field of the value is identified by a string path, then // Each field or nested field of the value is identified by a string path, then
// mapped to one or several FieldMappings which extract the result for analysis. // mapped to one or several FieldMappings which extract the result for analysis.

@ -48,6 +48,8 @@ type Index interface {
Advanced() (store.KVStore, error) Advanced() (store.KVStore, error)
} }
// DocumentFieldTermVisitor is the callback type accepted by
// IndexReader.DocumentVisitFieldTerms; it is called with a field name and
// a term.
type DocumentFieldTermVisitor func(field string, term []byte)
type IndexReader interface { type IndexReader interface {
TermFieldReader(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (TermFieldReader, error) TermFieldReader(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (TermFieldReader, error)
@ -64,7 +66,7 @@ type IndexReader interface {
FieldDictPrefix(field string, termPrefix []byte) (FieldDict, error) FieldDictPrefix(field string, termPrefix []byte) (FieldDict, error)
Document(id string) (*document.Document, error) Document(id string) (*document.Document, error)
DocumentFieldTerms(id IndexInternalID, fields []string) (FieldTerms, error) DocumentVisitFieldTerms(id IndexInternalID, fields []string, visitor DocumentFieldTermVisitor) error
Fields() ([]string, error) Fields() ([]string, error)

@ -90,7 +90,7 @@ func (udc *UpsideDownCouch) Analyze(d *document.Document) *index.AnalysisResult
rv.Rows = append(make([]index.IndexRow, 0, rowsCapNeeded), rv.Rows...) rv.Rows = append(make([]index.IndexRow, 0, rowsCapNeeded), rv.Rows...)
backIndexTermEntries := make([]*BackIndexTermEntry, 0, rowsCapNeeded) backIndexTermsEntries := make([]*BackIndexTermsEntry, 0, len(fieldTermFreqs))
// walk through the collated information and process // walk through the collated information and process
// once for each indexed field (unique name) // once for each indexed field (unique name)
@ -99,11 +99,11 @@ func (udc *UpsideDownCouch) Analyze(d *document.Document) *index.AnalysisResult
includeTermVectors := fieldIncludeTermVectors[fieldIndex] includeTermVectors := fieldIncludeTermVectors[fieldIndex]
// encode this field // encode this field
rv.Rows, backIndexTermEntries = udc.indexField(docIDBytes, includeTermVectors, fieldIndex, fieldLength, tokenFreqs, rv.Rows, backIndexTermEntries) rv.Rows, backIndexTermsEntries = udc.indexField(docIDBytes, includeTermVectors, fieldIndex, fieldLength, tokenFreqs, rv.Rows, backIndexTermsEntries)
} }
// build the back index row // build the back index row
backIndexRow := NewBackIndexRow(docIDBytes, backIndexTermEntries, backIndexStoredEntries) backIndexRow := NewBackIndexRow(docIDBytes, backIndexTermsEntries, backIndexStoredEntries)
rv.Rows = append(rv.Rows, backIndexRow) rv.Rows = append(rv.Rows, backIndexRow)
return rv return rv

@ -127,10 +127,12 @@ func (i *IndexReader) DumpDoc(id string) chan interface{} {
} }
// build sorted list of term keys // build sorted list of term keys
keys := make(keyset, 0) keys := make(keyset, 0)
for _, entry := range back.termEntries { for _, entry := range back.termsEntries {
tfr := NewTermFrequencyRow([]byte(*entry.Term), uint16(*entry.Field), idBytes, 0, 0) for i := range entry.Terms {
key := tfr.Key() tfr := NewTermFrequencyRow([]byte(entry.Terms[i]), uint16(*entry.Field), idBytes, 0, 0)
keys = append(keys, key) key := tfr.Key()
keys = append(keys, key)
}
} }
sort.Sort(keys) sort.Sort(keys)

@ -101,15 +101,7 @@ func (i *IndexReader) Document(id string) (doc *document.Document, err error) {
return return
} }
func (i *IndexReader) DocumentFieldTerms(id index.IndexInternalID, fields []string) (index.FieldTerms, error) { func (i *IndexReader) DocumentVisitFieldTerms(id index.IndexInternalID, fields []string, visitor index.DocumentFieldTermVisitor) error {
back, err := backIndexRowForDoc(i.kvreader, id)
if err != nil {
return nil, err
}
if back == nil {
return nil, nil
}
rv := make(index.FieldTerms, len(fields))
fieldsMap := make(map[uint16]string, len(fields)) fieldsMap := make(map[uint16]string, len(fields))
for _, f := range fields { for _, f := range fields {
id, ok := i.index.fieldCache.FieldNamed(f, false) id, ok := i.index.fieldCache.FieldNamed(f, false)
@ -117,12 +109,34 @@ func (i *IndexReader) DocumentFieldTerms(id index.IndexInternalID, fields []stri
fieldsMap[id] = f fieldsMap[id] = f
} }
} }
for _, entry := range back.termEntries {
if field, ok := fieldsMap[uint16(*entry.Field)]; ok { tempRow := BackIndexRow{
rv[field] = append(rv[field], *entry.Term) doc: id,
} }
keyBuf := GetRowBuffer()
if tempRow.KeySize() > len(keyBuf) {
keyBuf = make([]byte, 2*tempRow.KeySize())
} }
return rv, nil defer PutRowBuffer(keyBuf)
keySize, err := tempRow.KeyTo(keyBuf)
if err != nil {
return err
}
value, err := i.kvreader.Get(keyBuf[:keySize])
if err != nil {
return err
}
if value == nil {
return nil
}
return visitBackIndexRow(value, func(field uint32, term []byte) {
if field, ok := fieldsMap[uint16(field)]; ok {
visitor(field, term)
}
})
} }
func (i *IndexReader) Fields() (fields []string, err error) { func (i *IndexReader) Fields() (fields []string, err error) {

@ -24,46 +24,57 @@ import (
) )
type UpsideDownCouchTermFieldReader struct { type UpsideDownCouchTermFieldReader struct {
count uint64 count uint64
indexReader *IndexReader indexReader *IndexReader
iterator store.KVIterator iterator store.KVIterator
term []byte term []byte
tfrNext *TermFrequencyRow tfrNext *TermFrequencyRow
keyBuf []byte tfrPrealloc TermFrequencyRow
field uint16 keyBuf []byte
field uint16
includeTermVectors bool
} }
func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) { func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) {
dictionaryRow := NewDictionaryRow(term, field, 0) bufNeeded := termFrequencyRowKeySize(term, nil)
val, err := indexReader.kvreader.Get(dictionaryRow.Key()) if bufNeeded < dictionaryRowKeySize(term) {
bufNeeded = dictionaryRowKeySize(term)
}
buf := make([]byte, bufNeeded)
bufUsed := dictionaryRowKeyTo(buf, field, term)
val, err := indexReader.kvreader.Get(buf[:bufUsed])
if err != nil { if err != nil {
return nil, err return nil, err
} }
if val == nil { if val == nil {
atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1)) atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1))
return &UpsideDownCouchTermFieldReader{ rv := &UpsideDownCouchTermFieldReader{
count: 0, count: 0,
term: term, term: term,
tfrNext: &TermFrequencyRow{}, field: field,
field: field, includeTermVectors: includeTermVectors,
}, nil }
rv.tfrNext = &rv.tfrPrealloc
return rv, nil
} }
err = dictionaryRow.parseDictionaryV(val) count, err := dictionaryRowParseV(val)
if err != nil { if err != nil {
return nil, err return nil, err
} }
tfr := NewTermFrequencyRow(term, field, []byte{}, 0, 0) bufUsed = termFrequencyRowKeyTo(buf, field, term, nil)
it := indexReader.kvreader.PrefixIterator(tfr.Key()) it := indexReader.kvreader.PrefixIterator(buf[:bufUsed])
atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1)) atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1))
return &UpsideDownCouchTermFieldReader{ return &UpsideDownCouchTermFieldReader{
indexReader: indexReader, indexReader: indexReader,
iterator: it, iterator: it,
count: dictionaryRow.count, count: count,
term: term, term: term,
field: field, field: field,
includeTermVectors: includeTermVectors,
}, nil }, nil
} }
@ -79,7 +90,7 @@ func (r *UpsideDownCouchTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*
if r.tfrNext != nil { if r.tfrNext != nil {
r.iterator.Next() r.iterator.Next()
} else { } else {
r.tfrNext = &TermFrequencyRow{} r.tfrNext = &r.tfrPrealloc
} }
key, val, valid := r.iterator.Current() key, val, valid := r.iterator.Current()
if valid { if valid {
@ -88,7 +99,7 @@ func (r *UpsideDownCouchTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*
if err != nil { if err != nil {
return nil, err return nil, err
} }
err = tfr.parseV(val) err = tfr.parseV(val, r.includeTermVectors)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -125,7 +136,7 @@ func (r *UpsideDownCouchTermFieldReader) Advance(docID index.IndexInternalID, pr
if err != nil { if err != nil {
return nil, err return nil, err
} }
err = tfr.parseV(val) err = tfr.parseV(val, r.includeTermVectors)
if err != nil { if err != nil {
return nil, err return nil, err
} }

@ -254,14 +254,22 @@ func (dr *DictionaryRow) Key() []byte {
} }
func (dr *DictionaryRow) KeySize() int { func (dr *DictionaryRow) KeySize() int {
return len(dr.term) + 3 return dictionaryRowKeySize(dr.term)
}
func dictionaryRowKeySize(term []byte) int {
return len(term) + 3
} }
func (dr *DictionaryRow) KeyTo(buf []byte) (int, error) { func (dr *DictionaryRow) KeyTo(buf []byte) (int, error) {
return dictionaryRowKeyTo(buf, dr.field, dr.term), nil
}
func dictionaryRowKeyTo(buf []byte, field uint16, term []byte) int {
buf[0] = 'd' buf[0] = 'd'
binary.LittleEndian.PutUint16(buf[1:3], dr.field) binary.LittleEndian.PutUint16(buf[1:3], field)
size := copy(buf[3:], dr.term) size := copy(buf[3:], term)
return size + 3, nil return size + 3
} }
func (dr *DictionaryRow) Value() []byte { func (dr *DictionaryRow) Value() []byte {
@ -324,14 +332,22 @@ func (dr *DictionaryRow) parseDictionaryK(key []byte) error {
} }
func (dr *DictionaryRow) parseDictionaryV(value []byte) error { func (dr *DictionaryRow) parseDictionaryV(value []byte) error {
count, nread := binary.Uvarint(value) count, err := dictionaryRowParseV(value)
if nread <= 0 { if err != nil {
return fmt.Errorf("DictionaryRow parse Uvarint error, nread: %d", nread) return err
} }
dr.count = count dr.count = count
return nil return nil
} }
// dictionaryRowParseV decodes the uvarint count stored in a dictionary row
// value. It returns an error when binary.Uvarint reports that nothing could
// be decoded (buffer too small or value overflow).
func dictionaryRowParseV(value []byte) (uint64, error) {
	count, nread := binary.Uvarint(value)
	if nread > 0 {
		return count, nil
	}
	return 0, fmt.Errorf("DictionaryRow parse Uvarint error, nread: %d", nread)
}
// TERM FIELD FREQUENCY // TERM FIELD FREQUENCY
type TermVector struct { type TermVector struct {
@ -394,16 +410,24 @@ func (tfr *TermFrequencyRow) Key() []byte {
} }
func (tfr *TermFrequencyRow) KeySize() int { func (tfr *TermFrequencyRow) KeySize() int {
return 3 + len(tfr.term) + 1 + len(tfr.doc) return termFrequencyRowKeySize(tfr.term, tfr.doc)
}
func termFrequencyRowKeySize(term, doc []byte) int {
return 3 + len(term) + 1 + len(doc)
} }
func (tfr *TermFrequencyRow) KeyTo(buf []byte) (int, error) { func (tfr *TermFrequencyRow) KeyTo(buf []byte) (int, error) {
return termFrequencyRowKeyTo(buf, tfr.field, tfr.term, tfr.doc), nil
}
func termFrequencyRowKeyTo(buf []byte, field uint16, term, doc []byte) int {
buf[0] = 't' buf[0] = 't'
binary.LittleEndian.PutUint16(buf[1:3], tfr.field) binary.LittleEndian.PutUint16(buf[1:3], field)
termLen := copy(buf[3:], tfr.term) termLen := copy(buf[3:], term)
buf[3+termLen] = ByteSeparator buf[3+termLen] = ByteSeparator
docLen := copy(buf[3+termLen+1:], tfr.doc) docLen := copy(buf[3+termLen+1:], doc)
return 3 + termLen + 1 + docLen, nil return 3 + termLen + 1 + docLen
} }
func (tfr *TermFrequencyRow) KeyAppendTo(buf []byte) ([]byte, error) { func (tfr *TermFrequencyRow) KeyAppendTo(buf []byte) ([]byte, error) {
@ -538,7 +562,7 @@ func (tfr *TermFrequencyRow) parseKDoc(key []byte, term []byte) error {
return nil return nil
} }
func (tfr *TermFrequencyRow) parseV(value []byte) error { func (tfr *TermFrequencyRow) parseV(value []byte, includeTermVectors bool) error {
var bytesRead int var bytesRead int
tfr.freq, bytesRead = binary.Uvarint(value) tfr.freq, bytesRead = binary.Uvarint(value)
if bytesRead <= 0 { if bytesRead <= 0 {
@ -556,6 +580,10 @@ func (tfr *TermFrequencyRow) parseV(value []byte) error {
tfr.norm = math.Float32frombits(uint32(norm)) tfr.norm = math.Float32frombits(uint32(norm))
tfr.vectors = nil tfr.vectors = nil
if !includeTermVectors {
return nil
}
var field uint64 var field uint64
field, bytesRead = binary.Uvarint(value[currOffset:]) field, bytesRead = binary.Uvarint(value[currOffset:])
for bytesRead > 0 { for bytesRead > 0 {
@ -620,7 +648,7 @@ func NewTermFrequencyRowKV(key, value []byte) (*TermFrequencyRow, error) {
return nil, err return nil, err
} }
err = rv.parseV(value) err = rv.parseV(value, true)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -630,7 +658,7 @@ func NewTermFrequencyRowKV(key, value []byte) (*TermFrequencyRow, error) {
type BackIndexRow struct { type BackIndexRow struct {
doc []byte doc []byte
termEntries []*BackIndexTermEntry termsEntries []*BackIndexTermsEntry
storedEntries []*BackIndexStoreEntry storedEntries []*BackIndexStoreEntry
} }
@ -638,10 +666,12 @@ func (br *BackIndexRow) AllTermKeys() [][]byte {
if br == nil { if br == nil {
return nil return nil
} }
rv := make([][]byte, len(br.termEntries)) rv := make([][]byte, 0, len(br.termsEntries)) // FIXME this underestimates severely
for i, termEntry := range br.termEntries { for _, termsEntry := range br.termsEntries {
termRow := NewTermFrequencyRow([]byte(termEntry.GetTerm()), uint16(termEntry.GetField()), br.doc, 0, 0) for i := range termsEntry.Terms {
rv[i] = termRow.Key() termRow := NewTermFrequencyRow([]byte(termsEntry.Terms[i]), uint16(termsEntry.GetField()), br.doc, 0, 0)
rv = append(rv, termRow.Key())
}
} }
return rv return rv
} }
@ -682,7 +712,7 @@ func (br *BackIndexRow) Value() []byte {
func (br *BackIndexRow) ValueSize() int { func (br *BackIndexRow) ValueSize() int {
birv := &BackIndexRowValue{ birv := &BackIndexRowValue{
TermEntries: br.termEntries, TermsEntries: br.termsEntries,
StoredEntries: br.storedEntries, StoredEntries: br.storedEntries,
} }
return birv.Size() return birv.Size()
@ -690,20 +720,20 @@ func (br *BackIndexRow) ValueSize() int {
func (br *BackIndexRow) ValueTo(buf []byte) (int, error) { func (br *BackIndexRow) ValueTo(buf []byte) (int, error) {
birv := &BackIndexRowValue{ birv := &BackIndexRowValue{
TermEntries: br.termEntries, TermsEntries: br.termsEntries,
StoredEntries: br.storedEntries, StoredEntries: br.storedEntries,
} }
return birv.MarshalTo(buf) return birv.MarshalTo(buf)
} }
func (br *BackIndexRow) String() string { func (br *BackIndexRow) String() string {
return fmt.Sprintf("Backindex DocId: `%s` Term Entries: %v, Stored Entries: %v", string(br.doc), br.termEntries, br.storedEntries) return fmt.Sprintf("Backindex DocId: `%s` Terms Entries: %v, Stored Entries: %v", string(br.doc), br.termsEntries, br.storedEntries)
} }
func NewBackIndexRow(docID []byte, entries []*BackIndexTermEntry, storedFields []*BackIndexStoreEntry) *BackIndexRow { func NewBackIndexRow(docID []byte, entries []*BackIndexTermsEntry, storedFields []*BackIndexStoreEntry) *BackIndexRow {
return &BackIndexRow{ return &BackIndexRow{
doc: docID, doc: docID,
termEntries: entries, termsEntries: entries,
storedEntries: storedFields, storedEntries: storedFields,
} }
} }
@ -732,7 +762,7 @@ func NewBackIndexRowKV(key, value []byte) (*BackIndexRow, error) {
if err != nil { if err != nil {
return nil, err return nil, err
} }
rv.termEntries = birv.TermEntries rv.termsEntries = birv.TermsEntries
rv.storedEntries = birv.StoredEntries rv.storedEntries = birv.StoredEntries
return &rv, nil return &rv, nil
@ -851,3 +881,232 @@ func NewStoredRowKV(key, value []byte) (*StoredRow, error) {
rv.value = value[1:] rv.value = value[1:]
return rv, nil return rv, nil
} }
// backIndexFieldTermVisitor is the callback invoked by visitBackIndexRow
// for every term found; it receives the numeric field and the term bytes.
type backIndexFieldTermVisitor func(field uint32, term []byte)

// visitBackIndexRow is designed to process a protobuf encoded
// value, without creating unnecessary garbage. Instead values are passed
// to a callback, inspected first, and only copied if necessary.
// Due to the fact that this borrows from generated code, it must be manually
// updated if the protobuf definition changes.
//
// This code originates from:
// func (m *BackIndexRowValue) Unmarshal(data []byte) error
// the sections which create garbage or parse uninteresting sections
// have been commented out. This was done by design to allow for easier
// merging in the future if that original function is regenerated
//
// The term slices handed to callback alias data. An error is returned when
// the encoding is truncated, uses an unexpected wire type, or declares a
// length that is negative or overflows.
func visitBackIndexRow(data []byte, callback backIndexFieldTermVisitor) error {
	l := len(data)
	iNdEx := 0
	for iNdEx < l {
		// decode the varint key (field number + wire type)
		var wire uint64
		for shift := uint(0); ; shift += 7 {
			if iNdEx >= l {
				return io.ErrUnexpectedEOF
			}
			b := data[iNdEx]
			iNdEx++
			wire |= (uint64(b) & 0x7F) << shift
			if b < 0x80 {
				break
			}
		}
		fieldNum := int32(wire >> 3)
		wireType := int(wire & 0x7)
		switch fieldNum {
		case 1:
			if wireType != 2 {
				return fmt.Errorf("proto: wrong wireType = %d for field TermsEntries", wireType)
			}
			var msglen int
			for shift := uint(0); ; shift += 7 {
				if iNdEx >= l {
					return io.ErrUnexpectedEOF
				}
				b := data[iNdEx]
				iNdEx++
				msglen |= (int(b) & 0x7F) << shift
				if b < 0x80 {
					break
				}
			}
			if msglen < 0 {
				return ErrInvalidLengthUpsidedown
			}
			postIndex := iNdEx + msglen
			// a huge msglen can wrap the sum negative; reject it rather
			// than panic on the slice expression below
			if postIndex < 0 {
				return ErrInvalidLengthUpsidedown
			}
			if postIndex > l {
				return io.ErrUnexpectedEOF
			}
			// dont parse term entries
			// m.TermsEntries = append(m.TermsEntries, &BackIndexTermsEntry{})
			// if err := m.TermsEntries[len(m.TermsEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
			// 	return err
			// }
			// instead, inspect them
			if err := visitBackIndexRowFieldTerms(data[iNdEx:postIndex], callback); err != nil {
				return err
			}
			iNdEx = postIndex
		case 2:
			if wireType != 2 {
				return fmt.Errorf("proto: wrong wireType = %d for field StoredEntries", wireType)
			}
			var msglen int
			for shift := uint(0); ; shift += 7 {
				if iNdEx >= l {
					return io.ErrUnexpectedEOF
				}
				b := data[iNdEx]
				iNdEx++
				msglen |= (int(b) & 0x7F) << shift
				if b < 0x80 {
					break
				}
			}
			if msglen < 0 {
				return ErrInvalidLengthUpsidedown
			}
			postIndex := iNdEx + msglen
			// guard against integer overflow of the end offset
			if postIndex < 0 {
				return ErrInvalidLengthUpsidedown
			}
			if postIndex > l {
				return io.ErrUnexpectedEOF
			}
			// don't parse stored entries
			// m.StoredEntries = append(m.StoredEntries, &BackIndexStoreEntry{})
			// if err := m.StoredEntries[len(m.StoredEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
			// 	return err
			// }
			iNdEx = postIndex
		default:
			// rewind to the start of the key so skipUpsidedown sees the
			// whole unknown field
			var sizeOfWire int
			for {
				sizeOfWire++
				wire >>= 7
				if wire == 0 {
					break
				}
			}
			iNdEx -= sizeOfWire
			skippy, err := skipUpsidedown(data[iNdEx:])
			if err != nil {
				return err
			}
			if skippy < 0 {
				return ErrInvalidLengthUpsidedown
			}
			if (iNdEx + skippy) > l {
				return io.ErrUnexpectedEOF
			}
			// don't track unrecognized data
			//m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
			iNdEx += skippy
		}
	}
	return nil
}
// visitBackIndexRowFieldTerms is designed to process a protobuf encoded
// sub-value within the BackIndexRowValue, without creating unnecessary garbage.
// Instead values are passed to a callback, inspected first, and only copied if
// necessary. Due to the fact that this borrows from generated code, it must
// be manually updated if the protobuf definition changes.
//
// This code originates from:
// func (m *BackIndexTermsEntry) Unmarshal(data []byte) error {
// the sections which create garbage or parse uninteresting sections
// have been commented out. This was done by design to allow for easier
// merging in the future if that original function is regenerated
//
// callback is invoked once per term with the most recently decoded field
// value; the term slice aliases data. An error is returned when the
// encoding is truncated, uses an unexpected wire type, or declares a length
// that is negative or overflows.
func visitBackIndexRowFieldTerms(data []byte, callback backIndexFieldTermVisitor) error {
	var theField uint32

	var hasFields [1]uint64
	l := len(data)
	iNdEx := 0
	for iNdEx < l {
		// decode the varint key (field number + wire type)
		var wire uint64
		for shift := uint(0); ; shift += 7 {
			if iNdEx >= l {
				return io.ErrUnexpectedEOF
			}
			b := data[iNdEx]
			iNdEx++
			wire |= (uint64(b) & 0x7F) << shift
			if b < 0x80 {
				break
			}
		}
		fieldNum := int32(wire >> 3)
		wireType := int(wire & 0x7)
		switch fieldNum {
		case 1:
			if wireType != 0 {
				return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType)
			}
			var v uint32
			for shift := uint(0); ; shift += 7 {
				if iNdEx >= l {
					return io.ErrUnexpectedEOF
				}
				b := data[iNdEx]
				iNdEx++
				v |= (uint32(b) & 0x7F) << shift
				if b < 0x80 {
					break
				}
			}
			// m.Field = &v
			theField = v
			hasFields[0] |= uint64(0x00000001)
		case 2:
			if wireType != 2 {
				return fmt.Errorf("proto: wrong wireType = %d for field Terms", wireType)
			}
			var stringLen uint64
			for shift := uint(0); ; shift += 7 {
				if iNdEx >= l {
					return io.ErrUnexpectedEOF
				}
				b := data[iNdEx]
				iNdEx++
				stringLen |= (uint64(b) & 0x7F) << shift
				if b < 0x80 {
					break
				}
			}
			// A corrupt length can convert to a negative int or wrap the
			// end offset; reject both instead of panicking on the slice
			// expression below.
			intStringLen := int(stringLen)
			if intStringLen < 0 {
				return ErrInvalidLengthUpsidedown
			}
			postIndex := iNdEx + intStringLen
			if postIndex < 0 {
				return ErrInvalidLengthUpsidedown
			}
			if postIndex > l {
				return io.ErrUnexpectedEOF
			}
			//m.Terms = append(m.Terms, string(data[iNdEx:postIndex]))
			callback(theField, data[iNdEx:postIndex])
			iNdEx = postIndex
		default:
			// rewind to the start of the key so skipUpsidedown sees the
			// whole unknown field
			var sizeOfWire int
			for {
				sizeOfWire++
				wire >>= 7
				if wire == 0 {
					break
				}
			}
			iNdEx -= sizeOfWire
			skippy, err := skipUpsidedown(data[iNdEx:])
			if err != nil {
				return err
			}
			if skippy < 0 {
				return ErrInvalidLengthUpsidedown
			}
			if (iNdEx + skippy) > l {
				return io.ErrUnexpectedEOF
			}
			//m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
			iNdEx += skippy
		}
	}
	// if hasFields[0]&uint64(0x00000001) == 0 {
	// 	return new(github_com_golang_protobuf_proto.RequiredNotSetError)
	// }

	return nil
}

@ -45,7 +45,7 @@ const RowBufferSize = 4 * 1024
var VersionKey = []byte{'v'} var VersionKey = []byte{'v'}
const Version uint8 = 5 const Version uint8 = 7
var IncompatibleVersion = fmt.Errorf("incompatible version, %d is supported", Version) var IncompatibleVersion = fmt.Errorf("incompatible version, %d is supported", Version)
@ -499,44 +499,65 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
func (udc *UpsideDownCouch) mergeOldAndNew(backIndexRow *BackIndexRow, rows []index.IndexRow) (addRows []UpsideDownCouchRow, updateRows []UpsideDownCouchRow, deleteRows []UpsideDownCouchRow) { func (udc *UpsideDownCouch) mergeOldAndNew(backIndexRow *BackIndexRow, rows []index.IndexRow) (addRows []UpsideDownCouchRow, updateRows []UpsideDownCouchRow, deleteRows []UpsideDownCouchRow) {
addRows = make([]UpsideDownCouchRow, 0, len(rows)) addRows = make([]UpsideDownCouchRow, 0, len(rows))
if backIndexRow == nil {
addRows = addRows[0:len(rows)]
for i, row := range rows {
addRows[i] = row
}
return addRows, nil, nil
}
updateRows = make([]UpsideDownCouchRow, 0, len(rows)) updateRows = make([]UpsideDownCouchRow, 0, len(rows))
deleteRows = make([]UpsideDownCouchRow, 0, len(rows)) deleteRows = make([]UpsideDownCouchRow, 0, len(rows))
existingTermKeys := make(map[string]bool) var existingTermKeys map[string]struct{}
for _, key := range backIndexRow.AllTermKeys() { backIndexTermKeys := backIndexRow.AllTermKeys()
existingTermKeys[string(key)] = true if len(backIndexTermKeys) > 0 {
existingTermKeys = make(map[string]struct{}, len(backIndexTermKeys))
for _, key := range backIndexTermKeys {
existingTermKeys[string(key)] = struct{}{}
}
} }
existingStoredKeys := make(map[string]bool) var existingStoredKeys map[string]struct{}
for _, key := range backIndexRow.AllStoredKeys() { backIndexStoredKeys := backIndexRow.AllStoredKeys()
existingStoredKeys[string(key)] = true if len(backIndexStoredKeys) > 0 {
existingStoredKeys = make(map[string]struct{}, len(backIndexStoredKeys))
for _, key := range backIndexStoredKeys {
existingStoredKeys[string(key)] = struct{}{}
}
} }
keyBuf := GetRowBuffer() keyBuf := GetRowBuffer()
for _, row := range rows { for _, row := range rows {
switch row := row.(type) { switch row := row.(type) {
case *TermFrequencyRow: case *TermFrequencyRow:
if row.KeySize() > len(keyBuf) { if existingTermKeys != nil {
keyBuf = make([]byte, row.KeySize()) if row.KeySize() > len(keyBuf) {
} keyBuf = make([]byte, row.KeySize())
keySize, _ := row.KeyTo(keyBuf) }
if _, ok := existingTermKeys[string(keyBuf[:keySize])]; ok { keySize, _ := row.KeyTo(keyBuf)
updateRows = append(updateRows, row) if _, ok := existingTermKeys[string(keyBuf[:keySize])]; ok {
delete(existingTermKeys, string(keyBuf[:keySize])) updateRows = append(updateRows, row)
} else { delete(existingTermKeys, string(keyBuf[:keySize]))
addRows = append(addRows, row) continue
}
} }
addRows = append(addRows, row)
case *StoredRow: case *StoredRow:
if row.KeySize() > len(keyBuf) { if existingStoredKeys != nil {
keyBuf = make([]byte, row.KeySize()) if row.KeySize() > len(keyBuf) {
} keyBuf = make([]byte, row.KeySize())
keySize, _ := row.KeyTo(keyBuf) }
if _, ok := existingStoredKeys[string(keyBuf[:keySize])]; ok { keySize, _ := row.KeyTo(keyBuf)
updateRows = append(updateRows, row) if _, ok := existingStoredKeys[string(keyBuf[:keySize])]; ok {
delete(existingStoredKeys, string(keyBuf[:keySize])) updateRows = append(updateRows, row)
} else { delete(existingStoredKeys, string(keyBuf[:keySize]))
addRows = append(addRows, row) continue
}
} }
addRows = append(addRows, row)
default: default:
updateRows = append(updateRows, row) updateRows = append(updateRows, row)
} }
@ -583,33 +604,41 @@ func encodeFieldType(f document.Field) byte {
fieldType = 'd' fieldType = 'd'
case *document.BooleanField: case *document.BooleanField:
fieldType = 'b' fieldType = 'b'
case *document.GeoPointField:
fieldType = 'g'
case *document.CompositeField: case *document.CompositeField:
fieldType = 'c' fieldType = 'c'
} }
return fieldType return fieldType
} }
func (udc *UpsideDownCouch) indexField(docID []byte, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies, rows []index.IndexRow, backIndexTermEntries []*BackIndexTermEntry) ([]index.IndexRow, []*BackIndexTermEntry) { func (udc *UpsideDownCouch) indexField(docID []byte, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies, rows []index.IndexRow, backIndexTermsEntries []*BackIndexTermsEntry) ([]index.IndexRow, []*BackIndexTermsEntry) {
fieldNorm := float32(1.0 / math.Sqrt(float64(fieldLength))) fieldNorm := float32(1.0 / math.Sqrt(float64(fieldLength)))
termFreqRows := make([]TermFrequencyRow, len(tokenFreqs))
termFreqRowsUsed := 0
terms := make([]string, 0, len(tokenFreqs))
for k, tf := range tokenFreqs { for k, tf := range tokenFreqs {
var termFreqRow *TermFrequencyRow termFreqRow := &termFreqRows[termFreqRowsUsed]
termFreqRowsUsed++
InitTermFrequencyRow(termFreqRow, tf.Term, fieldIndex, docID,
uint64(frequencyFromTokenFreq(tf)), fieldNorm)
if includeTermVectors { if includeTermVectors {
var tv []*TermVector termFreqRow.vectors, rows = udc.termVectorsFromTokenFreq(fieldIndex, tf, rows)
tv, rows = udc.termVectorsFromTokenFreq(fieldIndex, tf, rows)
termFreqRow = NewTermFrequencyRowWithTermVectors(tf.Term, fieldIndex, docID, uint64(frequencyFromTokenFreq(tf)), fieldNorm, tv)
} else {
termFreqRow = NewTermFrequencyRow(tf.Term, fieldIndex, docID, uint64(frequencyFromTokenFreq(tf)), fieldNorm)
} }
// record the back index entry // record the back index entry
backIndexTermEntry := BackIndexTermEntry{Term: proto.String(k), Field: proto.Uint32(uint32(fieldIndex))} terms = append(terms, k)
backIndexTermEntries = append(backIndexTermEntries, &backIndexTermEntry)
rows = append(rows, termFreqRow) rows = append(rows, termFreqRow)
} }
backIndexTermsEntry := BackIndexTermsEntry{Field: proto.Uint32(uint32(fieldIndex)), Terms: terms}
backIndexTermsEntries = append(backIndexTermsEntries, &backIndexTermsEntry)
return rows, backIndexTermEntries return rows, backIndexTermsEntries
} }
func (udc *UpsideDownCouch) Delete(id string) (err error) { func (udc *UpsideDownCouch) Delete(id string) (err error) {
@ -682,9 +711,11 @@ func (udc *UpsideDownCouch) Delete(id string) (err error) {
func (udc *UpsideDownCouch) deleteSingle(id string, backIndexRow *BackIndexRow, deleteRows []UpsideDownCouchRow) []UpsideDownCouchRow { func (udc *UpsideDownCouch) deleteSingle(id string, backIndexRow *BackIndexRow, deleteRows []UpsideDownCouchRow) []UpsideDownCouchRow {
idBytes := []byte(id) idBytes := []byte(id)
for _, backIndexEntry := range backIndexRow.termEntries { for _, backIndexEntry := range backIndexRow.termsEntries {
tfr := NewTermFrequencyRow([]byte(*backIndexEntry.Term), uint16(*backIndexEntry.Field), idBytes, 0, 0) for i := range backIndexEntry.Terms {
deleteRows = append(deleteRows, tfr) tfr := NewTermFrequencyRow([]byte(backIndexEntry.Terms[i]), uint16(*backIndexEntry.Field), idBytes, 0, 0)
deleteRows = append(deleteRows, tfr)
}
} }
for _, se := range backIndexRow.storedEntries { for _, se := range backIndexRow.storedEntries {
sf := NewStoredRow(idBytes, uint16(*se.Field), se.ArrayPositions, 'x', nil) sf := NewStoredRow(idBytes, uint16(*se.Field), se.ArrayPositions, 'x', nil)
@ -706,6 +737,8 @@ func decodeFieldType(typ byte, name string, pos []uint64, value []byte) document
return document.NewDateTimeFieldFromBytes(name, pos, value) return document.NewDateTimeFieldFromBytes(name, pos, value)
case 'b': case 'b':
return document.NewBooleanFieldFromBytes(name, pos, value) return document.NewBooleanFieldFromBytes(name, pos, value)
case 'g':
return document.NewGeoPointFieldFromBytes(name, pos, value)
} }
return nil return nil
} }
@ -715,6 +748,7 @@ func frequencyFromTokenFreq(tf *analysis.TokenFreq) int {
} }
func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis.TokenFreq, rows []index.IndexRow) ([]*TermVector, []index.IndexRow) { func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis.TokenFreq, rows []index.IndexRow) ([]*TermVector, []index.IndexRow) {
a := make([]TermVector, len(tf.Locations))
rv := make([]*TermVector, len(tf.Locations)) rv := make([]*TermVector, len(tf.Locations))
for i, l := range tf.Locations { for i, l := range tf.Locations {
@ -727,14 +761,14 @@ func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis.
rows = append(rows, newFieldRow) rows = append(rows, newFieldRow)
} }
} }
tv := TermVector{ a[i] = TermVector{
field: fieldIndex, field: fieldIndex,
arrayPositions: l.ArrayPositions, arrayPositions: l.ArrayPositions,
pos: uint64(l.Position), pos: uint64(l.Position),
start: uint64(l.Start), start: uint64(l.Start),
end: uint64(l.End), end: uint64(l.End),
} }
rv[i] = &tv rv[i] = &a[i]
} }
return rv, rows return rv, rows
@ -745,18 +779,19 @@ func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) []
return nil return nil
} }
a := make([]index.TermFieldVector, len(in))
rv := make([]*index.TermFieldVector, len(in)) rv := make([]*index.TermFieldVector, len(in))
for i, tv := range in { for i, tv := range in {
fieldName := udc.fieldCache.FieldIndexed(tv.field) fieldName := udc.fieldCache.FieldIndexed(tv.field)
tfv := index.TermFieldVector{ a[i] = index.TermFieldVector{
Field: fieldName, Field: fieldName,
ArrayPositions: tv.arrayPositions, ArrayPositions: tv.arrayPositions,
Pos: tv.pos, Pos: tv.pos,
Start: tv.start, Start: tv.start,
End: tv.end, End: tv.end,
} }
rv[i] = &tfv rv[i] = &a[i]
} }
return rv return rv
} }
@ -1008,7 +1043,7 @@ func init() {
func backIndexRowForDoc(kvreader store.KVReader, docID index.IndexInternalID) (*BackIndexRow, error) { func backIndexRowForDoc(kvreader store.KVReader, docID index.IndexInternalID) (*BackIndexRow, error) {
// use a temporary row structure to build key // use a temporary row structure to build key
tempRow := &BackIndexRow{ tempRow := BackIndexRow{
doc: docID, doc: docID,
} }

@ -3,15 +3,15 @@
// DO NOT EDIT! // DO NOT EDIT!
/* /*
Package upsidedown is a generated protocol buffer package. Package upsidedown is a generated protocol buffer package.
It is generated from these files: It is generated from these files:
upsidedown.proto upsidedown.proto
It has these top-level messages: It has these top-level messages:
BackIndexTermEntry BackIndexTermsEntry
BackIndexStoreEntry BackIndexStoreEntry
BackIndexRowValue BackIndexRowValue
*/ */
package upsidedown package upsidedown
@ -26,30 +26,30 @@ import github_com_golang_protobuf_proto "github.com/golang/protobuf/proto"
var _ = proto.Marshal var _ = proto.Marshal
var _ = math.Inf var _ = math.Inf
type BackIndexTermEntry struct { type BackIndexTermsEntry struct {
Term *string `protobuf:"bytes,1,req,name=term" json:"term,omitempty"` Field *uint32 `protobuf:"varint,1,req,name=field" json:"field,omitempty"`
Field *uint32 `protobuf:"varint,2,req,name=field" json:"field,omitempty"` Terms []string `protobuf:"bytes,2,rep,name=terms" json:"terms,omitempty"`
XXX_unrecognized []byte `json:"-"` XXX_unrecognized []byte `json:"-"`
} }
func (m *BackIndexTermEntry) Reset() { *m = BackIndexTermEntry{} } func (m *BackIndexTermsEntry) Reset() { *m = BackIndexTermsEntry{} }
func (m *BackIndexTermEntry) String() string { return proto.CompactTextString(m) } func (m *BackIndexTermsEntry) String() string { return proto.CompactTextString(m) }
func (*BackIndexTermEntry) ProtoMessage() {} func (*BackIndexTermsEntry) ProtoMessage() {}
func (m *BackIndexTermEntry) GetTerm() string { func (m *BackIndexTermsEntry) GetField() uint32 {
if m != nil && m.Term != nil {
return *m.Term
}
return ""
}
func (m *BackIndexTermEntry) GetField() uint32 {
if m != nil && m.Field != nil { if m != nil && m.Field != nil {
return *m.Field return *m.Field
} }
return 0 return 0
} }
func (m *BackIndexTermsEntry) GetTerms() []string {
if m != nil {
return m.Terms
}
return nil
}
type BackIndexStoreEntry struct { type BackIndexStoreEntry struct {
Field *uint32 `protobuf:"varint,1,req,name=field" json:"field,omitempty"` Field *uint32 `protobuf:"varint,1,req,name=field" json:"field,omitempty"`
ArrayPositions []uint64 `protobuf:"varint,2,rep,name=arrayPositions" json:"arrayPositions,omitempty"` ArrayPositions []uint64 `protobuf:"varint,2,rep,name=arrayPositions" json:"arrayPositions,omitempty"`
@ -75,7 +75,7 @@ func (m *BackIndexStoreEntry) GetArrayPositions() []uint64 {
} }
type BackIndexRowValue struct { type BackIndexRowValue struct {
TermEntries []*BackIndexTermEntry `protobuf:"bytes,1,rep,name=termEntries" json:"termEntries,omitempty"` TermsEntries []*BackIndexTermsEntry `protobuf:"bytes,1,rep,name=termsEntries" json:"termsEntries,omitempty"`
StoredEntries []*BackIndexStoreEntry `protobuf:"bytes,2,rep,name=storedEntries" json:"storedEntries,omitempty"` StoredEntries []*BackIndexStoreEntry `protobuf:"bytes,2,rep,name=storedEntries" json:"storedEntries,omitempty"`
XXX_unrecognized []byte `json:"-"` XXX_unrecognized []byte `json:"-"`
} }
@ -84,9 +84,9 @@ func (m *BackIndexRowValue) Reset() { *m = BackIndexRowValue{} }
func (m *BackIndexRowValue) String() string { return proto.CompactTextString(m) } func (m *BackIndexRowValue) String() string { return proto.CompactTextString(m) }
func (*BackIndexRowValue) ProtoMessage() {} func (*BackIndexRowValue) ProtoMessage() {}
func (m *BackIndexRowValue) GetTermEntries() []*BackIndexTermEntry { func (m *BackIndexRowValue) GetTermsEntries() []*BackIndexTermsEntry {
if m != nil { if m != nil {
return m.TermEntries return m.TermsEntries
} }
return nil return nil
} }
@ -98,7 +98,7 @@ func (m *BackIndexRowValue) GetStoredEntries() []*BackIndexStoreEntry {
return nil return nil
} }
func (m *BackIndexTermEntry) Unmarshal(data []byte) error { func (m *BackIndexTermsEntry) Unmarshal(data []byte) error {
var hasFields [1]uint64 var hasFields [1]uint64
l := len(data) l := len(data)
iNdEx := 0 iNdEx := 0
@ -119,47 +119,45 @@ func (m *BackIndexTermEntry) Unmarshal(data []byte) error {
wireType := int(wire & 0x7) wireType := int(wire & 0x7)
switch fieldNum { switch fieldNum {
case 1: case 1:
if wireType != 2 { if wireType != 0 {
return fmt.Errorf("proto: wrong wireType = %d for field Term", wireType) return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType)
} }
var stringLen uint64 var v uint32
for shift := uint(0); ; shift += 7 { for shift := uint(0); ; shift += 7 {
if iNdEx >= l { if iNdEx >= l {
return io.ErrUnexpectedEOF return io.ErrUnexpectedEOF
} }
b := data[iNdEx] b := data[iNdEx]
iNdEx++ iNdEx++
stringLen |= (uint64(b) & 0x7F) << shift v |= (uint32(b) & 0x7F) << shift
if b < 0x80 { if b < 0x80 {
break break
} }
} }
postIndex := iNdEx + int(stringLen) m.Field = &v
if postIndex > l {
return io.ErrUnexpectedEOF
}
s := string(data[iNdEx:postIndex])
m.Term = &s
iNdEx = postIndex
hasFields[0] |= uint64(0x00000001) hasFields[0] |= uint64(0x00000001)
case 2: case 2:
if wireType != 0 { if wireType != 2 {
return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType) return fmt.Errorf("proto: wrong wireType = %d for field Terms", wireType)
} }
var v uint32 var stringLen uint64
for shift := uint(0); ; shift += 7 { for shift := uint(0); ; shift += 7 {
if iNdEx >= l { if iNdEx >= l {
return io.ErrUnexpectedEOF return io.ErrUnexpectedEOF
} }
b := data[iNdEx] b := data[iNdEx]
iNdEx++ iNdEx++
v |= (uint32(b) & 0x7F) << shift stringLen |= (uint64(b) & 0x7F) << shift
if b < 0x80 { if b < 0x80 {
break break
} }
} }
m.Field = &v postIndex := iNdEx + int(stringLen)
hasFields[0] |= uint64(0x00000002) if postIndex > l {
return io.ErrUnexpectedEOF
}
m.Terms = append(m.Terms, string(data[iNdEx:postIndex]))
iNdEx = postIndex
default: default:
var sizeOfWire int var sizeOfWire int
for { for {
@ -187,9 +185,6 @@ func (m *BackIndexTermEntry) Unmarshal(data []byte) error {
if hasFields[0]&uint64(0x00000001) == 0 { if hasFields[0]&uint64(0x00000001) == 0 {
return new(github_com_golang_protobuf_proto.RequiredNotSetError) return new(github_com_golang_protobuf_proto.RequiredNotSetError)
} }
if hasFields[0]&uint64(0x00000002) == 0 {
return new(github_com_golang_protobuf_proto.RequiredNotSetError)
}
return nil return nil
} }
@ -299,7 +294,7 @@ func (m *BackIndexRowValue) Unmarshal(data []byte) error {
switch fieldNum { switch fieldNum {
case 1: case 1:
if wireType != 2 { if wireType != 2 {
return fmt.Errorf("proto: wrong wireType = %d for field TermEntries", wireType) return fmt.Errorf("proto: wrong wireType = %d for field TermsEntries", wireType)
} }
var msglen int var msglen int
for shift := uint(0); ; shift += 7 { for shift := uint(0); ; shift += 7 {
@ -320,8 +315,8 @@ func (m *BackIndexRowValue) Unmarshal(data []byte) error {
if postIndex > l { if postIndex > l {
return io.ErrUnexpectedEOF return io.ErrUnexpectedEOF
} }
m.TermEntries = append(m.TermEntries, &BackIndexTermEntry{}) m.TermsEntries = append(m.TermsEntries, &BackIndexTermsEntry{})
if err := m.TermEntries[len(m.TermEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil { if err := m.TermsEntries[len(m.TermsEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
return err return err
} }
iNdEx = postIndex iNdEx = postIndex
@ -472,16 +467,18 @@ var (
ErrInvalidLengthUpsidedown = fmt.Errorf("proto: negative length found during unmarshaling") ErrInvalidLengthUpsidedown = fmt.Errorf("proto: negative length found during unmarshaling")
) )
func (m *BackIndexTermEntry) Size() (n int) { func (m *BackIndexTermsEntry) Size() (n int) {
var l int var l int
_ = l _ = l
if m.Term != nil {
l = len(*m.Term)
n += 1 + l + sovUpsidedown(uint64(l))
}
if m.Field != nil { if m.Field != nil {
n += 1 + sovUpsidedown(uint64(*m.Field)) n += 1 + sovUpsidedown(uint64(*m.Field))
} }
if len(m.Terms) > 0 {
for _, s := range m.Terms {
l = len(s)
n += 1 + l + sovUpsidedown(uint64(l))
}
}
if m.XXX_unrecognized != nil { if m.XXX_unrecognized != nil {
n += len(m.XXX_unrecognized) n += len(m.XXX_unrecognized)
} }
@ -508,8 +505,8 @@ func (m *BackIndexStoreEntry) Size() (n int) {
func (m *BackIndexRowValue) Size() (n int) { func (m *BackIndexRowValue) Size() (n int) {
var l int var l int
_ = l _ = l
if len(m.TermEntries) > 0 { if len(m.TermsEntries) > 0 {
for _, e := range m.TermEntries { for _, e := range m.TermsEntries {
l = e.Size() l = e.Size()
n += 1 + l + sovUpsidedown(uint64(l)) n += 1 + l + sovUpsidedown(uint64(l))
} }
@ -539,7 +536,7 @@ func sovUpsidedown(x uint64) (n int) {
func sozUpsidedown(x uint64) (n int) { func sozUpsidedown(x uint64) (n int) {
return sovUpsidedown(uint64((x << 1) ^ uint64((int64(x) >> 63)))) return sovUpsidedown(uint64((x << 1) ^ uint64((int64(x) >> 63))))
} }
func (m *BackIndexTermEntry) Marshal() (data []byte, err error) { func (m *BackIndexTermsEntry) Marshal() (data []byte, err error) {
size := m.Size() size := m.Size()
data = make([]byte, size) data = make([]byte, size)
n, err := m.MarshalTo(data) n, err := m.MarshalTo(data)
@ -549,26 +546,33 @@ func (m *BackIndexTermEntry) Marshal() (data []byte, err error) {
return data[:n], nil return data[:n], nil
} }
func (m *BackIndexTermEntry) MarshalTo(data []byte) (n int, err error) { func (m *BackIndexTermsEntry) MarshalTo(data []byte) (n int, err error) {
var i int var i int
_ = i _ = i
var l int var l int
_ = l _ = l
if m.Term == nil {
return 0, new(github_com_golang_protobuf_proto.RequiredNotSetError)
} else {
data[i] = 0xa
i++
i = encodeVarintUpsidedown(data, i, uint64(len(*m.Term)))
i += copy(data[i:], *m.Term)
}
if m.Field == nil { if m.Field == nil {
return 0, new(github_com_golang_protobuf_proto.RequiredNotSetError) return 0, new(github_com_golang_protobuf_proto.RequiredNotSetError)
} else { } else {
data[i] = 0x10 data[i] = 0x8
i++ i++
i = encodeVarintUpsidedown(data, i, uint64(*m.Field)) i = encodeVarintUpsidedown(data, i, uint64(*m.Field))
} }
if len(m.Terms) > 0 {
for _, s := range m.Terms {
data[i] = 0x12
i++
l = len(s)
for l >= 1<<7 {
data[i] = uint8(uint64(l)&0x7f | 0x80)
l >>= 7
i++
}
data[i] = uint8(l)
i++
i += copy(data[i:], s)
}
}
if m.XXX_unrecognized != nil { if m.XXX_unrecognized != nil {
i += copy(data[i:], m.XXX_unrecognized) i += copy(data[i:], m.XXX_unrecognized)
} }
@ -625,8 +629,8 @@ func (m *BackIndexRowValue) MarshalTo(data []byte) (n int, err error) {
_ = i _ = i
var l int var l int
_ = l _ = l
if len(m.TermEntries) > 0 { if len(m.TermsEntries) > 0 {
for _, msg := range m.TermEntries { for _, msg := range m.TermsEntries {
data[i] = 0xa data[i] = 0xa
i++ i++
i = encodeVarintUpsidedown(data, i, uint64(msg.Size())) i = encodeVarintUpsidedown(data, i, uint64(msg.Size()))

@ -1,6 +1,6 @@
message BackIndexTermEntry { message BackIndexTermsEntry {
required string term = 1; required uint32 field = 1;
required uint32 field = 2; repeated string terms = 2;
} }
message BackIndexStoreEntry { message BackIndexStoreEntry {
@ -9,6 +9,6 @@ message BackIndexStoreEntry {
} }
message BackIndexRowValue { message BackIndexRowValue {
repeated BackIndexTermEntry termEntries = 1; repeated BackIndexTermsEntry termsEntries = 1;
repeated BackIndexStoreEntry storedEntries = 2; repeated BackIndexStoreEntry storedEntries = 2;
} }

@ -425,14 +425,15 @@ func (i *indexAliasImpl) Swap(in, out []Index) {
// could be slower in remote usages. // could be slower in remote usages.
func createChildSearchRequest(req *SearchRequest) *SearchRequest { func createChildSearchRequest(req *SearchRequest) *SearchRequest {
rv := SearchRequest{ rv := SearchRequest{
Query: req.Query, Query: req.Query,
Size: req.Size + req.From, Size: req.Size + req.From,
From: 0, From: 0,
Highlight: req.Highlight, Highlight: req.Highlight,
Fields: req.Fields, Fields: req.Fields,
Facets: req.Facets, Facets: req.Facets,
Explain: req.Explain, Explain: req.Explain,
Sort: req.Sort, Sort: req.Sort.Copy(),
IncludeLocations: req.IncludeLocations,
} }
return &rv return &rv
} }

@ -253,6 +253,24 @@ func (i *indexImpl) Index(id string, data interface{}) (err error) {
return return
} }
// IndexAdvanced indexes an already constructed document.Document,
// skipping the mapping step entirely.
//
// It returns ErrorEmptyID when the document has an empty ID and
// ErrorIndexClosed when the index is not open; otherwise it returns
// whatever the underlying index Update call reports.
func (i *indexImpl) IndexAdvanced(doc *document.Document) (err error) {
	if doc.ID == "" {
		err = ErrorEmptyID
		return
	}

	// hold the read lock for the duration of the update
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if i.open {
		err = i.i.Update(doc)
	} else {
		err = ErrorIndexClosed
	}
	return
}
// Delete entries for the specified identifier from // Delete entries for the specified identifier from
// the index. // the index.
func (i *indexImpl) Delete(id string) (err error) { func (i *indexImpl) Delete(id string) (err error) {
@ -370,7 +388,10 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
} }
}() }()
searcher, err := req.Query.Searcher(indexReader, i.m, req.Explain) searcher, err := req.Query.Searcher(indexReader, i.m, search.SearcherOptions{
Explain: req.Explain,
IncludeTermVectors: req.IncludeLocations || req.Highlight != nil,
})
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -461,6 +482,14 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
if err == nil { if err == nil {
value = boolean value = boolean
} }
case *document.GeoPointField:
lon, err := docF.Lon()
if err == nil {
lat, err := docF.Lat()
if err == nil {
value = []float64{lon, lat}
}
}
} }
if value != nil { if value != nil {
hit.AddFieldValue(docF.Name(), value) hit.AddFieldValue(docF.Name(), value)

@ -59,3 +59,7 @@ func NewDateTimeFieldMapping() *mapping.FieldMapping {
func NewBooleanFieldMapping() *mapping.FieldMapping { func NewBooleanFieldMapping() *mapping.FieldMapping {
return mapping.NewBooleanFieldMapping() return mapping.NewBooleanFieldMapping()
} }
// NewGeoPointFieldMapping returns the field mapping produced by
// mapping.NewGeoPointFieldMapping, for use with geo point fields.
func NewGeoPointFieldMapping() *mapping.FieldMapping {
return mapping.NewGeoPointFieldMapping()
}

@ -15,6 +15,7 @@
package mapping package mapping
import ( import (
"encoding"
"encoding/json" "encoding/json"
"fmt" "fmt"
"reflect" "reflect"
@ -75,7 +76,7 @@ func (dm *DocumentMapping) Validate(cache *registry.Cache) error {
} }
} }
switch field.Type { switch field.Type {
case "text", "datetime", "number", "boolean": case "text", "datetime", "number", "boolean", "geopoint":
default: default:
return fmt.Errorf("unknown field type: '%s'", field.Type) return fmt.Errorf("unknown field type: '%s'", field.Type)
} }
@ -481,9 +482,57 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string,
fieldMapping := newDateTimeFieldMappingDynamic(context.im) fieldMapping := newDateTimeFieldMappingDynamic(context.im)
fieldMapping.processTime(property, pathString, path, indexes, context) fieldMapping.processTime(property, pathString, path, indexes, context)
} }
case encoding.TextMarshaler:
txt, err := property.MarshalText()
if err == nil && subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
if fieldMapping.Type == "text" {
fieldMapping.processString(string(txt), pathString, path, indexes, context)
}
}
}
dm.walkDocument(property, path, indexes, context)
default: default:
if subDocMapping != nil {
for _, fieldMapping := range subDocMapping.Fields {
if fieldMapping.Type == "geopoint" {
fieldMapping.processGeoPoint(property, pathString, path, indexes, context)
}
}
}
dm.walkDocument(property, path, indexes, context) dm.walkDocument(property, path, indexes, context)
} }
case reflect.Map:
if subDocMapping != nil {
for _, fieldMapping := range subDocMapping.Fields {
if fieldMapping.Type == "geopoint" {
fieldMapping.processGeoPoint(property, pathString, path, indexes, context)
}
}
}
dm.walkDocument(property, path, indexes, context)
case reflect.Ptr:
if !propertyValue.IsNil() {
switch property := property.(type) {
case encoding.TextMarshaler:
txt, err := property.MarshalText()
if err == nil && subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
if fieldMapping.Type == "text" {
fieldMapping.processString(string(txt), pathString, path, indexes, context)
}
}
} else {
dm.walkDocument(property, path, indexes, context)
}
default:
dm.walkDocument(property, path, indexes, context)
}
}
default: default:
dm.walkDocument(property, path, indexes, context) dm.walkDocument(property, path, indexes, context)
} }

@ -21,6 +21,7 @@ import (
"github.com/blevesearch/bleve/analysis" "github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/geo"
) )
// control the default behavior for dynamic fields (those not explicitly mapped) // control the default behavior for dynamic fields (those not explicitly mapped)
@ -124,6 +125,16 @@ func newBooleanFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping {
return rv return rv
} }
// NewGeoPointFieldMapping builds the default field mapping for geo
// points: the type is "geopoint" and the Store, Index and IncludeInAll
// flags are all enabled.
func NewGeoPointFieldMapping() *FieldMapping {
	fm := new(FieldMapping)
	fm.Type = "geopoint"
	fm.Store = true
	fm.Index = true
	fm.IncludeInAll = true
	return fm
}
// Options returns the indexing options for this field. // Options returns the indexing options for this field.
func (fm *FieldMapping) Options() document.IndexingOptions { func (fm *FieldMapping) Options() document.IndexingOptions {
var rv document.IndexingOptions var rv document.IndexingOptions
@ -208,6 +219,20 @@ func (fm *FieldMapping) processBoolean(propertyValueBool bool, pathString string
} }
} }
// processGeoPoint attempts to extract a longitude/latitude pair from the
// supplied property via geo.ExtractGeoPoint. When a point is found, a geo
// point field built with this mapping's indexing options is added to the
// document in the walk context; when the mapping is not included in "all",
// the field name is also recorded in the context's excludedFromAll list.
// Properties that do not yield a geo point are silently ignored.
func (fm *FieldMapping) processGeoPoint(propertyMightBeGeoPoint interface{}, pathString string, path []string, indexes []uint64, context *walkContext) {
	lon, lat, ok := geo.ExtractGeoPoint(propertyMightBeGeoPoint)
	if !ok {
		// nothing recognisable as a geo point, so there is nothing to index
		return
	}

	name := getFieldName(pathString, path, fm)
	opts := fm.Options()
	context.doc.AddField(document.NewGeoPointFieldWithIndexingOptions(name, indexes, lon, lat, opts))

	if !fm.IncludeInAll {
		context.excludedFromAll = append(context.excludedFromAll, name)
	}
}
func (fm *FieldMapping) analyzerForField(path []string, context *walkContext) *analysis.Analyzer { func (fm *FieldMapping) analyzerForField(path []string, context *walkContext) *analysis.Analyzer {
analyzerName := fm.Analyzer analyzerName := fm.Analyzer
if analyzerName == "" { if analyzerName == "" {

@ -289,7 +289,12 @@ func (im *IndexMappingImpl) UnmarshalJSON(data []byte) error {
} }
func (im *IndexMappingImpl) determineType(data interface{}) string { func (im *IndexMappingImpl) determineType(data interface{}) string {
// first see if the object implements Classifier // first see if the object implements bleveClassifier
bleveClassifier, ok := data.(bleveClassifier)
if ok {
return bleveClassifier.BleveType()
}
// next see if the object implements Classifier
classifier, ok := data.(Classifier) classifier, ok := data.(Classifier)
if ok { if ok {
return classifier.Type() return classifier.Type()

@ -22,12 +22,21 @@ import (
"github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/document"
) )
// A Classifier is an interface describing any object // A Classifier is an interface describing any object which knows how to
// which knows how to identify its own type. // identify its own type. Alternatively, if a struct already has a Type
// field or method in conflict, one can use BleveType instead.
type Classifier interface { type Classifier interface {
Type() string Type() string
} }
// A bleveClassifier is an interface describing any object which knows how
// to identify its own type. This is introduced as an alternative to the
// Classifier interface which often has naming conflicts with existing
// structures.
type bleveClassifier interface {
BleveType() string
}
var logger = log.New(ioutil.Discard, "bleve mapping ", log.LstdFlags) var logger = log.New(ioutil.Discard, "bleve mapping ", log.LstdFlags)
// SetLog sets the logger used for logging // SetLog sets the logger used for logging

@ -0,0 +1,43 @@
package numeric
var (
	// interleaveMagic holds the bit masks applied after each shift step
	// in Interleave and Deinterleave.
	interleaveMagic = []uint64{
		0x5555555555555555,
		0x3333333333333333,
		0x0F0F0F0F0F0F0F0F,
		0x00FF00FF00FF00FF,
		0x0000FFFF0000FFFF,
		0x00000000FFFFFFFF,
		0xAAAAAAAAAAAAAAAA,
	}

	// interleaveShift holds the shift distance used at each step; entry i
	// is paired with interleaveMagic[i] when interleaving.
	interleaveShift = []uint{1, 2, 4, 8, 16}
)

// Interleave combines the first 32 bits of each uint64 into one value:
// bits of v1 land on the even bit positions and bits of v2 on the odd
// bit positions of the result.
//
// Adapted from org.apache.lucene.util.BitUtil, which was adapted from:
// http://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN
func Interleave(v1, v2 uint64) uint64 {
	// walk the shift table from the widest step (16) down to the
	// narrowest (1), spreading both inputs in lock-step
	for step := len(interleaveShift) - 1; step >= 0; step-- {
		shift, mask := interleaveShift[step], interleaveMagic[step]
		v1 = (v1 | (v1 << shift)) & mask
		v2 = (v2 | (v2 << shift)) & mask
	}
	return v1 | (v2 << 1)
}

// Deinterleave recovers the 32-bit value stored on the even bit positions
// of b (starting at position 0). To get the other 32-bit value, shift b
// right by 1 before calling.
func Deinterleave(b uint64) uint64 {
	b &= interleaveMagic[0]
	// each step folds the surviving bits closer together and masks with
	// the next wider pattern
	for step, shift := range interleaveShift {
		b = (b ^ (b >> shift)) & interleaveMagic[step+1]
	}
	return b
}

@ -139,6 +139,23 @@ func NewNumericRangeInclusiveQuery(min, max *float64, minInclusive, maxInclusive
return query.NewNumericRangeInclusiveQuery(min, max, minInclusive, maxInclusive) return query.NewNumericRangeInclusiveQuery(min, max, minInclusive, maxInclusive)
} }
// NewTermRangeQuery creates a new Query for ranges
// of text terms.
// Either, but not both endpoints can be "".
// The minimum value is inclusive.
// The maximum value is exclusive.
func NewTermRangeQuery(min, max string) *query.TermRangeQuery {
return query.NewTermRangeQuery(min, max)
}
// NewTermRangeInclusiveQuery creates a new Query for ranges
// of text terms.
// Either, but not both endpoints can be "".
// Control endpoint inclusion with minInclusive, maxInclusive.
func NewTermRangeInclusiveQuery(min, max string, minInclusive, maxInclusive *bool) *query.TermRangeQuery {
return query.NewTermRangeInclusiveQuery(min, max, minInclusive, maxInclusive)
}
// NewPhraseQuery creates a new Query for finding // NewPhraseQuery creates a new Query for finding
// exact term phrases in the index. // exact term phrases in the index.
// The provided terms must exist in the correct // The provided terms must exist in the correct
@ -184,3 +201,18 @@ func NewTermQuery(term string) *query.TermQuery {
func NewWildcardQuery(wildcard string) *query.WildcardQuery { func NewWildcardQuery(wildcard string) *query.WildcardQuery {
return query.NewWildcardQuery(wildcard) return query.NewWildcardQuery(wildcard)
} }
// NewGeoBoundingBoxQuery creates a new Query for performing geo bounding
// box searches. The arguments describe the position of the box and documents
// which have an indexed geo point inside the box will be returned.
func NewGeoBoundingBoxQuery(topLeftLon, topLeftLat, bottomRightLon, bottomRightLat float64) *query.GeoBoundingBoxQuery {
return query.NewGeoBoundingBoxQuery(topLeftLon, topLeftLat, bottomRightLon, bottomRightLat)
}
// NewGeoDistanceQuery creates a new Query for performing geo distance
// searches. The arguments describe a position and a distance. Documents
// which have an indexed geo point which is less than or equal to the provided
// distance from the given position will be returned.
func NewGeoDistanceQuery(lon, lat float64, distance string) *query.GeoDistanceQuery {
return query.NewGeoDistanceQuery(lon, lat, distance)
}

@ -20,10 +20,16 @@ import (
"time" "time"
"github.com/blevesearch/bleve/analysis" "github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/analysis/datetime/optional"
"github.com/blevesearch/bleve/registry"
"github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/query" "github.com/blevesearch/bleve/search/query"
) )
var cache = registry.NewCache()
const defaultDateTimeParser = optional.Name
type numericRange struct { type numericRange struct {
Name string `json:"name,omitempty"` Name string `json:"name,omitempty"`
Min *float64 `json:"min,omitempty"` Min *float64 `json:"min,omitempty"`
@ -105,26 +111,41 @@ type FacetRequest struct {
} }
func (fr *FacetRequest) Validate() error { func (fr *FacetRequest) Validate() error {
if len(fr.NumericRanges) > 0 && len(fr.DateTimeRanges) > 0 { nrCount := len(fr.NumericRanges)
drCount := len(fr.DateTimeRanges)
if nrCount > 0 && drCount > 0 {
return fmt.Errorf("facet can only conain numeric ranges or date ranges, not both") return fmt.Errorf("facet can only conain numeric ranges or date ranges, not both")
} }
nrNames := map[string]interface{}{} if nrCount > 0 {
for _, nr := range fr.NumericRanges { nrNames := map[string]interface{}{}
if _, ok := nrNames[nr.Name]; ok { for _, nr := range fr.NumericRanges {
return fmt.Errorf("numeric ranges contains duplicate name '%s'", nr.Name) if _, ok := nrNames[nr.Name]; ok {
return fmt.Errorf("numeric ranges contains duplicate name '%s'", nr.Name)
}
nrNames[nr.Name] = struct{}{}
if nr.Min == nil && nr.Max == nil {
return fmt.Errorf("numeric range query must specify either min, max or both for range name '%s'", nr.Name)
}
} }
nrNames[nr.Name] = struct{}{}
}
drNames := map[string]interface{}{} } else {
for _, dr := range fr.DateTimeRanges { dateTimeParser, err := cache.DateTimeParserNamed(defaultDateTimeParser)
if _, ok := drNames[dr.Name]; ok { if err != nil {
return fmt.Errorf("date ranges contains duplicate name '%s'", dr.Name) return err
}
drNames := map[string]interface{}{}
for _, dr := range fr.DateTimeRanges {
if _, ok := drNames[dr.Name]; ok {
return fmt.Errorf("date ranges contains duplicate name '%s'", dr.Name)
}
drNames[dr.Name] = struct{}{}
start, end := dr.ParseDates(dateTimeParser)
if start.IsZero() && end.IsZero() {
return fmt.Errorf("date range query must specify either start, end or both for range name '%s'", dr.Name)
}
} }
drNames[dr.Name] = struct{}{}
} }
return nil return nil
} }
@ -149,6 +170,16 @@ func (fr *FacetRequest) AddDateTimeRange(name string, start, end time.Time) {
fr.DateTimeRanges = append(fr.DateTimeRanges, &dateTimeRange{Name: name, Start: start, End: end}) fr.DateTimeRanges = append(fr.DateTimeRanges, &dateTimeRange{Name: name, Start: start, End: end})
} }
// AddDateTimeRangeString appends a named bucket to a facet over a field
// containing date values, with the bucket's start and end supplied as
// optional strings rather than time.Time values.
func (fr *FacetRequest) AddDateTimeRangeString(name string, start, end *string) {
	bucket := &dateTimeRange{
		Name:        name,
		startString: start,
		endString:   end,
	}
	if fr.DateTimeRanges == nil {
		fr.DateTimeRanges = make([]*dateTimeRange, 0, 1)
	}
	fr.DateTimeRanges = append(fr.DateTimeRanges, bucket)
}
// AddNumericRange adds a bucket to a field // AddNumericRange adds a bucket to a field
// containing numeric values. Documents with a // containing numeric values. Documents with a
// numeric value falling into this range are // numeric value falling into this range are
@ -219,14 +250,15 @@ func (h *HighlightRequest) AddField(field string) {
// //
// A special field named "*" can be used to return all fields. // A special field named "*" can be used to return all fields.
type SearchRequest struct { type SearchRequest struct {
Query query.Query `json:"query"` Query query.Query `json:"query"`
Size int `json:"size"` Size int `json:"size"`
From int `json:"from"` From int `json:"from"`
Highlight *HighlightRequest `json:"highlight"` Highlight *HighlightRequest `json:"highlight"`
Fields []string `json:"fields"` Fields []string `json:"fields"`
Facets FacetsRequest `json:"facets"` Facets FacetsRequest `json:"facets"`
Explain bool `json:"explain"` Explain bool `json:"explain"`
Sort search.SortOrder `json:"sort"` Sort search.SortOrder `json:"sort"`
IncludeLocations bool `json:"includeLocations"`
} }
func (r *SearchRequest) Validate() error { func (r *SearchRequest) Validate() error {
@ -267,14 +299,15 @@ func (r *SearchRequest) SortByCustom(order search.SortOrder) {
// a SearchRequest // a SearchRequest
func (r *SearchRequest) UnmarshalJSON(input []byte) error { func (r *SearchRequest) UnmarshalJSON(input []byte) error {
var temp struct { var temp struct {
Q json.RawMessage `json:"query"` Q json.RawMessage `json:"query"`
Size *int `json:"size"` Size *int `json:"size"`
From int `json:"from"` From int `json:"from"`
Highlight *HighlightRequest `json:"highlight"` Highlight *HighlightRequest `json:"highlight"`
Fields []string `json:"fields"` Fields []string `json:"fields"`
Facets FacetsRequest `json:"facets"` Facets FacetsRequest `json:"facets"`
Explain bool `json:"explain"` Explain bool `json:"explain"`
Sort []json.RawMessage `json:"sort"` Sort []json.RawMessage `json:"sort"`
IncludeLocations bool `json:"includeLocations"`
} }
err := json.Unmarshal(input, &temp) err := json.Unmarshal(input, &temp)
@ -300,6 +333,7 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
r.Highlight = temp.Highlight r.Highlight = temp.Highlight
r.Fields = temp.Fields r.Fields = temp.Fields
r.Facets = temp.Facets r.Facets = temp.Facets
r.IncludeLocations = temp.IncludeLocations
r.Query, err = query.ParseQuery(temp.Q) r.Query, err = query.ParseQuery(temp.Q)
if err != nil { if err != nil {
return err return err

@ -34,11 +34,20 @@ func newStoreHeap(cap int, compare collectorCompare) *collectStoreHeap {
return rv return rv
} }
func (c *collectStoreHeap) Add(doc *search.DocumentMatch) { func (c *collectStoreHeap) AddNotExceedingSize(doc *search.DocumentMatch,
size int) *search.DocumentMatch {
c.add(doc)
if c.Len() > size {
return c.removeLast()
}
return nil
}
func (c *collectStoreHeap) add(doc *search.DocumentMatch) {
heap.Push(c, doc) heap.Push(c, doc)
} }
func (c *collectStoreHeap) RemoveLast() *search.DocumentMatch { func (c *collectStoreHeap) removeLast() *search.DocumentMatch {
return heap.Pop(c).(*search.DocumentMatch) return heap.Pop(c).(*search.DocumentMatch)
} }
@ -49,17 +58,12 @@ func (c *collectStoreHeap) Final(skip int, fixup collectorFixup) (search.Documen
return make(search.DocumentMatchCollection, 0), nil return make(search.DocumentMatchCollection, 0), nil
} }
rv := make(search.DocumentMatchCollection, size) rv := make(search.DocumentMatchCollection, size)
for count > 0 { for i := size - 1; i >= 0; i-- {
count-- doc := heap.Pop(c).(*search.DocumentMatch)
rv[i] = doc
if count >= skip { err := fixup(doc)
size-- if err != nil {
doc := heap.Pop(c).(*search.DocumentMatch) return nil, err
rv[size] = doc
err := fixup(doc)
if err != nil {
return nil, err
}
} }
} }
return rv, nil return rv, nil

@ -34,7 +34,16 @@ func newStoreList(cap int, compare collectorCompare) *collectStoreList {
return rv return rv
} }
func (c *collectStoreList) Add(doc *search.DocumentMatch) { func (c *collectStoreList) AddNotExceedingSize(doc *search.DocumentMatch,
size int) *search.DocumentMatch {
c.add(doc)
if c.len() > size {
return c.removeLast()
}
return nil
}
func (c *collectStoreList) add(doc *search.DocumentMatch) {
for e := c.results.Front(); e != nil; e = e.Next() { for e := c.results.Front(); e != nil; e = e.Next() {
curr := e.Value.(*search.DocumentMatch) curr := e.Value.(*search.DocumentMatch)
if c.compare(doc, curr) >= 0 { if c.compare(doc, curr) >= 0 {
@ -46,7 +55,7 @@ func (c *collectStoreList) Add(doc *search.DocumentMatch) {
c.results.PushBack(doc) c.results.PushBack(doc)
} }
func (c *collectStoreList) RemoveLast() *search.DocumentMatch { func (c *collectStoreList) removeLast() *search.DocumentMatch {
return c.results.Remove(c.results.Front()).(*search.DocumentMatch) return c.results.Remove(c.results.Front()).(*search.DocumentMatch)
} }
@ -73,6 +82,6 @@ func (c *collectStoreList) Final(skip int, fixup collectorFixup) (search.Documen
return search.DocumentMatchCollection{}, nil return search.DocumentMatchCollection{}, nil
} }
func (c *collectStoreList) Len() int { func (c *collectStoreList) len() int {
return c.results.Len() return c.results.Len()
} }

@ -29,7 +29,16 @@ func newStoreSlice(cap int, compare collectorCompare) *collectStoreSlice {
return rv return rv
} }
func (c *collectStoreSlice) Add(doc *search.DocumentMatch) { func (c *collectStoreSlice) AddNotExceedingSize(doc *search.DocumentMatch,
size int) *search.DocumentMatch {
c.add(doc)
if c.len() > size {
return c.removeLast()
}
return nil
}
func (c *collectStoreSlice) add(doc *search.DocumentMatch) {
// find where to insert, starting at end (lowest) // find where to insert, starting at end (lowest)
i := len(c.slice) i := len(c.slice)
for ; i > 0; i-- { for ; i > 0; i-- {
@ -44,7 +53,7 @@ func (c *collectStoreSlice) Add(doc *search.DocumentMatch) {
c.slice[i] = doc c.slice[i] = doc
} }
func (c *collectStoreSlice) RemoveLast() *search.DocumentMatch { func (c *collectStoreSlice) removeLast() *search.DocumentMatch {
var rv *search.DocumentMatch var rv *search.DocumentMatch
rv, c.slice = c.slice[len(c.slice)-1], c.slice[:len(c.slice)-1] rv, c.slice = c.slice[len(c.slice)-1], c.slice[:len(c.slice)-1]
return rv return rv
@ -63,6 +72,6 @@ func (c *collectStoreSlice) Final(skip int, fixup collectorFixup) (search.Docume
return search.DocumentMatchCollection{}, nil return search.DocumentMatchCollection{}, nil
} }
func (c *collectStoreSlice) Len() int { func (c *collectStoreSlice) len() int {
return len(c.slice) return len(c.slice)
} }

@ -22,6 +22,15 @@ import (
"golang.org/x/net/context" "golang.org/x/net/context"
) )
// collectorStore is the container in which TopNCollector keeps the hits it
// is currently retaining. The heap-, list- and slice-backed stores in this
// package each provide these two methods.
type collectorStore interface {
// Add the document, and if the new store size exceeds the provided size
// the last element is removed and returned. If the size has not been
// exceeded, nil is returned.
AddNotExceedingSize(doc *search.DocumentMatch, size int) *search.DocumentMatch
// Final returns the retained hits as a DocumentMatchCollection, or an
// error. NOTE(review): skip is presumably the number of leading hits to
// leave out, and fixup a per-hit callback whose error aborts the call —
// confirm against the implementations.
Final(skip int, fixup collectorFixup) (search.DocumentMatchCollection, error)
}
// PreAllocSizeSkipCap will cap preallocation to this amount when // PreAllocSizeSkipCap will cap preallocation to this amount when
// size+skip exceeds this value // size+skip exceeds this value
var PreAllocSizeSkipCap = 1000 var PreAllocSizeSkipCap = 1000
@ -41,7 +50,7 @@ type TopNCollector struct {
results search.DocumentMatchCollection results search.DocumentMatchCollection
facetsBuilder *search.FacetsBuilder facetsBuilder *search.FacetsBuilder
store *collectStoreSlice store collectorStore
needDocIds bool needDocIds bool
neededFields []string neededFields []string
@ -68,9 +77,15 @@ func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector
backingSize = PreAllocSizeSkipCap + 1 backingSize = PreAllocSizeSkipCap + 1
} }
hc.store = newStoreSlice(backingSize, func(i, j *search.DocumentMatch) int { if size+skip > 10 {
return hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, i, j) hc.store = newStoreHeap(backingSize, func(i, j *search.DocumentMatch) int {
}) return hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, i, j)
})
} else {
hc.store = newStoreSlice(backingSize, func(i, j *search.DocumentMatch) int {
return hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, i, j)
})
}
// these lookups traverse an interface, so do once up-front // these lookups traverse an interface, so do once up-front
if sort.RequiresDocID() { if sort.RequiresDocID() {
@ -114,12 +129,6 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
default: default:
} }
} }
if hc.facetsBuilder != nil {
err = hc.facetsBuilder.Update(next)
if err != nil {
break
}
}
err = hc.collectSingle(searchContext, reader, next) err = hc.collectSingle(searchContext, reader, next)
if err != nil { if err != nil {
@ -144,6 +153,16 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
var sortByScoreOpt = []string{"_score"} var sortByScoreOpt = []string{"_score"}
func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.IndexReader, d *search.DocumentMatch) error { func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.IndexReader, d *search.DocumentMatch) error {
var err error
// visit field terms for features that require it (sort, facets)
if len(hc.neededFields) > 0 {
err = hc.visitFieldTerms(reader, d)
if err != nil {
return err
}
}
// increment total hits // increment total hits
hc.total++ hc.total++
d.HitNumber = hc.total d.HitNumber = hc.total
@ -153,7 +172,6 @@ func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.I
hc.maxScore = d.Score hc.maxScore = d.Score
} }
var err error
// see if we need to load ID (at this early stage, for example to sort on it) // see if we need to load ID (at this early stage, for example to sort on it)
if hc.needDocIds { if hc.needDocIds {
d.ID, err = reader.ExternalID(d.IndexInternalID) d.ID, err = reader.ExternalID(d.IndexInternalID)
@ -162,22 +180,6 @@ func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.I
} }
} }
// see if we need to load the stored fields
if len(hc.neededFields) > 0 {
// find out which fields haven't been loaded yet
fieldsToLoad := d.CachedFieldTerms.FieldsNotYetCached(hc.neededFields)
// look them up
fieldTerms, err := reader.DocumentFieldTerms(d.IndexInternalID, fieldsToLoad)
if err != nil {
return err
}
// cache these as well
if d.CachedFieldTerms == nil {
d.CachedFieldTerms = make(map[string][]string)
}
d.CachedFieldTerms.Merge(fieldTerms)
}
// compute this hits sort value // compute this hits sort value
if len(hc.sort) == 1 && hc.cachedScoring[0] { if len(hc.sort) == 1 && hc.cachedScoring[0] {
d.Sort = sortByScoreOpt d.Sort = sortByScoreOpt
@ -197,9 +199,8 @@ func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.I
} }
} }
hc.store.Add(d) removed := hc.store.AddNotExceedingSize(d, hc.size+hc.skip)
if hc.store.Len() > hc.size+hc.skip { if removed != nil {
removed := hc.store.RemoveLast()
if hc.lowestMatchOutsideResults == nil { if hc.lowestMatchOutsideResults == nil {
hc.lowestMatchOutsideResults = removed hc.lowestMatchOutsideResults = removed
} else { } else {
@ -215,9 +216,31 @@ func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.I
return nil return nil
} }
// visitFieldTerms asks the reader to visit the terms of the hit's needed
// fields and hands every (field, term) pair to the consumers that depend on
// them: the facets builder, when one is registered, and the sort order.
//
// When a facets builder is present it is also told when the document starts
// (before the visit) and ends (after the visit, even if the visit failed).
// The returned error is the one reported by reader.DocumentVisitFieldTerms.
func (hc *TopNCollector) visitFieldTerms(reader index.IndexReader, d *search.DocumentMatch) error {
	// callback invoked by the reader for each term it visits
	onTerm := func(field string, term []byte) {
		if fb := hc.facetsBuilder; fb != nil {
			fb.UpdateVisitor(field, term)
		}
		hc.sort.UpdateVisitor(field, term)
	}

	if fb := hc.facetsBuilder; fb != nil {
		fb.StartDoc()
	}

	visitErr := reader.DocumentVisitFieldTerms(d.IndexInternalID, hc.neededFields, onTerm)

	if fb := hc.facetsBuilder; fb != nil {
		fb.EndDoc()
	}

	return visitErr
}
// SetFacetsBuilder registers a facet builder for this collector // SetFacetsBuilder registers a facet builder for this collector
func (hc *TopNCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) { func (hc *TopNCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) {
hc.facetsBuilder = facetsBuilder hc.facetsBuilder = facetsBuilder
hc.neededFields = append(hc.neededFields, hc.facetsBuilder.RequiredFields()...)
} }
// finalizeResults starts with the heap containing the final top size+skip // finalizeResults starts with the heap containing the final top size+skip

@ -18,7 +18,6 @@ import (
"sort" "sort"
"time" "time"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/numeric" "github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search"
) )
@ -35,6 +34,7 @@ type DateTimeFacetBuilder struct {
total int total int
missing int missing int
ranges map[string]*dateTimeRange ranges map[string]*dateTimeRange
sawValue bool
} }
func NewDateTimeFacetBuilder(field string, size int) *DateTimeFacetBuilder { func NewDateTimeFacetBuilder(field string, size int) *DateTimeFacetBuilder {
@ -58,36 +58,35 @@ func (fb *DateTimeFacetBuilder) Field() string {
return fb.field return fb.field
} }
func (fb *DateTimeFacetBuilder) Update(ft index.FieldTerms) { func (fb *DateTimeFacetBuilder) UpdateVisitor(field string, term []byte) {
terms, ok := ft[fb.field] if field == fb.field {
if ok { fb.sawValue = true
for _, term := range terms { // only consider the values which are shifted 0
// only consider the values which are shifted 0 prefixCoded := numeric.PrefixCoded(term)
prefixCoded := numeric.PrefixCoded(term) shift, err := prefixCoded.Shift()
shift, err := prefixCoded.Shift() if err == nil && shift == 0 {
if err == nil && shift == 0 { i64, err := prefixCoded.Int64()
i64, err := prefixCoded.Int64() if err == nil {
if err == nil { t := time.Unix(0, i64)
t := time.Unix(0, i64)
// look at each of the ranges for a match
// look at each of the ranges for a match for rangeName, r := range fb.ranges {
for rangeName, r := range fb.ranges { if (r.start.IsZero() || t.After(r.start) || t.Equal(r.start)) && (r.end.IsZero() || t.Before(r.end)) {
fb.termsCount[rangeName] = fb.termsCount[rangeName] + 1
if (r.start.IsZero() || t.After(r.start) || t.Equal(r.start)) && (r.end.IsZero() || t.Before(r.end)) { fb.total++
existingCount, existed := fb.termsCount[rangeName]
if existed {
fb.termsCount[rangeName] = existingCount + 1
} else {
fb.termsCount[rangeName] = 1
}
fb.total++
}
} }
} }
} }
} }
} else { }
}
// StartDoc clears the sawValue flag before a document's terms are visited,
// so that EndDoc can tell whether UpdateVisitor saw any term for this
// builder's field in that document.
func (fb *DateTimeFacetBuilder) StartDoc() {
fb.sawValue = false
}
func (fb *DateTimeFacetBuilder) EndDoc() {
if !fb.sawValue {
fb.missing++ fb.missing++
} }
} }

@ -17,7 +17,6 @@ package facet
import ( import (
"sort" "sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/numeric" "github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search"
) )
@ -34,6 +33,7 @@ type NumericFacetBuilder struct {
total int total int
missing int missing int
ranges map[string]*numericRange ranges map[string]*numericRange
sawValue bool
} }
func NewNumericFacetBuilder(field string, size int) *NumericFacetBuilder { func NewNumericFacetBuilder(field string, size int) *NumericFacetBuilder {
@ -57,36 +57,35 @@ func (fb *NumericFacetBuilder) Field() string {
return fb.field return fb.field
} }
func (fb *NumericFacetBuilder) Update(ft index.FieldTerms) { func (fb *NumericFacetBuilder) UpdateVisitor(field string, term []byte) {
terms, ok := ft[fb.field] if field == fb.field {
if ok { fb.sawValue = true
for _, term := range terms { // only consider the values which are shifted 0
// only consider the values which are shifted 0 prefixCoded := numeric.PrefixCoded(term)
prefixCoded := numeric.PrefixCoded(term) shift, err := prefixCoded.Shift()
shift, err := prefixCoded.Shift() if err == nil && shift == 0 {
if err == nil && shift == 0 { i64, err := prefixCoded.Int64()
i64, err := prefixCoded.Int64() if err == nil {
if err == nil { f64 := numeric.Int64ToFloat64(i64)
f64 := numeric.Int64ToFloat64(i64)
// look at each of the ranges for a match
// look at each of the ranges for a match for rangeName, r := range fb.ranges {
for rangeName, r := range fb.ranges { if (r.min == nil || f64 >= *r.min) && (r.max == nil || f64 < *r.max) {
fb.termsCount[rangeName] = fb.termsCount[rangeName] + 1
if (r.min == nil || f64 >= *r.min) && (r.max == nil || f64 < *r.max) { fb.total++
existingCount, existed := fb.termsCount[rangeName]
if existed {
fb.termsCount[rangeName] = existingCount + 1
} else {
fb.termsCount[rangeName] = 1
}
fb.total++
}
} }
} }
} }
} }
} else { }
}
// StartDoc clears the sawValue flag before a document's terms are visited,
// so that EndDoc can tell whether UpdateVisitor saw any term for this
// builder's field in that document.
func (fb *NumericFacetBuilder) StartDoc() {
fb.sawValue = false
}
func (fb *NumericFacetBuilder) EndDoc() {
if !fb.sawValue {
fb.missing++ fb.missing++
} }
} }

@ -17,7 +17,6 @@ package facet
import ( import (
"sort" "sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search"
) )
@ -27,6 +26,7 @@ type TermsFacetBuilder struct {
termsCount map[string]int termsCount map[string]int
total int total int
missing int missing int
sawValue bool
} }
func NewTermsFacetBuilder(field string, size int) *TermsFacetBuilder { func NewTermsFacetBuilder(field string, size int) *TermsFacetBuilder {
@ -41,19 +41,20 @@ func (fb *TermsFacetBuilder) Field() string {
return fb.field return fb.field
} }
func (fb *TermsFacetBuilder) Update(ft index.FieldTerms) { func (fb *TermsFacetBuilder) UpdateVisitor(field string, term []byte) {
terms, ok := ft[fb.field] if field == fb.field {
if ok { fb.sawValue = true
for _, term := range terms { fb.termsCount[string(term)] = fb.termsCount[string(term)] + 1
existingCount, existed := fb.termsCount[term] fb.total++
if existed { }
fb.termsCount[term] = existingCount + 1 }
} else {
fb.termsCount[term] = 1 func (fb *TermsFacetBuilder) StartDoc() {
} fb.sawValue = false
fb.total++ }
}
} else { func (fb *TermsFacetBuilder) EndDoc() {
if !fb.sawValue {
fb.missing++ fb.missing++
} }
} }

@ -21,7 +21,10 @@ import (
) )
type FacetBuilder interface { type FacetBuilder interface {
Update(index.FieldTerms) StartDoc()
UpdateVisitor(field string, term []byte)
EndDoc()
Result() *FacetResult Result() *FacetResult
Field() string Field() string
} }
@ -41,33 +44,29 @@ func NewFacetsBuilder(indexReader index.IndexReader) *FacetsBuilder {
func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) { func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) {
fb.facets[name] = facetBuilder fb.facets[name] = facetBuilder
fb.fields = append(fb.fields, facetBuilder.Field())
} }
func (fb *FacetsBuilder) Update(docMatch *DocumentMatch) error { func (fb *FacetsBuilder) RequiredFields() []string {
if fb.fields == nil { return fb.fields
for _, facetBuilder := range fb.facets { }
fb.fields = append(fb.fields, facetBuilder.Field())
} func (fb *FacetsBuilder) StartDoc() {
for _, facetBuilder := range fb.facets {
facetBuilder.StartDoc()
} }
}
if len(fb.fields) > 0 { func (fb *FacetsBuilder) EndDoc() {
// find out which fields haven't been loaded yet for _, facetBuilder := range fb.facets {
fieldsToLoad := docMatch.CachedFieldTerms.FieldsNotYetCached(fb.fields) facetBuilder.EndDoc()
// look them up
fieldTerms, err := fb.indexReader.DocumentFieldTerms(docMatch.IndexInternalID, fieldsToLoad)
if err != nil {
return err
}
// cache these as well
if docMatch.CachedFieldTerms == nil {
docMatch.CachedFieldTerms = make(map[string][]string)
}
docMatch.CachedFieldTerms.Merge(fieldTerms)
} }
}
func (fb *FacetsBuilder) UpdateVisitor(field string, term []byte) {
for _, facetBuilder := range fb.facets { for _, facetBuilder := range fb.facets {
facetBuilder.Update(docMatch.CachedFieldTerms) facetBuilder.UpdateVisitor(field, term)
} }
return nil
} }
type TermFacet struct { type TermFacet struct {

@ -44,7 +44,7 @@ func (a *FragmentFormatter) Format(f *highlight.Fragment, orderedTermLocations h
continue continue
} }
// make sure the array positions match // make sure the array positions match
if !highlight.SameArrayPositions(f.ArrayPositions, termLocation.ArrayPositions) { if !termLocation.ArrayPositions.Equals(f.ArrayPositions) {
continue continue
} }
if termLocation.Start < curr { if termLocation.Start < curr {

@ -37,7 +37,7 @@ func (s *FragmentScorer) Score(f *highlight.Fragment) {
OUTER: OUTER:
for _, locations := range s.tlm { for _, locations := range s.tlm {
for _, location := range locations { for _, location := range locations {
if highlight.SameArrayPositions(f.ArrayPositions, location.ArrayPositions) && int(location.Start) >= f.Start && int(location.End) <= f.End { if location.ArrayPositions.Equals(f.ArrayPositions) && int(location.Start) >= f.Start && int(location.End) <= f.End {
score += 1.0 score += 1.0
// once we find a term in the fragment // once we find a term in the fragment
// don't care about additional matches // don't care about additional matches

@ -87,7 +87,7 @@ func (s *Highlighter) BestFragmentsInField(dm *search.DocumentMatch, doc *docume
if ok { if ok {
termLocationsSameArrayPosition := make(highlight.TermLocations, 0) termLocationsSameArrayPosition := make(highlight.TermLocations, 0)
for _, otl := range orderedTermLocations { for _, otl := range orderedTermLocations {
if highlight.SameArrayPositions(f.ArrayPositions(), otl.ArrayPositions) { if otl.ArrayPositions.Equals(f.ArrayPositions()) {
termLocationsSameArrayPosition = append(termLocationsSameArrayPosition, otl) termLocationsSameArrayPosition = append(termLocationsSameArrayPosition, otl)
} }
} }

@ -23,7 +23,7 @@ import (
type TermLocation struct { type TermLocation struct {
Term string Term string
ArrayPositions []float64 ArrayPositions search.ArrayPositions
Pos int Pos int
Start int Start int
End int End int
@ -103,15 +103,3 @@ func OrderTermLocations(tlm search.TermLocationMap) TermLocations {
sort.Sort(rv) sort.Sort(rv)
return rv return rv
} }
// SameArrayPositions reports whether a field's array positions and a term
// location's array positions describe the same position.
//
// The slices match when they have equal length and every term-location
// entry, converted to uint64, equals the field entry at the same index.
// Two empty (or nil) slices are considered equal.
func SameArrayPositions(fieldArrayPositions []uint64, termLocationArrayPositions []float64) bool {
	if len(termLocationArrayPositions) != len(fieldArrayPositions) {
		return false
	}
	for idx, fieldPos := range fieldArrayPositions {
		if uint64(termLocationArrayPositions[idx]) != fieldPos {
			return false
		}
	}
	return true
}

@ -37,13 +37,17 @@ func defaultDocumentMatchPoolTooSmall(p *DocumentMatchPool) *DocumentMatch {
// pre-allocated to accommodate the requested number of DocumentMatch // pre-allocated to accommodate the requested number of DocumentMatch
// instances // instances
func NewDocumentMatchPool(size, sortsize int) *DocumentMatchPool { func NewDocumentMatchPool(size, sortsize int) *DocumentMatchPool {
avail := make(DocumentMatchCollection, 0, size) avail := make(DocumentMatchCollection, size)
// pre-allocate the expected number of instances // pre-allocate the expected number of instances
startBlock := make([]DocumentMatch, size) startBlock := make([]DocumentMatch, size)
startSorts := make([]string, size*sortsize)
// make these initial instances available // make these initial instances available
for i := range startBlock { i, j := 0, 0
startBlock[i].Sort = make([]string, 0, sortsize) for i < size {
avail = append(avail, &startBlock[i]) avail[i] = &startBlock[i]
avail[i].Sort = startSorts[j:j]
i += 1
j += sortsize
} }
return &DocumentMatchPool{ return &DocumentMatchPool{
avail: avail, avail: avail,

@ -22,7 +22,7 @@ import (
) )
type BoolFieldQuery struct { type BoolFieldQuery struct {
Bool bool `json:"bool"` Bool bool `json:"bool"`
FieldVal string `json:"field,omitempty"` FieldVal string `json:"field,omitempty"`
BoostVal *Boost `json:"boost,omitempty"` BoostVal *Boost `json:"boost,omitempty"`
} }
@ -39,20 +39,19 @@ func (q *BoolFieldQuery) SetBoost(b float64) {
q.BoostVal = &boost q.BoostVal = &boost
} }
func (q *BoolFieldQuery) Boost() float64{ func (q *BoolFieldQuery) Boost() float64 {
return q.BoostVal.Value() return q.BoostVal.Value()
} }
func (q *BoolFieldQuery) SetField(f string) { func (q *BoolFieldQuery) SetField(f string) {
q.FieldVal = f q.FieldVal = f
} }
func (q *BoolFieldQuery) Field() string{ func (q *BoolFieldQuery) Field() string {
return q.FieldVal return q.FieldVal
} }
func (q *BoolFieldQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
func (q *BoolFieldQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) {
field := q.FieldVal field := q.FieldVal
if q.FieldVal == "" { if q.FieldVal == "" {
field = m.DefaultSearchField() field = m.DefaultSearchField()
@ -61,5 +60,5 @@ func (q *BoolFieldQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, e
if q.Bool { if q.Bool {
term = "T" term = "T"
} }
return searcher.NewTermSearcher(i, term, field, q.BoostVal.Value(), explain) return searcher.NewTermSearcher(i, term, field, q.BoostVal.Value(), options)
} }

@ -25,10 +25,11 @@ import (
) )
type BooleanQuery struct { type BooleanQuery struct {
Must Query `json:"must,omitempty"` Must Query `json:"must,omitempty"`
Should Query `json:"should,omitempty"` Should Query `json:"should,omitempty"`
MustNot Query `json:"must_not,omitempty"` MustNot Query `json:"must_not,omitempty"`
BoostVal *Boost `json:"boost,omitempty"` BoostVal *Boost `json:"boost,omitempty"`
queryStringMode bool
} }
// NewBooleanQuery creates a compound Query composed // NewBooleanQuery creates a compound Query composed
@ -55,6 +56,15 @@ func NewBooleanQuery(must []Query, should []Query, mustNot []Query) *BooleanQuer
return &rv return &rv
} }
// NewBooleanQueryForQueryString builds a BooleanQuery with queryStringMode
// enabled from the given must, should and mustNot clauses.
//
// The query is created without clauses first and the flag is set before any
// clause is added, because AddMust, AddShould and AddMustNot copy the flag
// onto the conjunction/disjunction they create when the corresponding
// sub-query is still nil.
func NewBooleanQueryForQueryString(must []Query, should []Query, mustNot []Query) *BooleanQuery {
	bq := NewBooleanQuery(nil, nil, nil)
	bq.queryStringMode = true

	bq.AddMust(must...)
	bq.AddShould(should...)
	bq.AddMustNot(mustNot...)

	return bq
}
// SetMinShould requires that at least minShould of the // SetMinShould requires that at least minShould of the
// should Queries must be satisfied. // should Queries must be satisfied.
func (q *BooleanQuery) SetMinShould(minShould float64) { func (q *BooleanQuery) SetMinShould(minShould float64) {
@ -63,7 +73,9 @@ func (q *BooleanQuery) SetMinShould(minShould float64) {
func (q *BooleanQuery) AddMust(m ...Query) { func (q *BooleanQuery) AddMust(m ...Query) {
if q.Must == nil { if q.Must == nil {
q.Must = NewConjunctionQuery([]Query{}) tmp := NewConjunctionQuery([]Query{})
tmp.queryStringMode = q.queryStringMode
q.Must = tmp
} }
for _, mq := range m { for _, mq := range m {
q.Must.(*ConjunctionQuery).AddQuery(mq) q.Must.(*ConjunctionQuery).AddQuery(mq)
@ -72,7 +84,9 @@ func (q *BooleanQuery) AddMust(m ...Query) {
func (q *BooleanQuery) AddShould(m ...Query) { func (q *BooleanQuery) AddShould(m ...Query) {
if q.Should == nil { if q.Should == nil {
q.Should = NewDisjunctionQuery([]Query{}) tmp := NewDisjunctionQuery([]Query{})
tmp.queryStringMode = q.queryStringMode
q.Should = tmp
} }
for _, mq := range m { for _, mq := range m {
q.Should.(*DisjunctionQuery).AddQuery(mq) q.Should.(*DisjunctionQuery).AddQuery(mq)
@ -81,7 +95,9 @@ func (q *BooleanQuery) AddShould(m ...Query) {
func (q *BooleanQuery) AddMustNot(m ...Query) { func (q *BooleanQuery) AddMustNot(m ...Query) {
if q.MustNot == nil { if q.MustNot == nil {
q.MustNot = NewDisjunctionQuery([]Query{}) tmp := NewDisjunctionQuery([]Query{})
tmp.queryStringMode = q.queryStringMode
q.MustNot = tmp
} }
for _, mq := range m { for _, mq := range m {
q.MustNot.(*DisjunctionQuery).AddQuery(mq) q.MustNot.(*DisjunctionQuery).AddQuery(mq)
@ -93,44 +109,67 @@ func (q *BooleanQuery) SetBoost(b float64) {
q.BoostVal = &boost q.BoostVal = &boost
} }
func (q *BooleanQuery) Boost() float64{ func (q *BooleanQuery) Boost() float64 {
return q.BoostVal.Value() return q.BoostVal.Value()
} }
func (q *BooleanQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { func (q *BooleanQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
var err error var err error
var mustNotSearcher search.Searcher var mustNotSearcher search.Searcher
if q.MustNot != nil { if q.MustNot != nil {
mustNotSearcher, err = q.MustNot.Searcher(i, m, explain) mustNotSearcher, err = q.MustNot.Searcher(i, m, options)
if err != nil { if err != nil {
return nil, err return nil, err
} }
if q.Must == nil && q.Should == nil { // if must not is MatchNone, reset it to nil
q.Must = NewMatchAllQuery() if _, ok := mustNotSearcher.(*searcher.MatchNoneSearcher); ok {
mustNotSearcher = nil
} }
} }
var mustSearcher search.Searcher var mustSearcher search.Searcher
if q.Must != nil { if q.Must != nil {
mustSearcher, err = q.Must.Searcher(i, m, explain) mustSearcher, err = q.Must.Searcher(i, m, options)
if err != nil { if err != nil {
return nil, err return nil, err
} }
// if must searcher is MatchNone, reset it to nil
if _, ok := mustSearcher.(*searcher.MatchNoneSearcher); ok {
mustSearcher = nil
}
} }
var shouldSearcher search.Searcher var shouldSearcher search.Searcher
if q.Should != nil { if q.Should != nil {
shouldSearcher, err = q.Should.Searcher(i, m, explain) shouldSearcher, err = q.Should.Searcher(i, m, options)
if err != nil {
return nil, err
}
// if should searcher is MatchNone, reset it to nil
if _, ok := shouldSearcher.(*searcher.MatchNoneSearcher); ok {
shouldSearcher = nil
}
}
// if all 3 are nil, return MatchNone
if mustSearcher == nil && shouldSearcher == nil && mustNotSearcher == nil {
return searcher.NewMatchNoneSearcher(i)
}
// if only mustNotSearcher, start with MatchAll
if mustSearcher == nil && shouldSearcher == nil && mustNotSearcher != nil {
mustSearcher, err = searcher.NewMatchAllSearcher(i, 1.0, options)
if err != nil { if err != nil {
return nil, err return nil, err
} }
} }
// optimization, if only should searcher, just return it instead
if mustSearcher == nil && shouldSearcher != nil && mustNotSearcher == nil { if mustSearcher == nil && shouldSearcher != nil && mustNotSearcher == nil {
return shouldSearcher, nil return shouldSearcher, nil
} }
return searcher.NewBooleanSearcher(i, mustSearcher, shouldSearcher, mustNotSearcher, explain) return searcher.NewBooleanSearcher(i, mustSearcher, shouldSearcher, mustNotSearcher, options)
} }
func (q *BooleanQuery) Validate() error { func (q *BooleanQuery) Validate() error {

@ -24,8 +24,9 @@ import (
) )
type ConjunctionQuery struct { type ConjunctionQuery struct {
Conjuncts []Query `json:"conjuncts"` Conjuncts []Query `json:"conjuncts"`
BoostVal *Boost `json:"boost,omitempty"` BoostVal *Boost `json:"boost,omitempty"`
queryStringMode bool
} }
// NewConjunctionQuery creates a new compound Query. // NewConjunctionQuery creates a new compound Query.
@ -41,7 +42,7 @@ func (q *ConjunctionQuery) SetBoost(b float64) {
q.BoostVal = &boost q.BoostVal = &boost
} }
func (q *ConjunctionQuery) Boost() float64{ func (q *ConjunctionQuery) Boost() float64 {
return q.BoostVal.Value() return q.BoostVal.Value()
} }
@ -51,11 +52,10 @@ func (q *ConjunctionQuery) AddQuery(aq ...Query) {
} }
} }
func (q *ConjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { func (q *ConjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
ss := make([]search.Searcher, len(q.Conjuncts)) ss := make([]search.Searcher, 0, len(q.Conjuncts))
for in, conjunct := range q.Conjuncts { for _, conjunct := range q.Conjuncts {
var err error sr, err := conjunct.Searcher(i, m, options)
ss[in], err = conjunct.Searcher(i, m, explain)
if err != nil { if err != nil {
for _, searcher := range ss { for _, searcher := range ss {
if searcher != nil { if searcher != nil {
@ -64,8 +64,16 @@ func (q *ConjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping,
} }
return nil, err return nil, err
} }
if _, ok := sr.(*searcher.MatchNoneSearcher); ok && q.queryStringMode {
// in query string mode, skip match none
continue
}
ss = append(ss, sr)
}
if len(ss) < 1 {
return searcher.NewMatchNoneSearcher(i)
} }
return searcher.NewConjunctionSearcher(i, ss, explain) return searcher.NewConjunctionSearcher(i, ss, options)
} }
func (q *ConjunctionQuery) Validate() error { func (q *ConjunctionQuery) Validate() error {

@ -113,20 +113,19 @@ func (q *DateRangeQuery) SetBoost(b float64) {
q.BoostVal = &boost q.BoostVal = &boost
} }
func (q *DateRangeQuery) Boost() float64{ func (q *DateRangeQuery) Boost() float64 {
return q.BoostVal.Value() return q.BoostVal.Value()
} }
func (q *DateRangeQuery) SetField(f string) { func (q *DateRangeQuery) SetField(f string) {
q.FieldVal = f q.FieldVal = f
} }
func (q *DateRangeQuery) Field() string{ func (q *DateRangeQuery) Field() string {
return q.FieldVal return q.FieldVal
} }
func (q *DateRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { func (q *DateRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
min, max, err := q.parseEndpoints() min, max, err := q.parseEndpoints()
if err != nil { if err != nil {
return nil, err return nil, err
@ -137,7 +136,7 @@ func (q *DateRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, e
field = m.DefaultSearchField() field = m.DefaultSearchField()
} }
return searcher.NewNumericRangeSearcher(i, min, max, q.InclusiveStart, q.InclusiveEnd, field, q.BoostVal.Value(), explain) return searcher.NewNumericRangeSearcher(i, min, max, q.InclusiveStart, q.InclusiveEnd, field, q.BoostVal.Value(), options)
} }
func (q *DateRangeQuery) parseEndpoints() (*float64, *float64, error) { func (q *DateRangeQuery) parseEndpoints() (*float64, *float64, error) {

@ -25,9 +25,10 @@ import (
) )
type DisjunctionQuery struct { type DisjunctionQuery struct {
Disjuncts []Query `json:"disjuncts"` Disjuncts []Query `json:"disjuncts"`
BoostVal *Boost `json:"boost,omitempty"` BoostVal *Boost `json:"boost,omitempty"`
Min float64 `json:"min"` Min float64 `json:"min"`
queryStringMode bool
} }
// NewDisjunctionQuery creates a new compound Query. // NewDisjunctionQuery creates a new compound Query.
@ -43,11 +44,10 @@ func (q *DisjunctionQuery) SetBoost(b float64) {
q.BoostVal = &boost q.BoostVal = &boost
} }
func (q *DisjunctionQuery) Boost() float64{ func (q *DisjunctionQuery) Boost() float64 {
return q.BoostVal.Value() return q.BoostVal.Value()
} }
func (q *DisjunctionQuery) AddQuery(aq ...Query) { func (q *DisjunctionQuery) AddQuery(aq ...Query) {
for _, aaq := range aq { for _, aaq := range aq {
q.Disjuncts = append(q.Disjuncts, aaq) q.Disjuncts = append(q.Disjuncts, aaq)
@ -58,11 +58,10 @@ func (q *DisjunctionQuery) SetMin(m float64) {
q.Min = m q.Min = m
} }
func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
ss := make([]search.Searcher, len(q.Disjuncts)) ss := make([]search.Searcher, 0, len(q.Disjuncts))
for in, disjunct := range q.Disjuncts { for _, disjunct := range q.Disjuncts {
var err error sr, err := disjunct.Searcher(i, m, options)
ss[in], err = disjunct.Searcher(i, m, explain)
if err != nil { if err != nil {
for _, searcher := range ss { for _, searcher := range ss {
if searcher != nil { if searcher != nil {
@ -71,8 +70,16 @@ func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping,
} }
return nil, err return nil, err
} }
if _, ok := sr.(*searcher.MatchNoneSearcher); ok && q.queryStringMode {
// in query string mode, skip match none
continue
}
ss = append(ss, sr)
}
if len(ss) < 1 {
return searcher.NewMatchNoneSearcher(i)
} }
return searcher.NewDisjunctionSearcher(i, ss, q.Min, explain) return searcher.NewDisjunctionSearcher(i, ss, q.Min, options)
} }
func (q *DisjunctionQuery) Validate() error { func (q *DisjunctionQuery) Validate() error {

@ -40,10 +40,10 @@ func (q *DocIDQuery) SetBoost(b float64) {
q.BoostVal = &boost q.BoostVal = &boost
} }
func (q *DocIDQuery) Boost() float64{ func (q *DocIDQuery) Boost() float64 {
return q.BoostVal.Value() return q.BoostVal.Value()
} }
func (q *DocIDQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { func (q *DocIDQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
return searcher.NewDocIDSearcher(i, q.IDs, q.BoostVal.Value(), explain) return searcher.NewDocIDSearcher(i, q.IDs, q.BoostVal.Value(), options)
} }

@ -48,7 +48,7 @@ func (q *FuzzyQuery) SetBoost(b float64) {
q.BoostVal = &boost q.BoostVal = &boost
} }
func (q *FuzzyQuery) Boost() float64{ func (q *FuzzyQuery) Boost() float64 {
return q.BoostVal.Value() return q.BoostVal.Value()
} }
@ -56,7 +56,7 @@ func (q *FuzzyQuery) SetField(f string) {
q.FieldVal = f q.FieldVal = f
} }
func (q *FuzzyQuery) Field() string{ func (q *FuzzyQuery) Field() string {
return q.FieldVal return q.FieldVal
} }
@ -68,10 +68,10 @@ func (q *FuzzyQuery) SetPrefix(p int) {
q.Prefix = p q.Prefix = p
} }
func (q *FuzzyQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { func (q *FuzzyQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
field := q.FieldVal field := q.FieldVal
if q.FieldVal == "" { if q.FieldVal == "" {
field = m.DefaultSearchField() field = m.DefaultSearchField()
} }
return searcher.NewFuzzySearcher(i, q.Term, q.Prefix, q.Fuzziness, field, q.BoostVal.Value(), explain) return searcher.NewFuzzySearcher(i, q.Term, q.Prefix, q.Fuzziness, field, q.BoostVal.Value(), options)
} }

@ -0,0 +1,113 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"fmt"
"github.com/blevesearch/bleve/geo"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searcher"
)
// GeoBoundingBoxQuery describes a rectangular geographic area by two
// corner points. NewGeoBoundingBoxQuery and UnmarshalJSON both store each
// corner as a two-element slice in [longitude, latitude] order.
type GeoBoundingBoxQuery struct {
// TopLeft is the top-left corner as [lon, lat].
TopLeft []float64 `json:"top_left,omitempty"`
// BottomRight is the bottom-right corner as [lon, lat].
BottomRight []float64 `json:"bottom_right,omitempty"`
// FieldVal names the field to search; when empty, Searcher falls back
// to the mapping's DefaultSearchField.
FieldVal string `json:"field,omitempty"`
// BoostVal is the optional boost, set through SetBoost.
BoostVal *Boost `json:"boost,omitempty"`
}
// NewGeoBoundingBoxQuery creates a query for the rectangle spanned by the
// given top-left and bottom-right corners. Each corner is stored as
// [longitude, latitude]; field and boost are left unset.
func NewGeoBoundingBoxQuery(topLeftLon, topLeftLat, bottomRightLon, bottomRightLat float64) *GeoBoundingBoxQuery {
	rv := new(GeoBoundingBoxQuery)
	rv.TopLeft = []float64{topLeftLon, topLeftLat}
	rv.BottomRight = []float64{bottomRightLon, bottomRightLat}
	return rv
}
// SetBoost stores b as the boost of this query.
func (q *GeoBoundingBoxQuery) SetBoost(b float64) {
	// Convert into a fresh Boost value so BoostVal points at storage owned
	// by the query.
	value := Boost(b)
	q.BoostVal = &value
}
// Boost returns the boost of this query as reported by BoostVal.Value().
func (q *GeoBoundingBoxQuery) Boost() float64 {
return q.BoostVal.Value()
}
// SetField sets the name of the field this query searches.
func (q *GeoBoundingBoxQuery) SetField(f string) {
q.FieldVal = f
}
// Field returns the name of the field this query searches; it is empty
// when no field has been set.
func (q *GeoBoundingBoxQuery) Field() string {
return q.FieldVal
}
// Searcher builds the searcher that evaluates this bounding box against
// index reader i.
//
// The searched field is FieldVal, or the mapping's default search field
// when FieldVal is empty. When the bottom-right longitude is smaller than
// the top-left longitude the box is treated as crossing the date line and
// is evaluated as the disjunction of two boxes: one from longitude -180 to
// the bottom-right longitude and one from the top-left longitude to 180.
//
// An error is returned when either corner does not hold both a longitude
// and a latitude, or when any underlying searcher cannot be created.
func (q *GeoBoundingBoxQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	// Both corners are indexed at [0] and [1] below; reject short slices
	// (e.g. a zero-value query) instead of panicking.
	if len(q.TopLeft) < 2 {
		return nil, fmt.Errorf("geo bounding box top_left must be [lon, lat]")
	}
	if len(q.BottomRight) < 2 {
		return nil, fmt.Errorf("geo bounding box bottom_right must be [lon, lat]")
	}

	field := q.FieldVal
	if q.FieldVal == "" {
		field = m.DefaultSearchField()
	}

	if q.BottomRight[0] < q.TopLeft[0] {
		// cross date line, rewrite as two parts
		leftSearcher, err := searcher.NewGeoBoundingBoxSearcher(i, -180, q.BottomRight[1], q.BottomRight[0], q.TopLeft[1], field, q.BoostVal.Value(), options, true)
		if err != nil {
			return nil, err
		}
		rightSearcher, err := searcher.NewGeoBoundingBoxSearcher(i, q.TopLeft[0], q.BottomRight[1], 180, q.TopLeft[1], field, q.BoostVal.Value(), options, true)
		if err != nil {
			_ = leftSearcher.Close()
			return nil, err
		}

		rv, err := searcher.NewDisjunctionSearcher(i, []search.Searcher{leftSearcher, rightSearcher}, 0, options)
		if err != nil {
			// Nothing took ownership of the two halves, so release them
			// here rather than leaking them.
			_ = leftSearcher.Close()
			_ = rightSearcher.Close()
			return nil, err
		}
		return rv, nil
	}

	return searcher.NewGeoBoundingBoxSearcher(i, q.TopLeft[0], q.BottomRight[1], q.BottomRight[0], q.TopLeft[1], field, q.BoostVal.Value(), options, true)
}
// Validate checks that the query can be turned into a searcher: both
// corners must hold at least a longitude and a latitude, because Searcher
// reads elements 0 and 1 of TopLeft and BottomRight.
func (q *GeoBoundingBoxQuery) Validate() error {
	if len(q.TopLeft) < 2 {
		return fmt.Errorf("geo bounding box top_left must be [lon, lat]")
	}
	if len(q.BottomRight) < 2 {
		return fmt.Errorf("geo bounding box bottom_right must be [lon, lat]")
	}
	return nil
}
// UnmarshalJSON decodes a bounding box query from JSON.
//
// The two corners are first decoded as untyped values and then converted
// with geo.ExtractGeoPoint, so any point representation that helper
// recognizes is accepted. An error is returned when the JSON is malformed
// or when either corner cannot be interpreted as a point. TopLeft is
// assigned before bottom_right is examined.
func (q *GeoBoundingBoxQuery) UnmarshalJSON(data []byte) error {
	var raw struct {
		TopLeft     interface{} `json:"top_left,omitempty"`
		BottomRight interface{} `json:"bottom_right,omitempty"`
		FieldVal    string      `json:"field,omitempty"`
		BoostVal    *Boost      `json:"boost,omitempty"`
	}
	if err := json.Unmarshal(data, &raw); err != nil {
		return err
	}

	// Delegate point parsing to the shared helper in the geo package.
	tlLon, tlLat, ok := geo.ExtractGeoPoint(raw.TopLeft)
	if !ok {
		return fmt.Errorf("geo location top_left not in a valid format")
	}
	q.TopLeft = []float64{tlLon, tlLat}

	brLon, brLat, ok := geo.ExtractGeoPoint(raw.BottomRight)
	if !ok {
		return fmt.Errorf("geo location bottom_right not in a valid format")
	}
	q.BottomRight = []float64{brLon, brLat}

	q.FieldVal = raw.FieldVal
	q.BoostVal = raw.BoostVal
	return nil
}

@ -0,0 +1,100 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"fmt"
"github.com/blevesearch/bleve/geo"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searcher"
)
// GeoDistanceQuery describes a circular geographic area: a center point
// and a distance around it.
type GeoDistanceQuery struct {
// Location is the center point; NewGeoDistanceQuery and UnmarshalJSON
// store it as [lon, lat].
Location []float64 `json:"location,omitempty"`
// Distance is the textual distance; Searcher parses it with
// geo.ParseDistance.
Distance string `json:"distance,omitempty"`
// FieldVal names the field to search; when empty, Searcher falls back
// to the mapping's DefaultSearchField.
FieldVal string `json:"field,omitempty"`
// BoostVal is the optional boost, set through SetBoost.
BoostVal *Boost `json:"boost,omitempty"`
}
// NewGeoDistanceQuery creates a query centered on the given longitude and
// latitude with the given textual distance. The center is stored as
// [lon, lat]; field and boost are left unset.
func NewGeoDistanceQuery(lon, lat float64, distance string) *GeoDistanceQuery {
	rv := new(GeoDistanceQuery)
	rv.Location = []float64{lon, lat}
	rv.Distance = distance
	return rv
}
// SetBoost stores b as the boost of this query.
func (q *GeoDistanceQuery) SetBoost(b float64) {
	// Convert into a fresh Boost value so BoostVal points at storage owned
	// by the query.
	value := Boost(b)
	q.BoostVal = &value
}
// Boost returns the boost of this query as reported by BoostVal.Value().
func (q *GeoDistanceQuery) Boost() float64 {
return q.BoostVal.Value()
}
// SetField sets the name of the field this query searches.
func (q *GeoDistanceQuery) SetField(f string) {
q.FieldVal = f
}
// Field returns the name of the field this query searches; it is empty
// when no field has been set.
func (q *GeoDistanceQuery) Field() string {
return q.FieldVal
}
// Searcher builds the searcher that evaluates this distance query against
// index reader i.
//
// The searched field is FieldVal, or the mapping's default search field
// when FieldVal is empty. Distance is parsed with geo.ParseDistance and
// passed, together with the two Location coordinates, to the geo point
// distance searcher.
//
// An error is returned when Location does not hold two coordinates, when
// Distance cannot be parsed, or when the searcher cannot be created.
func (q *GeoDistanceQuery) Searcher(i index.IndexReader, m mapping.IndexMapping,
	options search.SearcherOptions) (search.Searcher, error) {
	// Location is indexed at [0] and [1] below; reject short slices (e.g. a
	// zero-value query) instead of panicking.
	if len(q.Location) < 2 {
		return nil, fmt.Errorf("geo distance location must be [lon, lat]")
	}

	field := q.FieldVal
	if q.FieldVal == "" {
		field = m.DefaultSearchField()
	}

	dist, err := geo.ParseDistance(q.Distance)
	if err != nil {
		return nil, err
	}

	return searcher.NewGeoPointDistanceSearcher(i, q.Location[0], q.Location[1],
		dist, field, q.BoostVal.Value(), options)
}
// Validate checks that the query can be turned into a searcher: Location
// must hold at least two coordinates, because Searcher reads elements 0
// and 1 of it.
func (q *GeoDistanceQuery) Validate() error {
	if len(q.Location) < 2 {
		return fmt.Errorf("geo distance location must be [lon, lat]")
	}
	return nil
}
// UnmarshalJSON decodes a distance query from JSON.
//
// The location is first decoded as an untyped value and then converted
// with geo.ExtractGeoPoint, so any point representation that helper
// recognizes is accepted. An error is returned when the JSON is malformed
// or when the location cannot be interpreted as a point; in both cases the
// receiver is left untouched.
func (q *GeoDistanceQuery) UnmarshalJSON(data []byte) error {
	var raw struct {
		Location interface{} `json:"location,omitempty"`
		Distance string      `json:"distance,omitempty"`
		FieldVal string      `json:"field,omitempty"`
		BoostVal *Boost      `json:"boost,omitempty"`
	}
	if err := json.Unmarshal(data, &raw); err != nil {
		return err
	}

	// Delegate point parsing to the shared helper in the geo package.
	lon, lat, ok := geo.ExtractGeoPoint(raw.Location)
	if !ok {
		return fmt.Errorf("geo location not in a valid format")
	}

	q.Location = []float64{lon, lat}
	q.Distance = raw.Distance
	q.FieldVal = raw.FieldVal
	q.BoostVal = raw.BoostVal
	return nil
}

@ -90,7 +90,7 @@ func (q *MatchQuery) SetBoost(b float64) {
q.BoostVal = &boost q.BoostVal = &boost
} }
func (q *MatchQuery) Boost() float64{ func (q *MatchQuery) Boost() float64 {
return q.BoostVal.Value() return q.BoostVal.Value()
} }
@ -98,7 +98,7 @@ func (q *MatchQuery) SetField(f string) {
q.FieldVal = f q.FieldVal = f
} }
func (q *MatchQuery) Field() string{ func (q *MatchQuery) Field() string {
return q.FieldVal return q.FieldVal
} }
@ -114,7 +114,7 @@ func (q *MatchQuery) SetOperator(operator MatchQueryOperator) {
q.Operator = operator q.Operator = operator
} }
func (q *MatchQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { func (q *MatchQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
field := q.FieldVal field := q.FieldVal
if q.FieldVal == "" { if q.FieldVal == "" {
@ -160,17 +160,17 @@ func (q *MatchQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, expla
shouldQuery := NewDisjunctionQuery(tqs) shouldQuery := NewDisjunctionQuery(tqs)
shouldQuery.SetMin(1) shouldQuery.SetMin(1)
shouldQuery.SetBoost(q.BoostVal.Value()) shouldQuery.SetBoost(q.BoostVal.Value())
return shouldQuery.Searcher(i, m, explain) return shouldQuery.Searcher(i, m, options)
case MatchQueryOperatorAnd: case MatchQueryOperatorAnd:
mustQuery := NewConjunctionQuery(tqs) mustQuery := NewConjunctionQuery(tqs)
mustQuery.SetBoost(q.BoostVal.Value()) mustQuery.SetBoost(q.BoostVal.Value())
return mustQuery.Searcher(i, m, explain) return mustQuery.Searcher(i, m, options)
default: default:
return nil, fmt.Errorf("unhandled operator %d", q.Operator) return nil, fmt.Errorf("unhandled operator %d", q.Operator)
} }
} }
noneQuery := NewMatchNoneQuery() noneQuery := NewMatchNoneQuery()
return noneQuery.Searcher(i, m, explain) return noneQuery.Searcher(i, m, options)
} }

@ -38,14 +38,12 @@ func (q *MatchAllQuery) SetBoost(b float64) {
q.BoostVal = &boost q.BoostVal = &boost
} }
func (q *MatchAllQuery) Boost() float64{ func (q *MatchAllQuery) Boost() float64 {
return q.BoostVal.Value() return q.BoostVal.Value()
} }
func (q *MatchAllQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
return searcher.NewMatchAllSearcher(i, q.BoostVal.Value(), options)
func (q *MatchAllQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) {
return searcher.NewMatchAllSearcher(i, q.BoostVal.Value(), explain)
} }
func (q *MatchAllQuery) MarshalJSON() ([]byte, error) { func (q *MatchAllQuery) MarshalJSON() ([]byte, error) {

@ -38,11 +38,11 @@ func (q *MatchNoneQuery) SetBoost(b float64) {
q.BoostVal = &boost q.BoostVal = &boost
} }
func (q *MatchNoneQuery) Boost() float64{ func (q *MatchNoneQuery) Boost() float64 {
return q.BoostVal.Value() return q.BoostVal.Value()
} }
func (q *MatchNoneQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { func (q *MatchNoneQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
return searcher.NewMatchNoneSearcher(i) return searcher.NewMatchNoneSearcher(i)
} }

@ -49,7 +49,7 @@ func (q *MatchPhraseQuery) SetBoost(b float64) {
q.BoostVal = &boost q.BoostVal = &boost
} }
func (q *MatchPhraseQuery) Boost() float64{ func (q *MatchPhraseQuery) Boost() float64 {
return q.BoostVal.Value() return q.BoostVal.Value()
} }
@ -57,11 +57,11 @@ func (q *MatchPhraseQuery) SetField(f string) {
q.FieldVal = f q.FieldVal = f
} }
func (q *MatchPhraseQuery) Field() string{ func (q *MatchPhraseQuery) Field() string {
return q.FieldVal return q.FieldVal
} }
func (q *MatchPhraseQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { func (q *MatchPhraseQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
field := q.FieldVal field := q.FieldVal
if q.FieldVal == "" { if q.FieldVal == "" {
field = m.DefaultSearchField() field = m.DefaultSearchField()
@ -81,15 +81,15 @@ func (q *MatchPhraseQuery) Searcher(i index.IndexReader, m mapping.IndexMapping,
tokens := analyzer.Analyze([]byte(q.MatchPhrase)) tokens := analyzer.Analyze([]byte(q.MatchPhrase))
if len(tokens) > 0 { if len(tokens) > 0 {
phrase := tokenStreamToPhrase(tokens) phrase := tokenStreamToPhrase(tokens)
phraseQuery := NewPhraseQuery(phrase, field) phraseQuery := NewMultiPhraseQuery(phrase, field)
phraseQuery.SetBoost(q.BoostVal.Value()) phraseQuery.SetBoost(q.BoostVal.Value())
return phraseQuery.Searcher(i, m, explain) return phraseQuery.Searcher(i, m, options)
} }
noneQuery := NewMatchNoneQuery() noneQuery := NewMatchNoneQuery()
return noneQuery.Searcher(i, m, explain) return noneQuery.Searcher(i, m, options)
} }
func tokenStreamToPhrase(tokens analysis.TokenStream) []string { func tokenStreamToPhrase(tokens analysis.TokenStream) [][]string {
firstPosition := int(^uint(0) >> 1) firstPosition := int(^uint(0) >> 1)
lastPosition := 0 lastPosition := 0
for _, token := range tokens { for _, token := range tokens {
@ -102,13 +102,10 @@ func tokenStreamToPhrase(tokens analysis.TokenStream) []string {
} }
phraseLen := lastPosition - firstPosition + 1 phraseLen := lastPosition - firstPosition + 1
if phraseLen > 0 { if phraseLen > 0 {
rv := make([]string, phraseLen) rv := make([][]string, phraseLen)
for i := 0; i < phraseLen; i++ {
rv[i] = ""
}
for _, token := range tokens { for _, token := range tokens {
pos := token.Position - firstPosition pos := token.Position - firstPosition
rv[pos] = string(token.Term) rv[pos] = append(rv[pos], string(token.Term))
} }
return rv return rv
} }

@ -0,0 +1,80 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"fmt"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searcher"
)
// MultiPhraseQuery is a phrase query in which every position of the
// phrase may be satisfied by any one of several terms.
type MultiPhraseQuery struct {
// Terms holds one inner slice per phrase position; each inner slice
// lists the alternative terms for that position.
Terms [][]string `json:"terms"`
// Field names the field the phrase is searched in.
Field string `json:"field,omitempty"`
// BoostVal is the optional boost, set through SetBoost.
BoostVal *Boost `json:"boost,omitempty"`
}
// NewMultiPhraseQuery creates a new Query that looks for a phrase in the
// given field of the index.
//
// It behaves like PhraseQuery, except that each position of the phrase is
// described by a list of acceptable terms rather than a single term: for
// every position, at least one of its terms must occur, in the correct
// order and at the correct index offsets. The queried field must have been
// indexed with IncludeTermVectors set to true.
func NewMultiPhraseQuery(terms [][]string, field string) *MultiPhraseQuery {
	rv := new(MultiPhraseQuery)
	rv.Terms = terms
	rv.Field = field
	return rv
}
// SetBoost stores b as the boost of this query.
func (q *MultiPhraseQuery) SetBoost(b float64) {
	// Convert into a fresh Boost value so BoostVal points at storage owned
	// by the query.
	value := Boost(b)
	q.BoostVal = &value
}
// Boost returns the boost of this query as reported by BoostVal.Value().
func (q *MultiPhraseQuery) Boost() float64 {
return q.BoostVal.Value()
}
// Searcher builds a multi-phrase searcher over index reader i from the
// query's Terms and Field. The mapping m is not consulted, and BoostVal is
// not forwarded to the searcher by this call.
func (q *MultiPhraseQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
return searcher.NewMultiPhraseSearcher(i, q.Terms, q.Field, options)
}
// Validate reports an error when the query has no phrase positions at
// all; any non-empty Terms is accepted.
func (q *MultiPhraseQuery) Validate() error {
	if len(q.Terms) == 0 {
		return fmt.Errorf("phrase query must contain at least one term")
	}
	return nil
}
// UnmarshalJSON decodes a multi-phrase query from JSON. The receiver is
// only modified when decoding succeeds.
func (q *MultiPhraseQuery) UnmarshalJSON(data []byte) error {
	// Decode through a locally defined type with the same fields but none
	// of MultiPhraseQuery's methods, so json.Unmarshal does not call back
	// into this method.
	type plainQuery MultiPhraseQuery

	var decoded plainQuery
	if err := json.Unmarshal(data, &decoded); err != nil {
		return err
	}

	q.Terms = decoded.Terms
	q.Field = decoded.Field
	q.BoostVal = decoded.BoostVal
	return nil
}

@ -59,7 +59,7 @@ func (q *NumericRangeQuery) SetBoost(b float64) {
q.BoostVal = &boost q.BoostVal = &boost
} }
func (q *NumericRangeQuery) Boost() float64{ func (q *NumericRangeQuery) Boost() float64 {
return q.BoostVal.Value() return q.BoostVal.Value()
} }
@ -67,16 +67,16 @@ func (q *NumericRangeQuery) SetField(f string) {
q.FieldVal = f q.FieldVal = f
} }
func (q *NumericRangeQuery) Field() string{ func (q *NumericRangeQuery) Field() string {
return q.FieldVal return q.FieldVal
} }
func (q *NumericRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { func (q *NumericRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
field := q.FieldVal field := q.FieldVal
if q.FieldVal == "" { if q.FieldVal == "" {
field = m.DefaultSearchField() field = m.DefaultSearchField()
} }
return searcher.NewNumericRangeSearcher(i, q.Min, q.Max, q.InclusiveMin, q.InclusiveMax, field, q.BoostVal.Value(), explain) return searcher.NewNumericRangeSearcher(i, q.Min, q.Max, q.InclusiveMin, q.InclusiveMax, field, q.BoostVal.Value(), options)
} }
func (q *NumericRangeQuery) Validate() error { func (q *NumericRangeQuery) Validate() error {

@ -25,10 +25,9 @@ import (
) )
type PhraseQuery struct { type PhraseQuery struct {
Terms []string `json:"terms"` Terms []string `json:"terms"`
Field string `json:"field,omitempty"` Field string `json:"field,omitempty"`
BoostVal *Boost `json:"boost,omitempty"` BoostVal *Boost `json:"boost,omitempty"`
termQueries []Query
} }
// NewPhraseQuery creates a new Query for finding // NewPhraseQuery creates a new Query for finding
@ -38,18 +37,9 @@ type PhraseQuery struct {
// specified field. Queried field must have been indexed with // specified field. Queried field must have been indexed with
// IncludeTermVectors set to true. // IncludeTermVectors set to true.
func NewPhraseQuery(terms []string, field string) *PhraseQuery { func NewPhraseQuery(terms []string, field string) *PhraseQuery {
termQueries := make([]Query, 0)
for _, term := range terms {
if term != "" {
tq := NewTermQuery(term)
tq.SetField(field)
termQueries = append(termQueries, tq)
}
}
return &PhraseQuery{ return &PhraseQuery{
Terms: terms, Terms: terms,
Field: field, Field: field,
termQueries: termQueries,
} }
} }
@ -58,22 +48,16 @@ func (q *PhraseQuery) SetBoost(b float64) {
q.BoostVal = &boost q.BoostVal = &boost
} }
func (q *PhraseQuery) Boost() float64{ func (q *PhraseQuery) Boost() float64 {
return q.BoostVal.Value() return q.BoostVal.Value()
} }
func (q *PhraseQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { func (q *PhraseQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
return searcher.NewPhraseSearcher(i, q.Terms, q.Field, options)
conjunctionQuery := NewConjunctionQuery(q.termQueries)
conjunctionSearcher, err := conjunctionQuery.Searcher(i, m, explain)
if err != nil {
return nil, err
}
return searcher.NewPhraseSearcher(i, conjunctionSearcher.(*searcher.ConjunctionSearcher), q.Terms)
} }
func (q *PhraseQuery) Validate() error { func (q *PhraseQuery) Validate() error {
if len(q.termQueries) < 1 { if len(q.Terms) < 1 {
return fmt.Errorf("phrase query must contain at least one term") return fmt.Errorf("phrase query must contain at least one term")
} }
return nil return nil
@ -89,9 +73,5 @@ func (q *PhraseQuery) UnmarshalJSON(data []byte) error {
q.Terms = tmp.Terms q.Terms = tmp.Terms
q.Field = tmp.Field q.Field = tmp.Field
q.BoostVal = tmp.BoostVal q.BoostVal = tmp.BoostVal
q.termQueries = make([]Query, len(q.Terms))
for i, term := range q.Terms {
q.termQueries[i] = &TermQuery{Term: term, FieldVal: q.Field, BoostVal: q.BoostVal}
}
return nil return nil
} }

@ -41,7 +41,7 @@ func (q *PrefixQuery) SetBoost(b float64) {
q.BoostVal = &boost q.BoostVal = &boost
} }
func (q *PrefixQuery) Boost() float64{ func (q *PrefixQuery) Boost() float64 {
return q.BoostVal.Value() return q.BoostVal.Value()
} }
@ -49,14 +49,14 @@ func (q *PrefixQuery) SetField(f string) {
q.FieldVal = f q.FieldVal = f
} }
func (q *PrefixQuery) Field() string{ func (q *PrefixQuery) Field() string {
return q.FieldVal return q.FieldVal
} }
func (q *PrefixQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { func (q *PrefixQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
field := q.FieldVal field := q.FieldVal
if q.FieldVal == "" { if q.FieldVal == "" {
field = m.DefaultSearchField() field = m.DefaultSearchField()
} }
return searcher.NewTermPrefixSearcher(i, q.Prefix, field, q.BoostVal.Value(), explain) return searcher.NewTermPrefixSearcher(i, q.Prefix, field, q.BoostVal.Value(), options)
} }

@ -36,7 +36,8 @@ func SetLog(l *log.Logger) {
// A Query represents a description of the type // A Query represents a description of the type
// and parameters for a query into the index. // and parameters for a query into the index.
type Query interface { type Query interface {
Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) Searcher(i index.IndexReader, m mapping.IndexMapping,
options search.SearcherOptions) (search.Searcher, error)
} }
// A BoostableQuery represents a Query which can be boosted // A BoostableQuery represents a Query which can be boosted
@ -122,7 +123,13 @@ func ParseQuery(input []byte) (Query, error) {
var rv PhraseQuery var rv PhraseQuery
err := json.Unmarshal(input, &rv) err := json.Unmarshal(input, &rv)
if err != nil { if err != nil {
return nil, err // now try multi-phrase
var rv2 MultiPhraseQuery
err = json.Unmarshal(input, &rv2)
if err != nil {
return nil, err
}
return &rv2, nil
} }
return &rv, nil return &rv, nil
} }
@ -154,8 +161,8 @@ func ParseQuery(input []byte) (Query, error) {
} }
return &rv, nil return &rv, nil
} }
_, hasMin := tmp["min"] _, hasMin := tmp["min"].(float64)
_, hasMax := tmp["max"] _, hasMax := tmp["max"].(float64)
if hasMin || hasMax { if hasMin || hasMax {
var rv NumericRangeQuery var rv NumericRangeQuery
err := json.Unmarshal(input, &rv) err := json.Unmarshal(input, &rv)
@ -164,6 +171,16 @@ func ParseQuery(input []byte) (Query, error) {
} }
return &rv, nil return &rv, nil
} }
_, hasMinStr := tmp["min"].(string)
_, hasMaxStr := tmp["max"].(string)
if hasMinStr || hasMaxStr {
var rv TermRangeQuery
err := json.Unmarshal(input, &rv)
if err != nil {
return nil, err
}
return &rv, nil
}
_, hasStart := tmp["start"] _, hasStart := tmp["start"]
_, hasEnd := tmp["end"] _, hasEnd := tmp["end"]
if hasStart || hasEnd { if hasStart || hasEnd {
@ -237,6 +254,25 @@ func ParseQuery(input []byte) (Query, error) {
} }
return &rv, nil return &rv, nil
} }
_, hasTopLeft := tmp["top_left"]
_, hasBottomRight := tmp["bottom_right"]
if hasTopLeft && hasBottomRight {
var rv GeoBoundingBoxQuery
err := json.Unmarshal(input, &rv)
if err != nil {
return nil, err
}
return &rv, nil
}
_, hasDistance := tmp["distance"]
if hasDistance {
var rv GeoDistanceQuery
err := json.Unmarshal(input, &rv)
if err != nil {
return nil, err
}
return &rv, nil
}
return nil, fmt.Errorf("unknown query type") return nil, fmt.Errorf("unknown query type")
} }
@ -300,14 +336,6 @@ func expandQuery(m mapping.IndexMapping, query Query) (Query, error) {
return nil, err return nil, err
} }
return &q, nil return &q, nil
case *PhraseQuery:
q := *query.(*PhraseQuery)
children, err := expandSlice(q.termQueries)
if err != nil {
return nil, err
}
q.termQueries = children
return &q, nil
default: default:
return query, nil return query, nil
} }

@ -39,16 +39,20 @@ func (q *QueryStringQuery) SetBoost(b float64) {
q.BoostVal = &boost q.BoostVal = &boost
} }
func (q *QueryStringQuery) Boost() float64{ func (q *QueryStringQuery) Boost() float64 {
return q.BoostVal.Value() return q.BoostVal.Value()
} }
func (q *QueryStringQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { func (q *QueryStringQuery) Parse() (Query, error) {
return parseQuerySyntax(q.Query)
}
func (q *QueryStringQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
newQuery, err := parseQuerySyntax(q.Query) newQuery, err := parseQuerySyntax(q.Query)
if err != nil { if err != nil {
return nil, err return nil, err
} }
return newQuery.Searcher(i, m, explain) return newQuery.Searcher(i, m, options)
} }
func (q *QueryStringQuery) Validate() error { func (q *QueryStringQuery) Validate() error {

@ -27,6 +27,7 @@ tEQUAL tTILDE
%type <s> tSTRING %type <s> tSTRING
%type <s> tPHRASE %type <s> tPHRASE
%type <s> tNUMBER %type <s> tNUMBER
%type <s> posOrNegNumber
%type <s> tTILDE %type <s> tTILDE
%type <s> tBOOST %type <s> tBOOST
%type <q> searchBase %type <q> searchBase
@ -127,7 +128,15 @@ tSTRING tCOLON tSTRING tTILDE {
tNUMBER { tNUMBER {
str := $1 str := $1
logDebugGrammar("STRING - %s", str) logDebugGrammar("STRING - %s", str)
q := NewMatchQuery(str) q1 := NewMatchQuery(str)
val, err := strconv.ParseFloat($1, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
inclusive := true
q2 := NewNumericRangeInclusiveQuery(&val, &val, &inclusive, &inclusive)
q := NewDisjunctionQuery([]Query{q1,q2})
q.queryStringMode = true
$$ = q $$ = q
} }
| |
@ -154,12 +163,21 @@ tSTRING tCOLON tSTRING {
$$ = q $$ = q
} }
| |
tSTRING tCOLON tNUMBER { tSTRING tCOLON posOrNegNumber {
field := $1 field := $1
str := $3 str := $3
logDebugGrammar("FIELD - %s STRING - %s", field, str) logDebugGrammar("FIELD - %s STRING - %s", field, str)
q := NewMatchQuery(str) q1 := NewMatchQuery(str)
q.SetField(field) q1.SetField(field)
val, err := strconv.ParseFloat($3, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
inclusive := true
q2 := NewNumericRangeInclusiveQuery(&val, &val, &inclusive, &inclusive)
q2.SetField(field)
q := NewDisjunctionQuery([]Query{q1,q2})
q.queryStringMode = true
$$ = q $$ = q
} }
| |
@ -172,9 +190,12 @@ tSTRING tCOLON tPHRASE {
$$ = q $$ = q
} }
| |
tSTRING tCOLON tGREATER tNUMBER { tSTRING tCOLON tGREATER posOrNegNumber {
field := $1 field := $1
min, _ := strconv.ParseFloat($4, 64) min, err := strconv.ParseFloat($4, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
minInclusive := false minInclusive := false
logDebugGrammar("FIELD - GREATER THAN %f", min) logDebugGrammar("FIELD - GREATER THAN %f", min)
q := NewNumericRangeInclusiveQuery(&min, nil, &minInclusive, nil) q := NewNumericRangeInclusiveQuery(&min, nil, &minInclusive, nil)
@ -182,9 +203,12 @@ tSTRING tCOLON tGREATER tNUMBER {
$$ = q $$ = q
} }
| |
tSTRING tCOLON tGREATER tEQUAL tNUMBER { tSTRING tCOLON tGREATER tEQUAL posOrNegNumber {
field := $1 field := $1
min, _ := strconv.ParseFloat($5, 64) min, err := strconv.ParseFloat($5, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
minInclusive := true minInclusive := true
logDebugGrammar("FIELD - GREATER THAN OR EQUAL %f", min) logDebugGrammar("FIELD - GREATER THAN OR EQUAL %f", min)
q := NewNumericRangeInclusiveQuery(&min, nil, &minInclusive, nil) q := NewNumericRangeInclusiveQuery(&min, nil, &minInclusive, nil)
@ -192,9 +216,12 @@ tSTRING tCOLON tGREATER tEQUAL tNUMBER {
$$ = q $$ = q
} }
| |
tSTRING tCOLON tLESS tNUMBER { tSTRING tCOLON tLESS posOrNegNumber {
field := $1 field := $1
max, _ := strconv.ParseFloat($4, 64) max, err := strconv.ParseFloat($4, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
maxInclusive := false maxInclusive := false
logDebugGrammar("FIELD - LESS THAN %f", max) logDebugGrammar("FIELD - LESS THAN %f", max)
q := NewNumericRangeInclusiveQuery(nil, &max, nil, &maxInclusive) q := NewNumericRangeInclusiveQuery(nil, &max, nil, &maxInclusive)
@ -202,9 +229,12 @@ tSTRING tCOLON tLESS tNUMBER {
$$ = q $$ = q
} }
| |
tSTRING tCOLON tLESS tEQUAL tNUMBER { tSTRING tCOLON tLESS tEQUAL posOrNegNumber {
field := $1 field := $1
max, _ := strconv.ParseFloat($5, 64) max, err := strconv.ParseFloat($5, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
maxInclusive := true maxInclusive := true
logDebugGrammar("FIELD - LESS THAN OR EQUAL %f", max) logDebugGrammar("FIELD - LESS THAN OR EQUAL %f", max)
q := NewNumericRangeInclusiveQuery(nil, &max, nil, &maxInclusive) q := NewNumericRangeInclusiveQuery(nil, &max, nil, &maxInclusive)
@ -287,3 +317,12 @@ tBOOST {
} }
logDebugGrammar("BOOST %f", boost) logDebugGrammar("BOOST %f", boost)
}; };
posOrNegNumber:
tNUMBER {
$$ = $1
}
|
tMINUS tNUMBER {
$$ = "-" + $2
};

@ -70,57 +70,58 @@ var yyExca = [...]int{
-2, 5, -2, 5,
} }
const yyNprod = 26 const yyNprod = 28
const yyPrivate = 57344 const yyPrivate = 57344
var yyTokenNames []string var yyTokenNames []string
var yyStates []string var yyStates []string
const yyLast = 31 const yyLast = 42
var yyAct = [...]int{ var yyAct = [...]int{
16, 18, 21, 13, 27, 24, 17, 19, 20, 25, 17, 16, 18, 23, 22, 30, 3, 21, 19, 20,
22, 15, 26, 23, 9, 11, 31, 14, 29, 3, 29, 26, 22, 22, 1, 21, 21, 15, 28, 25,
10, 30, 2, 28, 5, 6, 7, 1, 4, 12, 24, 27, 34, 14, 22, 13, 31, 21, 32, 33,
8, 22, 9, 11, 21, 5, 6, 2, 10, 4, 12,
7, 8,
} }
var yyPact = [...]int{ var yyPact = [...]int{
18, -1000, -1000, 18, 10, -1000, -1000, -1000, -6, 3, 28, -1000, -1000, 28, 27, -1000, -1000, -1000, 16, 9,
-1000, -1000, -1000, -1000, -1000, -4, -12, -1000, -1000, 0, -1000, -1000, -1000, -1000, -1000, -3, -11, -1000, -1000, 6,
-1, -1000, -1000, 13, -1000, -1000, 11, -1000, -1000, -1000, 5, -1000, -5, -1000, -1000, 23, -1000, -1000, 17, -1000,
-1000, -1000, -1000, -1000, -1000, -1000, -1000,
} }
var yyPgo = [...]int{ var yyPgo = [...]int{
0, 30, 29, 28, 27, 22, 19, 0, 0, 41, 39, 38, 14, 36, 6,
} }
var yyR1 = [...]int{ var yyR1 = [...]int{
0, 4, 5, 5, 6, 3, 3, 3, 1, 1, 0, 5, 6, 6, 7, 4, 4, 4, 2, 2,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 1, 1,
} }
var yyR2 = [...]int{ var yyR2 = [...]int{
0, 1, 2, 1, 3, 0, 1, 1, 1, 2, 0, 1, 2, 1, 3, 0, 1, 1, 1, 2,
4, 1, 1, 3, 3, 3, 4, 5, 4, 5, 4, 1, 1, 3, 3, 3, 4, 5, 4, 5,
4, 5, 4, 5, 0, 1, 4, 5, 4, 5, 0, 1, 1, 2,
} }
var yyChk = [...]int{ var yyChk = [...]int{
-1000, -4, -5, -6, -3, 6, 7, -5, -1, 4, -1000, -5, -6, -7, -4, 6, 7, -6, -2, 4,
10, 5, -2, 9, 14, 8, 4, 10, 5, 11, 10, 5, -3, 9, 14, 8, 4, -1, 5, 11,
12, 14, 10, 13, 5, 10, 13, 5, 10, 5, 12, 10, 7, 14, -1, 13, 5, -1, 13, 5,
10, 5, 10, -1, 5, -1, 5,
} }
var yyDef = [...]int{ var yyDef = [...]int{
5, -2, 1, -2, 0, 6, 7, 2, 24, 8, 5, -2, 1, -2, 0, 6, 7, 2, 24, 8,
11, 12, 4, 25, 9, 0, 13, 14, 15, 0, 11, 12, 4, 25, 9, 0, 13, 14, 15, 0,
0, 10, 16, 0, 20, 18, 0, 22, 17, 21, 0, 26, 0, 10, 16, 0, 20, 18, 0, 22,
19, 23, 27, 17, 21, 19, 23,
} }
var yyTok1 = [...]int{ var yyTok1 = [...]int{
@ -474,25 +475,25 @@ yydefault:
case 1: case 1:
yyDollar = yyS[yypt-1 : yypt+1] yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:39 //line query_string.y:40
{ {
logDebugGrammar("INPUT") logDebugGrammar("INPUT")
} }
case 2: case 2:
yyDollar = yyS[yypt-2 : yypt+1] yyDollar = yyS[yypt-2 : yypt+1]
//line query_string.y:44 //line query_string.y:45
{ {
logDebugGrammar("SEARCH PARTS") logDebugGrammar("SEARCH PARTS")
} }
case 3: case 3:
yyDollar = yyS[yypt-1 : yypt+1] yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:48 //line query_string.y:49
{ {
logDebugGrammar("SEARCH PART") logDebugGrammar("SEARCH PART")
} }
case 4: case 4:
yyDollar = yyS[yypt-3 : yypt+1] yyDollar = yyS[yypt-3 : yypt+1]
//line query_string.y:53 //line query_string.y:54
{ {
query := yyDollar[2].q query := yyDollar[2].q
if yyDollar[3].pf != nil { if yyDollar[3].pf != nil {
@ -511,27 +512,27 @@ yydefault:
} }
case 5: case 5:
yyDollar = yyS[yypt-0 : yypt+1] yyDollar = yyS[yypt-0 : yypt+1]
//line query_string.y:72 //line query_string.y:73
{ {
yyVAL.n = queryShould yyVAL.n = queryShould
} }
case 6: case 6:
yyDollar = yyS[yypt-1 : yypt+1] yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:76 //line query_string.y:77
{ {
logDebugGrammar("PLUS") logDebugGrammar("PLUS")
yyVAL.n = queryMust yyVAL.n = queryMust
} }
case 7: case 7:
yyDollar = yyS[yypt-1 : yypt+1] yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:81 //line query_string.y:82
{ {
logDebugGrammar("MINUS") logDebugGrammar("MINUS")
yyVAL.n = queryMustNot yyVAL.n = queryMustNot
} }
case 8: case 8:
yyDollar = yyS[yypt-1 : yypt+1] yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:87 //line query_string.y:88
{ {
str := yyDollar[1].s str := yyDollar[1].s
logDebugGrammar("STRING - %s", str) logDebugGrammar("STRING - %s", str)
@ -547,7 +548,7 @@ yydefault:
} }
case 9: case 9:
yyDollar = yyS[yypt-2 : yypt+1] yyDollar = yyS[yypt-2 : yypt+1]
//line query_string.y:101 //line query_string.y:102
{ {
str := yyDollar[1].s str := yyDollar[1].s
fuzziness, err := strconv.ParseFloat(yyDollar[2].s, 64) fuzziness, err := strconv.ParseFloat(yyDollar[2].s, 64)
@ -561,7 +562,7 @@ yydefault:
} }
case 10: case 10:
yyDollar = yyS[yypt-4 : yypt+1] yyDollar = yyS[yypt-4 : yypt+1]
//line query_string.y:113 //line query_string.y:114
{ {
field := yyDollar[1].s field := yyDollar[1].s
str := yyDollar[3].s str := yyDollar[3].s
@ -577,16 +578,24 @@ yydefault:
} }
case 11: case 11:
yyDollar = yyS[yypt-1 : yypt+1] yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:127 //line query_string.y:128
{ {
str := yyDollar[1].s str := yyDollar[1].s
logDebugGrammar("STRING - %s", str) logDebugGrammar("STRING - %s", str)
q := NewMatchQuery(str) q1 := NewMatchQuery(str)
val, err := strconv.ParseFloat(yyDollar[1].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
inclusive := true
q2 := NewNumericRangeInclusiveQuery(&val, &val, &inclusive, &inclusive)
q := NewDisjunctionQuery([]Query{q1, q2})
q.queryStringMode = true
yyVAL.q = q yyVAL.q = q
} }
case 12: case 12:
yyDollar = yyS[yypt-1 : yypt+1] yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:134 //line query_string.y:143
{ {
phrase := yyDollar[1].s phrase := yyDollar[1].s
logDebugGrammar("PHRASE - %s", phrase) logDebugGrammar("PHRASE - %s", phrase)
@ -595,7 +604,7 @@ yydefault:
} }
case 13: case 13:
yyDollar = yyS[yypt-3 : yypt+1] yyDollar = yyS[yypt-3 : yypt+1]
//line query_string.y:141 //line query_string.y:150
{ {
field := yyDollar[1].s field := yyDollar[1].s
str := yyDollar[3].s str := yyDollar[3].s
@ -613,18 +622,27 @@ yydefault:
} }
case 14: case 14:
yyDollar = yyS[yypt-3 : yypt+1] yyDollar = yyS[yypt-3 : yypt+1]
//line query_string.y:157 //line query_string.y:166
{ {
field := yyDollar[1].s field := yyDollar[1].s
str := yyDollar[3].s str := yyDollar[3].s
logDebugGrammar("FIELD - %s STRING - %s", field, str) logDebugGrammar("FIELD - %s STRING - %s", field, str)
q := NewMatchQuery(str) q1 := NewMatchQuery(str)
q.SetField(field) q1.SetField(field)
val, err := strconv.ParseFloat(yyDollar[3].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
inclusive := true
q2 := NewNumericRangeInclusiveQuery(&val, &val, &inclusive, &inclusive)
q2.SetField(field)
q := NewDisjunctionQuery([]Query{q1, q2})
q.queryStringMode = true
yyVAL.q = q yyVAL.q = q
} }
case 15: case 15:
yyDollar = yyS[yypt-3 : yypt+1] yyDollar = yyS[yypt-3 : yypt+1]
//line query_string.y:166 //line query_string.y:184
{ {
field := yyDollar[1].s field := yyDollar[1].s
phrase := yyDollar[3].s phrase := yyDollar[3].s
@ -635,10 +653,13 @@ yydefault:
} }
case 16: case 16:
yyDollar = yyS[yypt-4 : yypt+1] yyDollar = yyS[yypt-4 : yypt+1]
//line query_string.y:175 //line query_string.y:193
{ {
field := yyDollar[1].s field := yyDollar[1].s
min, _ := strconv.ParseFloat(yyDollar[4].s, 64) min, err := strconv.ParseFloat(yyDollar[4].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
minInclusive := false minInclusive := false
logDebugGrammar("FIELD - GREATER THAN %f", min) logDebugGrammar("FIELD - GREATER THAN %f", min)
q := NewNumericRangeInclusiveQuery(&min, nil, &minInclusive, nil) q := NewNumericRangeInclusiveQuery(&min, nil, &minInclusive, nil)
@ -647,10 +668,13 @@ yydefault:
} }
case 17: case 17:
yyDollar = yyS[yypt-5 : yypt+1] yyDollar = yyS[yypt-5 : yypt+1]
//line query_string.y:185 //line query_string.y:206
{ {
field := yyDollar[1].s field := yyDollar[1].s
min, _ := strconv.ParseFloat(yyDollar[5].s, 64) min, err := strconv.ParseFloat(yyDollar[5].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
minInclusive := true minInclusive := true
logDebugGrammar("FIELD - GREATER THAN OR EQUAL %f", min) logDebugGrammar("FIELD - GREATER THAN OR EQUAL %f", min)
q := NewNumericRangeInclusiveQuery(&min, nil, &minInclusive, nil) q := NewNumericRangeInclusiveQuery(&min, nil, &minInclusive, nil)
@ -659,10 +683,13 @@ yydefault:
} }
case 18: case 18:
yyDollar = yyS[yypt-4 : yypt+1] yyDollar = yyS[yypt-4 : yypt+1]
//line query_string.y:195 //line query_string.y:219
{ {
field := yyDollar[1].s field := yyDollar[1].s
max, _ := strconv.ParseFloat(yyDollar[4].s, 64) max, err := strconv.ParseFloat(yyDollar[4].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
maxInclusive := false maxInclusive := false
logDebugGrammar("FIELD - LESS THAN %f", max) logDebugGrammar("FIELD - LESS THAN %f", max)
q := NewNumericRangeInclusiveQuery(nil, &max, nil, &maxInclusive) q := NewNumericRangeInclusiveQuery(nil, &max, nil, &maxInclusive)
@ -671,10 +698,13 @@ yydefault:
} }
case 19: case 19:
yyDollar = yyS[yypt-5 : yypt+1] yyDollar = yyS[yypt-5 : yypt+1]
//line query_string.y:205 //line query_string.y:232
{ {
field := yyDollar[1].s field := yyDollar[1].s
max, _ := strconv.ParseFloat(yyDollar[5].s, 64) max, err := strconv.ParseFloat(yyDollar[5].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
maxInclusive := true maxInclusive := true
logDebugGrammar("FIELD - LESS THAN OR EQUAL %f", max) logDebugGrammar("FIELD - LESS THAN OR EQUAL %f", max)
q := NewNumericRangeInclusiveQuery(nil, &max, nil, &maxInclusive) q := NewNumericRangeInclusiveQuery(nil, &max, nil, &maxInclusive)
@ -683,7 +713,7 @@ yydefault:
} }
case 20: case 20:
yyDollar = yyS[yypt-4 : yypt+1] yyDollar = yyS[yypt-4 : yypt+1]
//line query_string.y:215 //line query_string.y:245
{ {
field := yyDollar[1].s field := yyDollar[1].s
minInclusive := false minInclusive := false
@ -700,7 +730,7 @@ yydefault:
} }
case 21: case 21:
yyDollar = yyS[yypt-5 : yypt+1] yyDollar = yyS[yypt-5 : yypt+1]
//line query_string.y:230 //line query_string.y:260
{ {
field := yyDollar[1].s field := yyDollar[1].s
minInclusive := true minInclusive := true
@ -717,7 +747,7 @@ yydefault:
} }
case 22: case 22:
yyDollar = yyS[yypt-4 : yypt+1] yyDollar = yyS[yypt-4 : yypt+1]
//line query_string.y:245 //line query_string.y:275
{ {
field := yyDollar[1].s field := yyDollar[1].s
maxInclusive := false maxInclusive := false
@ -734,7 +764,7 @@ yydefault:
} }
case 23: case 23:
yyDollar = yyS[yypt-5 : yypt+1] yyDollar = yyS[yypt-5 : yypt+1]
//line query_string.y:260 //line query_string.y:290
{ {
field := yyDollar[1].s field := yyDollar[1].s
maxInclusive := true maxInclusive := true
@ -751,13 +781,13 @@ yydefault:
} }
case 24: case 24:
yyDollar = yyS[yypt-0 : yypt+1] yyDollar = yyS[yypt-0 : yypt+1]
//line query_string.y:276 //line query_string.y:306
{ {
yyVAL.pf = nil yyVAL.pf = nil
} }
case 25: case 25:
yyDollar = yyS[yypt-1 : yypt+1] yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:280 //line query_string.y:310
{ {
yyVAL.pf = nil yyVAL.pf = nil
boost, err := strconv.ParseFloat(yyDollar[1].s, 64) boost, err := strconv.ParseFloat(yyDollar[1].s, 64)
@ -768,6 +798,18 @@ yydefault:
} }
logDebugGrammar("BOOST %f", boost) logDebugGrammar("BOOST %f", boost)
} }
case 26:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:322
{
yyVAL.s = yyDollar[1].s
}
case 27:
yyDollar = yyS[yypt-2 : yypt+1]
//line query_string.y:326
{
yyVAL.s = "-" + yyDollar[2].s
}
} }
goto yystack /* stack new state and value */ goto yystack /* stack new state and value */
} }

@ -12,7 +12,10 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
//go:generate go tool yacc -o query_string.y.go query_string.y // as of Go 1.8 this requires the goyacc external tool
// available from golang.org/x/tools/cmd/goyacc
//go:generate goyacc -o query_string.y.go query_string.y
//go:generate sed -i.tmp -e 1d query_string.y.go //go:generate sed -i.tmp -e 1d query_string.y.go
//go:generate rm query_string.y.go.tmp //go:generate rm query_string.y.go.tmp
@ -31,6 +34,9 @@ var debugParser bool
var debugLexer bool var debugLexer bool
func parseQuerySyntax(query string) (rq Query, err error) { func parseQuerySyntax(query string) (rq Query, err error) {
if query == "" {
return NewMatchNoneQuery(), nil
}
lex := newLexerWrapper(newQueryStringLex(strings.NewReader(query))) lex := newLexerWrapper(newQueryStringLex(strings.NewReader(query)))
doParse(lex) doParse(lex)
@ -66,7 +72,7 @@ type lexerWrapper struct {
func newLexerWrapper(lex yyLexer) *lexerWrapper { func newLexerWrapper(lex yyLexer) *lexerWrapper {
return &lexerWrapper{ return &lexerWrapper{
lex: lex, lex: lex,
query: NewBooleanQuery(nil, nil, nil), query: NewBooleanQueryForQueryString(nil, nil, nil),
} }
} }

@ -33,7 +33,9 @@ type RegexpQuery struct {
// NewRegexpQuery creates a new Query which finds // NewRegexpQuery creates a new Query which finds
// documents containing terms that match the // documents containing terms that match the
// specified regular expression. // specified regular expression. The regexp pattern
// SHOULD NOT include ^ or $ modifiers, the search
// will only match entire terms even without them.
func NewRegexpQuery(regexp string) *RegexpQuery { func NewRegexpQuery(regexp string) *RegexpQuery {
return &RegexpQuery{ return &RegexpQuery{
Regexp: regexp, Regexp: regexp,
@ -45,7 +47,7 @@ func (q *RegexpQuery) SetBoost(b float64) {
q.BoostVal = &boost q.BoostVal = &boost
} }
func (q *RegexpQuery) Boost() float64{ func (q *RegexpQuery) Boost() float64 {
return q.BoostVal.Value() return q.BoostVal.Value()
} }
@ -53,11 +55,11 @@ func (q *RegexpQuery) SetField(f string) {
q.FieldVal = f q.FieldVal = f
} }
func (q *RegexpQuery) Field() string{ func (q *RegexpQuery) Field() string {
return q.FieldVal return q.FieldVal
} }
func (q *RegexpQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { func (q *RegexpQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
field := q.FieldVal field := q.FieldVal
if q.FieldVal == "" { if q.FieldVal == "" {
field = m.DefaultSearchField() field = m.DefaultSearchField()
@ -67,7 +69,7 @@ func (q *RegexpQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, expl
return nil, err return nil, err
} }
return searcher.NewRegexpSearcher(i, q.compiled, field, q.BoostVal.Value(), explain) return searcher.NewRegexpSearcher(i, q.compiled, field, q.BoostVal.Value(), options)
} }
func (q *RegexpQuery) Validate() error { func (q *RegexpQuery) Validate() error {
@ -76,14 +78,14 @@ func (q *RegexpQuery) Validate() error {
func (q *RegexpQuery) compile() error { func (q *RegexpQuery) compile() error {
if q.compiled == nil { if q.compiled == nil {
// require that pattern be anchored to start and end of term // require that pattern NOT be anchored to start and end of term
actualRegexp := q.Regexp actualRegexp := q.Regexp
if !strings.HasPrefix(actualRegexp, "^") { if strings.HasPrefix(actualRegexp, "^") {
actualRegexp = "^" + actualRegexp actualRegexp = actualRegexp[1:] // remove leading ^
}
if !strings.HasSuffix(actualRegexp, "$") {
actualRegexp = actualRegexp + "$"
} }
// do not attempt to remove trailing $, its presence is not
// known to interfere with LiteralPrefix() the way ^ does
// and removing $ introduces possible ambiguities with escaped \$, \\$, etc
var err error var err error
q.compiled, err = regexp.Compile(actualRegexp) q.compiled, err = regexp.Compile(actualRegexp)
if err != nil { if err != nil {

@ -40,7 +40,7 @@ func (q *TermQuery) SetBoost(b float64) {
q.BoostVal = &boost q.BoostVal = &boost
} }
func (q *TermQuery) Boost() float64{ func (q *TermQuery) Boost() float64 {
return q.BoostVal.Value() return q.BoostVal.Value()
} }
@ -48,14 +48,14 @@ func (q *TermQuery) SetField(f string) {
q.FieldVal = f q.FieldVal = f
} }
func (q *TermQuery) Field() string{ func (q *TermQuery) Field() string {
return q.FieldVal return q.FieldVal
} }
func (q *TermQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { func (q *TermQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
field := q.FieldVal field := q.FieldVal
if q.FieldVal == "" { if q.FieldVal == "" {
field = m.DefaultSearchField() field = m.DefaultSearchField()
} }
return searcher.NewTermSearcher(i, q.Term, field, q.BoostVal.Value(), explain) return searcher.NewTermSearcher(i, q.Term, field, q.BoostVal.Value(), options)
} }

@ -0,0 +1,95 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"fmt"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searcher"
)
// TermRangeQuery describes a search for text terms that fall within a range
// bounded by Min and/or Max. The struct is JSON-serializable via its tags.
type TermRangeQuery struct {
// Min is the lower bound of the range. When empty, Searcher passes a nil
// lower term to the underlying searcher.
Min string `json:"min,omitempty"`
// Max is the upper bound of the range. When empty, Searcher passes a nil
// upper term to the underlying searcher.
Max string `json:"max,omitempty"`
// InclusiveMin is forwarded unchanged to the searcher; nil leaves the
// choice of inclusive/exclusive to the searcher.
InclusiveMin *bool `json:"inclusive_min,omitempty"`
// InclusiveMax is forwarded unchanged to the searcher; nil leaves the
// choice of inclusive/exclusive to the searcher.
InclusiveMax *bool `json:"inclusive_max,omitempty"`
// FieldVal names the field to search. When empty, Searcher falls back to
// the index mapping's default search field.
FieldVal string `json:"field,omitempty"`
// BoostVal holds the boost set through SetBoost; it is nil until then.
BoostVal *Boost `json:"boost,omitempty"`
}
// NewTermRangeQuery builds a Query that matches text terms lying between
// min and max.
// An empty string leaves that end of the range open; at least one of the
// two bounds should be non-empty.
// No inclusivity flags are set here, so endpoint handling is left to the
// searcher's defaults; the intent is an inclusive minimum and an exclusive
// maximum.
func NewTermRangeQuery(min, max string) *TermRangeQuery {
	// Both flags stay nil on purpose: "unspecified" is distinct from false.
	var unsetMinInclusive, unsetMaxInclusive *bool
	return NewTermRangeInclusiveQuery(min, max, unsetMinInclusive, unsetMaxInclusive)
}
// NewTermRangeInclusiveQuery builds a Query that matches text terms lying
// between min and max, with explicit control over endpoint inclusion.
// An empty string leaves that end of the range open; at least one of the
// two bounds should be non-empty.
// minInclusive and maxInclusive are stored as given; a nil flag means
// "unspecified" and is passed through to the searcher untouched.
func NewTermRangeInclusiveQuery(min, max string, minInclusive, maxInclusive *bool) *TermRangeQuery {
	q := new(TermRangeQuery)
	q.Min, q.Max = min, max
	q.InclusiveMin, q.InclusiveMax = minInclusive, maxInclusive
	return q
}
// SetBoost records b as the boost for this query, replacing any earlier one.
func (q *TermRangeQuery) SetBoost(b float64) {
	// A fresh Boost is allocated on every call so earlier pointers are not mutated.
	stored := new(Boost)
	*stored = Boost(b)
	q.BoostVal = stored
}
// Boost returns the value reported by the query's BoostVal.
func (q *TermRangeQuery) Boost() float64 {
// NOTE(review): BoostVal stays nil until SetBoost is called, so this relies
// on (*Boost).Value tolerating a nil receiver — confirm.
return q.BoostVal.Value()
}
// SetField sets the name of the field this query searches.
func (q *TermRangeQuery) SetField(f string) {
q.FieldVal = f
}
// Field returns the field name stored on the query. It may be empty, in
// which case Searcher uses the mapping's default search field.
func (q *TermRangeQuery) Field() string {
return q.FieldVal
}
// Searcher builds the term range searcher that executes this query against
// the index reader i.
//
// The field searched is FieldVal, or the mapping's default search field when
// FieldVal is empty. An empty Min or Max is handed to the searcher as a nil
// term; the inclusivity flags and options are forwarded unchanged.
func (q *TermRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	target := q.FieldVal
	if target == "" {
		target = m.DefaultSearchField()
	}

	// Bounds stay nil unless the corresponding endpoint was supplied.
	var lower, upper []byte
	if len(q.Min) > 0 {
		lower = []byte(q.Min)
	}
	if len(q.Max) > 0 {
		upper = []byte(q.Max)
	}

	boost := q.BoostVal.Value()
	return searcher.NewTermRangeSearcher(i, lower, upper, q.InclusiveMin, q.InclusiveMax, target, boost, options)
}
// Validate reports an error when neither endpoint of the range is set, and
// nil when at least one of Min or Max is non-empty.
func (q *TermRangeQuery) Validate() error {
	if q.Min != "" || q.Max != "" {
		return nil
	}
	return fmt.Errorf("term range query must specify min or max")
}

@ -66,7 +66,7 @@ func (q *WildcardQuery) SetBoost(b float64) {
q.BoostVal = &boost q.BoostVal = &boost
} }
func (q *WildcardQuery) Boost() float64{ func (q *WildcardQuery) Boost() float64 {
return q.BoostVal.Value() return q.BoostVal.Value()
} }
@ -74,11 +74,11 @@ func (q *WildcardQuery) SetField(f string) {
q.FieldVal = f q.FieldVal = f
} }
func (q *WildcardQuery) Field() string{ func (q *WildcardQuery) Field() string {
return q.FieldVal return q.FieldVal
} }
func (q *WildcardQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, explain bool) (search.Searcher, error) { func (q *WildcardQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
field := q.FieldVal field := q.FieldVal
if q.FieldVal == "" { if q.FieldVal == "" {
field = m.DefaultSearchField() field = m.DefaultSearchField()
@ -91,7 +91,7 @@ func (q *WildcardQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, ex
} }
} }
return searcher.NewRegexpSearcher(i, q.compiled, field, q.BoostVal.Value(), explain) return searcher.NewRegexpSearcher(i, q.compiled, field, q.BoostVal.Value(), options)
} }
func (q *WildcardQuery) Validate() error { func (q *WildcardQuery) Validate() error {
@ -101,6 +101,6 @@ func (q *WildcardQuery) Validate() error {
} }
func (q *WildcardQuery) convertToRegexp() (*regexp.Regexp, error) { func (q *WildcardQuery) convertToRegexp() (*regexp.Regexp, error) {
regexpString := "^" + wildcardRegexpReplacer.Replace(q.Wildcard) + "$" regexpString := wildcardRegexpReplacer.Replace(q.Wildcard)
return regexp.Compile(regexpString) return regexp.Compile(regexpString)
} }

@ -19,26 +19,26 @@ import (
) )
type ConjunctionQueryScorer struct { type ConjunctionQueryScorer struct {
explain bool options search.SearcherOptions
} }
func NewConjunctionQueryScorer(explain bool) *ConjunctionQueryScorer { func NewConjunctionQueryScorer(options search.SearcherOptions) *ConjunctionQueryScorer {
return &ConjunctionQueryScorer{ return &ConjunctionQueryScorer{
explain: explain, options: options,
} }
} }
func (s *ConjunctionQueryScorer) Score(ctx *search.SearchContext, constituents []*search.DocumentMatch) *search.DocumentMatch { func (s *ConjunctionQueryScorer) Score(ctx *search.SearchContext, constituents []*search.DocumentMatch) *search.DocumentMatch {
var sum float64 var sum float64
var childrenExplanations []*search.Explanation var childrenExplanations []*search.Explanation
if s.explain { if s.options.Explain {
childrenExplanations = make([]*search.Explanation, len(constituents)) childrenExplanations = make([]*search.Explanation, len(constituents))
} }
locations := []search.FieldTermLocationMap{} locations := []search.FieldTermLocationMap{}
for i, docMatch := range constituents { for i, docMatch := range constituents {
sum += docMatch.Score sum += docMatch.Score
if s.explain { if s.options.Explain {
childrenExplanations[i] = docMatch.Expl childrenExplanations[i] = docMatch.Expl
} }
if docMatch.Locations != nil { if docMatch.Locations != nil {
@ -47,7 +47,7 @@ func (s *ConjunctionQueryScorer) Score(ctx *search.SearchContext, constituents [
} }
newScore := sum newScore := sum
var newExpl *search.Explanation var newExpl *search.Explanation
if s.explain { if s.options.Explain {
newExpl = &search.Explanation{Value: sum, Message: "sum of:", Children: childrenExplanations} newExpl = &search.Explanation{Value: sum, Message: "sum of:", Children: childrenExplanations}
} }

@ -24,15 +24,15 @@ import (
type ConstantScorer struct { type ConstantScorer struct {
constant float64 constant float64
boost float64 boost float64
explain bool options search.SearcherOptions
queryNorm float64 queryNorm float64
queryWeight float64 queryWeight float64
queryWeightExplanation *search.Explanation queryWeightExplanation *search.Explanation
} }
func NewConstantScorer(constant float64, boost float64, explain bool) *ConstantScorer { func NewConstantScorer(constant float64, boost float64, options search.SearcherOptions) *ConstantScorer {
rv := ConstantScorer{ rv := ConstantScorer{
explain: explain, options: options,
queryWeight: 1.0, queryWeight: 1.0,
constant: constant, constant: constant,
boost: boost, boost: boost,
@ -52,7 +52,7 @@ func (s *ConstantScorer) SetQueryNorm(qnorm float64) {
// update the query weight // update the query weight
s.queryWeight = s.boost * s.queryNorm s.queryWeight = s.boost * s.queryNorm
if s.explain { if s.options.Explain {
childrenExplanations := make([]*search.Explanation, 2) childrenExplanations := make([]*search.Explanation, 2)
childrenExplanations[0] = &search.Explanation{ childrenExplanations[0] = &search.Explanation{
Value: s.boost, Value: s.boost,
@ -75,7 +75,7 @@ func (s *ConstantScorer) Score(ctx *search.SearchContext, id index.IndexInternal
score := s.constant score := s.constant
if s.explain { if s.options.Explain {
scoreExplanation = &search.Explanation{ scoreExplanation = &search.Explanation{
Value: score, Value: score,
Message: fmt.Sprintf("ConstantScore()"), Message: fmt.Sprintf("ConstantScore()"),
@ -85,7 +85,7 @@ func (s *ConstantScorer) Score(ctx *search.SearchContext, id index.IndexInternal
// if the query weight isn't 1, multiply // if the query weight isn't 1, multiply
if s.queryWeight != 1.0 { if s.queryWeight != 1.0 {
score = score * s.queryWeight score = score * s.queryWeight
if s.explain { if s.options.Explain {
childExplanations := make([]*search.Explanation, 2) childExplanations := make([]*search.Explanation, 2)
childExplanations[0] = s.queryWeightExplanation childExplanations[0] = s.queryWeightExplanation
childExplanations[1] = scoreExplanation childExplanations[1] = scoreExplanation
@ -100,7 +100,7 @@ func (s *ConstantScorer) Score(ctx *search.SearchContext, id index.IndexInternal
rv := ctx.DocumentMatchPool.Get() rv := ctx.DocumentMatchPool.Get()
rv.IndexInternalID = id rv.IndexInternalID = id
rv.Score = score rv.Score = score
if s.explain { if s.options.Explain {
rv.Expl = scoreExplanation rv.Expl = scoreExplanation
} }

@ -21,26 +21,26 @@ import (
) )
type DisjunctionQueryScorer struct { type DisjunctionQueryScorer struct {
explain bool options search.SearcherOptions
} }
func NewDisjunctionQueryScorer(explain bool) *DisjunctionQueryScorer { func NewDisjunctionQueryScorer(options search.SearcherOptions) *DisjunctionQueryScorer {
return &DisjunctionQueryScorer{ return &DisjunctionQueryScorer{
explain: explain, options: options,
} }
} }
func (s *DisjunctionQueryScorer) Score(ctx *search.SearchContext, constituents []*search.DocumentMatch, countMatch, countTotal int) *search.DocumentMatch { func (s *DisjunctionQueryScorer) Score(ctx *search.SearchContext, constituents []*search.DocumentMatch, countMatch, countTotal int) *search.DocumentMatch {
var sum float64 var sum float64
var childrenExplanations []*search.Explanation var childrenExplanations []*search.Explanation
if s.explain { if s.options.Explain {
childrenExplanations = make([]*search.Explanation, len(constituents)) childrenExplanations = make([]*search.Explanation, len(constituents))
} }
var locations []search.FieldTermLocationMap var locations []search.FieldTermLocationMap
for i, docMatch := range constituents { for i, docMatch := range constituents {
sum += docMatch.Score sum += docMatch.Score
if s.explain { if s.options.Explain {
childrenExplanations[i] = docMatch.Expl childrenExplanations[i] = docMatch.Expl
} }
if docMatch.Locations != nil { if docMatch.Locations != nil {
@ -49,14 +49,14 @@ func (s *DisjunctionQueryScorer) Score(ctx *search.SearchContext, constituents [
} }
var rawExpl *search.Explanation var rawExpl *search.Explanation
if s.explain { if s.options.Explain {
rawExpl = &search.Explanation{Value: sum, Message: "sum of:", Children: childrenExplanations} rawExpl = &search.Explanation{Value: sum, Message: "sum of:", Children: childrenExplanations}
} }
coord := float64(countMatch) / float64(countTotal) coord := float64(countMatch) / float64(countTotal)
newScore := sum * coord newScore := sum * coord
var newExpl *search.Explanation var newExpl *search.Explanation
if s.explain { if s.options.Explain {
ce := make([]*search.Explanation, 2) ce := make([]*search.Explanation, 2)
ce[0] = rawExpl ce[0] = rawExpl
ce[1] = &search.Explanation{Value: coord, Message: fmt.Sprintf("coord(%d/%d)", countMatch, countTotal)} ce[1] = &search.Explanation{Value: coord, Message: fmt.Sprintf("coord(%d/%d)", countMatch, countTotal)}

@ -23,20 +23,20 @@ import (
) )
type TermQueryScorer struct { type TermQueryScorer struct {
queryTerm string queryTerm []byte
queryField string queryField string
queryBoost float64 queryBoost float64
docTerm uint64 docTerm uint64
docTotal uint64 docTotal uint64
idf float64 idf float64
explain bool options search.SearcherOptions
idfExplanation *search.Explanation idfExplanation *search.Explanation
queryNorm float64 queryNorm float64
queryWeight float64 queryWeight float64
queryWeightExplanation *search.Explanation queryWeightExplanation *search.Explanation
} }
func NewTermQueryScorer(queryTerm string, queryField string, queryBoost float64, docTotal, docTerm uint64, explain bool) *TermQueryScorer { func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal, docTerm uint64, options search.SearcherOptions) *TermQueryScorer {
rv := TermQueryScorer{ rv := TermQueryScorer{
queryTerm: queryTerm, queryTerm: queryTerm,
queryField: queryField, queryField: queryField,
@ -44,11 +44,11 @@ func NewTermQueryScorer(queryTerm string, queryField string, queryBoost float64,
docTerm: docTerm, docTerm: docTerm,
docTotal: docTotal, docTotal: docTotal,
idf: 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)), idf: 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)),
explain: explain, options: options,
queryWeight: 1.0, queryWeight: 1.0,
} }
if explain { if options.Explain {
rv.idfExplanation = &search.Explanation{ rv.idfExplanation = &search.Explanation{
Value: rv.idf, Value: rv.idf,
Message: fmt.Sprintf("idf(docFreq=%d, maxDocs=%d)", docTerm, docTotal), Message: fmt.Sprintf("idf(docFreq=%d, maxDocs=%d)", docTerm, docTotal),
@ -69,7 +69,7 @@ func (s *TermQueryScorer) SetQueryNorm(qnorm float64) {
// update the query weight // update the query weight
s.queryWeight = s.queryBoost * s.idf * s.queryNorm s.queryWeight = s.queryBoost * s.idf * s.queryNorm
if s.explain { if s.options.Explain {
childrenExplanations := make([]*search.Explanation, 3) childrenExplanations := make([]*search.Explanation, 3)
childrenExplanations[0] = &search.Explanation{ childrenExplanations[0] = &search.Explanation{
Value: s.queryBoost, Value: s.queryBoost,
@ -100,7 +100,7 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term
} }
score := tf * termMatch.Norm * s.idf score := tf * termMatch.Norm * s.idf
if s.explain { if s.options.Explain {
childrenExplanations := make([]*search.Explanation, 3) childrenExplanations := make([]*search.Explanation, 3)
childrenExplanations[0] = &search.Explanation{ childrenExplanations[0] = &search.Explanation{
Value: tf, Value: tf,
@ -121,7 +121,7 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term
// if the query weight isn't 1, multiply // if the query weight isn't 1, multiply
if s.queryWeight != 1.0 { if s.queryWeight != 1.0 {
score = score * s.queryWeight score = score * s.queryWeight
if s.explain { if s.options.Explain {
childExplanations := make([]*search.Explanation, 2) childExplanations := make([]*search.Explanation, 2)
childExplanations[0] = s.queryWeightExplanation childExplanations[0] = s.queryWeightExplanation
childExplanations[1] = scoreExplanation childExplanations[1] = scoreExplanation
@ -136,44 +136,46 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term
rv := ctx.DocumentMatchPool.Get() rv := ctx.DocumentMatchPool.Get()
rv.IndexInternalID = append(rv.IndexInternalID, termMatch.ID...) rv.IndexInternalID = append(rv.IndexInternalID, termMatch.ID...)
rv.Score = score rv.Score = score
if s.explain { if s.options.Explain {
rv.Expl = scoreExplanation rv.Expl = scoreExplanation
} }
if termMatch.Vectors != nil && len(termMatch.Vectors) > 0 { if termMatch.Vectors != nil && len(termMatch.Vectors) > 0 {
locs := make([]search.Location, len(termMatch.Vectors))
locsUsed := 0
totalPositions := 0
for _, v := range termMatch.Vectors {
totalPositions += len(v.ArrayPositions)
}
positions := make(search.ArrayPositions, totalPositions)
positionsUsed := 0
rv.Locations = make(search.FieldTermLocationMap) rv.Locations = make(search.FieldTermLocationMap)
for _, v := range termMatch.Vectors { for _, v := range termMatch.Vectors {
tlm := rv.Locations[v.Field] tlm := rv.Locations[v.Field]
if tlm == nil { if tlm == nil {
tlm = make(search.TermLocationMap) tlm = make(search.TermLocationMap)
rv.Locations[v.Field] = tlm
} }
loc := search.Location{ loc := &locs[locsUsed]
Pos: float64(v.Pos), locsUsed++
Start: float64(v.Start),
End: float64(v.End), loc.Pos = v.Pos
} loc.Start = v.Start
loc.End = v.End
if len(v.ArrayPositions) > 0 { if len(v.ArrayPositions) > 0 {
loc.ArrayPositions = make([]float64, len(v.ArrayPositions)) loc.ArrayPositions = positions[positionsUsed : positionsUsed+len(v.ArrayPositions)]
for i, ap := range v.ArrayPositions { for i, ap := range v.ArrayPositions {
loc.ArrayPositions[i] = float64(ap) loc.ArrayPositions[i] = ap
} }
positionsUsed += len(v.ArrayPositions)
} }
locations := tlm[s.queryTerm] tlm[string(s.queryTerm)] = append(tlm[string(s.queryTerm)], loc)
if locations == nil {
locations = make(search.Locations, 1)
locations[0] = &loc
} else {
locations = append(locations, &loc)
}
tlm[s.queryTerm] = locations
rv.Locations[v.Field] = tlm
} }
} }
return rv return rv

@ -21,27 +21,32 @@ import (
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
) )
type Location struct { type ArrayPositions []uint64
Pos float64 `json:"pos"`
Start float64 `json:"start"`
End float64 `json:"end"`
ArrayPositions []float64 `json:"array_positions"`
}
// SameArrayElement returns true if two locations are point to func (ap ArrayPositions) Equals(other ArrayPositions) bool {
// the same array element if len(ap) != len(other) {
func (l *Location) SameArrayElement(other *Location) bool {
if len(l.ArrayPositions) != len(other.ArrayPositions) {
return false return false
} }
for i, elem := range l.ArrayPositions { for i := range ap {
if other.ArrayPositions[i] != elem { if ap[i] != other[i] {
return false return false
} }
} }
return true return true
} }
type Location struct {
// Pos is the position of the term within the field, starting at 1
Pos uint64 `json:"pos"`
// Start and End are the byte offsets of the term in the field
Start uint64 `json:"start"`
End uint64 `json:"end"`
// ArrayPositions contains the positions of the term within any elements.
ArrayPositions ArrayPositions `json:"array_positions"`
}
type Locations []*Location type Locations []*Location
type TermLocationMap map[string]Locations type TermLocationMap map[string]Locations
@ -69,10 +74,6 @@ type DocumentMatch struct {
// fields as float64s and date fields as time.RFC3339 formatted strings. // fields as float64s and date fields as time.RFC3339 formatted strings.
Fields map[string]interface{} `json:"fields,omitempty"` Fields map[string]interface{} `json:"fields,omitempty"`
// as we learn field terms, we can cache important ones for later use
// for example, sorting and building facets need these values
CachedFieldTerms index.FieldTerms `json:"-"`
// if we load the document for this hit, remember it so we dont load again // if we load the document for this hit, remember it so we dont load again
Document *document.Document `json:"-"` Document *document.Document `json:"-"`
@ -138,6 +139,11 @@ type Searcher interface {
DocumentMatchPoolSize() int DocumentMatchPoolSize() int
} }
type SearcherOptions struct {
Explain bool
IncludeTermVectors bool
}
// SearchContext represents the context around a single search // SearchContext represents the context around a single search
type SearchContext struct { type SearchContext struct {
DocumentMatchPool *DocumentMatchPool DocumentMatchPool *DocumentMatchPool

@ -38,14 +38,14 @@ type BooleanSearcher struct {
initialized bool initialized bool
} }
func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searcher, shouldSearcher search.Searcher, mustNotSearcher search.Searcher, explain bool) (*BooleanSearcher, error) { func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searcher, shouldSearcher search.Searcher, mustNotSearcher search.Searcher, options search.SearcherOptions) (*BooleanSearcher, error) {
// build our searcher // build our searcher
rv := BooleanSearcher{ rv := BooleanSearcher{
indexReader: indexReader, indexReader: indexReader,
mustSearcher: mustSearcher, mustSearcher: mustSearcher,
shouldSearcher: shouldSearcher, shouldSearcher: shouldSearcher,
mustNotSearcher: mustNotSearcher, mustNotSearcher: mustNotSearcher,
scorer: scorer.NewConjunctionQueryScorer(explain), scorer: scorer.NewConjunctionQueryScorer(options),
matches: make([]*search.DocumentMatch, 2), matches: make([]*search.DocumentMatch, 2),
} }
rv.computeQueryNorm() rv.computeQueryNorm()

@ -31,10 +31,10 @@ type ConjunctionSearcher struct {
maxIDIdx int maxIDIdx int
scorer *scorer.ConjunctionQueryScorer scorer *scorer.ConjunctionQueryScorer
initialized bool initialized bool
explain bool options search.SearcherOptions
} }
func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, explain bool) (*ConjunctionSearcher, error) { func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, options search.SearcherOptions) (*ConjunctionSearcher, error) {
// build the downstream searchers // build the downstream searchers
searchers := make(OrderedSearcherList, len(qsearchers)) searchers := make(OrderedSearcherList, len(qsearchers))
for i, searcher := range qsearchers { for i, searcher := range qsearchers {
@ -45,10 +45,10 @@ func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.S
// build our searcher // build our searcher
rv := ConjunctionSearcher{ rv := ConjunctionSearcher{
indexReader: indexReader, indexReader: indexReader,
explain: explain, options: options,
searchers: searchers, searchers: searchers,
currs: make([]*search.DocumentMatch, len(searchers)), currs: make([]*search.DocumentMatch, len(searchers)),
scorer: scorer.NewConjunctionQueryScorer(explain), scorer: scorer.NewConjunctionQueryScorer(options),
} }
rv.computeQueryNorm() rv.computeQueryNorm()
return &rv, nil return &rv, nil

@ -50,11 +50,22 @@ func tooManyClauses(count int) bool {
} }
func tooManyClausesErr() error { func tooManyClausesErr() error {
return fmt.Errorf("TooManyClauses[maxClauseCount is set to %d]", DisjunctionMaxClauseCount) return fmt.Errorf("TooManyClauses[maxClauseCount is set to %d]",
DisjunctionMaxClauseCount)
} }
func NewDisjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, min float64, explain bool) (*DisjunctionSearcher, error) { func NewDisjunctionSearcher(indexReader index.IndexReader,
if tooManyClauses(len(qsearchers)) { qsearchers []search.Searcher, min float64, options search.SearcherOptions) (
*DisjunctionSearcher, error) {
return newDisjunctionSearcher(indexReader, qsearchers, min, options,
true)
}
func newDisjunctionSearcher(indexReader index.IndexReader,
qsearchers []search.Searcher, min float64, options search.SearcherOptions,
limit bool) (
*DisjunctionSearcher, error) {
if limit && tooManyClauses(len(qsearchers)) {
return nil, tooManyClausesErr() return nil, tooManyClausesErr()
} }
// build the downstream searchers // build the downstream searchers
@ -70,7 +81,7 @@ func NewDisjunctionSearcher(indexReader index.IndexReader, qsearchers []search.S
searchers: searchers, searchers: searchers,
numSearchers: len(searchers), numSearchers: len(searchers),
currs: make([]*search.DocumentMatch, len(searchers)), currs: make([]*search.DocumentMatch, len(searchers)),
scorer: scorer.NewDisjunctionQueryScorer(explain), scorer: scorer.NewDisjunctionQueryScorer(options),
min: int(min), min: int(min),
matching: make([]*search.DocumentMatch, len(searchers)), matching: make([]*search.DocumentMatch, len(searchers)),
matchingIdxs: make([]int, len(searchers)), matchingIdxs: make([]int, len(searchers)),
@ -161,7 +172,8 @@ func (s *DisjunctionSearcher) SetQueryNorm(qnorm float64) {
} }
} }
func (s *DisjunctionSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) { func (s *DisjunctionSearcher) Next(ctx *search.SearchContext) (
*search.DocumentMatch, error) {
if !s.initialized { if !s.initialized {
err := s.initSearchers(ctx) err := s.initSearchers(ctx)
if err != nil { if err != nil {
@ -199,7 +211,8 @@ func (s *DisjunctionSearcher) Next(ctx *search.SearchContext) (*search.DocumentM
return rv, nil return rv, nil
} }
func (s *DisjunctionSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) { func (s *DisjunctionSearcher) Advance(ctx *search.SearchContext,
ID index.IndexInternalID) (*search.DocumentMatch, error) {
if !s.initialized { if !s.initialized {
err := s.initSearchers(ctx) err := s.initSearchers(ctx)
if err != nil { if err != nil {

@ -28,13 +28,13 @@ type DocIDSearcher struct {
} }
func NewDocIDSearcher(indexReader index.IndexReader, ids []string, boost float64, func NewDocIDSearcher(indexReader index.IndexReader, ids []string, boost float64,
explain bool) (searcher *DocIDSearcher, err error) { options search.SearcherOptions) (searcher *DocIDSearcher, err error) {
reader, err := indexReader.DocIDReaderOnly(ids) reader, err := indexReader.DocIDReaderOnly(ids)
if err != nil { if err != nil {
return nil, err return nil, err
} }
scorer := scorer.NewConstantScorer(1.0, boost, explain) scorer := scorer.NewConstantScorer(1.0, boost, options)
return &DocIDSearcher{ return &DocIDSearcher{
scorer: scorer, scorer: scorer,
reader: reader, reader: reader,

@ -0,0 +1,88 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
// FilterFunc defines a function which can filter documents.
// Returning true means keep the document;
// returning false means do not keep the document.
type FilterFunc func(d *search.DocumentMatch) bool
// FilteringSearcher wraps any other searcher, but checks any Next/Advance
// call against the supplied FilterFunc, so only accepted matches are
// returned to the caller.
type FilteringSearcher struct {
// child is the wrapped searcher that produces candidate matches
child search.Searcher
// accept reports whether a candidate produced by child should be returned
accept FilterFunc
}
// NewFilteringSearcher returns a FilteringSearcher that draws candidate
// matches from s and surfaces only those for which filter returns true.
func NewFilteringSearcher(s search.Searcher, filter FilterFunc) *FilteringSearcher {
	rv := new(FilteringSearcher)
	rv.child = s
	rv.accept = filter
	return rv
}
// Next pulls matches from the wrapped searcher until one is accepted by the
// filter and returns it. It returns (nil, nil) once the wrapped searcher is
// exhausted, and (nil, err) as soon as the wrapped searcher reports an error.
func (f *FilteringSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
	for {
		candidate, err := f.child.Next(ctx)
		if err != nil || candidate == nil {
			// error or end of stream; err is nil in the latter case
			return nil, err
		}
		if f.accept(candidate) {
			return candidate, nil
		}
	}
}
// Advance asks the wrapped searcher to advance to ID. If the match it lands
// on is accepted by the filter it is returned; otherwise the search falls
// through to Next to find the following accepted match. Errors and
// exhaustion of the wrapped searcher are passed straight back.
func (f *FilteringSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
	candidate, err := f.child.Advance(ctx, ID)
	switch {
	case err != nil:
		return nil, err
	case candidate == nil:
		return nil, nil
	case f.accept(candidate):
		return candidate, nil
	}
	// the match we advanced to was rejected; keep scanning forward
	return f.Next(ctx)
}
// Close closes the wrapped searcher and returns whatever error it reports.
func (f *FilteringSearcher) Close() error {
return f.child.Close()
}
// Weight returns the weight reported by the wrapped searcher.
func (f *FilteringSearcher) Weight() float64 {
return f.child.Weight()
}
// SetQueryNorm passes the query norm n through to the wrapped searcher.
func (f *FilteringSearcher) SetQueryNorm(n float64) {
f.child.SetQueryNorm(n)
}
// Count returns the count reported by the wrapped searcher. The value is
// passed through as-is; it is not adjusted for matches the filter rejects.
func (f *FilteringSearcher) Count() uint64 {
return f.child.Count()
}
// Min returns the Min value reported by the wrapped searcher.
func (f *FilteringSearcher) Min() int {
return f.child.Min()
}
// DocumentMatchPoolSize returns the pool size requested by the wrapped
// searcher; the filter itself adds nothing to it.
func (f *FilteringSearcher) DocumentMatchPoolSize() int {
return f.child.DocumentMatchPoolSize()
}

@ -19,17 +19,9 @@ import (
"github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search"
) )
type FuzzySearcher struct { func NewFuzzySearcher(indexReader index.IndexReader, term string,
indexReader index.IndexReader prefix, fuzziness int, field string, boost float64,
term string options search.SearcherOptions) (search.Searcher, error) {
prefix int
fuzziness int
field string
explain bool
searcher *DisjunctionSearcher
}
func NewFuzzySearcher(indexReader index.IndexReader, term string, prefix, fuzziness int, field string, boost float64, explain bool) (*FuzzySearcher, error) {
// Note: we don't byte slice the term for a prefix because of runes. // Note: we don't byte slice the term for a prefix because of runes.
prefixTerm := "" prefixTerm := ""
for i, r := range term { for i, r := range term {
@ -40,46 +32,18 @@ func NewFuzzySearcher(indexReader index.IndexReader, term string, prefix, fuzzin
} }
} }
candidateTerms, err := findFuzzyCandidateTerms(indexReader, term, fuzziness, field, prefixTerm) candidateTerms, err := findFuzzyCandidateTerms(indexReader, term, fuzziness,
if err != nil { field, prefixTerm)
return nil, err
}
// enumerate all the terms in the range
qsearchers := make([]search.Searcher, 0, len(candidateTerms))
qsearchersClose := func() {
for _, searcher := range qsearchers {
_ = searcher.Close()
}
}
for _, cterm := range candidateTerms {
qsearcher, err := NewTermSearcher(indexReader, cterm, field, boost, explain)
if err != nil {
qsearchersClose()
return nil, err
}
qsearchers = append(qsearchers, qsearcher)
}
// build disjunction searcher of these ranges
searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)
if err != nil { if err != nil {
qsearchersClose()
return nil, err return nil, err
} }
return &FuzzySearcher{ return NewMultiTermSearcher(indexReader, candidateTerms, field,
indexReader: indexReader, boost, options, true)
term: term,
prefix: prefix,
fuzziness: fuzziness,
field: field,
explain: explain,
searcher: searcher,
}, nil
} }
func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, fuzziness int, field, prefixTerm string) (rv []string, err error) { func findFuzzyCandidateTerms(indexReader index.IndexReader, term string,
fuzziness int, field, prefixTerm string) (rv []string, err error) {
rv = make([]string, 0) rv = make([]string, 0)
var fieldDict index.FieldDict var fieldDict index.FieldDict
if len(prefixTerm) > 0 { if len(prefixTerm) > 0 {
@ -108,36 +72,3 @@ func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, fuzzine
return rv, err return rv, err
} }
func (s *FuzzySearcher) Count() uint64 {
return s.searcher.Count()
}
func (s *FuzzySearcher) Weight() float64 {
return s.searcher.Weight()
}
func (s *FuzzySearcher) SetQueryNorm(qnorm float64) {
s.searcher.SetQueryNorm(qnorm)
}
func (s *FuzzySearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
return s.searcher.Next(ctx)
}
func (s *FuzzySearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
return s.searcher.Advance(ctx, ID)
}
func (s *FuzzySearcher) Close() error {
return s.searcher.Close()
}
func (s *FuzzySearcher) Min() int {
return 0
}
func (s *FuzzySearcher) DocumentMatchPoolSize() int {
return s.searcher.DocumentMatchPoolSize()
}

@ -0,0 +1,173 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/geo"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/search"
)
// NewGeoBoundingBoxSearcher builds a searcher over field for the rectangle
// described by (minLon, minLat) and (maxLon, maxLat).
//
// ComputeGeoRange splits the rectangle into two term lists. Terms flagged as
// "on boundary" are searched through a FilteringSearcher that re-checks each
// hit against the rectangle; the remaining terms are searched directly. When
// both lists are non-empty the two searchers are combined in a disjunction;
// when only one is non-empty that searcher is returned on its own; when both
// are empty a match-none searcher is returned.
//
// checkBoundaries is forwarded to ComputeGeoRange. Any searcher opened
// before a later construction step fails is closed before the error is
// returned.
func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat,
	maxLon, maxLat float64, field string, boost float64,
	options search.SearcherOptions, checkBoundaries bool) (
	search.Searcher, error) {

	// searchers opened so far, so they can be released on an early exit
	var opened []search.Searcher
	closeOpened := func() {
		for i := range opened {
			// best-effort cleanup: the construction error is what gets reported
			_ = opened[i].Close()
		}
	}

	// work out which terms cover the requested rectangle
	boundaryTerms, interiorTerms := ComputeGeoRange(0, (geo.GeoBits<<1)-1,
		minLon, minLat, maxLon, maxLat, checkBoundaries)

	var boundary search.Searcher
	if len(boundaryTerms) > 0 {
		raw, err := NewMultiTermSearcherBytes(indexReader,
			boundaryTerms, field, boost, options, false)
		if err != nil {
			return nil, err
		}
		// hits for boundary terms must be verified against the rectangle
		rectFilter := buildRectFilter(indexReader, field,
			minLon, minLat, maxLon, maxLat)
		boundary = NewFilteringSearcher(raw, rectFilter)
		opened = append(opened, boundary)
	}

	var interior search.Searcher
	if len(interiorTerms) > 0 {
		var err error
		interior, err = NewMultiTermSearcherBytes(indexReader,
			interiorTerms, field, boost, options, false)
		if err != nil {
			closeOpened()
			return nil, err
		}
		opened = append(opened, interior)
	}

	switch {
	case boundary != nil && interior != nil:
		both := []search.Searcher{boundary, interior}
		rv, err := NewDisjunctionSearcher(indexReader, both, 0, options)
		if err != nil {
			closeOpened()
			return nil, err
		}
		return rv, nil
	case boundary != nil:
		return boundary, nil
	case interior != nil:
		return interior, nil
	}

	return NewMatchNoneSearcher(indexReader)
}
// geoMaxShift is four times the geo precision step; it is only used to
// derive geoDetailLevel.
var geoMaxShift = document.GeoPrecisionStep * 4
// geoDetailLevel is the level at which relateAndRecurse stops subdividing:
// a cell at this level that intersects the search rectangle is emitted as a
// term, while intersecting cells at lower levels are split further.
var geoDetailLevel = ((geo.GeoBits << 1) - geoMaxShift) / 2
// ComputeGeoRange splits the range identified by term and shift into a lower
// and an upper half at bit position shift, relates each half to the search
// rectangle (sminLon, sminLat)-(smaxLon, smaxLat) via relateAndRecurse, and
// returns the concatenation of the terms produced for both halves (lower
// half first). onBoundary holds terms whose hits need to be re-checked
// against the rectangle; notOnBoundary holds the rest. checkBoundaries is
// forwarded unchanged to relateAndRecurse.
func ComputeGeoRange(term uint64, shift uint,
	sminLon, sminLat, smaxLon, smaxLat float64,
	checkBoundaries bool) (
	onBoundary [][]byte, notOnBoundary [][]byte) {
	// first value of the upper half: term with bit `shift` set
	split := term | uint64(0x1)<<shift

	// last value of the upper half; at shift 63 that is every bit set
	upperMax := ^uint64(0)
	if shift < 63 {
		upperMax = term | ((uint64(1) << (shift + 1)) - 1)
	}

	lowOn, lowOff := relateAndRecurse(term, split-1, shift,
		sminLon, sminLat, smaxLon, smaxLat, checkBoundaries)
	highOn, highOff := relateAndRecurse(split, upperMax, shift,
		sminLon, sminLat, smaxLon, smaxLat, checkBoundaries)

	return append(lowOn, highOn...), append(lowOff, highOff...)
}
// relateAndRecurse relates the cell spanning the Morton codes start..end (at
// resolution res) to the search rectangle and decides what to do with it:
//
//   - the cell lies within the rectangle (only tested when res is a multiple
//     of the geo precision step), or it is at geoDetailLevel and intersects
//     the rectangle: emit a single prefix-coded term for it;
//   - the cell is above the detail level (level < geoDetailLevel) and
//     intersects the rectangle: subdivide via ComputeGeoRange at res-1;
//   - otherwise: contribute nothing.
//
// An emitted term goes into onBoundary when the cell is not fully within the
// rectangle and checkBoundaries is set; in every other case it goes into
// notOnBoundary.
func relateAndRecurse(start, end uint64, res uint,
	sminLon, sminLat, smaxLon, smaxLat float64,
	checkBoundaries bool) (
	onBoundary [][]byte, notOnBoundary [][]byte) {
	minLon, minLat := geo.MortonUnhashLon(start), geo.MortonUnhashLat(start)
	maxLon, maxLat := geo.MortonUnhashLon(end), geo.MortonUnhashLat(end)

	level := ((geo.GeoBits << 1) - res) >> 1

	within := res%document.GeoPrecisionStep == 0 &&
		geo.RectWithin(minLon, minLat, maxLon, maxLat,
			sminLon, sminLat, smaxLon, smaxLat)

	if !within {
		// beyond the detail level, or disjoint from the rectangle
		if level > geoDetailLevel ||
			!geo.RectIntersects(minLon, minLat, maxLon, maxLat,
				sminLon, sminLat, smaxLon, smaxLat) {
			return nil, nil
		}
		// intersecting but still coarse: split the cell further
		if level < geoDetailLevel {
			return ComputeGeoRange(start, res-1, sminLon, sminLat, smaxLon, smaxLat,
				checkBoundaries)
		}
	}

	// the cell is either fully within the rectangle, or sits at the detail
	// level and intersects it: emit one term for the whole cell
	encoded := numeric.MustNewPrefixCodedInt64(int64(start), res)
	if !within && checkBoundaries {
		return [][]byte{encoded}, nil
	}
	return nil, [][]byte{encoded}
}
// buildRectFilter returns a FilterFunc that keeps a document match only when
// the point stored in field lies inside the bounding box
// (minLon, minLat)-(maxLon, maxLat).
//
// The filter visits the document's terms for field, considers only terms
// whose prefix-coded shift is 0, and decodes the longitude/latitude from
// them; if several such terms decode successfully the last one visited wins.
// The match is rejected when visiting the terms fails or no such term is
// found.
func buildRectFilter(indexReader index.IndexReader, field string,
	minLon, minLat, maxLon, maxLat float64) FilterFunc {
	return func(d *search.DocumentMatch) bool {
		var (
			lon, lat float64
			found    bool
		)

		visit := func(_ string, term []byte) {
			coded := numeric.PrefixCoded(term)
			// only consider the values which are shifted 0
			if shift, err := coded.Shift(); err != nil || shift != 0 {
				return
			}
			raw, err := coded.Int64()
			if err != nil {
				return
			}
			hash := uint64(raw)
			lon = geo.MortonUnhashLon(hash)
			lat = geo.MortonUnhashLat(hash)
			found = true
		}

		err := indexReader.DocumentVisitFieldTerms(d.IndexInternalID,
			[]string{field}, visit)
		if err != nil || !found {
			return false
		}
		return geo.BoundingBoxContains(lon, lat,
			minLon, minLat, maxLon, maxLat)
	}
}

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save