Search bar for issues/pulls (#530)
parent
8bc431952f
commit
833f8b94c2
@ -0,0 +1,183 @@ |
|||||||
|
// Copyright 2017 The Gitea Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package models |
||||||
|
|
||||||
|
import (
	"fmt"
	"math"
	"os"
	"strconv"
	"strings"

	"code.gitea.io/gitea/modules/log"
	"code.gitea.io/gitea/modules/setting"
	"code.gitea.io/gitea/modules/util"

	"github.com/blevesearch/bleve"
	"github.com/blevesearch/bleve/analysis/analyzer/simple"
	"github.com/blevesearch/bleve/search/query"
)
||||||
|
|
||||||
|
// issueIndexerUpdateQueue queue of issues that need to be updated in the issues
// indexer; the buffered channel is created by InitIssueIndexer and drained by
// processIssueIndexerUpdateQueue.
var issueIndexerUpdateQueue chan *Issue

// issueIndexer (thread-safe) index for searching issues
var issueIndexer bleve.Index

// issueIndexerData data stored in the issue indexer; field names must match
// the mapping registered in createIssueIndexer and the fields queried in
// SearchIssuesByKeyword.
type issueIndexerData struct {
	ID     int64
	RepoID int64

	Title   string
	Content string
}
||||||
|
|
||||||
|
// numericQuery an numeric-equality query for the given value and field
|
||||||
|
func numericQuery(value int64, field string) *query.NumericRangeQuery { |
||||||
|
f := float64(value) |
||||||
|
tru := true |
||||||
|
q := bleve.NewNumericRangeInclusiveQuery(&f, &f, &tru, &tru) |
||||||
|
q.SetField(field) |
||||||
|
return q |
||||||
|
} |
||||||
|
|
||||||
|
// SearchIssuesByKeyword searches for issues by given conditions.
|
||||||
|
// Returns the matching issue IDs
|
||||||
|
func SearchIssuesByKeyword(repoID int64, keyword string) ([]int64, error) { |
||||||
|
fields := strings.Fields(strings.ToLower(keyword)) |
||||||
|
indexerQuery := bleve.NewConjunctionQuery( |
||||||
|
numericQuery(repoID, "RepoID"), |
||||||
|
bleve.NewDisjunctionQuery( |
||||||
|
bleve.NewPhraseQuery(fields, "Title"), |
||||||
|
bleve.NewPhraseQuery(fields, "Content"), |
||||||
|
)) |
||||||
|
search := bleve.NewSearchRequestOptions(indexerQuery, 2147483647, 0, false) |
||||||
|
search.Fields = []string{"ID"} |
||||||
|
|
||||||
|
result, err := issueIndexer.Search(search) |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
|
||||||
|
issueIDs := make([]int64, len(result.Hits)) |
||||||
|
for i, hit := range result.Hits { |
||||||
|
issueIDs[i] = int64(hit.Fields["ID"].(float64)) |
||||||
|
} |
||||||
|
return issueIDs, nil |
||||||
|
} |
||||||
|
|
||||||
|
// InitIssueIndexer initialize issue indexer
|
||||||
|
func InitIssueIndexer() { |
||||||
|
_, err := os.Stat(setting.Indexer.IssuePath) |
||||||
|
if err != nil { |
||||||
|
if os.IsNotExist(err) { |
||||||
|
if err = createIssueIndexer(); err != nil { |
||||||
|
log.Fatal(4, "CreateIssuesIndexer: %v", err) |
||||||
|
} |
||||||
|
if err = populateIssueIndexer(); err != nil { |
||||||
|
log.Fatal(4, "PopulateIssuesIndex: %v", err) |
||||||
|
} |
||||||
|
} else { |
||||||
|
log.Fatal(4, "InitIssuesIndexer: %v", err) |
||||||
|
} |
||||||
|
} else { |
||||||
|
issueIndexer, err = bleve.Open(setting.Indexer.IssuePath) |
||||||
|
if err != nil { |
||||||
|
log.Fatal(4, "InitIssuesIndexer, open index: %v", err) |
||||||
|
} |
||||||
|
} |
||||||
|
issueIndexerUpdateQueue = make(chan *Issue, setting.Indexer.UpdateQueueLength) |
||||||
|
go processIssueIndexerUpdateQueue() |
||||||
|
// TODO close issueIndexer when Gitea closes
|
||||||
|
} |
||||||
|
|
||||||
|
// createIssueIndexer create an issue indexer if one does not already exist
|
||||||
|
func createIssueIndexer() error { |
||||||
|
mapping := bleve.NewIndexMapping() |
||||||
|
docMapping := bleve.NewDocumentMapping() |
||||||
|
|
||||||
|
docMapping.AddFieldMappingsAt("ID", bleve.NewNumericFieldMapping()) |
||||||
|
docMapping.AddFieldMappingsAt("RepoID", bleve.NewNumericFieldMapping()) |
||||||
|
|
||||||
|
textFieldMapping := bleve.NewTextFieldMapping() |
||||||
|
textFieldMapping.Analyzer = simple.Name |
||||||
|
docMapping.AddFieldMappingsAt("Title", textFieldMapping) |
||||||
|
docMapping.AddFieldMappingsAt("Content", textFieldMapping) |
||||||
|
|
||||||
|
mapping.AddDocumentMapping("issues", docMapping) |
||||||
|
|
||||||
|
var err error |
||||||
|
issueIndexer, err = bleve.New(setting.Indexer.IssuePath, mapping) |
||||||
|
return err |
||||||
|
} |
||||||
|
|
||||||
|
// populateIssueIndexer populate the issue indexer with issue data
|
||||||
|
func populateIssueIndexer() error { |
||||||
|
for page := 1; ; page++ { |
||||||
|
repos, err := Repositories(&SearchRepoOptions{ |
||||||
|
Page: page, |
||||||
|
PageSize: 10, |
||||||
|
}) |
||||||
|
if err != nil { |
||||||
|
return fmt.Errorf("Repositories: %v", err) |
||||||
|
} |
||||||
|
if len(repos) == 0 { |
||||||
|
return nil |
||||||
|
} |
||||||
|
batch := issueIndexer.NewBatch() |
||||||
|
for _, repo := range repos { |
||||||
|
issues, err := Issues(&IssuesOptions{ |
||||||
|
RepoID: repo.ID, |
||||||
|
IsClosed: util.OptionalBoolNone, |
||||||
|
IsPull: util.OptionalBoolNone, |
||||||
|
Page: -1, // do not page
|
||||||
|
}) |
||||||
|
if err != nil { |
||||||
|
return fmt.Errorf("Issues: %v", err) |
||||||
|
} |
||||||
|
for _, issue := range issues { |
||||||
|
err = batch.Index(issue.indexUID(), issue.issueData()) |
||||||
|
if err != nil { |
||||||
|
return fmt.Errorf("batch.Index: %v", err) |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
if err = issueIndexer.Batch(batch); err != nil { |
||||||
|
return fmt.Errorf("index.Batch: %v", err) |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func processIssueIndexerUpdateQueue() { |
||||||
|
for { |
||||||
|
select { |
||||||
|
case issue := <-issueIndexerUpdateQueue: |
||||||
|
if err := issueIndexer.Index(issue.indexUID(), issue.issueData()); err != nil { |
||||||
|
log.Error(4, "issuesIndexer.Index: %v", err) |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// indexUID a unique identifier for an issue used in full-text indices.
// Base-36 keeps the key short; uniqueness follows from issue.ID being unique.
func (issue *Issue) indexUID() string {
	return strconv.FormatInt(issue.ID, 36)
}
||||||
|
|
||||||
|
// issueData extracts the subset of issue fields stored in the full-text
// index (must stay in sync with issueIndexerData and the mapping created in
// createIssueIndexer).
func (issue *Issue) issueData() *issueIndexerData {
	return &issueIndexerData{
		ID:      issue.ID,
		RepoID:  issue.RepoID,
		Title:   issue.Title,
		Content: issue.Content,
	}
}
||||||
|
|
||||||
|
// UpdateIssueIndexer add/update an issue to the issue indexer.
// Enqueues asynchronously so the caller never blocks, even when the buffered
// queue is full. NOTE(review): this spawns one goroutine per call; if the
// queue stays full, goroutines accumulate without bound — confirm this is
// acceptable under load.
func UpdateIssueIndexer(issue *Issue) {
	go func() {
		issueIndexerUpdateQueue <- issue
	}()
}
@ -0,0 +1,14 @@ |
|||||||
|
// Copyright 2016 The Gitea Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package indexer |
||||||
|
|
||||||
|
import ( |
||||||
|
"code.gitea.io/gitea/models" |
||||||
|
) |
||||||
|
|
||||||
|
// NewContext start indexer service.
// Currently this only initializes the issue indexer (which may create and
// populate the index on first run — see models.InitIssueIndexer).
func NewContext() {
	models.InitIssueIndexer()
}
@ -0,0 +1,25 @@ |
|||||||
|
// Copyright 2017 The Gitea Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package util |
||||||
|
|
||||||
|
// OptionalBool a boolean that can be "null"
type OptionalBool byte

const (
	// OptionalBoolNone a "null" boolean value
	// The explicit OptionalBool type on the iota makes all three constants
	// typed (the original left them untyped ints), so misuse such as
	// assigning them to a plain int is caught at compile time.
	OptionalBoolNone OptionalBool = iota
	// OptionalBoolTrue a "true" boolean value
	OptionalBoolTrue
	// OptionalBoolFalse a "false" boolean value
	OptionalBoolFalse
)

// OptionalBoolOf get the corresponding OptionalBool of a bool
func OptionalBoolOf(b bool) OptionalBool {
	if b {
		return OptionalBoolTrue
	}
	return OptionalBoolFalse
}
@ -1,4 +1,4 @@ |
|||||||
<div class="ui compact small menu"> |
<div class="ui compact left small menu"> |
||||||
<a class="{{if .PageIsLabels}}active{{end}} item" href="{{.RepoLink}}/labels">{{.i18n.Tr "repo.labels"}}</a> |
<a class="{{if .PageIsLabels}}active{{end}} item" href="{{.RepoLink}}/labels">{{.i18n.Tr "repo.labels"}}</a> |
||||||
<a class="{{if .PageIsMilestones}}active{{end}} item" href="{{.RepoLink}}/milestones">{{.i18n.Tr "repo.milestones"}}</a> |
<a class="{{if .PageIsMilestones}}active{{end}} item" href="{{.RepoLink}}/milestones">{{.i18n.Tr "repo.milestones"}}</a> |
||||||
</div> |
</div> |
||||||
|
@ -0,0 +1,13 @@ |
|||||||
|
<form class="ui form"> |
||||||
|
<div class="ui fluid action input"> |
||||||
|
<input type="hidden" name="type" value="{{$.ViewType}}"/> |
||||||
|
<input type="hidden" name="state" value="{{$.State}}"/> |
||||||
|
<input type="hidden" name="labels" value="{{.SelectLabels}}"/> |
||||||
|
<input type="hidden" name="milestone" value="{{$.MilestoneID}}"/> |
||||||
|
<input type="hidden" name="assignee" value="{{$.AssigneeID}}"/> |
||||||
|
<div class="ui search action input"> |
||||||
|
<input name="q" value="{{.Keyword}}" placeholder="{{.i18n.Tr "explore.search"}}..." autofocus> |
||||||
|
</div> |
||||||
|
<button class="ui blue button" type="submit">{{.i18n.Tr "explore.search"}}</button> |
||||||
|
</div> |
||||||
|
</form> |
@ -0,0 +1,16 @@ |
|||||||
|
# Contributing to Bleve |
||||||
|
|
||||||
|
We look forward to your contributions, but ask that you first review these guidelines. |
||||||
|
|
||||||
|
### Sign the CLA |
||||||
|
|
||||||
|
As Bleve is a Couchbase project we require contributors accept the [Couchbase Contributor License Agreement](http://review.couchbase.org/static/individual_agreement.html). To sign this agreement log into the Couchbase [code review tool](http://review.couchbase.org/). The Bleve project does not use this code review tool but it is still used to track acceptance of the contributor license agreements. |
||||||
|
|
||||||
|
### Submitting a Pull Request |
||||||
|
|
||||||
|
All types of contributions are welcome, but please keep the following in mind: |
||||||
|
|
||||||
|
- If you're planning a large change, you should really discuss it in a github issue or on the google group first. This helps avoid duplicate effort and spending time on something that may not be merged. |
||||||
|
- Existing tests should continue to pass, new tests for the contribution are nice to have. |
||||||
|
- All code should have gone through `go fmt` |
||||||
|
- All code should pass `go vet` |
@ -0,0 +1,202 @@ |
|||||||
|
|
||||||
|
Apache License |
||||||
|
Version 2.0, January 2004 |
||||||
|
http://www.apache.org/licenses/ |
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION |
||||||
|
|
||||||
|
1. Definitions. |
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction, |
||||||
|
and distribution as defined by Sections 1 through 9 of this document. |
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by |
||||||
|
the copyright owner that is granting the License. |
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all |
||||||
|
other entities that control, are controlled by, or are under common |
||||||
|
control with that entity. For the purposes of this definition, |
||||||
|
"control" means (i) the power, direct or indirect, to cause the |
||||||
|
direction or management of such entity, whether by contract or |
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the |
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity. |
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity |
||||||
|
exercising permissions granted by this License. |
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications, |
||||||
|
including but not limited to software source code, documentation |
||||||
|
source, and configuration files. |
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical |
||||||
|
transformation or translation of a Source form, including but |
||||||
|
not limited to compiled object code, generated documentation, |
||||||
|
and conversions to other media types. |
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or |
||||||
|
Object form, made available under the License, as indicated by a |
||||||
|
copyright notice that is included in or attached to the work |
||||||
|
(an example is provided in the Appendix below). |
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object |
||||||
|
form, that is based on (or derived from) the Work and for which the |
||||||
|
editorial revisions, annotations, elaborations, or other modifications |
||||||
|
represent, as a whole, an original work of authorship. For the purposes |
||||||
|
of this License, Derivative Works shall not include works that remain |
||||||
|
separable from, or merely link (or bind by name) to the interfaces of, |
||||||
|
the Work and Derivative Works thereof. |
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including |
||||||
|
the original version of the Work and any modifications or additions |
||||||
|
to that Work or Derivative Works thereof, that is intentionally |
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner |
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of |
||||||
|
the copyright owner. For the purposes of this definition, "submitted" |
||||||
|
means any form of electronic, verbal, or written communication sent |
||||||
|
to the Licensor or its representatives, including but not limited to |
||||||
|
communication on electronic mailing lists, source code control systems, |
||||||
|
and issue tracking systems that are managed by, or on behalf of, the |
||||||
|
Licensor for the purpose of discussing and improving the Work, but |
||||||
|
excluding communication that is conspicuously marked or otherwise |
||||||
|
designated in writing by the copyright owner as "Not a Contribution." |
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity |
||||||
|
on behalf of whom a Contribution has been received by Licensor and |
||||||
|
subsequently incorporated within the Work. |
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of |
||||||
|
this License, each Contributor hereby grants to You a perpetual, |
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable |
||||||
|
copyright license to reproduce, prepare Derivative Works of, |
||||||
|
publicly display, publicly perform, sublicense, and distribute the |
||||||
|
Work and such Derivative Works in Source or Object form. |
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of |
||||||
|
this License, each Contributor hereby grants to You a perpetual, |
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable |
||||||
|
(except as stated in this section) patent license to make, have made, |
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work, |
||||||
|
where such license applies only to those patent claims licensable |
||||||
|
by such Contributor that are necessarily infringed by their |
||||||
|
Contribution(s) alone or by combination of their Contribution(s) |
||||||
|
with the Work to which such Contribution(s) was submitted. If You |
||||||
|
institute patent litigation against any entity (including a |
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work |
||||||
|
or a Contribution incorporated within the Work constitutes direct |
||||||
|
or contributory patent infringement, then any patent licenses |
||||||
|
granted to You under this License for that Work shall terminate |
||||||
|
as of the date such litigation is filed. |
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the |
||||||
|
Work or Derivative Works thereof in any medium, with or without |
||||||
|
modifications, and in Source or Object form, provided that You |
||||||
|
meet the following conditions: |
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or |
||||||
|
Derivative Works a copy of this License; and |
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices |
||||||
|
stating that You changed the files; and |
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works |
||||||
|
that You distribute, all copyright, patent, trademark, and |
||||||
|
attribution notices from the Source form of the Work, |
||||||
|
excluding those notices that do not pertain to any part of |
||||||
|
the Derivative Works; and |
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its |
||||||
|
distribution, then any Derivative Works that You distribute must |
||||||
|
include a readable copy of the attribution notices contained |
||||||
|
within such NOTICE file, excluding those notices that do not |
||||||
|
pertain to any part of the Derivative Works, in at least one |
||||||
|
of the following places: within a NOTICE text file distributed |
||||||
|
as part of the Derivative Works; within the Source form or |
||||||
|
documentation, if provided along with the Derivative Works; or, |
||||||
|
within a display generated by the Derivative Works, if and |
||||||
|
wherever such third-party notices normally appear. The contents |
||||||
|
of the NOTICE file are for informational purposes only and |
||||||
|
do not modify the License. You may add Your own attribution |
||||||
|
notices within Derivative Works that You distribute, alongside |
||||||
|
or as an addendum to the NOTICE text from the Work, provided |
||||||
|
that such additional attribution notices cannot be construed |
||||||
|
as modifying the License. |
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and |
||||||
|
may provide additional or different license terms and conditions |
||||||
|
for use, reproduction, or distribution of Your modifications, or |
||||||
|
for any such Derivative Works as a whole, provided Your use, |
||||||
|
reproduction, and distribution of the Work otherwise complies with |
||||||
|
the conditions stated in this License. |
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise, |
||||||
|
any Contribution intentionally submitted for inclusion in the Work |
||||||
|
by You to the Licensor shall be under the terms and conditions of |
||||||
|
this License, without any additional terms or conditions. |
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify |
||||||
|
the terms of any separate license agreement you may have executed |
||||||
|
with Licensor regarding such Contributions. |
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade |
||||||
|
names, trademarks, service marks, or product names of the Licensor, |
||||||
|
except as required for reasonable and customary use in describing the |
||||||
|
origin of the Work and reproducing the content of the NOTICE file. |
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or |
||||||
|
agreed to in writing, Licensor provides the Work (and each |
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS, |
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or |
||||||
|
implied, including, without limitation, any warranties or conditions |
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A |
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the |
||||||
|
appropriateness of using or redistributing the Work and assume any |
||||||
|
risks associated with Your exercise of permissions under this License. |
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory, |
||||||
|
whether in tort (including negligence), contract, or otherwise, |
||||||
|
unless required by applicable law (such as deliberate and grossly |
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be |
||||||
|
liable to You for damages, including any direct, indirect, special, |
||||||
|
incidental, or consequential damages of any character arising as a |
||||||
|
result of this License or out of the use or inability to use the |
||||||
|
Work (including but not limited to damages for loss of goodwill, |
||||||
|
work stoppage, computer failure or malfunction, or any and all |
||||||
|
other commercial damages or losses), even if such Contributor |
||||||
|
has been advised of the possibility of such damages. |
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing |
||||||
|
the Work or Derivative Works thereof, You may choose to offer, |
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity, |
||||||
|
or other liability obligations and/or rights consistent with this |
||||||
|
License. However, in accepting such obligations, You may act only |
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf |
||||||
|
of any other Contributor, and only if You agree to indemnify, |
||||||
|
defend, and hold each Contributor harmless for any liability |
||||||
|
incurred by, or claims asserted against, such Contributor by reason |
||||||
|
of your accepting any such warranty or additional liability. |
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS |
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work. |
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following |
||||||
|
boilerplate notice, with the fields enclosed by brackets "[]" |
||||||
|
replaced with your own identifying information. (Don't include |
||||||
|
the brackets!) The text should be enclosed in the appropriate |
||||||
|
comment syntax for the file format. We also recommend that a |
||||||
|
file or class name and description of purpose be included on the |
||||||
|
same "printed page" as the copyright notice for easier |
||||||
|
identification within third-party archives. |
||||||
|
|
||||||
|
Copyright [yyyy] [name of copyright owner] |
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
you may not use this file except in compliance with the License. |
||||||
|
You may obtain a copy of the License at |
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software |
||||||
|
distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
See the License for the specific language governing permissions and |
||||||
|
limitations under the License. |
@ -0,0 +1,62 @@ |
|||||||
|
# ![bleve](docs/bleve.png) bleve |
||||||
|
|
||||||
|
[![Build Status](https://travis-ci.org/blevesearch/bleve.svg?branch=master)](https://travis-ci.org/blevesearch/bleve) [![Coverage Status](https://coveralls.io/repos/blevesearch/bleve/badge.png?branch=master)](https://coveralls.io/r/blevesearch/bleve?branch=master) [![GoDoc](https://godoc.org/github.com/blevesearch/bleve?status.svg)](https://godoc.org/github.com/blevesearch/bleve) |
||||||
|
[![Join the chat at https://gitter.im/blevesearch/bleve](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/blevesearch/bleve?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) |
||||||
|
[![codebeat](https://codebeat.co/badges/38a7cbc9-9cf5-41c0-a315-0746178230f4)](https://codebeat.co/projects/github-com-blevesearch-bleve) |
||||||
|
[![Go Report Card](https://goreportcard.com/badge/blevesearch/bleve)](https://goreportcard.com/report/blevesearch/bleve) |
||||||
|
|
||||||
|
modern text indexing in go - [blevesearch.com](http://www.blevesearch.com/) |
||||||
|
|
||||||
|
Try out bleve live by [searching the bleve website](http://www.blevesearch.com/search/?q=bleve). |
||||||
|
|
||||||
|
## Features |
||||||
|
|
||||||
|
* Index any go data structure (including JSON) |
||||||
|
* Intelligent defaults backed up by powerful configuration |
||||||
|
* Supported field types: |
||||||
|
* Text, Numeric, Date |
||||||
|
* Supported query types: |
||||||
|
* Term, Phrase, Match, Match Phrase, Prefix |
||||||
|
* Conjunction, Disjunction, Boolean |
||||||
|
* Numeric Range, Date Range |
||||||
|
* Simple query [syntax](http://www.blevesearch.com/docs/Query-String-Query/) for human entry |
||||||
|
* tf-idf Scoring |
||||||
|
* Search result match highlighting |
||||||
|
* Supports Aggregating Facets: |
||||||
|
* Terms Facet |
||||||
|
* Numeric Range Facet |
||||||
|
* Date Range Facet |
||||||
|
|
||||||
|
## Discussion |
||||||
|
|
||||||
|
Discuss usage and development of bleve in the [google group](https://groups.google.com/forum/#!forum/bleve). |
||||||
|
|
||||||
|
## Indexing |
||||||
|
|
||||||
|
message := struct{ |
||||||
|
Id string |
||||||
|
From string |
||||||
|
Body string |
||||||
|
}{ |
||||||
|
Id: "example", |
||||||
|
From: "marty.schoch@gmail.com", |
||||||
|
Body: "bleve indexing is easy", |
||||||
|
} |
||||||
|
|
||||||
|
mapping := bleve.NewIndexMapping() |
||||||
|
index, err := bleve.New("example.bleve", mapping) |
||||||
|
if err != nil { |
||||||
|
panic(err) |
||||||
|
} |
||||||
|
index.Index(message.Id, message) |
||||||
|
|
||||||
|
## Querying |
||||||
|
|
||||||
|
index, _ := bleve.Open("example.bleve") |
||||||
|
query := bleve.NewQueryStringQuery("bleve") |
||||||
|
searchRequest := bleve.NewSearchRequest(query) |
||||||
|
searchResult, _ := index.Search(searchRequest) |
||||||
|
|
||||||
|
## License |
||||||
|
|
||||||
|
Apache License Version 2.0 |
@ -0,0 +1,46 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package simple |
||||||
|
|
||||||
|
import ( |
||||||
|
"github.com/blevesearch/bleve/analysis" |
||||||
|
"github.com/blevesearch/bleve/analysis/token/lowercase" |
||||||
|
"github.com/blevesearch/bleve/analysis/tokenizer/letter" |
||||||
|
"github.com/blevesearch/bleve/registry" |
||||||
|
) |
||||||
|
|
||||||
|
// Name is the registered name of the "simple" analyzer.
const Name = "simple"

// AnalyzerConstructor builds the simple analyzer: a letter-based tokenizer
// piped through a lower-case token filter. The config map is unused.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(letter.Name)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: tokenizer,
		TokenFilters: []analysis.TokenFilter{
			toLowerFilter,
		},
	}
	return &rv, nil
}

// init registers the analyzer under Name so bleve index mappings can
// reference it by string.
func init() {
	registry.RegisterAnalyzer(Name, AnalyzerConstructor)
}
@ -0,0 +1,52 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package standard |
||||||
|
|
||||||
|
import ( |
||||||
|
"github.com/blevesearch/bleve/analysis" |
||||||
|
"github.com/blevesearch/bleve/analysis/lang/en" |
||||||
|
"github.com/blevesearch/bleve/analysis/token/lowercase" |
||||||
|
"github.com/blevesearch/bleve/analysis/tokenizer/unicode" |
||||||
|
"github.com/blevesearch/bleve/registry" |
||||||
|
) |
||||||
|
|
||||||
|
// Name is the registered name of the "standard" analyzer.
const Name = "standard"

// AnalyzerConstructor builds the standard analyzer: unicode tokenization,
// lower-casing, then English stop-word removal. The config map is unused.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}
	toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
	if err != nil {
		return nil, err
	}
	stopEnFilter, err := cache.TokenFilterNamed(en.StopName)
	if err != nil {
		return nil, err
	}
	rv := analysis.Analyzer{
		Tokenizer: tokenizer,
		// filters run in slice order: lower-case first, then stop words
		TokenFilters: []analysis.TokenFilter{
			toLowerFilter,
			stopEnFilter,
		},
	}
	return &rv, nil
}

// init registers the analyzer under Name so bleve index mappings can
// reference it by string.
func init() {
	registry.RegisterAnalyzer(Name, AnalyzerConstructor)
}
@ -0,0 +1,64 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package flexible |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"time" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/analysis" |
||||||
|
"github.com/blevesearch/bleve/registry" |
||||||
|
) |
||||||
|
|
||||||
|
// Name is the registered name of this date/time parser.
const Name = "flexiblego"

// DateTimeParser tries a configurable list of Go time layouts in order.
type DateTimeParser struct {
	layouts []string
}

// New creates a DateTimeParser for the given layout strings.
func New(layouts []string) *DateTimeParser {
	return &DateTimeParser{
		layouts: layouts,
	}
}

// ParseDateTime returns the first successful parse of input against the
// configured layouts, or analysis.ErrInvalidDateTime if none match.
func (p *DateTimeParser) ParseDateTime(input string) (time.Time, error) {
	for _, layout := range p.layouts {
		rv, err := time.Parse(layout, input)
		if err == nil {
			return rv, nil
		}
	}
	return time.Time{}, analysis.ErrInvalidDateTime
}

// DateTimeParserConstructor builds a DateTimeParser from config. The
// "layouts" entry must be present and be a list; non-string entries in the
// list are silently skipped.
func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
	layouts, ok := config["layouts"].([]interface{})
	if !ok {
		return nil, fmt.Errorf("must specify layouts")
	}
	var layoutStrs []string
	for _, layout := range layouts {
		layoutStr, ok := layout.(string)
		if ok {
			layoutStrs = append(layoutStrs, layoutStr)
		}
	}
	return New(layoutStrs), nil
}

// init registers the parser under Name.
func init() {
	registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
}
@ -0,0 +1,45 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package optional |
||||||
|
|
||||||
|
import ( |
||||||
|
"time" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/analysis" |
||||||
|
"github.com/blevesearch/bleve/analysis/datetime/flexible" |
||||||
|
"github.com/blevesearch/bleve/registry" |
||||||
|
) |
||||||
|
|
||||||
|
const Name = "dateTimeOptional" |
||||||
|
|
||||||
|
const rfc3339NoTimezone = "2006-01-02T15:04:05" |
||||||
|
const rfc3339NoTimezoneNoT = "2006-01-02 15:04:05" |
||||||
|
const rfc3339NoTime = "2006-01-02" |
||||||
|
|
||||||
|
var layouts = []string{ |
||||||
|
time.RFC3339Nano, |
||||||
|
time.RFC3339, |
||||||
|
rfc3339NoTimezone, |
||||||
|
rfc3339NoTimezoneNoT, |
||||||
|
rfc3339NoTime, |
||||||
|
} |
||||||
|
|
||||||
|
func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) { |
||||||
|
return flexible.New(layouts), nil |
||||||
|
} |
||||||
|
|
||||||
|
func init() { |
||||||
|
registry.RegisterDateTimeParser(Name, DateTimeParserConstructor) |
||||||
|
} |
@ -0,0 +1,111 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package analysis |
||||||
|
|
||||||
|
// TokenLocation represents one occurrence of a term at a particular location in
|
||||||
|
// a field. Start, End and Position have the same meaning as in analysis.Token.
|
||||||
|
// Field and ArrayPositions identify the field value in the source document.
|
||||||
|
// See document.Field for details.
|
||||||
|
type TokenLocation struct { |
||||||
|
Field string |
||||||
|
ArrayPositions []uint64 |
||||||
|
Start int |
||||||
|
End int |
||||||
|
Position int |
||||||
|
} |
||||||
|
|
||||||
|
// TokenFreq represents all the occurrences of a term in all fields of a
|
||||||
|
// document.
|
||||||
|
type TokenFreq struct { |
||||||
|
Term []byte |
||||||
|
Locations []*TokenLocation |
||||||
|
frequency int |
||||||
|
} |
||||||
|
|
||||||
|
func (tf *TokenFreq) Frequency() int { |
||||||
|
return tf.frequency |
||||||
|
} |
||||||
|
|
||||||
|
// TokenFrequencies maps document terms to their combined frequencies from all
|
||||||
|
// fields.
|
||||||
|
type TokenFrequencies map[string]*TokenFreq |
||||||
|
|
||||||
|
func (tfs TokenFrequencies) MergeAll(remoteField string, other TokenFrequencies) { |
||||||
|
// walk the new token frequencies
|
||||||
|
for tfk, tf := range other { |
||||||
|
// set the remoteField value in incoming token freqs
|
||||||
|
for _, l := range tf.Locations { |
||||||
|
l.Field = remoteField |
||||||
|
} |
||||||
|
existingTf, exists := tfs[tfk] |
||||||
|
if exists { |
||||||
|
existingTf.Locations = append(existingTf.Locations, tf.Locations...) |
||||||
|
existingTf.frequency = existingTf.frequency + tf.frequency |
||||||
|
} else { |
||||||
|
tfs[tfk] = &TokenFreq{ |
||||||
|
Term: tf.Term, |
||||||
|
frequency: tf.frequency, |
||||||
|
Locations: make([]*TokenLocation, len(tf.Locations)), |
||||||
|
} |
||||||
|
copy(tfs[tfk].Locations, tf.Locations) |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func TokenFrequency(tokens TokenStream, arrayPositions []uint64, includeTermVectors bool) TokenFrequencies { |
||||||
|
rv := make(map[string]*TokenFreq, len(tokens)) |
||||||
|
|
||||||
|
if includeTermVectors { |
||||||
|
tls := make([]TokenLocation, len(tokens)) |
||||||
|
tlNext := 0 |
||||||
|
|
||||||
|
for _, token := range tokens { |
||||||
|
tls[tlNext] = TokenLocation{ |
||||||
|
ArrayPositions: arrayPositions, |
||||||
|
Start: token.Start, |
||||||
|
End: token.End, |
||||||
|
Position: token.Position, |
||||||
|
} |
||||||
|
|
||||||
|
curr, ok := rv[string(token.Term)] |
||||||
|
if ok { |
||||||
|
curr.Locations = append(curr.Locations, &tls[tlNext]) |
||||||
|
curr.frequency++ |
||||||
|
} else { |
||||||
|
rv[string(token.Term)] = &TokenFreq{ |
||||||
|
Term: token.Term, |
||||||
|
Locations: []*TokenLocation{&tls[tlNext]}, |
||||||
|
frequency: 1, |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
tlNext++ |
||||||
|
} |
||||||
|
} else { |
||||||
|
for _, token := range tokens { |
||||||
|
curr, exists := rv[string(token.Term)] |
||||||
|
if exists { |
||||||
|
curr.frequency++ |
||||||
|
} else { |
||||||
|
rv[string(token.Term)] = &TokenFreq{ |
||||||
|
Term: token.Term, |
||||||
|
frequency: 1, |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
return rv |
||||||
|
} |
@ -0,0 +1,70 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
// Package en implements an analyzer with reasonable defaults for processing
|
||||||
|
// English text.
|
||||||
|
//
|
||||||
|
// It strips possessive suffixes ('s), transforms tokens to lower case,
|
||||||
|
// removes stopwords from a built-in list, and applies porter stemming.
|
||||||
|
//
|
||||||
|
// The built-in stopwords list is defined in EnglishStopWords.
|
||||||
|
package en |
||||||
|
|
||||||
|
import ( |
||||||
|
"github.com/blevesearch/bleve/analysis" |
||||||
|
"github.com/blevesearch/bleve/registry" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/analysis/token/lowercase" |
||||||
|
"github.com/blevesearch/bleve/analysis/token/porter" |
||||||
|
"github.com/blevesearch/bleve/analysis/tokenizer/unicode" |
||||||
|
) |
||||||
|
|
||||||
|
const AnalyzerName = "en" |
||||||
|
|
||||||
|
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { |
||||||
|
tokenizer, err := cache.TokenizerNamed(unicode.Name) |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
possEnFilter, err := cache.TokenFilterNamed(PossessiveName) |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name) |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
stopEnFilter, err := cache.TokenFilterNamed(StopName) |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
stemmerEnFilter, err := cache.TokenFilterNamed(porter.Name) |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
rv := analysis.Analyzer{ |
||||||
|
Tokenizer: tokenizer, |
||||||
|
TokenFilters: []analysis.TokenFilter{ |
||||||
|
possEnFilter, |
||||||
|
toLowerFilter, |
||||||
|
stopEnFilter, |
||||||
|
stemmerEnFilter, |
||||||
|
}, |
||||||
|
} |
||||||
|
return &rv, nil |
||||||
|
} |
||||||
|
|
||||||
|
func init() { |
||||||
|
registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor) |
||||||
|
} |
@ -0,0 +1,67 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package en |
||||||
|
|
||||||
|
import ( |
||||||
|
"unicode/utf8" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/analysis" |
||||||
|
"github.com/blevesearch/bleve/registry" |
||||||
|
) |
||||||
|
|
||||||
|
// PossessiveName is the name PossessiveFilter is registered as
// in the bleve registry.
const PossessiveName = "possessive_en"

// Apostrophe variants recognized when stripping a possessive suffix:
// U+2019 RIGHT SINGLE QUOTATION MARK, the ASCII apostrophe, and
// U+FF07 FULLWIDTH APOSTROPHE. The fullwidth rune is written as an
// escape so the literal survives transcoding (the source here had been
// garbled to a bare quote, which is invalid Go).
const rightSingleQuotationMark = '\u2019'
const apostrophe = '\''
const fullWidthApostrophe = '\uff07'

// NOTE(review): this is the arithmetic sum of the three rune values
// (an untyped constant), not a string containing the characters —
// presumably unused; confirm before changing its form.
const apostropheChars = rightSingleQuotationMark + apostrophe + fullWidthApostrophe
||||||
|
|
||||||
|
// PossessiveFilter implements a TokenFilter which
|
||||||
|
// strips the English possessive suffix ('s) from tokens.
|
||||||
|
// It handle a variety of apostrophe types, is case-insensitive
|
||||||
|
// and doesn't distinguish between possessive and contraction.
|
||||||
|
// (ie "She's So Rad" becomes "She So Rad")
|
||||||
|
type PossessiveFilter struct { |
||||||
|
} |
||||||
|
|
||||||
|
func NewPossessiveFilter() *PossessiveFilter { |
||||||
|
return &PossessiveFilter{} |
||||||
|
} |
||||||
|
|
||||||
|
func (s *PossessiveFilter) Filter(input analysis.TokenStream) analysis.TokenStream { |
||||||
|
for _, token := range input { |
||||||
|
lastRune, lastRuneSize := utf8.DecodeLastRune(token.Term) |
||||||
|
if lastRune == 's' || lastRune == 'S' { |
||||||
|
nextLastRune, nextLastRuneSize := utf8.DecodeLastRune(token.Term[:len(token.Term)-lastRuneSize]) |
||||||
|
if nextLastRune == rightSingleQuotationMark || |
||||||
|
nextLastRune == apostrophe || |
||||||
|
nextLastRune == fullWidthApostrophe { |
||||||
|
token.Term = token.Term[:len(token.Term)-lastRuneSize-nextLastRuneSize] |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
return input |
||||||
|
} |
||||||
|
|
||||||
|
func PossessiveFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { |
||||||
|
return NewPossessiveFilter(), nil |
||||||
|
} |
||||||
|
|
||||||
|
func init() { |
||||||
|
registry.RegisterTokenFilter(PossessiveName, PossessiveFilterConstructor) |
||||||
|
} |
@ -0,0 +1,33 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package en |
||||||
|
|
||||||
|
import ( |
||||||
|
"github.com/blevesearch/bleve/analysis" |
||||||
|
"github.com/blevesearch/bleve/analysis/token/stop" |
||||||
|
"github.com/blevesearch/bleve/registry" |
||||||
|
) |
||||||
|
|
||||||
|
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { |
||||||
|
tokenMap, err := cache.TokenMapNamed(StopName) |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
return stop.NewStopTokensFilter(tokenMap), nil |
||||||
|
} |
||||||
|
|
||||||
|
func init() { |
||||||
|
registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) |
||||||
|
} |
@ -0,0 +1,344 @@ |
|||||||
|
package en |
||||||
|
|
||||||
|
import ( |
||||||
|
"github.com/blevesearch/bleve/analysis" |
||||||
|
"github.com/blevesearch/bleve/registry" |
||||||
|
) |
||||||
|
|
||||||
|
const StopName = "stop_en" |
||||||
|
|
||||||
|
// EnglishStopWords is the built-in list of stopwords used by the "stop_en" TokenFilter.
|
||||||
|
//
|
||||||
|
// this content was obtained from:
|
||||||
|
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
|
||||||
|
// ` was changed to ' to allow for literal string
|
||||||
|
var EnglishStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/english/stop.txt |
||||||
|
| This file is distributed under the BSD License. |
||||||
|
| See http://snowball.tartarus.org/license.php
|
||||||
|
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||||
|
| - Encoding was converted to UTF-8. |
||||||
|
| - This notice was added. |
||||||
|
| |
||||||
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball" |
||||||
|
|
||||||
|
| An English stop word list. Comments begin with vertical bar. Each stop |
||||||
|
| word is at the start of a line. |
||||||
|
|
||||||
|
| Many of the forms below are quite rare (e.g. "yourselves") but included for |
||||||
|
| completeness. |
||||||
|
|
||||||
|
| PRONOUNS FORMS |
||||||
|
| 1st person sing |
||||||
|
|
||||||
|
i | subject, always in upper case of course |
||||||
|
|
||||||
|
me | object |
||||||
|
my | possessive adjective |
||||||
|
| the possessive pronoun 'mine' is best suppressed, because of the |
||||||
|
| sense of coal-mine etc. |
||||||
|
myself | reflexive |
||||||
|
| 1st person plural |
||||||
|
we | subject |
||||||
|
|
||||||
|
| us | object |
||||||
|
| care is required here because US = United States. It is usually |
||||||
|
| safe to remove it if it is in lower case. |
||||||
|
our | possessive adjective |
||||||
|
ours | possessive pronoun |
||||||
|
ourselves | reflexive |
||||||
|
| second person (archaic 'thou' forms not included) |
||||||
|
you | subject and object |
||||||
|
your | possessive adjective |
||||||
|
yours | possessive pronoun |
||||||
|
yourself | reflexive (singular) |
||||||
|
yourselves | reflexive (plural) |
||||||
|
| third person singular |
||||||
|
he | subject |
||||||
|
him | object |
||||||
|
his | possessive adjective and pronoun |
||||||
|
himself | reflexive |
||||||
|
|
||||||
|
she | subject |
||||||
|
her | object and possessive adjective |
||||||
|
hers | possessive pronoun |
||||||
|
herself | reflexive |
||||||
|
|
||||||
|
it | subject and object |
||||||
|
its | possessive adjective |
||||||
|
itself | reflexive |
||||||
|
| third person plural |
||||||
|
they | subject |
||||||
|
them | object |
||||||
|
their | possessive adjective |
||||||
|
theirs | possessive pronoun |
||||||
|
themselves | reflexive |
||||||
|
| other forms (demonstratives, interrogatives) |
||||||
|
what |
||||||
|
which |
||||||
|
who |
||||||
|
whom |
||||||
|
this |
||||||
|
that |
||||||
|
these |
||||||
|
those |
||||||
|
|
||||||
|
| VERB FORMS (using F.R. Palmer's nomenclature) |
||||||
|
| BE |
||||||
|
am | 1st person, present |
||||||
|
is | -s form (3rd person, present) |
||||||
|
are | present |
||||||
|
was | 1st person, past |
||||||
|
were | past |
||||||
|
be | infinitive |
||||||
|
been | past participle |
||||||
|
being | -ing form |
||||||
|
| HAVE |
||||||
|
have | simple |
||||||
|
has | -s form |
||||||
|
had | past |
||||||
|
having | -ing form |
||||||
|
| DO |
||||||
|
do | simple |
||||||
|
does | -s form |
||||||
|
did | past |
||||||
|
doing | -ing form |
||||||
|
|
||||||
|
| The forms below are, I believe, best omitted, because of the significant |
||||||
|
| homonym forms: |
||||||
|
|
||||||
|
| He made a WILL |
||||||
|
| old tin CAN |
||||||
|
| merry month of MAY |
||||||
|
| a smell of MUST |
||||||
|
| fight the good fight with all thy MIGHT |
||||||
|
|
||||||
|
| would, could, should, ought might however be included |
||||||
|
|
||||||
|
| | AUXILIARIES |
||||||
|
| | WILL |
||||||
|
|will |
||||||
|
|
||||||
|
would |
||||||
|
|
||||||
|
| | SHALL |
||||||
|
|shall |
||||||
|
|
||||||
|
should |
||||||
|
|
||||||
|
| | CAN |
||||||
|
|can |
||||||
|
|
||||||
|
could |
||||||
|
|
||||||
|
| | MAY |
||||||
|
|may |
||||||
|
|might |
||||||
|
| | MUST |
||||||
|
|must |
||||||
|
| | OUGHT |
||||||
|
|
||||||
|
ought |
||||||
|
|
||||||
|
| COMPOUND FORMS, increasingly encountered nowadays in 'formal' writing |
||||||
|
| pronoun + verb |
||||||
|
|
||||||
|
i'm |
||||||
|
you're |
||||||
|
he's |
||||||
|
she's |
||||||
|
it's |
||||||
|
we're |
||||||
|
they're |
||||||
|
i've |
||||||
|
you've |
||||||
|
we've |
||||||
|
they've |
||||||
|
i'd |
||||||
|
you'd |
||||||
|
he'd |
||||||
|
she'd |
||||||
|
we'd |
||||||
|
they'd |
||||||
|
i'll |
||||||
|
you'll |
||||||
|
he'll |
||||||
|
she'll |
||||||
|
we'll |
||||||
|
they'll |
||||||
|
|
||||||
|
| verb + negation |
||||||
|
|
||||||
|
isn't |
||||||
|
aren't |
||||||
|
wasn't |
||||||
|
weren't |
||||||
|
hasn't |
||||||
|
haven't |
||||||
|
hadn't |
||||||
|
doesn't |
||||||
|
don't |
||||||
|
didn't |
||||||
|
|
||||||
|
| auxiliary + negation |
||||||
|
|
||||||
|
won't |
||||||
|
wouldn't |
||||||
|
shan't |
||||||
|
shouldn't |
||||||
|
can't |
||||||
|
cannot |
||||||
|
couldn't |
||||||
|
mustn't |
||||||
|
|
||||||
|
| miscellaneous forms |
||||||
|
|
||||||
|
let's |
||||||
|
that's |
||||||
|
who's |
||||||
|
what's |
||||||
|
here's |
||||||
|
there's |
||||||
|
when's |
||||||
|
where's |
||||||
|
why's |
||||||
|
how's |
||||||
|
|
||||||
|
| rarer forms |
||||||
|
|
||||||
|
| daren't needn't |
||||||
|
|
||||||
|
| doubtful forms |
||||||
|
|
||||||
|
| oughtn't mightn't |
||||||
|
|
||||||
|
| ARTICLES |
||||||
|
a |
||||||
|
an |
||||||
|
the |
||||||
|
|
||||||
|
| THE REST (Overlap among prepositions, conjunctions, adverbs etc is so |
||||||
|
| high, that classification is pointless.) |
||||||
|
and |
||||||
|
but |
||||||
|
if |
||||||
|
or |
||||||
|
because |
||||||
|
as |
||||||
|
until |
||||||
|
while |
||||||
|
|
||||||
|
of |
||||||
|
at |
||||||
|
by |
||||||
|
for |
||||||
|
with |
||||||
|
about |
||||||
|
against |
||||||
|
between |
||||||
|
into |
||||||
|
through |
||||||
|
during |
||||||
|
before |
||||||
|
after |
||||||
|
above |
||||||
|
below |
||||||
|
to |
||||||
|
from |
||||||
|
up |
||||||
|
down |
||||||
|
in |
||||||
|
out |
||||||
|
on |
||||||
|
off |
||||||
|
over |
||||||
|
under |
||||||
|
|
||||||
|
again |
||||||
|
further |
||||||
|
then |
||||||
|
once |
||||||
|
|
||||||
|
here |
||||||
|
there |
||||||
|
when |
||||||
|
where |
||||||
|
why |
||||||
|
how |
||||||
|
|
||||||
|
all |
||||||
|
any |
||||||
|
both |
||||||
|
each |
||||||
|
few |
||||||
|
more |
||||||
|
most |
||||||
|
other |
||||||
|
some |
||||||
|
such |
||||||
|
|
||||||
|
no |
||||||
|
nor |
||||||
|
not |
||||||
|
only |
||||||
|
own |
||||||
|
same |
||||||
|
so |
||||||
|
than |
||||||
|
too |
||||||
|
very |
||||||
|
|
||||||
|
| Just for the record, the following words are among the commonest in English |
||||||
|
|
||||||
|
| one |
||||||
|
| every |
||||||
|
| least |
||||||
|
| less |
||||||
|
| many |
||||||
|
| now |
||||||
|
| ever |
||||||
|
| never |
||||||
|
| say |
||||||
|
| says |
||||||
|
| said |
||||||
|
| also |
||||||
|
| get |
||||||
|
| go |
||||||
|
| goes |
||||||
|
| just |
||||||
|
| made |
||||||
|
| make |
||||||
|
| put |
||||||
|
| see |
||||||
|
| seen |
||||||
|
| whether |
||||||
|
| like |
||||||
|
| well |
||||||
|
| back |
||||||
|
| even |
||||||
|
| still |
||||||
|
| way |
||||||
|
| take |
||||||
|
| since |
||||||
|
| another |
||||||
|
| however |
||||||
|
| two |
||||||
|
| three |
||||||
|
| four |
||||||
|
| five |
||||||
|
| first |
||||||
|
| second |
||||||
|
| new |
||||||
|
| old |
||||||
|
| high |
||||||
|
| long |
||||||
|
`) |
||||||
|
|
||||||
|
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) { |
||||||
|
rv := analysis.NewTokenMap() |
||||||
|
err := rv.LoadBytes(EnglishStopWords) |
||||||
|
return rv, err |
||||||
|
} |
||||||
|
|
||||||
|
func init() { |
||||||
|
registry.RegisterTokenMap(StopName, TokenMapConstructor) |
||||||
|
} |
@ -0,0 +1,7 @@ |
|||||||
|
# full line comment |
||||||
|
marty |
||||||
|
steve # trailing comment |
||||||
|
| different format of comment |
||||||
|
dustin |
||||||
|
siri | different style trailing comment |
||||||
|
multiple words with different whitespace |
@ -0,0 +1,105 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
// Package lowercase implements a TokenFilter which converts
|
||||||
|
// tokens to lower case according to unicode rules.
|
||||||
|
package lowercase |
||||||
|
|
||||||
|
import ( |
||||||
|
"bytes" |
||||||
|
"unicode" |
||||||
|
"unicode/utf8" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/analysis" |
||||||
|
"github.com/blevesearch/bleve/registry" |
||||||
|
) |
||||||
|
|
||||||
|
// Name is the name used to register LowerCaseFilter in the bleve registry
|
||||||
|
const Name = "to_lower" |
||||||
|
|
||||||
|
type LowerCaseFilter struct { |
||||||
|
} |
||||||
|
|
||||||
|
func NewLowerCaseFilter() *LowerCaseFilter { |
||||||
|
return &LowerCaseFilter{} |
||||||
|
} |
||||||
|
|
||||||
|
func (f *LowerCaseFilter) Filter(input analysis.TokenStream) analysis.TokenStream { |
||||||
|
for _, token := range input { |
||||||
|
token.Term = toLowerDeferredCopy(token.Term) |
||||||
|
} |
||||||
|
return input |
||||||
|
} |
||||||
|
|
||||||
|
func LowerCaseFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { |
||||||
|
return NewLowerCaseFilter(), nil |
||||||
|
} |
||||||
|
|
||||||
|
func init() { |
||||||
|
registry.RegisterTokenFilter(Name, LowerCaseFilterConstructor) |
||||||
|
} |
||||||
|
|
||||||
|
// toLowerDeferredCopy lower-cases s like bytes.ToLower, but overwrites
// the input in place as long as each lowered rune has the same UTF-8
// width as the original. If a lowered rune is wider (e.g. Ⱥ(2 bytes)
// → ⱥ(3 bytes)), it allocates a new slice and defers the remainder to
// bytes.ToLower. A capital 'Σ' in the final position is lowered to the
// word-final form 'ς'.
func toLowerDeferredCopy(s []byte) []byte {
	dst := 0
	for src := 0; src < len(s); {
		width := 1
		orig := rune(s[src])
		if orig >= utf8.RuneSelf {
			orig, width = utf8.DecodeRune(s[src:])
		}

		lowered := unicode.ToLower(orig)

		// Already lower case: nothing to rewrite, advance both cursors.
		if lowered == orig {
			src += width
			dst += width
			continue
		}

		// Greek capital sigma at the very end of the input takes the
		// word-final form.
		if lowered == 'σ' && src+2 == len(s) {
			lowered = 'ς'
		}

		lowWidth := utf8.RuneLen(lowered)
		if lowWidth > width {
			// The lowered rune needs more bytes than the original, so
			// in-place rewriting is impossible; copy what was written
			// so far and let bytes.ToLower finish the rest.
			tail := bytes.ToLower(s[src:])
			out := make([]byte, dst+len(tail))
			copy(out[:dst], s[:dst])
			copy(out[dst:], tail)
			return out
		}
		utf8.EncodeRune(s[dst:], lowered)
		src += width
		dst += lowWidth
	}
	return s[:dst]
}
@ -0,0 +1,53 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package porter |
||||||
|
|
||||||
|
import ( |
||||||
|
"bytes" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/analysis" |
||||||
|
"github.com/blevesearch/bleve/registry" |
||||||
|
|
||||||
|
"github.com/blevesearch/go-porterstemmer" |
||||||
|
) |
||||||
|
|
||||||
|
const Name = "stemmer_porter" |
||||||
|
|
||||||
|
type PorterStemmer struct { |
||||||
|
} |
||||||
|
|
||||||
|
func NewPorterStemmer() *PorterStemmer { |
||||||
|
return &PorterStemmer{} |
||||||
|
} |
||||||
|
|
||||||
|
func (s *PorterStemmer) Filter(input analysis.TokenStream) analysis.TokenStream { |
||||||
|
for _, token := range input { |
||||||
|
// if it is not a protected keyword, stem it
|
||||||
|
if !token.KeyWord { |
||||||
|
termRunes := bytes.Runes(token.Term) |
||||||
|
stemmedRunes := porterstemmer.StemWithoutLowerCasing(termRunes) |
||||||
|
token.Term = analysis.BuildTermFromRunes(stemmedRunes) |
||||||
|
} |
||||||
|
} |
||||||
|
return input |
||||||
|
} |
||||||
|
|
||||||
|
func PorterStemmerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { |
||||||
|
return NewPorterStemmer(), nil |
||||||
|
} |
||||||
|
|
||||||
|
func init() { |
||||||
|
registry.RegisterTokenFilter(Name, PorterStemmerConstructor) |
||||||
|
} |
@ -0,0 +1,70 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
// Package stop implements a TokenFilter removing tokens found in
|
||||||
|
// a TokenMap.
|
||||||
|
//
|
||||||
|
// It constructor takes the following arguments:
|
||||||
|
//
|
||||||
|
// "stop_token_map" (string): the name of the token map identifying tokens to
|
||||||
|
// remove.
|
||||||
|
package stop |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/analysis" |
||||||
|
"github.com/blevesearch/bleve/registry" |
||||||
|
) |
||||||
|
|
||||||
|
const Name = "stop_tokens" |
||||||
|
|
||||||
|
type StopTokensFilter struct { |
||||||
|
stopTokens analysis.TokenMap |
||||||
|
} |
||||||
|
|
||||||
|
func NewStopTokensFilter(stopTokens analysis.TokenMap) *StopTokensFilter { |
||||||
|
return &StopTokensFilter{ |
||||||
|
stopTokens: stopTokens, |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func (f *StopTokensFilter) Filter(input analysis.TokenStream) analysis.TokenStream { |
||||||
|
j := 0 |
||||||
|
for _, token := range input { |
||||||
|
_, isStopToken := f.stopTokens[string(token.Term)] |
||||||
|
if !isStopToken { |
||||||
|
input[j] = token |
||||||
|
j++ |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
return input[:j] |
||||||
|
} |
||||||
|
|
||||||
|
func StopTokensFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { |
||||||
|
stopTokenMapName, ok := config["stop_token_map"].(string) |
||||||
|
if !ok { |
||||||
|
return nil, fmt.Errorf("must specify stop_token_map") |
||||||
|
} |
||||||
|
stopTokenMap, err := cache.TokenMapNamed(stopTokenMapName) |
||||||
|
if err != nil { |
||||||
|
return nil, fmt.Errorf("error building stop words filter: %v", err) |
||||||
|
} |
||||||
|
return NewStopTokensFilter(stopTokenMap), nil |
||||||
|
} |
||||||
|
|
||||||
|
func init() { |
||||||
|
registry.RegisterTokenFilter(Name, StopTokensFilterConstructor) |
||||||
|
} |
@ -0,0 +1,76 @@ |
|||||||
|
// Copyright (c) 2016 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package character |
||||||
|
|
||||||
|
import ( |
||||||
|
"unicode/utf8" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/analysis" |
||||||
|
) |
||||||
|
|
||||||
|
type IsTokenRune func(r rune) bool |
||||||
|
|
||||||
|
type CharacterTokenizer struct { |
||||||
|
isTokenRun IsTokenRune |
||||||
|
} |
||||||
|
|
||||||
|
func NewCharacterTokenizer(f IsTokenRune) *CharacterTokenizer { |
||||||
|
return &CharacterTokenizer{ |
||||||
|
isTokenRun: f, |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Tokenize scans input rune by rune, emitting one token for every maximal
// run of runes accepted by the predicate. Start/End are byte offsets into
// input; Position is the 1-based token index.
// NOTE(review): the loop terminates when utf8.DecodeRune returns RuneError,
// which also happens on the first invalid UTF-8 byte — inputs appear to be
// assumed valid UTF-8; confirm.
func (c *CharacterTokenizer) Tokenize(input []byte) analysis.TokenStream {

	rv := make(analysis.TokenStream, 0, 1024)

	offset := 0 // byte offset of the next rune to decode
	start := 0  // byte offset where the current token began
	end := 0    // byte offset one past the last accepted rune
	count := 0  // number of tokens emitted so far
	for currRune, size := utf8.DecodeRune(input[offset:]); currRune != utf8.RuneError; currRune, size = utf8.DecodeRune(input[offset:]) {
		isToken := c.isTokenRun(currRune)
		if isToken {
			// extend the current token over this rune
			end = offset + size
		} else {
			if end-start > 0 {
				// build token
				rv = append(rv, &analysis.Token{
					Term:     input[start:end],
					Start:    start,
					End:      end,
					Position: count + 1,
					Type:     analysis.AlphaNumeric,
				})
				count++
			}
			// skip the separator; the next token starts after it
			start = offset + size
			end = start
		}
		offset += size
	}
	// if we ended in the middle of a token, finish it
	if end-start > 0 {
		// build token
		rv = append(rv, &analysis.Token{
			Term:     input[start:end],
			Start:    start,
			End:      end,
			Position: count + 1,
			Type:     analysis.AlphaNumeric,
		})
	}
	return rv
}
@ -0,0 +1,33 @@ |
|||||||
|
// Copyright (c) 2016 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package letter |
||||||
|
|
||||||
|
import ( |
||||||
|
"unicode" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/analysis" |
||||||
|
"github.com/blevesearch/bleve/analysis/tokenizer/character" |
||||||
|
"github.com/blevesearch/bleve/registry" |
||||||
|
) |
||||||
|
|
||||||
|
// Name is the registry name of the letter tokenizer.
const Name = "letter"

// TokenizerConstructor builds a tokenizer that treats every Unicode
// letter as a token rune; all other runes separate tokens. It takes no
// configuration options.
func TokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
	return character.NewCharacterTokenizer(unicode.IsLetter), nil
}

// register the tokenizer under its canonical name.
func init() {
	registry.RegisterTokenizer(Name, TokenizerConstructor)
}
@ -0,0 +1,131 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package unicode |
||||||
|
|
||||||
|
import ( |
||||||
|
"github.com/blevesearch/segment" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/analysis" |
||||||
|
"github.com/blevesearch/bleve/registry" |
||||||
|
) |
||||||
|
|
||||||
|
// Name is the registry name of the unicode tokenizer.
const Name = "unicode"

// UnicodeTokenizer segments input into word tokens using Unicode text
// segmentation (via github.com/blevesearch/segment).
type UnicodeTokenizer struct {
}

// NewUnicodeTokenizer returns a ready-to-use UnicodeTokenizer.
func NewUnicodeTokenizer() *UnicodeTokenizer {
	return &UnicodeTokenizer{}
}
||||||
|
|
||||||
|
// Tokenize splits input into word tokens via Unicode segmentation.
// To cut per-token allocations, tokens are bulk-allocated in slabs (ta)
// whose size is guessed from the average segment length seen so far;
// completed batches of token pointers are parked in rvx and stitched
// into a single stream at the end.
// NOTE(review): segmenter.Err() is never checked, so a segmentation
// error silently truncates the token stream — confirm acceptable.
func (rt *UnicodeTokenizer) Tokenize(input []byte) analysis.TokenStream {
	rvx := make([]analysis.TokenStream, 0, 10) // When rv gets full, append to rvx.
	rv := make(analysis.TokenStream, 0, 1)

	ta := []analysis.Token(nil) // current slab of preallocated tokens
	taNext := 0                 // next unused slot in ta

	segmenter := segment.NewWordSegmenterDirect(input)
	start := 0
	pos := 1

	// guessRemaining estimates how many segments remain after byte offset
	// end, assuming segments keep their average length so far.
	guessRemaining := func(end int) int {
		avgSegmentLen := end / (len(rv) + 1)
		if avgSegmentLen < 1 {
			avgSegmentLen = 1
		}

		remainingLen := len(input) - end

		return remainingLen / avgSegmentLen
	}

	for segmenter.Segment() {
		segmentBytes := segmenter.Bytes()
		end := start + len(segmentBytes)
		if segmenter.Type() != segment.None { // ignore separator segments
			if taNext >= len(ta) {
				// slab exhausted: allocate a new one, clamped to [1, 1000]
				remainingSegments := guessRemaining(end)
				if remainingSegments > 1000 {
					remainingSegments = 1000
				}
				if remainingSegments < 1 {
					remainingSegments = 1
				}

				ta = make([]analysis.Token, remainingSegments)
				taNext = 0
			}

			token := &ta[taNext]
			taNext++

			token.Term = segmentBytes
			token.Start = start
			token.End = end
			token.Position = pos
			token.Type = convertType(segmenter.Type())

			if len(rv) >= cap(rv) { // When rv is full, save it into rvx.
				rvx = append(rvx, rv)

				rvCap := cap(rv) * 2
				if rvCap > 256 {
					rvCap = 256
				}

				rv = make(analysis.TokenStream, 0, rvCap) // Next rv cap is bigger.
			}

			rv = append(rv, token)
			pos++
		}
		start = end
	}

	// flatten all parked batches plus the current one into a single stream
	if len(rvx) > 0 {
		n := len(rv)
		for _, r := range rvx {
			n += len(r)
		}
		rall := make(analysis.TokenStream, 0, n)
		for _, r := range rvx {
			rall = append(rall, r...)
		}
		return append(rall, rv...)
	}

	return rv
}
||||||
|
|
||||||
|
// UnicodeTokenizerConstructor builds the unicode tokenizer; it takes no
// configuration options.
func UnicodeTokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
	return NewUnicodeTokenizer(), nil
}

// register the tokenizer under its canonical name.
func init() {
	registry.RegisterTokenizer(Name, UnicodeTokenizerConstructor)
}
||||||
|
|
||||||
|
func convertType(segmentWordType int) analysis.TokenType { |
||||||
|
switch segmentWordType { |
||||||
|
case segment.Ideo: |
||||||
|
return analysis.Ideographic |
||||||
|
case segment.Kana: |
||||||
|
return analysis.Ideographic |
||||||
|
case segment.Number: |
||||||
|
return analysis.Numeric |
||||||
|
} |
||||||
|
return analysis.AlphaNumeric |
||||||
|
} |
@ -0,0 +1,76 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package analysis |
||||||
|
|
||||||
|
import ( |
||||||
|
"bufio" |
||||||
|
"bytes" |
||||||
|
"io" |
||||||
|
"io/ioutil" |
||||||
|
"strings" |
||||||
|
) |
||||||
|
|
||||||
|
// TokenMap is a set of tokens, keyed by token text.
type TokenMap map[string]bool

// NewTokenMap returns an empty, ready-to-use token set.
func NewTokenMap() TokenMap {
	return TokenMap{}
}
||||||
|
|
||||||
|
// LoadFile reads in a list of tokens from a text file,
// one per line.
// Comments are supported using `#` or `|`
func (t TokenMap) LoadFile(filename string) error {
	// read the whole file eagerly; line parsing is delegated to LoadBytes
	data, err := ioutil.ReadFile(filename)
	if err != nil {
		return err
	}
	return t.LoadBytes(data)
}
||||||
|
|
||||||
|
// LoadBytes reads in a list of tokens from memory,
// one per line.
// Comments are supported using `#` or `|`
func (t TokenMap) LoadBytes(data []byte) error {
	bytesReader := bytes.NewReader(data)
	bufioReader := bufio.NewReader(bytesReader)
	// ReadString returns io.EOF together with any final unterminated line,
	// so the loop body runs only for newline-terminated lines
	line, err := bufioReader.ReadString('\n')
	for err == nil {
		t.LoadLine(line)
		line, err = bufioReader.ReadString('\n')
	}
	// if the err was EOF we still need to process the last value
	if err == io.EOF {
		t.LoadLine(line)
		return nil
	}
	return err
}
||||||
|
|
||||||
|
func (t TokenMap) LoadLine(line string) { |
||||||
|
// find the start of a comment, if any
|
||||||
|
startComment := strings.IndexAny(line, "#|") |
||||||
|
if startComment >= 0 { |
||||||
|
line = line[:startComment] |
||||||
|
} |
||||||
|
|
||||||
|
tokens := strings.Fields(line) |
||||||
|
for _, token := range tokens { |
||||||
|
t.AddToken(token) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func (t TokenMap) AddToken(token string) { |
||||||
|
t[token] = true |
||||||
|
} |
@ -0,0 +1,103 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package analysis |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"time" |
||||||
|
) |
||||||
|
|
||||||
|
// CharFilter transforms raw input bytes before tokenization.
type CharFilter interface {
	Filter([]byte) []byte
}

// TokenType classifies a token produced by a Tokenizer.
type TokenType int

// The known token classifications.
const (
	AlphaNumeric TokenType = iota
	Ideographic
	Numeric
	DateTime
	Shingle
	Single
	Double
	Boolean
)
||||||
|
|
||||||
|
// Token represents one occurrence of a term at a particular location in a
// field.
type Token struct {
	// Start specifies the byte offset of the beginning of the term in the
	// field.
	Start int `json:"start"`

	// End specifies the byte offset of the end of the term in the field.
	End int `json:"end"`
	// Term is the raw bytes of the term as produced by analysis.
	Term []byte `json:"term"`

	// Position specifies the 1-based index of the token in the sequence of
	// occurrences of its term in the field.
	Position int `json:"position"`
	// Type classifies the token (alphanumeric, numeric, datetime, ...).
	Type TokenType `json:"type"`
	// KeyWord marks the token as a keyword that filters should not alter.
	KeyWord bool `json:"keyword"`
}

// String renders the token in a single human-readable line, mainly for
// debugging.
func (t *Token) String() string {
	return fmt.Sprintf("Start: %d End: %d Position: %d Token: %s Type: %d", t.Start, t.End, t.Position, string(t.Term), t.Type)
}
||||||
|
|
||||||
|
// TokenStream is an ordered sequence of tokens.
type TokenStream []*Token

// A Tokenizer splits an input string into tokens, the usual behaviour being to
// map words to tokens.
type Tokenizer interface {
	Tokenize([]byte) TokenStream
}

// A TokenFilter adds, transforms or removes tokens from a token stream.
type TokenFilter interface {
	Filter(TokenStream) TokenStream
}

// Analyzer is a complete analysis pipeline: optional character filters,
// a tokenizer, then optional token filters, applied in that order.
type Analyzer struct {
	CharFilters  []CharFilter
	Tokenizer    Tokenizer
	TokenFilters []TokenFilter
}
||||||
|
|
||||||
|
func (a *Analyzer) Analyze(input []byte) TokenStream { |
||||||
|
if a.CharFilters != nil { |
||||||
|
for _, cf := range a.CharFilters { |
||||||
|
input = cf.Filter(input) |
||||||
|
} |
||||||
|
} |
||||||
|
tokens := a.Tokenizer.Tokenize(input) |
||||||
|
if a.TokenFilters != nil { |
||||||
|
for _, tf := range a.TokenFilters { |
||||||
|
tokens = tf.Filter(tokens) |
||||||
|
} |
||||||
|
} |
||||||
|
return tokens |
||||||
|
} |
||||||
|
|
||||||
|
// ErrInvalidDateTime is returned when no configured layout can parse a
// datetime string.
var ErrInvalidDateTime = fmt.Errorf("unable to parse datetime with any of the layouts")

// DateTimeParser parses a textual datetime into a time.Time.
type DateTimeParser interface {
	ParseDateTime(string) (time.Time, error)
}

// ByteArrayConverter converts raw stored bytes into a typed value.
type ByteArrayConverter interface {
	Convert([]byte) (interface{}, error)
}
@ -0,0 +1,92 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package analysis |
||||||
|
|
||||||
|
import ( |
||||||
|
"bytes" |
||||||
|
"unicode/utf8" |
||||||
|
) |
||||||
|
|
||||||
|
// DeleteRune removes the rune at pos, shifting the tail left in place
// (the input's backing array is reused) and returning the shortened
// slice. An out-of-range pos returns the input unchanged.
func DeleteRune(in []rune, pos int) []rune {
	if pos >= len(in) {
		return in
	}
	return append(in[:pos], in[pos+1:]...)
}
||||||
|
|
||||||
|
// InsertRune returns a new slice equal to in with r inserted at index
// pos; the input slice is left untouched.
func InsertRune(in []rune, pos int, r rune) []rune {
	out := make([]rune, 0, len(in)+1)
	out = append(out, in[:pos]...) // runes before the insertion point
	out = append(out, r)           // the inserted rune
	out = append(out, in[pos:]...) // runes after the insertion point
	return out
}
||||||
|
|
||||||
|
// BuildTermFromRunesOptimistic encodes runes into buf when it is large
// enough; if the encoded form would overflow, a single worst-case sized
// buffer is allocated and the bytes already written are carried over.
// This pays off when the new term is usually the same length or shorter
// (in bytes) than the original term.
func BuildTermFromRunesOptimistic(buf []byte, runes []rune) []byte {
	out := buf
	n := 0
	for _, r := range runes {
		need := utf8.RuneLen(r)
		if n+need > len(out) {
			// grow to the worst case and keep what was already encoded
			bigger := make([]byte, len(runes)*utf8.UTFMax)
			copy(bigger, out[:n])
			out = bigger
		}
		n += utf8.EncodeRune(out[n:], r)
	}
	return out[:n]
}

// BuildTermFromRunes encodes runes into a freshly allocated buffer.
func BuildTermFromRunes(runes []rune) []byte {
	scratch := make([]byte, len(runes)*utf8.UTFMax)
	return BuildTermFromRunesOptimistic(scratch, runes)
}

// TruncateRunes drops the final num runes from input and re-encodes the
// remainder as UTF-8 bytes.
func TruncateRunes(input []byte, num int) []byte {
	rs := bytes.Runes(input)
	return BuildTermFromRunes(rs[:len(rs)-num])
}
||||||
|
|
||||||
|
func RunesEndsWith(input []rune, suffix string) bool { |
||||||
|
inputLen := len(input) |
||||||
|
suffixRunes := []rune(suffix) |
||||||
|
suffixLen := len(suffixRunes) |
||||||
|
if suffixLen > inputLen { |
||||||
|
return false |
||||||
|
} |
||||||
|
|
||||||
|
for i := suffixLen - 1; i >= 0; i-- { |
||||||
|
if input[inputLen-(suffixLen-i)] != suffixRunes[i] { |
||||||
|
return false |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
return true |
||||||
|
} |
@ -0,0 +1,88 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package bleve |
||||||
|
|
||||||
|
import ( |
||||||
|
"expvar" |
||||||
|
"io/ioutil" |
||||||
|
"log" |
||||||
|
"time" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/index" |
||||||
|
"github.com/blevesearch/bleve/index/store/gtreap" |
||||||
|
"github.com/blevesearch/bleve/index/upsidedown" |
||||||
|
"github.com/blevesearch/bleve/registry" |
||||||
|
"github.com/blevesearch/bleve/search/highlight/highlighter/html" |
||||||
|
) |
||||||
|
|
||||||
|
// bleveExpVar publishes library-level metrics under the "bleve" expvar map.
var bleveExpVar = expvar.NewMap("bleve")

// configuration holds library level settings shared by all indexes.
type configuration struct {
	Cache                  *registry.Cache
	DefaultHighlighter     string
	DefaultKVStore         string // on-disk KV store name; set by initDisk where supported
	DefaultMemKVStore      string
	DefaultIndexType       string
	SlowSearchLogThreshold time.Duration
	analysisQueue          *index.AnalysisQueue
}

// SetAnalysisQueueSize replaces the shared analysis queue with one
// using n workers.
func (c *configuration) SetAnalysisQueueSize(n int) {
	c.analysisQueue = index.NewAnalysisQueue(n)
}

// newConfiguration returns a configuration with a fresh registry cache
// and a 4-worker analysis queue; other fields start at their zero values.
func newConfiguration() *configuration {
	return &configuration{
		Cache:         registry.NewCache(),
		analysisQueue: index.NewAnalysisQueue(4),
	}
}

// Config contains library level configuration
var Config *configuration
||||||
|
|
||||||
|
// init assembles the package-level Config with defaults, records boot
// duration and index stats in expvar, and calls the build-specific
// initDisk hook to select an on-disk KV store where supported.
func init() {
	bootStart := time.Now()

	// build the default configuration
	Config = newConfiguration()

	// set the default highlighter
	Config.DefaultHighlighter = html.Name

	// default kv store
	Config.DefaultKVStore = ""

	// default mem only kv store
	Config.DefaultMemKVStore = gtreap.Name

	// default index
	Config.DefaultIndexType = upsidedown.Name

	bootDuration := time.Since(bootStart)
	bleveExpVar.Add("bootDuration", int64(bootDuration))
	indexStats = NewIndexStats()
	bleveExpVar.Set("indexes", indexStats)

	initDisk()
}
||||||
|
|
||||||
|
// logger discards all output until SetLog installs a real logger.
var logger = log.New(ioutil.Discard, "bleve", log.LstdFlags)

// SetLog sets the logger used for logging
// by default log messages are sent to ioutil.Discard
func SetLog(l *log.Logger) {
	logger = l
}
@ -0,0 +1,23 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
// +build appengine appenginevm
|
||||||
|
|
||||||
|
package bleve |
||||||
|
|
||||||
|
// in the appengine environment we cannot support disk based indexes
// so we do no extra configuration in this method
// (Config.DefaultKVStore keeps the empty value set in init).
func initDisk() {

}
@ -0,0 +1,25 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
// +build !appengine,!appenginevm
|
||||||
|
|
||||||
|
package bleve |
||||||
|
|
||||||
|
import "github.com/blevesearch/bleve/index/store/boltdb" |
||||||
|
|
||||||
|
// in normal environments we configure boltdb as the default storage
// (runs from this package's init, after the shared defaults are set)
func initDisk() {
	// default kv store
	Config.DefaultKVStore = boltdb.Name
}
@ -0,0 +1,38 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
/* |
||||||
|
Package bleve is a library for indexing and searching text. |
||||||
|
|
||||||
|
Example Opening New Index, Indexing Data |
||||||
|
|
||||||
|
    message := struct {
        Id   string
        From string
        Body string
    }{
        Id:   "example",
        From: "marty.schoch@gmail.com",
        Body: "bleve indexing is easy",
    }
||||||
|
|
||||||
|
mapping := bleve.NewIndexMapping() |
||||||
|
index, _ := bleve.New("example.bleve", mapping) |
||||||
|
index.Index(message.Id, message) |
||||||
|
|
||||||
|
Example Opening Existing Index, Searching Data |
||||||
|
|
||||||
|
index, _ := bleve.Open("example.bleve") |
||||||
|
query := bleve.NewQueryStringQuery("bleve") |
||||||
|
searchRequest := bleve.NewSearchRequest(query) |
||||||
|
searchResult, _ := index.Search(searchRequest) |
||||||
|
|
||||||
|
*/ |
||||||
|
package bleve |
@ -0,0 +1,75 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package document |
||||||
|
|
||||||
|
import "fmt" |
||||||
|
|
||||||
|
// Document is the unit of indexing: an identifier plus the fields (and
// composite fields) extracted from the source object.
type Document struct {
	ID              string  `json:"id"`
	Fields          []Field `json:"fields"`
	CompositeFields []*CompositeField
	// Number is excluded from JSON; NOTE(review): appears to be an
	// internal sequence number assigned elsewhere — confirm.
	Number uint64 `json:"-"`
}

// NewDocument returns an empty document with the given identifier.
func NewDocument(id string) *Document {
	return &Document{
		ID:              id,
		Fields:          make([]Field, 0),
		CompositeFields: make([]*CompositeField, 0),
	}
}
||||||
|
|
||||||
|
func (d *Document) AddField(f Field) *Document { |
||||||
|
switch f := f.(type) { |
||||||
|
case *CompositeField: |
||||||
|
d.CompositeFields = append(d.CompositeFields, f) |
||||||
|
default: |
||||||
|
d.Fields = append(d.Fields, f) |
||||||
|
} |
||||||
|
return d |
||||||
|
} |
||||||
|
|
||||||
|
// GoString renders the document and its fields in Go syntax for %#v
// formatting and debugging.
func (d *Document) GoString() string {
	// comma-join the %#v renderings of the plain fields
	fields := ""
	for i, field := range d.Fields {
		if i != 0 {
			fields += ", "
		}
		fields += fmt.Sprintf("%#v", field)
	}
	// and likewise for the composite fields
	compositeFields := ""
	for i, field := range d.CompositeFields {
		if i != 0 {
			compositeFields += ", "
		}
		compositeFields += fmt.Sprintf("%#v", field)
	}
	return fmt.Sprintf("&document.Document{ID:%s, Fields: %s, CompositeFields: %s}", d.ID, fields, compositeFields)
}
||||||
|
|
||||||
|
// NumPlainTextBytes totals the plain-text byte counts of all fields.
// A field is counted once for itself and once more for every composite
// field whose include/exclude rules cover it.
func (d *Document) NumPlainTextBytes() uint64 {
	rv := uint64(0)
	for _, field := range d.Fields {
		rv += field.NumPlainTextBytes()
	}
	// composite fields re-count the underlying fields they include
	for _, compositeField := range d.CompositeFields {
		for _, field := range d.Fields {
			if compositeField.includesField(field.Name()) {
				rv += field.NumPlainTextBytes()
			}
		}
	}
	return rv
}
@ -0,0 +1,39 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package document |
||||||
|
|
||||||
|
import ( |
||||||
|
"github.com/blevesearch/bleve/analysis" |
||||||
|
) |
||||||
|
|
||||||
|
// Field is a single named, indexable value extracted from a document.
type Field interface {
	// Name returns the path of the field from the root DocumentMapping.
	// A root field path is "field", a subdocument field is "parent.field".
	Name() string
	// ArrayPositions returns the intermediate document and field indices
	// required to resolve the field value in the document. For example, if the
	// field path is "doc1.doc2.field" where doc1 and doc2 are slices or
	// arrays, ArrayPositions returns 2 indices used to resolve "doc2" value in
	// "doc1", then "field" in "doc2".
	ArrayPositions() []uint64
	// Options returns the indexing options in effect for this field.
	Options() IndexingOptions
	// Analyze produces the field length and per-term frequencies.
	Analyze() (int, analysis.TokenFrequencies)
	// Value returns the raw stored bytes of the field.
	Value() []byte

	// NumPlainTextBytes should return the number of plain text bytes
	// that this field represents - this is a common metric for tracking
	// the rate of indexing
	NumPlainTextBytes() uint64
}
@ -0,0 +1,107 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package document |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/analysis" |
||||||
|
) |
||||||
|
|
||||||
|
// DefaultBooleanIndexingOptions both stores and indexes boolean fields.
const DefaultBooleanIndexingOptions = StoreField | IndexField

// BooleanField carries a boolean value encoded as a single byte:
// 'T' for true, 'F' for false (see NewBooleanFieldWithIndexingOptions).
type BooleanField struct {
	name              string
	arrayPositions    []uint64
	options           IndexingOptions
	value             []byte // single-byte encoding: 'T' or 'F'
	numPlainTextBytes uint64 // plain-text size recorded at construction
}
||||||
|
|
||||||
|
// Name returns the field's path within the document mapping.
func (b *BooleanField) Name() string {
	return b.name
}

// ArrayPositions returns the indices needed to locate this value inside
// nested arrays of the source document.
func (b *BooleanField) ArrayPositions() []uint64 {
	return b.arrayPositions
}

// Options returns the indexing options for this field.
func (b *BooleanField) Options() IndexingOptions {
	return b.options
}
||||||
|
|
||||||
|
// Analyze emits a single Boolean-typed token covering the whole encoded
// value, returning the field length (always 1) and the term frequencies.
func (b *BooleanField) Analyze() (int, analysis.TokenFrequencies) {
	tokens := make(analysis.TokenStream, 0)
	tokens = append(tokens, &analysis.Token{
		Start:    0,
		End:      len(b.value),
		Term:     b.value,
		Position: 1,
		Type:     analysis.Boolean,
	})

	fieldLength := len(tokens)
	tokenFreqs := analysis.TokenFrequency(tokens, b.arrayPositions, b.options.IncludeTermVectors())
	return fieldLength, tokenFreqs
}
||||||
|
|
||||||
|
// Value returns the encoded boolean ('T' or 'F') as stored.
func (b *BooleanField) Value() []byte {
	return b.value
}

// Boolean decodes the stored byte back into a bool; any encoding other
// than exactly one byte is an error.
func (b *BooleanField) Boolean() (bool, error) {
	if len(b.value) == 1 {
		// anything other than 'T' decodes as false
		return b.value[0] == 'T', nil
	}
	return false, fmt.Errorf("boolean field has %d bytes", len(b.value))
}
||||||
|
|
||||||
|
// GoString renders the field in Go syntax for %#v formatting.
func (b *BooleanField) GoString() string {
	return fmt.Sprintf("&document.BooleanField{Name:%s, Options: %s, Value: %s}", b.name, b.options, b.value)
}

// NumPlainTextBytes reports the plain-text byte count recorded when the
// field was constructed.
func (b *BooleanField) NumPlainTextBytes() uint64 {
	return b.numPlainTextBytes
}
||||||
|
|
||||||
|
// NewBooleanFieldFromBytes wraps an already-encoded value without
// re-encoding it; numPlainTextBytes is taken as the value's length.
// NOTE(review): options default to DefaultNumericIndexingOptions rather
// than DefaultBooleanIndexingOptions — matches this code as written, but
// worth confirming it is intentional.
func NewBooleanFieldFromBytes(name string, arrayPositions []uint64, value []byte) *BooleanField {
	return &BooleanField{
		name:              name,
		arrayPositions:    arrayPositions,
		value:             value,
		options:           DefaultNumericIndexingOptions,
		numPlainTextBytes: uint64(len(value)),
	}
}

// NewBooleanField builds a boolean field with the default options.
func NewBooleanField(name string, arrayPositions []uint64, b bool) *BooleanField {
	return NewBooleanFieldWithIndexingOptions(name, arrayPositions, b, DefaultNumericIndexingOptions)
}

// NewBooleanFieldWithIndexingOptions encodes b as a single byte ('T' or
// 'F') while numPlainTextBytes records the length of the textual form:
// 4 for "true", 5 for "false".
func NewBooleanFieldWithIndexingOptions(name string, arrayPositions []uint64, b bool, options IndexingOptions) *BooleanField {
	numPlainTextBytes := 5
	v := []byte("F")
	if b {
		numPlainTextBytes = 4
		v = []byte("T")
	}
	return &BooleanField{
		name:              name,
		arrayPositions:    arrayPositions,
		value:             v,
		options:           options,
		numPlainTextBytes: uint64(numPlainTextBytes),
	}
}
@ -0,0 +1,99 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package document |
||||||
|
|
||||||
|
import ( |
||||||
|
"github.com/blevesearch/bleve/analysis" |
||||||
|
) |
||||||
|
|
||||||
|
const DefaultCompositeIndexingOptions = IndexField |
||||||
|
|
||||||
|
type CompositeField struct { |
||||||
|
name string |
||||||
|
includedFields map[string]bool |
||||||
|
excludedFields map[string]bool |
||||||
|
defaultInclude bool |
||||||
|
options IndexingOptions |
||||||
|
totalLength int |
||||||
|
compositeFrequencies analysis.TokenFrequencies |
||||||
|
} |
||||||
|
|
||||||
|
func NewCompositeField(name string, defaultInclude bool, include []string, exclude []string) *CompositeField { |
||||||
|
return NewCompositeFieldWithIndexingOptions(name, defaultInclude, include, exclude, DefaultCompositeIndexingOptions) |
||||||
|
} |
||||||
|
|
||||||
|
func NewCompositeFieldWithIndexingOptions(name string, defaultInclude bool, include []string, exclude []string, options IndexingOptions) *CompositeField { |
||||||
|
rv := &CompositeField{ |
||||||
|
name: name, |
||||||
|
options: options, |
||||||
|
defaultInclude: defaultInclude, |
||||||
|
includedFields: make(map[string]bool, len(include)), |
||||||
|
excludedFields: make(map[string]bool, len(exclude)), |
||||||
|
compositeFrequencies: make(analysis.TokenFrequencies), |
||||||
|
} |
||||||
|
|
||||||
|
for _, i := range include { |
||||||
|
rv.includedFields[i] = true |
||||||
|
} |
||||||
|
for _, e := range exclude { |
||||||
|
rv.excludedFields[e] = true |
||||||
|
} |
||||||
|
|
||||||
|
return rv |
||||||
|
} |
||||||
|
|
||||||
|
func (c *CompositeField) Name() string { |
||||||
|
return c.name |
||||||
|
} |
||||||
|
|
||||||
|
func (c *CompositeField) ArrayPositions() []uint64 { |
||||||
|
return []uint64{} |
||||||
|
} |
||||||
|
|
||||||
|
func (c *CompositeField) Options() IndexingOptions { |
||||||
|
return c.options |
||||||
|
} |
||||||
|
|
||||||
|
func (c *CompositeField) Analyze() (int, analysis.TokenFrequencies) { |
||||||
|
return c.totalLength, c.compositeFrequencies |
||||||
|
} |
||||||
|
|
||||||
|
func (c *CompositeField) Value() []byte { |
||||||
|
return []byte{} |
||||||
|
} |
||||||
|
|
||||||
|
func (c *CompositeField) NumPlainTextBytes() uint64 { |
||||||
|
return 0 |
||||||
|
} |
||||||
|
|
||||||
|
func (c *CompositeField) includesField(field string) bool { |
||||||
|
shouldInclude := c.defaultInclude |
||||||
|
_, fieldShouldBeIncluded := c.includedFields[field] |
||||||
|
if fieldShouldBeIncluded { |
||||||
|
shouldInclude = true |
||||||
|
} |
||||||
|
_, fieldShouldBeExcluded := c.excludedFields[field] |
||||||
|
if fieldShouldBeExcluded { |
||||||
|
shouldInclude = false |
||||||
|
} |
||||||
|
return shouldInclude |
||||||
|
} |
||||||
|
|
||||||
|
func (c *CompositeField) Compose(field string, length int, freq analysis.TokenFrequencies) { |
||||||
|
if c.includesField(field) { |
||||||
|
c.totalLength += length |
||||||
|
c.compositeFrequencies.MergeAll(field, freq) |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,144 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package document |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"math" |
||||||
|
"time" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/analysis" |
||||||
|
"github.com/blevesearch/bleve/numeric" |
||||||
|
) |
||||||
|
|
||||||
|
// DefaultDateTimeIndexingOptions is used when no explicit options are
// supplied for a date/time field.
const DefaultDateTimeIndexingOptions = StoreField | IndexField

// DefaultDateTimePrecisionStep is the shift increment between the
// successive prefix-coded terms generated for one date/time value.
const DefaultDateTimePrecisionStep uint = 4

// MinTimeRepresentable and MaxTimeRepresentable bound the instants that
// can be encoded as an int64 of nanoseconds since the Unix epoch.
var MinTimeRepresentable = time.Unix(0, math.MinInt64)
var MaxTimeRepresentable = time.Unix(0, math.MaxInt64)

// DateTimeField indexes a single date/time value, stored internally as a
// prefix-coded int64 of nanoseconds since the Unix epoch.
type DateTimeField struct {
	name              string
	arrayPositions    []uint64
	options           IndexingOptions
	value             numeric.PrefixCoded
	numPlainTextBytes uint64
}

// Name returns the name of the field.
func (n *DateTimeField) Name() string {
	return n.name
}

// ArrayPositions returns the array positions recorded for this field.
func (n *DateTimeField) ArrayPositions() []uint64 {
	return n.arrayPositions
}

// Options returns the indexing options for this field.
func (n *DateTimeField) Options() IndexingOptions {
	return n.options
}

// Analyze produces the token stream for this field: one token for the
// full-precision value, plus one coarser prefix-coded token per
// DefaultDateTimePrecisionStep of shift, until encoding fails or the
// shift reaches 64 bits.
func (n *DateTimeField) Analyze() (int, analysis.TokenFrequencies) {
	tokens := make(analysis.TokenStream, 0)
	tokens = append(tokens, &analysis.Token{
		Start:    0,
		End:      len(n.value),
		Term:     n.value,
		Position: 1,
		Type:     analysis.DateTime,
	})

	original, err := n.value.Int64()
	if err == nil {
		// Generate progressively lower-precision terms of the same value.
		shift := DefaultDateTimePrecisionStep
		for shift < 64 {
			shiftEncoded, err := numeric.NewPrefixCodedInt64(original, shift)
			if err != nil {
				break
			}
			token := analysis.Token{
				Start:    0,
				End:      len(shiftEncoded),
				Term:     shiftEncoded,
				Position: 1,
				Type:     analysis.DateTime,
			}
			tokens = append(tokens, &token)
			shift += DefaultDateTimePrecisionStep
		}
	}

	fieldLength := len(tokens)
	tokenFreqs := analysis.TokenFrequency(tokens, n.arrayPositions, n.options.IncludeTermVectors())
	return fieldLength, tokenFreqs
}

// Value returns the prefix-coded encoding of the date/time value.
func (n *DateTimeField) Value() []byte {
	return n.value
}

// DateTime decodes the stored value back into a UTC time.Time.
func (n *DateTimeField) DateTime() (time.Time, error) {
	i64, err := n.value.Int64()
	if err != nil {
		return time.Time{}, err
	}
	return time.Unix(0, i64).UTC(), nil
}

// GoString implements fmt.GoStringer for debug output.
// NOTE(review): the output says "DateField" although the type is
// DateTimeField — upstream string kept as-is.
func (n *DateTimeField) GoString() string {
	return fmt.Sprintf("&document.DateField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
}

// NumPlainTextBytes returns the plain-text size recorded for this field.
func (n *DateTimeField) NumPlainTextBytes() uint64 {
	return n.numPlainTextBytes
}

// NewDateTimeFieldFromBytes wraps an already prefix-coded value in a
// DateTimeField using the default options.
func NewDateTimeFieldFromBytes(name string, arrayPositions []uint64, value []byte) *DateTimeField {
	return &DateTimeField{
		name:              name,
		arrayPositions:    arrayPositions,
		value:             value,
		options:           DefaultDateTimeIndexingOptions,
		numPlainTextBytes: uint64(len(value)),
	}
}

// NewDateTimeField builds a DateTimeField with the default indexing
// options. It fails when dt is outside the representable range.
func NewDateTimeField(name string, arrayPositions []uint64, dt time.Time) (*DateTimeField, error) {
	return NewDateTimeFieldWithIndexingOptions(name, arrayPositions, dt, DefaultDateTimeIndexingOptions)
}

// NewDateTimeFieldWithIndexingOptions encodes dt as a prefix-coded int64
// of Unix nanoseconds. It returns an error when dt falls outside the
// range representable by an int64 of nanoseconds.
func NewDateTimeFieldWithIndexingOptions(name string, arrayPositions []uint64, dt time.Time, options IndexingOptions) (*DateTimeField, error) {
	if canRepresent(dt) {
		dtInt64 := dt.UnixNano()
		prefixCoded := numeric.MustNewPrefixCodedInt64(dtInt64, 0)
		return &DateTimeField{
			name:           name,
			arrayPositions: arrayPositions,
			value:          prefixCoded,
			options:        options,
			// not correct, just a place holder until we revisit how fields are
			// represented and can fix this better
			numPlainTextBytes: uint64(8),
		}, nil
	}
	return nil, fmt.Errorf("cannot represent %s in this type", dt)
}

// canRepresent reports whether dt fits in the int64-nanosecond range
// bounded by MinTimeRepresentable and MaxTimeRepresentable.
func canRepresent(dt time.Time) bool {
	if dt.Before(MinTimeRepresentable) || dt.After(MaxTimeRepresentable) {
		return false
	}
	return true
}
@ -0,0 +1,130 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package document |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/analysis" |
||||||
|
"github.com/blevesearch/bleve/numeric" |
||||||
|
) |
||||||
|
|
||||||
|
// DefaultNumericIndexingOptions is used when no explicit options are
// supplied for a numeric field.
const DefaultNumericIndexingOptions = StoreField | IndexField

// DefaultPrecisionStep is the shift increment between the successive
// prefix-coded terms generated for one numeric value.
const DefaultPrecisionStep uint = 4

// NumericField indexes a single float64 value, stored internally as a
// prefix-coded sortable int64.
type NumericField struct {
	name              string
	arrayPositions    []uint64
	options           IndexingOptions
	value             numeric.PrefixCoded
	numPlainTextBytes uint64
}

// Name returns the name of the field.
func (n *NumericField) Name() string {
	return n.name
}

// ArrayPositions returns the array positions recorded for this field.
func (n *NumericField) ArrayPositions() []uint64 {
	return n.arrayPositions
}

// Options returns the indexing options for this field.
func (n *NumericField) Options() IndexingOptions {
	return n.options
}

// Analyze produces the token stream for this field: one token for the
// full-precision value, plus one coarser prefix-coded token per
// DefaultPrecisionStep of shift, until encoding fails or the shift
// reaches 64 bits.
func (n *NumericField) Analyze() (int, analysis.TokenFrequencies) {
	tokens := make(analysis.TokenStream, 0)
	tokens = append(tokens, &analysis.Token{
		Start:    0,
		End:      len(n.value),
		Term:     n.value,
		Position: 1,
		Type:     analysis.Numeric,
	})

	original, err := n.value.Int64()
	if err == nil {
		// Generate progressively lower-precision terms of the same value.
		shift := DefaultPrecisionStep
		for shift < 64 {
			shiftEncoded, err := numeric.NewPrefixCodedInt64(original, shift)
			if err != nil {
				break
			}
			token := analysis.Token{
				Start:    0,
				End:      len(shiftEncoded),
				Term:     shiftEncoded,
				Position: 1,
				Type:     analysis.Numeric,
			}
			tokens = append(tokens, &token)
			shift += DefaultPrecisionStep
		}
	}

	fieldLength := len(tokens)
	tokenFreqs := analysis.TokenFrequency(tokens, n.arrayPositions, n.options.IncludeTermVectors())
	return fieldLength, tokenFreqs
}

// Value returns the prefix-coded encoding of the numeric value.
func (n *NumericField) Value() []byte {
	return n.value
}

// Number decodes the stored value back into a float64.
func (n *NumericField) Number() (float64, error) {
	i64, err := n.value.Int64()
	if err != nil {
		return 0.0, err
	}
	return numeric.Int64ToFloat64(i64), nil
}

// GoString implements fmt.GoStringer for debug output.
func (n *NumericField) GoString() string {
	return fmt.Sprintf("&document.NumericField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
}

// NumPlainTextBytes returns the plain-text size recorded for this field.
func (n *NumericField) NumPlainTextBytes() uint64 {
	return n.numPlainTextBytes
}

// NewNumericFieldFromBytes wraps an already prefix-coded value in a
// NumericField using the default options.
func NewNumericFieldFromBytes(name string, arrayPositions []uint64, value []byte) *NumericField {
	return &NumericField{
		name:              name,
		arrayPositions:    arrayPositions,
		value:             value,
		options:           DefaultNumericIndexingOptions,
		numPlainTextBytes: uint64(len(value)),
	}
}

// NewNumericField builds a NumericField with the default indexing
// options.
func NewNumericField(name string, arrayPositions []uint64, number float64) *NumericField {
	return NewNumericFieldWithIndexingOptions(name, arrayPositions, number, DefaultNumericIndexingOptions)
}

// NewNumericFieldWithIndexingOptions encodes number as a sortable int64
// and then prefix-codes it with zero shift.
func NewNumericFieldWithIndexingOptions(name string, arrayPositions []uint64, number float64, options IndexingOptions) *NumericField {
	numberInt64 := numeric.Float64ToInt64(number)
	prefixCoded := numeric.MustNewPrefixCodedInt64(numberInt64, 0)
	return &NumericField{
		name:           name,
		arrayPositions: arrayPositions,
		value:          prefixCoded,
		options:        options,
		// not correct, just a place holder until we revisit how fields are
		// represented and can fix this better
		numPlainTextBytes: uint64(8),
	}
}
@ -0,0 +1,119 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package document |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/analysis" |
||||||
|
) |
||||||
|
|
||||||
|
// DefaultTextIndexingOptions is used when no explicit options are
// supplied for a text field.
const DefaultTextIndexingOptions = IndexField

// TextField indexes a []byte value, optionally run through an analyzer
// to produce its token stream.
type TextField struct {
	name              string
	arrayPositions    []uint64
	options           IndexingOptions
	analyzer          *analysis.Analyzer
	value             []byte
	numPlainTextBytes uint64
}

// Name returns the name of the field.
func (t *TextField) Name() string {
	return t.name
}

// ArrayPositions returns the array positions recorded for this field.
func (t *TextField) ArrayPositions() []uint64 {
	return t.arrayPositions
}

// Options returns the indexing options for this field.
func (t *TextField) Options() IndexingOptions {
	return t.options
}

// Analyze produces the token stream for this field. When an analyzer is
// configured it is applied to the value; otherwise the whole value
// becomes a single AlphaNumeric token.
func (t *TextField) Analyze() (int, analysis.TokenFrequencies) {
	var tokens analysis.TokenStream
	if t.analyzer != nil {
		bytesToAnalyze := t.Value()
		if t.options.IsStored() {
			// need to copy
			// (presumably so the analyzer cannot mutate the bytes that
			// will also be stored — TODO confirm against analyzer impls)
			bytesCopied := make([]byte, len(bytesToAnalyze))
			copy(bytesCopied, bytesToAnalyze)
			bytesToAnalyze = bytesCopied
		}
		tokens = t.analyzer.Analyze(bytesToAnalyze)
	} else {
		tokens = analysis.TokenStream{
			&analysis.Token{
				Start:    0,
				End:      len(t.value),
				Term:     t.value,
				Position: 1,
				Type:     analysis.AlphaNumeric,
			},
		}
	}
	fieldLength := len(tokens) // number of tokens in this doc field
	tokenFreqs := analysis.TokenFrequency(tokens, t.arrayPositions, t.options.IncludeTermVectors())
	return fieldLength, tokenFreqs
}

// Value returns the raw bytes of the field.
func (t *TextField) Value() []byte {
	return t.value
}

// GoString implements fmt.GoStringer for debug output.
func (t *TextField) GoString() string {
	return fmt.Sprintf("&document.TextField{Name:%s, Options: %s, Analyzer: %v, Value: %s, ArrayPositions: %v}", t.name, t.options, t.analyzer, t.value, t.arrayPositions)
}

// NumPlainTextBytes returns the plain-text size recorded for this field.
func (t *TextField) NumPlainTextBytes() uint64 {
	return t.numPlainTextBytes
}

// NewTextField builds a TextField with the default indexing options and
// no analyzer.
func NewTextField(name string, arrayPositions []uint64, value []byte) *TextField {
	return NewTextFieldWithIndexingOptions(name, arrayPositions, value, DefaultTextIndexingOptions)
}

// NewTextFieldWithIndexingOptions builds a TextField with explicit
// options and no analyzer.
func NewTextFieldWithIndexingOptions(name string, arrayPositions []uint64, value []byte, options IndexingOptions) *TextField {
	return &TextField{
		name:              name,
		arrayPositions:    arrayPositions,
		options:           options,
		value:             value,
		numPlainTextBytes: uint64(len(value)),
	}
}

// NewTextFieldWithAnalyzer builds a TextField with the default indexing
// options and the given analyzer.
func NewTextFieldWithAnalyzer(name string, arrayPositions []uint64, value []byte, analyzer *analysis.Analyzer) *TextField {
	return &TextField{
		name:              name,
		arrayPositions:    arrayPositions,
		options:           DefaultTextIndexingOptions,
		analyzer:          analyzer,
		value:             value,
		numPlainTextBytes: uint64(len(value)),
	}
}

// NewTextFieldCustom builds a TextField with explicit options and
// analyzer.
func NewTextFieldCustom(name string, arrayPositions []uint64, value []byte, options IndexingOptions, analyzer *analysis.Analyzer) *TextField {
	return &TextField{
		name:              name,
		arrayPositions:    arrayPositions,
		options:           options,
		analyzer:          analyzer,
		value:             value,
		numPlainTextBytes: uint64(len(value)),
	}
}
@ -0,0 +1,55 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package document |
||||||
|
|
||||||
|
// IndexingOptions is a bit set describing how a field is handled at
// index time.
type IndexingOptions int

const (
	// IndexField: the field's terms are indexed.
	IndexField IndexingOptions = 1 << iota
	// StoreField: the field's original value is stored.
	StoreField
	// IncludeTermVectors: term vectors are recorded for the field.
	IncludeTermVectors
)

// IsIndexed reports whether the IndexField bit is set.
func (o IndexingOptions) IsIndexed() bool {
	return o&IndexField != 0
}

// IsStored reports whether the StoreField bit is set.
func (o IndexingOptions) IsStored() bool {
	return o&StoreField != 0
}

// IncludeTermVectors reports whether the IncludeTermVectors bit is set.
func (o IndexingOptions) IncludeTermVectors() bool {
	return o&IncludeTermVectors != 0
}

// String renders the enabled options as a comma separated list, in the
// fixed order INDEXED, STORE, TV.
func (o IndexingOptions) String() string {
	var rv string
	for _, opt := range []struct {
		enabled bool
		label   string
	}{
		{o.IsIndexed(), "INDEXED"},
		{o.IsStored(), "STORE"},
		{o.IncludeTermVectors(), "TV"},
	} {
		if !opt.enabled {
			continue
		}
		if rv != "" {
			rv += ", "
		}
		rv += opt.label
	}
	return rv
}
@ -0,0 +1,52 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package bleve |
||||||
|
|
||||||
|
// Error represents a more strongly typed bleve error, allowing callers
// to detect and handle specific failure conditions by comparing against
// the constants below.
type Error int

// Error implements the error interface by looking up the canonical
// message for this code.
func (e Error) Error() string {
	return errorMessages[e]
}

// Constant Error values which can be compared to determine the type of error.
const (
	ErrorIndexPathExists Error = iota
	ErrorIndexPathDoesNotExist
	ErrorIndexMetaMissing
	ErrorIndexMetaCorrupt
	ErrorUnknownStorageType
	ErrorIndexClosed
	ErrorAliasMulti
	ErrorAliasEmpty
	ErrorUnknownIndexType
	ErrorEmptyID
	ErrorIndexReadInconsistency
)

// errorMessages maps each error code to its human-readable message.
var errorMessages = map[Error]string{
	ErrorIndexPathExists:        "cannot create new index, path already exists",
	ErrorIndexPathDoesNotExist:  "cannot open index, path does not exist",
	ErrorIndexMetaMissing:       "cannot open index, metadata missing",
	ErrorIndexMetaCorrupt:       "cannot open index, metadata corrupt",
	ErrorUnknownStorageType:     "unknown storage type",
	ErrorIndexClosed:            "index is closed",
	ErrorAliasMulti:             "cannot perform single index operation on multiple index alias",
	ErrorAliasEmpty:             "cannot perform operation on empty alias",
	ErrorUnknownIndexType:       "unknown index type",
	ErrorEmptyID:                "document ID cannot be empty",
	ErrorIndexReadInconsistency: "index read inconsistency detected",
}
@ -0,0 +1,243 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package bleve |
||||||
|
|
||||||
|
import ( |
||||||
|
"github.com/blevesearch/bleve/document" |
||||||
|
"github.com/blevesearch/bleve/index" |
||||||
|
"github.com/blevesearch/bleve/index/store" |
||||||
|
"github.com/blevesearch/bleve/mapping" |
||||||
|
"golang.org/x/net/context" |
||||||
|
) |
||||||
|
|
||||||
|
// A Batch groups together multiple Index and Delete
// operations you would like performed at the same
// time. The Batch structure is NOT thread-safe.
// You should only perform operations on a batch
// from a single thread at a time. Once batch
// execution has started, you may not modify it.
type Batch struct {
	index    Index
	internal *index.Batch
}

// Index adds the specified index operation to the
// batch. NOTE: the bleve Index is not updated
// until the batch is executed. An empty id is
// rejected with ErrorEmptyID.
func (b *Batch) Index(id string, data interface{}) error {
	if id == "" {
		return ErrorEmptyID
	}
	doc := document.NewDocument(id)
	err := b.index.Mapping().MapDocument(doc, data)
	if err != nil {
		return err
	}
	b.internal.Update(doc)
	return nil
}

// Delete adds the specified delete operation to the
// batch. NOTE: the bleve Index is not updated until
// the batch is executed. An empty id is silently
// ignored.
func (b *Batch) Delete(id string) {
	if id != "" {
		b.internal.Delete(id)
	}
}

// SetInternal adds the specified set internal
// operation to the batch. NOTE: the bleve Index is
// not updated until the batch is executed.
func (b *Batch) SetInternal(key, val []byte) {
	b.internal.SetInternal(key, val)
}

// DeleteInternal adds the specified delete internal
// operation to the batch. NOTE: the bleve Index is
// not updated until the batch is executed.
func (b *Batch) DeleteInternal(key []byte) {
	b.internal.DeleteInternal(key)
}

// Size returns the total number of operations inside the batch
// including normal index operations and internal operations.
func (b *Batch) Size() int {
	return len(b.internal.IndexOps) + len(b.internal.InternalOps)
}

// String prints a user friendly string representation of what
// is inside this batch.
func (b *Batch) String() string {
	return b.internal.String()
}

// Reset returns a Batch to the empty state so that it can
// be re-used in the future.
func (b *Batch) Reset() {
	b.internal.Reset()
}
||||||
|
|
||||||
|
// An Index implements all the indexing and searching
|
||||||
|
// capabilities of bleve. An Index can be created
|
||||||
|
// using the New() and Open() methods.
|
||||||
|
//
|
||||||
|
// Index() takes an input value, deduces a DocumentMapping for its type,
|
||||||
|
// assigns string paths to its fields or values then applies field mappings on
|
||||||
|
// them.
|
||||||
|
//
|
||||||
|
// The DocumentMapping used to index a value is deduced by the following rules:
|
||||||
|
// 1) If value implements Classifier interface, resolve the mapping from Type().
|
||||||
|
// 2) If value has a string field or value at IndexMapping.TypeField.
|
||||||
|
// (defaulting to "_type"), use it to resolve the mapping. Fields addressing
|
||||||
|
// is described below.
|
||||||
|
// 3) If IndexMapping.DefaultType is registered, return it.
|
||||||
|
// 4) Return IndexMapping.DefaultMapping.
|
||||||
|
//
|
||||||
|
// Each field or nested field of the value is identified by a string path, then
|
||||||
|
// mapped to one or several FieldMappings which extract the result for analysis.
|
||||||
|
//
|
||||||
|
// Struct values fields are identified by their "json:" tag, or by their name.
|
||||||
|
// Nested fields are identified by prefixing with their parent identifier,
|
||||||
|
// separated by a dot.
|
||||||
|
//
|
||||||
|
// Map values entries are identified by their string key. Entries not indexed
|
||||||
|
// by strings are ignored. Entry values are identified recursively like struct
|
||||||
|
// fields.
|
||||||
|
//
|
||||||
|
// Slice and array values are identified by their field name. Their elements
|
||||||
|
// are processed sequentially with the same FieldMapping.
|
||||||
|
//
|
||||||
|
// String, float64 and time.Time values are identified by their field name.
|
||||||
|
// Other types are ignored.
|
||||||
|
//
|
||||||
|
// Each value identifier is decomposed in its parts and recursively address
|
||||||
|
// SubDocumentMappings in the tree starting at the root DocumentMapping. If a
|
||||||
|
// mapping is found, all its FieldMappings are applied to the value. If no
|
||||||
|
// mapping is found and the root DocumentMapping is dynamic, default mappings
|
||||||
|
// are used based on value type and IndexMapping default configurations.
|
||||||
|
//
|
||||||
|
// Finally, mapped values are analyzed, indexed or stored. See
|
||||||
|
// FieldMapping.Analyzer to know how an analyzer is resolved for a given field.
|
||||||
|
//
|
||||||
|
// Examples:
|
||||||
|
//
|
||||||
|
// type Date struct {
|
||||||
|
// Day string `json:"day"`
|
||||||
|
// Month string
|
||||||
|
// Year string
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// type Person struct {
|
||||||
|
// FirstName string `json:"first_name"`
|
||||||
|
// LastName string
|
||||||
|
// BirthDate Date `json:"birth_date"`
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// A Person value FirstName is mapped by the SubDocumentMapping at
|
||||||
|
// "first_name". Its LastName is mapped by the one at "LastName". The day of
|
||||||
|
// BirthDate is mapped to the SubDocumentMapping "day" of the root
|
||||||
|
// SubDocumentMapping "birth_date". It will appear as the "birth_date.day"
|
||||||
|
// field in the index. The month is mapped to "birth_date.Month".
|
||||||
|
type Index interface {
	// Index analyzes, indexes or stores mapped data fields. Supplied
	// identifier is bound to analyzed data and will be retrieved by search
	// requests. See Index interface documentation for details about mapping
	// rules.
	Index(id string, data interface{}) error
	// Delete removes the document with the given identifier from the index.
	Delete(id string) error

	// NewBatch returns a new empty Batch; Batch executes all of the
	// operations accumulated in b.
	NewBatch() *Batch
	Batch(b *Batch) error

	// Document returns specified document or nil if the document is not
	// indexed or stored.
	Document(id string) (*document.Document, error)
	// DocCount returns the number of documents in the index.
	DocCount() (uint64, error)

	// Search executes the given request; SearchInContext does the same
	// with the supplied context.
	Search(req *SearchRequest) (*SearchResult, error)
	SearchInContext(ctx context.Context, req *SearchRequest) (*SearchResult, error)

	// Fields returns the field names known to the index.
	Fields() ([]string, error)

	// FieldDict and its Range/Prefix variants expose the term dictionary
	// of a field, optionally restricted to a term range or prefix.
	FieldDict(field string) (index.FieldDict, error)
	FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error)
	FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error)

	// Close closes the index.
	Close() error

	// Mapping returns the IndexMapping in use by this index.
	Mapping() mapping.IndexMapping

	// Stats and StatsMap expose index statistics.
	Stats() *IndexStat
	StatsMap() map[string]interface{}

	// GetInternal, SetInternal and DeleteInternal read and write
	// application-defined key/value pairs stored alongside the index.
	GetInternal(key []byte) ([]byte, error)
	SetInternal(key, val []byte) error
	DeleteInternal(key []byte) error

	// Name returns the name of the index (by default this is the path)
	Name() string
	// SetName lets you assign your own logical name to this index
	SetName(string)

	// Advanced returns the indexer and data store, exposing lower level
	// methods to enumerate records and access data.
	Advanced() (index.Index, store.KVStore, error)
}
||||||
|
|
||||||
|
// New index at the specified path, must not exist.
|
||||||
|
// The provided mapping will be used for all
|
||||||
|
// Index/Search operations.
|
||||||
|
func New(path string, mapping mapping.IndexMapping) (Index, error) { |
||||||
|
return newIndexUsing(path, mapping, Config.DefaultIndexType, Config.DefaultKVStore, nil) |
||||||
|
} |
||||||
|
|
||||||
|
// NewMemOnly creates a memory-only index.
// The contents of the index is NOT persisted,
// and will be lost once closed.
// The provided mapping will be used for all
// Index/Search operations.
// The empty path selects the in-memory KV store configured in Config.
func NewMemOnly(mapping mapping.IndexMapping) (Index, error) {
	return newIndexUsing("", mapping, Config.DefaultIndexType, Config.DefaultMemKVStore, nil)
}
||||||
|
|
||||||
|
// NewUsing creates an index at the specified path, which must not already
// exist. The provided mapping will be used for all Index/Search operations.
// The specified index type and kvstore implementation will be used, and the
// provided kvconfig will be passed to the kvstore's constructor. Note that
// currently the values of kvconfig must be marshalable and unmarshalable
// with encoding/json (used when reading/writing the index metadata file).
func NewUsing(path string, mapping mapping.IndexMapping, indexType string, kvstore string, kvconfig map[string]interface{}) (Index, error) {
	return newIndexUsing(path, mapping, indexType, kvstore, kvconfig)
}
||||||
|
|
||||||
|
// Open opens the index at the specified path; it must already exist.
// The mapping used when it was created will be used for all Index/Search
// operations.
func Open(path string) (Index, error) {
	return openIndexUsing(path, nil)
}
||||||
|
|
||||||
|
// OpenUsing opens the index at the specified path; it must already exist.
// The mapping used when it was created will be used for all Index/Search
// operations. The provided runtimeConfig can override settings persisted
// when the kvstore was created.
func OpenUsing(path string, runtimeConfig map[string]interface{}) (Index, error) {
	return openIndexUsing(path, runtimeConfig)
}
@ -0,0 +1,83 @@ |
|||||||
|
// Copyright (c) 2015 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package index |
||||||
|
|
||||||
|
import "github.com/blevesearch/bleve/document" |
||||||
|
|
||||||
|
// IndexRow is implemented by every physical row written to the KV store.
// KeySize/ValueSize report required buffer lengths; KeyTo/ValueTo appear to
// write into caller-supplied buffers while Key/Value return the encoded
// bytes directly — confirm against concrete implementations.
type IndexRow interface {
	KeySize() int
	KeyTo([]byte) (int, error)
	Key() []byte

	ValueSize() int
	ValueTo([]byte) (int, error)
	Value() []byte
}

// AnalysisResult carries the index rows produced by analyzing one document.
type AnalysisResult struct {
	DocID string
	Rows  []IndexRow
}

// AnalysisWork is one unit of work for an analysis worker: analyze document
// d using index i and deliver the result on rc.
type AnalysisWork struct {
	i  Index
	d  *document.Document
	rc chan *AnalysisResult
}
||||||
|
|
||||||
|
func NewAnalysisWork(i Index, d *document.Document, rc chan *AnalysisResult) *AnalysisWork { |
||||||
|
return &AnalysisWork{ |
||||||
|
i: i, |
||||||
|
d: d, |
||||||
|
rc: rc, |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// AnalysisQueue fans analysis work out to a fixed pool of workers (see
// NewAnalysisQueue); closing done stops all workers.
type AnalysisQueue struct {
	queue chan *AnalysisWork
	done  chan struct{}
}

// Queue submits one unit of work. It blocks until a worker accepts it,
// because the queue channel is unbuffered.
func (q *AnalysisQueue) Queue(work *AnalysisWork) {
	q.queue <- work
}

// Close signals every worker to exit by closing the done channel.
// It must not be called twice (closing a closed channel panics).
func (q *AnalysisQueue) Close() {
	close(q.done)
}
||||||
|
|
||||||
|
func NewAnalysisQueue(numWorkers int) *AnalysisQueue { |
||||||
|
rv := AnalysisQueue{ |
||||||
|
queue: make(chan *AnalysisWork), |
||||||
|
done: make(chan struct{}), |
||||||
|
} |
||||||
|
for i := 0; i < numWorkers; i++ { |
||||||
|
go AnalysisWorker(rv) |
||||||
|
} |
||||||
|
return &rv |
||||||
|
} |
||||||
|
|
||||||
|
// AnalysisWorker loops forever, analyzing queued documents and delivering
// each result on the work item's own result channel. It exits when the
// queue's done channel is closed (see AnalysisQueue.Close).
func AnalysisWorker(q AnalysisQueue) {
	// read work off the queue
	for {
		select {
		case <-q.done:
			return
		case w := <-q.queue:
			r := w.i.Analyze(w.d)
			w.rc <- r
		}
	}
}
@ -0,0 +1,88 @@ |
|||||||
|
// Copyright (c) 2015 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package index |
||||||
|
|
||||||
|
import ( |
||||||
|
"sync" |
||||||
|
) |
||||||
|
|
||||||
|
// FieldCache is a thread-safe bidirectional mapping between field names and
// their compact uint16 indexes.
type FieldCache struct {
	fieldIndexes   map[string]uint16 // name -> index
	indexFields    []string          // index -> name (sparse; grown on demand)
	lastFieldIndex int               // highest index assigned so far (-1 when empty)
	mutex          sync.RWMutex
}
||||||
|
|
||||||
|
func NewFieldCache() *FieldCache { |
||||||
|
return &FieldCache{ |
||||||
|
fieldIndexes: make(map[string]uint16), |
||||||
|
lastFieldIndex: -1, |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func (f *FieldCache) AddExisting(field string, index uint16) { |
||||||
|
f.mutex.Lock() |
||||||
|
f.addLOCKED(field, index) |
||||||
|
f.mutex.Unlock() |
||||||
|
} |
||||||
|
|
||||||
|
// addLOCKED records the field->index mapping and its reverse; the caller
// must hold f.mutex (write lock). Returns the index that was stored.
func (f *FieldCache) addLOCKED(field string, index uint16) uint16 {
	f.fieldIndexes[field] = index
	if len(f.indexFields) < int(index)+1 {
		// grow the reverse-lookup slice with 16 slots of headroom so we do
		// not reallocate on every newly-added field
		prevIndexFields := f.indexFields
		f.indexFields = make([]string, int(index)+16)
		copy(f.indexFields, prevIndexFields)
	}
	f.indexFields[int(index)] = field
	if int(index) > f.lastFieldIndex {
		f.lastFieldIndex = int(index)
	}
	return index
}
||||||
|
|
||||||
|
// FieldNamed returns the index of the field, and whether or not it existed
// before this call. If createIfMissing is true, a new field index is
// assigned, but the second return value will still be false.
func (f *FieldCache) FieldNamed(field string, createIfMissing bool) (uint16, bool) {
	f.mutex.RLock()
	if index, ok := f.fieldIndexes[field]; ok {
		f.mutex.RUnlock()
		return index, true
	} else if !createIfMissing {
		f.mutex.RUnlock()
		return 0, false
	}
	// trade read lock for write lock
	f.mutex.RUnlock()
	f.mutex.Lock()
	// need to check again with write lock: another goroutine may have
	// added this field in the window between RUnlock and Lock
	if index, ok := f.fieldIndexes[field]; ok {
		f.mutex.Unlock()
		return index, true
	}
	// assign next field id
	index := f.addLOCKED(field, uint16(f.lastFieldIndex+1))
	f.mutex.Unlock()
	return index, false
}
||||||
|
|
||||||
|
func (f *FieldCache) FieldIndexed(index uint16) (field string) { |
||||||
|
f.mutex.RLock() |
||||||
|
if int(index) < len(f.indexFields) { |
||||||
|
field = f.indexFields[int(index)] |
||||||
|
} |
||||||
|
f.mutex.RUnlock() |
||||||
|
return field |
||||||
|
} |
@ -0,0 +1,239 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package index |
||||||
|
|
||||||
|
import ( |
||||||
|
"bytes" |
||||||
|
"encoding/json" |
||||||
|
"fmt" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/document" |
||||||
|
"github.com/blevesearch/bleve/index/store" |
||||||
|
) |
||||||
|
|
||||||
|
// ErrorUnknownStorageType is returned when an index is created or opened
// with a KV storage type that has not been registered.
var ErrorUnknownStorageType = fmt.Errorf("unknown storage type")

// Index is the low-level indexing engine contract: document mutation,
// internal key/value storage, snapshot readers, stats, and analysis.
type Index interface {
	Open() error
	Close() error

	Update(doc *document.Document) error
	Delete(id string) error
	Batch(batch *Batch) error

	SetInternal(key, val []byte) error
	DeleteInternal(key []byte) error

	// Reader returns a low-level accessor on the index data. Close it to
	// release associated resources.
	Reader() (IndexReader, error)

	Stats() json.Marshaler
	StatsMap() map[string]interface{}

	Analyze(d *document.Document) *AnalysisResult

	Advanced() (store.KVStore, error)
}
||||||
|
|
||||||
|
// IndexReader is a read-only view over the index; Close it to release
// associated resources.
type IndexReader interface {
	TermFieldReader(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (TermFieldReader, error)

	// DocIDReaderAll returns an iterator over all doc ids.
	// The caller must close the returned instance to release associated resources.
	DocIDReaderAll() (DocIDReader, error)

	// DocIDReaderOnly iterates only the given external doc ids.
	DocIDReaderOnly(ids []string) (DocIDReader, error)

	FieldDict(field string) (FieldDict, error)

	// FieldDictRange is currently defined to include the start and end terms
	FieldDictRange(field string, startTerm []byte, endTerm []byte) (FieldDict, error)
	FieldDictPrefix(field string, termPrefix []byte) (FieldDict, error)

	Document(id string) (*document.Document, error)
	DocumentFieldTerms(id IndexInternalID, fields []string) (FieldTerms, error)

	Fields() ([]string, error)

	GetInternal(key []byte) ([]byte, error)

	DocCount() (uint64, error)

	// ExternalID and InternalID convert between user-visible string ids and
	// the index's internal id representation.
	ExternalID(id IndexInternalID) (string, error)
	InternalID(id string) (IndexInternalID, error)

	// Dump helpers stream raw index contents, presumably for debugging —
	// confirm intended use with callers.
	DumpAll() chan interface{}
	DumpDoc(id string) chan interface{}
	DumpFields() chan interface{}

	Close() error
}
||||||
|
|
||||||
|
// FieldTerms contains the terms used by a document, keyed by field.
type FieldTerms map[string][]string

// FieldsNotYetCached returns the subset of fields that do not yet have an
// entry, preserving the order of the input list.
func (f FieldTerms) FieldsNotYetCached(fields []string) []string {
	missing := make([]string, 0, len(fields))
	for _, name := range fields {
		if _, cached := f[name]; !cached {
			missing = append(missing, name)
		}
	}
	return missing
}

// Merge combines two FieldTerms. It assumes the term lists are complete
// (they are not merged element-wise); entries from other always replace the
// receiver's entries for the same field.
func (f FieldTerms) Merge(other FieldTerms) {
	for name, terms := range other {
		f[name] = terms
	}
}
||||||
|
|
||||||
|
// TermFieldVector records a single occurrence of a term: the field it
// appeared in, its position among the field's tokens, and Start/End offsets
// into the original text (presumably byte offsets — confirm with the
// analyzers that populate it). ArrayPositions locates the value inside
// nested/array fields.
type TermFieldVector struct {
	Field          string
	ArrayPositions []uint64
	Pos            uint64
	Start          uint64
	End            uint64
}
||||||
|
|
||||||
|
// IndexInternalID is an opaque document identifier internal to the index
// implementation.
type IndexInternalID []byte

// Equals reports whether two internal ids are byte-for-byte identical.
func (id IndexInternalID) Equals(other IndexInternalID) bool {
	return bytes.Equal(id, other)
}

// Compare orders internal ids bytewise, returning <0, 0, or >0 like
// bytes.Compare.
func (id IndexInternalID) Compare(other IndexInternalID) int {
	return bytes.Compare(id, other)
}
||||||
|
|
||||||
|
// TermFieldDoc describes one document's use of a term within a field:
// occurrence frequency, the norm, and optional per-occurrence term vectors.
type TermFieldDoc struct {
	Term    string
	ID      IndexInternalID
	Freq    uint64
	Norm    float64
	Vectors []*TermFieldVector
}

// Reset allows an already allocated TermFieldDoc to be reused.
func (tfd *TermFieldDoc) Reset() *TermFieldDoc {
	// remember the []byte used for the ID
	id := tfd.ID
	// idiom to copy over from empty TermFieldDoc (0 allocations)
	*tfd = TermFieldDoc{}
	// reuse the []byte already allocated (and reset len to 0)
	tfd.ID = id[:0]
	return tfd
}
||||||
|
|
||||||
|
// TermFieldReader is the interface exposing the enumeration of documents
// containing a given term in a given field. Documents are returned in byte
// lexicographic order over their identifiers.
type TermFieldReader interface {
	// Next returns the next document containing the term in this field, or nil
	// when it reaches the end of the enumeration. The preAlloced TermFieldDoc
	// is optional, and when non-nil, will be used instead of allocating memory.
	Next(preAlloced *TermFieldDoc) (*TermFieldDoc, error)

	// Advance resets the enumeration at specified document or its immediate
	// follower.
	Advance(ID IndexInternalID, preAlloced *TermFieldDoc) (*TermFieldDoc, error)

	// Count returns the number of documents containing the term in this field.
	Count() uint64
	Close() error
}

// DictEntry is one term in a field's dictionary along with its count.
type DictEntry struct {
	Term  string
	Count uint64
}

// FieldDict enumerates a field's dictionary entries; Close releases
// associated resources.
type FieldDict interface {
	Next() (*DictEntry, error)
	Close() error
}

// DocIDReader is the interface exposing enumeration of documents identifiers.
// Close the reader to release associated resources.
type DocIDReader interface {
	// Next returns the next document internal identifier in the natural
	// index order, nil when the end of the sequence is reached.
	Next() (IndexInternalID, error)

	// Advance resets the iteration to the first internal identifier greater than
	// or equal to ID. If ID is smaller than the start of the range, the iteration
	// will start there instead. If ID is greater than or equal to the end of
	// the range, Next() call will return io.EOF.
	Advance(ID IndexInternalID) (IndexInternalID, error)
	Close() error
}
||||||
|
|
||||||
|
// Batch accumulates pending document and internal-KV operations so they can
// be applied to the index in one call. In both maps a nil value marks a
// deletion.
type Batch struct {
	IndexOps    map[string]*document.Document
	InternalOps map[string][]byte
}

// NewBatch returns an empty, ready-to-use Batch.
func NewBatch() *Batch {
	return &Batch{
		IndexOps:    make(map[string]*document.Document),
		InternalOps: make(map[string][]byte),
	}
}

// Update queues doc for (re)indexing under its own ID.
func (b *Batch) Update(doc *document.Document) {
	b.IndexOps[doc.ID] = doc
}

// Delete queues removal of the document with the given id.
func (b *Batch) Delete(id string) {
	b.IndexOps[id] = nil
}

// SetInternal queues a write to the internal key space.
func (b *Batch) SetInternal(key, val []byte) {
	b.InternalOps[string(key)] = val
}

// DeleteInternal queues removal of an internal key.
func (b *Batch) DeleteInternal(key []byte) {
	b.InternalOps[string(key)] = nil
}
||||||
|
|
||||||
|
func (b *Batch) String() string { |
||||||
|
rv := fmt.Sprintf("Batch (%d ops, %d internal ops)\n", len(b.IndexOps), len(b.InternalOps)) |
||||||
|
for k, v := range b.IndexOps { |
||||||
|
if v != nil { |
||||||
|
rv += fmt.Sprintf("\tINDEX - '%s'\n", k) |
||||||
|
} else { |
||||||
|
rv += fmt.Sprintf("\tDELETE - '%s'\n", k) |
||||||
|
} |
||||||
|
} |
||||||
|
for k, v := range b.InternalOps { |
||||||
|
if v != nil { |
||||||
|
rv += fmt.Sprintf("\tSET INTERNAL - '%s'\n", k) |
||||||
|
} else { |
||||||
|
rv += fmt.Sprintf("\tDELETE INTERNAL - '%s'\n", k) |
||||||
|
} |
||||||
|
} |
||||||
|
return rv |
||||||
|
} |
||||||
|
|
||||||
|
// Reset discards all queued operations by replacing both maps with fresh
// empty ones, making the batch reusable.
func (b *Batch) Reset() {
	b.IndexOps = make(map[string]*document.Document)
	b.InternalOps = make(map[string][]byte)
}
@ -0,0 +1,62 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package store |
||||||
|
|
||||||
|
// op is a single queued key/value operation; a nil V denotes a delete.
type op struct {
	K []byte
	V []byte
}

// EmulatedBatch buffers set/delete ops plus merge ops in memory so KV
// stores without native batching can replay them in one write transaction.
type EmulatedBatch struct {
	Ops    []*op
	Merger *EmulatedMerge
}

// NewEmulatedBatch creates a batch pre-sized for 1000 ops, using mo to
// resolve merge operations at execution time.
func NewEmulatedBatch(mo MergeOperator) *EmulatedBatch {
	return &EmulatedBatch{
		Ops:    make([]*op, 0, 1000),
		Merger: NewEmulatedMerge(mo),
	}
}
||||||
|
|
||||||
|
func (b *EmulatedBatch) Set(key, val []byte) { |
||||||
|
ck := make([]byte, len(key)) |
||||||
|
copy(ck, key) |
||||||
|
cv := make([]byte, len(val)) |
||||||
|
copy(cv, val) |
||||||
|
b.Ops = append(b.Ops, &op{ck, cv}) |
||||||
|
} |
||||||
|
|
||||||
|
func (b *EmulatedBatch) Delete(key []byte) { |
||||||
|
ck := make([]byte, len(key)) |
||||||
|
copy(ck, key) |
||||||
|
b.Ops = append(b.Ops, &op{ck, nil}) |
||||||
|
} |
||||||
|
|
||||||
|
func (b *EmulatedBatch) Merge(key, val []byte) { |
||||||
|
ck := make([]byte, len(key)) |
||||||
|
copy(ck, key) |
||||||
|
cv := make([]byte, len(val)) |
||||||
|
copy(cv, val) |
||||||
|
b.Merger.Merge(key, val) |
||||||
|
} |
||||||
|
|
||||||
|
// Reset drops all queued ops while keeping the backing array for reuse.
// NOTE(review): the Merger's accumulated merges are not cleared here —
// confirm callers create a fresh batch rather than relying on Reset to
// clear merges.
func (b *EmulatedBatch) Reset() {
	b.Ops = b.Ops[:0]
}

// Close releases resources; nothing to do for an in-memory batch.
func (b *EmulatedBatch) Close() error {
	return nil
}
@ -0,0 +1,85 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package boltdb |
||||||
|
|
||||||
|
import ( |
||||||
|
"bytes" |
||||||
|
|
||||||
|
"github.com/boltdb/bolt" |
||||||
|
) |
||||||
|
|
||||||
|
// Iterator walks keys of a single bolt bucket in lexicographic order,
// optionally restricted to a key prefix or to a [start, end) range.
type Iterator struct {
	store  *Store
	tx     *bolt.Tx
	cursor *bolt.Cursor
	prefix []byte // when non-nil, iteration is limited to keys with this prefix
	start  []byte // range mode: inclusive lower bound
	end    []byte // range mode: exclusive upper bound
	valid  bool   // whether key/val refer to an in-bounds position
	key    []byte
	val    []byte
}

// updateValid recomputes i.valid after a cursor move: the position is valid
// while a key exists and it still matches the prefix (prefix mode) or sorts
// before end (range mode; end is exclusive). Prefix takes priority over end.
func (i *Iterator) updateValid() {
	i.valid = (i.key != nil)
	if i.valid {
		if i.prefix != nil {
			i.valid = bytes.HasPrefix(i.key, i.prefix)
		} else if i.end != nil {
			i.valid = bytes.Compare(i.key, i.end) < 0
		}
	}
}
||||||
|
|
||||||
|
// Seek positions the iterator at the first key >= k, clamped to the
// iterator's configured bounds: a k below start snaps up to start; in
// prefix mode a k outside the prefix either snaps forward to the prefix or,
// if already past all prefixed keys, invalidates the iterator.
func (i *Iterator) Seek(k []byte) {
	if i.start != nil && bytes.Compare(k, i.start) < 0 {
		k = i.start
	}
	if i.prefix != nil && !bytes.HasPrefix(k, i.prefix) {
		if bytes.Compare(k, i.prefix) < 0 {
			k = i.prefix
		} else {
			// k sorts after every key carrying the prefix; nothing to visit
			i.valid = false
			return
		}
	}
	i.key, i.val = i.cursor.Seek(k)
	i.updateValid()
}
||||||
|
|
||||||
|
// Next advances the cursor one key and revalidates the position.
func (i *Iterator) Next() {
	i.key, i.val = i.cursor.Next()
	i.updateValid()
}

// Current returns the key, value, and validity of the present position.
func (i *Iterator) Current() ([]byte, []byte, bool) {
	return i.key, i.val, i.valid
}

// Key returns the current key; only meaningful while Valid() is true.
func (i *Iterator) Key() []byte {
	return i.key
}

// Value returns the current value; only meaningful while Valid() is true.
func (i *Iterator) Value() []byte {
	return i.val
}

// Valid reports whether the iterator is positioned on an in-bounds key.
func (i *Iterator) Valid() bool {
	return i.valid
}

// Close releases the iterator. The owning Reader's transaction is closed
// separately, so there is nothing to do here.
func (i *Iterator) Close() error {
	return nil
}
@ -0,0 +1,73 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package boltdb |
||||||
|
|
||||||
|
import ( |
||||||
|
"github.com/blevesearch/bleve/index/store" |
||||||
|
"github.com/boltdb/bolt" |
||||||
|
) |
||||||
|
|
||||||
|
// Reader provides a consistent read-only snapshot of the store via a single
// bolt read transaction; it must be Closed to release the transaction.
type Reader struct {
	store  *Store
	tx     *bolt.Tx
	bucket *bolt.Bucket
}

// Get returns a copy of the value stored under key, or nil if absent. The
// copy is required because the slice bolt returns is only valid for the
// lifetime of the transaction.
func (r *Reader) Get(key []byte) ([]byte, error) {
	var rv []byte
	v := r.bucket.Get(key)
	if v != nil {
		rv = make([]byte, len(v))
		copy(rv, v)
	}
	return rv, nil
}

// MultiGet fetches several keys by delegating to the generic store helper.
func (r *Reader) MultiGet(keys [][]byte) ([][]byte, error) {
	return store.MultiGet(r, keys)
}
||||||
|
|
||||||
|
// PrefixIterator returns an iterator positioned at the first key carrying
// prefix, limited to keys that share that prefix.
func (r *Reader) PrefixIterator(prefix []byte) store.KVIterator {
	cursor := r.bucket.Cursor()

	rv := &Iterator{
		store:  r.store,
		tx:     r.tx,
		cursor: cursor,
		prefix: prefix,
	}

	// position at the first candidate so the iterator is immediately usable
	rv.Seek(prefix)
	return rv
}

// RangeIterator returns an iterator over keys in [start, end), positioned
// at start.
func (r *Reader) RangeIterator(start, end []byte) store.KVIterator {
	cursor := r.bucket.Cursor()

	rv := &Iterator{
		store:  r.store,
		tx:     r.tx,
		cursor: cursor,
		start:  start,
		end:    end,
	}

	rv.Seek(start)
	return rv
}
||||||
|
|
||||||
|
// Close ends the snapshot by rolling back the read transaction (bolt
// read-only transactions are ended with Rollback, never Commit).
func (r *Reader) Close() error {
	return r.tx.Rollback()
}
@ -0,0 +1,26 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package boltdb |
||||||
|
|
||||||
|
import "encoding/json" |
||||||
|
|
||||||
|
// stats adapts a Store so that its underlying bolt database statistics can
// be marshaled as JSON (see Store.Stats).
type stats struct {
	s *Store
}

// MarshalJSON reports the bolt DB's Stats() structure as JSON.
func (s *stats) MarshalJSON() ([]byte, error) {
	bs := s.s.db.Stats()
	return json.Marshal(bs)
}
@ -0,0 +1,175 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
// Package boltdb implements a store.KVStore on top of BoltDB. It supports the
|
||||||
|
// following options:
|
||||||
|
//
|
||||||
|
// "bucket" (string): the name of BoltDB bucket to use, defaults to "bleve".
|
||||||
|
//
|
||||||
|
// "nosync" (bool): if true, set boltdb.DB.NoSync to true. It speeds up index
|
||||||
|
// operations in exchange of losing integrity guarantees if indexation aborts
|
||||||
|
// without closing the index. Use it when rebuilding indexes from zero.
|
||||||
|
package boltdb |
||||||
|
|
||||||
|
import ( |
||||||
|
"bytes" |
||||||
|
"encoding/json" |
||||||
|
"fmt" |
||||||
|
"os" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/index/store" |
||||||
|
"github.com/blevesearch/bleve/registry" |
||||||
|
"github.com/boltdb/bolt" |
||||||
|
) |
||||||
|
|
||||||
|
const ( |
||||||
|
Name = "boltdb" |
||||||
|
defaultCompactBatchSize = 100 |
||||||
|
) |
||||||
|
|
||||||
|
// Store is a store.KVStore backed by a single BoltDB file, with all keys
// kept in one named bucket.
type Store struct {
	path        string // filesystem path of the bolt file
	bucket      string // bucket holding all keys (default "bleve")
	db          *bolt.DB
	noSync      bool    // when true bolt skips fsync, trading integrity for speed
	fillPercent float64 // bucket fill percent applied to write transactions
	mo          store.MergeOperator
}
||||||
|
|
||||||
|
// New constructs a BoltDB-backed KVStore from config. Recognized keys:
// "path" (string, required), "bucket" (string, default "bleve"),
// "nosync" (bool), "fillPercent" (float64, default bolt's), and
// "read_only" (bool). The bucket is created unless opening read-only.
func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) {
	path, ok := config["path"].(string)
	if !ok {
		return nil, fmt.Errorf("must specify path")
	}
	if path == "" {
		return nil, os.ErrInvalid
	}

	bucket, ok := config["bucket"].(string)
	if !ok {
		bucket = "bleve"
	}

	noSync, _ := config["nosync"].(bool)

	fillPercent, ok := config["fillPercent"].(float64)
	if !ok {
		fillPercent = bolt.DefaultFillPercent
	}

	bo := &bolt.Options{}
	ro, ok := config["read_only"].(bool)
	if ok {
		bo.ReadOnly = ro
	}

	db, err := bolt.Open(path, 0600, bo)
	if err != nil {
		return nil, err
	}
	db.NoSync = noSync

	// ensure the bucket exists before any reads/writes; skipped when the DB
	// is read-only because a write transaction would fail
	if !bo.ReadOnly {
		err = db.Update(func(tx *bolt.Tx) error {
			_, err := tx.CreateBucketIfNotExists([]byte(bucket))

			return err
		})
		if err != nil {
			return nil, err
		}
	}

	rv := Store{
		path:        path,
		bucket:      bucket,
		db:          db,
		mo:          mo,
		noSync:      noSync,
		fillPercent: fillPercent,
	}
	return &rv, nil
}
||||||
|
|
||||||
|
// Close closes the underlying bolt database file.
func (bs *Store) Close() error {
	return bs.db.Close()
}

// Reader opens a read transaction and returns a snapshot reader over the
// configured bucket. The caller must Close it to end the transaction.
func (bs *Store) Reader() (store.KVReader, error) {
	tx, err := bs.db.Begin(false)
	if err != nil {
		return nil, err
	}
	return &Reader{
		store:  bs,
		tx:     tx,
		bucket: tx.Bucket([]byte(bs.bucket)),
	}, nil
}

// Writer returns a writer handle; the write transaction itself is opened
// per-batch inside ExecuteBatch rather than held open here.
func (bs *Store) Writer() (store.KVWriter, error) {
	return &Writer{
		store: bs,
	}, nil
}

// Stats exposes the bolt database statistics as a JSON marshaler.
func (bs *Store) Stats() json.Marshaler {
	return &stats{
		s: bs,
	}
}
||||||
|
|
||||||
|
// CompactWithBatchSize removes DictionaryTerm entries with a count of zero
// (in batchSize batches). Removing entries is a workaround for github
// issue #374. It loops, deleting up to batchSize rows per bolt Batch
// transaction, until a full pass deletes nothing.
func (bs *Store) CompactWithBatchSize(batchSize int) error {
	for {
		cnt := 0
		err := bs.db.Batch(func(tx *bolt.Tx) error {
			c := tx.Bucket([]byte(bs.bucket)).Cursor()
			// dictionary rows live under the 'd' key prefix
			prefix := []byte("d")

			for k, v := c.Seek(prefix); bytes.HasPrefix(k, prefix); k, v = c.Next() {
				// a single zero byte marks a zero count
				if bytes.Equal(v, []byte{0}) {
					cnt++
					if err := c.Delete(); err != nil {
						return err
					}
					// cap per-transaction work so the write txn stays short
					if cnt == batchSize {
						break
					}
				}

			}
			return nil
		})
		if err != nil {
			return err
		}

		// a pass with no deletions means compaction is complete
		if cnt == 0 {
			break
		}
	}
	return nil
}
||||||
|
|
||||||
|
// Compact calls CompactWithBatchSize with a default batch size of 100. This
// is a workaround for github issue #374.
func (bs *Store) Compact() error {
	return bs.CompactWithBatchSize(defaultCompactBatchSize)
}

// init registers this KV store implementation under the name "boltdb" so it
// can be selected by configuration.
func init() {
	registry.RegisterKVStore(Name, New)
}
@ -0,0 +1,95 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package boltdb |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/index/store" |
||||||
|
) |
||||||
|
|
||||||
|
// Writer performs batched mutations against the store. It is a thin handle:
// each ExecuteBatch call opens and commits its own write transaction.
type Writer struct {
	store *Store
}

// NewBatch returns an in-memory batch that queues operations until
// ExecuteBatch replays them.
func (w *Writer) NewBatch() store.KVBatch {
	return store.NewEmulatedBatch(w.store.mo)
}

// NewBatchEx returns a scratch buffer of the requested total size plus a
// regular emulated batch; this backend has no zero-copy batch path.
func (w *Writer) NewBatchEx(options store.KVBatchOptions) ([]byte, store.KVBatch, error) {
	return make([]byte, options.TotalBytes), w.NewBatch(), nil
}
||||||
|
|
||||||
|
// ExecuteBatch atomically applies all merge and set/delete operations in the
// batch inside a single bolt read-write transaction. On success the
// transaction is committed; on any error it is rolled back and the caller
// sees the error that caused the abort. The batch must be an
// *store.EmulatedBatch produced by NewBatch/NewBatchEx.
func (w *Writer) ExecuteBatch(batch store.KVBatch) (err error) {

	emulatedBatch, ok := batch.(*store.EmulatedBatch)
	if !ok {
		return fmt.Errorf("wrong type of batch")
	}

	tx, err := w.store.db.Begin(true)
	if err != nil {
		return
	}
	// defer function to ensure that once started,
	// we either Commit tx or Rollback
	defer func() {
		// if nothing went wrong, commit
		if err == nil {
			// careful to catch error here too
			err = tx.Commit()
		} else {
			// caller should see error that caused abort,
			// not success or failure of Rollback itself
			_ = tx.Rollback()
		}
	}()

	bucket := tx.Bucket([]byte(w.store.bucket))
	bucket.FillPercent = w.store.fillPercent

	// Resolve each accumulated merge to a concrete value via the store's
	// merge operator, then write the result. Any failure aborts the tx.
	for k, mergeOps := range emulatedBatch.Merger.Merges {
		kb := []byte(k)
		existingVal := bucket.Get(kb)
		mergedVal, fullMergeOk := w.store.mo.FullMerge(kb, existingVal, mergeOps)
		if !fullMergeOk {
			err = fmt.Errorf("merge operator returned failure")
			return
		}
		err = bucket.Put(kb, mergedVal)
		if err != nil {
			return
		}
	}

	// Apply plain set (V != nil) and delete (V == nil) operations.
	for _, op := range emulatedBatch.Ops {
		if op.V != nil {
			err = bucket.Put(op.K, op.V)
			if err != nil {
				return
			}
		} else {
			err = bucket.Delete(op.K)
			if err != nil {
				return
			}
		}
	}
	return
}
||||||
|
|
||||||
|
// Close releases the writer. The boltdb writer holds no per-writer
// resources, so this is a no-op that always succeeds.
func (w *Writer) Close() error {
	return nil
}
@ -0,0 +1,152 @@ |
|||||||
|
// Copyright (c) 2015 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
// Package gtreap provides an in-memory implementation of the
|
||||||
|
// KVStore interfaces using the gtreap balanced-binary treap,
|
||||||
|
// copy-on-write data structure.
|
||||||
|
package gtreap |
||||||
|
|
||||||
|
import ( |
||||||
|
"bytes" |
||||||
|
"sync" |
||||||
|
|
||||||
|
"github.com/steveyen/gtreap" |
||||||
|
) |
||||||
|
|
||||||
|
// Iterator walks a snapshot of the treap in ascending key order.
// Items are produced by a background goroutine (started in restart)
// that streams them over nextCh; cancelCh stops that goroutine.
// The mutex guards the channel fields and the current item, which
// may be replaced when the iterator is re-seeked or closed.
type Iterator struct {
	t *gtreap.Treap // immutable treap snapshot being iterated

	m        sync.Mutex    // guards cancelCh, nextCh, curr, currOk
	cancelCh chan struct{} // closed to stop the producer goroutine
	nextCh   chan *Item    // stream of items from the producer
	curr     *Item         // most recently received item
	currOk   bool          // false once nextCh is exhausted/closed

	prefix []byte // if non-nil, iteration is limited to keys with this prefix
	start  []byte // if non-nil, lower bound for iteration
	end    []byte // if non-nil, exclusive upper bound for iteration
}
||||||
|
|
||||||
|
// Seek repositions the iterator at the first key >= k, clamped to the
// iterator's configured start bound and prefix. If k falls after all keys
// sharing the prefix, it is advanced to the smallest byte string greater
// than every prefixed key (the "prefix successor"), so iteration ends.
func (w *Iterator) Seek(k []byte) {
	// never seek before the configured lower bound
	if w.start != nil && bytes.Compare(k, w.start) < 0 {
		k = w.start
	}
	if w.prefix != nil && !bytes.HasPrefix(k, w.prefix) {
		if bytes.Compare(k, w.prefix) < 0 {
			// k sorts before the prefix range: clamp up to the prefix itself
			k = w.prefix
		} else {
			// k sorts after the prefix range: compute the prefix successor by
			// incrementing the last byte that is < 0xff and truncating there.
			// NOTE(review): if every prefix byte is 0xff, end stays nil and
			// the restart below receives a nil key — presumably intentional
			// (iterates from the treap minimum); confirm against callers.
			var end []byte
			for i := len(w.prefix) - 1; i >= 0; i-- {
				c := w.prefix[i]
				if c < 0xff {
					end = make([]byte, i+1)
					copy(end, w.prefix)
					end[i] = c + 1
					break
				}
			}
			k = end
		}
	}
	w.restart(&Item{k: k})
}
||||||
|
|
||||||
|
// restart cancels any in-flight producer goroutine, installs fresh
// cancel/next channels, and launches a new producer that visits the
// treap in ascending order from start, streaming items into nextCh.
// It then primes the iterator by fetching the first item via Next.
// Returns the receiver for call chaining.
func (w *Iterator) restart(start *Item) *Iterator {
	cancelCh := make(chan struct{})
	nextCh := make(chan *Item, 1)

	// Swap in the new channels under the lock, signaling any previous
	// producer to stop by closing its cancel channel.
	w.m.Lock()
	if w.cancelCh != nil {
		close(w.cancelCh)
	}
	w.cancelCh = cancelCh
	w.nextCh = nextCh
	w.curr = nil
	w.currOk = false
	w.m.Unlock()

	// Producer: walk the treap ascending from start; each send races with
	// cancellation so a canceled iterator never blocks this goroutine.
	go func() {
		if start != nil {
			w.t.VisitAscend(start, func(itm gtreap.Item) bool {
				select {
				case <-cancelCh:
					return false
				case nextCh <- itm.(*Item):
					return true
				}
			})
		}
		// closing nextCh makes subsequent receives yield (nil, false)
		close(nextCh)
	}()

	w.Next()

	return w
}
||||||
|
|
||||||
|
// Next advances to the following item by receiving from the producer
// channel. The channel reference is captured under the lock, but the
// (potentially blocking) receive happens outside it so restart/Close
// are not blocked while a receive is pending.
func (w *Iterator) Next() {
	w.m.Lock()
	nextCh := w.nextCh
	w.m.Unlock()
	w.curr, w.currOk = <-nextCh
}
||||||
|
|
||||||
|
func (w *Iterator) Current() ([]byte, []byte, bool) { |
||||||
|
w.m.Lock() |
||||||
|
defer w.m.Unlock() |
||||||
|
if !w.currOk || w.curr == nil { |
||||||
|
return nil, nil, false |
||||||
|
} |
||||||
|
if w.prefix != nil && !bytes.HasPrefix(w.curr.k, w.prefix) { |
||||||
|
return nil, nil, false |
||||||
|
} else if w.end != nil && bytes.Compare(w.curr.k, w.end) >= 0 { |
||||||
|
return nil, nil, false |
||||||
|
} |
||||||
|
return w.curr.k, w.curr.v, w.currOk |
||||||
|
} |
||||||
|
|
||||||
|
func (w *Iterator) Key() []byte { |
||||||
|
k, _, ok := w.Current() |
||||||
|
if !ok { |
||||||
|
return nil |
||||||
|
} |
||||||
|
return k |
||||||
|
} |
||||||
|
|
||||||
|
func (w *Iterator) Value() []byte { |
||||||
|
_, v, ok := w.Current() |
||||||
|
if !ok { |
||||||
|
return nil |
||||||
|
} |
||||||
|
return v |
||||||
|
} |
||||||
|
|
||||||
|
func (w *Iterator) Valid() bool { |
||||||
|
_, _, ok := w.Current() |
||||||
|
return ok |
||||||
|
} |
||||||
|
|
||||||
|
func (w *Iterator) Close() error { |
||||||
|
w.m.Lock() |
||||||
|
if w.cancelCh != nil { |
||||||
|
close(w.cancelCh) |
||||||
|
} |
||||||
|
w.cancelCh = nil |
||||||
|
w.nextCh = nil |
||||||
|
w.curr = nil |
||||||
|
w.currOk = false |
||||||
|
w.m.Unlock() |
||||||
|
|
||||||
|
return nil |
||||||
|
} |
@ -0,0 +1,66 @@ |
|||||||
|
// Copyright (c) 2015 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
// Package gtreap provides an in-memory implementation of the
|
||||||
|
// KVStore interfaces using the gtreap balanced-binary treap,
|
||||||
|
// copy-on-write data structure.
|
||||||
|
package gtreap |
||||||
|
|
||||||
|
import ( |
||||||
|
"github.com/blevesearch/bleve/index/store" |
||||||
|
|
||||||
|
"github.com/steveyen/gtreap" |
||||||
|
) |
||||||
|
|
||||||
|
// Reader implements store.KVReader over an immutable treap snapshot.
// Because the treap is copy-on-write, holding t provides isolation:
// later writes to the Store never affect this reader.
type Reader struct {
	t *gtreap.Treap // snapshot captured at reader-creation time
}
||||||
|
|
||||||
|
func (w *Reader) Get(k []byte) (v []byte, err error) { |
||||||
|
var rv []byte |
||||||
|
itm := w.t.Get(&Item{k: k}) |
||||||
|
if itm != nil { |
||||||
|
rv = make([]byte, len(itm.(*Item).v)) |
||||||
|
copy(rv, itm.(*Item).v) |
||||||
|
return rv, nil |
||||||
|
} |
||||||
|
return nil, nil |
||||||
|
} |
||||||
|
|
||||||
|
// MultiGet retrieves multiple keys in one call by delegating to the
// generic store.MultiGet helper (gtreap has no native multi-get).
func (r *Reader) MultiGet(keys [][]byte) ([][]byte, error) {
	return store.MultiGet(r, keys)
}
||||||
|
|
||||||
|
func (w *Reader) PrefixIterator(k []byte) store.KVIterator { |
||||||
|
rv := Iterator{ |
||||||
|
t: w.t, |
||||||
|
prefix: k, |
||||||
|
} |
||||||
|
rv.restart(&Item{k: k}) |
||||||
|
return &rv |
||||||
|
} |
||||||
|
|
||||||
|
func (w *Reader) RangeIterator(start, end []byte) store.KVIterator { |
||||||
|
rv := Iterator{ |
||||||
|
t: w.t, |
||||||
|
start: start, |
||||||
|
end: end, |
||||||
|
} |
||||||
|
rv.restart(&Item{k: start}) |
||||||
|
return &rv |
||||||
|
} |
||||||
|
|
||||||
|
// Close releases the reader. The treap snapshot needs no explicit
// cleanup, so this is a no-op that always succeeds.
func (w *Reader) Close() error {
	return nil
}
@ -0,0 +1,82 @@ |
|||||||
|
// Copyright (c) 2015 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
// Package gtreap provides an in-memory implementation of the
|
||||||
|
// KVStore interfaces using the gtreap balanced-binary treap,
|
||||||
|
// copy-on-write data structure.
|
||||||
|
|
||||||
|
package gtreap |
||||||
|
|
||||||
|
import ( |
||||||
|
"bytes" |
||||||
|
"fmt" |
||||||
|
"os" |
||||||
|
"sync" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/index/store" |
||||||
|
"github.com/blevesearch/bleve/registry" |
||||||
|
"github.com/steveyen/gtreap" |
||||||
|
) |
||||||
|
|
||||||
|
// Name is the identifier under which this KVStore registers with the bleve registry.
const Name = "gtreap"
||||||
|
|
||||||
|
// Store is an in-memory KVStore backed by a copy-on-write treap.
// The mutex guards replacement of the treap root pointer; readers
// capture the current root and are thereafter isolated from writes.
type Store struct {
	m  sync.Mutex          // guards t
	t  *gtreap.Treap       // current treap root (replaced wholesale on write)
	mo store.MergeOperator // merge operator used by batches
}
||||||
|
|
||||||
|
// Item is the treap payload: a single key/value pair.
// Items are ordered by key (see itemCompare).
type Item struct {
	k []byte // key
	v []byte // value
}
||||||
|
|
||||||
|
// itemCompare orders treap items by bytewise key comparison.
// Both arguments must be *Item; anything else panics by design.
func itemCompare(a, b interface{}) int {
	return bytes.Compare(a.(*Item).k, b.(*Item).k)
}
||||||
|
|
||||||
|
func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) { |
||||||
|
path, ok := config["path"].(string) |
||||||
|
if !ok { |
||||||
|
return nil, fmt.Errorf("must specify path") |
||||||
|
} |
||||||
|
if path != "" { |
||||||
|
return nil, os.ErrInvalid |
||||||
|
} |
||||||
|
|
||||||
|
rv := Store{ |
||||||
|
t: gtreap.NewTreap(itemCompare), |
||||||
|
mo: mo, |
||||||
|
} |
||||||
|
return &rv, nil |
||||||
|
} |
||||||
|
|
||||||
|
// Close releases the store. The in-memory treap needs no cleanup,
// so this is a no-op that always succeeds.
func (s *Store) Close() error {
	return nil
}
||||||
|
|
||||||
|
func (s *Store) Reader() (store.KVReader, error) { |
||||||
|
s.m.Lock() |
||||||
|
t := s.t |
||||||
|
s.m.Unlock() |
||||||
|
return &Reader{t: t}, nil |
||||||
|
} |
||||||
|
|
||||||
|
// Writer returns a KVWriter bound to this store. Exclusion between
// concurrent writers is provided by the store mutex at batch time.
func (s *Store) Writer() (store.KVWriter, error) {
	return &Writer{s: s}, nil
}
||||||
|
|
||||||
|
// init registers the gtreap KVStore with the bleve registry under its
// canonical name, making it selectable via index configuration.
func init() {
	registry.RegisterKVStore(Name, New)
}
@ -0,0 +1,76 @@ |
|||||||
|
// Copyright (c) 2015 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
// Package gtreap provides an in-memory implementation of the
|
||||||
|
// KVStore interfaces using the gtreap balanced-binary treap,
|
||||||
|
// copy-on-write data structure.
|
||||||
|
package gtreap |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"math/rand" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/index/store" |
||||||
|
) |
||||||
|
|
||||||
|
// Writer implements store.KVWriter for the gtreap in-memory store.
// Mutations are buffered in an emulated batch and applied to the
// treap atomically (under the store mutex) in ExecuteBatch.
type Writer struct {
	s *Store // backing store; nil after Close
}
||||||
|
|
||||||
|
// NewBatch returns an emulated batch that buffers set/delete/merge
// operations in memory until ExecuteBatch is called.
func (w *Writer) NewBatch() store.KVBatch {
	return store.NewEmulatedBatch(w.s.mo)
}
||||||
|
|
||||||
|
// NewBatchEx returns a pre-sized scratch byte slice (per options.TotalBytes)
// together with a regular emulated batch; gtreap has no native
// batch-with-buffer facility.
func (w *Writer) NewBatchEx(options store.KVBatchOptions) ([]byte, store.KVBatch, error) {
	return make([]byte, options.TotalBytes), w.NewBatch(), nil
}
||||||
|
|
||||||
|
func (w *Writer) ExecuteBatch(batch store.KVBatch) error { |
||||||
|
|
||||||
|
emulatedBatch, ok := batch.(*store.EmulatedBatch) |
||||||
|
if !ok { |
||||||
|
return fmt.Errorf("wrong type of batch") |
||||||
|
} |
||||||
|
|
||||||
|
w.s.m.Lock() |
||||||
|
for k, mergeOps := range emulatedBatch.Merger.Merges { |
||||||
|
kb := []byte(k) |
||||||
|
var existingVal []byte |
||||||
|
existingItem := w.s.t.Get(&Item{k: kb}) |
||||||
|
if existingItem != nil { |
||||||
|
existingVal = w.s.t.Get(&Item{k: kb}).(*Item).v |
||||||
|
} |
||||||
|
mergedVal, fullMergeOk := w.s.mo.FullMerge(kb, existingVal, mergeOps) |
||||||
|
if !fullMergeOk { |
||||||
|
return fmt.Errorf("merge operator returned failure") |
||||||
|
} |
||||||
|
w.s.t = w.s.t.Upsert(&Item{k: kb, v: mergedVal}, rand.Int()) |
||||||
|
} |
||||||
|
|
||||||
|
for _, op := range emulatedBatch.Ops { |
||||||
|
if op.V != nil { |
||||||
|
w.s.t = w.s.t.Upsert(&Item{k: op.K, v: op.V}, rand.Int()) |
||||||
|
} else { |
||||||
|
w.s.t = w.s.t.Delete(&Item{k: op.K}) |
||||||
|
} |
||||||
|
} |
||||||
|
w.s.m.Unlock() |
||||||
|
|
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
// Close detaches the writer from its store so further use fails fast
// (any subsequent method call will nil-dereference). Always succeeds.
func (w *Writer) Close() error {
	w.s = nil
	return nil
}
@ -0,0 +1,174 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package store |
||||||
|
|
||||||
|
import "encoding/json" |
||||||
|
|
||||||
|
// KVStore is an abstraction for working with KV stores. Note that
// in order to be used with the bleve.registry, it must also implement
// a constructor function of the registry.KVStoreConstructor type.
type KVStore interface {

	// Writer returns a KVWriter which can be used to
	// make changes to the KVStore. If a writer cannot
	// be obtained a non-nil error is returned.
	Writer() (KVWriter, error)

	// Reader returns a KVReader which can be used to
	// read data from the KVStore. If a reader cannot
	// be obtained a non-nil error is returned.
	Reader() (KVReader, error)

	// Close closes the KVStore, releasing any underlying resources.
	Close() error
}
||||||
|
|
||||||
|
// KVReader is an abstraction of an **ISOLATED** reader.
// In this context isolated is defined to mean that
// writes/deletes made after the KVReader is opened
// are not observed.
// Because there is usually a cost associated with
// keeping isolated readers active, users should
// close them as soon as they are no longer needed.
type KVReader interface {

	// Get returns the value associated with the key.
	// If the key does not exist, nil is returned.
	// The caller owns the bytes returned.
	Get(key []byte) ([]byte, error)

	// MultiGet retrieves multiple values in one call.
	MultiGet(keys [][]byte) ([][]byte, error)

	// PrefixIterator returns a KVIterator that will
	// visit all K/V pairs with the provided prefix.
	PrefixIterator(prefix []byte) KVIterator

	// RangeIterator returns a KVIterator that will
	// visit all K/V pairs >= start AND < end.
	RangeIterator(start, end []byte) KVIterator

	// Close closes the reader, releasing its isolation resources.
	Close() error
}
||||||
|
|
||||||
|
// KVIterator is an abstraction around key iteration.
type KVIterator interface {

	// Seek will advance the iterator to the specified key.
	Seek(key []byte)

	// Next will advance the iterator to the next key.
	Next()

	// Key returns the key pointed to by the iterator.
	// The bytes returned are **ONLY** valid until the next call to Seek/Next/Close.
	// Continued use after that requires that they be copied.
	Key() []byte

	// Value returns the value pointed to by the iterator.
	// The bytes returned are **ONLY** valid until the next call to Seek/Next/Close.
	// Continued use after that requires that they be copied.
	Value() []byte

	// Valid returns whether or not the iterator is in a valid state.
	Valid() bool

	// Current returns Key(),Value(),Valid() in a single operation.
	Current() ([]byte, []byte, bool)

	// Close closes the iterator.
	Close() error
}
||||||
|
|
||||||
|
// KVWriter is an abstraction for mutating the KVStore.
// KVWriter does **NOT** enforce restrictions of a single writer;
// if the underlying KVStore allows concurrent writes, the
// KVWriter interface should also do so, it is up to the caller
// to do this in a way that is safe and makes sense.
type KVWriter interface {

	// NewBatch returns a KVBatch for performing batch operations on this kvstore.
	NewBatch() KVBatch

	// NewBatchEx returns a KVBatch and an associated byte array
	// that's pre-sized based on the KVBatchOptions. The caller can
	// use the returned byte array for keys and values associated with
	// the batch. Once the batch is either executed or closed, the
	// associated byte array should no longer be accessed by the
	// caller.
	NewBatchEx(KVBatchOptions) ([]byte, KVBatch, error)

	// ExecuteBatch will execute the KVBatch, the provided KVBatch **MUST** have
	// been created by the same KVStore (though not necessarily the same KVWriter).
	// Batch execution is atomic, either all the operations or none will be performed.
	ExecuteBatch(batch KVBatch) error

	// Close closes the writer.
	Close() error
}
||||||
|
|
||||||
|
// KVBatchOptions provides the KVWriter.NewBatchEx() method with batch
// preparation and preallocation information.
type KVBatchOptions struct {
	// TotalBytes is the sum of key and value bytes needed by the
	// caller for the entire batch. It affects the size of the
	// returned byte array of KVWrite.NewBatchEx().
	TotalBytes int

	// NumSets is the number of Set() calls the caller will invoke on
	// the KVBatch.
	NumSets int

	// NumDeletes is the number of Delete() calls the caller will invoke
	// on the KVBatch.
	NumDeletes int

	// NumMerges is the number of Merge() calls the caller will invoke
	// on the KVBatch.
	NumMerges int
}
||||||
|
|
||||||
|
// KVBatch is an abstraction for making multiple KV mutations at once.
type KVBatch interface {

	// Set updates the key with the specified value;
	// both key and value []byte may be reused as soon as this call returns.
	Set(key, val []byte)

	// Delete removes the specified key;
	// the key []byte may be reused as soon as this call returns.
	Delete(key []byte)

	// Merge merges old value with the new value at the specified key
	// as prescribed by the KVStore's merge operator;
	// both key and value []byte may be reused as soon as this call returns.
	Merge(key, val []byte)

	// Reset frees resources for this batch and allows reuse.
	Reset()

	// Close frees resources.
	Close() error
}
||||||
|
|
||||||
|
// KVStoreStats is an optional interface that KVStores can implement
// if they're able to report any useful stats.
type KVStoreStats interface {
	// Stats returns a JSON serializable object representing stats for this KVStore.
	Stats() json.Marshaler

	// StatsMap returns the same information as Stats as a plain map.
	StatsMap() map[string]interface{}
}
@ -0,0 +1,64 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package store |
||||||
|
|
||||||
|
// MergeOperator resolves accumulated merge operands into concrete values.
// At the moment this happens to be the same interface as described by
// RocksDB, but this may not always be the case.
type MergeOperator interface {

	// FullMerge the full sequence of operands on top of the existingValue;
	// if no value currently exists, existingValue is nil.
	// Return the merged value, and success/failure.
	FullMerge(key, existingValue []byte, operands [][]byte) ([]byte, bool)

	// Partially merge these two operands.
	// If partial merge cannot be done, return nil,false, which will defer
	// all processing until the FullMerge is done.
	PartialMerge(key, leftOperand, rightOperand []byte) ([]byte, bool)

	// Name returns an identifier for the operator.
	Name() string
}
||||||
|
|
||||||
|
// EmulatedMerge accumulates merge operands per key (as strings, so the
// map key is a copy) for stores that lack native merge support. Operands
// are partially merged eagerly when the operator allows it.
type EmulatedMerge struct {
	Merges map[string][][]byte // pending operands, keyed by string(key)
	mo     MergeOperator       // operator used for partial merges
}
||||||
|
|
||||||
|
// NewEmulatedMerge returns an empty EmulatedMerge using the given
// merge operator for eager partial merges.
func NewEmulatedMerge(mo MergeOperator) *EmulatedMerge {
	return &EmulatedMerge{
		Merges: make(map[string][][]byte),
		mo:     mo,
	}
}
||||||
|
|
||||||
|
func (m *EmulatedMerge) Merge(key, val []byte) { |
||||||
|
ops, ok := m.Merges[string(key)] |
||||||
|
if ok && len(ops) > 0 { |
||||||
|
last := ops[len(ops)-1] |
||||||
|
mergedVal, partialMergeOk := m.mo.PartialMerge(key, last, val) |
||||||
|
if partialMergeOk { |
||||||
|
// replace last entry with the result of the merge
|
||||||
|
ops[len(ops)-1] = mergedVal |
||||||
|
} else { |
||||||
|
// could not partial merge, append this to the end
|
||||||
|
ops = append(ops, val) |
||||||
|
} |
||||||
|
} else { |
||||||
|
ops = [][]byte{val} |
||||||
|
} |
||||||
|
m.Merges[string(key)] = ops |
||||||
|
} |
@ -0,0 +1,33 @@ |
|||||||
|
// Copyright (c) 2016 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package store |
||||||
|
|
||||||
|
// MultiGet is a helper function to retrieve mutiple keys from a
|
||||||
|
// KVReader, and might be used by KVStore implementations that don't
|
||||||
|
// have a native multi-get facility.
|
||||||
|
func MultiGet(kvreader KVReader, keys [][]byte) ([][]byte, error) { |
||||||
|
vals := make([][]byte, 0, len(keys)) |
||||||
|
|
||||||
|
for i, key := range keys { |
||||||
|
val, err := kvreader.Get(key) |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
|
||||||
|
vals[i] = val |
||||||
|
} |
||||||
|
|
||||||
|
return vals, nil |
||||||
|
} |
@ -0,0 +1,110 @@ |
|||||||
|
// Copyright (c) 2015 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package upsidedown |
||||||
|
|
||||||
|
import ( |
||||||
|
"github.com/blevesearch/bleve/analysis" |
||||||
|
"github.com/blevesearch/bleve/document" |
||||||
|
"github.com/blevesearch/bleve/index" |
||||||
|
) |
||||||
|
|
||||||
|
// Analyze converts a document into its AnalysisResult: the full set of
// index rows (field rows, term rows via indexField, stored rows via
// storeField) plus the back-index row that records what was written for
// this doc ID so it can later be deleted/updated.
func (udc *UpsideDownCouch) Analyze(d *document.Document) *index.AnalysisResult {
	rv := &index.AnalysisResult{
		DocID: d.ID,
		Rows:  make([]index.IndexRow, 0, 100),
	}

	docIDBytes := []byte(d.ID)

	// track our back index entries
	backIndexStoredEntries := make([]*BackIndexStoreEntry, 0)

	// information we collate as we merge fields with same name
	fieldTermFreqs := make(map[uint16]analysis.TokenFrequencies)
	fieldLengths := make(map[uint16]int)
	fieldIncludeTermVectors := make(map[uint16]bool)
	fieldNames := make(map[uint16]string)

	// analyzeField analyzes a single field, collating its token
	// frequencies/lengths into the maps above (keyed by field index) and,
	// when storable, emitting stored rows immediately. Composite fields
	// are passed with storable=false.
	analyzeField := func(field document.Field, storable bool) {
		fieldIndex, newFieldRow := udc.fieldIndexOrNewRow(field.Name())
		if newFieldRow != nil {
			rv.Rows = append(rv.Rows, newFieldRow)
		}
		fieldNames[fieldIndex] = field.Name()

		if field.Options().IsIndexed() {
			fieldLength, tokenFreqs := field.Analyze()
			existingFreqs := fieldTermFreqs[fieldIndex]
			if existingFreqs == nil {
				fieldTermFreqs[fieldIndex] = tokenFreqs
			} else {
				// same field name seen again (e.g. array element):
				// merge frequencies rather than overwrite
				existingFreqs.MergeAll(field.Name(), tokenFreqs)
				fieldTermFreqs[fieldIndex] = existingFreqs
			}
			fieldLengths[fieldIndex] += fieldLength
			fieldIncludeTermVectors[fieldIndex] = field.Options().IncludeTermVectors()
		}

		if storable && field.Options().IsStored() {
			rv.Rows, backIndexStoredEntries = udc.storeField(docIDBytes, field, fieldIndex, rv.Rows, backIndexStoredEntries)
		}
	}

	// walk all the fields, record stored fields now
	// place information about indexed fields into map
	// this collates information across fields with
	// same names (arrays)
	for _, field := range d.Fields {
		analyzeField(field, true)
	}

	if len(d.CompositeFields) > 0 {
		// feed every collated field's frequencies to each composite field,
		// then analyze the composites themselves (not storable)
		for fieldIndex, tokenFreqs := range fieldTermFreqs {
			// see if any of the composite fields need this
			for _, compositeField := range d.CompositeFields {
				compositeField.Compose(fieldNames[fieldIndex], fieldLengths[fieldIndex], tokenFreqs)
			}
		}

		for _, compositeField := range d.CompositeFields {
			analyzeField(compositeField, false)
		}
	}

	// pre-size rows: one term row per token per field, plus the back index row
	rowsCapNeeded := len(rv.Rows) + 1
	for _, tokenFreqs := range fieldTermFreqs {
		rowsCapNeeded += len(tokenFreqs)
	}

	rv.Rows = append(make([]index.IndexRow, 0, rowsCapNeeded), rv.Rows...)

	backIndexTermEntries := make([]*BackIndexTermEntry, 0, rowsCapNeeded)

	// walk through the collated information and process
	// once for each indexed field (unique name)
	for fieldIndex, tokenFreqs := range fieldTermFreqs {
		fieldLength := fieldLengths[fieldIndex]
		includeTermVectors := fieldIncludeTermVectors[fieldIndex]

		// encode this field
		rv.Rows, backIndexTermEntries = udc.indexField(docIDBytes, includeTermVectors, fieldIndex, fieldLength, tokenFreqs, rv.Rows, backIndexTermEntries)
	}

	// build the back index row
	backIndexRow := NewBackIndexRow(docIDBytes, backIndexTermEntries, backIndexStoredEntries)
	rv.Rows = append(rv.Rows, backIndexRow)

	return rv
}
@ -0,0 +1,8 @@ |
|||||||
|
#!/bin/sh

# Run every Go benchmark in this directory, one at a time, so each gets
# its own fresh process and a full 10s -benchtime window.

# Collect benchmark function names from the *_test.go files, stripping
# the "func " prefix and the parameter list / opening brace.
BENCHMARKS=`grep "func Benchmark" *_test.go | sed 's/.*func //' | sed s/\(.*{//`

for BENCHMARK in $BENCHMARKS
do
    # -run=xxx skips all unit tests; the anchored -bench pattern runs
    # exactly one benchmark; grep drops the ok/PASS noise lines.
    go test -v -run=xxx -bench=^$BENCHMARK$ -benchtime=10s -tags 'forestdb leveldb' | grep -v ok | grep -v PASS
done
@ -0,0 +1,172 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package upsidedown |
||||||
|
|
||||||
|
import ( |
||||||
|
"bytes" |
||||||
|
"sort" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/index/store" |
||||||
|
) |
||||||
|
|
||||||
|
// the functions in this file are only intended to be used by
|
||||||
|
// the bleve_dump utility and the debug http handlers
|
||||||
|
// if your application relies on them, you're doing something wrong
|
||||||
|
// they may change or be removed at any time
|
||||||
|
|
||||||
|
func dumpPrefix(kvreader store.KVReader, rv chan interface{}, prefix []byte) { |
||||||
|
start := prefix |
||||||
|
if start == nil { |
||||||
|
start = []byte{0} |
||||||
|
} |
||||||
|
it := kvreader.PrefixIterator(start) |
||||||
|
defer func() { |
||||||
|
cerr := it.Close() |
||||||
|
if cerr != nil { |
||||||
|
rv <- cerr |
||||||
|
} |
||||||
|
}() |
||||||
|
key, val, valid := it.Current() |
||||||
|
for valid { |
||||||
|
ck := make([]byte, len(key)) |
||||||
|
copy(ck, key) |
||||||
|
cv := make([]byte, len(val)) |
||||||
|
copy(cv, val) |
||||||
|
row, err := ParseFromKeyValue(ck, cv) |
||||||
|
if err != nil { |
||||||
|
rv <- err |
||||||
|
return |
||||||
|
} |
||||||
|
rv <- row |
||||||
|
|
||||||
|
it.Next() |
||||||
|
key, val, valid = it.Current() |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func dumpRange(kvreader store.KVReader, rv chan interface{}, start, end []byte) { |
||||||
|
it := kvreader.RangeIterator(start, end) |
||||||
|
defer func() { |
||||||
|
cerr := it.Close() |
||||||
|
if cerr != nil { |
||||||
|
rv <- cerr |
||||||
|
} |
||||||
|
}() |
||||||
|
key, val, valid := it.Current() |
||||||
|
for valid { |
||||||
|
ck := make([]byte, len(key)) |
||||||
|
copy(ck, key) |
||||||
|
cv := make([]byte, len(val)) |
||||||
|
copy(cv, val) |
||||||
|
row, err := ParseFromKeyValue(ck, cv) |
||||||
|
if err != nil { |
||||||
|
rv <- err |
||||||
|
return |
||||||
|
} |
||||||
|
rv <- row |
||||||
|
|
||||||
|
it.Next() |
||||||
|
key, val, valid = it.Current() |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// DumpAll returns a channel yielding every row in the index (or an
// error value); the channel is closed when the scan completes.
// Intended only for the bleve_dump utility and debug handlers.
func (i *IndexReader) DumpAll() chan interface{} {
	rv := make(chan interface{})
	go func() {
		defer close(rv)
		// nil/nil bounds scan the whole keyspace
		dumpRange(i.kvreader, rv, nil, nil)
	}()
	return rv
}
||||||
|
|
||||||
|
// DumpFields returns a channel yielding every field definition row
// (keys prefixed with 'f') or an error value; the channel is closed
// when the scan completes.
func (i *IndexReader) DumpFields() chan interface{} {
	rv := make(chan interface{})
	go func() {
		defer close(rv)
		dumpPrefix(i.kvreader, rv, []byte{'f'})
	}()
	return rv
}
||||||
|
|
||||||
|
// keyset is a collection of raw index keys, sortable bytewise
// ascending via sort.Interface.
type keyset [][]byte

// Len reports the number of keys.
func (k keyset) Len() int {
	return len(k)
}

// Swap exchanges the keys at positions i and j.
func (k keyset) Swap(i, j int) {
	k[j], k[i] = k[i], k[j]
}

// Less orders keys by a lexicographic byte comparison.
func (k keyset) Less(i, j int) bool {
	return bytes.Compare(k[i], k[j]) == -1
}
||||||
|
|
||||||
|
// DumpDoc returns all rows in the index related to this doc id
|
||||||
|
func (i *IndexReader) DumpDoc(id string) chan interface{} { |
||||||
|
idBytes := []byte(id) |
||||||
|
|
||||||
|
rv := make(chan interface{}) |
||||||
|
|
||||||
|
go func() { |
||||||
|
defer close(rv) |
||||||
|
|
||||||
|
back, err := backIndexRowForDoc(i.kvreader, []byte(id)) |
||||||
|
if err != nil { |
||||||
|
rv <- err |
||||||
|
return |
||||||
|
} |
||||||
|
|
||||||
|
// no such doc
|
||||||
|
if back == nil { |
||||||
|
return |
||||||
|
} |
||||||
|
// build sorted list of term keys
|
||||||
|
keys := make(keyset, 0) |
||||||
|
for _, entry := range back.termEntries { |
||||||
|
tfr := NewTermFrequencyRow([]byte(*entry.Term), uint16(*entry.Field), idBytes, 0, 0) |
||||||
|
key := tfr.Key() |
||||||
|
keys = append(keys, key) |
||||||
|
} |
||||||
|
sort.Sort(keys) |
||||||
|
|
||||||
|
// first add all the stored rows
|
||||||
|
storedRowPrefix := NewStoredRow(idBytes, 0, []uint64{}, 'x', []byte{}).ScanPrefixForDoc() |
||||||
|
dumpPrefix(i.kvreader, rv, storedRowPrefix) |
||||||
|
|
||||||
|
// now walk term keys in order and add them as well
|
||||||
|
if len(keys) > 0 { |
||||||
|
it := i.kvreader.RangeIterator(keys[0], nil) |
||||||
|
defer func() { |
||||||
|
cerr := it.Close() |
||||||
|
if cerr != nil { |
||||||
|
rv <- cerr |
||||||
|
} |
||||||
|
}() |
||||||
|
|
||||||
|
for _, key := range keys { |
||||||
|
it.Seek(key) |
||||||
|
rkey, rval, valid := it.Current() |
||||||
|
if !valid { |
||||||
|
break |
||||||
|
} |
||||||
|
rck := make([]byte, len(rkey)) |
||||||
|
copy(rck, key) |
||||||
|
rcv := make([]byte, len(rval)) |
||||||
|
copy(rcv, rval) |
||||||
|
row, err := ParseFromKeyValue(rck, rcv) |
||||||
|
if err != nil { |
||||||
|
rv <- err |
||||||
|
return |
||||||
|
} |
||||||
|
rv <- row |
||||||
|
} |
||||||
|
} |
||||||
|
}() |
||||||
|
|
||||||
|
return rv |
||||||
|
} |
@ -0,0 +1,78 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package upsidedown |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/index" |
||||||
|
"github.com/blevesearch/bleve/index/store" |
||||||
|
) |
||||||
|
|
||||||
|
// UpsideDownCouchFieldDict iterates the dictionary rows of a single
// field, yielding one index.DictEntry per distinct term.
type UpsideDownCouchFieldDict struct {
	indexReader *IndexReader
	iterator    store.KVIterator // positioned over the field's 'd' rows
	dictRow     *DictionaryRow   // pre-allocated row, reused on each Next
	dictEntry   *index.DictEntry // pre-allocated entry, reused on each Next
	field       uint16
}
||||||
|
|
||||||
|
// newUpsideDownCouchFieldDict opens a range iterator over the
// dictionary rows of field, from startTerm (inclusive) to endTerm
// (inclusive — the end key is incremented to make the bound cover it).
// A nil endTerm scans to the end of the field's dictionary.
func newUpsideDownCouchFieldDict(indexReader *IndexReader, field uint16, startTerm, endTerm []byte) (*UpsideDownCouchFieldDict, error) {

	startKey := NewDictionaryRow(startTerm, field, 0).Key()
	if endTerm == nil {
		// ByteSeparator (0xff) sorts after any valid term byte
		endTerm = []byte{ByteSeparator}
	} else {
		// bump the end term so the range includes endTerm itself
		endTerm = incrementBytes(endTerm)
	}
	endKey := NewDictionaryRow(endTerm, field, 0).Key()

	it := indexReader.kvreader.RangeIterator(startKey, endKey)

	return &UpsideDownCouchFieldDict{
		indexReader: indexReader,
		iterator:    it,
		dictRow:     &DictionaryRow{},   // Pre-alloced, reused row.
		dictEntry:   &index.DictEntry{}, // Pre-alloced, reused entry.
		field:       field,
	}, nil

}
||||||
|
|
||||||
|
// Next returns the next dictionary entry (term and document count),
// or (nil, nil) when the iterator is exhausted. The returned entry is
// the reused pre-allocated one, so it is only valid until the next call.
func (r *UpsideDownCouchFieldDict) Next() (*index.DictEntry, error) {
	key, val, valid := r.iterator.Current()
	if !valid {
		return nil, nil
	}

	err := r.dictRow.parseDictionaryK(key)
	if err != nil {
		return nil, fmt.Errorf("unexpected error parsing dictionary row key: %v", err)
	}
	err = r.dictRow.parseDictionaryV(val)
	if err != nil {
		return nil, fmt.Errorf("unexpected error parsing dictionary row val: %v", err)
	}
	r.dictEntry.Term = string(r.dictRow.term)
	r.dictEntry.Count = r.dictRow.count
	// advance the iterator to the next term
	r.iterator.Next()
	return r.dictEntry, nil

}
||||||
|
|
||||||
|
// Close releases the underlying KV iterator.
func (r *UpsideDownCouchFieldDict) Close() error {
	return r.iterator.Close()
}
@ -0,0 +1,189 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package upsidedown |
||||||
|
|
||||||
|
import ( |
||||||
|
"github.com/blevesearch/bleve/document" |
||||||
|
"github.com/blevesearch/bleve/index" |
||||||
|
"github.com/blevesearch/bleve/index/store" |
||||||
|
) |
||||||
|
|
||||||
|
// IndexReader provides read access to a point-in-time snapshot of an
// UpsideDownCouch index via a single KV reader.
type IndexReader struct {
	index    *UpsideDownCouch
	kvreader store.KVReader
	docCount uint64 // document count captured when the reader was opened
}
||||||
|
|
||||||
|
// TermFieldReader returns a reader over the postings of term in the
// named field. For an unknown field it falls back to an impossible
// term/field pair, yielding an empty reader.
func (i *IndexReader) TermFieldReader(term []byte, fieldName string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) {
	fieldIndex, fieldExists := i.index.fieldCache.FieldNamed(fieldName, false)
	if fieldExists {
		return newUpsideDownCouchTermFieldReader(i, term, uint16(fieldIndex), includeFreq, includeNorm, includeTermVectors)
	}
	return newUpsideDownCouchTermFieldReader(i, []byte{ByteSeparator}, ^uint16(0), includeFreq, includeNorm, includeTermVectors)
}
||||||
|
|
||||||
|
// FieldDict returns a dictionary iterator over all terms of fieldName.
func (i *IndexReader) FieldDict(fieldName string) (index.FieldDict, error) {
	return i.FieldDictRange(fieldName, nil, nil)
}

// FieldDictRange returns a dictionary iterator over terms of fieldName
// between startTerm and endTerm. For an unknown field it builds an
// empty-range iterator on an impossible field number.
func (i *IndexReader) FieldDictRange(fieldName string, startTerm []byte, endTerm []byte) (index.FieldDict, error) {
	fieldIndex, fieldExists := i.index.fieldCache.FieldNamed(fieldName, false)
	if fieldExists {
		return newUpsideDownCouchFieldDict(i, uint16(fieldIndex), startTerm, endTerm)
	}
	return newUpsideDownCouchFieldDict(i, ^uint16(0), []byte{ByteSeparator}, []byte{})
}

// FieldDictPrefix returns a dictionary iterator over terms of fieldName
// starting with termPrefix. Passing the prefix as both bounds works
// because the constructor increments the end term to a covering bound.
func (i *IndexReader) FieldDictPrefix(fieldName string, termPrefix []byte) (index.FieldDict, error) {
	return i.FieldDictRange(fieldName, termPrefix, termPrefix)
}

// DocIDReaderAll returns a reader over every document ID in the index.
func (i *IndexReader) DocIDReaderAll() (index.DocIDReader, error) {
	return newUpsideDownCouchDocIDReader(i)
}

// DocIDReaderOnly returns a reader restricted to the given document IDs
// (only those that actually exist in the index are yielded).
func (i *IndexReader) DocIDReaderOnly(ids []string) (index.DocIDReader, error) {
	return newUpsideDownCouchDocIDReaderOnly(i, ids)
}
||||||
|
|
||||||
|
// Document reconstructs the stored fields of the document with the
// given external id. Returns (nil, nil) if the document does not exist.
// Uses named returns so the deferred iterator Close can surface its
// error when nothing else failed.
func (i *IndexReader) Document(id string) (doc *document.Document, err error) {
	// first hit the back index to confirm doc exists
	var backIndexRow *BackIndexRow
	backIndexRow, err = backIndexRowForDoc(i.kvreader, []byte(id))
	if err != nil {
		return
	}
	if backIndexRow == nil {
		return
	}
	doc = document.NewDocument(id)
	// scan all stored rows for this doc; field/array/type in the
	// prefix template are placeholders, only the doc part matters
	storedRow := NewStoredRow([]byte(id), 0, []uint64{}, 'x', nil)
	storedRowScanPrefix := storedRow.ScanPrefixForDoc()
	it := i.kvreader.PrefixIterator(storedRowScanPrefix)
	defer func() {
		if cerr := it.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()
	key, val, valid := it.Current()
	for valid {
		// copy the value: the iterator may reuse its buffer on Next
		safeVal := make([]byte, len(val))
		copy(safeVal, val)
		var row *StoredRow
		row, err = NewStoredRowKV(key, safeVal)
		if err != nil {
			doc = nil
			return
		}
		if row != nil {
			// map the stored field number back to its name and decode
			fieldName := i.index.fieldCache.FieldIndexed(row.field)
			field := decodeFieldType(row.typ, fieldName, row.arrayPositions, row.value)
			if field != nil {
				doc.AddField(field)
			}
		}

		it.Next()
		key, val, valid = it.Current()
	}
	return
}
||||||
|
|
||||||
|
// DocumentFieldTerms returns, for the document with internal id, the
// terms it was indexed with in each of the requested fields, read from
// the back index row. Returns (nil, nil) if the document does not exist.
func (i *IndexReader) DocumentFieldTerms(id index.IndexInternalID, fields []string) (index.FieldTerms, error) {
	back, err := backIndexRowForDoc(i.kvreader, id)
	if err != nil {
		return nil, err
	}
	if back == nil {
		return nil, nil
	}
	rv := make(index.FieldTerms, len(fields))
	// resolve requested field names to field numbers (unknown names
	// are silently skipped)
	fieldsMap := make(map[uint16]string, len(fields))
	for _, f := range fields {
		id, ok := i.index.fieldCache.FieldNamed(f, false)
		if ok {
			fieldsMap[id] = f
		}
	}
	for _, entry := range back.termEntries {
		if field, ok := fieldsMap[uint16(*entry.Field)]; ok {
			rv[field] = append(rv[field], *entry.Term)
		}
	}
	return rv, nil
}
||||||
|
|
||||||
|
// Fields returns the names of all fields defined in the index, by
// scanning the 'f' keyspace. Uses named returns so the deferred
// iterator Close can surface its error when nothing else failed.
func (i *IndexReader) Fields() (fields []string, err error) {
	fields = make([]string, 0)
	it := i.kvreader.PrefixIterator([]byte{'f'})
	defer func() {
		if cerr := it.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()
	key, val, valid := it.Current()
	for valid {
		var row UpsideDownCouchRow
		row, err = ParseFromKeyValue(key, val)
		if err != nil {
			fields = nil
			return
		}
		if row != nil {
			fieldRow, ok := row.(*FieldRow)
			if ok {
				fields = append(fields, fieldRow.name)
			}
		}

		it.Next()
		key, val, valid = it.Current()
	}
	return
}
||||||
|
|
||||||
|
// GetInternal returns the application-defined value stored under key
// in the internal ('i') keyspace, or nil if absent.
func (i *IndexReader) GetInternal(key []byte) ([]byte, error) {
	internalRow := NewInternalRow(key, nil)
	return i.kvreader.Get(internalRow.Key())
}

// DocCount returns the document count captured when this reader's
// snapshot was opened.
func (i *IndexReader) DocCount() (uint64, error) {
	return i.docCount, nil
}

// Close releases the underlying KV reader snapshot.
func (i *IndexReader) Close() error {
	return i.kvreader.Close()
}

// ExternalID converts an internal document ID to its external string
// form; in upsidedown they are the same bytes.
func (i *IndexReader) ExternalID(id index.IndexInternalID) (string, error) {
	return string(id), nil
}

// InternalID converts an external document ID to its internal form;
// in upsidedown they are the same bytes.
func (i *IndexReader) InternalID(id string) (index.IndexInternalID, error) {
	return index.IndexInternalID(id), nil
}
||||||
|
|
||||||
|
// incrementBytes returns a copy of in, treated as a big-endian
// unsigned integer, incremented by one. The carry propagates from the
// last byte toward the first; if every byte overflows the result wraps
// to all zeros. The input slice is never modified.
func incrementBytes(in []byte) []byte {
	out := append([]byte(nil), in...)
	for pos := len(out) - 1; pos >= 0; pos-- {
		out[pos]++
		if out[pos] != 0 {
			// no carry into the next byte, done
			break
		}
	}
	return out
}
@ -0,0 +1,325 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package upsidedown |
||||||
|
|
||||||
|
import ( |
||||||
|
"bytes" |
||||||
|
"sort" |
||||||
|
"sync/atomic" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/index" |
||||||
|
"github.com/blevesearch/bleve/index/store" |
||||||
|
) |
||||||
|
|
||||||
|
// UpsideDownCouchTermFieldReader iterates the term frequency rows of a
// single (term, field) pair, yielding one TermFieldDoc per document.
type UpsideDownCouchTermFieldReader struct {
	count       uint64 // total docs for this term/field, from the dictionary row
	indexReader *IndexReader
	iterator    store.KVIterator   // nil when the term does not exist
	term        []byte
	tfrNext     *TermFrequencyRow // reused scratch row; nil doubles as "not started" flag
	keyBuf      []byte            // reused key buffer for Advance seeks
	field       uint16
}
||||||
|
|
||||||
|
// newUpsideDownCouchTermFieldReader looks up the dictionary row for
// (term, field) to learn the posting count, then opens a prefix
// iterator over the matching term frequency rows. If the term is
// absent it returns an empty reader (count 0, no iterator).
// Both paths bump the termSearchersStarted stat.
func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) {
	dictionaryRow := NewDictionaryRow(term, field, 0)
	val, err := indexReader.kvreader.Get(dictionaryRow.Key())
	if err != nil {
		return nil, err
	}
	if val == nil {
		atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1))
		// term not in dictionary: empty reader, Next/Advance return nil
		return &UpsideDownCouchTermFieldReader{
			count:   0,
			term:    term,
			tfrNext: &TermFrequencyRow{},
			field:   field,
		}, nil
	}

	err = dictionaryRow.parseDictionaryV(val)
	if err != nil {
		return nil, err
	}

	// empty doc id in the template yields the common prefix of all
	// term frequency rows for this term/field
	tfr := NewTermFrequencyRow(term, field, []byte{}, 0, 0)
	it := indexReader.kvreader.PrefixIterator(tfr.Key())

	atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1))
	return &UpsideDownCouchTermFieldReader{
		indexReader: indexReader,
		iterator:    it,
		count:       dictionaryRow.count,
		term:        term,
		field:       field,
	}, nil
}
||||||
|
|
||||||
|
// Count returns the number of documents containing the term in the
// field, as recorded in the dictionary row.
func (r *UpsideDownCouchTermFieldReader) Count() uint64 {
	return r.count
}
||||||
|
|
||||||
|
// Next returns the next posting as a TermFieldDoc, filling preAlloced
// when non-nil, or (nil, nil) when the postings are exhausted.
func (r *UpsideDownCouchTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
	if r.iterator != nil {
		// We treat tfrNext also like an initialization flag, which
		// tells us whether we need to invoke the underlying
		// iterator.Next(). The first time, don't call iterator.Next().
		if r.tfrNext != nil {
			r.iterator.Next()
		} else {
			r.tfrNext = &TermFrequencyRow{}
		}
		key, val, valid := r.iterator.Current()
		if valid {
			// parse into the reused scratch row
			tfr := r.tfrNext
			err := tfr.parseKDoc(key, r.term)
			if err != nil {
				return nil, err
			}
			err = tfr.parseV(val)
			if err != nil {
				return nil, err
			}
			rv := preAlloced
			if rv == nil {
				rv = &index.TermFieldDoc{}
			}
			// append the doc id rather than alias the scratch row's buffer
			rv.ID = append(rv.ID, tfr.doc...)
			rv.Freq = tfr.freq
			rv.Norm = float64(tfr.norm)
			if tfr.vectors != nil {
				rv.Vectors = r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors)
			}
			return rv, nil
		}
	}
	return nil, nil
}
||||||
|
|
||||||
|
// Advance seeks to the first posting with document id >= docID and
// returns it (filling preAlloced when non-nil), or (nil, nil) when no
// such posting exists.
func (r *UpsideDownCouchTermFieldReader) Advance(docID index.IndexInternalID, preAlloced *index.TermFieldDoc) (rv *index.TermFieldDoc, err error) {
	if r.iterator != nil {
		if r.tfrNext == nil {
			r.tfrNext = &TermFrequencyRow{}
		}
		// build the seek key for (term, field, docID) in the reused buffer
		tfr := InitTermFrequencyRow(r.tfrNext, r.term, r.field, docID, 0, 0)
		r.keyBuf, err = tfr.KeyAppendTo(r.keyBuf[:0])
		if err != nil {
			return nil, err
		}
		r.iterator.Seek(r.keyBuf)
		key, val, valid := r.iterator.Current()
		if valid {
			err := tfr.parseKDoc(key, r.term)
			if err != nil {
				return nil, err
			}
			err = tfr.parseV(val)
			if err != nil {
				return nil, err
			}
			rv = preAlloced
			if rv == nil {
				rv = &index.TermFieldDoc{}
			}
			// append the doc id rather than alias the scratch row's buffer
			rv.ID = append(rv.ID, tfr.doc...)
			rv.Freq = tfr.freq
			rv.Norm = float64(tfr.norm)
			if tfr.vectors != nil {
				rv.Vectors = r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors)
			}
			return rv, nil
		}
	}
	return nil, nil
}
||||||
|
|
||||||
|
// Close bumps the termSearchersFinished stat and releases the
// underlying iterator if one was opened.
func (r *UpsideDownCouchTermFieldReader) Close() error {
	if r.indexReader != nil {
		atomic.AddUint64(&r.indexReader.index.stats.termSearchersFinished, uint64(1))
	}
	if r.iterator != nil {
		return r.iterator.Close()
	}
	return nil
}
||||||
|
|
||||||
|
// UpsideDownCouchDocIDReader iterates document IDs via back index
// rows; in "only" mode it is restricted to a sorted candidate ID list.
type UpsideDownCouchDocIDReader struct {
	indexReader *IndexReader
	iterator    store.KVIterator
	only        []string // sorted candidate IDs (only mode)
	onlyPos     int      // current position within only
	onlyMode    bool
}
||||||
|
|
||||||
|
// newUpsideDownCouchDocIDReader opens a range iterator over every back
// index ('b') row, covering all document IDs in the index.
func newUpsideDownCouchDocIDReader(indexReader *IndexReader) (*UpsideDownCouchDocIDReader, error) {

	// 0x00..0xff brackets every possible doc id byte
	startBytes := []byte{0x0}
	endBytes := []byte{0xff}

	bisr := NewBackIndexRow(startBytes, nil, nil)
	bier := NewBackIndexRow(endBytes, nil, nil)
	it := indexReader.kvreader.RangeIterator(bisr.Key(), bier.Key())

	return &UpsideDownCouchDocIDReader{
		indexReader: indexReader,
		iterator:    it,
	}, nil
}
||||||
|
|
||||||
|
// newUpsideDownCouchDocIDReaderOnly opens a back index range iterator
// restricted to the given candidate IDs. NOTE: the ids slice is sorted
// in place as a side effect.
func newUpsideDownCouchDocIDReaderOnly(indexReader *IndexReader, ids []string) (*UpsideDownCouchDocIDReader, error) {
	// ensure ids are sorted
	sort.Strings(ids)
	// narrow the scan to [first id, last id] when candidates exist
	startBytes := []byte{0x0}
	if len(ids) > 0 {
		startBytes = []byte(ids[0])
	}
	endBytes := []byte{0xff}
	if len(ids) > 0 {
		// increment so the range end includes the last id itself
		endBytes = incrementBytes([]byte(ids[len(ids)-1]))
	}
	bisr := NewBackIndexRow(startBytes, nil, nil)
	bier := NewBackIndexRow(endBytes, nil, nil)
	it := indexReader.kvreader.RangeIterator(bisr.Key(), bier.Key())

	return &UpsideDownCouchDocIDReader{
		indexReader: indexReader,
		iterator:    it,
		only:        ids,
		onlyMode:    true,
	}, nil
}
||||||
|
|
||||||
|
// Next returns the next document ID, or (nil, nil) when exhausted.
// In only mode it yields the next candidate ID that actually exists in
// the index, seeking past candidates that do not.
func (r *UpsideDownCouchDocIDReader) Next() (index.IndexInternalID, error) {
	key, val, valid := r.iterator.Current()

	if r.onlyMode {
		var rv index.IndexInternalID
		for valid && r.onlyPos < len(r.only) {
			br, err := NewBackIndexRowKV(key, val)
			if err != nil {
				return nil, err
			}
			if !bytes.Equal(br.doc, []byte(r.only[r.onlyPos])) {
				// current candidate is missing from the index; advance
				// to the next distinct candidate and seek to its row
				ok := r.nextOnly()
				if !ok {
					return nil, nil
				}
				r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
				key, val, valid = r.iterator.Current()
				continue
			} else {
				// candidate found; copy the id out of the row buffer
				rv = append([]byte(nil), br.doc...)
				break
			}
		}
		if valid && r.onlyPos < len(r.only) {
			// pre-position on the next candidate for the following call
			ok := r.nextOnly()
			if ok {
				r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
			}
			return rv, nil
		}

	} else {
		if valid {
			br, err := NewBackIndexRowKV(key, val)
			if err != nil {
				return nil, err
			}
			// copy the id out of the row buffer before advancing
			rv := append([]byte(nil), br.doc...)
			r.iterator.Next()
			return rv, nil
		}
	}
	return nil, nil
}
||||||
|
|
||||||
|
// Advance seeks to the first document ID >= docID and returns it, or
// (nil, nil) when no such document exists. In only mode the result is
// additionally restricted to the candidate list.
func (r *UpsideDownCouchDocIDReader) Advance(docID index.IndexInternalID) (index.IndexInternalID, error) {

	if r.onlyMode {
		// jump the candidate cursor to the first candidate >= docID
		r.onlyPos = sort.SearchStrings(r.only, string(docID))
		if r.onlyPos >= len(r.only) {
			// advanced to key after our last only key
			return nil, nil
		}
		r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
		key, val, valid := r.iterator.Current()

		var rv index.IndexInternalID
		for valid && r.onlyPos < len(r.only) {
			br, err := NewBackIndexRowKV(key, val)
			if err != nil {
				return nil, err
			}
			if !bytes.Equal(br.doc, []byte(r.only[r.onlyPos])) {
				// the only key we seek'd to didn't exist
				// now look for the closest key that did exist in only
				r.onlyPos = sort.SearchStrings(r.only, string(br.doc))
				if r.onlyPos >= len(r.only) {
					// advanced to key after our last only key
					return nil, nil
				}
				// now seek to this new only key
				r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
				key, val, valid = r.iterator.Current()
				continue
			} else {
				// candidate found; copy the id out of the row buffer
				rv = append([]byte(nil), br.doc...)
				break
			}
		}
		if valid && r.onlyPos < len(r.only) {
			// pre-position on the next candidate for the following call
			ok := r.nextOnly()
			if ok {
				r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
			}
			return rv, nil
		}
	} else {
		// seek directly to the back index row for docID
		bir := NewBackIndexRow(docID, nil, nil)
		r.iterator.Seek(bir.Key())
		key, val, valid := r.iterator.Current()
		if valid {
			br, err := NewBackIndexRowKV(key, val)
			if err != nil {
				return nil, err
			}
			rv := append([]byte(nil), br.doc...)
			r.iterator.Next()
			return rv, nil
		}
	}
	return nil, nil
}
||||||
|
|
||||||
|
// Close releases the underlying KV iterator.
func (r *UpsideDownCouchDocIDReader) Close() error {
	return r.iterator.Close()
}
||||||
|
|
||||||
|
// move the r.only pos forward one, skipping duplicates
// return true if there is more data, or false if we got to the end of the list
func (r *UpsideDownCouchDocIDReader) nextOnly() bool {

	// advance 1 position, until we see a different key
	// it's already sorted, so this skips duplicates
	start := r.onlyPos
	r.onlyPos++
	for r.onlyPos < len(r.only) && r.only[r.onlyPos] == r.only[start] {
		start = r.onlyPos
		r.onlyPos++
	}
	// indicate if we got to the end of the list
	return r.onlyPos < len(r.only)
}
@ -0,0 +1,853 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package upsidedown |
||||||
|
|
||||||
|
import ( |
||||||
|
"bytes" |
||||||
|
"encoding/binary" |
||||||
|
"fmt" |
||||||
|
"io" |
||||||
|
"math" |
||||||
|
|
||||||
|
"github.com/golang/protobuf/proto" |
||||||
|
) |
||||||
|
|
||||||
|
// ByteSeparator terminates variable-length components inside row keys
// and values; 0xff sorts after every valid content byte.
const ByteSeparator byte = 0xff

// UpsideDownCouchRowStream is a channel of decoded index rows.
type UpsideDownCouchRowStream chan UpsideDownCouchRow

// UpsideDownCouchRow is the common encoding contract for every row
// type: each can report its key/value sizes, serialize into a caller
// buffer (KeyTo/ValueTo), or materialize fresh slices (Key/Value).
type UpsideDownCouchRow interface {
	KeySize() int
	KeyTo([]byte) (int, error)
	Key() []byte
	Value() []byte
	ValueSize() int
	ValueTo([]byte) (int, error)
}
||||||
|
|
||||||
|
// ParseFromKeyValue decodes a raw KV pair into the concrete row type
// selected by the key's leading marker byte ('v' version, 'f' field,
// 'd' dictionary, 't' term frequency, 'b' back index, 's' stored,
// 'i' internal). Unknown markers and empty keys are errors.
func ParseFromKeyValue(key, value []byte) (UpsideDownCouchRow, error) {
	if len(key) > 0 {
		switch key[0] {
		case 'v':
			return NewVersionRowKV(key, value)
		case 'f':
			return NewFieldRowKV(key, value)
		case 'd':
			return NewDictionaryRowKV(key, value)
		case 't':
			return NewTermFrequencyRowKV(key, value)
		case 'b':
			return NewBackIndexRowKV(key, value)
		case 's':
			return NewStoredRowKV(key, value)
		case 'i':
			return NewInternalRowKV(key, value)
		}
		return nil, fmt.Errorf("Unknown field type '%s'", string(key[0]))
	}
	return nil, fmt.Errorf("Invalid empty key")
}
||||||
|
|
||||||
|
// VERSION
|
||||||
|
|
||||||
|
// VersionRow records the index format version under the fixed key 'v'.
type VersionRow struct {
	version uint8
}

// Key returns the fixed single-byte key 'v'.
func (v *VersionRow) Key() []byte {
	return []byte{'v'}
}

// KeySize reports the encoded key length (always 1).
func (v *VersionRow) KeySize() int {
	return 1
}

// KeyTo writes the key into buf and reports the bytes written.
func (v *VersionRow) KeyTo(buf []byte) (int, error) {
	buf[0] = 'v'
	return 1, nil
}

// Value returns the version encoded as a single byte.
func (v *VersionRow) Value() []byte {
	return []byte{v.version}
}

// ValueSize reports the encoded value length (always 1).
func (v *VersionRow) ValueSize() int {
	return 1
}

// ValueTo writes the value into buf and reports the bytes written.
func (v *VersionRow) ValueTo(buf []byte) (int, error) {
	buf[0] = v.version
	return 1, nil
}

// String renders the row for debugging.
func (v *VersionRow) String() string {
	return fmt.Sprintf("Version: %d", v.version)
}

// NewVersionRow builds a VersionRow for the given version.
func NewVersionRow(version uint8) *VersionRow {
	return &VersionRow{version: version}
}

// NewVersionRowKV decodes a VersionRow from a raw KV pair; the key is
// ignored, the value's first byte is the version.
func NewVersionRowKV(key, value []byte) (*VersionRow, error) {
	var rv VersionRow
	if err := binary.Read(bytes.NewReader(value), binary.LittleEndian, &rv.version); err != nil {
		return nil, err
	}
	return &rv, nil
}
||||||
|
|
||||||
|
// INTERNAL STORAGE
|
||||||
|
|
||||||
|
// InternalRow stores an application-provided key/value pair in the
// internal 'i' keyspace, separate from the regular index rows.
type InternalRow struct {
	key []byte
	val []byte
}

// Key materializes the encoded key ('i' + raw key).
func (i *InternalRow) Key() []byte {
	k := make([]byte, i.KeySize())
	n, _ := i.KeyTo(k)
	return k[:n]
}

// KeySize reports the encoded key length.
func (i *InternalRow) KeySize() int {
	return 1 + len(i.key)
}

// KeyTo writes marker byte 'i' followed by the raw key into buf and
// reports the bytes written.
func (i *InternalRow) KeyTo(buf []byte) (int, error) {
	buf[0] = 'i'
	n := copy(buf[1:], i.key)
	return n + 1, nil
}

// Value returns the stored value bytes.
func (i *InternalRow) Value() []byte {
	return i.val
}

// ValueSize reports the encoded value length.
func (i *InternalRow) ValueSize() int {
	return len(i.val)
}

// ValueTo writes the value into buf and reports the bytes written.
func (i *InternalRow) ValueTo(buf []byte) (int, error) {
	return copy(buf, i.val), nil
}

// String renders the row for debugging.
func (i *InternalRow) String() string {
	return fmt.Sprintf("InternalStore - Key: %s (% x) Val: %s (% x)", i.key, i.key, i.val, i.val)
}

// NewInternalRow builds an InternalRow for the given key and value.
func NewInternalRow(key, val []byte) *InternalRow {
	return &InternalRow{key: key, val: val}
}

// NewInternalRowKV decodes an InternalRow from a raw KV pair, stripping
// the leading 'i' marker from the key. The input slices are aliased,
// not copied.
func NewInternalRowKV(key, value []byte) (*InternalRow, error) {
	return &InternalRow{
		key: key[1:],
		val: value,
	}, nil
}
||||||
|
|
||||||
|
// FIELD definition
|
||||||
|
|
||||||
|
type FieldRow struct { |
||||||
|
index uint16 |
||||||
|
name string |
||||||
|
} |
||||||
|
|
||||||
|
func (f *FieldRow) Key() []byte { |
||||||
|
buf := make([]byte, f.KeySize()) |
||||||
|
size, _ := f.KeyTo(buf) |
||||||
|
return buf[:size] |
||||||
|
} |
||||||
|
|
||||||
|
func (f *FieldRow) KeySize() int { |
||||||
|
return 3 |
||||||
|
} |
||||||
|
|
||||||
|
func (f *FieldRow) KeyTo(buf []byte) (int, error) { |
||||||
|
buf[0] = 'f' |
||||||
|
binary.LittleEndian.PutUint16(buf[1:3], f.index) |
||||||
|
return 3, nil |
||||||
|
} |
||||||
|
|
||||||
|
func (f *FieldRow) Value() []byte { |
||||||
|
return append([]byte(f.name), ByteSeparator) |
||||||
|
} |
||||||
|
|
||||||
|
func (f *FieldRow) ValueSize() int { |
||||||
|
return len(f.name) + 1 |
||||||
|
} |
||||||
|
|
||||||
|
func (f *FieldRow) ValueTo(buf []byte) (int, error) { |
||||||
|
size := copy(buf, f.name) |
||||||
|
buf[size] = ByteSeparator |
||||||
|
return size + 1, nil |
||||||
|
} |
||||||
|
|
||||||
|
// String implements fmt.Stringer for debugging output.
func (f *FieldRow) String() string {
	return fmt.Sprintf("Field: %d Name: %s", f.index, f.name)
}
||||||
|
|
||||||
|
func NewFieldRow(index uint16, name string) *FieldRow { |
||||||
|
return &FieldRow{ |
||||||
|
index: index, |
||||||
|
name: name, |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// NewFieldRowKV decodes a FieldRow from its stored key ('f' + uint16
// index) and value (name + ByteSeparator).
func NewFieldRowKV(key, value []byte) (*FieldRow, error) {
	rv := FieldRow{}

	buf := bytes.NewBuffer(key)
	_, err := buf.ReadByte() // type
	if err != nil {
		return nil, err
	}
	err = binary.Read(buf, binary.LittleEndian, &rv.index)
	if err != nil {
		return nil, err
	}

	buf = bytes.NewBuffer(value)
	// ReadString includes the delimiter, so it is trimmed below.
	rv.name, err = buf.ReadString(ByteSeparator)
	if err != nil {
		return nil, err
	}
	rv.name = rv.name[:len(rv.name)-1] // trim off separator byte

	return &rv, nil
}
||||||
|
|
||||||
|
// DICTIONARY
|
||||||
|
|
||||||
|
// DictionaryRowMaxValueSize is the maximum encoded size of a dictionary
// row value: a single uvarint term count.
const DictionaryRowMaxValueSize = binary.MaxVarintLen64
||||||
|
|
||||||
|
// DictionaryRow tracks how many times a term occurs within a field.
type DictionaryRow struct {
	term  []byte // the dictionary term
	count uint64 // number of occurrences of term in field
	field uint16 // field index the term belongs to
}
||||||
|
|
||||||
|
func (dr *DictionaryRow) Key() []byte { |
||||||
|
buf := make([]byte, dr.KeySize()) |
||||||
|
size, _ := dr.KeyTo(buf) |
||||||
|
return buf[:size] |
||||||
|
} |
||||||
|
|
||||||
|
// KeySize is 1 type byte + 2 bytes of field index + the term bytes.
func (dr *DictionaryRow) KeySize() int {
	return len(dr.term) + 3
}
||||||
|
|
||||||
|
// KeyTo writes 'd' + little-endian field index + term into buf.
func (dr *DictionaryRow) KeyTo(buf []byte) (int, error) {
	buf[0] = 'd'
	binary.LittleEndian.PutUint16(buf[1:3], dr.field)
	size := copy(buf[3:], dr.term)
	return size + 3, nil
}
||||||
|
|
||||||
|
func (dr *DictionaryRow) Value() []byte { |
||||||
|
buf := make([]byte, dr.ValueSize()) |
||||||
|
size, _ := dr.ValueTo(buf) |
||||||
|
return buf[:size] |
||||||
|
} |
||||||
|
|
||||||
|
// ValueSize is the worst-case uvarint length; ValueTo may use fewer bytes.
func (dr *DictionaryRow) ValueSize() int {
	return DictionaryRowMaxValueSize
}
||||||
|
|
||||||
|
func (dr *DictionaryRow) ValueTo(buf []byte) (int, error) { |
||||||
|
used := binary.PutUvarint(buf, dr.count) |
||||||
|
return used, nil |
||||||
|
} |
||||||
|
|
||||||
|
// String implements fmt.Stringer for debugging output.
func (dr *DictionaryRow) String() string {
	return fmt.Sprintf("Dictionary Term: `%s` Field: %d Count: %d ", string(dr.term), dr.field, dr.count)
}
||||||
|
|
||||||
|
func NewDictionaryRow(term []byte, field uint16, count uint64) *DictionaryRow { |
||||||
|
return &DictionaryRow{ |
||||||
|
term: term, |
||||||
|
field: field, |
||||||
|
count: count, |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func NewDictionaryRowKV(key, value []byte) (*DictionaryRow, error) { |
||||||
|
rv, err := NewDictionaryRowK(key) |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
|
||||||
|
err = rv.parseDictionaryV(value) |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
return rv, nil |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
func NewDictionaryRowK(key []byte) (*DictionaryRow, error) { |
||||||
|
rv := &DictionaryRow{} |
||||||
|
err := rv.parseDictionaryK(key) |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
return rv, nil |
||||||
|
} |
||||||
|
|
||||||
|
// parseDictionaryK extracts the field index and term from a key.
// The term slice is truncated and re-appended so an existing backing
// array can be reused across calls; the key bytes are copied, not aliased.
func (dr *DictionaryRow) parseDictionaryK(key []byte) error {
	dr.field = binary.LittleEndian.Uint16(key[1:3])
	if dr.term != nil {
		dr.term = dr.term[:0]
	}
	dr.term = append(dr.term, key[3:]...)
	return nil
}
||||||
|
|
||||||
|
func (dr *DictionaryRow) parseDictionaryV(value []byte) error { |
||||||
|
count, nread := binary.Uvarint(value) |
||||||
|
if nread <= 0 { |
||||||
|
return fmt.Errorf("DictionaryRow parse Uvarint error, nread: %d", nread) |
||||||
|
} |
||||||
|
dr.count = count |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
// TERM FIELD FREQUENCY
|
||||||
|
|
||||||
|
// TermVector records one occurrence of a term within a document field.
type TermVector struct {
	field          uint16   // field index the occurrence belongs to
	arrayPositions []uint64 // position within nested arrays, if any
	pos            uint64   // token position of the occurrence
	start          uint64   // byte offset where the occurrence starts
	end            uint64   // byte offset where the occurrence ends
}
||||||
|
|
||||||
|
// String implements fmt.Stringer for debugging output.
func (tv *TermVector) String() string {
	return fmt.Sprintf("Field: %d Pos: %d Start: %d End %d ArrayPositions: %#v", tv.field, tv.pos, tv.start, tv.end, tv.arrayPositions)
}
||||||
|
|
||||||
|
// TermFrequencyRow records how often a term occurs in one document
// field, with optional per-occurrence term vectors.
type TermFrequencyRow struct {
	term    []byte        // the term
	doc     []byte        // the document ID
	freq    uint64        // occurrence count of term in doc/field
	vectors []*TermVector // optional occurrence details; nil when absent
	norm    float32       // field-length normalization factor
	field   uint16        // field index
}
||||||
|
|
||||||
|
// Term returns the row's term bytes.
func (tfr *TermFrequencyRow) Term() []byte {
	return tfr.term
}
||||||
|
|
||||||
|
// Freq returns the term's occurrence count.
func (tfr *TermFrequencyRow) Freq() uint64 {
	return tfr.freq
}
||||||
|
|
||||||
|
func (tfr *TermFrequencyRow) ScanPrefixForField() []byte { |
||||||
|
buf := make([]byte, 3) |
||||||
|
buf[0] = 't' |
||||||
|
binary.LittleEndian.PutUint16(buf[1:3], tfr.field) |
||||||
|
return buf |
||||||
|
} |
||||||
|
|
||||||
|
func (tfr *TermFrequencyRow) ScanPrefixForFieldTermPrefix() []byte { |
||||||
|
buf := make([]byte, 3+len(tfr.term)) |
||||||
|
buf[0] = 't' |
||||||
|
binary.LittleEndian.PutUint16(buf[1:3], tfr.field) |
||||||
|
copy(buf[3:], tfr.term) |
||||||
|
return buf |
||||||
|
} |
||||||
|
|
||||||
|
func (tfr *TermFrequencyRow) ScanPrefixForFieldTerm() []byte { |
||||||
|
buf := make([]byte, 3+len(tfr.term)+1) |
||||||
|
buf[0] = 't' |
||||||
|
binary.LittleEndian.PutUint16(buf[1:3], tfr.field) |
||||||
|
termLen := copy(buf[3:], tfr.term) |
||||||
|
buf[3+termLen] = ByteSeparator |
||||||
|
return buf |
||||||
|
} |
||||||
|
|
||||||
|
func (tfr *TermFrequencyRow) Key() []byte { |
||||||
|
buf := make([]byte, tfr.KeySize()) |
||||||
|
size, _ := tfr.KeyTo(buf) |
||||||
|
return buf[:size] |
||||||
|
} |
||||||
|
|
||||||
|
// KeySize: 't' + 2-byte field + term + separator + doc ID.
func (tfr *TermFrequencyRow) KeySize() int {
	return 3 + len(tfr.term) + 1 + len(tfr.doc)
}
||||||
|
|
||||||
|
// KeyTo writes 't' + little-endian field + term + ByteSeparator + doc ID
// into buf, returning the bytes written.
func (tfr *TermFrequencyRow) KeyTo(buf []byte) (int, error) {
	buf[0] = 't'
	binary.LittleEndian.PutUint16(buf[1:3], tfr.field)
	termLen := copy(buf[3:], tfr.term)
	buf[3+termLen] = ByteSeparator
	docLen := copy(buf[3+termLen+1:], tfr.doc)
	return 3 + termLen + 1 + docLen, nil
}
||||||
|
|
||||||
|
// KeyAppendTo writes the key into buf, reallocating only when buf's
// capacity is too small; it returns the (possibly new) slice holding the key.
func (tfr *TermFrequencyRow) KeyAppendTo(buf []byte) ([]byte, error) {
	keySize := tfr.KeySize()
	if cap(buf) < keySize {
		buf = make([]byte, keySize)
	}
	actualSize, err := tfr.KeyTo(buf[0:keySize])
	return buf[0:actualSize], err
}
||||||
|
|
||||||
|
// DictionaryRowKey is the key of the dictionary row for this term/field.
func (tfr *TermFrequencyRow) DictionaryRowKey() []byte {
	dr := NewDictionaryRow(tfr.term, tfr.field, 0)
	return dr.Key()
}
||||||
|
|
||||||
|
// DictionaryRowKeySize is the size of the corresponding dictionary row key.
func (tfr *TermFrequencyRow) DictionaryRowKeySize() int {
	dr := NewDictionaryRow(tfr.term, tfr.field, 0)
	return dr.KeySize()
}
||||||
|
|
||||||
|
// DictionaryRowKeyTo writes the corresponding dictionary row key into buf.
func (tfr *TermFrequencyRow) DictionaryRowKeyTo(buf []byte) (int, error) {
	dr := NewDictionaryRow(tfr.term, tfr.field, 0)
	return dr.KeyTo(buf)
}
||||||
|
|
||||||
|
func (tfr *TermFrequencyRow) Value() []byte { |
||||||
|
buf := make([]byte, tfr.ValueSize()) |
||||||
|
size, _ := tfr.ValueTo(buf) |
||||||
|
return buf[:size] |
||||||
|
} |
||||||
|
|
||||||
|
// ValueSize returns a worst-case buffer size for ValueTo: max uvarint
// widths for freq and norm, plus — per vector — field/pos/start/end and
// the arrayPositions length and entries. ValueTo may write fewer bytes.
func (tfr *TermFrequencyRow) ValueSize() int {
	bufLen := binary.MaxVarintLen64 + binary.MaxVarintLen64
	for _, vector := range tfr.vectors {
		bufLen += (binary.MaxVarintLen64 * 4) + (1+len(vector.arrayPositions))*binary.MaxVarintLen64
	}
	return bufLen
}
||||||
|
|
||||||
|
// ValueTo encodes the row value into buf as a sequence of uvarints:
// freq, norm (float32 bits), then per vector: field, pos, start, end,
// arrayPositions length, and each array position. buf must be at least
// ValueSize() bytes.
func (tfr *TermFrequencyRow) ValueTo(buf []byte) (int, error) {
	used := binary.PutUvarint(buf[:binary.MaxVarintLen64], tfr.freq)

	// norm is stored as the uvarint of its IEEE-754 bit pattern
	normuint32 := math.Float32bits(tfr.norm)
	newbuf := buf[used : used+binary.MaxVarintLen64]
	used += binary.PutUvarint(newbuf, uint64(normuint32))

	for _, vector := range tfr.vectors {
		used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], uint64(vector.field))
		used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], vector.pos)
		used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], vector.start)
		used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], vector.end)
		used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], uint64(len(vector.arrayPositions)))
		for _, arrayPosition := range vector.arrayPositions {
			used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], arrayPosition)
		}
	}
	return used, nil
}
||||||
|
|
||||||
|
// String implements fmt.Stringer for debugging output.
func (tfr *TermFrequencyRow) String() string {
	return fmt.Sprintf("Term: `%s` Field: %d DocId: `%s` Frequency: %d Norm: %f Vectors: %v", string(tfr.term), tfr.field, string(tfr.doc), tfr.freq, tfr.norm, tfr.vectors)
}
||||||
|
|
||||||
|
func InitTermFrequencyRow(tfr *TermFrequencyRow, term []byte, field uint16, docID []byte, freq uint64, norm float32) *TermFrequencyRow { |
||||||
|
tfr.term = term |
||||||
|
tfr.field = field |
||||||
|
tfr.doc = docID |
||||||
|
tfr.freq = freq |
||||||
|
tfr.norm = norm |
||||||
|
return tfr |
||||||
|
} |
||||||
|
|
||||||
|
func NewTermFrequencyRow(term []byte, field uint16, docID []byte, freq uint64, norm float32) *TermFrequencyRow { |
||||||
|
return &TermFrequencyRow{ |
||||||
|
term: term, |
||||||
|
field: field, |
||||||
|
doc: docID, |
||||||
|
freq: freq, |
||||||
|
norm: norm, |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func NewTermFrequencyRowWithTermVectors(term []byte, field uint16, docID []byte, freq uint64, norm float32, vectors []*TermVector) *TermFrequencyRow { |
||||||
|
return &TermFrequencyRow{ |
||||||
|
term: term, |
||||||
|
field: field, |
||||||
|
doc: docID, |
||||||
|
freq: freq, |
||||||
|
norm: norm, |
||||||
|
vectors: vectors, |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func NewTermFrequencyRowK(key []byte) (*TermFrequencyRow, error) { |
||||||
|
rv := &TermFrequencyRow{} |
||||||
|
err := rv.parseK(key) |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
return rv, nil |
||||||
|
} |
||||||
|
|
||||||
|
// parseK extracts field, term, and doc ID from a key laid out as
// 't' + uint16 field + term + ByteSeparator + doc. The term and doc
// slices alias the key bytes rather than copying them.
func (tfr *TermFrequencyRow) parseK(key []byte) error {
	keyLen := len(key)
	if keyLen < 3 {
		return fmt.Errorf("invalid term frequency key, no valid field")
	}
	tfr.field = binary.LittleEndian.Uint16(key[1:3])

	termEndPos := bytes.IndexByte(key[3:], ByteSeparator)
	if termEndPos < 0 {
		return fmt.Errorf("invalid term frequency key, no byte separator terminating term")
	}
	tfr.term = key[3 : 3+termEndPos]

	docLen := keyLen - (3 + termEndPos + 1)
	if docLen < 1 {
		return fmt.Errorf("invalid term frequency key, empty docid")
	}
	tfr.doc = key[3+termEndPos+1:]

	return nil
}
||||||
|
|
||||||
|
// parseKDoc extracts only the doc ID from a key whose term is already
// known, skipping the separator search parseK performs. The caller must
// guarantee term actually matches the key's term portion.
func (tfr *TermFrequencyRow) parseKDoc(key []byte, term []byte) error {
	tfr.doc = key[3+len(term)+1:]
	if len(tfr.doc) <= 0 {
		return fmt.Errorf("invalid term frequency key, empty docid")
	}

	return nil
}
||||||
|
|
||||||
|
// parseV decodes a row value produced by ValueTo: uvarint freq, uvarint
// norm bits, then zero or more term-vector records (field, pos, start,
// end, arrayPositions length, array positions). currOffset is a cursor
// advanced past each decoded uvarint; any decode returning <= 0 bytes
// mid-record is an error, while failing to start a new record simply
// ends the loop (checked for trailing garbage afterwards).
func (tfr *TermFrequencyRow) parseV(value []byte) error {
	var bytesRead int
	tfr.freq, bytesRead = binary.Uvarint(value)
	if bytesRead <= 0 {
		return fmt.Errorf("invalid term frequency value, invalid frequency")
	}
	currOffset := bytesRead

	var norm uint64
	norm, bytesRead = binary.Uvarint(value[currOffset:])
	if bytesRead <= 0 {
		return fmt.Errorf("invalid term frequency value, no norm")
	}
	currOffset += bytesRead

	// norm was stored as the uvarint of its IEEE-754 bit pattern
	tfr.norm = math.Float32frombits(uint32(norm))

	tfr.vectors = nil
	var field uint64
	field, bytesRead = binary.Uvarint(value[currOffset:])
	for bytesRead > 0 {
		currOffset += bytesRead
		tv := TermVector{}
		tv.field = uint16(field)
		// at this point we expect at least one term vector
		if tfr.vectors == nil {
			tfr.vectors = make([]*TermVector, 0)
		}

		tv.pos, bytesRead = binary.Uvarint(value[currOffset:])
		if bytesRead <= 0 {
			return fmt.Errorf("invalid term frequency value, vector contains no position")
		}
		currOffset += bytesRead

		tv.start, bytesRead = binary.Uvarint(value[currOffset:])
		if bytesRead <= 0 {
			return fmt.Errorf("invalid term frequency value, vector contains no start")
		}
		currOffset += bytesRead

		tv.end, bytesRead = binary.Uvarint(value[currOffset:])
		if bytesRead <= 0 {
			return fmt.Errorf("invalid term frequency value, vector contains no end")
		}
		currOffset += bytesRead

		var arrayPositionsLen uint64 = 0
		arrayPositionsLen, bytesRead = binary.Uvarint(value[currOffset:])
		if bytesRead <= 0 {
			return fmt.Errorf("invalid term frequency value, vector contains no arrayPositionLen")
		}
		currOffset += bytesRead

		if arrayPositionsLen > 0 {
			tv.arrayPositions = make([]uint64, arrayPositionsLen)
			for i := 0; uint64(i) < arrayPositionsLen; i++ {
				tv.arrayPositions[i], bytesRead = binary.Uvarint(value[currOffset:])
				if bytesRead <= 0 {
					return fmt.Errorf("invalid term frequency value, vector contains no arrayPosition of index %d", i)
				}
				currOffset += bytesRead
			}
		}

		tfr.vectors = append(tfr.vectors, &tv)
		// try to read next record (may not exist)
		field, bytesRead = binary.Uvarint(value[currOffset:])
	}
	// leftover bytes that failed to decode as a new record are corruption
	if len(value[currOffset:]) > 0 && bytesRead <= 0 {
		return fmt.Errorf("invalid term frequency value, vector field invalid")
	}

	return nil
}
||||||
|
|
||||||
|
func NewTermFrequencyRowKV(key, value []byte) (*TermFrequencyRow, error) { |
||||||
|
rv, err := NewTermFrequencyRowK(key) |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
|
||||||
|
err = rv.parseV(value) |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
return rv, nil |
||||||
|
|
||||||
|
} |
||||||
|
|
||||||
|
// BackIndexRow maps a document ID back to the term and stored rows that
// were written for it, enabling cleanup on update/delete.
type BackIndexRow struct {
	doc           []byte                 // the document ID
	termEntries   []*BackIndexTermEntry  // term/field pairs indexed for doc
	storedEntries []*BackIndexStoreEntry // stored field entries for doc
}
||||||
|
|
||||||
|
// AllTermKeys returns the term frequency row keys for every term entry
// of this document. Safe to call on a nil receiver (returns nil).
func (br *BackIndexRow) AllTermKeys() [][]byte {
	if br == nil {
		return nil
	}
	rv := make([][]byte, len(br.termEntries))
	for i, termEntry := range br.termEntries {
		// freq/norm are irrelevant for key construction
		termRow := NewTermFrequencyRow([]byte(termEntry.GetTerm()), uint16(termEntry.GetField()), br.doc, 0, 0)
		rv[i] = termRow.Key()
	}
	return rv
}
||||||
|
|
||||||
|
// AllStoredKeys returns the stored row keys for every stored entry of
// this document. Safe to call on a nil receiver (returns nil).
func (br *BackIndexRow) AllStoredKeys() [][]byte {
	if br == nil {
		return nil
	}
	rv := make([][]byte, len(br.storedEntries))
	for i, storedEntry := range br.storedEntries {
		// typ/value placeholders — only the key matters here
		storedRow := NewStoredRow(br.doc, uint16(storedEntry.GetField()), storedEntry.GetArrayPositions(), 'x', []byte{})
		rv[i] = storedRow.Key()
	}
	return rv
}
||||||
|
|
||||||
|
func (br *BackIndexRow) Key() []byte { |
||||||
|
buf := make([]byte, br.KeySize()) |
||||||
|
size, _ := br.KeyTo(buf) |
||||||
|
return buf[:size] |
||||||
|
} |
||||||
|
|
||||||
|
// KeySize is 1 type byte plus the doc ID length.
func (br *BackIndexRow) KeySize() int {
	return len(br.doc) + 1
}
||||||
|
|
||||||
|
func (br *BackIndexRow) KeyTo(buf []byte) (int, error) { |
||||||
|
buf[0] = 'b' |
||||||
|
used := copy(buf[1:], br.doc) |
||||||
|
return used + 1, nil |
||||||
|
} |
||||||
|
|
||||||
|
func (br *BackIndexRow) Value() []byte { |
||||||
|
buf := make([]byte, br.ValueSize()) |
||||||
|
size, _ := br.ValueTo(buf) |
||||||
|
return buf[:size] |
||||||
|
} |
||||||
|
|
||||||
|
// ValueSize is the protobuf-encoded size of the entries.
func (br *BackIndexRow) ValueSize() int {
	birv := &BackIndexRowValue{
		TermEntries:   br.termEntries,
		StoredEntries: br.storedEntries,
	}
	return birv.Size()
}
||||||
|
|
||||||
|
// ValueTo protobuf-marshals the entries into buf.
func (br *BackIndexRow) ValueTo(buf []byte) (int, error) {
	birv := &BackIndexRowValue{
		TermEntries:   br.termEntries,
		StoredEntries: br.storedEntries,
	}
	return birv.MarshalTo(buf)
}
||||||
|
|
||||||
|
// String implements fmt.Stringer for debugging output.
func (br *BackIndexRow) String() string {
	return fmt.Sprintf("Backindex DocId: `%s` Term Entries: %v, Stored Entries: %v", string(br.doc), br.termEntries, br.storedEntries)
}
||||||
|
|
||||||
|
func NewBackIndexRow(docID []byte, entries []*BackIndexTermEntry, storedFields []*BackIndexStoreEntry) *BackIndexRow { |
||||||
|
return &BackIndexRow{ |
||||||
|
doc: docID, |
||||||
|
termEntries: entries, |
||||||
|
storedEntries: storedFields, |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// NewBackIndexRowKV decodes a back index row from its stored key
// ('b' + doc ID) and protobuf-encoded value. Back index keys carry no
// trailing separator, so ReadBytes normally ends with io.EOF; a
// separator byte is only trimmed when one was actually found.
func NewBackIndexRowKV(key, value []byte) (*BackIndexRow, error) {
	rv := BackIndexRow{}

	buf := bytes.NewBuffer(key)
	_, err := buf.ReadByte() // type
	if err != nil {
		return nil, err
	}

	rv.doc, err = buf.ReadBytes(ByteSeparator)
	if err == io.EOF && len(rv.doc) < 1 {
		err = fmt.Errorf("invalid doc length 0 - % x", key)
	}
	if err != nil && err != io.EOF {
		return nil, err
	} else if err == nil {
		rv.doc = rv.doc[:len(rv.doc)-1] // trim off separator byte
	}

	var birv BackIndexRowValue
	err = proto.Unmarshal(value, &birv)
	if err != nil {
		return nil, err
	}
	rv.termEntries = birv.TermEntries
	rv.storedEntries = birv.StoredEntries

	return &rv, nil
}
||||||
|
|
||||||
|
// STORED
|
||||||
|
|
||||||
|
// StoredRow holds one stored field value for a document.
type StoredRow struct {
	doc            []byte   // the document ID
	field          uint16   // field index
	arrayPositions []uint64 // position within nested arrays, if any
	typ            byte     // stored value type marker
	value          []byte   // the raw stored bytes
}
||||||
|
|
||||||
|
func (s *StoredRow) Key() []byte { |
||||||
|
buf := make([]byte, s.KeySize()) |
||||||
|
size, _ := s.KeyTo(buf) |
||||||
|
return buf[0:size] |
||||||
|
} |
||||||
|
|
||||||
|
// KeySize is a worst-case bound: 's' + doc + separator + 2-byte field +
// max uvarint width per array position. KeyTo may write fewer bytes.
func (s *StoredRow) KeySize() int {
	return 1 + len(s.doc) + 1 + 2 + (binary.MaxVarintLen64 * len(s.arrayPositions))
}
||||||
|
|
||||||
|
// KeyTo writes 's' + doc + ByteSeparator + little-endian field + the
// uvarint-encoded array positions into buf, returning bytes written.
func (s *StoredRow) KeyTo(buf []byte) (int, error) {
	docLen := len(s.doc)
	buf[0] = 's'
	copy(buf[1:], s.doc)
	buf[1+docLen] = ByteSeparator
	binary.LittleEndian.PutUint16(buf[1+docLen+1:], s.field)
	bytesUsed := 1 + docLen + 1 + 2
	for _, arrayPosition := range s.arrayPositions {
		varbytes := binary.PutUvarint(buf[bytesUsed:], arrayPosition)
		bytesUsed += varbytes
	}
	return bytesUsed, nil
}
||||||
|
|
||||||
|
func (s *StoredRow) Value() []byte { |
||||||
|
buf := make([]byte, s.ValueSize()) |
||||||
|
size, _ := s.ValueTo(buf) |
||||||
|
return buf[:size] |
||||||
|
} |
||||||
|
|
||||||
|
// ValueSize is the stored bytes plus one leading type byte.
func (s *StoredRow) ValueSize() int {
	return len(s.value) + 1
}
||||||
|
|
||||||
|
func (s *StoredRow) ValueTo(buf []byte) (int, error) { |
||||||
|
buf[0] = s.typ |
||||||
|
used := copy(buf[1:], s.value) |
||||||
|
return used + 1, nil |
||||||
|
} |
||||||
|
|
||||||
|
// String implements fmt.Stringer for debugging output.
func (s *StoredRow) String() string {
	return fmt.Sprintf("Document: %s Field %d, Array Positions: %v, Type: %s Value: %s", s.doc, s.field, s.arrayPositions, string(s.typ), s.value)
}
||||||
|
|
||||||
|
func (s *StoredRow) ScanPrefixForDoc() []byte { |
||||||
|
docLen := len(s.doc) |
||||||
|
buf := make([]byte, 1+docLen+1) |
||||||
|
buf[0] = 's' |
||||||
|
copy(buf[1:], s.doc) |
||||||
|
buf[1+docLen] = ByteSeparator |
||||||
|
return buf |
||||||
|
} |
||||||
|
|
||||||
|
func NewStoredRow(docID []byte, field uint16, arrayPositions []uint64, typ byte, value []byte) *StoredRow { |
||||||
|
return &StoredRow{ |
||||||
|
doc: docID, |
||||||
|
field: field, |
||||||
|
arrayPositions: arrayPositions, |
||||||
|
typ: typ, |
||||||
|
value: value, |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// NewStoredRowK decodes the key portion of a stored row: 's' + doc +
// ByteSeparator + little-endian field + uvarint array positions.
func NewStoredRowK(key []byte) (*StoredRow, error) {
	rv := StoredRow{}

	buf := bytes.NewBuffer(key)
	_, err := buf.ReadByte() // type
	if err != nil {
		return nil, err
	}

	// NOTE(review): the error from ReadBytes is not examined beyond the
	// length check — if the key lacked a separator (io.EOF), the trim
	// below would drop a real doc byte. Presumably well-formed keys
	// always contain one; verify against the writers.
	rv.doc, err = buf.ReadBytes(ByteSeparator)
	if len(rv.doc) < 2 { // 1 for min doc id length, 1 for separator
		err = fmt.Errorf("invalid doc length 0")
		return nil, err
	}

	rv.doc = rv.doc[:len(rv.doc)-1] // trim off separator byte

	err = binary.Read(buf, binary.LittleEndian, &rv.field)
	if err != nil {
		return nil, err
	}

	// consume uvarints until the buffer is exhausted
	rv.arrayPositions = make([]uint64, 0)
	nextArrayPos, err := binary.ReadUvarint(buf)
	for err == nil {
		rv.arrayPositions = append(rv.arrayPositions, nextArrayPos)
		nextArrayPos, err = binary.ReadUvarint(buf)
	}
	return &rv, nil
}
||||||
|
|
||||||
|
func NewStoredRowKV(key, value []byte) (*StoredRow, error) { |
||||||
|
rv, err := NewStoredRowK(key) |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
rv.typ = value[0] |
||||||
|
rv.value = value[1:] |
||||||
|
return rv, nil |
||||||
|
} |
@ -0,0 +1,76 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package upsidedown |
||||||
|
|
||||||
|
import ( |
||||||
|
"encoding/binary" |
||||||
|
) |
||||||
|
|
||||||
|
var mergeOperator upsideDownMerge |
||||||
|
|
||||||
|
var dictionaryTermIncr []byte |
||||||
|
var dictionaryTermDecr []byte |
||||||
|
|
||||||
|
// init precomputes the two 8-byte little-endian merge operands used for
// dictionary term counts: +1 and -1 (stored as two's-complement uint64).
func init() {
	dictionaryTermIncr = make([]byte, 8)
	binary.LittleEndian.PutUint64(dictionaryTermIncr, uint64(1))
	dictionaryTermDecr = make([]byte, 8)
	var negOne = int64(-1)
	binary.LittleEndian.PutUint64(dictionaryTermDecr, uint64(negOne))
}
||||||
|
|
||||||
|
// upsideDownMerge is a KV-store merge operator that maintains dictionary
// row counts by folding signed 8-byte delta operands into the value.
type upsideDownMerge struct{}
||||||
|
|
||||||
|
// FullMerge folds all pending signed deltas into the dictionary count
// for key, clamping at zero on underflow. Returns false on any decode
// failure, per the merge-operator contract.
func (m *upsideDownMerge) FullMerge(key, existingValue []byte, operands [][]byte) ([]byte, bool) {
	// set up record based on key
	dr, err := NewDictionaryRowK(key)
	if err != nil {
		return nil, false
	}
	if len(existingValue) > 0 {
		// if existing value, parse it
		err = dr.parseDictionaryV(existingValue)
		if err != nil {
			return nil, false
		}
	}

	// now process operands
	for _, operand := range operands {
		next := int64(binary.LittleEndian.Uint64(operand))
		if next < 0 && uint64(-next) > dr.count {
			// subtracting next from existing would overflow
			dr.count = 0
		} else if next < 0 {
			dr.count -= uint64(-next)
		} else {
			dr.count += uint64(next)
		}
	}

	return dr.Value(), true
}
||||||
|
|
||||||
|
// PartialMerge combines two pending signed deltas into one by adding
// them; both operands are assumed to be 8-byte little-endian values.
func (m *upsideDownMerge) PartialMerge(key, leftOperand, rightOperand []byte) ([]byte, bool) {
	left := int64(binary.LittleEndian.Uint64(leftOperand))
	right := int64(binary.LittleEndian.Uint64(rightOperand))
	rv := make([]byte, 8)
	binary.LittleEndian.PutUint64(rv, uint64(left+right))
	return rv, true
}
||||||
|
|
||||||
|
// Name identifies this merge operator to the underlying KV store.
func (m *upsideDownMerge) Name() string {
	return "upsideDownMerge"
}
@ -0,0 +1,55 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package upsidedown |
||||||
|
|
||||||
|
import ( |
||||||
|
"encoding/json" |
||||||
|
"sync/atomic" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/index/store" |
||||||
|
) |
||||||
|
|
||||||
|
// indexStat aggregates runtime counters for an UpsideDownCouch index.
// All counter fields are read/written with sync/atomic.
type indexStat struct {
	updates, deletes, batches, errors uint64
	analysisTime, indexTime           uint64
	termSearchersStarted              uint64
	termSearchersFinished             uint64
	numPlainTextBytesIndexed          uint64
	i                                 *UpsideDownCouch // owning index, used to reach the KV store
}
||||||
|
|
||||||
|
// statsMap snapshots all counters (atomically, one at a time — the map
// as a whole is not a consistent point-in-time view) and, when the KV
// store exposes stats, nests them under "kv".
func (i *indexStat) statsMap() map[string]interface{} {
	m := map[string]interface{}{}
	m["updates"] = atomic.LoadUint64(&i.updates)
	m["deletes"] = atomic.LoadUint64(&i.deletes)
	m["batches"] = atomic.LoadUint64(&i.batches)
	m["errors"] = atomic.LoadUint64(&i.errors)
	m["analysis_time"] = atomic.LoadUint64(&i.analysisTime)
	m["index_time"] = atomic.LoadUint64(&i.indexTime)
	m["term_searchers_started"] = atomic.LoadUint64(&i.termSearchersStarted)
	m["term_searchers_finished"] = atomic.LoadUint64(&i.termSearchersFinished)
	m["num_plain_text_bytes_indexed"] = atomic.LoadUint64(&i.numPlainTextBytesIndexed)

	if o, ok := i.i.store.(store.KVStoreStats); ok {
		m["kv"] = o.StatsMap()
	}

	return m
}
||||||
|
|
||||||
|
func (i *indexStat) MarshalJSON() ([]byte, error) { |
||||||
|
m := i.statsMap() |
||||||
|
return json.Marshal(m) |
||||||
|
} |
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,684 @@ |
|||||||
|
// Code generated by protoc-gen-gogo.
|
||||||
|
// source: upsidedown.proto
|
||||||
|
// DO NOT EDIT!
|
||||||
|
|
||||||
|
/* |
||||||
|
Package upsidedown is a generated protocol buffer package. |
||||||
|
|
||||||
|
It is generated from these files: |
||||||
|
upsidedown.proto |
||||||
|
|
||||||
|
It has these top-level messages: |
||||||
|
BackIndexTermEntry |
||||||
|
BackIndexStoreEntry |
||||||
|
BackIndexRowValue |
||||||
|
*/ |
||||||
|
package upsidedown |
||||||
|
|
||||||
|
import proto "github.com/golang/protobuf/proto" |
||||||
|
import math "math" |
||||||
|
|
||||||
|
import io "io" |
||||||
|
import fmt "fmt" |
||||||
|
import github_com_golang_protobuf_proto "github.com/golang/protobuf/proto" |
||||||
|
|
||||||
|
// Reference imports to suppress errors if they are not otherwise used.
|
||||||
|
var _ = proto.Marshal |
||||||
|
var _ = math.Inf |
||||||
|
|
||||||
|
type BackIndexTermEntry struct { |
||||||
|
Term *string `protobuf:"bytes,1,req,name=term" json:"term,omitempty"` |
||||||
|
Field *uint32 `protobuf:"varint,2,req,name=field" json:"field,omitempty"` |
||||||
|
XXX_unrecognized []byte `json:"-"` |
||||||
|
} |
||||||
|
|
||||||
|
func (m *BackIndexTermEntry) Reset() { *m = BackIndexTermEntry{} } |
||||||
|
func (m *BackIndexTermEntry) String() string { return proto.CompactTextString(m) } |
||||||
|
func (*BackIndexTermEntry) ProtoMessage() {} |
||||||
|
|
||||||
|
func (m *BackIndexTermEntry) GetTerm() string { |
||||||
|
if m != nil && m.Term != nil { |
||||||
|
return *m.Term |
||||||
|
} |
||||||
|
return "" |
||||||
|
} |
||||||
|
|
||||||
|
func (m *BackIndexTermEntry) GetField() uint32 { |
||||||
|
if m != nil && m.Field != nil { |
||||||
|
return *m.Field |
||||||
|
} |
||||||
|
return 0 |
||||||
|
} |
||||||
|
|
||||||
|
type BackIndexStoreEntry struct { |
||||||
|
Field *uint32 `protobuf:"varint,1,req,name=field" json:"field,omitempty"` |
||||||
|
ArrayPositions []uint64 `protobuf:"varint,2,rep,name=arrayPositions" json:"arrayPositions,omitempty"` |
||||||
|
XXX_unrecognized []byte `json:"-"` |
||||||
|
} |
||||||
|
|
||||||
|
func (m *BackIndexStoreEntry) Reset() { *m = BackIndexStoreEntry{} } |
||||||
|
func (m *BackIndexStoreEntry) String() string { return proto.CompactTextString(m) } |
||||||
|
func (*BackIndexStoreEntry) ProtoMessage() {} |
||||||
|
|
||||||
|
func (m *BackIndexStoreEntry) GetField() uint32 { |
||||||
|
if m != nil && m.Field != nil { |
||||||
|
return *m.Field |
||||||
|
} |
||||||
|
return 0 |
||||||
|
} |
||||||
|
|
||||||
|
func (m *BackIndexStoreEntry) GetArrayPositions() []uint64 { |
||||||
|
if m != nil { |
||||||
|
return m.ArrayPositions |
||||||
|
} |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
type BackIndexRowValue struct { |
||||||
|
TermEntries []*BackIndexTermEntry `protobuf:"bytes,1,rep,name=termEntries" json:"termEntries,omitempty"` |
||||||
|
StoredEntries []*BackIndexStoreEntry `protobuf:"bytes,2,rep,name=storedEntries" json:"storedEntries,omitempty"` |
||||||
|
XXX_unrecognized []byte `json:"-"` |
||||||
|
} |
||||||
|
|
||||||
|
func (m *BackIndexRowValue) Reset() { *m = BackIndexRowValue{} } |
||||||
|
func (m *BackIndexRowValue) String() string { return proto.CompactTextString(m) } |
||||||
|
func (*BackIndexRowValue) ProtoMessage() {} |
||||||
|
|
||||||
|
func (m *BackIndexRowValue) GetTermEntries() []*BackIndexTermEntry { |
||||||
|
if m != nil { |
||||||
|
return m.TermEntries |
||||||
|
} |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
func (m *BackIndexRowValue) GetStoredEntries() []*BackIndexStoreEntry { |
||||||
|
if m != nil { |
||||||
|
return m.StoredEntries |
||||||
|
} |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
// Unmarshal decodes a wire-format BackIndexTermEntry from data.
// Generated code: hand-rolled varint decoding; both required fields
// (term=1, field=2) must be present or a RequiredNotSetError is returned.
func (m *BackIndexTermEntry) Unmarshal(data []byte) error {
	var hasFields [1]uint64 // bitmask of required fields seen
	l := len(data)
	iNdEx := 0
	for iNdEx < l {
		// decode the key varint (field number << 3 | wire type)
		var wire uint64
		for shift := uint(0); ; shift += 7 {
			if iNdEx >= l {
				return io.ErrUnexpectedEOF
			}
			b := data[iNdEx]
			iNdEx++
			wire |= (uint64(b) & 0x7F) << shift
			if b < 0x80 {
				break
			}
		}
		fieldNum := int32(wire >> 3)
		wireType := int(wire & 0x7)
		switch fieldNum {
		case 1:
			// required string term = 1 (length-delimited)
			if wireType != 2 {
				return fmt.Errorf("proto: wrong wireType = %d for field Term", wireType)
			}
			var stringLen uint64
			for shift := uint(0); ; shift += 7 {
				if iNdEx >= l {
					return io.ErrUnexpectedEOF
				}
				b := data[iNdEx]
				iNdEx++
				stringLen |= (uint64(b) & 0x7F) << shift
				if b < 0x80 {
					break
				}
			}
			postIndex := iNdEx + int(stringLen)
			if postIndex > l {
				return io.ErrUnexpectedEOF
			}
			s := string(data[iNdEx:postIndex])
			m.Term = &s
			iNdEx = postIndex
			hasFields[0] |= uint64(0x00000001)
		case 2:
			// required uint32 field = 2 (varint)
			if wireType != 0 {
				return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType)
			}
			var v uint32
			for shift := uint(0); ; shift += 7 {
				if iNdEx >= l {
					return io.ErrUnexpectedEOF
				}
				b := data[iNdEx]
				iNdEx++
				v |= (uint32(b) & 0x7F) << shift
				if b < 0x80 {
					break
				}
			}
			m.Field = &v
			hasFields[0] |= uint64(0x00000002)
		default:
			// unknown field: rewind to the key start, skip the whole
			// field, and keep its raw bytes in XXX_unrecognized.
			var sizeOfWire int
			for {
				sizeOfWire++
				wire >>= 7
				if wire == 0 {
					break
				}
			}
			iNdEx -= sizeOfWire
			skippy, err := skipUpsidedown(data[iNdEx:])
			if err != nil {
				return err
			}
			if skippy < 0 {
				return ErrInvalidLengthUpsidedown
			}
			if (iNdEx + skippy) > l {
				return io.ErrUnexpectedEOF
			}
			m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
			iNdEx += skippy
		}
	}
	if hasFields[0]&uint64(0x00000001) == 0 {
		return new(github_com_golang_protobuf_proto.RequiredNotSetError)
	}
	if hasFields[0]&uint64(0x00000002) == 0 {
		return new(github_com_golang_protobuf_proto.RequiredNotSetError)
	}

	return nil
}
||||||
|
// Unmarshal decodes a wire-format BackIndexStoreEntry from data.
// Generated code: field=1 is required; arrayPositions=2 is repeated.
func (m *BackIndexStoreEntry) Unmarshal(data []byte) error {
	var hasFields [1]uint64 // bitmask of required fields seen
	l := len(data)
	iNdEx := 0
	for iNdEx < l {
		// decode the key varint (field number << 3 | wire type)
		var wire uint64
		for shift := uint(0); ; shift += 7 {
			if iNdEx >= l {
				return io.ErrUnexpectedEOF
			}
			b := data[iNdEx]
			iNdEx++
			wire |= (uint64(b) & 0x7F) << shift
			if b < 0x80 {
				break
			}
		}
		fieldNum := int32(wire >> 3)
		wireType := int(wire & 0x7)
		switch fieldNum {
		case 1:
			// required uint32 field = 1 (varint)
			if wireType != 0 {
				return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType)
			}
			var v uint32
			for shift := uint(0); ; shift += 7 {
				if iNdEx >= l {
					return io.ErrUnexpectedEOF
				}
				b := data[iNdEx]
				iNdEx++
				v |= (uint32(b) & 0x7F) << shift
				if b < 0x80 {
					break
				}
			}
			m.Field = &v
			hasFields[0] |= uint64(0x00000001)
		case 2:
			// repeated uint64 arrayPositions = 2 (varint, unpacked)
			if wireType != 0 {
				return fmt.Errorf("proto: wrong wireType = %d for field ArrayPositions", wireType)
			}
			var v uint64
			for shift := uint(0); ; shift += 7 {
				if iNdEx >= l {
					return io.ErrUnexpectedEOF
				}
				b := data[iNdEx]
				iNdEx++
				v |= (uint64(b) & 0x7F) << shift
				if b < 0x80 {
					break
				}
			}
			m.ArrayPositions = append(m.ArrayPositions, v)
		default:
			// unknown field: rewind, skip, and preserve raw bytes.
			var sizeOfWire int
			for {
				sizeOfWire++
				wire >>= 7
				if wire == 0 {
					break
				}
			}
			iNdEx -= sizeOfWire
			skippy, err := skipUpsidedown(data[iNdEx:])
			if err != nil {
				return err
			}
			if skippy < 0 {
				return ErrInvalidLengthUpsidedown
			}
			if (iNdEx + skippy) > l {
				return io.ErrUnexpectedEOF
			}
			m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
			iNdEx += skippy
		}
	}
	if hasFields[0]&uint64(0x00000001) == 0 {
		return new(github_com_golang_protobuf_proto.RequiredNotSetError)
	}

	return nil
}
||||||
|
// Unmarshal decodes a wire-format BackIndexRowValue from data.
// Generated code: both fields are repeated embedded messages, decoded by
// delegating each length-delimited chunk to the element's Unmarshal.
func (m *BackIndexRowValue) Unmarshal(data []byte) error {
	l := len(data)
	iNdEx := 0
	for iNdEx < l {
		// decode the key varint (field number << 3 | wire type)
		var wire uint64
		for shift := uint(0); ; shift += 7 {
			if iNdEx >= l {
				return io.ErrUnexpectedEOF
			}
			b := data[iNdEx]
			iNdEx++
			wire |= (uint64(b) & 0x7F) << shift
			if b < 0x80 {
				break
			}
		}
		fieldNum := int32(wire >> 3)
		wireType := int(wire & 0x7)
		switch fieldNum {
		case 1:
			// repeated BackIndexTermEntry termEntries = 1
			if wireType != 2 {
				return fmt.Errorf("proto: wrong wireType = %d for field TermEntries", wireType)
			}
			var msglen int
			for shift := uint(0); ; shift += 7 {
				if iNdEx >= l {
					return io.ErrUnexpectedEOF
				}
				b := data[iNdEx]
				iNdEx++
				msglen |= (int(b) & 0x7F) << shift
				if b < 0x80 {
					break
				}
			}
			postIndex := iNdEx + msglen
			if msglen < 0 {
				return ErrInvalidLengthUpsidedown
			}
			if postIndex > l {
				return io.ErrUnexpectedEOF
			}
			m.TermEntries = append(m.TermEntries, &BackIndexTermEntry{})
			if err := m.TermEntries[len(m.TermEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
				return err
			}
			iNdEx = postIndex
		case 2:
			// repeated BackIndexStoreEntry storedEntries = 2
			if wireType != 2 {
				return fmt.Errorf("proto: wrong wireType = %d for field StoredEntries", wireType)
			}
			var msglen int
			for shift := uint(0); ; shift += 7 {
				if iNdEx >= l {
					return io.ErrUnexpectedEOF
				}
				b := data[iNdEx]
				iNdEx++
				msglen |= (int(b) & 0x7F) << shift
				if b < 0x80 {
					break
				}
			}
			postIndex := iNdEx + msglen
			if msglen < 0 {
				return ErrInvalidLengthUpsidedown
			}
			if postIndex > l {
				return io.ErrUnexpectedEOF
			}
			m.StoredEntries = append(m.StoredEntries, &BackIndexStoreEntry{})
			if err := m.StoredEntries[len(m.StoredEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
				return err
			}
			iNdEx = postIndex
		default:
			// unknown field: rewind, skip, and preserve raw bytes.
			var sizeOfWire int
			for {
				sizeOfWire++
				wire >>= 7
				if wire == 0 {
					break
				}
			}
			iNdEx -= sizeOfWire
			skippy, err := skipUpsidedown(data[iNdEx:])
			if err != nil {
				return err
			}
			if skippy < 0 {
				return ErrInvalidLengthUpsidedown
			}
			if (iNdEx + skippy) > l {
				return io.ErrUnexpectedEOF
			}
			m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
			iNdEx += skippy
		}
	}

	return nil
}
||||||
|
// skipUpsidedown returns the number of bytes occupied by the next complete
// field (key + payload) at the start of data, so callers can skip unknown
// fields. Generated code — the statement order (e.g. the negative-length
// check after the addition in case 2) is preserved from the generator.
func skipUpsidedown(data []byte) (n int, err error) {
	l := len(data)
	iNdEx := 0
	for iNdEx < l {
		// decode the key varint to learn the wire type
		var wire uint64
		for shift := uint(0); ; shift += 7 {
			if iNdEx >= l {
				return 0, io.ErrUnexpectedEOF
			}
			b := data[iNdEx]
			iNdEx++
			wire |= (uint64(b) & 0x7F) << shift
			if b < 0x80 {
				break
			}
		}
		wireType := int(wire & 0x7)
		switch wireType {
		case 0:
			// varint: skip bytes until the continuation bit clears
			for {
				if iNdEx >= l {
					return 0, io.ErrUnexpectedEOF
				}
				iNdEx++
				if data[iNdEx-1] < 0x80 {
					break
				}
			}
			return iNdEx, nil
		case 1:
			// 64-bit fixed
			iNdEx += 8
			return iNdEx, nil
		case 2:
			// length-delimited: varint length, then that many bytes
			var length int
			for shift := uint(0); ; shift += 7 {
				if iNdEx >= l {
					return 0, io.ErrUnexpectedEOF
				}
				b := data[iNdEx]
				iNdEx++
				length |= (int(b) & 0x7F) << shift
				if b < 0x80 {
					break
				}
			}
			iNdEx += length
			if length < 0 {
				return 0, ErrInvalidLengthUpsidedown
			}
			return iNdEx, nil
		case 3:
			// deprecated group start: skip nested fields until group end
			for {
				var innerWire uint64
				var start int = iNdEx
				for shift := uint(0); ; shift += 7 {
					if iNdEx >= l {
						return 0, io.ErrUnexpectedEOF
					}
					b := data[iNdEx]
					iNdEx++
					innerWire |= (uint64(b) & 0x7F) << shift
					if b < 0x80 {
						break
					}
				}
				innerWireType := int(innerWire & 0x7)
				if innerWireType == 4 {
					break
				}
				next, err := skipUpsidedown(data[start:])
				if err != nil {
					return 0, err
				}
				iNdEx = start + next
			}
			return iNdEx, nil
		case 4:
			// group end marker
			return iNdEx, nil
		case 5:
			// 32-bit fixed
			iNdEx += 4
			return iNdEx, nil
		default:
			return 0, fmt.Errorf("proto: illegal wireType %d", wireType)
		}
	}
	panic("unreachable")
}
||||||
|
|
||||||
|
var (
	// ErrInvalidLengthUpsidedown is returned when a decoded length prefix
	// is negative (overflowed or corrupt input).
	ErrInvalidLengthUpsidedown = fmt.Errorf("proto: negative length found during unmarshaling")
)
||||||
|
|
||||||
|
// Size returns the exact number of bytes the wire encoding of m will
// occupy; MarshalTo relies on this to size its buffer.
func (m *BackIndexTermEntry) Size() (n int) {
	var l int
	_ = l
	if m.Term != nil {
		l = len(*m.Term)
		n += 1 + l + sovUpsidedown(uint64(l)) // key + length varint + bytes
	}
	if m.Field != nil {
		n += 1 + sovUpsidedown(uint64(*m.Field)) // key + varint
	}
	if m.XXX_unrecognized != nil {
		n += len(m.XXX_unrecognized)
	}
	return n
}
||||||
|
|
||||||
|
// Size returns the exact number of bytes the wire encoding of m will
// occupy; MarshalTo relies on this to size its buffer.
func (m *BackIndexStoreEntry) Size() (n int) {
	var l int
	_ = l
	if m.Field != nil {
		n += 1 + sovUpsidedown(uint64(*m.Field)) // key + varint
	}
	if len(m.ArrayPositions) > 0 {
		for _, e := range m.ArrayPositions {
			n += 1 + sovUpsidedown(uint64(e)) // one key + varint per element
		}
	}
	if m.XXX_unrecognized != nil {
		n += len(m.XXX_unrecognized)
	}
	return n
}
||||||
|
|
||||||
|
// Size returns the exact number of bytes the wire encoding of m will
// occupy; MarshalTo relies on this to size its buffer.
func (m *BackIndexRowValue) Size() (n int) {
	var l int
	_ = l
	if len(m.TermEntries) > 0 {
		for _, e := range m.TermEntries {
			l = e.Size()
			n += 1 + l + sovUpsidedown(uint64(l)) // key + length varint + message
		}
	}
	if len(m.StoredEntries) > 0 {
		for _, e := range m.StoredEntries {
			l = e.Size()
			n += 1 + l + sovUpsidedown(uint64(l)) // key + length varint + message
		}
	}
	if m.XXX_unrecognized != nil {
		n += len(m.XXX_unrecognized)
	}
	return n
}
||||||
|
|
||||||
|
// sovUpsidedown returns the number of bytes the protobuf varint encoding
// of x occupies (1..10; at least 1 even for zero).
func sovUpsidedown(x uint64) (n int) {
	n = 1
	for x >= 0x80 {
		x >>= 7
		n++
	}
	return n
}

// sozUpsidedown returns the varint size of x after zig-zag encoding,
// i.e. the encoded size of a sint64 field.
func sozUpsidedown(x uint64) (n int) {
	zigzag := (x << 1) ^ uint64(int64(x)>>63)
	return sovUpsidedown(zigzag)
}
||||||
|
// Marshal encodes m into a freshly allocated buffer sized by Size().
func (m *BackIndexTermEntry) Marshal() (data []byte, err error) {
	size := m.Size()
	data = make([]byte, size)
	n, err := m.MarshalTo(data)
	if err != nil {
		return nil, err
	}
	return data[:n], nil
}

// MarshalTo encodes m into data (which must be at least Size() bytes) and
// returns the number of bytes written. Both required fields must be set.
func (m *BackIndexTermEntry) MarshalTo(data []byte) (n int, err error) {
	var i int
	_ = i
	var l int
	_ = l
	if m.Term == nil {
		return 0, new(github_com_golang_protobuf_proto.RequiredNotSetError)
	} else {
		data[i] = 0xa // key: field 1, wire type 2 (length-delimited)
		i++
		i = encodeVarintUpsidedown(data, i, uint64(len(*m.Term)))
		i += copy(data[i:], *m.Term)
	}
	if m.Field == nil {
		return 0, new(github_com_golang_protobuf_proto.RequiredNotSetError)
	} else {
		data[i] = 0x10 // key: field 2, wire type 0 (varint)
		i++
		i = encodeVarintUpsidedown(data, i, uint64(*m.Field))
	}
	if m.XXX_unrecognized != nil {
		// preserved unknown fields are appended verbatim
		i += copy(data[i:], m.XXX_unrecognized)
	}
	return i, nil
}
||||||
|
|
||||||
|
// Marshal encodes m into a freshly allocated buffer sized by Size().
func (m *BackIndexStoreEntry) Marshal() (data []byte, err error) {
	size := m.Size()
	data = make([]byte, size)
	n, err := m.MarshalTo(data)
	if err != nil {
		return nil, err
	}
	return data[:n], nil
}

// MarshalTo encodes m into data (which must be at least Size() bytes) and
// returns the number of bytes written. Field is required.
func (m *BackIndexStoreEntry) MarshalTo(data []byte) (n int, err error) {
	var i int
	_ = i
	var l int
	_ = l
	if m.Field == nil {
		return 0, new(github_com_golang_protobuf_proto.RequiredNotSetError)
	} else {
		data[i] = 0x8 // key: field 1, wire type 0 (varint)
		i++
		i = encodeVarintUpsidedown(data, i, uint64(*m.Field))
	}
	if len(m.ArrayPositions) > 0 {
		for _, num := range m.ArrayPositions {
			data[i] = 0x10 // key: field 2, wire type 0 (unpacked repeated varint)
			i++
			i = encodeVarintUpsidedown(data, i, uint64(num))
		}
	}
	if m.XXX_unrecognized != nil {
		// preserved unknown fields are appended verbatim
		i += copy(data[i:], m.XXX_unrecognized)
	}
	return i, nil
}
||||||
|
|
||||||
|
// Marshal encodes m into a freshly allocated buffer sized by Size().
func (m *BackIndexRowValue) Marshal() (data []byte, err error) {
	size := m.Size()
	data = make([]byte, size)
	n, err := m.MarshalTo(data)
	if err != nil {
		return nil, err
	}
	return data[:n], nil
}

// MarshalTo encodes m into data (which must be at least Size() bytes) and
// returns the number of bytes written. Each element is emitted as a
// length-prefixed embedded message.
func (m *BackIndexRowValue) MarshalTo(data []byte) (n int, err error) {
	var i int
	_ = i
	var l int
	_ = l
	if len(m.TermEntries) > 0 {
		for _, msg := range m.TermEntries {
			data[i] = 0xa // key: field 1, wire type 2
			i++
			i = encodeVarintUpsidedown(data, i, uint64(msg.Size()))
			n, err := msg.MarshalTo(data[i:])
			if err != nil {
				return 0, err
			}
			i += n
		}
	}
	if len(m.StoredEntries) > 0 {
		for _, msg := range m.StoredEntries {
			data[i] = 0x12 // key: field 2, wire type 2
			i++
			i = encodeVarintUpsidedown(data, i, uint64(msg.Size()))
			n, err := msg.MarshalTo(data[i:])
			if err != nil {
				return 0, err
			}
			i += n
		}
	}
	if m.XXX_unrecognized != nil {
		// preserved unknown fields are appended verbatim
		i += copy(data[i:], m.XXX_unrecognized)
	}
	return i, nil
}
||||||
|
|
||||||
|
// encodeFixed64Upsidedown writes v little-endian into data at offset and
// returns the offset just past the 8 written bytes.
func encodeFixed64Upsidedown(data []byte, offset int, v uint64) int {
	for i := uint(0); i < 8; i++ {
		data[offset+int(i)] = uint8(v >> (8 * i))
	}
	return offset + 8
}

// encodeFixed32Upsidedown writes v little-endian into data at offset and
// returns the offset just past the 4 written bytes.
func encodeFixed32Upsidedown(data []byte, offset int, v uint32) int {
	for i := uint(0); i < 4; i++ {
		data[offset+int(i)] = uint8(v >> (8 * i))
	}
	return offset + 4
}

// encodeVarintUpsidedown writes v as a protobuf varint into data at offset
// and returns the offset just past the written bytes.
func encodeVarintUpsidedown(data []byte, offset int, v uint64) int {
	for v >= 0x80 {
		data[offset] = uint8(v) | 0x80 // low 7 bits, continuation bit set
		v >>= 7
		offset++
	}
	data[offset] = uint8(v)
	return offset + 1
}
@ -0,0 +1,14 @@ |
|||||||
|
// BackIndexTermEntry records one indexed term/field pair for a document.
message BackIndexTermEntry {
  required string term = 1;
  required uint32 field = 2;
}

// BackIndexStoreEntry records one stored field (with optional array
// positions) for a document.
message BackIndexStoreEntry {
  required uint32 field = 1;
  repeated uint64 arrayPositions = 2;
}

// BackIndexRowValue is the value of a back-index row: everything indexed
// and stored for a single document.
message BackIndexRowValue {
  repeated BackIndexTermEntry termEntries = 1;
  repeated BackIndexStoreEntry storedEntries = 2;
}
@ -0,0 +1,37 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package bleve |
||||||
|
|
||||||
|
// An IndexAlias is a wrapper around one or more
|
||||||
|
// Index objects. It has two distinct modes of
|
||||||
|
// operation.
|
||||||
|
// 1. When it points to a single index, ALL index
|
||||||
|
// operations are valid and will be passed through
|
||||||
|
// to the underlying index.
|
||||||
|
// 2. When it points to more than one index, the only
|
||||||
|
// valid operation is Search. In this case the
|
||||||
|
// search will be performed across all the
|
||||||
|
// underlying indexes and the results merged.
|
||||||
|
// Calls to Add/Remove/Swap the underlying indexes
|
||||||
|
// are atomic, so you can safely change the
|
||||||
|
// underlying Index objects while other components
|
||||||
|
// are performing operations.
|
||||||
|
type IndexAlias interface {
	Index

	// Add appends the given indexes to the alias.
	Add(i ...Index)
	// Remove removes the given indexes from the alias.
	Remove(i ...Index)
	// Swap atomically adds the indexes in `in` and removes those in `out`.
	Swap(in, out []Index)
}
@ -0,0 +1,605 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package bleve |
||||||
|
|
||||||
|
import ( |
||||||
|
"sort" |
||||||
|
"sync" |
||||||
|
"time" |
||||||
|
|
||||||
|
"golang.org/x/net/context" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/document" |
||||||
|
"github.com/blevesearch/bleve/index" |
||||||
|
"github.com/blevesearch/bleve/index/store" |
||||||
|
"github.com/blevesearch/bleve/mapping" |
||||||
|
"github.com/blevesearch/bleve/search" |
||||||
|
) |
||||||
|
|
||||||
|
// indexAliasImpl is the concrete IndexAlias. The RWMutex guards both the
// index list and the open flag, making Add/Remove/Swap atomic with respect
// to concurrent searches.
type indexAliasImpl struct {
	name    string  // alias display name — presumably surfaced via Name(); not visible here
	indexes []Index // the aliased underlying indexes
	mutex   sync.RWMutex
	open    bool // set false by Close(); most operations then fail
}
||||||
|
|
||||||
|
// NewIndexAlias creates a new IndexAlias over the provided
|
||||||
|
// Index objects.
|
||||||
|
func NewIndexAlias(indexes ...Index) *indexAliasImpl { |
||||||
|
return &indexAliasImpl{ |
||||||
|
name: "alias", |
||||||
|
indexes: indexes, |
||||||
|
open: true, |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func (i *indexAliasImpl) isAliasToSingleIndex() error { |
||||||
|
if len(i.indexes) < 1 { |
||||||
|
return ErrorAliasEmpty |
||||||
|
} else if len(i.indexes) > 1 { |
||||||
|
return ErrorAliasMulti |
||||||
|
} |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
func (i *indexAliasImpl) Index(id string, data interface{}) error { |
||||||
|
i.mutex.RLock() |
||||||
|
defer i.mutex.RUnlock() |
||||||
|
|
||||||
|
if !i.open { |
||||||
|
return ErrorIndexClosed |
||||||
|
} |
||||||
|
|
||||||
|
err := i.isAliasToSingleIndex() |
||||||
|
if err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
|
||||||
|
return i.indexes[0].Index(id, data) |
||||||
|
} |
||||||
|
|
||||||
|
func (i *indexAliasImpl) Delete(id string) error { |
||||||
|
i.mutex.RLock() |
||||||
|
defer i.mutex.RUnlock() |
||||||
|
|
||||||
|
if !i.open { |
||||||
|
return ErrorIndexClosed |
||||||
|
} |
||||||
|
|
||||||
|
err := i.isAliasToSingleIndex() |
||||||
|
if err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
|
||||||
|
return i.indexes[0].Delete(id) |
||||||
|
} |
||||||
|
|
||||||
|
func (i *indexAliasImpl) Batch(b *Batch) error { |
||||||
|
i.mutex.RLock() |
||||||
|
defer i.mutex.RUnlock() |
||||||
|
|
||||||
|
if !i.open { |
||||||
|
return ErrorIndexClosed |
||||||
|
} |
||||||
|
|
||||||
|
err := i.isAliasToSingleIndex() |
||||||
|
if err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
|
||||||
|
return i.indexes[0].Batch(b) |
||||||
|
} |
||||||
|
|
||||||
|
func (i *indexAliasImpl) Document(id string) (*document.Document, error) { |
||||||
|
i.mutex.RLock() |
||||||
|
defer i.mutex.RUnlock() |
||||||
|
|
||||||
|
if !i.open { |
||||||
|
return nil, ErrorIndexClosed |
||||||
|
} |
||||||
|
|
||||||
|
err := i.isAliasToSingleIndex() |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
|
||||||
|
return i.indexes[0].Document(id) |
||||||
|
} |
||||||
|
|
||||||
|
func (i *indexAliasImpl) DocCount() (uint64, error) { |
||||||
|
i.mutex.RLock() |
||||||
|
defer i.mutex.RUnlock() |
||||||
|
|
||||||
|
rv := uint64(0) |
||||||
|
|
||||||
|
if !i.open { |
||||||
|
return 0, ErrorIndexClosed |
||||||
|
} |
||||||
|
|
||||||
|
for _, index := range i.indexes { |
||||||
|
otherCount, err := index.DocCount() |
||||||
|
if err == nil { |
||||||
|
rv += otherCount |
||||||
|
} |
||||||
|
// tolerate errors to produce partial counts
|
||||||
|
} |
||||||
|
|
||||||
|
return rv, nil |
||||||
|
} |
||||||
|
|
||||||
|
func (i *indexAliasImpl) Search(req *SearchRequest) (*SearchResult, error) { |
||||||
|
return i.SearchInContext(context.Background(), req) |
||||||
|
} |
||||||
|
|
||||||
|
func (i *indexAliasImpl) SearchInContext(ctx context.Context, req *SearchRequest) (*SearchResult, error) { |
||||||
|
i.mutex.RLock() |
||||||
|
defer i.mutex.RUnlock() |
||||||
|
|
||||||
|
if !i.open { |
||||||
|
return nil, ErrorIndexClosed |
||||||
|
} |
||||||
|
|
||||||
|
if len(i.indexes) < 1 { |
||||||
|
return nil, ErrorAliasEmpty |
||||||
|
} |
||||||
|
|
||||||
|
// short circuit the simple case
|
||||||
|
if len(i.indexes) == 1 { |
||||||
|
return i.indexes[0].SearchInContext(ctx, req) |
||||||
|
} |
||||||
|
|
||||||
|
return MultiSearch(ctx, req, i.indexes...) |
||||||
|
} |
||||||
|
|
||||||
|
func (i *indexAliasImpl) Fields() ([]string, error) { |
||||||
|
i.mutex.RLock() |
||||||
|
defer i.mutex.RUnlock() |
||||||
|
|
||||||
|
if !i.open { |
||||||
|
return nil, ErrorIndexClosed |
||||||
|
} |
||||||
|
|
||||||
|
err := i.isAliasToSingleIndex() |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
|
||||||
|
return i.indexes[0].Fields() |
||||||
|
} |
||||||
|
|
||||||
|
// FieldDict returns a dictionary over the given field of the sole
// underlying index. NOTE: the read lock is deliberately NOT released on
// success — it is handed off to the returned indexAliasImplFieldDict,
// which presumably releases it when closed (wrapper defined elsewhere —
// confirm). Error paths unlock explicitly before returning.
func (i *indexAliasImpl) FieldDict(field string) (index.FieldDict, error) {
	i.mutex.RLock()

	if !i.open {
		i.mutex.RUnlock()
		return nil, ErrorIndexClosed
	}

	err := i.isAliasToSingleIndex()
	if err != nil {
		i.mutex.RUnlock()
		return nil, err
	}

	fieldDict, err := i.indexes[0].FieldDict(field)
	if err != nil {
		i.mutex.RUnlock()
		return nil, err
	}

	// lock intentionally still held here; see function comment
	return &indexAliasImplFieldDict{
		index:     i,
		fieldDict: fieldDict,
	}, nil
}

// FieldDictRange is like FieldDict but restricted to terms in
// [startTerm, endTerm]. Same lock-handoff behavior as FieldDict.
func (i *indexAliasImpl) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) {
	i.mutex.RLock()

	if !i.open {
		i.mutex.RUnlock()
		return nil, ErrorIndexClosed
	}

	err := i.isAliasToSingleIndex()
	if err != nil {
		i.mutex.RUnlock()
		return nil, err
	}

	fieldDict, err := i.indexes[0].FieldDictRange(field, startTerm, endTerm)
	if err != nil {
		i.mutex.RUnlock()
		return nil, err
	}

	// lock intentionally still held here; see FieldDict comment
	return &indexAliasImplFieldDict{
		index:     i,
		fieldDict: fieldDict,
	}, nil
}

// FieldDictPrefix is like FieldDict but restricted to terms with the
// given prefix. Same lock-handoff behavior as FieldDict.
func (i *indexAliasImpl) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) {
	i.mutex.RLock()

	if !i.open {
		i.mutex.RUnlock()
		return nil, ErrorIndexClosed
	}

	err := i.isAliasToSingleIndex()
	if err != nil {
		i.mutex.RUnlock()
		return nil, err
	}

	fieldDict, err := i.indexes[0].FieldDictPrefix(field, termPrefix)
	if err != nil {
		i.mutex.RUnlock()
		return nil, err
	}

	// lock intentionally still held here; see FieldDict comment
	return &indexAliasImplFieldDict{
		index:     i,
		fieldDict: fieldDict,
	}, nil
}
||||||
|
|
||||||
|
func (i *indexAliasImpl) Close() error { |
||||||
|
i.mutex.Lock() |
||||||
|
defer i.mutex.Unlock() |
||||||
|
|
||||||
|
i.open = false |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
func (i *indexAliasImpl) Mapping() mapping.IndexMapping { |
||||||
|
i.mutex.RLock() |
||||||
|
defer i.mutex.RUnlock() |
||||||
|
|
||||||
|
if !i.open { |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
err := i.isAliasToSingleIndex() |
||||||
|
if err != nil { |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
return i.indexes[0].Mapping() |
||||||
|
} |
||||||
|
|
||||||
|
func (i *indexAliasImpl) Stats() *IndexStat { |
||||||
|
i.mutex.RLock() |
||||||
|
defer i.mutex.RUnlock() |
||||||
|
|
||||||
|
if !i.open { |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
err := i.isAliasToSingleIndex() |
||||||
|
if err != nil { |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
return i.indexes[0].Stats() |
||||||
|
} |
||||||
|
|
||||||
|
func (i *indexAliasImpl) StatsMap() map[string]interface{} { |
||||||
|
i.mutex.RLock() |
||||||
|
defer i.mutex.RUnlock() |
||||||
|
|
||||||
|
if !i.open { |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
err := i.isAliasToSingleIndex() |
||||||
|
if err != nil { |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
return i.indexes[0].StatsMap() |
||||||
|
} |
||||||
|
|
||||||
|
func (i *indexAliasImpl) GetInternal(key []byte) ([]byte, error) { |
||||||
|
i.mutex.RLock() |
||||||
|
defer i.mutex.RUnlock() |
||||||
|
|
||||||
|
if !i.open { |
||||||
|
return nil, ErrorIndexClosed |
||||||
|
} |
||||||
|
|
||||||
|
err := i.isAliasToSingleIndex() |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
|
||||||
|
return i.indexes[0].GetInternal(key) |
||||||
|
} |
||||||
|
|
||||||
|
func (i *indexAliasImpl) SetInternal(key, val []byte) error { |
||||||
|
i.mutex.RLock() |
||||||
|
defer i.mutex.RUnlock() |
||||||
|
|
||||||
|
if !i.open { |
||||||
|
return ErrorIndexClosed |
||||||
|
} |
||||||
|
|
||||||
|
err := i.isAliasToSingleIndex() |
||||||
|
if err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
|
||||||
|
return i.indexes[0].SetInternal(key, val) |
||||||
|
} |
||||||
|
|
||||||
|
func (i *indexAliasImpl) DeleteInternal(key []byte) error { |
||||||
|
i.mutex.RLock() |
||||||
|
defer i.mutex.RUnlock() |
||||||
|
|
||||||
|
if !i.open { |
||||||
|
return ErrorIndexClosed |
||||||
|
} |
||||||
|
|
||||||
|
err := i.isAliasToSingleIndex() |
||||||
|
if err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
|
||||||
|
return i.indexes[0].DeleteInternal(key) |
||||||
|
} |
||||||
|
|
||||||
|
func (i *indexAliasImpl) Advanced() (index.Index, store.KVStore, error) { |
||||||
|
i.mutex.RLock() |
||||||
|
defer i.mutex.RUnlock() |
||||||
|
|
||||||
|
if !i.open { |
||||||
|
return nil, nil, ErrorIndexClosed |
||||||
|
} |
||||||
|
|
||||||
|
err := i.isAliasToSingleIndex() |
||||||
|
if err != nil { |
||||||
|
return nil, nil, err |
||||||
|
} |
||||||
|
|
||||||
|
return i.indexes[0].Advanced() |
||||||
|
} |
||||||
|
|
||||||
|
func (i *indexAliasImpl) Add(indexes ...Index) { |
||||||
|
i.mutex.Lock() |
||||||
|
defer i.mutex.Unlock() |
||||||
|
|
||||||
|
i.indexes = append(i.indexes, indexes...) |
||||||
|
} |
||||||
|
|
||||||
|
func (i *indexAliasImpl) removeSingle(index Index) { |
||||||
|
for pos, in := range i.indexes { |
||||||
|
if in == index { |
||||||
|
i.indexes = append(i.indexes[:pos], i.indexes[pos+1:]...) |
||||||
|
break |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func (i *indexAliasImpl) Remove(indexes ...Index) { |
||||||
|
i.mutex.Lock() |
||||||
|
defer i.mutex.Unlock() |
||||||
|
|
||||||
|
for _, in := range indexes { |
||||||
|
i.removeSingle(in) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func (i *indexAliasImpl) Swap(in, out []Index) { |
||||||
|
i.mutex.Lock() |
||||||
|
defer i.mutex.Unlock() |
||||||
|
|
||||||
|
// add
|
||||||
|
i.indexes = append(i.indexes, in...) |
||||||
|
|
||||||
|
// delete
|
||||||
|
for _, ind := range out { |
||||||
|
i.removeSingle(ind) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// createChildSearchRequest creates a separate
|
||||||
|
// request from the original
|
||||||
|
// For now, avoid data race on req structure.
|
||||||
|
// TODO disable highlight/field load on child
|
||||||
|
// requests, and add code to do this only on
|
||||||
|
// the actual final results.
|
||||||
|
// Perhaps that part needs to be optional,
|
||||||
|
// could be slower in remote usages.
|
||||||
|
func createChildSearchRequest(req *SearchRequest) *SearchRequest { |
||||||
|
rv := SearchRequest{ |
||||||
|
Query: req.Query, |
||||||
|
Size: req.Size + req.From, |
||||||
|
From: 0, |
||||||
|
Highlight: req.Highlight, |
||||||
|
Fields: req.Fields, |
||||||
|
Facets: req.Facets, |
||||||
|
Explain: req.Explain, |
||||||
|
Sort: req.Sort, |
||||||
|
} |
||||||
|
return &rv |
||||||
|
} |
||||||
|
|
||||||
|
// asyncSearchResult is the unit passed back from each per-index search
// goroutine in MultiSearch: the index name plus either a result or an error.
type asyncSearchResult struct {
	Name   string
	Result *SearchResult
	Err    error
}
||||||
|
|
||||||
|
// MultiSearch executes a SearchRequest across multiple Index objects,
// then merges the results. The indexes must honor any ctx deadline.
func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*SearchResult, error) {

	searchStart := time.Now()
	// buffered to len(indexes) so no child goroutine ever blocks on send
	asyncResults := make(chan *asyncSearchResult, len(indexes))

	// run search on each index in separate go routine
	var waitGroup sync.WaitGroup

	var searchChildIndex = func(in Index, childReq *SearchRequest) {
		rv := asyncSearchResult{Name: in.Name()}
		rv.Result, rv.Err = in.SearchInContext(ctx, childReq)
		asyncResults <- &rv
		waitGroup.Done()
	}

	waitGroup.Add(len(indexes))
	for _, in := range indexes {
		go searchChildIndex(in, createChildSearchRequest(req))
	}

	// on another go routine, close after finished
	go func() {
		waitGroup.Wait()
		close(asyncResults)
	}()

	var sr *SearchResult
	indexErrors := make(map[string]error)

	// drain until the closer goroutine closes the channel
	for asr := range asyncResults {
		if asr.Err == nil {
			if sr == nil {
				// first result
				sr = asr.Result
			} else {
				// merge with previous
				sr.Merge(asr.Result)
			}
		} else {
			indexErrors[asr.Name] = asr.Err
		}
	}

	// merge just concatenated all the hits
	// now lets clean it up

	// handle case where no results were successful
	if sr == nil {
		sr = &SearchResult{
			Status: &SearchStatus{
				Errors: make(map[string]error),
			},
		}
	}

	// sort all hits with the requested order
	if len(req.Sort) > 0 {
		sorter := newMultiSearchHitSorter(req.Sort, sr.Hits)
		sort.Sort(sorter)
	}

	// now skip over the correct From (children searched from offset 0)
	if req.From > 0 && len(sr.Hits) > req.From {
		sr.Hits = sr.Hits[req.From:]
	} else if req.From > 0 {
		sr.Hits = search.DocumentMatchCollection{}
	}

	// now trim to the correct size (children over-fetched Size+From)
	if req.Size > 0 && len(sr.Hits) > req.Size {
		sr.Hits = sr.Hits[0:req.Size]
	}

	// fix up facets
	for name, fr := range req.Facets {
		sr.Facets.Fixup(name, fr.Size)
	}

	// fix up original request
	sr.Request = req
	searchDuration := time.Since(searchStart)
	sr.Took = searchDuration

	// fix up errors: failed children are reported in Status, not as an
	// overall error — the merged result of the successful children is
	// still returned with a nil error
	if len(indexErrors) > 0 {
		if sr.Status.Errors == nil {
			sr.Status.Errors = make(map[string]error)
		}
		for indexName, indexErr := range indexErrors {
			sr.Status.Errors[indexName] = indexErr
			sr.Status.Total++
			sr.Status.Failed++
		}
	}

	return sr, nil
}
||||||
|
|
||||||
|
// NewBatch returns a batch from the underlying index, but only when the
// alias is open and currently points at exactly one index; otherwise nil.
func (i *indexAliasImpl) NewBatch() *Batch {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open {
		return nil
	}

	err := i.isAliasToSingleIndex()
	if err != nil {
		return nil
	}

	return i.indexes[0].NewBatch()
}
||||||
|
|
||||||
|
// Name returns the name assigned to this alias.
func (i *indexAliasImpl) Name() string {
	return i.name
}

// SetName assigns a new name to this alias.
// NOTE(review): unlike indexImpl.SetName this does not re-register the
// index in indexStats — confirm whether aliases are tracked there.
func (i *indexAliasImpl) SetName(name string) {
	i.name = name
}
||||||
|
|
||||||
|
// indexAliasImplFieldDict wraps a FieldDict from the aliased index so
// that Close also releases the alias's read lock.
type indexAliasImplFieldDict struct {
	index     *indexAliasImpl
	fieldDict index.FieldDict
}

// Next returns the next dictionary entry from the wrapped FieldDict.
func (f *indexAliasImplFieldDict) Next() (*index.DictEntry, error) {
	return f.fieldDict.Next()
}

// Close closes the wrapped FieldDict, then releases the alias read lock
// (presumably taken where the dict was created — the creator is outside
// this view; compare indexImpl.FieldDict).
func (f *indexAliasImplFieldDict) Close() error {
	defer f.index.mutex.RUnlock()
	return f.fieldDict.Close()
}
||||||
|
|
||||||
|
// multiSearchHitSorter sorts hits merged from multiple indexes using a
// search.SortOrder, with the per-key scoring/descending flags precomputed.
type multiSearchHitSorter struct {
	hits          search.DocumentMatchCollection
	sort          search.SortOrder
	cachedScoring []bool
	cachedDesc    []bool
}

// newMultiSearchHitSorter builds a sorter for hits, caching the sort
// order's is-score and is-descending flags once up front.
func newMultiSearchHitSorter(sort search.SortOrder, hits search.DocumentMatchCollection) *multiSearchHitSorter {
	return &multiSearchHitSorter{
		sort:          sort,
		hits:          hits,
		cachedScoring: sort.CacheIsScore(),
		cachedDesc:    sort.CacheDescending(),
	}
}

// sort.Interface implementation.
func (m *multiSearchHitSorter) Len() int      { return len(m.hits) }
func (m *multiSearchHitSorter) Swap(i, j int) { m.hits[i], m.hits[j] = m.hits[j], m.hits[i] }
func (m *multiSearchHitSorter) Less(i, j int) bool {
	c := m.sort.Compare(m.cachedScoring, m.cachedDesc, m.hits[i], m.hits[j])
	return c < 0
}
@ -0,0 +1,729 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package bleve |
||||||
|
|
||||||
|
import ( |
||||||
|
"encoding/json" |
||||||
|
"fmt" |
||||||
|
"os" |
||||||
|
"sync" |
||||||
|
"sync/atomic" |
||||||
|
"time" |
||||||
|
|
||||||
|
"golang.org/x/net/context" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/document" |
||||||
|
"github.com/blevesearch/bleve/index" |
||||||
|
"github.com/blevesearch/bleve/index/store" |
||||||
|
"github.com/blevesearch/bleve/index/upsidedown" |
||||||
|
"github.com/blevesearch/bleve/mapping" |
||||||
|
"github.com/blevesearch/bleve/registry" |
||||||
|
"github.com/blevesearch/bleve/search" |
||||||
|
"github.com/blevesearch/bleve/search/collector" |
||||||
|
"github.com/blevesearch/bleve/search/facet" |
||||||
|
"github.com/blevesearch/bleve/search/highlight" |
||||||
|
) |
||||||
|
|
||||||
|
// indexImpl is the default Index implementation: an index.Index plus its
// mapping, guarded by a RWMutex that also protects the open flag.
type indexImpl struct {
	path  string
	name  string
	meta  *indexMeta
	i     index.Index
	m     mapping.IndexMapping
	mutex sync.RWMutex
	open  bool
	stats *IndexStat
}

// storePath is the subdirectory (under the index path) holding the KV store.
const storePath = "store"

// mappingInternalKey is the internal KV key under which the serialized
// index mapping is persisted.
var mappingInternalKey = []byte("_mapping")

// indexStorePath returns the on-disk location of the KV store for an
// index rooted at path.
func indexStorePath(path string) string {
	return path + string(os.PathSeparator) + storePath
}
||||||
|
|
||||||
|
// newIndexUsing creates a new index at path (in-memory when path is "")
// with the given mapping, index type, and KV store configuration, persists
// the meta file and the mapping, and returns the index already open.
// Note: the caller-supplied kvconfig map is mutated (path/create flags).
func newIndexUsing(path string, mapping mapping.IndexMapping, indexType string, kvstore string, kvconfig map[string]interface{}) (*indexImpl, error) {
	// first validate the mapping
	err := mapping.Validate()
	if err != nil {
		return nil, err
	}

	if kvconfig == nil {
		kvconfig = map[string]interface{}{}
	}

	if kvstore == "" {
		return nil, fmt.Errorf("bleve not configured for file based indexing")
	}

	rv := indexImpl{
		path: path,
		name: path,
		m:    mapping,
		meta: newIndexMeta(indexType, kvstore, kvconfig),
	}
	rv.stats = &IndexStat{i: &rv}
	// at this point there is hope that we can be successful, so save index meta
	if path != "" {
		err = rv.meta.Save(path)
		if err != nil {
			return nil, err
		}
		// creating (not opening): the store must not already exist
		kvconfig["create_if_missing"] = true
		kvconfig["error_if_exists"] = true
		kvconfig["path"] = indexStorePath(path)
	} else {
		kvconfig["path"] = ""
	}

	// open the index
	indexTypeConstructor := registry.IndexTypeConstructorByName(rv.meta.IndexType)
	if indexTypeConstructor == nil {
		return nil, ErrorUnknownIndexType
	}

	rv.i, err = indexTypeConstructor(rv.meta.Storage, kvconfig, Config.analysisQueue)
	if err != nil {
		return nil, err
	}
	err = rv.i.Open()
	if err != nil {
		if err == index.ErrorUnknownStorageType {
			return nil, ErrorUnknownStorageType
		}
		return nil, err
	}

	// now persist the mapping
	mappingBytes, err := json.Marshal(mapping)
	if err != nil {
		return nil, err
	}
	err = rv.i.SetInternal(mappingInternalKey, mappingBytes)
	if err != nil {
		return nil, err
	}

	// mark the index as open
	rv.mutex.Lock()
	defer rv.mutex.Unlock()
	rv.open = true
	indexStats.Register(&rv)
	return &rv, nil
}
||||||
|
|
||||||
|
// openIndexUsing opens an existing index at path, reading its meta file,
// opening the configured store (runtimeConfig entries override the stored
// config), and loading the persisted mapping.  On an invalid mapping the
// index is still returned open and usable, alongside the validation error.
func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *indexImpl, err error) {
	rv = &indexImpl{
		path: path,
		name: path,
	}
	rv.stats = &IndexStat{i: rv}

	rv.meta, err = openIndexMeta(path)
	if err != nil {
		return nil, err
	}

	// backwards compatibility if index type is missing
	if rv.meta.IndexType == "" {
		rv.meta.IndexType = upsidedown.Name
	}

	storeConfig := rv.meta.Config
	if storeConfig == nil {
		storeConfig = map[string]interface{}{}
	}

	// opening (not creating): the store must already exist
	storeConfig["path"] = indexStorePath(path)
	storeConfig["create_if_missing"] = false
	storeConfig["error_if_exists"] = false
	for rck, rcv := range runtimeConfig {
		storeConfig[rck] = rcv
	}

	// open the index
	indexTypeConstructor := registry.IndexTypeConstructorByName(rv.meta.IndexType)
	if indexTypeConstructor == nil {
		return nil, ErrorUnknownIndexType
	}

	rv.i, err = indexTypeConstructor(rv.meta.Storage, storeConfig, Config.analysisQueue)
	if err != nil {
		return nil, err
	}
	err = rv.i.Open()
	if err != nil {
		if err == index.ErrorUnknownStorageType {
			return nil, ErrorUnknownStorageType
		}
		return nil, err
	}

	// now load the mapping
	indexReader, err := rv.i.Reader()
	if err != nil {
		return nil, err
	}
	// close the reader on exit; surface its error only if nothing else failed
	defer func() {
		if cerr := indexReader.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	mappingBytes, err := indexReader.GetInternal(mappingInternalKey)
	if err != nil {
		return nil, err
	}

	var im *mapping.IndexMappingImpl
	err = json.Unmarshal(mappingBytes, &im)
	if err != nil {
		return nil, fmt.Errorf("error parsing mapping JSON: %v\nmapping contents:\n%s", err, string(mappingBytes))
	}

	// mark the index as open
	rv.mutex.Lock()
	defer rv.mutex.Unlock()
	rv.open = true

	// validate the mapping
	err = im.Validate()
	if err != nil {
		// note even if the mapping is invalid
		// we still return an open usable index
		return rv, err
	}

	rv.m = im
	indexStats.Register(rv)
	return rv, err
}
||||||
|
|
||||||
|
// Advanced returns implementation internals
// necessary ONLY for advanced usage: the underlying index.Index and its
// KV store.
func (i *indexImpl) Advanced() (index.Index, store.KVStore, error) {
	s, err := i.i.Advanced()
	if err != nil {
		return nil, nil, err
	}
	return i.i, s, nil
}

// Mapping returns the IndexMapping in use by this
// Index.
func (i *indexImpl) Mapping() mapping.IndexMapping {
	return i.m
}
||||||
|
|
||||||
|
// Index the object with the specified identifier.
|
||||||
|
// The IndexMapping for this index will determine
|
||||||
|
// how the object is indexed.
|
||||||
|
func (i *indexImpl) Index(id string, data interface{}) (err error) { |
||||||
|
if id == "" { |
||||||
|
return ErrorEmptyID |
||||||
|
} |
||||||
|
|
||||||
|
i.mutex.RLock() |
||||||
|
defer i.mutex.RUnlock() |
||||||
|
|
||||||
|
if !i.open { |
||||||
|
return ErrorIndexClosed |
||||||
|
} |
||||||
|
|
||||||
|
doc := document.NewDocument(id) |
||||||
|
err = i.m.MapDocument(doc, data) |
||||||
|
if err != nil { |
||||||
|
return |
||||||
|
} |
||||||
|
err = i.i.Update(doc) |
||||||
|
return |
||||||
|
} |
||||||
|
|
||||||
|
// Delete entries for the specified identifier from
|
||||||
|
// the index.
|
||||||
|
func (i *indexImpl) Delete(id string) (err error) { |
||||||
|
if id == "" { |
||||||
|
return ErrorEmptyID |
||||||
|
} |
||||||
|
|
||||||
|
i.mutex.RLock() |
||||||
|
defer i.mutex.RUnlock() |
||||||
|
|
||||||
|
if !i.open { |
||||||
|
return ErrorIndexClosed |
||||||
|
} |
||||||
|
|
||||||
|
err = i.i.Delete(id) |
||||||
|
return |
||||||
|
} |
||||||
|
|
||||||
|
// Batch executes multiple Index and Delete
// operations at the same time.  There are often
// significant performance benefits when performing
// operations in a batch.
func (i *indexImpl) Batch(b *Batch) error {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open {
		return ErrorIndexClosed
	}

	return i.i.Batch(b.internal)
}
||||||
|
|
||||||
|
// Document is used to find the values of all the
// stored fields for a document in the index.  These
// stored fields are put back into a Document object
// and returned.
func (i *indexImpl) Document(id string) (doc *document.Document, err error) {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open {
		return nil, ErrorIndexClosed
	}
	indexReader, err := i.i.Reader()
	if err != nil {
		return nil, err
	}
	// propagate a reader-close failure via the named return, but never
	// clobber an earlier error
	defer func() {
		if cerr := indexReader.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	doc, err = indexReader.Document(id)
	if err != nil {
		return nil, err
	}
	return doc, nil
}
||||||
|
|
||||||
|
// DocCount returns the number of documents in the
// index.
func (i *indexImpl) DocCount() (count uint64, err error) {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open {
		return 0, ErrorIndexClosed
	}

	// open a reader for this search
	indexReader, err := i.i.Reader()
	if err != nil {
		return 0, fmt.Errorf("error opening index reader %v", err)
	}
	// close the reader on exit; surface its error only if nothing else failed
	defer func() {
		if cerr := indexReader.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	count, err = indexReader.DocCount()
	return
}
||||||
|
|
||||||
|
// Search executes a search request operation.
// Returns a SearchResult object or an error.
// Equivalent to SearchInContext with a background (non-cancellable) context.
func (i *indexImpl) Search(req *SearchRequest) (sr *SearchResult, err error) {
	return i.SearchInContext(context.Background(), req)
}
||||||
|
|
||||||
|
// SearchInContext executes a search request operation within the provided
// Context.  Returns a SearchResult object or an error.
func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr *SearchResult, err error) {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	searchStart := time.Now()

	if !i.open {
		return nil, ErrorIndexClosed
	}

	collector := collector.NewTopNCollector(req.Size, req.From, req.Sort)

	// open a reader for this search
	indexReader, err := i.i.Reader()
	if err != nil {
		return nil, fmt.Errorf("error opening index reader %v", err)
	}
	// close the reader on exit; surface its error only if nothing else failed
	defer func() {
		if cerr := indexReader.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	searcher, err := req.Query.Searcher(indexReader, i.m, req.Explain)
	if err != nil {
		return nil, err
	}
	defer func() {
		if serr := searcher.Close(); err == nil && serr != nil {
			err = serr
		}
	}()

	if req.Facets != nil {
		facetsBuilder := search.NewFacetsBuilder(indexReader)
		for facetName, facetRequest := range req.Facets {
			if facetRequest.NumericRanges != nil {
				// build numeric range facet
				facetBuilder := facet.NewNumericFacetBuilder(facetRequest.Field, facetRequest.Size)
				for _, nr := range facetRequest.NumericRanges {
					facetBuilder.AddRange(nr.Name, nr.Min, nr.Max)
				}
				facetsBuilder.Add(facetName, facetBuilder)
			} else if facetRequest.DateTimeRanges != nil {
				// build date range facet
				facetBuilder := facet.NewDateTimeFacetBuilder(facetRequest.Field, facetRequest.Size)
				dateTimeParser := i.m.DateTimeParserNamed("")
				for _, dr := range facetRequest.DateTimeRanges {
					start, end := dr.ParseDates(dateTimeParser)
					facetBuilder.AddRange(dr.Name, start, end)
				}
				facetsBuilder.Add(facetName, facetBuilder)
			} else {
				// build terms facet
				facetBuilder := facet.NewTermsFacetBuilder(facetRequest.Field, facetRequest.Size)
				facetsBuilder.Add(facetName, facetBuilder)
			}
		}
		collector.SetFacetsBuilder(facetsBuilder)
	}

	err = collector.Collect(ctx, searcher, indexReader)
	if err != nil {
		return nil, err
	}

	hits := collector.Results()

	var highlighter highlight.Highlighter

	if req.Highlight != nil {
		// get the right highlighter: default first, then any requested style
		highlighter, err = Config.Cache.HighlighterNamed(Config.DefaultHighlighter)
		if err != nil {
			return nil, err
		}
		if req.Highlight.Style != nil {
			highlighter, err = Config.Cache.HighlighterNamed(*req.Highlight.Style)
			if err != nil {
				return nil, err
			}
		}
		if highlighter == nil {
			return nil, fmt.Errorf("no highlighter named `%s` registered", *req.Highlight.Style)
		}
	}

	// load stored fields and/or highlight each hit, as requested
	for _, hit := range hits {
		if len(req.Fields) > 0 || highlighter != nil {
			// note: this err deliberately shadows the named return
			doc, err := indexReader.Document(hit.ID)
			if err == nil && doc != nil {
				if len(req.Fields) > 0 {
					for _, f := range req.Fields {
						for _, docF := range doc.Fields {
							if f == "*" || docF.Name() == f {
								var value interface{}
								switch docF := docF.(type) {
								case *document.TextField:
									value = string(docF.Value())
								case *document.NumericField:
									num, err := docF.Number()
									if err == nil {
										value = num
									}
								case *document.DateTimeField:
									datetime, err := docF.DateTime()
									if err == nil {
										value = datetime.Format(time.RFC3339)
									}
								case *document.BooleanField:
									boolean, err := docF.Boolean()
									if err == nil {
										value = boolean
									}
								}
								if value != nil {
									hit.AddFieldValue(docF.Name(), value)
								}
							}
						}
					}
				}
				if highlighter != nil {
					highlightFields := req.Highlight.Fields
					if highlightFields == nil {
						// add all fields with matches
						highlightFields = make([]string, 0, len(hit.Locations))
						for k := range hit.Locations {
							highlightFields = append(highlightFields, k)
						}
					}
					for _, hf := range highlightFields {
						highlighter.BestFragmentsInField(hit, doc, hf, 1)
					}
				}
			} else if doc == nil {
				// unexpected case, a doc ID that was found as a search hit
				// was unable to be found during document lookup
				return nil, ErrorIndexReadInconsistency
			}
		}
		if i.name != "" {
			hit.Index = i.name
		}
	}

	atomic.AddUint64(&i.stats.searches, 1)
	searchDuration := time.Since(searchStart)
	atomic.AddUint64(&i.stats.searchTime, uint64(searchDuration))

	if Config.SlowSearchLogThreshold > 0 &&
		searchDuration > Config.SlowSearchLogThreshold {
		logger.Printf("slow search took %s - %v", searchDuration, req)
	}

	return &SearchResult{
		Status: &SearchStatus{
			Total:      1,
			Failed:     0,
			Successful: 1,
			Errors:     make(map[string]error),
		},
		Request:  req,
		Hits:     hits,
		Total:    collector.Total(),
		MaxScore: collector.MaxScore(),
		Took:     searchDuration,
		Facets:   collector.FacetResults(),
	}, nil
}
||||||
|
|
||||||
|
// Fields returns the name of all the fields this
// Index has operated on.
func (i *indexImpl) Fields() (fields []string, err error) {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open {
		return nil, ErrorIndexClosed
	}

	indexReader, err := i.i.Reader()
	if err != nil {
		return nil, err
	}
	// close the reader on exit; surface its error only if nothing else failed
	defer func() {
		if cerr := indexReader.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	fields, err = indexReader.Fields()
	if err != nil {
		return nil, err
	}
	return fields, nil
}
||||||
|
|
||||||
|
func (i *indexImpl) FieldDict(field string) (index.FieldDict, error) { |
||||||
|
i.mutex.RLock() |
||||||
|
|
||||||
|
if !i.open { |
||||||
|
i.mutex.RUnlock() |
||||||
|
return nil, ErrorIndexClosed |
||||||
|
} |
||||||
|
|
||||||
|
indexReader, err := i.i.Reader() |
||||||
|
if err != nil { |
||||||
|
i.mutex.RUnlock() |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
|
||||||
|
fieldDict, err := indexReader.FieldDict(field) |
||||||
|
if err != nil { |
||||||
|
i.mutex.RUnlock() |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
|
||||||
|
return &indexImplFieldDict{ |
||||||
|
index: i, |
||||||
|
indexReader: indexReader, |
||||||
|
fieldDict: fieldDict, |
||||||
|
}, nil |
||||||
|
} |
||||||
|
|
||||||
|
func (i *indexImpl) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) { |
||||||
|
i.mutex.RLock() |
||||||
|
|
||||||
|
if !i.open { |
||||||
|
i.mutex.RUnlock() |
||||||
|
return nil, ErrorIndexClosed |
||||||
|
} |
||||||
|
|
||||||
|
indexReader, err := i.i.Reader() |
||||||
|
if err != nil { |
||||||
|
i.mutex.RUnlock() |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
|
||||||
|
fieldDict, err := indexReader.FieldDictRange(field, startTerm, endTerm) |
||||||
|
if err != nil { |
||||||
|
i.mutex.RUnlock() |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
|
||||||
|
return &indexImplFieldDict{ |
||||||
|
index: i, |
||||||
|
indexReader: indexReader, |
||||||
|
fieldDict: fieldDict, |
||||||
|
}, nil |
||||||
|
} |
||||||
|
|
||||||
|
func (i *indexImpl) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) { |
||||||
|
i.mutex.RLock() |
||||||
|
|
||||||
|
if !i.open { |
||||||
|
i.mutex.RUnlock() |
||||||
|
return nil, ErrorIndexClosed |
||||||
|
} |
||||||
|
|
||||||
|
indexReader, err := i.i.Reader() |
||||||
|
if err != nil { |
||||||
|
i.mutex.RUnlock() |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
|
||||||
|
fieldDict, err := indexReader.FieldDictPrefix(field, termPrefix) |
||||||
|
if err != nil { |
||||||
|
i.mutex.RUnlock() |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
|
||||||
|
return &indexImplFieldDict{ |
||||||
|
index: i, |
||||||
|
indexReader: indexReader, |
||||||
|
fieldDict: fieldDict, |
||||||
|
}, nil |
||||||
|
} |
||||||
|
|
||||||
|
// Close unregisters the index from stats, marks it closed (so subsequent
// operations return ErrorIndexClosed), and closes the underlying index.
func (i *indexImpl) Close() error {
	i.mutex.Lock()
	defer i.mutex.Unlock()

	indexStats.UnRegister(i)

	i.open = false
	return i.i.Close()
}

// Stats returns this index's statistics accumulator.
func (i *indexImpl) Stats() *IndexStat {
	return i.stats
}

// StatsMap returns the index statistics as a generic map.
func (i *indexImpl) StatsMap() map[string]interface{} {
	return i.stats.statsMap()
}
||||||
|
|
||||||
|
// GetInternal retrieves the value stored under an internal
// (non-document) key.
func (i *indexImpl) GetInternal(key []byte) (val []byte, err error) {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open {
		return nil, ErrorIndexClosed
	}

	reader, err := i.i.Reader()
	if err != nil {
		return nil, err
	}
	// close the reader on exit; surface its error only if nothing else failed
	defer func() {
		if cerr := reader.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	val, err = reader.GetInternal(key)
	if err != nil {
		return nil, err
	}
	return val, nil
}
||||||
|
|
||||||
|
// SetInternal stores a value under an internal (non-document) key.
func (i *indexImpl) SetInternal(key, val []byte) error {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open {
		return ErrorIndexClosed
	}

	return i.i.SetInternal(key, val)
}

// DeleteInternal removes the value stored under an internal key.
func (i *indexImpl) DeleteInternal(key []byte) error {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open {
		return ErrorIndexClosed
	}

	return i.i.DeleteInternal(key)
}
||||||
|
|
||||||
|
// NewBatch creates a new empty batch.
func (i *indexImpl) NewBatch() *Batch {
	return &Batch{
		index:    i,
		internal: index.NewBatch(),
	}
}

// Name returns the name assigned to this index.
func (i *indexImpl) Name() string {
	return i.name
}

// SetName renames the index, re-registering it under the new name in the
// global index stats.
func (i *indexImpl) SetName(name string) {
	indexStats.UnRegister(i)
	i.name = name
	indexStats.Register(i)
}
||||||
|
|
||||||
|
// indexImplFieldDict couples a FieldDict with the reader it came from and
// the owning index, whose read lock stays held for the dict's lifetime.
type indexImplFieldDict struct {
	index       *indexImpl
	indexReader index.IndexReader
	fieldDict   index.FieldDict
}

// Next returns the next dictionary entry.
func (f *indexImplFieldDict) Next() (*index.DictEntry, error) {
	return f.fieldDict.Next()
}

// Close closes the dict, then the reader, and finally (via defer) releases
// the index read lock taken when the dict was created (see FieldDict).
func (f *indexImplFieldDict) Close() error {
	defer f.index.mutex.RUnlock()
	err := f.fieldDict.Close()
	if err != nil {
		return err
	}
	return f.indexReader.Close()
}
@ -0,0 +1,96 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package bleve |
||||||
|
|
||||||
|
import ( |
||||||
|
"encoding/json" |
||||||
|
"io/ioutil" |
||||||
|
"os" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/index/upsidedown" |
||||||
|
) |
||||||
|
|
||||||
|
// metaFilename is the name of the JSON file describing an index on disk.
const metaFilename = "index_meta.json"

// indexMeta is the persisted description of an index: which index type
// and storage backend it uses, plus backend-specific config.
type indexMeta struct {
	Storage   string                 `json:"storage"`
	IndexType string                 `json:"index_type"`
	Config    map[string]interface{} `json:"config,omitempty"`
}
||||||
|
|
||||||
|
func newIndexMeta(indexType string, storage string, config map[string]interface{}) *indexMeta { |
||||||
|
return &indexMeta{ |
||||||
|
IndexType: indexType, |
||||||
|
Storage: storage, |
||||||
|
Config: config, |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func openIndexMeta(path string) (*indexMeta, error) { |
||||||
|
if _, err := os.Stat(path); os.IsNotExist(err) { |
||||||
|
return nil, ErrorIndexPathDoesNotExist |
||||||
|
} |
||||||
|
indexMetaPath := indexMetaPath(path) |
||||||
|
metaBytes, err := ioutil.ReadFile(indexMetaPath) |
||||||
|
if err != nil { |
||||||
|
return nil, ErrorIndexMetaMissing |
||||||
|
} |
||||||
|
var im indexMeta |
||||||
|
err = json.Unmarshal(metaBytes, &im) |
||||||
|
if err != nil { |
||||||
|
return nil, ErrorIndexMetaCorrupt |
||||||
|
} |
||||||
|
if im.IndexType == "" { |
||||||
|
im.IndexType = upsidedown.Name |
||||||
|
} |
||||||
|
return &im, nil |
||||||
|
} |
||||||
|
|
||||||
|
// Save writes the meta file into the directory at path, creating the
// directory first.  The O_EXCL open means an already-existing meta file
// yields ErrorIndexPathExists rather than being overwritten.
func (i *indexMeta) Save(path string) (err error) {
	indexMetaPath := indexMetaPath(path)
	// ensure any necessary parent directories exist
	err = os.MkdirAll(path, 0700)
	if err != nil {
		if os.IsExist(err) {
			return ErrorIndexPathExists
		}
		return err
	}
	metaBytes, err := json.Marshal(i)
	if err != nil {
		return err
	}
	indexMetaFile, err := os.OpenFile(indexMetaPath, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0666)
	if err != nil {
		if os.IsExist(err) {
			return ErrorIndexPathExists
		}
		return err
	}
	// close the file on exit; surface its error only if the write succeeded
	defer func() {
		if ierr := indexMetaFile.Close(); err == nil && ierr != nil {
			err = ierr
		}
	}()
	_, err = indexMetaFile.Write(metaBytes)
	if err != nil {
		return err
	}
	return nil
}
||||||
|
|
||||||
|
// indexMetaPath returns the location of the meta file inside the index
// directory at path.
func indexMetaPath(path string) string {
	return path + string(os.PathSeparator) + metaFilename
}
@ -0,0 +1,75 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package bleve |
||||||
|
|
||||||
|
import ( |
||||||
|
"encoding/json" |
||||||
|
"sync" |
||||||
|
"sync/atomic" |
||||||
|
) |
||||||
|
|
||||||
|
// IndexStat tracks statistics for a single index: cumulative search
// count and total search time, plus a handle back to the index
// implementation for its lower-level stats.
type IndexStat struct {
	// searches counts completed searches (accessed atomically)
	searches uint64
	// searchTime accumulates total search time (accessed atomically)
	searchTime uint64
	// i is the index implementation whose stats are exposed
	i *indexImpl
}
||||||
|
|
||||||
|
func (is *IndexStat) statsMap() map[string]interface{} { |
||||||
|
m := map[string]interface{}{} |
||||||
|
m["index"] = is.i.i.StatsMap() |
||||||
|
m["searches"] = atomic.LoadUint64(&is.searches) |
||||||
|
m["search_time"] = atomic.LoadUint64(&is.searchTime) |
||||||
|
return m |
||||||
|
} |
||||||
|
|
||||||
|
func (is *IndexStat) MarshalJSON() ([]byte, error) { |
||||||
|
m := is.statsMap() |
||||||
|
return json.Marshal(m) |
||||||
|
} |
||||||
|
|
||||||
|
// IndexStats aggregates the stats of several named indexes.
type IndexStats struct {
	// indexes maps index name to its stats; guarded by mutex
	indexes map[string]*IndexStat
	mutex   sync.RWMutex
}
||||||
|
|
||||||
|
func NewIndexStats() *IndexStats { |
||||||
|
return &IndexStats{ |
||||||
|
indexes: make(map[string]*IndexStat), |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func (i *IndexStats) Register(index Index) { |
||||||
|
i.mutex.Lock() |
||||||
|
defer i.mutex.Unlock() |
||||||
|
i.indexes[index.Name()] = index.Stats() |
||||||
|
} |
||||||
|
|
||||||
|
func (i *IndexStats) UnRegister(index Index) { |
||||||
|
i.mutex.Lock() |
||||||
|
defer i.mutex.Unlock() |
||||||
|
delete(i.indexes, index.Name()) |
||||||
|
} |
||||||
|
|
||||||
|
func (i *IndexStats) String() string { |
||||||
|
i.mutex.RLock() |
||||||
|
defer i.mutex.RUnlock() |
||||||
|
bytes, err := json.Marshal(i.indexes) |
||||||
|
if err != nil { |
||||||
|
return "error marshaling stats" |
||||||
|
} |
||||||
|
return string(bytes) |
||||||
|
} |
||||||
|
|
||||||
|
var indexStats *IndexStats |
@ -0,0 +1,61 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package bleve |
||||||
|
|
||||||
|
import "github.com/blevesearch/bleve/mapping" |
||||||
|
|
||||||
|
// NewIndexMapping creates a new IndexMapping that will use all the default indexing rules
|
||||||
|
func NewIndexMapping() *mapping.IndexMappingImpl { |
||||||
|
return mapping.NewIndexMapping() |
||||||
|
} |
||||||
|
|
||||||
|
// NewDocumentMapping returns a new document mapping
|
||||||
|
// with all the default values.
|
||||||
|
func NewDocumentMapping() *mapping.DocumentMapping { |
||||||
|
return mapping.NewDocumentMapping() |
||||||
|
} |
||||||
|
|
||||||
|
// NewDocumentStaticMapping returns a new document
|
||||||
|
// mapping that will not automatically index parts
|
||||||
|
// of a document without an explicit mapping.
|
||||||
|
func NewDocumentStaticMapping() *mapping.DocumentMapping { |
||||||
|
return mapping.NewDocumentStaticMapping() |
||||||
|
} |
||||||
|
|
||||||
|
// NewDocumentDisabledMapping returns a new document
|
||||||
|
// mapping that will not perform any indexing.
|
||||||
|
func NewDocumentDisabledMapping() *mapping.DocumentMapping { |
||||||
|
return mapping.NewDocumentDisabledMapping() |
||||||
|
} |
||||||
|
|
||||||
|
// NewTextFieldMapping returns a default field mapping for text
|
||||||
|
func NewTextFieldMapping() *mapping.FieldMapping { |
||||||
|
return mapping.NewTextFieldMapping() |
||||||
|
} |
||||||
|
|
||||||
|
// NewNumericFieldMapping returns a default field mapping for numbers
|
||||||
|
func NewNumericFieldMapping() *mapping.FieldMapping { |
||||||
|
return mapping.NewNumericFieldMapping() |
||||||
|
} |
||||||
|
|
||||||
|
// NewDateTimeFieldMapping returns a default field mapping for dates
|
||||||
|
func NewDateTimeFieldMapping() *mapping.FieldMapping { |
||||||
|
return mapping.NewDateTimeFieldMapping() |
||||||
|
} |
||||||
|
|
||||||
|
// NewBooleanFieldMapping returns a default field mapping for booleans
|
||||||
|
func NewBooleanFieldMapping() *mapping.FieldMapping { |
||||||
|
return mapping.NewBooleanFieldMapping() |
||||||
|
} |
@ -0,0 +1,99 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package mapping |
||||||
|
|
||||||
|
// customAnalysis holds user-supplied analysis component definitions
// (each a named configuration map) that are registered into an index
// mapping's registry cache by registerAll.
type customAnalysis struct {
	CharFilters     map[string]map[string]interface{} `json:"char_filters,omitempty"`
	Tokenizers      map[string]map[string]interface{} `json:"tokenizers,omitempty"`
	TokenMaps       map[string]map[string]interface{} `json:"token_maps,omitempty"`
	TokenFilters    map[string]map[string]interface{} `json:"token_filters,omitempty"`
	Analyzers       map[string]map[string]interface{} `json:"analyzers,omitempty"`
	DateTimeParsers map[string]map[string]interface{} `json:"date_time_parsers,omitempty"`
}
||||||
|
|
||||||
|
// registerAll defines every custom analysis component on the index
// mapping's registry cache. Tokenizers may reference each other, so they
// are registered with a fixed-point loop: keep retrying the remaining
// definitions as long as at least one succeeds per pass; if a pass makes
// no progress, report the first remaining error. All other component
// kinds are registered in a single pass, failing fast on error.
func (c *customAnalysis) registerAll(i *IndexMappingImpl) error {
	for name, config := range c.CharFilters {
		_, err := i.cache.DefineCharFilter(name, config)
		if err != nil {
			return err
		}
	}

	if len(c.Tokenizers) > 0 {
		// put all the names in map tracking work to do
		todo := map[string]struct{}{}
		for name := range c.Tokenizers {
			todo[name] = struct{}{}
		}
		// registered starts non-zero so the loop runs at least once
		registered := 1
		errs := []error{}
		// as long as we keep making progress, keep going
		for len(todo) > 0 && registered > 0 {
			registered = 0
			errs = []error{}
			for name := range todo {
				config := c.Tokenizers[name]
				_, err := i.cache.DefineTokenizer(name, config)
				if err != nil {
					errs = append(errs, err)
				} else {
					delete(todo, name)
					registered++
				}
			}
		}

		if len(errs) > 0 {
			return errs[0]
		}
	}
	for name, config := range c.TokenMaps {
		_, err := i.cache.DefineTokenMap(name, config)
		if err != nil {
			return err
		}
	}
	for name, config := range c.TokenFilters {
		_, err := i.cache.DefineTokenFilter(name, config)
		if err != nil {
			return err
		}
	}
	for name, config := range c.Analyzers {
		_, err := i.cache.DefineAnalyzer(name, config)
		if err != nil {
			return err
		}
	}
	for name, config := range c.DateTimeParsers {
		_, err := i.cache.DefineDateTimeParser(name, config)
		if err != nil {
			return err
		}
	}
	return nil
}
||||||
|
|
||||||
|
func newCustomAnalysis() *customAnalysis { |
||||||
|
rv := customAnalysis{ |
||||||
|
CharFilters: make(map[string]map[string]interface{}), |
||||||
|
Tokenizers: make(map[string]map[string]interface{}), |
||||||
|
TokenMaps: make(map[string]map[string]interface{}), |
||||||
|
TokenFilters: make(map[string]map[string]interface{}), |
||||||
|
Analyzers: make(map[string]map[string]interface{}), |
||||||
|
DateTimeParsers: make(map[string]map[string]interface{}), |
||||||
|
} |
||||||
|
return &rv |
||||||
|
} |
@ -0,0 +1,490 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package mapping |
||||||
|
|
||||||
|
import ( |
||||||
|
"encoding/json" |
||||||
|
"fmt" |
||||||
|
"reflect" |
||||||
|
"time" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/registry" |
||||||
|
) |
||||||
|
|
||||||
|
// A DocumentMapping describes how a type of document
|
||||||
|
// should be indexed.
|
||||||
|
// As documents can be hierarchical, named sub-sections
|
||||||
|
// of documents are mapped using the same structure in
|
||||||
|
// the Properties field.
|
||||||
|
// Each value inside a document can be indexed 0 or more
|
||||||
|
// ways. These index entries are called fields and
|
||||||
|
// are stored in the Fields field.
|
||||||
|
// Entire sections of a document can be ignored or
|
||||||
|
// excluded by setting Enabled to false.
|
||||||
|
// If not explicitly mapped, default mapping operations
|
||||||
|
// are used. To disable this automatic handling, set
|
||||||
|
// Dynamic to false.
|
||||||
|
// A DocumentMapping describes how a type of document
// should be indexed.
// As documents can be hierarchical, named sub-sections
// of documents are mapped using the same structure in
// the Properties field.
// Each value inside a document can be indexed 0 or more
// ways. These index entries are called fields and
// are stored in the Fields field.
// Entire sections of a document can be ignored or
// excluded by setting Enabled to false.
// If not explicitly mapped, default mapping operations
// are used. To disable this automatic handling, set
// Dynamic to false.
type DocumentMapping struct {
	// Enabled gates all processing of this section; when false the
	// section and everything under it is skipped entirely.
	Enabled bool `json:"enabled"`
	// Dynamic enables automatic indexing of values with no explicit
	// field mapping.
	Dynamic bool `json:"dynamic"`
	// Properties maps sub-section names to their own mappings.
	Properties map[string]*DocumentMapping `json:"properties,omitempty"`
	// Fields lists the explicit field mappings at this level.
	Fields []*FieldMapping `json:"fields,omitempty"`
	// DefaultAnalyzer, when non-empty, overrides the analyzer for
	// values beneath this section.
	DefaultAnalyzer string `json:"default_analyzer"`

	// StructTagKey overrides "json" when looking for field names in struct tags
	StructTagKey string `json:"struct_tag_key,omitempty"`
}
||||||
|
|
||||||
|
func (dm *DocumentMapping) Validate(cache *registry.Cache) error { |
||||||
|
var err error |
||||||
|
if dm.DefaultAnalyzer != "" { |
||||||
|
_, err := cache.AnalyzerNamed(dm.DefaultAnalyzer) |
||||||
|
if err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
} |
||||||
|
for _, property := range dm.Properties { |
||||||
|
err = property.Validate(cache) |
||||||
|
if err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
} |
||||||
|
for _, field := range dm.Fields { |
||||||
|
if field.Analyzer != "" { |
||||||
|
_, err = cache.AnalyzerNamed(field.Analyzer) |
||||||
|
if err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
} |
||||||
|
if field.DateFormat != "" { |
||||||
|
_, err = cache.DateTimeParserNamed(field.DateFormat) |
||||||
|
if err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
} |
||||||
|
switch field.Type { |
||||||
|
case "text", "datetime", "number", "boolean": |
||||||
|
default: |
||||||
|
return fmt.Errorf("unknown field type: '%s'", field.Type) |
||||||
|
} |
||||||
|
} |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
// analyzerNameForPath attempts to first find the field
|
||||||
|
// described by this path, then returns the analyzer
|
||||||
|
// configured for that field
|
||||||
|
func (dm *DocumentMapping) analyzerNameForPath(path string) string { |
||||||
|
field := dm.fieldDescribedByPath(path) |
||||||
|
if field != nil { |
||||||
|
return field.Analyzer |
||||||
|
} |
||||||
|
return "" |
||||||
|
} |
||||||
|
|
||||||
|
// fieldDescribedByPath resolves a dotted document path to the explicit
// FieldMapping describing it, or nil when no mapping matches. With more
// than one path element it recurses into the matching sub-mapping; with
// exactly one element it first looks for a property of that name whose
// field has no overriding name (or the same name), then for any sibling
// property whose field Name overrides the path element.
// NOTE(review): pathElements[0] is read unconditionally — assumes
// decodePath never returns an empty slice; confirm against decodePath.
func (dm *DocumentMapping) fieldDescribedByPath(path string) *FieldMapping {
	pathElements := decodePath(path)
	if len(pathElements) > 1 {
		// easy case, there is more than 1 path element remaining
		// the next path element must match a property name
		// at this level
		for propName, subDocMapping := range dm.Properties {
			if propName == pathElements[0] {
				return subDocMapping.fieldDescribedByPath(encodePath(pathElements[1:]))
			}
		}
	} else {
		// just 1 path element
		// first look for property name with empty field
		for propName, subDocMapping := range dm.Properties {
			if propName == pathElements[0] {
				// found property name match, now look at its fields
				for _, field := range subDocMapping.Fields {
					if field.Name == "" || field.Name == pathElements[0] {
						// match
						return field
					}
				}
			}
		}
		// next, walk the properties again, looking for field overriding the name
		for propName, subDocMapping := range dm.Properties {
			if propName != pathElements[0] {
				// property name isn't a match, but field name could override it
				for _, field := range subDocMapping.Fields {
					if field.Name == pathElements[0] {
						return field
					}
				}
			}
		}
	}

	return nil
}
||||||
|
|
||||||
|
// documentMappingForPath only returns EXACT matches for a sub document
|
||||||
|
// or for an explicitly mapped field, if you want to find the
|
||||||
|
// closest document mapping to a field not explicitly mapped
|
||||||
|
// use closestDocMapping
|
||||||
|
// documentMappingForPath only returns EXACT matches for a sub document
// or for an explicitly mapped field, if you want to find the
// closest document mapping to a field not explicitly mapped
// use closestDocMapping
func (dm *DocumentMapping) documentMappingForPath(path string) *DocumentMapping {
	pathElements := decodePath(path)
	current := dm
OUTER:
	for i, pathElement := range pathElements {
		for name, subDocMapping := range current.Properties {
			if name == pathElement {
				current = subDocMapping
				continue OUTER
			}
		}
		// no subDocMapping matches this pathElement
		// only if this is the last element check for field name
		if i == len(pathElements)-1 {
			for _, field := range current.Fields {
				if field.Name == pathElement {
					// NOTE(review): this break only exits the field loop,
					// so even a field-name match falls through to the
					// `return nil` below — looks unintentional; confirm
					// against upstream before changing vendored behavior.
					break
				}
			}
		}

		return nil
	}
	return current
}
||||||
|
|
||||||
|
// closestDocMapping findest the most specific document mapping that matches
|
||||||
|
// part of the provided path
|
||||||
|
func (dm *DocumentMapping) closestDocMapping(path string) *DocumentMapping { |
||||||
|
pathElements := decodePath(path) |
||||||
|
current := dm |
||||||
|
OUTER: |
||||||
|
for _, pathElement := range pathElements { |
||||||
|
for name, subDocMapping := range current.Properties { |
||||||
|
if name == pathElement { |
||||||
|
current = subDocMapping |
||||||
|
continue OUTER |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
return current |
||||||
|
} |
||||||
|
|
||||||
|
// NewDocumentMapping returns a new document mapping
|
||||||
|
// with all the default values.
|
||||||
|
func NewDocumentMapping() *DocumentMapping { |
||||||
|
return &DocumentMapping{ |
||||||
|
Enabled: true, |
||||||
|
Dynamic: true, |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// NewDocumentStaticMapping returns a new document
|
||||||
|
// mapping that will not automatically index parts
|
||||||
|
// of a document without an explicit mapping.
|
||||||
|
func NewDocumentStaticMapping() *DocumentMapping { |
||||||
|
return &DocumentMapping{ |
||||||
|
Enabled: true, |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// NewDocumentDisabledMapping returns a new document
|
||||||
|
// mapping that will not perform any indexing.
|
||||||
|
func NewDocumentDisabledMapping() *DocumentMapping { |
||||||
|
return &DocumentMapping{} |
||||||
|
} |
||||||
|
|
||||||
|
// AddSubDocumentMapping adds the provided DocumentMapping as a sub-mapping
|
||||||
|
// for the specified named subsection.
|
||||||
|
func (dm *DocumentMapping) AddSubDocumentMapping(property string, sdm *DocumentMapping) { |
||||||
|
if dm.Properties == nil { |
||||||
|
dm.Properties = make(map[string]*DocumentMapping) |
||||||
|
} |
||||||
|
dm.Properties[property] = sdm |
||||||
|
} |
||||||
|
|
||||||
|
// AddFieldMappingsAt adds one or more FieldMappings
|
||||||
|
// at the named sub-document. If the named sub-document
|
||||||
|
// doesn't yet exist it is created for you.
|
||||||
|
// This is a convenience function to make most common
|
||||||
|
// mappings more concise.
|
||||||
|
// Otherwise, you would:
|
||||||
|
// subMapping := NewDocumentMapping()
|
||||||
|
// subMapping.AddFieldMapping(fieldMapping)
|
||||||
|
// parentMapping.AddSubDocumentMapping(property, subMapping)
|
||||||
|
func (dm *DocumentMapping) AddFieldMappingsAt(property string, fms ...*FieldMapping) { |
||||||
|
if dm.Properties == nil { |
||||||
|
dm.Properties = make(map[string]*DocumentMapping) |
||||||
|
} |
||||||
|
sdm, ok := dm.Properties[property] |
||||||
|
if !ok { |
||||||
|
sdm = NewDocumentMapping() |
||||||
|
} |
||||||
|
for _, fm := range fms { |
||||||
|
sdm.AddFieldMapping(fm) |
||||||
|
} |
||||||
|
dm.Properties[property] = sdm |
||||||
|
} |
||||||
|
|
||||||
|
// AddFieldMapping adds the provided FieldMapping for this section
|
||||||
|
// of the document.
|
||||||
|
func (dm *DocumentMapping) AddFieldMapping(fm *FieldMapping) { |
||||||
|
if dm.Fields == nil { |
||||||
|
dm.Fields = make([]*FieldMapping, 0) |
||||||
|
} |
||||||
|
dm.Fields = append(dm.Fields, fm) |
||||||
|
} |
||||||
|
|
||||||
|
// UnmarshalJSON offers custom unmarshaling with optional strict validation
|
||||||
|
// UnmarshalJSON offers custom unmarshaling with optional strict
// validation: Enabled and Dynamic default to true when omitted, and
// unrecognized keys are rejected when the package-level
// MappingJSONStrict flag is set.
func (dm *DocumentMapping) UnmarshalJSON(data []byte) error {

	var tmp map[string]json.RawMessage
	err := json.Unmarshal(data, &tmp)
	if err != nil {
		return err
	}

	// set defaults for fields which might have been omitted
	dm.Enabled = true
	dm.Dynamic = true

	var invalidKeys []string
	for k, v := range tmp {
		switch k {
		case "enabled":
			err := json.Unmarshal(v, &dm.Enabled)
			if err != nil {
				return err
			}
		case "dynamic":
			err := json.Unmarshal(v, &dm.Dynamic)
			if err != nil {
				return err
			}
		case "default_analyzer":
			err := json.Unmarshal(v, &dm.DefaultAnalyzer)
			if err != nil {
				return err
			}
		case "properties":
			err := json.Unmarshal(v, &dm.Properties)
			if err != nil {
				return err
			}
		case "fields":
			err := json.Unmarshal(v, &dm.Fields)
			if err != nil {
				return err
			}
		case "struct_tag_key":
			err := json.Unmarshal(v, &dm.StructTagKey)
			if err != nil {
				return err
			}
		default:
			invalidKeys = append(invalidKeys, k)
		}
	}

	if MappingJSONStrict && len(invalidKeys) > 0 {
		return fmt.Errorf("document mapping contains invalid keys: %v", invalidKeys)
	}

	return nil
}
||||||
|
|
||||||
|
func (dm *DocumentMapping) defaultAnalyzerName(path []string) string { |
||||||
|
rv := "" |
||||||
|
current := dm |
||||||
|
for _, pathElement := range path { |
||||||
|
var ok bool |
||||||
|
current, ok = current.Properties[pathElement] |
||||||
|
if !ok { |
||||||
|
break |
||||||
|
} |
||||||
|
if current.DefaultAnalyzer != "" { |
||||||
|
rv = current.DefaultAnalyzer |
||||||
|
} |
||||||
|
} |
||||||
|
return rv |
||||||
|
} |
||||||
|
|
||||||
|
// walkDocument recursively traverses an arbitrary Go value via
// reflection, dispatching each leaf value to processProperty. path
// accumulates the field-name trail, indexes the positions within any
// slices/arrays crossed. Maps (string-keyed only), structs (honoring
// the struct tag named by StructTagKey, default "json"), slices/arrays,
// pointers, and scalar kinds are handled; all signed/unsigned ints and
// floats are normalized to float64. Other kinds fall through untouched.
func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes []uint64, context *walkContext) {
	// allow default "json" tag to be overriden
	structTagKey := dm.StructTagKey
	if structTagKey == "" {
		structTagKey = "json"
	}

	val := reflect.ValueOf(data)
	typ := val.Type()
	switch typ.Kind() {
	case reflect.Map:
		// FIXME can add support for other map keys in the future
		if typ.Key().Kind() == reflect.String {
			for _, key := range val.MapKeys() {
				fieldName := key.String()
				fieldVal := val.MapIndex(key).Interface()
				dm.processProperty(fieldVal, append(path, fieldName), indexes, context)
			}
		}
	case reflect.Struct:
		for i := 0; i < val.NumField(); i++ {
			field := typ.Field(i)
			fieldName := field.Name
			// anonymous fields of type struct can elide the type name
			if field.Anonymous && field.Type.Kind() == reflect.Struct {
				fieldName = ""
			}

			// if the field has a name under the specified tag, prefer that
			tag := field.Tag.Get(structTagKey)
			tagFieldName := parseTagName(tag)
			if tagFieldName == "-" {
				// tag explicitly excludes this field from indexing
				continue
			}
			// allow tag to set field name to empty, only if anonymous
			if field.Tag != "" && (tagFieldName != "" || field.Anonymous) {
				fieldName = tagFieldName
			}

			if val.Field(i).CanInterface() {
				fieldVal := val.Field(i).Interface()
				newpath := path
				if fieldName != "" {
					newpath = append(path, fieldName)
				}
				dm.processProperty(fieldVal, newpath, indexes, context)
			}
		}
	case reflect.Slice, reflect.Array:
		for i := 0; i < val.Len(); i++ {
			if val.Index(i).CanInterface() {
				fieldVal := val.Index(i).Interface()
				// record the element's position in the indexes trail
				dm.processProperty(fieldVal, path, append(indexes, uint64(i)), context)
			}
		}
	case reflect.Ptr:
		ptrElem := val.Elem()
		if ptrElem.IsValid() && ptrElem.CanInterface() {
			dm.processProperty(ptrElem.Interface(), path, indexes, context)
		}
	case reflect.String:
		dm.processProperty(val.String(), path, indexes, context)
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		dm.processProperty(float64(val.Int()), path, indexes, context)
	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
		dm.processProperty(float64(val.Uint()), path, indexes, context)
	case reflect.Float32, reflect.Float64:
		dm.processProperty(float64(val.Float()), path, indexes, context)
	case reflect.Bool:
		dm.processProperty(val.Bool(), path, indexes, context)
	}

}
||||||
|
|
||||||
|
// processProperty indexes a single value found at path. It looks up
// both the exact sub-mapping for the path and the closest enclosing
// mapping; a disabled exact mapping stops processing entirely. Each
// scalar kind is either indexed through the explicit field mappings
// (when present) or, when the closest mapping is Dynamic, through a
// freshly created dynamic field mapping. Strings are first tried
// against the default date parser and indexed as datetime on success,
// text otherwise. Ints/uints are re-dispatched as float64. time.Time
// structs are indexed directly rather than descended into; all other
// structs and remaining kinds recurse via walkDocument.
func (dm *DocumentMapping) processProperty(property interface{}, path []string, indexes []uint64, context *walkContext) {
	pathString := encodePath(path)
	// look to see if there is a mapping for this field
	subDocMapping := dm.documentMappingForPath(pathString)
	closestDocMapping := dm.closestDocMapping(pathString)

	// check to see if we even need to do further processing
	if subDocMapping != nil && !subDocMapping.Enabled {
		return
	}

	propertyValue := reflect.ValueOf(property)
	if !propertyValue.IsValid() {
		// cannot do anything with the zero value
		return
	}
	propertyType := propertyValue.Type()
	switch propertyType.Kind() {
	case reflect.String:
		propertyValueString := propertyValue.String()
		if subDocMapping != nil {
			// index by explicit mapping
			for _, fieldMapping := range subDocMapping.Fields {
				fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
			}
		} else if closestDocMapping.Dynamic {
			// automatic indexing behavior

			// first see if it can be parsed by the default date parser
			dateTimeParser := context.im.DateTimeParserNamed(context.im.DefaultDateTimeParser)
			if dateTimeParser != nil {
				parsedDateTime, err := dateTimeParser.ParseDateTime(propertyValueString)
				if err != nil {
					// index as text
					fieldMapping := newTextFieldMappingDynamic(context.im)
					fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
				} else {
					// index as datetime
					fieldMapping := newDateTimeFieldMappingDynamic(context.im)
					fieldMapping.processTime(parsedDateTime, pathString, path, indexes, context)
				}
			}
		}
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		dm.processProperty(float64(propertyValue.Int()), path, indexes, context)
		return
	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
		dm.processProperty(float64(propertyValue.Uint()), path, indexes, context)
		return
	case reflect.Float64, reflect.Float32:
		propertyValFloat := propertyValue.Float()
		if subDocMapping != nil {
			// index by explicit mapping
			for _, fieldMapping := range subDocMapping.Fields {
				fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
			}
		} else if closestDocMapping.Dynamic {
			// automatic indexing behavior
			fieldMapping := newNumericFieldMappingDynamic(context.im)
			fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
		}
	case reflect.Bool:
		propertyValBool := propertyValue.Bool()
		if subDocMapping != nil {
			// index by explicit mapping
			for _, fieldMapping := range subDocMapping.Fields {
				fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)
			}
		} else if closestDocMapping.Dynamic {
			// automatic indexing behavior
			fieldMapping := newBooleanFieldMappingDynamic(context.im)
			fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)
		}
	case reflect.Struct:
		switch property := property.(type) {
		case time.Time:
			// don't descend into the time struct
			if subDocMapping != nil {
				// index by explicit mapping
				for _, fieldMapping := range subDocMapping.Fields {
					fieldMapping.processTime(property, pathString, path, indexes, context)
				}
			} else if closestDocMapping.Dynamic {
				fieldMapping := newDateTimeFieldMappingDynamic(context.im)
				fieldMapping.processTime(property, pathString, path, indexes, context)
			}
		default:
			dm.walkDocument(property, path, indexes, context)
		}
	default:
		dm.walkDocument(property, path, indexes, context)
	}
}
@ -0,0 +1,296 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package mapping |
||||||
|
|
||||||
|
import ( |
||||||
|
"encoding/json" |
||||||
|
"fmt" |
||||||
|
"time" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/analysis" |
||||||
|
"github.com/blevesearch/bleve/document" |
||||||
|
) |
||||||
|
|
||||||
|
// control the default behavior for dynamic fields (those not explicitly mapped)
|
||||||
|
var ( |
||||||
|
IndexDynamic = true |
||||||
|
StoreDynamic = true |
||||||
|
) |
||||||
|
|
||||||
|
// A FieldMapping describes how a specific item
|
||||||
|
// should be put into the index.
|
||||||
|
// A FieldMapping describes how a specific item
// should be put into the index.
type FieldMapping struct {
	// Name, when non-empty, overrides the document path element as the
	// indexed field name.
	Name string `json:"name,omitempty"`
	// Type is one of "text", "number", "datetime", or "boolean".
	Type string `json:"type,omitempty"`

	// Analyzer specifies the name of the analyzer to use for this field. If
	// Analyzer is empty, traverse the DocumentMapping tree toward the root and
	// pick the first non-empty DefaultAnalyzer found. If there is none, use
	// the IndexMapping.DefaultAnalyzer.
	Analyzer string `json:"analyzer,omitempty"`

	// Store indicates whether to store field values in the index. Stored
	// values can be retrieved from search results using SearchRequest.Fields.
	Store bool `json:"store,omitempty"`
	// Index indicates whether the field value should be indexed.
	Index bool `json:"index,omitempty"`

	// IncludeTermVectors, if true, makes terms occurrences to be recorded for
	// this field. It includes the term position within the terms sequence and
	// the term offsets in the source document field. Term vectors are required
	// to perform phrase queries or terms highlighting in source documents.
	IncludeTermVectors bool `json:"include_term_vectors,omitempty"`
	// IncludeInAll, if true, folds this field into the composite _all field.
	IncludeInAll bool `json:"include_in_all,omitempty"`
	// DateFormat names the date-time parser used for datetime fields.
	DateFormat string `json:"date_format,omitempty"`
}
||||||
|
|
||||||
|
// NewTextFieldMapping returns a default field mapping for text
|
||||||
|
func NewTextFieldMapping() *FieldMapping { |
||||||
|
return &FieldMapping{ |
||||||
|
Type: "text", |
||||||
|
Store: true, |
||||||
|
Index: true, |
||||||
|
IncludeTermVectors: true, |
||||||
|
IncludeInAll: true, |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func newTextFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping { |
||||||
|
rv := NewTextFieldMapping() |
||||||
|
rv.Store = im.StoreDynamic |
||||||
|
rv.Index = im.IndexDynamic |
||||||
|
return rv |
||||||
|
} |
||||||
|
|
||||||
|
// NewNumericFieldMapping returns a default field mapping for numbers
|
||||||
|
func NewNumericFieldMapping() *FieldMapping { |
||||||
|
return &FieldMapping{ |
||||||
|
Type: "number", |
||||||
|
Store: true, |
||||||
|
Index: true, |
||||||
|
IncludeInAll: true, |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func newNumericFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping { |
||||||
|
rv := NewNumericFieldMapping() |
||||||
|
rv.Store = im.StoreDynamic |
||||||
|
rv.Index = im.IndexDynamic |
||||||
|
return rv |
||||||
|
} |
||||||
|
|
||||||
|
// NewDateTimeFieldMapping returns a default field mapping for dates
|
||||||
|
func NewDateTimeFieldMapping() *FieldMapping { |
||||||
|
return &FieldMapping{ |
||||||
|
Type: "datetime", |
||||||
|
Store: true, |
||||||
|
Index: true, |
||||||
|
IncludeInAll: true, |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func newDateTimeFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping { |
||||||
|
rv := NewDateTimeFieldMapping() |
||||||
|
rv.Store = im.StoreDynamic |
||||||
|
rv.Index = im.IndexDynamic |
||||||
|
return rv |
||||||
|
} |
||||||
|
|
||||||
|
// NewBooleanFieldMapping returns a default field mapping for booleans
|
||||||
|
func NewBooleanFieldMapping() *FieldMapping { |
||||||
|
return &FieldMapping{ |
||||||
|
Type: "boolean", |
||||||
|
Store: true, |
||||||
|
Index: true, |
||||||
|
IncludeInAll: true, |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func newBooleanFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping { |
||||||
|
rv := NewBooleanFieldMapping() |
||||||
|
rv.Store = im.StoreDynamic |
||||||
|
rv.Index = im.IndexDynamic |
||||||
|
return rv |
||||||
|
} |
||||||
|
|
||||||
|
// Options returns the indexing options for this field.
|
||||||
|
func (fm *FieldMapping) Options() document.IndexingOptions { |
||||||
|
var rv document.IndexingOptions |
||||||
|
if fm.Store { |
||||||
|
rv |= document.StoreField |
||||||
|
} |
||||||
|
if fm.Index { |
||||||
|
rv |= document.IndexField |
||||||
|
} |
||||||
|
if fm.IncludeTermVectors { |
||||||
|
rv |= document.IncludeTermVectors |
||||||
|
} |
||||||
|
return rv |
||||||
|
} |
||||||
|
|
||||||
|
func (fm *FieldMapping) processString(propertyValueString string, pathString string, path []string, indexes []uint64, context *walkContext) { |
||||||
|
fieldName := getFieldName(pathString, path, fm) |
||||||
|
options := fm.Options() |
||||||
|
if fm.Type == "text" { |
||||||
|
analyzer := fm.analyzerForField(path, context) |
||||||
|
field := document.NewTextFieldCustom(fieldName, indexes, []byte(propertyValueString), options, analyzer) |
||||||
|
context.doc.AddField(field) |
||||||
|
|
||||||
|
if !fm.IncludeInAll { |
||||||
|
context.excludedFromAll = append(context.excludedFromAll, fieldName) |
||||||
|
} |
||||||
|
} else if fm.Type == "datetime" { |
||||||
|
dateTimeFormat := context.im.DefaultDateTimeParser |
||||||
|
if fm.DateFormat != "" { |
||||||
|
dateTimeFormat = fm.DateFormat |
||||||
|
} |
||||||
|
dateTimeParser := context.im.DateTimeParserNamed(dateTimeFormat) |
||||||
|
if dateTimeParser != nil { |
||||||
|
parsedDateTime, err := dateTimeParser.ParseDateTime(propertyValueString) |
||||||
|
if err == nil { |
||||||
|
fm.processTime(parsedDateTime, pathString, path, indexes, context) |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func (fm *FieldMapping) processFloat64(propertyValFloat float64, pathString string, path []string, indexes []uint64, context *walkContext) { |
||||||
|
fieldName := getFieldName(pathString, path, fm) |
||||||
|
if fm.Type == "number" { |
||||||
|
options := fm.Options() |
||||||
|
field := document.NewNumericFieldWithIndexingOptions(fieldName, indexes, propertyValFloat, options) |
||||||
|
context.doc.AddField(field) |
||||||
|
|
||||||
|
if !fm.IncludeInAll { |
||||||
|
context.excludedFromAll = append(context.excludedFromAll, fieldName) |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// processTime indexes a time.Time value; fields of any type other than
// "datetime" are ignored. Encoding failures are logged and the field is
// dropped, but the field name is still considered for "_all" exclusion.
func (fm *FieldMapping) processTime(propertyValueTime time.Time, pathString string, path []string, indexes []uint64, context *walkContext) {
	fieldName := getFieldName(pathString, path, fm)
	if fm.Type == "datetime" {
		options := fm.Options()
		field, err := document.NewDateTimeFieldWithIndexingOptions(fieldName, indexes, propertyValueTime, options)
		if err == nil {
			context.doc.AddField(field)
		} else {
			// the date could not be encoded; skip the field but keep going
			logger.Printf("could not build date %v", err)
		}

		if !fm.IncludeInAll {
			context.excludedFromAll = append(context.excludedFromAll, fieldName)
		}
	}
}
||||||
|
|
||||||
|
func (fm *FieldMapping) processBoolean(propertyValueBool bool, pathString string, path []string, indexes []uint64, context *walkContext) { |
||||||
|
fieldName := getFieldName(pathString, path, fm) |
||||||
|
if fm.Type == "boolean" { |
||||||
|
options := fm.Options() |
||||||
|
field := document.NewBooleanFieldWithIndexingOptions(fieldName, indexes, propertyValueBool, options) |
||||||
|
context.doc.AddField(field) |
||||||
|
|
||||||
|
if !fm.IncludeInAll { |
||||||
|
context.excludedFromAll = append(context.excludedFromAll, fieldName) |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func (fm *FieldMapping) analyzerForField(path []string, context *walkContext) *analysis.Analyzer { |
||||||
|
analyzerName := fm.Analyzer |
||||||
|
if analyzerName == "" { |
||||||
|
analyzerName = context.dm.defaultAnalyzerName(path) |
||||||
|
if analyzerName == "" { |
||||||
|
analyzerName = context.im.DefaultAnalyzer |
||||||
|
} |
||||||
|
} |
||||||
|
return context.im.AnalyzerNamed(analyzerName) |
||||||
|
} |
||||||
|
|
||||||
|
func getFieldName(pathString string, path []string, fieldMapping *FieldMapping) string { |
||||||
|
fieldName := pathString |
||||||
|
if fieldMapping.Name != "" { |
||||||
|
parentName := "" |
||||||
|
if len(path) > 1 { |
||||||
|
parentName = encodePath(path[:len(path)-1]) + pathSeparator |
||||||
|
} |
||||||
|
fieldName = parentName + fieldMapping.Name |
||||||
|
} |
||||||
|
return fieldName |
||||||
|
} |
||||||
|
|
||||||
|
// UnmarshalJSON offers custom unmarshaling with optional strict validation:
// recognized keys are decoded into the matching field; unknown keys are
// collected and, when MappingJSONStrict is true, reported as an error.
func (fm *FieldMapping) UnmarshalJSON(data []byte) error {

	// decode into raw messages first so each key can be dispatched
	// individually and unknown keys can be detected
	var tmp map[string]json.RawMessage
	err := json.Unmarshal(data, &tmp)
	if err != nil {
		return err
	}

	var invalidKeys []string
	for k, v := range tmp {
		switch k {
		case "name":
			err := json.Unmarshal(v, &fm.Name)
			if err != nil {
				return err
			}
		case "type":
			err := json.Unmarshal(v, &fm.Type)
			if err != nil {
				return err
			}
		case "analyzer":
			err := json.Unmarshal(v, &fm.Analyzer)
			if err != nil {
				return err
			}
		case "store":
			err := json.Unmarshal(v, &fm.Store)
			if err != nil {
				return err
			}
		case "index":
			err := json.Unmarshal(v, &fm.Index)
			if err != nil {
				return err
			}
		case "include_term_vectors":
			err := json.Unmarshal(v, &fm.IncludeTermVectors)
			if err != nil {
				return err
			}
		case "include_in_all":
			err := json.Unmarshal(v, &fm.IncludeInAll)
			if err != nil {
				return err
			}
		case "date_format":
			err := json.Unmarshal(v, &fm.DateFormat)
			if err != nil {
				return err
			}
		default:
			invalidKeys = append(invalidKeys, k)
		}
	}

	// unknown keys are fatal only when strict validation is enabled
	if MappingJSONStrict && len(invalidKeys) > 0 {
		return fmt.Errorf("field mapping contains invalid keys: %v", invalidKeys)
	}

	return nil
}
@ -0,0 +1,430 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package mapping |
||||||
|
|
||||||
|
import ( |
||||||
|
"encoding/json" |
||||||
|
"fmt" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/analysis" |
||||||
|
"github.com/blevesearch/bleve/analysis/analyzer/standard" |
||||||
|
"github.com/blevesearch/bleve/analysis/datetime/optional" |
||||||
|
"github.com/blevesearch/bleve/document" |
||||||
|
"github.com/blevesearch/bleve/registry" |
||||||
|
) |
||||||
|
|
||||||
|
// MappingJSONStrict, when true, makes the UnmarshalJSON implementations in
// this package reject JSON documents containing unrecognized keys.
var MappingJSONStrict = false

// defaults applied by NewIndexMapping and restored by UnmarshalJSON before
// decoding, for fields the JSON may omit
const defaultTypeField = "_type"
const defaultType = "_default"
const defaultField = "_all"
const defaultAnalyzer = standard.Name
const defaultDateTimeParser = optional.Name
||||||
|
|
||||||
|
// An IndexMappingImpl controls how objects are placed
// into an index.
// First the type of the object is determined.
// Once the type is known, the appropriate
// DocumentMapping is selected by the type.
// If no mapping was determined for that type,
// a DefaultMapping will be used.
type IndexMappingImpl struct {
	// TypeMapping maps a document type name to its DocumentMapping.
	TypeMapping map[string]*DocumentMapping `json:"types,omitempty"`
	// DefaultMapping is used when no entry in TypeMapping matches.
	DefaultMapping *DocumentMapping `json:"default_mapping"`
	// TypeField names the document property consulted to determine the
	// document type (see determineType).
	TypeField string `json:"type_field"`
	// DefaultType is the type assumed when none can be determined.
	DefaultType           string `json:"default_type"`
	DefaultAnalyzer       string `json:"default_analyzer"`
	DefaultDateTimeParser string `json:"default_datetime_parser"`
	// DefaultField is the composite field searched by default ("_all").
	DefaultField string `json:"default_field"`
	// StoreDynamic/IndexDynamic control Store/Index for dynamically
	// discovered fields (see the newXxxFieldMappingDynamic constructors).
	StoreDynamic bool `json:"store_dynamic"`
	IndexDynamic bool `json:"index_dynamic"`
	// CustomAnalysis records custom analysis components added via the
	// AddCustomXxx methods, so they survive JSON round trips.
	CustomAnalysis *customAnalysis `json:"analysis,omitempty"`
	// cache resolves named analysis components; not serialized.
	cache *registry.Cache
}
||||||
|
|
||||||
|
// AddCustomCharFilter defines a custom char filter for use in this mapping
|
||||||
|
func (im *IndexMappingImpl) AddCustomCharFilter(name string, config map[string]interface{}) error { |
||||||
|
_, err := im.cache.DefineCharFilter(name, config) |
||||||
|
if err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
im.CustomAnalysis.CharFilters[name] = config |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
// AddCustomTokenizer defines a custom tokenizer for use in this mapping
|
||||||
|
func (im *IndexMappingImpl) AddCustomTokenizer(name string, config map[string]interface{}) error { |
||||||
|
_, err := im.cache.DefineTokenizer(name, config) |
||||||
|
if err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
im.CustomAnalysis.Tokenizers[name] = config |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
// AddCustomTokenMap defines a custom token map for use in this mapping
|
||||||
|
func (im *IndexMappingImpl) AddCustomTokenMap(name string, config map[string]interface{}) error { |
||||||
|
_, err := im.cache.DefineTokenMap(name, config) |
||||||
|
if err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
im.CustomAnalysis.TokenMaps[name] = config |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
// AddCustomTokenFilter defines a custom token filter for use in this mapping
|
||||||
|
func (im *IndexMappingImpl) AddCustomTokenFilter(name string, config map[string]interface{}) error { |
||||||
|
_, err := im.cache.DefineTokenFilter(name, config) |
||||||
|
if err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
im.CustomAnalysis.TokenFilters[name] = config |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
// AddCustomAnalyzer defines a custom analyzer for use in this mapping. The
// config map must have a "type" string entry to resolve the analyzer
// constructor. The constructor is invoked with the remaining entries and
// returned analyzer is registered in the IndexMapping.
//
// bleve comes with predefined analyzers, like
// github.com/blevesearch/bleve/analysis/analyzers/custom_analyzer. They are
// available only if their package is imported by client code. To achieve this,
// use their metadata to fill configuration entries:
//
//   import (
//       "github.com/blevesearch/bleve/analysis/analyzers/custom_analyzer"
//       "github.com/blevesearch/bleve/analysis/char_filters/html_char_filter"
//       "github.com/blevesearch/bleve/analysis/token_filters/lower_case_filter"
//       "github.com/blevesearch/bleve/analysis/tokenizers/unicode"
//   )
//
//   m := bleve.NewIndexMapping()
//   err := m.AddCustomAnalyzer("html", map[string]interface{}{
//       "type": custom_analyzer.Name,
//       "char_filters": []string{
//           html_char_filter.Name,
//       },
//       "tokenizer": unicode.Name,
//       "token_filters": []string{
//           lower_case_filter.Name,
//           ...
//       },
//   })
func (im *IndexMappingImpl) AddCustomAnalyzer(name string, config map[string]interface{}) error {
	// register first; only record the config once the definition is accepted
	_, err := im.cache.DefineAnalyzer(name, config)
	if err != nil {
		return err
	}
	im.CustomAnalysis.Analyzers[name] = config
	return nil
}
||||||
|
|
||||||
|
// AddCustomDateTimeParser defines a custom date time parser for use in this mapping
|
||||||
|
func (im *IndexMappingImpl) AddCustomDateTimeParser(name string, config map[string]interface{}) error { |
||||||
|
_, err := im.cache.DefineDateTimeParser(name, config) |
||||||
|
if err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
im.CustomAnalysis.DateTimeParsers[name] = config |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
// NewIndexMapping creates a new IndexMapping that will use all the default indexing rules
|
||||||
|
func NewIndexMapping() *IndexMappingImpl { |
||||||
|
return &IndexMappingImpl{ |
||||||
|
TypeMapping: make(map[string]*DocumentMapping), |
||||||
|
DefaultMapping: NewDocumentMapping(), |
||||||
|
TypeField: defaultTypeField, |
||||||
|
DefaultType: defaultType, |
||||||
|
DefaultAnalyzer: defaultAnalyzer, |
||||||
|
DefaultDateTimeParser: defaultDateTimeParser, |
||||||
|
DefaultField: defaultField, |
||||||
|
IndexDynamic: IndexDynamic, |
||||||
|
StoreDynamic: StoreDynamic, |
||||||
|
CustomAnalysis: newCustomAnalysis(), |
||||||
|
cache: registry.NewCache(), |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Validate will walk the entire structure ensuring the following
|
||||||
|
// explicitly named and default analyzers can be built
|
||||||
|
func (im *IndexMappingImpl) Validate() error { |
||||||
|
_, err := im.cache.AnalyzerNamed(im.DefaultAnalyzer) |
||||||
|
if err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
_, err = im.cache.DateTimeParserNamed(im.DefaultDateTimeParser) |
||||||
|
if err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
err = im.DefaultMapping.Validate(im.cache) |
||||||
|
if err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
for _, docMapping := range im.TypeMapping { |
||||||
|
err = docMapping.Validate(im.cache) |
||||||
|
if err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
} |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
// AddDocumentMapping sets a custom document mapping for the specified type.
// Any existing mapping registered for doctype is replaced.
func (im *IndexMappingImpl) AddDocumentMapping(doctype string, dm *DocumentMapping) {
	im.TypeMapping[doctype] = dm
}
||||||
|
|
||||||
|
func (im *IndexMappingImpl) mappingForType(docType string) *DocumentMapping { |
||||||
|
docMapping := im.TypeMapping[docType] |
||||||
|
if docMapping == nil { |
||||||
|
docMapping = im.DefaultMapping |
||||||
|
} |
||||||
|
return docMapping |
||||||
|
} |
||||||
|
|
||||||
|
// UnmarshalJSON offers custom unmarshaling with optional strict validation:
// all fields are first reset to their package defaults, then recognized
// keys are decoded; unknown keys are collected and, when MappingJSONStrict
// is true, reported as an error. Custom analysis components are registered
// with a fresh cache after decoding.
func (im *IndexMappingImpl) UnmarshalJSON(data []byte) error {

	// decode into raw messages first so each key can be dispatched
	// individually and unknown keys can be detected
	var tmp map[string]json.RawMessage
	err := json.Unmarshal(data, &tmp)
	if err != nil {
		return err
	}

	// set defaults for fields which might have been omitted
	im.cache = registry.NewCache()
	im.CustomAnalysis = newCustomAnalysis()
	im.TypeField = defaultTypeField
	im.DefaultType = defaultType
	im.DefaultAnalyzer = defaultAnalyzer
	im.DefaultDateTimeParser = defaultDateTimeParser
	im.DefaultField = defaultField
	im.DefaultMapping = NewDocumentMapping()
	im.TypeMapping = make(map[string]*DocumentMapping)
	im.StoreDynamic = StoreDynamic
	im.IndexDynamic = IndexDynamic

	var invalidKeys []string
	for k, v := range tmp {
		switch k {
		case "analysis":
			err := json.Unmarshal(v, &im.CustomAnalysis)
			if err != nil {
				return err
			}
		case "type_field":
			err := json.Unmarshal(v, &im.TypeField)
			if err != nil {
				return err
			}
		case "default_type":
			err := json.Unmarshal(v, &im.DefaultType)
			if err != nil {
				return err
			}
		case "default_analyzer":
			err := json.Unmarshal(v, &im.DefaultAnalyzer)
			if err != nil {
				return err
			}
		case "default_datetime_parser":
			err := json.Unmarshal(v, &im.DefaultDateTimeParser)
			if err != nil {
				return err
			}
		case "default_field":
			err := json.Unmarshal(v, &im.DefaultField)
			if err != nil {
				return err
			}
		case "default_mapping":
			err := json.Unmarshal(v, &im.DefaultMapping)
			if err != nil {
				return err
			}
		case "types":
			err := json.Unmarshal(v, &im.TypeMapping)
			if err != nil {
				return err
			}
		case "store_dynamic":
			err := json.Unmarshal(v, &im.StoreDynamic)
			if err != nil {
				return err
			}
		case "index_dynamic":
			err := json.Unmarshal(v, &im.IndexDynamic)
			if err != nil {
				return err
			}
		default:
			invalidKeys = append(invalidKeys, k)
		}
	}

	// unknown keys are fatal only when strict validation is enabled
	if MappingJSONStrict && len(invalidKeys) > 0 {
		return fmt.Errorf("index mapping contains invalid keys: %v", invalidKeys)
	}

	// make the decoded custom analysis components resolvable by name
	err = im.CustomAnalysis.registerAll(im)
	if err != nil {
		return err
	}

	return nil
}
||||||
|
|
||||||
|
func (im *IndexMappingImpl) determineType(data interface{}) string { |
||||||
|
// first see if the object implements Classifier
|
||||||
|
classifier, ok := data.(Classifier) |
||||||
|
if ok { |
||||||
|
return classifier.Type() |
||||||
|
} |
||||||
|
|
||||||
|
// now see if we can find a type using the mapping
|
||||||
|
typ, ok := mustString(lookupPropertyPath(data, im.TypeField)) |
||||||
|
if ok { |
||||||
|
return typ |
||||||
|
} |
||||||
|
|
||||||
|
return im.DefaultType |
||||||
|
} |
||||||
|
|
||||||
|
func (im *IndexMappingImpl) MapDocument(doc *document.Document, data interface{}) error { |
||||||
|
docType := im.determineType(data) |
||||||
|
docMapping := im.mappingForType(docType) |
||||||
|
walkContext := im.newWalkContext(doc, docMapping) |
||||||
|
if docMapping.Enabled { |
||||||
|
docMapping.walkDocument(data, []string{}, []uint64{}, walkContext) |
||||||
|
|
||||||
|
// see if the _all field was disabled
|
||||||
|
allMapping := docMapping.documentMappingForPath("_all") |
||||||
|
if allMapping == nil || (allMapping.Enabled != false) { |
||||||
|
field := document.NewCompositeFieldWithIndexingOptions("_all", true, []string{}, walkContext.excludedFromAll, document.IndexField|document.IncludeTermVectors) |
||||||
|
doc.AddField(field) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
// walkContext carries the per-document state threaded through a
// walkDocument traversal.
type walkContext struct {
	doc             *document.Document // document being populated
	im              *IndexMappingImpl  // index mapping driving the walk
	dm              *DocumentMapping   // mapping selected for the document type
	excludedFromAll []string           // field names opted out of the "_all" composite
}
||||||
|
|
||||||
|
func (im *IndexMappingImpl) newWalkContext(doc *document.Document, dm *DocumentMapping) *walkContext { |
||||||
|
return &walkContext{ |
||||||
|
doc: doc, |
||||||
|
im: im, |
||||||
|
dm: dm, |
||||||
|
excludedFromAll: []string{}, |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// AnalyzerNameForPath attempts to find the best analyzer to use with only a
|
||||||
|
// field name will walk all the document types, look for field mappings at the
|
||||||
|
// provided path, if one exists and it has an explicit analyzer that is
|
||||||
|
// returned.
|
||||||
|
func (im *IndexMappingImpl) AnalyzerNameForPath(path string) string { |
||||||
|
// first we look for explicit mapping on the field
|
||||||
|
for _, docMapping := range im.TypeMapping { |
||||||
|
analyzerName := docMapping.analyzerNameForPath(path) |
||||||
|
if analyzerName != "" { |
||||||
|
return analyzerName |
||||||
|
} |
||||||
|
} |
||||||
|
// now try the default mapping
|
||||||
|
pathMapping := im.DefaultMapping.documentMappingForPath(path) |
||||||
|
if pathMapping != nil { |
||||||
|
if len(pathMapping.Fields) > 0 { |
||||||
|
if pathMapping.Fields[0].Analyzer != "" { |
||||||
|
return pathMapping.Fields[0].Analyzer |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// next we will try default analyzers for the path
|
||||||
|
pathDecoded := decodePath(path) |
||||||
|
for _, docMapping := range im.TypeMapping { |
||||||
|
rv := docMapping.defaultAnalyzerName(pathDecoded) |
||||||
|
if rv != "" { |
||||||
|
return rv |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
return im.DefaultAnalyzer |
||||||
|
} |
||||||
|
|
||||||
|
func (im *IndexMappingImpl) AnalyzerNamed(name string) *analysis.Analyzer { |
||||||
|
analyzer, err := im.cache.AnalyzerNamed(name) |
||||||
|
if err != nil { |
||||||
|
logger.Printf("error using analyzer named: %s", name) |
||||||
|
return nil |
||||||
|
} |
||||||
|
return analyzer |
||||||
|
} |
||||||
|
|
||||||
|
func (im *IndexMappingImpl) DateTimeParserNamed(name string) analysis.DateTimeParser { |
||||||
|
if name == "" { |
||||||
|
name = im.DefaultDateTimeParser |
||||||
|
} |
||||||
|
dateTimeParser, err := im.cache.DateTimeParserNamed(name) |
||||||
|
if err != nil { |
||||||
|
logger.Printf("error using datetime parser named: %s", name) |
||||||
|
return nil |
||||||
|
} |
||||||
|
return dateTimeParser |
||||||
|
} |
||||||
|
|
||||||
|
// datetimeParserNameForPath looks for an explicit field mapping at path in
// every document type and returns its parser name, falling back to
// DefaultDateTimeParser.
//
// NOTE(review): this reads Fields[0].Analyzer rather than the field's
// DateFormat — presumably DateFormat was intended; confirm against
// upstream bleve before changing, as callers may rely on current behavior.
func (im *IndexMappingImpl) datetimeParserNameForPath(path string) string {

	// first we look for explicit mapping on the field
	for _, docMapping := range im.TypeMapping {
		pathMapping := docMapping.documentMappingForPath(path)
		if pathMapping != nil {
			if len(pathMapping.Fields) > 0 {
				if pathMapping.Fields[0].Analyzer != "" {
					return pathMapping.Fields[0].Analyzer
				}
			}
		}
	}

	return im.DefaultDateTimeParser
}
||||||
|
|
||||||
|
func (im *IndexMappingImpl) AnalyzeText(analyzerName string, text []byte) (analysis.TokenStream, error) { |
||||||
|
analyzer, err := im.cache.AnalyzerNamed(analyzerName) |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
return analyzer.Analyze(text), nil |
||||||
|
} |
||||||
|
|
||||||
|
// FieldAnalyzer returns the name of the analyzer used on a field.
// It is a thin wrapper around AnalyzerNameForPath.
func (im *IndexMappingImpl) FieldAnalyzer(field string) string {
	return im.AnalyzerNameForPath(field)
}
||||||
|
|
||||||
|
// DefaultSearchField returns the configured default field; wrapper to
// satisfy the IndexMapping interface.
func (im *IndexMappingImpl) DefaultSearchField() string {
	return im.DefaultField
}
@ -0,0 +1,49 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package mapping |
||||||
|
|
||||||
|
import ( |
||||||
|
"io/ioutil" |
||||||
|
"log" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/analysis" |
||||||
|
"github.com/blevesearch/bleve/document" |
||||||
|
) |
||||||
|
|
||||||
|
// A Classifier is an interface describing any object
// which knows how to identify its own type. When implemented, it takes
// precedence over the TypeField lookup during type determination.
type Classifier interface {
	Type() string
}
||||||
|
|
||||||
|
// logger is the package-level logger; output is discarded by default.
var logger = log.New(ioutil.Discard, "bleve mapping ", log.LstdFlags)

// SetLog sets the logger used for logging
// by default log messages are sent to ioutil.Discard
func SetLog(l *log.Logger) {
	logger = l
}
||||||
|
|
||||||
|
// An IndexMapping describes how documents are mapped into the index; it is
// the contract consumed by the indexing code (implemented in this package
// by IndexMappingImpl).
type IndexMapping interface {
	// MapDocument populates doc with fields derived from data.
	MapDocument(doc *document.Document, data interface{}) error
	// Validate reports whether the mapping's named components can be built.
	Validate() error

	DateTimeParserNamed(name string) analysis.DateTimeParser

	DefaultSearchField() string

	AnalyzerNameForPath(path string) string
	AnalyzerNamed(name string) *analysis.Analyzer
}
@ -0,0 +1,89 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package mapping |
||||||
|
|
||||||
|
import ( |
||||||
|
"reflect" |
||||||
|
"strings" |
||||||
|
) |
||||||
|
|
||||||
|
func lookupPropertyPath(data interface{}, path string) interface{} { |
||||||
|
pathParts := decodePath(path) |
||||||
|
|
||||||
|
current := data |
||||||
|
for _, part := range pathParts { |
||||||
|
current = lookupPropertyPathPart(current, part) |
||||||
|
if current == nil { |
||||||
|
break |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
return current |
||||||
|
} |
||||||
|
|
||||||
|
// lookupPropertyPathPart resolves one path element against data: a
// string-keyed map lookup, an exported struct field access, or a pointer
// dereference (recursing on the pointee). Anything else yields nil.
// Note: data must be non-nil; reflect.ValueOf(nil) has no Type.
func lookupPropertyPathPart(data interface{}, part string) interface{} {
	val := reflect.ValueOf(data)
	switch val.Kind() {
	case reflect.Map:
		// FIXME can add support for other map keys in the future
		if val.Type().Key().Kind() == reflect.String {
			if entry := val.MapIndex(reflect.ValueOf(part)); entry.IsValid() {
				return entry.Interface()
			}
		}
	case reflect.Struct:
		if field := val.FieldByName(part); field.IsValid() && field.CanInterface() {
			return field.Interface()
		}
	case reflect.Ptr:
		if elem := val.Elem(); elem.IsValid() && elem.CanInterface() {
			return lookupPropertyPathPart(elem.Interface(), part)
		}
	}
	return nil
}
||||||
|
|
||||||
|
// pathSeparator joins the elements of a document path ("a.b.c").
const pathSeparator = "."

// decodePath splits a dotted path string into its elements.
func decodePath(path string) []string {
	return strings.Split(path, pathSeparator)
}

// encodePath joins path elements back into a dotted path string.
func encodePath(pathElements []string) string {
	return strings.Join(pathElements, pathSeparator)
}
||||||
|
|
||||||
|
// mustString reports data as a string when it holds one; otherwise it
// returns "" and false (nil included).
func mustString(data interface{}) (string, bool) {
	str, ok := data.(string)
	return str, ok
}
||||||
|
|
||||||
|
// parseTagName extracts the field name from a struct tag value, dropping
// any ",option" suffix (e.g. "name,omitempty" -> "name").
func parseTagName(tag string) string {
	name := tag
	if comma := strings.IndexByte(tag, ','); comma >= 0 {
		name = tag[:comma]
	}
	return name
}
@ -0,0 +1,34 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package numeric |
||||||
|
|
||||||
|
import ( |
||||||
|
"math" |
||||||
|
) |
||||||
|
|
||||||
|
// Float64ToInt64 converts a float64 into an int64 whose natural ordering
// matches the float ordering: the IEEE 754 bits are taken as a signed
// integer, and negative values have their magnitude bits flipped.
func Float64ToInt64(f float64) int64 {
	bits := int64(math.Float64bits(f))
	if bits >= 0 {
		return bits
	}
	return bits ^ 0x7fffffffffffffff
}
||||||
|
|
||||||
|
// Int64ToFloat64 is the inverse of Float64ToInt64: it undoes the
// negative-value bit flip and reinterprets the bits as a float64.
func Int64ToFloat64(i int64) float64 {
	bits := i
	if bits < 0 {
		bits ^= 0x7fffffffffffffff
	}
	return math.Float64frombits(uint64(bits))
}
@ -0,0 +1,92 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package numeric |
||||||
|
|
||||||
|
import "fmt" |
||||||
|
|
||||||
|
// ShiftStartInt64 is the offset added to the shift amount stored in the
// first byte of a prefix coded value.
const ShiftStartInt64 byte = 0x20

// PrefixCoded is a byte array encoding of
// 64-bit numeric values shifted by 0-63 bits
type PrefixCoded []byte

// NewPrefixCodedInt64 encodes in, right-shifted by shift bits, into a
// prefix coded byte slice. shift must be in [0, 63].
func NewPrefixCodedInt64(in int64, shift uint) (PrefixCoded, error) {
	if shift > 63 {
		return nil, fmt.Errorf("cannot shift %d, must be between 0 and 63", shift)
	}

	nChars := ((63 - shift) / 7) + 1
	rv := make(PrefixCoded, nChars+1)
	rv[0] = ShiftStartInt64 + byte(shift)

	// flip the sign bit so the encoded bytes sort in numeric order
	sortableBits := int64(uint64(in) ^ 0x8000000000000000)
	sortableBits = int64(uint64(sortableBits) >> shift)
	for nChars > 0 {
		// Store 7 bits per byte for compatibility
		// with UTF-8 encoding of terms
		rv[nChars] = byte(sortableBits & 0x7f)
		nChars--
		sortableBits = int64(uint64(sortableBits) >> 7)
	}
	return rv, nil
}

// MustNewPrefixCodedInt64 is like NewPrefixCodedInt64 but panics on an
// invalid shift.
func MustNewPrefixCodedInt64(in int64, shift uint) PrefixCoded {
	rv, err := NewPrefixCodedInt64(in, shift)
	if err != nil {
		panic(err)
	}
	return rv
}

// Shift returns the number of bits shifted, or an error for an empty or
// malformed value.
func (p PrefixCoded) Shift() (uint, error) {
	if len(p) > 0 {
		shift := p[0] - ShiftStartInt64
		// Fix: shift is a byte, so the previous `shift < 0` test was
		// vacuously false, and `shift < 63` wrongly rejected the valid
		// shift of exactly 63 (which NewPrefixCodedInt64 produces). A
		// first byte below ShiftStartInt64 wraps above 63 and is still
		// rejected here.
		if shift <= 63 {
			return uint(shift), nil
		}
	}
	return 0, fmt.Errorf("invalid prefix coded value")
}

// Int64 decodes the value, undoing the shift and the sign-bit flip.
// Bits discarded by a non-zero shift at encode time are not recovered.
func (p PrefixCoded) Int64() (int64, error) {
	shift, err := p.Shift()
	if err != nil {
		return 0, err
	}
	var sortableBits int64
	for _, inbyte := range p[1:] {
		sortableBits <<= 7
		sortableBits |= int64(inbyte)
	}
	return int64(uint64(sortableBits<<shift) ^ 0x8000000000000000), nil
}
||||||
|
|
||||||
|
func ValidPrefixCodedTerm(p string) (bool, int) { |
||||||
|
if len(p) > 0 { |
||||||
|
if p[0] < ShiftStartInt64 || p[0] > ShiftStartInt64+63 { |
||||||
|
return false, 0 |
||||||
|
} |
||||||
|
shift := p[0] - ShiftStartInt64 |
||||||
|
nChars := ((63 - int(shift)) / 7) + 1 |
||||||
|
if len(p) != nChars+1 { |
||||||
|
return false, 0 |
||||||
|
} |
||||||
|
return true, int(shift) |
||||||
|
} |
||||||
|
return false, 0 |
||||||
|
} |
@ -0,0 +1,186 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package bleve |
||||||
|
|
||||||
|
import ( |
||||||
|
"time" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/search/query" |
||||||
|
) |
||||||
|
|
||||||
|
// NewBoolFieldQuery creates a new Query for boolean fields
|
||||||
|
func NewBoolFieldQuery(val bool) *query.BoolFieldQuery { |
||||||
|
return query.NewBoolFieldQuery(val) |
||||||
|
} |
||||||
|
|
||||||
|
// NewBooleanQuery creates a compound Query composed
|
||||||
|
// of several other Query objects.
|
||||||
|
// These other query objects are added using the
|
||||||
|
// AddMust() AddShould() and AddMustNot() methods.
|
||||||
|
// Result documents must satisfy ALL of the
|
||||||
|
// must Queries.
|
||||||
|
// Result documents must satisfy NONE of the must not
|
||||||
|
// Queries.
|
||||||
|
// Result documents that ALSO satisfy any of the should
|
||||||
|
// Queries will score higher.
|
||||||
|
func NewBooleanQuery() *query.BooleanQuery { |
||||||
|
return query.NewBooleanQuery(nil, nil, nil) |
||||||
|
} |
||||||
|
|
||||||
|
// NewConjunctionQuery creates a new compound Query.
|
||||||
|
// Result documents must satisfy all of the queries.
|
||||||
|
func NewConjunctionQuery(conjuncts ...query.Query) *query.ConjunctionQuery { |
||||||
|
return query.NewConjunctionQuery(conjuncts) |
||||||
|
} |
||||||
|
|
||||||
|
// NewDateRangeQuery creates a new Query for ranges
|
||||||
|
// of date values.
|
||||||
|
// Date strings are parsed using the DateTimeParser configured in the
|
||||||
|
// top-level config.QueryDateTimeParser
|
||||||
|
// Either, but not both endpoints can be nil.
|
||||||
|
func NewDateRangeQuery(start, end time.Time) *query.DateRangeQuery { |
||||||
|
return query.NewDateRangeQuery(start, end) |
||||||
|
} |
||||||
|
|
||||||
|
// NewDateRangeInclusiveQuery creates a new Query for ranges
|
||||||
|
// of date values.
|
||||||
|
// Date strings are parsed using the DateTimeParser configured in the
|
||||||
|
// top-level config.QueryDateTimeParser
|
||||||
|
// Either, but not both endpoints can be nil.
|
||||||
|
// startInclusive and endInclusive control inclusion of the endpoints.
|
||||||
|
func NewDateRangeInclusiveQuery(start, end time.Time, startInclusive, endInclusive *bool) *query.DateRangeQuery { |
||||||
|
return query.NewDateRangeInclusiveQuery(start, end, startInclusive, endInclusive) |
||||||
|
} |
||||||
|
|
||||||
|
// NewDisjunctionQuery creates a new compound Query.
|
||||||
|
// Result documents satisfy at least one Query.
|
||||||
|
func NewDisjunctionQuery(disjuncts ...query.Query) *query.DisjunctionQuery { |
||||||
|
return query.NewDisjunctionQuery(disjuncts) |
||||||
|
} |
||||||
|
|
||||||
|
// NewDocIDQuery creates a new Query object returning indexed documents among
|
||||||
|
// the specified set. Combine it with ConjunctionQuery to restrict the scope of
|
||||||
|
// other queries output.
|
||||||
|
func NewDocIDQuery(ids []string) *query.DocIDQuery { |
||||||
|
return query.NewDocIDQuery(ids) |
||||||
|
} |
||||||
|
|
||||||
|
// NewFuzzyQuery creates a new Query which finds
|
||||||
|
// documents containing terms within a specific
|
||||||
|
// fuzziness of the specified term.
|
||||||
|
// The default fuzziness is 1.
|
||||||
|
//
|
||||||
|
// The current implementation uses Levenshtein edit
|
||||||
|
// distance as the fuzziness metric.
|
||||||
|
func NewFuzzyQuery(term string) *query.FuzzyQuery { |
||||||
|
return query.NewFuzzyQuery(term) |
||||||
|
} |
||||||
|
|
||||||
|
// NewMatchAllQuery creates a Query which will
|
||||||
|
// match all documents in the index.
|
||||||
|
func NewMatchAllQuery() *query.MatchAllQuery { |
||||||
|
return query.NewMatchAllQuery() |
||||||
|
} |
||||||
|
|
||||||
|
// NewMatchNoneQuery creates a Query which will not
|
||||||
|
// match any documents in the index.
|
||||||
|
func NewMatchNoneQuery() *query.MatchNoneQuery { |
||||||
|
return query.NewMatchNoneQuery() |
||||||
|
} |
||||||
|
|
||||||
|
// NewMatchPhraseQuery creates a new Query object
|
||||||
|
// for matching phrases in the index.
|
||||||
|
// An Analyzer is chosen based on the field.
|
||||||
|
// Input text is analyzed using this analyzer.
|
||||||
|
// Token terms resulting from this analysis are
|
||||||
|
// used to build a search phrase. Result documents
|
||||||
|
// must match this phrase. Queried field must have been indexed with
|
||||||
|
// IncludeTermVectors set to true.
|
||||||
|
func NewMatchPhraseQuery(matchPhrase string) *query.MatchPhraseQuery { |
||||||
|
return query.NewMatchPhraseQuery(matchPhrase) |
||||||
|
} |
||||||
|
|
||||||
|
// NewMatchQuery creates a Query for matching text.
|
||||||
|
// An Analyzer is chosen based on the field.
|
||||||
|
// Input text is analyzed using this analyzer.
|
||||||
|
// Token terms resulting from this analysis are
|
||||||
|
// used to perform term searches. Result documents
|
||||||
|
// must satisfy at least one of these term searches.
|
||||||
|
func NewMatchQuery(match string) *query.MatchQuery { |
||||||
|
return query.NewMatchQuery(match) |
||||||
|
} |
||||||
|
|
||||||
|
// NewNumericRangeQuery creates a new Query for ranges
|
||||||
|
// of numeric values.
|
||||||
|
// Either, but not both endpoints can be nil.
|
||||||
|
// The minimum value is inclusive.
|
||||||
|
// The maximum value is exclusive.
|
||||||
|
func NewNumericRangeQuery(min, max *float64) *query.NumericRangeQuery { |
||||||
|
return query.NewNumericRangeQuery(min, max) |
||||||
|
} |
||||||
|
|
||||||
|
// NewNumericRangeInclusiveQuery creates a new Query for ranges
|
||||||
|
// of numeric values.
|
||||||
|
// Either, but not both endpoints can be nil.
|
||||||
|
// Control endpoint inclusion with inclusiveMin, inclusiveMax.
|
||||||
|
func NewNumericRangeInclusiveQuery(min, max *float64, minInclusive, maxInclusive *bool) *query.NumericRangeQuery { |
||||||
|
return query.NewNumericRangeInclusiveQuery(min, max, minInclusive, maxInclusive) |
||||||
|
} |
||||||
|
|
||||||
|
// NewPhraseQuery creates a new Query for finding
|
||||||
|
// exact term phrases in the index.
|
||||||
|
// The provided terms must exist in the correct
|
||||||
|
// order, at the correct index offsets, in the
|
||||||
|
// specified field. Queried field must have been indexed with
|
||||||
|
// IncludeTermVectors set to true.
|
||||||
|
func NewPhraseQuery(terms []string, field string) *query.PhraseQuery { |
||||||
|
return query.NewPhraseQuery(terms, field) |
||||||
|
} |
||||||
|
|
||||||
|
// NewPrefixQuery creates a new Query which finds
|
||||||
|
// documents containing terms that start with the
|
||||||
|
// specified prefix.
|
||||||
|
func NewPrefixQuery(prefix string) *query.PrefixQuery { |
||||||
|
return query.NewPrefixQuery(prefix) |
||||||
|
} |
||||||
|
|
||||||
|
// NewRegexpQuery creates a new Query which finds
|
||||||
|
// documents containing terms that match the
|
||||||
|
// specified regular expression.
|
||||||
|
func NewRegexpQuery(regexp string) *query.RegexpQuery { |
||||||
|
return query.NewRegexpQuery(regexp) |
||||||
|
} |
||||||
|
|
||||||
|
// NewQueryStringQuery creates a new Query used for
|
||||||
|
// finding documents that satisfy a query string. The
|
||||||
|
// query string is a small query language for humans.
|
||||||
|
func NewQueryStringQuery(q string) *query.QueryStringQuery { |
||||||
|
return query.NewQueryStringQuery(q) |
||||||
|
} |
||||||
|
|
||||||
|
// NewTermQuery creates a new Query for finding an
|
||||||
|
// exact term match in the index.
|
||||||
|
func NewTermQuery(term string) *query.TermQuery { |
||||||
|
return query.NewTermQuery(term) |
||||||
|
} |
||||||
|
|
||||||
|
// NewWildcardQuery creates a new Query which finds
|
||||||
|
// documents containing terms that match the
|
||||||
|
// specified wildcard. In the wildcard pattern '*'
|
||||||
|
// will match any sequence of 0 or more characters,
|
||||||
|
// and '?' will match any single character.
|
||||||
|
func NewWildcardQuery(wildcard string) *query.WildcardQuery { |
||||||
|
return query.NewWildcardQuery(wildcard) |
||||||
|
} |
@ -0,0 +1,89 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package registry |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/analysis" |
||||||
|
) |
||||||
|
|
||||||
|
func RegisterAnalyzer(name string, constructor AnalyzerConstructor) { |
||||||
|
_, exists := analyzers[name] |
||||||
|
if exists { |
||||||
|
panic(fmt.Errorf("attempted to register duplicate analyzer named '%s'", name)) |
||||||
|
} |
||||||
|
analyzers[name] = constructor |
||||||
|
} |
||||||
|
|
||||||
|
// AnalyzerConstructor builds an analysis.Analyzer from a config map,
// resolving any dependencies through the supplied cache.
type AnalyzerConstructor func(config map[string]interface{}, cache *Cache) (*analysis.Analyzer, error)

// AnalyzerRegistry maps analyzer names to their constructors.
type AnalyzerRegistry map[string]AnalyzerConstructor

// AnalyzerCache caches built analyzer instances by name.
type AnalyzerCache struct {
	*ConcurrentCache
}
||||||
|
|
||||||
|
func NewAnalyzerCache() *AnalyzerCache { |
||||||
|
return &AnalyzerCache{ |
||||||
|
NewConcurrentCache(), |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func AnalyzerBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) { |
||||||
|
cons, registered := analyzers[name] |
||||||
|
if !registered { |
||||||
|
return nil, fmt.Errorf("no analyzer with name or type '%s' registered", name) |
||||||
|
} |
||||||
|
analyzer, err := cons(config, cache) |
||||||
|
if err != nil { |
||||||
|
return nil, fmt.Errorf("error building analyzer: %v", err) |
||||||
|
} |
||||||
|
return analyzer, nil |
||||||
|
} |
||||||
|
|
||||||
|
func (c *AnalyzerCache) AnalyzerNamed(name string, cache *Cache) (*analysis.Analyzer, error) { |
||||||
|
item, err := c.ItemNamed(name, cache, AnalyzerBuild) |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
return item.(*analysis.Analyzer), nil |
||||||
|
} |
||||||
|
|
||||||
|
func (c *AnalyzerCache) DefineAnalyzer(name string, typ string, config map[string]interface{}, cache *Cache) (*analysis.Analyzer, error) { |
||||||
|
item, err := c.DefineItem(name, typ, config, cache, AnalyzerBuild) |
||||||
|
if err != nil { |
||||||
|
if err == ErrAlreadyDefined { |
||||||
|
return nil, fmt.Errorf("analyzer named '%s' already defined", name) |
||||||
|
} |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
return item.(*analysis.Analyzer), nil |
||||||
|
} |
||||||
|
|
||||||
|
func AnalyzerTypesAndInstances() ([]string, []string) { |
||||||
|
emptyConfig := map[string]interface{}{} |
||||||
|
emptyCache := NewCache() |
||||||
|
var types []string |
||||||
|
var instances []string |
||||||
|
for name, cons := range analyzers { |
||||||
|
_, err := cons(emptyConfig, emptyCache) |
||||||
|
if err == nil { |
||||||
|
instances = append(instances, name) |
||||||
|
} else { |
||||||
|
types = append(types, name) |
||||||
|
} |
||||||
|
} |
||||||
|
return types, instances |
||||||
|
} |
@ -0,0 +1,87 @@ |
|||||||
|
// Copyright (c) 2016 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package registry |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"sync" |
||||||
|
) |
||||||
|
|
||||||
|
// ErrAlreadyDefined is returned when defining an item under a name that
// is already present in the cache.
var ErrAlreadyDefined = fmt.Errorf("item already defined")

// CacheBuild constructs a cache item from its name and config,
// resolving dependencies through the supplied cache.
type CacheBuild func(name string, config map[string]interface{}, cache *Cache) (interface{}, error)

// ConcurrentCache is a mutex-guarded map of named items that is safe
// for concurrent use.
type ConcurrentCache struct {
	mutex sync.RWMutex
	data  map[string]interface{}
}
||||||
|
|
||||||
|
func NewConcurrentCache() *ConcurrentCache { |
||||||
|
return &ConcurrentCache{ |
||||||
|
data: make(map[string]interface{}), |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// ItemNamed returns the item stored under name, building and caching it
// with build on first use. Safe for concurrent callers: it uses
// double-checked locking, building outside any lock so a slow build
// does not block readers.
func (c *ConcurrentCache) ItemNamed(name string, cache *Cache, build CacheBuild) (interface{}, error) {
	c.mutex.RLock()
	item, cached := c.data[name]
	if cached {
		c.mutex.RUnlock()
		return item, nil
	}
	// give up read lock
	c.mutex.RUnlock()
	// try to build it
	// NOTE: two goroutines may build the same item concurrently; the
	// loser's result is discarded by the re-check below.
	newItem, err := build(name, nil, cache)
	if err != nil {
		return nil, err
	}
	// acquire write lock
	c.mutex.Lock()
	defer c.mutex.Unlock()
	// check again because it could have been created while trading locks
	item, cached = c.data[name]
	if cached {
		return item, nil
	}
	c.data[name] = newItem
	return newItem, nil
}
||||||
|
|
||||||
|
// DefineItem builds an item of type typ with config and stores it under
// name, returning ErrAlreadyDefined when name is already present. The
// build runs outside any lock, with a re-check after re-acquiring it.
func (c *ConcurrentCache) DefineItem(name string, typ string, config map[string]interface{}, cache *Cache, build CacheBuild) (interface{}, error) {
	c.mutex.RLock()
	_, cached := c.data[name]
	if cached {
		c.mutex.RUnlock()
		return nil, ErrAlreadyDefined
	}
	// give up read lock so others lookups can proceed
	c.mutex.RUnlock()
	// really not there, try to build it
	newItem, err := build(typ, config, cache)
	if err != nil {
		return nil, err
	}
	// now we've built it, acquire lock
	c.mutex.Lock()
	defer c.mutex.Unlock()
	// check again because it could have been created while trading locks
	_, cached = c.data[name]
	if cached {
		return nil, ErrAlreadyDefined
	}
	c.data[name] = newItem
	return newItem, nil
}
@ -0,0 +1,89 @@ |
|||||||
|
// Copyright (c) 2014 Couchbase, Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package registry |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/analysis" |
||||||
|
) |
||||||
|
|
||||||
|
func RegisterCharFilter(name string, constructor CharFilterConstructor) { |
||||||
|
_, exists := charFilters[name] |
||||||
|
if exists { |
||||||
|
panic(fmt.Errorf("attempted to register duplicate char filter named '%s'", name)) |
||||||
|
} |
||||||
|
charFilters[name] = constructor |
||||||
|
} |
||||||
|
|
||||||
|
// CharFilterConstructor builds an analysis.CharFilter from a config
// map, resolving any dependencies through the supplied cache.
type CharFilterConstructor func(config map[string]interface{}, cache *Cache) (analysis.CharFilter, error)

// CharFilterRegistry maps char filter names to their constructors.
type CharFilterRegistry map[string]CharFilterConstructor

// CharFilterCache caches built char filter instances by name.
type CharFilterCache struct {
	*ConcurrentCache
}
||||||
|
|
||||||
|
func NewCharFilterCache() *CharFilterCache { |
||||||
|
return &CharFilterCache{ |
||||||
|
NewConcurrentCache(), |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func CharFilterBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) { |
||||||
|
cons, registered := charFilters[name] |
||||||
|
if !registered { |
||||||
|
return nil, fmt.Errorf("no char filter with name or type '%s' registered", name) |
||||||
|
} |
||||||
|
charFilter, err := cons(config, cache) |
||||||
|
if err != nil { |
||||||
|
return nil, fmt.Errorf("error building char filter: %v", err) |
||||||
|
} |
||||||
|
return charFilter, nil |
||||||
|
} |
||||||
|
|
||||||
|
func (c *CharFilterCache) CharFilterNamed(name string, cache *Cache) (analysis.CharFilter, error) { |
||||||
|
item, err := c.ItemNamed(name, cache, CharFilterBuild) |
||||||
|
if err != nil { |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
return item.(analysis.CharFilter), nil |
||||||
|
} |
||||||
|
|
||||||
|
func (c *CharFilterCache) DefineCharFilter(name string, typ string, config map[string]interface{}, cache *Cache) (analysis.CharFilter, error) { |
||||||
|
item, err := c.DefineItem(name, typ, config, cache, CharFilterBuild) |
||||||
|
if err != nil { |
||||||
|
if err == ErrAlreadyDefined { |
||||||
|
return nil, fmt.Errorf("char filter named '%s' already defined", name) |
||||||
|
} |
||||||
|
return nil, err |
||||||
|
} |
||||||
|
return item.(analysis.CharFilter), nil |
||||||
|
} |
||||||
|
|
||||||
|
func CharFilterTypesAndInstances() ([]string, []string) { |
||||||
|
emptyConfig := map[string]interface{}{} |
||||||
|
emptyCache := NewCache() |
||||||
|
var types []string |
||||||
|
var instances []string |
||||||
|
for name, cons := range charFilters { |
||||||
|
_, err := cons(emptyConfig, emptyCache) |
||||||
|
if err == nil { |
||||||
|
instances = append(instances, name) |
||||||
|
} else { |
||||||
|
types = append(types, name) |
||||||
|
} |
||||||
|
} |
||||||
|
return types, instances |
||||||
|
} |
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue