You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							110 lines
						
					
					
						
							3.6 KiB
						
					
					
				
			
		
		
	
	
							110 lines
						
					
					
						
							3.6 KiB
						
					
					
				| //  Copyright (c) 2015 Couchbase, Inc.
 | |
| //
 | |
| // Licensed under the Apache License, Version 2.0 (the "License");
 | |
| // you may not use this file except in compliance with the License.
 | |
| // You may obtain a copy of the License at
 | |
| //
 | |
| // 		http://www.apache.org/licenses/LICENSE-2.0
 | |
| //
 | |
| // Unless required by applicable law or agreed to in writing, software
 | |
| // distributed under the License is distributed on an "AS IS" BASIS,
 | |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| // See the License for the specific language governing permissions and
 | |
| // limitations under the License.
 | |
| 
 | |
| package upsidedown
 | |
| 
 | |
| import (
 | |
| 	"github.com/blevesearch/bleve/analysis"
 | |
| 	"github.com/blevesearch/bleve/document"
 | |
| 	"github.com/blevesearch/bleve/index"
 | |
| )
 | |
| 
 | |
| func (udc *UpsideDownCouch) Analyze(d *document.Document) *index.AnalysisResult {
 | |
| 	rv := &index.AnalysisResult{
 | |
| 		DocID: d.ID,
 | |
| 		Rows:  make([]index.IndexRow, 0, 100),
 | |
| 	}
 | |
| 
 | |
| 	docIDBytes := []byte(d.ID)
 | |
| 
 | |
| 	// track our back index entries
 | |
| 	backIndexStoredEntries := make([]*BackIndexStoreEntry, 0)
 | |
| 
 | |
| 	// information we collate as we merge fields with same name
 | |
| 	fieldTermFreqs := make(map[uint16]analysis.TokenFrequencies)
 | |
| 	fieldLengths := make(map[uint16]int)
 | |
| 	fieldIncludeTermVectors := make(map[uint16]bool)
 | |
| 	fieldNames := make(map[uint16]string)
 | |
| 
 | |
| 	analyzeField := func(field document.Field, storable bool) {
 | |
| 		fieldIndex, newFieldRow := udc.fieldIndexOrNewRow(field.Name())
 | |
| 		if newFieldRow != nil {
 | |
| 			rv.Rows = append(rv.Rows, newFieldRow)
 | |
| 		}
 | |
| 		fieldNames[fieldIndex] = field.Name()
 | |
| 
 | |
| 		if field.Options().IsIndexed() {
 | |
| 			fieldLength, tokenFreqs := field.Analyze()
 | |
| 			existingFreqs := fieldTermFreqs[fieldIndex]
 | |
| 			if existingFreqs == nil {
 | |
| 				fieldTermFreqs[fieldIndex] = tokenFreqs
 | |
| 			} else {
 | |
| 				existingFreqs.MergeAll(field.Name(), tokenFreqs)
 | |
| 				fieldTermFreqs[fieldIndex] = existingFreqs
 | |
| 			}
 | |
| 			fieldLengths[fieldIndex] += fieldLength
 | |
| 			fieldIncludeTermVectors[fieldIndex] = field.Options().IncludeTermVectors()
 | |
| 		}
 | |
| 
 | |
| 		if storable && field.Options().IsStored() {
 | |
| 			rv.Rows, backIndexStoredEntries = udc.storeField(docIDBytes, field, fieldIndex, rv.Rows, backIndexStoredEntries)
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// walk all the fields, record stored fields now
 | |
| 	// place information about indexed fields into map
 | |
| 	// this collates information across fields with
 | |
| 	// same names (arrays)
 | |
| 	for _, field := range d.Fields {
 | |
| 		analyzeField(field, true)
 | |
| 	}
 | |
| 
 | |
| 	if len(d.CompositeFields) > 0 {
 | |
| 		for fieldIndex, tokenFreqs := range fieldTermFreqs {
 | |
| 			// see if any of the composite fields need this
 | |
| 			for _, compositeField := range d.CompositeFields {
 | |
| 				compositeField.Compose(fieldNames[fieldIndex], fieldLengths[fieldIndex], tokenFreqs)
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		for _, compositeField := range d.CompositeFields {
 | |
| 			analyzeField(compositeField, false)
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	rowsCapNeeded := len(rv.Rows) + 1
 | |
| 	for _, tokenFreqs := range fieldTermFreqs {
 | |
| 		rowsCapNeeded += len(tokenFreqs)
 | |
| 	}
 | |
| 
 | |
| 	rv.Rows = append(make([]index.IndexRow, 0, rowsCapNeeded), rv.Rows...)
 | |
| 
 | |
| 	backIndexTermsEntries := make([]*BackIndexTermsEntry, 0, len(fieldTermFreqs))
 | |
| 
 | |
| 	// walk through the collated information and process
 | |
| 	// once for each indexed field (unique name)
 | |
| 	for fieldIndex, tokenFreqs := range fieldTermFreqs {
 | |
| 		fieldLength := fieldLengths[fieldIndex]
 | |
| 		includeTermVectors := fieldIncludeTermVectors[fieldIndex]
 | |
| 
 | |
| 		// encode this field
 | |
| 		rv.Rows, backIndexTermsEntries = udc.indexField(docIDBytes, includeTermVectors, fieldIndex, fieldLength, tokenFreqs, rv.Rows, backIndexTermsEntries)
 | |
| 	}
 | |
| 
 | |
| 	// build the back index row
 | |
| 	backIndexRow := NewBackIndexRow(docIDBytes, backIndexTermsEntries, backIndexStoredEntries)
 | |
| 	rv.Rows = append(rv.Rows, backIndexRow)
 | |
| 
 | |
| 	return rv
 | |
| }
 | |
| 
 |