You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							156 lines
						
					
					
						
							2.8 KiB
						
					
					
				
			
		
		
	
	
							156 lines
						
					
					
						
							2.8 KiB
						
					
					
				| package mahonia
 | |
| 
 | |
| import (
 | |
| 	"sync"
 | |
| )
 | |
| 
 | |
| // Converters for GB18030 encoding.
 | |
| 
 | |
| func init() {
 | |
| 	RegisterCharset(&Charset{
 | |
| 		Name: "GB18030",
 | |
| 		NewDecoder: func() Decoder {
 | |
| 			gb18030Once.Do(buildGB18030Tables)
 | |
| 			return decodeGB18030Rune
 | |
| 		},
 | |
| 		NewEncoder: func() Encoder {
 | |
| 			gb18030Once.Do(buildGB18030Tables)
 | |
| 			return encodeGB18030Rune
 | |
| 		},
 | |
| 	})
 | |
| }
 | |
| 
 | |
| func decodeGB18030Rune(p []byte) (r rune, size int, status Status) {
 | |
| 	if len(p) == 0 {
 | |
| 		status = NO_ROOM
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	b := p[0]
 | |
| 	if b < 128 {
 | |
| 		return rune(b), 1, SUCCESS
 | |
| 	}
 | |
| 
 | |
| 	if len(p) < 2 {
 | |
| 		status = NO_ROOM
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	if p[0] < 0x81 || p[0] > 0xfe {
 | |
| 		return 0xfffd, 1, INVALID_CHAR
 | |
| 	}
 | |
| 
 | |
| 	if p[1] >= 0x40 {
 | |
| 		// 2-byte character
 | |
| 		c := uint16(p[0])<<8 + uint16(p[1])
 | |
| 		r = rune(gbkToUnicode[c])
 | |
| 		if r == 0 {
 | |
| 			r = gbkToUnicodeExtra[c]
 | |
| 		}
 | |
| 
 | |
| 		if r != 0 {
 | |
| 			return r, 2, SUCCESS
 | |
| 		}
 | |
| 	} else if p[1] >= 0x30 {
 | |
| 		// 4-byte character
 | |
| 		if len(p) < 4 {
 | |
| 			return 0, 0, NO_ROOM
 | |
| 		}
 | |
| 		if p[2] < 0x81 || p[2] > 0xfe || p[3] < 0x30 || p[3] > 0x39 {
 | |
| 			return 0xfffd, 1, INVALID_CHAR
 | |
| 		}
 | |
| 
 | |
| 		code := uint32(p[0])<<24 + uint32(p[1])<<16 + uint32(p[2])<<8 + uint32(p[3])
 | |
| 		lin := gb18030Linear(code)
 | |
| 
 | |
| 		if lin <= maxGB18030Linear {
 | |
| 			r = rune(gb18030LinearToUnicode[lin])
 | |
| 			if r != 0 {
 | |
| 				return r, 4, SUCCESS
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		for _, rng := range gb18030Ranges {
 | |
| 			if lin >= rng.firstGB && lin <= rng.lastGB {
 | |
| 				return rng.firstRune + rune(lin) - rune(rng.firstGB), 4, SUCCESS
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return 0xfffd, 1, INVALID_CHAR
 | |
| }
 | |
| 
 | |
| func encodeGB18030Rune(p []byte, r rune) (size int, status Status) {
 | |
| 	if len(p) == 0 {
 | |
| 		status = NO_ROOM
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	if r < 128 {
 | |
| 		p[0] = byte(r)
 | |
| 		return 1, SUCCESS
 | |
| 	}
 | |
| 
 | |
| 	if len(p) < 2 {
 | |
| 		status = NO_ROOM
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	var c uint16
 | |
| 	if r < 0x10000 {
 | |
| 		c = unicodeToGBK[r]
 | |
| 	} else {
 | |
| 		c = unicodeToGBKExtra[r]
 | |
| 	}
 | |
| 
 | |
| 	if c != 0 {
 | |
| 		p[0] = byte(c >> 8)
 | |
| 		p[1] = byte(c)
 | |
| 		return 2, SUCCESS
 | |
| 	}
 | |
| 
 | |
| 	if len(p) < 4 {
 | |
| 		return 0, NO_ROOM
 | |
| 	}
 | |
| 
 | |
| 	if r < 0x10000 {
 | |
| 		f := unicodeToGB18030[r]
 | |
| 		if f != 0 {
 | |
| 			p[0] = byte(f >> 24)
 | |
| 			p[1] = byte(f >> 16)
 | |
| 			p[2] = byte(f >> 8)
 | |
| 			p[3] = byte(f)
 | |
| 			return 4, SUCCESS
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	for _, rng := range gb18030Ranges {
 | |
| 		if r >= rng.firstRune && r <= rng.lastRune {
 | |
| 			lin := rng.firstGB + uint32(r) - uint32(rng.firstRune)
 | |
| 			p[0] = byte(lin/(10*126*10)) + 0x81
 | |
| 			p[1] = byte(lin/(126*10)%10) + 0x30
 | |
| 			p[2] = byte(lin/10%126) + 0x81
 | |
| 			p[3] = byte(lin%10) + 0x30
 | |
| 			return 4, SUCCESS
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	p[0] = 0x1a
 | |
| 	return 1, INVALID_CHAR
 | |
| }
 | |
| 
 | |
// Lazily built GB18030 lookup state, filled in by buildGB18030Tables.
var (
	// gb18030Once ensures buildGB18030Tables runs a single time.
	gb18030Once sync.Once

	// gb18030LinearToUnicode maps gb18030Linear values to Unicode; a zero
	// entry means there is no individual mapping for that value.
	gb18030LinearToUnicode []uint16

	// unicodeToGB18030 maps runes below 0x10000 to their four-byte GB18030
	// code packed into a uint32; a zero entry means there is no individual
	// mapping for that rune.
	unicodeToGB18030 []uint32
)
 | |
| 
 | |
| func buildGB18030Tables() {
 | |
| 	gb18030LinearToUnicode = make([]uint16, maxGB18030Linear+1)
 | |
| 	unicodeToGB18030 = make([]uint32, 65536)
 | |
| 	for _, data := range gb18030Data {
 | |
| 		gb18030LinearToUnicode[gb18030Linear(data.gb18030)] = data.unicode
 | |
| 		unicodeToGB18030[data.unicode] = data.gb18030
 | |
| 	}
 | |
| }
 | |
| 
 |