Skip to content

Commit

Permalink
Merge branch 'master' into mblakele-dev-go111
Browse files Browse the repository at this point in the history
  • Loading branch information
mblakele authored Mar 17, 2020
2 parents 068e16f + fa4d2aa commit ffe05ec
Show file tree
Hide file tree
Showing 203 changed files with 197,502 additions and 34 deletions.
2 changes: 2 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
ISC License

Copyright (c) 2013 Matt Jibson <[email protected]>

Permission to use, copy, modify, and distribute this software for any
Expand Down
88 changes: 88 additions & 0 deletions _third_party/code.google.com/p/go-charset/charset/big5.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
package charset

import (
"fmt"
"unicode/utf8"
)

// init registers the "big5" character-set class, passing fromBig5 as the
// second argument of registerClass and nil as the third.
// NOTE(review): presumably the second argument is the constructor for the
// from-Big5 translator and the nil means no to-Big5 translator exists —
// confirm against registerClass.
func init() {
registerClass("big5", fromBig5, nil)
}

// Big5 consists of 89 fonts of 157 chars each.
// A font is selected by the lead byte and a char within it by the trail byte.
const (
// big5Max is the exact number of runes the mapping table must hold (89 * 157).
big5Max = 13973
// big5Font is the number of chars per font; it is the row stride used when
// indexing the mapping table.
big5Font = 157
// big5Data is the name handed to readFile to load the mapping table.
big5Data = "big5.dat"
)

// translateFromBig5 is the decoder state used by Translate to turn Big5
// bytes into UTF-8.
type translateFromBig5 struct {
// font is the pending lead byte of a two-byte character, or -1 when no
// lead byte is pending.
font int
// scratch is the output buffer; it is truncated and reused on every call
// to Translate.
scratch []byte
// big5map is the mapping table, indexed by font*big5Font + char offset.
big5map []rune
}

// Translate converts Big5-encoded data to UTF-8.
//
// Every byte of data is consumed, so the returned count always equals
// len(data). The returned slice aliases an internal scratch buffer and is
// overwritten by the next call. A lead byte that is the last byte of data is
// remembered in p.font and paired with the first byte of the following call.
// The eof flag is not consulted and the returned error is always nil.
//
// Bytes below 0xa1 seen while no lead byte is pending are single-byte
// characters and are emitted as the rune with the same value, except that
// byte 26 is emitted as a newline. Two-byte sequences that are malformed or
// have no entry in the mapping table are emitted as utf8.RuneError.
func (p *translateFromBig5) Translate(data []byte, eof bool) (int, []byte, error) {
	p.scratch = p.scratch[:0]
	n := 0
	for len(data) > 0 {
		c := int(data[0])
		data = data[1:]
		n++
		if p.font == -1 {
			// Idle state: no lead byte is pending.
			if c >= 0xa1 {
				// Lead byte of a two-byte character; wait for the trail byte.
				p.font = c
				continue
			}
			if c == 26 {
				c = '\n'
			}
			// Single-byte character: emit it. Previously the byte was
			// consumed but never written, so all ASCII text was lost.
			p.scratch = appendRune(p.scratch, rune(c))
			continue
		}
		f := p.font
		p.font = -1
		r := utf8.RuneError
		// Map the trail byte onto an offset 0..156 within the font.
		switch {
		case c >= 64 && c <= 126:
			c -= 64
		case c >= 161 && c <= 254:
			c = c - 161 + 63
		default:
			// Bad trail byte: force the lookup below to be skipped.
			f = 255
		}
		if f <= 254 {
			f -= 161
			ix := f*big5Font + c
			// Lead bytes past the last font index beyond the table.
			if ix < len(p.big5map) {
				r = p.big5map[ix]
			}
			if r == -1 {
				r = utf8.RuneError
			}
		}
		p.scratch = appendRune(p.scratch, r)
	}
	return n, p.scratch, nil
}

// big5Key is the type of the key that fromBig5 passes to cache for the Big5
// mapping table.
type big5Key bool

// fromBig5 returns a Translator that converts Big5 text to UTF-8.
//
// The mapping table is obtained through cache under the key big5Key(false);
// the loader reads big5Data with readFile, converts its contents to runes and
// rejects a table whose length is not big5Max. The arg parameter is unused.
// NOTE(review): presumably cache runs the loader once and memoises the
// result — confirm against its definition.
func fromBig5(arg string) (Translator, error) {
	load := func() (interface{}, error) {
		raw, readErr := readFile(big5Data)
		if readErr != nil {
			return nil, fmt.Errorf("charset: cannot open big5 data file: %v", readErr)
		}
		table := []rune(string(raw))
		if len(table) != big5Max {
			return nil, fmt.Errorf("charset: corrupt big5 data")
		}
		return table, nil
	}

	cached, err := cache(big5Key(false), load)
	if err != nil {
		return nil, err
	}
	// font starts at -1: no lead byte is pending.
	tr := &translateFromBig5{font: -1, big5map: cached.([]rune)}
	return tr, nil
}
301 changes: 301 additions & 0 deletions _third_party/code.google.com/p/go-charset/charset/charset.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,301 @@
// The charset package implements translation between character sets.
// It uses Unicode as the intermediate representation.
// Because it can be large, the character set data is separated
// from the charset package. It can be embedded in the Go
// executable by importing the data package:
//
// import _ "code.google.com/p/go-charset/data"
//
// It can also be made available in a data directory (by setting CharsetDir).
package charset

import (
	"io"
	"sort"
	"strings"
	"unicode/utf8"
)

// Charset holds information about a given character set.
// Values of this type are returned by Info and by Factory.Info.
type Charset struct {
Name string // Canonical name of character set.
Aliases []string // Known aliases.
Desc string // Description.
NoFrom bool // Not possible to translate from this charset.
NoTo bool // Not possible to translate to this charset.
}

// Translator represents a character set converter.
// The Translate method translates the given data,
// and returns the number of bytes of data consumed,
// a slice containing the converted data (which may be
// overwritten on the next call to Translate), and any
// conversion error. If eof is true, the data represents
// the final bytes of the input.
//
// Bytes that were not consumed (n < len(data)) are kept by the translating
// reader and writer in this package and presented again at the start of the
// data passed to the next call.
type Translator interface {
Translate(data []byte, eof bool) (n int, cdata []byte, err error)
}

// A Factory can be used to make character set translators.
type Factory interface {
// TranslatorFrom creates a translator that will translate from the named character
// set to UTF-8.
TranslatorFrom(name string) (Translator, error)

// TranslatorTo creates a translator that will translate from UTF-8 to the named character set.
TranslatorTo(name string) (Translator, error)

// Names returns all the character set names accessible through the factory.
Names() []string

// Info returns information on the named character set. It returns nil if the
// factory doesn't recognise the given name.
Info(name string) *Charset
}

// factories is the list of registered Factory values, consulted in order by
// Info, Names, TranslatorFrom and TranslatorTo. It starts with a localFactory
// and is extended by Register.
var factories = []Factory{localFactory{}}

// Register adds factory to the end of the list of factories consulted by
// Info, Names, TranslatorFrom and TranslatorTo (and therefore by NewReader
// and NewWriter). Factories registered earlier take precedence.
func Register(factory Factory) {
	extended := append(factories, factory)
	factories = extended
}

// NewReader wraps r in a Reader that yields UTF-8, translating from the
// named character set as data is read. It returns the error from
// TranslatorFrom if no translator can be obtained for charset.
func NewReader(charset string, r io.Reader) (io.Reader, error) {
	translator, err := TranslatorFrom(charset)
	if err != nil {
		return nil, err
	}
	reader := NewTranslatingReader(r, translator)
	return reader, nil
}

// NewWriter wraps w in a WriteCloser that accepts UTF-8 text and writes it
// to w in the named character set. Close must be called to flush any
// partially translated characters that are still buffered. It returns the
// error from TranslatorTo if no translator can be obtained for charset.
func NewWriter(charset string, w io.Writer) (io.WriteCloser, error) {
	translator, err := TranslatorTo(charset)
	if err != nil {
		return nil, err
	}
	writer := NewTranslatingWriter(w, translator)
	return writer, nil
}

// Info asks each registered factory in turn about the named character set
// and returns the first non-nil answer, or nil if no factory recognises it.
func Info(name string) *Charset {
	for i := range factories {
		info := factories[i].Info(name)
		if info == nil {
			continue
		}
		return info
	}
	return nil
}

// Names returns the character set names reported by all registered
// factories, with duplicates removed, in alphabetical order.
// It returns nil when no factory reports any name.
func Names() []string {
	seen := make(map[string]bool)
	var names []string
	for _, f := range factories {
		for _, name := range f.Names() {
			// Several factories may offer the same character set.
			if seen[name] {
				continue
			}
			seen[name] = true
			names = append(names, name)
		}
	}
	// Per-factory lists are concatenated, so the combined list must be
	// sorted to honour the documented ordering.
	sort.Strings(names)
	return names
}

// TranslatorFrom returns a translator that will translate from
// the named character set to UTF-8.
//
// The registered factories are tried in order and the result of the first
// one that reports no error is returned. If every factory fails, the result
// is nil together with the error from the last factory tried; a translator
// value returned alongside an error is never handed to the caller.
func TranslatorFrom(charset string) (Translator, error) {
	var err error
	for _, f := range factories {
		var tr Translator
		tr, err = f.TranslatorFrom(charset)
		if err == nil {
			return tr, nil
		}
	}
	return nil, err
}

// TranslatorTo returns a translator that will translate from UTF-8
// to the named character set.
//
// The registered factories are tried in order and the result of the first
// one that reports no error is returned. If every factory fails, the result
// is nil together with the error from the last factory tried; a translator
// value returned alongside an error is never handed to the caller.
func TranslatorTo(charset string) (Translator, error) {
	var err error
	for _, f := range factories {
		var tr Translator
		tr, err = f.TranslatorTo(charset)
		if err == nil {
			return tr, nil
		}
	}
	return nil, err
}

// normalizedChar maps an ASCII capital letter to its lower-case form and
// '_' to '-'. Every other rune is returned unchanged.
func normalizedChar(c rune) rune {
	if 'A' <= c && c <= 'Z' {
		return c + ('a' - 'A')
	}
	if c == '_' {
		return '-'
	}
	return c
}

// NormalizedName returns s with all Roman capitals
// mapped to lower case, and '_' mapped to '-'.
func NormalizedName(s string) string {
	normalized := strings.Map(normalizedChar, s)
	return normalized
}

// translatingWriter is the io.WriteCloser returned by NewTranslatingWriter.
// It passes written bytes through tr and writes the translated output to w.
type translatingWriter struct {
w io.Writer
tr Translator
buf []byte // unconsumed data from writer.
}

// NewTranslatingWriter returns a WriteCloser that runs every written byte
// through tr and writes the translated output to w.
func NewTranslatingWriter(w io.Writer, tr Translator) io.WriteCloser {
	tw := new(translatingWriter)
	tw.w = w
	tw.tr = tr
	return tw
}

// Write translates data, prefixed by any bytes left over from earlier
// calls, and writes the translated output to the underlying writer.
//
// Bytes the translator does not consume are kept in w.buf for the next
// Write or Close. On success it reports len(data) bytes written; if the
// underlying writer fails it returns 0 and that error. Errors reported by
// the translator are currently ignored.
func (w *translatingWriter) Write(data []byte) (rn int, rerr error) {
	pending := data
	if len(w.buf) != 0 {
		// Leftover input exists: translate it together with the new data.
		w.buf = append(w.buf, data...)
		pending = w.buf
	}

	// TODO: the translation error is discarded.
	consumed, out, _ := w.tr.Translate(pending, false)
	if consumed > 0 {
		if _, werr := w.w.Write(out); werr != nil {
			return 0, werr
		}
	}

	// Keep whatever the translator left unconsumed. pending may alias w.buf;
	// append copies safely when source and destination overlap.
	w.buf = w.buf[:0]
	if consumed < len(pending) {
		w.buf = append(w.buf, pending[consumed:]...)
	}
	return len(data), nil
}

// Close flushes the buffered, not yet translated input by repeatedly
// calling the translator with eof set and writing what it produces.
//
// It stops when the translator yields no output or when no buffered input
// remains. It returns the underlying writer's error, or io.ErrShortWrite if
// that writer accepts fewer bytes than it was given. Errors reported by the
// translator are currently ignored. The underlying writer is not closed.
func (w *translatingWriter) Close() error {
	for {
		// TODO: the translation error is discarded.
		consumed, out, _ := w.tr.Translate(w.buf, true)
		w.buf = w.buf[consumed:]

		// If the Translator produces no data
		// at EOF, then assume that it never will.
		if len(out) == 0 {
			return nil
		}

		written, err := w.w.Write(out)
		switch {
		case err != nil:
			return err
		case written < len(out):
			return io.ErrShortWrite
		case len(w.buf) == 0:
			return nil
		}
	}
}

// translatingReader is the io.Reader returned by NewTranslatingReader.
// It reads raw bytes from r, passes them through tr and serves the
// translated output to callers of Read.
type translatingReader struct {
r io.Reader
tr Translator
cdata []byte // unconsumed data from converter.
rdata []byte // unconverted data from reader.
err error // final error from reader.
}

// NewTranslatingReader returns a Reader that reads from r and serves the
// data after passing it through tr.
func NewTranslatingReader(r io.Reader, tr Translator) io.Reader {
	tr2 := new(translatingReader)
	tr2.r = r
	tr2.tr = tr
	return tr2
}

// Read implements io.Reader, filling buf with translated data.
//
// Translated bytes left over from an earlier call are served first.
// Otherwise raw bytes are read from the source, appended to the unconverted
// buffer and passed to the translator; this repeats until the translator
// produces output or the source is exhausted. Once the source has returned
// an error and all data has been delivered, that error is returned. Errors
// reported by the translator are currently ignored.
//
// A call with an empty buf returns (0, nil) without touching the source.
func (r *translatingReader) Read(buf []byte) (int, error) {
	// A zero-length read can deliver nothing. Return before reading from the
	// source: a read into an empty slice may legitimately yield (0, nil),
	// which the guard below would record as a permanent io.EOF.
	if len(buf) == 0 {
		return 0, nil
	}
	for {
		if len(r.cdata) > 0 {
			n := copy(buf, r.cdata)
			r.cdata = r.cdata[n:]
			return n, nil
		}
		if r.err == nil {
			// Make room for up to len(buf) more raw bytes and read into
			// the spare capacity after the unconverted data.
			r.rdata = ensureCap(r.rdata, len(r.rdata)+len(buf))
			n, err := r.r.Read(r.rdata[len(r.rdata):cap(r.rdata)])
			// Guard against non-compliant Readers.
			if n == 0 && err == nil {
				err = io.EOF
			}
			r.rdata = r.rdata[0 : len(r.rdata)+n]
			r.err = err
		} else if len(r.rdata) == 0 {
			// Source finished and nothing is left to convert.
			break
		}
		// TODO: the conversion error is discarded.
		nc, cdata, _ := r.tr.Translate(r.rdata, r.err != nil)
		r.cdata = cdata

		// Ensure that we consume all bytes at eof
		// if the converter refuses them.
		if nc == 0 && r.err != nil {
			nc = len(r.rdata)
		}

		// Copy unconsumed data to the start of the rdata buffer.
		r.rdata = r.rdata[0:copy(r.rdata, r.rdata[nc:])]
	}
	return 0, r.err
}

// ensureCap returns s with room for at least n bytes.
// When cap(s) already suffices, s itself is returned. Otherwise the contents
// of s are copied into a fresh slice of the same length whose capacity is n
// for a zero-capacity s, or else cap(s) grown repeatedly (doubling below
// 1024, then by a quarter) until it reaches n.
func ensureCap(s []byte, n int) []byte {
	have := cap(s)
	if have >= n {
		return s
	}

	// Growth policy adapted from appendslice1 in the runtime.
	want := n
	if have != 0 {
		want = have
		for want < n {
			if want < 1024 {
				want *= 2
			} else {
				want += want / 4
			}
		}
	}

	// len(s) <= cap(s) < want, so this append never reallocates.
	return append(make([]byte, 0, want), s...)
}

// appendRune appends the UTF-8 encoding of r to buf and returns the
// extended slice. Capacity for utf8.UTFMax more bytes is reserved through
// ensureCap before the encoded bytes are added.
func appendRune(buf []byte, r rune) []byte {
	var enc [utf8.UTFMax]byte
	width := utf8.EncodeRune(enc[:], r)
	buf = ensureCap(buf, len(buf)+utf8.UTFMax)
	// Capacity is already reserved, so this append never reallocates.
	return append(buf, enc[:width]...)
}
Loading

0 comments on commit ffe05ec

Please sign in to comment.