forked from maddyblue/goread
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' into mblakele-dev-go111
- Loading branch information
Showing
203 changed files
with
197,502 additions
and
34 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
ISC License | ||
|
||
Copyright (c) 2013 Matt Jibson <[email protected]> | ||
|
||
Permission to use, copy, modify, and distribute this software for any | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
package charset | ||
|
||
import ( | ||
"fmt" | ||
"unicode/utf8" | ||
) | ||
|
||
func init() { | ||
registerClass("big5", fromBig5, nil) | ||
} | ||
|
||
// Big5 consists of 89 fonts of 157 chars each | ||
const ( | ||
big5Max = 13973 | ||
big5Font = 157 | ||
big5Data = "big5.dat" | ||
) | ||
|
||
type translateFromBig5 struct { | ||
font int | ||
scratch []byte | ||
big5map []rune | ||
} | ||
|
||
func (p *translateFromBig5) Translate(data []byte, eof bool) (int, []byte, error) { | ||
p.scratch = p.scratch[:0] | ||
n := 0 | ||
for len(data) > 0 { | ||
c := int(data[0]) | ||
data = data[1:] | ||
n++ | ||
if p.font == -1 { | ||
// idle state | ||
if c >= 0xa1 { | ||
p.font = c | ||
continue | ||
} | ||
if c == 26 { | ||
c = '\n' | ||
} | ||
continue | ||
} | ||
f := p.font | ||
p.font = -1 | ||
r := utf8.RuneError | ||
switch { | ||
case c >= 64 && c <= 126: | ||
c -= 64 | ||
case c >= 161 && c <= 254: | ||
c = c - 161 + 63 | ||
default: | ||
// bad big5 char | ||
f = 255 | ||
} | ||
if f <= 254 { | ||
f -= 161 | ||
ix := f*big5Font + c | ||
if ix < len(p.big5map) { | ||
r = p.big5map[ix] | ||
} | ||
if r == -1 { | ||
r = utf8.RuneError | ||
} | ||
} | ||
p.scratch = appendRune(p.scratch, r) | ||
} | ||
return n, p.scratch, nil | ||
} | ||
|
||
type big5Key bool | ||
|
||
func fromBig5(arg string) (Translator, error) { | ||
big5map, err := cache(big5Key(false), func() (interface{}, error) { | ||
data, err := readFile(big5Data) | ||
if err != nil { | ||
return nil, fmt.Errorf("charset: cannot open big5 data file: %v", err) | ||
} | ||
big5map := []rune(string(data)) | ||
if len(big5map) != big5Max { | ||
return nil, fmt.Errorf("charset: corrupt big5 data") | ||
} | ||
return big5map, nil | ||
}) | ||
if err != nil { | ||
return nil, err | ||
} | ||
return &translateFromBig5{big5map: big5map.([]rune), font: -1}, nil | ||
} |
301 changes: 301 additions & 0 deletions
301
_third_party/code.google.com/p/go-charset/charset/charset.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,301 @@ | ||
// The charset package implements translation between character sets. | ||
// It uses Unicode as the intermediate representation. | ||
// Because it can be large, the character set data is separated | ||
// from the charset package. It can be embedded in the Go | ||
// executable by importing the data package: | ||
// | ||
// import _ "code.google.com/p/go-charset/data" | ||
// | ||
// It can also made available in a data directory (by settting CharsetDir). | ||
package charset | ||
|
||
import ( | ||
"io" | ||
"strings" | ||
"unicode/utf8" | ||
) | ||
|
||
// Charset holds information about a given character set. | ||
type Charset struct { | ||
Name string // Canonical name of character set. | ||
Aliases []string // Known aliases. | ||
Desc string // Description. | ||
NoFrom bool // Not possible to translate from this charset. | ||
NoTo bool // Not possible to translate to this charset. | ||
} | ||
|
||
// Translator represents a character set converter. | ||
// The Translate method translates the given data, | ||
// and returns the number of bytes of data consumed, | ||
// a slice containing the converted data (which may be | ||
// overwritten on the next call to Translate), and any | ||
// conversion error. If eof is true, the data represents | ||
// the final bytes of the input. | ||
type Translator interface { | ||
Translate(data []byte, eof bool) (n int, cdata []byte, err error) | ||
} | ||
|
||
// A Factory can be used to make character set translators. | ||
type Factory interface { | ||
// TranslatorFrom creates a translator that will translate from the named character | ||
// set to UTF-8. | ||
TranslatorFrom(name string) (Translator, error) // Create a Translator from this character set to. | ||
|
||
// TranslatorTo creates a translator that will translate from UTF-8 to the named character set. | ||
TranslatorTo(name string) (Translator, error) // Create a Translator To this character set. | ||
|
||
// Names returns all the character set names accessibile through the factory. | ||
Names() []string | ||
|
||
// Info returns information on the named character set. It returns nil if the | ||
// factory doesn't recognise the given name. | ||
Info(name string) *Charset | ||
} | ||
|
||
var factories = []Factory{localFactory{}} | ||
|
||
// Register registers a new Factory which will be consulted when NewReader | ||
// or NewWriter needs a character set translator for a given name. | ||
func Register(factory Factory) { | ||
factories = append(factories, factory) | ||
} | ||
|
||
// NewReader returns a new Reader that translates from the named | ||
// character set to UTF-8 as it reads r. | ||
func NewReader(charset string, r io.Reader) (io.Reader, error) { | ||
tr, err := TranslatorFrom(charset) | ||
if err != nil { | ||
return nil, err | ||
} | ||
return NewTranslatingReader(r, tr), nil | ||
} | ||
|
||
// NewWriter returns a new WriteCloser writing to w. It converts writes | ||
// of UTF-8 text into writes on w of text in the named character set. | ||
// The Close is necessary to flush any remaining partially translated | ||
// characters to the output. | ||
func NewWriter(charset string, w io.Writer) (io.WriteCloser, error) { | ||
tr, err := TranslatorTo(charset) | ||
if err != nil { | ||
return nil, err | ||
} | ||
return NewTranslatingWriter(w, tr), nil | ||
} | ||
|
||
// Info returns information about a character set, or nil | ||
// if the character set is not found. | ||
func Info(name string) *Charset { | ||
for _, f := range factories { | ||
if info := f.Info(name); info != nil { | ||
return info | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
// Names returns the canonical names of all supported character sets, in alphabetical order. | ||
func Names() []string { | ||
// TODO eliminate duplicates | ||
var names []string | ||
for _, f := range factories { | ||
names = append(names, f.Names()...) | ||
} | ||
return names | ||
} | ||
|
||
// TranslatorFrom returns a translator that will translate from | ||
// the named character set to UTF-8. | ||
func TranslatorFrom(charset string) (Translator, error) { | ||
var err error | ||
var tr Translator | ||
for _, f := range factories { | ||
tr, err = f.TranslatorFrom(charset) | ||
if err == nil { | ||
break | ||
} | ||
} | ||
if tr == nil { | ||
return nil, err | ||
} | ||
return tr, nil | ||
} | ||
|
||
// TranslatorTo returns a translator that will translate from UTF-8 | ||
// to the named character set. | ||
func TranslatorTo(charset string) (Translator, error) { | ||
var err error | ||
var tr Translator | ||
for _, f := range factories { | ||
tr, err = f.TranslatorTo(charset) | ||
if err == nil { | ||
break | ||
} | ||
} | ||
if tr == nil { | ||
return nil, err | ||
} | ||
return tr, nil | ||
} | ||
|
||
func normalizedChar(c rune) rune { | ||
switch { | ||
case c >= 'A' && c <= 'Z': | ||
c = c - 'A' + 'a' | ||
case c == '_': | ||
c = '-' | ||
} | ||
return c | ||
} | ||
|
||
// NormalisedName returns s with all Roman capitals | ||
// mapped to lower case, and '_' mapped to '-' | ||
func NormalizedName(s string) string { | ||
return strings.Map(normalizedChar, s) | ||
} | ||
|
||
type translatingWriter struct { | ||
w io.Writer | ||
tr Translator | ||
buf []byte // unconsumed data from writer. | ||
} | ||
|
||
// NewTranslatingWriter returns a new WriteCloser writing to w. | ||
// It passes the written bytes through the given Translator. | ||
func NewTranslatingWriter(w io.Writer, tr Translator) io.WriteCloser { | ||
return &translatingWriter{w: w, tr: tr} | ||
} | ||
|
||
func (w *translatingWriter) Write(data []byte) (rn int, rerr error) { | ||
wdata := data | ||
if len(w.buf) > 0 { | ||
w.buf = append(w.buf, data...) | ||
wdata = w.buf | ||
} | ||
n, cdata, err := w.tr.Translate(wdata, false) | ||
if err != nil { | ||
// TODO | ||
} | ||
if n > 0 { | ||
_, err = w.w.Write(cdata) | ||
if err != nil { | ||
return 0, err | ||
} | ||
} | ||
w.buf = w.buf[:0] | ||
if n < len(wdata) { | ||
w.buf = append(w.buf, wdata[n:]...) | ||
} | ||
return len(data), nil | ||
} | ||
|
||
func (p *translatingWriter) Close() error { | ||
for { | ||
n, data, err := p.tr.Translate(p.buf, true) | ||
p.buf = p.buf[n:] | ||
if err != nil { | ||
// TODO | ||
} | ||
// If the Translator produces no data | ||
// at EOF, then assume that it never will. | ||
if len(data) == 0 { | ||
break | ||
} | ||
n, err = p.w.Write(data) | ||
if err != nil { | ||
return err | ||
} | ||
if n < len(data) { | ||
return io.ErrShortWrite | ||
} | ||
if len(p.buf) == 0 { | ||
break | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
type translatingReader struct { | ||
r io.Reader | ||
tr Translator | ||
cdata []byte // unconsumed data from converter. | ||
rdata []byte // unconverted data from reader. | ||
err error // final error from reader. | ||
} | ||
|
||
// NewTranslatingReader returns a new Reader that | ||
// translates data using the given Translator as it reads r. | ||
func NewTranslatingReader(r io.Reader, tr Translator) io.Reader { | ||
return &translatingReader{r: r, tr: tr} | ||
} | ||
|
||
func (r *translatingReader) Read(buf []byte) (int, error) { | ||
for { | ||
if len(r.cdata) > 0 { | ||
n := copy(buf, r.cdata) | ||
r.cdata = r.cdata[n:] | ||
return n, nil | ||
} | ||
if r.err == nil { | ||
r.rdata = ensureCap(r.rdata, len(r.rdata)+len(buf)) | ||
n, err := r.r.Read(r.rdata[len(r.rdata):cap(r.rdata)]) | ||
// Guard against non-compliant Readers. | ||
if n == 0 && err == nil { | ||
err = io.EOF | ||
} | ||
r.rdata = r.rdata[0 : len(r.rdata)+n] | ||
r.err = err | ||
} else if len(r.rdata) == 0 { | ||
break | ||
} | ||
nc, cdata, cvterr := r.tr.Translate(r.rdata, r.err != nil) | ||
if cvterr != nil { | ||
// TODO | ||
} | ||
r.cdata = cdata | ||
|
||
// Ensure that we consume all bytes at eof | ||
// if the converter refuses them. | ||
if nc == 0 && r.err != nil { | ||
nc = len(r.rdata) | ||
} | ||
|
||
// Copy unconsumed data to the start of the rdata buffer. | ||
r.rdata = r.rdata[0:copy(r.rdata, r.rdata[nc:])] | ||
} | ||
return 0, r.err | ||
} | ||
|
||
// ensureCap returns s with a capacity of at least n bytes. | ||
// If cap(s) < n, then it returns a new copy of s with the | ||
// required capacity. | ||
func ensureCap(s []byte, n int) []byte { | ||
if n <= cap(s) { | ||
return s | ||
} | ||
// logic adapted from appendslice1 in runtime | ||
m := cap(s) | ||
if m == 0 { | ||
m = n | ||
} else { | ||
for { | ||
if m < 1024 { | ||
m += m | ||
} else { | ||
m += m / 4 | ||
} | ||
if m >= n { | ||
break | ||
} | ||
} | ||
} | ||
t := make([]byte, len(s), m) | ||
copy(t, s) | ||
return t | ||
} | ||
|
||
func appendRune(buf []byte, r rune) []byte { | ||
n := len(buf) | ||
buf = ensureCap(buf, n+utf8.UTFMax) | ||
nu := utf8.EncodeRune(buf[n:n+utf8.UTFMax], r) | ||
return buf[0 : n+nu] | ||
} |
Oops, something went wrong.