Skip to content

Commit 418f042

Browse files
- Address issue: #62 (Chinese characters and Python BOM prefix)
1 parent a103044 commit 418f042

File tree

2 files changed

+18
-2
lines changed

2 files changed

+18
-2
lines changed

exports/parquet.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ func ExportToParquet(ctx context.Context, w io.Writer, df *dataframe.DataFrame,
6565
// Create Schema
6666
dataSchema := dynamicstruct.NewStruct()
6767
for _, aSeries := range df.Series {
68-
fieldName := strings.Title(strings.ToLower(aSeries.Name()))
68+
fieldName := "Z" + strings.Title(strings.ToLower(aSeries.Name())) // Make it validly exported
6969
seriesName := santizeColumnName(aSeries.Name())
7070

7171
switch aSeries.(type) {
@@ -125,7 +125,7 @@ func ExportToParquet(ctx context.Context, w io.Writer, df *dataframe.DataFrame,
125125

126126
rec := schemaStruct.New()
127127
for _, aSeries := range df.Series {
128-
fieldName := strings.Title(strings.ToLower(aSeries.Name()))
128+
fieldName := "Z" + strings.Title(strings.ToLower(aSeries.Name()))
129129

130130
v := reflect.ValueOf(rec).Elem().FieldByName(fieldName)
131131
if v.IsValid() {

imports/csv.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,22 @@ func LoadFromCSV(ctx context.Context, r io.ReadSeeker, options ...CSVLoadOptions
7474

7575
var init *dataframe.SeriesInit
7676

77+
// Check for a UTF-8 byte order mark (BOM) at the beginning of the input (which Python seems to add).
78+
// See:
79+
// https://github.com/rocketlaunchr/dataframe-go/issues/62
80+
// https://github.com/golang/go/issues/33887
81+
// https://github.com/dimchansky/utfbom
82+
// https://github.com/spkg/bom/
83+
checkBOM := make([]byte, 3)
84+
readN, err := r.Read(checkBOM)
85+
if err != nil {
86+
return nil, err
87+
}
88+
if !(readN == 3 && checkBOM[0] == 0xef && checkBOM[1] == 0xbb && checkBOM[2] == 0xbf) {
89+
// bom not found so reset reader
90+
r.Seek(0, io.SeekStart)
91+
}
92+
7793
var (
7894
comma rune
7995
comment rune

0 commit comments

Comments
 (0)