-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathspec.go
119 lines (102 loc) · 2.62 KB
/
spec.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
package filetypes
import (
"bytes"
"encoding/json"
"fmt"
"github.com/cloudquery/filetypes/v4/csv"
jsonfile "github.com/cloudquery/filetypes/v4/json"
"github.com/cloudquery/filetypes/v4/parquet"
)
// FormatType names the file format of a FileSpec (the type of FileSpec.Format).
type FormatType string
// Format names handled by FileSpec.SetDefaults, FileSpec.Validate and
// FileSpec.UnmarshalSpec.
//
// These are untyped string constants (no explicit FormatType), so they can be
// used both where a FormatType and where a plain string is expected.
const (
FormatTypeCSV = "csv"
FormatTypeJSON = "json"
FormatTypeParquet = "parquet"
)
// CompressionType names the compression of a file (the type of
// FileSpec.Compression).
type CompressionType string
// Compression values accepted by CompressionType.IsValid.
const (
// CompressionTypeNone is the empty string and stands for no compression.
CompressionTypeNone CompressionType = ""
// CompressionTypeGZip selects gzip; Extension returns ".gz" for it.
CompressionTypeGZip CompressionType = "gzip"
)
// FileSpec describes how a file is encoded: its format, the options specific
// to that format, and an optional compression.
//
// The exported fields carry JSON tags; the unexported typed specs are assigned
// by UnmarshalSpec and then used by SetDefaults and Validate.
type FileSpec struct {
// Output format.
// Validate rejects an empty value and any value other than csv, json or parquet.
Format FormatType `json:"format,omitempty" jsonschema:"required,enum=csv,enum=json,enum=parquet"`
// Format spec.
// Held as an untyped value; UnmarshalSpec re-encodes it to JSON and decodes it
// into the typed spec that matches Format.
FormatSpec any `json:"format_spec,omitempty"`
// Compression type.
// Empty or missing stands for no compression.
// Validate does not accept compression together with the parquet format.
Compression CompressionType `json:"compression,omitempty" jsonschema:"enum=,enum=gzip"`
// Typed format specs. UnmarshalSpec assigns the one selected by Format and
// does not touch the others.
csvSpec *csv.CSVSpec
jsonSpec *jsonfile.JSONSpec
parquetSpec *parquet.ParquetSpec
}
// SetDefaults asks the typed spec selected by s.Format to fill in its own
// defaults. For any other format value (including the empty one) it does
// nothing.
//
// NOTE(review): the typed specs are only assigned in UnmarshalSpec; calling
// SetDefaults before that invokes the method on a nil pointer. Confirm that
// callers always run UnmarshalSpec first.
func (s *FileSpec) SetDefaults() {
	switch format := s.Format; format {
	case FormatTypeParquet:
		s.parquetSpec.SetDefaults()
	case FormatTypeJSON:
		s.jsonSpec.SetDefaults()
	case FormatTypeCSV:
		s.csvSpec.SetDefaults()
	default:
		// Unknown or empty format: there is no typed spec to default.
	}
}
// Validate checks the spec and returns the first problem found, in this order:
//
//  1. Compression must be a value accepted by CompressionType.IsValid.
//  2. Format must not be empty.
//  3. Format must be csv, json or parquet; for parquet, Compression must be
//     empty.
//
// When those checks pass, the result of the selected typed spec's own
// Validate method is returned.
func (s *FileSpec) Validate() error {
	if ok := s.Compression.IsValid(); !ok {
		return fmt.Errorf("`compression` must be either empty or `%s`", CompressionTypeGZip)
	}

	switch format := s.Format; format {
	case "":
		return fmt.Errorf("format is required")
	case FormatTypeCSV:
		return s.csvSpec.Validate()
	case FormatTypeJSON:
		return s.jsonSpec.Validate()
	case FormatTypeParquet:
		if s.Compression == CompressionTypeNone {
			return s.parquetSpec.Validate()
		}
		// This won't work even if we wanted to, because parquet writer
		// prematurely closes the file handle.
		return fmt.Errorf("compression is not supported for parquet format")
	}

	return fmt.Errorf("unknown format %s", s.Format)
}
// UnmarshalSpec converts the untyped FormatSpec value into the typed spec that
// matches s.Format and stores it on the receiver.
//
// FormatSpec is re-encoded to JSON and then decoded strictly: numbers are kept
// as json.Number and fields unknown to the target spec are rejected.
//
// It returns an error when FormatSpec cannot be marshalled, when Format is
// empty or not one of csv, json or parquet, or when decoding fails.
func (s *FileSpec) UnmarshalSpec() error {
	b, err := json.Marshal(s.FormatSpec)
	if err != nil {
		return err
	}

	dec := json.NewDecoder(bytes.NewReader(b))
	dec.UseNumber()
	dec.DisallowUnknownFields()

	switch s.Format {
	case "":
		// Report a missing format with the same message Validate uses instead
		// of the dangling "unknown format " the default branch would produce.
		return fmt.Errorf("format is required")
	case FormatTypeCSV:
		s.csvSpec = &csv.CSVSpec{}
		return dec.Decode(s.csvSpec)
	case FormatTypeJSON:
		s.jsonSpec = &jsonfile.JSONSpec{}
		return dec.Decode(s.jsonSpec)
	case FormatTypeParquet:
		s.parquetSpec = &parquet.ParquetSpec{}
		return dec.Decode(s.parquetSpec)
	default:
		return fmt.Errorf("unknown format %s", s.Format)
	}
}
// IsValid reports whether c is one of the known compression types:
// CompressionTypeNone (empty) or CompressionTypeGZip.
func (c CompressionType) IsValid() bool {
	return c == CompressionTypeNone || c == CompressionTypeGZip
}
// Extension returns the file-name suffix for c: ".gz" for
// CompressionTypeGZip and the empty string for every other value.
func (c CompressionType) Extension() string {
	if c == CompressionTypeGZip {
		return ".gz"
	}
	return ""
}