From 2f722902de7e8f790b915c8da53518d4f3dddfd5 Mon Sep 17 00:00:00 2001 From: JocularMarrow Date: Fri, 31 May 2024 14:26:11 +0200 Subject: [PATCH] init --- .github/workflows/release.yml | 117 +++++++++++++++++++++++ .gitignore | 1 + LICENSE | 21 +++++ README.md | 0 cmd/csv2json/main.go | 96 +++++++++++++++++++ cmd/generate/main.go | 49 ++++++++++ go.mod | 5 + go.sum | 2 + pkg/csv.go | 170 ++++++++++++++++++++++++++++++++++ 9 files changed, 461 insertions(+) create mode 100644 .github/workflows/release.yml create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 cmd/csv2json/main.go create mode 100644 cmd/generate/main.go create mode 100644 go.mod create mode 100644 go.sum create mode 100644 pkg/csv.go diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..9ed52ac --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,117 @@ +name: Build and Release + +on: + push: + tags: + - "v*" # Trigger the workflow on tags starting with "v" + +jobs: + build: + runs-on: ubuntu-latest + + strategy: + matrix: + goos: [linux, darwin, windows] + goarch: [386, amd64, arm, arm64] + exclude: + # Exclude certain combinations + - goos: darwin + goarch: 386 + - goos: darwin + goarch: arm + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: "1.22" # Adjust the Go version if necessary + + - name: Build binary + run: | + mkdir -p bin + GOOS=${{ matrix.goos }} GOARCH=${{ matrix.goarch }} go build -o bin/csv2json_${{ matrix.goos }}_${{ matrix.goarch }}$([[ ${{ matrix.goos }} == 'windows' ]] && echo .exe || echo '') ./cmd/csv2json/main.go + + - name: List files for debug + run: ls -la bin + + - name: Move binary to release directory + run: | + mkdir -p release + ls -la release + mv bin/csv2json_${{ matrix.goos }}_${{ matrix.goarch }}* release/ + ls -la release + + - name: List release directory for debug + run: ls -la release + + - name: Upload Release Assets + uses: actions/upload-artifact@v4 + with: + name: release-assets-${{ matrix.goos }}-${{ matrix.goarch }} + path: release/ + + release: + runs-on: ubuntu-latest + needs: build + + steps: + - name: Download Release Assets + uses: actions/download-artifact@v4 + with: + path: release-assets + pattern: release-assets-* + merge-multiple: true + + - name: Create directories for compression + run: mkdir -p release-compressed + + - name: Compress Release Assets + run: | + version=${GITHUB_REF#refs/tags/v} + + # Set up name mappings + declare -A goos_map=( ["linux"]="linux" ["darwin"]="macos" ["windows"]="windows" ) + declare -A goarch_map=( ["386"]="x86" ["amd64"]="x64" ["arm"]="arm" ["arm64"]="arm64" ) + + # Compress binaries + for file in release-assets/*; do + base_name=$(basename "$file") + echo "Compressing $base_name" + + # Extract goos and goarch + parts=(${base_name//_/ }) + goos=${parts[1]} + goarch_with_ext=${parts[2]} + goarch=${goarch_with_ext%%.*} # Remove the file extension + + friendly_goos=${goos_map[$goos]} + friendly_goarch=${goarch_map[$goarch]} + + # Rename the file to csv2json or csv2json.exe + if [[ $goos == "windows" ]]; then + mv "$file" release-assets/csv2json.exe + file="release-assets/csv2json.exe" + else + mv "$file" release-assets/csv2json + file="release-assets/csv2json" + fi + + # Use .zip for macOS and Windows, .tar.gz for Linux + if [[ $goos == "windows" || $goos == "darwin" ]]; then + zip_filename="csv2json-${version}-${friendly_goos}-${friendly_goarch}.zip" + zip release-compressed/${zip_filename} -j "$file" + else + tar_filename="csv2json-${version}-${friendly_goos}-${friendly_goarch}.tar.gz" + tar -czvf release-compressed/${tar_filename} -C release-assets csv2json + fi + done + + - name: Create GitHub Release + uses: softprops/action-gh-release@v2 + with: + files: release-compressed/* + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6b67131 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +test.csv \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..2149952 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 TechMDW + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..e69de29 diff --git a/cmd/csv2json/main.go b/cmd/csv2json/main.go new file mode 100644 index 0000000..f436cb1 --- /dev/null +++ b/cmd/csv2json/main.go @@ -0,0 +1,96 @@ +package main + +import ( + "bufio" + "flag" + "fmt" + "io" + "os" + "time" + + csv2json "github.com/TechMDW/csv2json/pkg" +) + +func main() { + filepath := flag.String("file", "", "Path to the CSV file") + seperator := flag.String("seperator", "", "CSV seperator, default is auto detect") + header := flag.Bool("header", false, "CSV has header") + + flag.Parse() + + var sep rune + if *seperator != "" { + sep = []rune(*seperator)[0] + } + + if *filepath != "" { + data, err := csv2json.ParseFile(*filepath, sep) + if err != nil { + panic(err) + } + + jsonData, err := data.ToJSON(*header) + if err != nil { + panic(err) + } + + println(string(jsonData)) + return + } + + // Workaround to avoid blocking (io.ReadAll) on stdin when no data input. + // Read lines from stdin and append to csvData until EOF. + lineChan := make(chan string) + errChan := make(chan error) + doneChan := make(chan struct{}) + + go func() { + scanner := bufio.NewScanner(os.Stdin) + for scanner.Scan() { + lineChan <- scanner.Text() + } + if err := scanner.Err(); err != nil && err != io.EOF { + errChan <- err + } + close(doneChan) + }() + + timeout := 1000 * time.Millisecond + timer := time.NewTimer(timeout) + defer timer.Stop() + + var csvData []byte + + for { + select { + case line := <-lineChan: + csvData = append(csvData, []byte(line+"\n")...) + if !timer.Stop() { + <-timer.C + } + timer.Reset(timeout) + case err := <-errChan: + panic(err) + case <-doneChan: + if len(csvData) == 0 { + fmt.Println("No CSV data provided") + return + } + data, err := csv2json.Parse(csvData, sep) + if err != nil { + panic(err) + } + + jsonData, err := data.ToJSON(*header) + if err != nil { + panic(err) + } + + fmt.Println(string(jsonData)) + return + case <-timer.C: + fmt.Println("No CSV data provided (timeout)") + return + } + } +} diff --git a/cmd/generate/main.go b/cmd/generate/main.go new file mode 100644 index 0000000..7a8260d --- /dev/null +++ b/cmd/generate/main.go @@ -0,0 +1,49 @@ +package main + +import ( + "encoding/csv" + "fmt" + "os" + "strconv" + + "github.com/TechMDW/randish" +) + +func main() { + const numHeaders = 1000 + const numRows = 10000 + + file, err := os.Create("test.csv") + if err != nil { + fmt.Println("Error creating file:", err) + return + } + defer file.Close() + + writer := csv.NewWriter(file) + defer writer.Flush() + + headers := make([]string, numHeaders) + for i := 0; i < numHeaders; i++ { + headers[i] = "Column" + strconv.Itoa(i+1) + } + if err := writer.Write(headers); err != nil { + fmt.Println("Error writing headers:", err) + return + } + + rand := randish.RandS() + + for i := 0; i < numRows; i++ { + row := make([]string, numHeaders) + for j := 0; j < numHeaders; j++ { + row[j] = strconv.Itoa(rand.Intn(100) + 1) + } + if err := writer.Write(row); err != nil { + fmt.Println("Error writing row:", err) + return + } + } + + fmt.Println("CSV file generated successfully.") +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..2125d66 --- /dev/null +++ b/go.mod @@ -0,0 +1,5 @@ +module github.com/TechMDW/csv2json + +go 1.22.0 + +require github.com/TechMDW/randish v0.0.0-20230622121239-ae1cb6f6bfa6 diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..6b0ce7f --- /dev/null +++ b/go.sum @@ -0,0 +1,2 @@ +github.com/TechMDW/randish v0.0.0-20230622121239-ae1cb6f6bfa6 h1:r4435wx45Wm++uZtyNg6/Sg7q2rDQFVroW/sAfRu4Bc= +github.com/TechMDW/randish v0.0.0-20230622121239-ae1cb6f6bfa6/go.mod h1:X1PDNLQpmwGrhhbDUVvA+fJWUzZ1simGm0Fv/qmiSgk= diff --git a/pkg/csv.go b/pkg/csv.go new file mode 100644 index 0000000..7dadb40 --- /dev/null +++ b/pkg/csv.go @@ -0,0 +1,170 @@ +package csv2json + +import ( + "bufio" + "bytes" + "encoding/json" + "os" + "strconv" + "strings" +) + +type CSVData [][]interface{} + +func (c CSVData) ToJSON(header bool) ([]byte, error) { + return ConvertToJSON(c, header) +} + +// parseScanner parses the CSV data from a bufio.Scanner +func parseScanner(scanner *bufio.Scanner, separator rune) (CSVData, error) { + var arr CSVData + + for scanner.Scan() { + line := scanner.Text() + + if separator == 0 { + separator = detectSeparator(line) + } + + values := strings.Split(line, string(separator)) + + interfaceValues := make([]interface{}, len(values)) + for i, v := range values { + interfaceValues[i] = inferType(v) + } + + arr = append(arr, interfaceValues) + } + + if err := scanner.Err(); err != nil { + return nil, err + } + + return arr, nil +} + +// Parse parses CSV data from a byte slice +func Parse(csv []byte, separator rune) (CSVData, error) { + scanner := bufio.NewScanner(bytes.NewReader(csv)) + return parseScanner(scanner, separator) +} + +// ParseFile parses CSV data from a file +func ParseFile(path string, separator rune) (CSVData, error) { + file, err := os.Open(path) + if err != nil { + return nil, err + } + defer file.Close() + + scanner := bufio.NewScanner(file) + return parseScanner(scanner, separator) +} + +// ParseCSVToStruct parses CSV data from a byte slice and unmarshals it into the provided struct type +func ParseCSVToStruct(csvData []byte, separator rune, result interface{}) error { + data, err := Parse(csvData, separator) + if err != nil { + return err + } + + jsonData, err := ConvertToJSON(data, true) + if err != nil { + return err + } + + return json.Unmarshal(jsonData, result) +} + +// ParseFileToStruct parses CSV data from a file and unmarshals it into the provided struct type +func ParseFileToStruct(filePath string, separator rune, result interface{}) error { + data, err := ParseFile(filePath, separator) + if err != nil { + return err + } + + jsonData, err := ConvertToJSON(data, true) + if err != nil { + return err + } + + return json.Unmarshal(jsonData, result) +} + +// ConvertToJSON converts CSVData to JSON format, using headers if specified +func ConvertToJSON(data CSVData, header bool) ([]byte, error) { + if header && len(data) > 0 { + headers := data[0] + // Remove trailing nil values from headers, if any + for i := len(headers) - 1; i >= 0; i-- { + if headers[i] == nil { + headers = headers[:i] + } else { + break + } + } + + var result []map[string]interface{} + + for _, row := range data[1:] { + rowMap := make(map[string]interface{}) + for i, value := range row[:len(headers)] { + key, ok := headers[i].(string) + if ok { + rowMap[key] = value + } + } + result = append(result, rowMap) + } + + return json.Marshal(result) + } + + return json.Marshal(data) +} + +// detectSeparator detects the separator used in a CSV line, pretty basic +func detectSeparator(line string) (separator rune) { + separators := []rune{',', ';', '\t', '|'} + maxCount := 0 + + for _, sep := range separators { + count := strings.Count(line, string(sep)) + if count > maxCount { + maxCount = count + separator = sep + } + } + + if separator == 0 { + separator = ',' + } + + return +} + +func inferType(value string) interface{} { + // Return nil if value is "null" + if value == "null" || value == "" { + return nil + } + + // Attempt to parse as bool + if inferredValue, err := strconv.ParseBool(value); err == nil { + return inferredValue + } + + // Attempt to parse as int + if inferredValue, err := strconv.Atoi(value); err == nil { + return inferredValue + } + + // Replace commas with dots for float parsing + valueFloat := strings.Replace(value, ",", ".", 1) + // Attempt to parse as float64 + if inferredValue, err := strconv.ParseFloat(valueFloat, 64); err == nil { + return inferredValue + } + + return value +}