Files
csv-vs-parquet/main.go
cubixle 0539fd5361 init
2024-05-07 09:04:11 +01:00

97 lines
1.8 KiB
Go

package main
import (
"flag"
"fmt"
"log"
"log/slog"
"os"
"github.com/xitongsys/parquet-go-source/local"
"github.com/xitongsys/parquet-go/parquet"
"github.com/xitongsys/parquet-go/writer"
)
func main() {
t := flag.String("type", "csv", "")
rows := flag.Int("amount", 1000, "")
flag.Parse()
slog.Info("Running", "type", *t, "rows", *rows)
switch *t {
case "csv":
err := writeCSV(*rows)
if err != nil {
log.Fatal(err)
}
case "parquet":
err := writeParquet(*rows)
if err != nil {
log.Fatal(err)
}
}
}
func writeCSV(rows int) error {
const filename = "output.csv"
f, err := os.Create(filename)
if err != nil {
return err
}
for i := 0; i < rows; i++ {
_, err := fmt.Fprintf(f, "%d,ice hockey,pizza", i)
if err != nil {
return err
}
}
return nil
}
type Row struct {
ID int `parquet:"name=id, type=INT32, encoding=PLAIN"`
Sport string `parquet:"name=sport, type=BYTE_ARRAY, convertedtype=UTF8, encoding=PLAIN_DICTIONARY"`
Food string `parquet:"name=food, type=BYTE_ARRAY, convertedtype=UTF8, encoding=PLAIN_DICTIONARY"`
}
func writeParquet(rows int) error {
const filename = "output.parquet"
fw, err := local.NewLocalFileWriter(filename)
if err != nil {
return fmt.Errorf("failed to NewLocalFileWriter %w", err)
}
// write
pw, err := writer.NewParquetWriter(fw, new(Row), 4)
if err != nil {
return fmt.Errorf("failed to NewParquertWriter %w", err)
}
pw.RowGroupSize = 128 * 1024 * 1024 // 128M
pw.CompressionType = parquet.CompressionCodec_SNAPPY
for i := 0; i < rows; i++ {
r := Row{
ID: i,
Food: "pizza",
Sport: "ice hockey",
}
if err = pw.Write(r); err != nil {
return fmt.Errorf("failed to Write %w", err)
}
}
if err = pw.WriteStop(); err != nil {
return fmt.Errorf("failed to WriteStop %w", err)
}
fw.Close()
return nil
}