mirror of
https://github.com/cubixle/csv-vs-parquet.git
synced 2026-04-24 14:44:41 +01:00
init
This commit is contained in:
96
main.go
Normal file
96
main.go
Normal file
@@ -0,0 +1,96 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"log/slog"
|
||||
"os"
|
||||
|
||||
"github.com/xitongsys/parquet-go-source/local"
|
||||
"github.com/xitongsys/parquet-go/parquet"
|
||||
"github.com/xitongsys/parquet-go/writer"
|
||||
)
|
||||
|
||||
func main() {
|
||||
t := flag.String("type", "csv", "")
|
||||
rows := flag.Int("amount", 1000, "")
|
||||
|
||||
flag.Parse()
|
||||
|
||||
slog.Info("Running", "type", *t, "rows", *rows)
|
||||
|
||||
switch *t {
|
||||
case "csv":
|
||||
err := writeCSV(*rows)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
case "parquet":
|
||||
err := writeParquet(*rows)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func writeCSV(rows int) error {
|
||||
const filename = "output.csv"
|
||||
|
||||
f, err := os.Create(filename)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for i := 0; i < rows; i++ {
|
||||
_, err := fmt.Fprintf(f, "%d,ice hockey,pizza", i)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
type Row struct {
|
||||
ID int `parquet:"name=id, type=INT32, encoding=PLAIN"`
|
||||
Sport string `parquet:"name=sport, type=BYTE_ARRAY, convertedtype=UTF8, encoding=PLAIN_DICTIONARY"`
|
||||
Food string `parquet:"name=food, type=BYTE_ARRAY, convertedtype=UTF8, encoding=PLAIN_DICTIONARY"`
|
||||
}
|
||||
|
||||
func writeParquet(rows int) error {
|
||||
const filename = "output.parquet"
|
||||
|
||||
fw, err := local.NewLocalFileWriter(filename)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to NewLocalFileWriter %w", err)
|
||||
}
|
||||
|
||||
// write
|
||||
pw, err := writer.NewParquetWriter(fw, new(Row), 4)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to NewParquertWriter %w", err)
|
||||
}
|
||||
|
||||
pw.RowGroupSize = 128 * 1024 * 1024 // 128M
|
||||
pw.CompressionType = parquet.CompressionCodec_SNAPPY
|
||||
|
||||
for i := 0; i < rows; i++ {
|
||||
r := Row{
|
||||
ID: i,
|
||||
Food: "pizza",
|
||||
Sport: "ice hockey",
|
||||
}
|
||||
if err = pw.Write(r); err != nil {
|
||||
return fmt.Errorf("failed to Write %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if err = pw.WriteStop(); err != nil {
|
||||
return fmt.Errorf("failed to WriteStop %w", err)
|
||||
}
|
||||
|
||||
fw.Close()
|
||||
|
||||
return nil
|
||||
}
|
||||
Reference in New Issue
Block a user