
csv import delim param
Brian Hendriks committed Aug 8, 2019
1 parent ed41e11 commit 8e6cc41
Showing 4 changed files with 48 additions and 16 deletions.
36 changes: 26 additions & 10 deletions go/cmd/dolt/commands/tblcmds/import.go
@@ -43,6 +43,7 @@ const (
contOnErrParam = "continue"
primaryKeyParam = "pk"
fileTypeParam = "file-type"
delimParam = "delim"
)

var schemaFileHelp = "Schema definition files are json files in the format:" + `
@@ -97,31 +98,33 @@ be used when creating a new table, or updating an existing table.
`
In both create and update scenarios the file's extension is used to infer the type of the file. If a file does not
have the expected extension then the <b>--file-type</b> parameter should be used to explicitly define the format of
the file in one of the supported formats (csv, psv, nbf, json, xlsx)`
the file in one of the supported formats (csv, psv, nbf, json, xlsx). For files separated by a delimiter other than a
',' (type csv) or a '|' (type psv), the --delim parameter can be used to specify the delimiter`

var importSynopsis = []string{
"-c [-f] [--pk <field>] [--schema <file>] [--map <file>] [--continue] [--file-type <type>] <table> <file>",
"-u [--schema <file>] [--map <file>] [--continue] [--file-type <type>] <table> <file>",
}

func validateImportArgs(apr *argparser.ArgParseResults, usage cli.UsagePrinter) (mvdata.MoveOperation, *mvdata.DataLocation, *mvdata.DataLocation) {
func validateImportArgs(apr *argparser.ArgParseResults, usage cli.UsagePrinter) (mvdata.MoveOperation, *mvdata.DataLocation, *mvdata.DataLocation, interface{}) {
if apr.NArg() != 2 {
usage()
return mvdata.InvalidOp, nil, nil
return mvdata.InvalidOp, nil, nil, nil
}

var mvOp mvdata.MoveOperation
var srcOpts interface{}
if !apr.Contains(createParam) && !apr.Contains(updateParam) {
cli.PrintErrln("Must include '-c' for initial table import or -u to update existing table.")
return mvdata.InvalidOp, nil, nil
return mvdata.InvalidOp, nil, nil, nil
} else if apr.Contains(createParam) {
mvOp = mvdata.OverwriteOp
} else {
mvOp = mvdata.UpdateOp
if apr.Contains(outSchemaParam) {
cli.PrintErrln("fatal:", outSchemaParam+" is not supported for update operations")
usage()
return mvdata.InvalidOp, nil, nil
return mvdata.InvalidOp, nil, nil, nil
}
}

@@ -130,23 +133,34 @@ func validateImportArgs(apr *argparser.ArgParseResults, usage cli.UsagePrinter)
cli.PrintErrln(
color.RedString("'%s' is not a valid table name\n", tableName),
"table names must match the regular expression:", doltdb.TableNameRegexStr)
return mvdata.InvalidOp, nil, nil
return mvdata.InvalidOp, nil, nil, nil
}

path := apr.Arg(1)
fType, _ := apr.GetValue(fileTypeParam)
delim, hasDelim := apr.GetValue(delimParam)
fType, hasFileType := apr.GetValue(fileTypeParam)
fileLoc := mvdata.NewDataLocation(path, fType)

if fileLoc.Format == mvdata.InvalidDataFormat && !hasFileType && hasDelim {
fileLoc.Format = mvdata.CsvFile
srcOpts = mvdata.CsvOptions{Delim: delim}
}

if fileLoc.Format == mvdata.InvalidDataFormat {
cli.PrintErrln(
color.RedString("Could not infer the type of file '%s'\n", path),
"File extensions should match supported file types, or should be explicitly defined via the file-type parameter")
return mvdata.InvalidOp, nil, nil
return mvdata.InvalidOp, nil, nil, nil
}

if fileLoc.Format != mvdata.CsvFile && hasDelim {
cli.PrintErrln(color.RedString("delim is not a valid parameter for this type of file"))
return mvdata.InvalidOp, nil, nil, nil
}

tableLoc := &mvdata.DataLocation{Path: tableName, Format: mvdata.DoltDB}

return mvOp, tableLoc, fileLoc
return mvOp, tableLoc, fileLoc, srcOpts
}

func Import(commandStr string, args []string, dEnv *env.DoltEnv) int {
@@ -170,7 +184,7 @@ func parseCreateArgs(commandStr string, args []string) (bool, *mvdata.MoveOption

help, usage := cli.HelpAndUsagePrinters(commandStr, importShortDesc, importLongDesc, importSynopsis, ap)
apr := cli.ParseArgs(ap, args, help)
moveOp, tableLoc, fileLoc := validateImportArgs(apr, usage)
moveOp, tableLoc, fileLoc, srcOpts := validateImportArgs(apr, usage)

if fileLoc == nil || tableLoc == nil {
return false, nil
@@ -188,6 +202,7 @@ func parseCreateArgs(commandStr string, args []string) (bool, *mvdata.MoveOption
PrimaryKey: primaryKey,
Src: fileLoc,
Dest: tableLoc,
SrcOptions: srcOpts,
}
}

@@ -203,6 +218,7 @@ func createArgParser() *argparser.ArgParser {
ap.SupportsString(mappingFileParam, "m", "mapping_file", "A file that lays out how fields should be mapped from input data to output data.")
ap.SupportsString(primaryKeyParam, "pk", "primary_key", "Explicitly define the name of the field in the schema which should be used as the primary key.")
ap.SupportsString(fileTypeParam, "", "file_type", "Explicitly define the type of the file if it can't be inferred from the file extension.")
ap.SupportsString(delimParam, "", "delimiter", "Specify a delimiter for a csv-style file that uses a non-comma delimiter.")
return ap
}

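The new --delim flag documented above applies to files whose extension does not identify a supported format. A hypothetical invocation is sketched below, assuming the command is run as dolt table import, that .txt is not a recognized extension, and that the table name, file name, primary-key column, and semicolon delimiter are all made up for illustration:

    dolt table import -c --pk id --delim ";" inventory data.txt

Because data.txt has no recognized extension and --file-type is not given, validateImportArgs falls back to treating the file as csv and passes CsvOptions{Delim: ";"} through to the reader.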
17 changes: 14 additions & 3 deletions go/libraries/doltcore/mvdata/data_loc.go
@@ -92,7 +92,7 @@ func (dl *DataLocation) String() string {
}

func NewDataLocation(path, fileFmtStr string) *DataLocation {
var dataFmt DataFormat
dataFmt := InvalidDataFormat

if fileFmtStr == "" {
if doltdb.IsValidTableName(path) {
@@ -129,7 +129,7 @@ func (dl *DataLocation) IsFileType() bool {
return true
}

func (dl *DataLocation) CreateReader(ctx context.Context, root *doltdb.RootValue, fs filesys.ReadableFS, schPath string, tblName string) (rdCl table.TableReadCloser, sorted bool, err error) {
func (dl *DataLocation) CreateReader(ctx context.Context, root *doltdb.RootValue, fs filesys.ReadableFS, schPath string, tblName string, opts interface{}) (rdCl table.TableReadCloser, sorted bool, err error) {
if dl.Format == DoltDB {
tbl, ok, err := root.GetTable(ctx, dl.Path)

@@ -171,7 +171,18 @@ func (dl *DataLocation) CreateReader(ctx context.Context, root *doltdb.RootValue

switch dl.Format {
case CsvFile:
rd, err := csv.OpenCSVReader(root.VRW().Format(), dl.Path, fs, csv.NewCSVInfo())
delim := ","

if opts != nil {
csvOpts, _ := opts.(CsvOptions)

if len(csvOpts.Delim) != 0 {
delim = csvOpts.Delim
}
}

rd, err := csv.OpenCSVReader(root.VRW().Format(), dl.Path, fs, csv.NewCSVInfo().SetDelim(delim))

return rd, false, err

case PsvFile:
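A minimal, self-contained sketch of the option-unwrapping pattern used in the CsvFile case above. CsvOptions mirrors the struct added in data_mover.go, and the fallback to "," matches the default in CreateReader; the comma-ok assertion (rather than the ignored-error form used in the diff) is a small defensive variation, not part of the commit:

package main

import "fmt"

// CsvOptions mirrors the options struct added to the mvdata package in this commit.
type CsvOptions struct {
	Delim string
}

// delimFromOpts reproduces the CsvFile branch of CreateReader: use the
// caller-supplied delimiter when one is present, otherwise fall back to ",".
func delimFromOpts(opts interface{}) string {
	delim := ","
	if opts != nil {
		if csvOpts, ok := opts.(CsvOptions); ok && len(csvOpts.Delim) != 0 {
			delim = csvOpts.Delim
		}
	}
	return delim
}

func main() {
	fmt.Println(delimFromOpts(nil))                    // prints ,
	fmt.Println(delimFromOpts(CsvOptions{Delim: "|"})) // prints |
}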
2 changes: 1 addition & 1 deletion go/libraries/doltcore/mvdata/data_loc_test.go
@@ -250,7 +250,7 @@ func TestCreateRdWr(t *testing.T) {
}

// TODO (oo): fix this for json path test
rd, _, err := loc.CreateReader(context.Background(), root, fs, "schema.json", "")
rd, _, err := loc.CreateReader(context.Background(), root, fs, "schema.json", "", nil)

if err != nil {
t.Fatal("Unexpected error creating reader", err)
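The test above passes nil options, which exercises the default comma path. To show what a non-comma delimiter means in practice, here is a standalone illustration using Go's standard encoding/csv package; dolt ships its own csv package, so this only demonstrates the delimiter concept, not dolt's reader:

package main

import (
	"encoding/csv"
	"fmt"
	"strings"
)

func main() {
	// a pipe-separated payload, analogous to what --delim "|" would describe
	r := csv.NewReader(strings.NewReader("id|name\n1|widget\n"))
	r.Comma = '|' // encoding/csv takes the delimiter as a single rune
	rows, err := r.ReadAll()
	if err != nil {
		panic(err)
	}
	fmt.Println(rows) // [[id name] [1 widget]]
}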
9 changes: 7 additions & 2 deletions go/libraries/doltcore/mvdata/data_mover.go
@@ -40,6 +40,10 @@ const (
InvalidOp MoveOperation = "invalid"
)

type CsvOptions struct {
Delim string
}

type MoveOptions struct {
Operation MoveOperation
ContOnErr bool
@@ -49,6 +53,7 @@ type MoveOptions struct {
PrimaryKey string
Src *DataLocation
Dest *DataLocation
SrcOptions interface{}
}

type DataMover struct {
@@ -84,7 +89,7 @@ func NewDataMover(ctx context.Context, root *doltdb.RootValue, fs filesys.Filesy
var err error
transforms := pipeline.NewTransformCollection()

rd, srcIsSorted, err := mvOpts.Src.CreateReader(ctx, root, fs, mvOpts.SchFile, mvOpts.Dest.Path)
rd, srcIsSorted, err := mvOpts.Src.CreateReader(ctx, root, fs, mvOpts.SchFile, mvOpts.Dest.Path, mvOpts.SrcOptions)

if err != nil {
return nil, &DataMoverCreationError{CreateReaderErr, err}
@@ -206,7 +211,7 @@ func getOutSchema(ctx context.Context, inSch schema.Schema, root *doltdb.RootVal
if mvOpts.Operation == UpdateOp {
// Get schema from target

rd, _, err := mvOpts.Dest.CreateReader(ctx, root, fs, mvOpts.SchFile, mvOpts.Dest.Path)
rd, _, err := mvOpts.Dest.CreateReader(ctx, root, fs, mvOpts.SchFile, mvOpts.Dest.Path, mvOpts.SrcOptions)

if err != nil {
return nil, err
