-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathload_csv.py
More file actions
98 lines (85 loc) · 3.09 KB
/
load_csv.py
File metadata and controls
98 lines (85 loc) · 3.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# Copyright 2021 RelationalAI, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Load a CSV file into the given database with the given relation name."""
from argparse import ArgumentParser
import json
from os import path
from urllib.request import HTTPError
from railib import api, config, show
def _read(fname: str) -> str:
    """Return the full contents of the file at ``fname`` as a string.

    The file is opened in text mode and closed again before returning.
    """
    with open(fname) as source:
        contents = source.read()
    return contents
def _sansext(fname: str) -> str:
    """Return the base name of ``fname`` with its final extension removed.

    Any leading directory components are dropped first, so
    ``"dir/data.csv"`` becomes ``"data"``.
    """
    base = path.basename(fname)
    stem, _extension = path.splitext(base)
    return stem
def run(database: str, engine: str, fname: str, relation: str,
        syntax: dict, schema: dict, profile: str):
    """Load the CSV file ``fname`` into ``database`` and print the response.

    The file is read in full first. When ``relation`` is empty or ``None``,
    the relation name falls back to the file's base name without its
    extension. The value returned by ``api.load_csv`` is printed to stdout
    as JSON indented by two spaces.
    """
    csv_text = _read(fname)
    if relation:
        target = relation
    else:
        target = _sansext(fname)
    # Build the API context from the configuration read for the profile.
    ctx = api.Context(**config.read(profile=profile))
    response = api.load_csv(
        ctx, database, engine, target, csv_text, syntax, schema
    )
    print(json.dumps(response, indent=2))
def _parse_schema(spec: str) -> dict:
    """Parse a comma separated ``col=type`` list into a ``{col: type}`` dict.

    Empty entries are skipped, so an empty ``spec`` (the ``--schema``
    default) yields an empty dict. Whitespace around column names and types
    is stripped, which lets ``"a=int, b=string"`` work as expected.

    Raises:
        ValueError: if a non-empty entry is not of the form ``col=type``.
    """
    schema = {}
    for pair in spec.split(","):
        if not pair.strip():
            continue
        # Split on the first "=" only; everything after it is the type.
        col, sep, rel_type = pair.partition("=")
        col, rel_type = col.strip(), rel_type.strip()
        if not sep or not col or not rel_type:
            raise ValueError(
                f"invalid schema entry {pair!r}, expected `col=type`"
            )
        schema[col] = rel_type
    return schema


if __name__ == "__main__":
    # Command line entry point: parse the arguments, assemble the CSV syntax
    # options and schema, then load the file.
    p = ArgumentParser()
    p.add_argument("database", type=str, help="database name")
    p.add_argument("engine", type=str, help="engine name")
    p.add_argument("file", type=str, help="source file")
    p.add_argument(
        "--header-row",
        type=int,
        default=None,
        help="header row number, 0 for no header (default: 1)",
    )
    p.add_argument("--delim", type=str, default=None, help="field delimiter")
    p.add_argument(
        "--escapechar", type=str, default=None, help="character used to escape quotes"
    )
    p.add_argument("--quotechar", type=str, default=None, help="quoted field character")
    p.add_argument(
        "-r",
        "--relation",
        type=str,
        default=None,
        help="relation name (default: file name)",
    )
    p.add_argument("-p", "--profile", type=str, default="default", help="profile name")
    p.add_argument(
        "--schema",
        type=str,
        default="",
        help="Comma separated list of expressions `col=type` specifying that `col` has Rel type `type`."
    )
    args = p.parse_args()

    # Only forward the syntax options the user actually supplied.
    syntax = {}  # find full list of syntax options in the RAI docs
    if args.header_row is not None:
        # 0 is a meaningful value (no header), hence the explicit None check.
        syntax["header_row"] = args.header_row
    if args.delim:
        syntax["delim"] = args.delim
    if args.escapechar:
        syntax["escapechar"] = args.escapechar
    if args.quotechar:
        syntax["quotechar"] = args.quotechar

    try:
        schema = _parse_schema(args.schema)
    except ValueError as e:
        # Report malformed --schema values as a usage error; p.error exits.
        p.error(str(e))

    try:
        # Argument order must match run(): ..., syntax, schema, profile.
        run(
            args.database,
            args.engine,
            args.file,
            args.relation,
            syntax,
            schema,
            args.profile,
        )
    except HTTPError as e:
        show.http_error(e)