rai-sdk-python/examples/load_csv.py at 4ae5eef642bcc4cb2059bff2d44b36701206c61b · RelationalAI/rai-sdk-python · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# Copyright 2021 RelationalAI, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Load a CSV file into the given database with the given relation name."""

from argparse import ArgumentParser
import json
from os import path
from urllib.request import HTTPError
from railib import api, config, show


def _read(fname: str) -> str:
    """Return the entire contents of the text file at `fname`."""
    # Opened in text mode with the platform's default encoding.
    with open(fname) as source:
        contents = source.read()
    return contents


def _sansext(fname: str) -> str:
    """Return the file name of `fname` without its directory or last extension."""
    base = path.basename(fname)
    stem, _extension = path.splitext(base)
    return stem


def run(database: str, engine: str, fname: str, relation: str,
        syntax: dict, schema: dict, profile: str):
    """Load the CSV file `fname` into `database` and print the response.

    The file is read in full and passed to `api.load_csv` together with the
    `syntax` and `schema` options. When `relation` is empty or None, the
    file's base name without its extension is used as the relation name.
    The context is built from the configuration read for `profile`, and the
    response is printed as indented JSON.
    """
    csv_text = _read(fname)
    if not relation:
        # No explicit relation name: derive it from the file name.
        relation = _sansext(fname)
    settings = config.read(profile=profile)
    context = api.Context(**settings)
    response = api.load_csv(
        context, database, engine, relation, csv_text, syntax, schema
    )
    print(json.dumps(response, indent=2))


def _parse_schema(spec: str) -> dict:
    """Parse a comma separated list of `col=type` items into a dict.

    Empty input (the `--schema` default) and empty items, such as the one
    produced by a trailing comma, contribute no entries. Each item is split
    on its first `=`.

    Raises:
        ValueError: if an item has no `=`, or an empty column name or type.
    """
    parsed = {}
    for pair in spec.split(","):
        if not pair:
            continue
        col, sep, rel_type = pair.partition("=")
        if not sep or not col or not rel_type:
            raise ValueError(
                "invalid schema entry {!r}, expected `col=type`".format(pair)
            )
        parsed[col] = rel_type
    return parsed


if __name__ == "__main__":
    # Command line entry point: collect the CSV syntax and schema options and
    # hand them to `run`.
    p = ArgumentParser()
    p.add_argument("database", type=str, help="database name")
    p.add_argument("engine", type=str, help="engine name")
    p.add_argument("file", type=str, help="source file")
    p.add_argument(
        "--header-row",
        type=int,
        default=None,
        help="header row number, 0 for no header (default: 1)",
    )
    p.add_argument("--delim", type=str, default=None, help="field delimiter")
    p.add_argument(
        "--escapechar", type=str, default=None, help="character used to escape quotes"
    )
    p.add_argument("--quotechar", type=str, default=None, help="quoted field character")
    p.add_argument(
        "-r",
        "--relation",
        type=str,
        default=None,
        help="relation name (default: file name)",
    )
    p.add_argument("-p", "--profile", type=str, default="default", help="profile name")
    p.add_argument(
        "--schema",
        type=str,
        default="",
        help="Comma separated list of expressions `col=type` specifying that `col` has Rel type `type`."
    )

    args = p.parse_args()

    # Only options given on the command line are forwarded; the full list of
    # syntax options is in the RAI docs.
    syntax = {}
    # 0 is a meaningful header row ("no header"), so compare against None.
    if args.header_row is not None:
        syntax["header_row"] = args.header_row
    if args.delim:
        syntax["delim"] = args.delim
    if args.escapechar:
        syntax["escapechar"] = args.escapechar
    if args.quotechar:
        syntax["quotechar"] = args.quotechar

    try:
        schema = _parse_schema(args.schema)
    except ValueError as e:
        # Report a malformed --schema as a usage error instead of a traceback.
        p.error(str(e))

    try:
        # Argument order must match run(): ..., syntax, schema, profile.
        run(
            args.database,
            args.engine,
            args.file,
            args.relation,
            syntax,
            schema,
            args.profile,
        )
    except HTTPError as e:
        show.http_error(e)