Skip to content

cmd.sync_git

cmd.sync_git¤

get_dataherb(source) ¤

Load the contents of `dataherb.json` from a local source folder.

Parameters:

Name Type Description Default
source Path

local folder

required
Source code in dataherb/cmd/sync_git.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
def get_dataherb(source: Path) -> dict:
    """
    Load the parsed contents of ``dataherb.json`` from a local folder.

    If the file is missing, an explanatory message is printed to stderr and
    the process exits with a non-zero status.

    :param source: local folder expected to contain ``dataherb.json``
    :return: the decoded JSON content of ``dataherb.json``
    :raises SystemExit: if ``dataherb.json`` does not exist in ``source``
    """

    metadata_file = source / "dataherb.json"

    if not metadata_file.exists():
        click.echo(
            f"No dataherb.json found in {source}. Please run `dataherb create` first.",
            err=True,
        )
        # A missing metadata file is a failure: report it through the exit
        # status so shells and scripts do not mistake it for success.
        sys.exit(1)

    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's locale encoding.
    with open(metadata_file, "r", encoding="utf-8") as f:
        return json.load(f)

is_git_repo(path) ¤

Check whether `path` is a Git repository.

Parameters:

Name Type Description Default
path Path

path to check

required
Source code in dataherb/cmd/sync_git.py
11
12
13
14
15
16
17
18
19
20
21
def is_git_repo(path: Path) -> bool:
    """
    Check whether ``path`` is a git repository.

    :param path: path to check
    :return: ``True`` if a git repository can be opened at ``path``,
        ``False`` if the path does not exist or is not a git repository
    """
    try:
        # Accessing git_dir forces GitPython to resolve the repository.
        _ = git.Repo(path).git_dir
    except (git.exc.InvalidGitRepositoryError, git.exc.NoSuchPathError):
        # NoSuchPathError is raised for a nonexistent path; for a yes/no
        # predicate that is simply "not a repo", not an error.
        return False
    return True

remote_git_repo(metadata_url) ¤

Parse a remote Git repository URL and derive the raw-content URI of its metadata.

Parameters:

Name Type Description Default
metadata_url str

remote url to metadata file

required
Source code in dataherb/cmd/sync_git.py
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
def remote_git_repo(metadata_url: str):
    """
    Break a remote git repo url into its components.

    The url is parsed with ``giturlparse``. Only hosts that have a known
    raw-content base url (currently just ``github.com``) are accepted.

    :param metadata_url: remote url to metadata file
    :return: dict with ``metadata_uri`` (raw-content base url followed by the
        parsed pathname), ``path`` (the parsed pathname), and the
        ``protocol``, ``host``, ``resource``, ``user``, ``port``, ``name``
        and ``owner`` attributes of the parsed url
    :raises ValueError: if the host of the url has no raw-content mapping
    """

    # Maps a git host to the base url that serves its raw file content.
    raw_content_roots = {"github.com": "https://raw.githubusercontent.com"}

    git_url = giturlparse.parse(metadata_url)
    host = git_url.host

    if host not in raw_content_roots:
        raise ValueError(f"{host} is not supported.")

    details = {
        "metadata_uri": f"{raw_content_roots[host]}{git_url.pathname}",
        "path": git_url.pathname,
    }
    # The remaining keys are copied one-to-one from the parsed url.
    for field in ("protocol", "host", "resource", "user", "port", "name", "owner"):
        details[field] = getattr(git_url, field)

    return details

upload_dataset_to_git(source, target, experimental=False) ¤

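Upload a local dataset folder to a remote Git repository. Unless `experimental` is set, only manual sync instructions are printed.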

Parameters:

Name Type Description Default
source Path

local folder

required
target str

remote url

required
experimental bool

experimental flag

False
Source code in dataherb/cmd/sync_git.py
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
def _push_to_new_origin(repo, target: str) -> None:
    """
    Register ``target`` as remote ``origin`` of ``repo`` and push to it.

    After creating the remote, it is fetched, a local ``master`` head is
    created from ``origin``'s ``master`` ref and set to track it, that head
    is checked out, and finally the remote is pushed to.

    :param repo: ``git.Repo`` instance to operate on
    :param target: remote url
    :raises RuntimeError: if the remote does not exist after creation
    """
    origin = repo.create_remote("origin", target)
    # Explicit check instead of ``assert`` so it still runs under ``python -O``.
    if not origin.exists():
        raise RuntimeError(f"Failed to create remote 'origin' for {target}.")
    origin.fetch()
    repo.create_head("master", origin.refs.master).set_tracking_branch(
        origin.refs.master
    ).checkout()
    origin.push()


def upload_dataset_to_git(
    source: Path, target: str, experimental: bool = False
) -> None:
    """
    Upload a local folder to a remote git repository.

    Without the ``experimental`` flag nothing is uploaded; manual sync
    instructions are printed instead. With the flag set, the content of
    ``source`` is added and committed (initializing a repository first if
    needed) and pushed to ``target``.

    :param source: local folder
    :param target: remote url
    :param experimental: experimental flag; when ``False`` only instructions
        are printed
    :raises RuntimeError: if the ``origin`` remote cannot be created
    """

    is_git_initialized = is_git_repo(source)

    if not experimental:
        text = (
            "git sync is still a WIP.\n"
            f"Please go to {source} and sync your git repository to {target} manually.\n"
        )
        if is_git_initialized:
            text += "Note: simply add, commit and push."
        else:
            text += f"Note: git init your repo, commit, add remote {target}, and push."
        click.echo(text)
        return

    if is_git_initialized:
        repo = git.Repo(source)
        repo.index.add(["*"])
        repo.index.commit("created dataset: added dataherb.json")

        if repo.remotes:
            # A remote is already configured: push using the existing setup.
            repo.git.push()
        else:
            _push_to_new_origin(repo, target)
    else:
        repo = git.Repo.init(source)
        repo.git.add(["*"])
        repo.index.commit("initial commit")
        _push_to_new_origin(repo, target)