#!/usr/bin/env python3
#
# image crawler
#
# The image crawler checks for new OpenStack ("cloud") images. Whenever a
# new image is detected, all relevant information needed for maintaining
# an image catalog is collected and stored.
#
# 2023-06-11 v0.4.0 [email protected]
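#
# Example invocations (flags as defined below; paths are illustrative):
#
#   ./image-crawler.py --init-db
#   ./image-crawler.py --config etc/config.yaml --debug
#   ./image-crawler.py --export-only
#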
import argparse
import sys
import os
from loguru import logger
from crawler.core.config import config_read
from crawler.core.database import (
database_connect,
database_disconnect,
database_initialize,
)
from crawler.core.exporter import export_image_catalog, export_image_catalog_all
from crawler.core.main import crawl_image_sources
from crawler.git.base import clone_or_pull, update_repository


def main():
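    # Remember both the invocation directory (used later to resolve a
    # relative local_repository into an export path) and the script's own
    # directory (used for the default config location and for database
    # initialization).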
working_directory = os.getcwd()
program_directory = os.path.dirname(os.path.abspath(__file__))
parser = argparse.ArgumentParser(
description="checks cloud image repositories for new updates and"
+ " keeps track of all images within its sqlite3 database"
)
parser.add_argument(
"--config",
type=str,
required=False,
help="specify the config file to be used (default: <path_to_crawler_dir>/etc/config.yaml)",
)
parser.add_argument(
"--sources",
type=str,
required=False,
help="specify the sources file to be used - overrides value from config file",
)
parser.add_argument(
"--init-db",
action="store_true",
required=False,
help="initialize image catalog database",
)
parser.add_argument(
"--export-only",
action="store_true",
required=False,
help="export only existing image catalog",
)
parser.add_argument(
"--updates-only",
action="store_true",
required=False,
help="check only for updates, do not export catalog",
)
parser.add_argument(
"--debug",
action="store_true",
required=False,
help="give more output for debugging",
)
args = parser.parse_args()
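    # Configure loguru: a verbose, colorized format with source location
    # in debug mode, plain messages otherwise.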
if args.debug:
log_level = "DEBUG"
log_format = (
"<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | "
"<cyan>{name}:{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>"
)
    else:
        log_level = "INFO"
        log_format = "<level>{message}</level>"
logger.remove()
logger.add(sys.stderr, format=log_format, level=log_level, colorize=True)
logger.info("plusserver Image Crawler v0.4.0 started")
# read configuration
if args.config is not None:
config_filename = args.config
else:
# default
config_filename = program_directory + "/etc/config.yaml"
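    # A minimal config.yaml might look like this (keys inferred from the
    # lookups below; values are illustrative; remote_repository, branch,
    # and git_ssh_command are optional):
    #
    #   sources_name: etc/sources.yaml
    #   database_name: image-catalog.db
    #   local_repository: image-catalog
    #   template_path: templates
    #   remote_repository: [email protected]:example/image-catalog.git
    #   branch: main
    #   git_ssh_command: ssh -i ~/.ssh/id_crawler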
config = config_read(config_filename, "configuration")
if config is None:
        logger.error("Unable to open config " + config_filename)
raise SystemExit(1)
# read the image sources
if args.sources is not None:
sources_filename = args.sources
else:
sources_filename = config["sources_name"]
image_source_catalog = config_read(sources_filename, "source catalog")
if image_source_catalog is None:
logger.error("Unable to open image source catalog " + sources_filename)
raise SystemExit(1)
# initialize database when run with --init-db
if args.init_db:
database_initialize(config["database_name"], program_directory)
sys.exit(0)
# clone or update local repository when git is enabled
if "remote_repository" in config:
if "git_ssh_command" in config:
git_ssh_command = config["git_ssh_command"]
else:
git_ssh_command = None
if "branch" in config:
working_branch = config["branch"]
else:
            working_branch = "main"
clone_or_pull(
config["remote_repository"],
config["local_repository"],
working_branch,
git_ssh_command,
)
else:
logger.warning("No image catalog repository configured")
# connect to database
database = database_connect(config["database_name"])
if database is None:
logger.error("Could not open database %s" % config["database_name"])
logger.error(
            'Run "./image-crawler.py --init-db" to create a new database or check your etc/config.yaml'
)
sys.exit(1)
# crawl image sources when requested
if args.export_only:
logger.info("Skipping repository crawling")
updated_sources = {}
else:
logger.info("Start repository crawling")
updated_sources = crawl_image_sources(image_source_catalog, database)
# export image catalog
if args.updates_only:
logger.info("Skipping catalog export")
else:
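        # Resolve a relative local_repository against the directory the
        # crawler was started from; absolute paths are used as-is.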
if config["local_repository"].startswith("/"):
export_path = config["local_repository"]
else:
export_path = working_directory + "/" + config["local_repository"]
if updated_sources:
logger.info("Exporting catalog to %s" % export_path)
export_image_catalog(
database,
image_source_catalog,
updated_sources,
config["local_repository"],
config["template_path"],
)
        elif args.export_only:
            logger.info("Exporting all catalog files to %s" % export_path)
            export_image_catalog_all(
                database,
                image_source_catalog,
                config["local_repository"],
                config["template_path"],
            )
# push changes to git repository when configured
if "remote_repository" in config and updated_sources:
update_repository(
database, config["local_repository"], updated_sources, git_ssh_command
)
else:
logger.info("No remote repository update needed.")
database_disconnect(database)


if __name__ == "__main__":
main()