-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathowners_details.py
executable file
·200 lines (151 loc) · 7.12 KB
/
owners_details.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
#!/usr/local/bin/python3
# Copyright (C) 2019-2022 Dawn M. Foster
# Licensed under GNU General Public License (GPL), version 3 or later: http://www.gnu.org/licenses/gpl.txt
"""
Note: By default, this only looks at a subset of k8s OWNERS files.
It uses the OWNERS files found in sigs.yaml plus the OWNERS_ALIASES file containing leads.
You can also provide, as a command line argument, the full path to an
additional list of OWNERS files to use, which you can generate using
get_more_owners.py.
Parameters
----------
new_owners_file : str
Full path to a file containing a list of owners files
"""
def write_aliases(role, alias_url, csv_file, affil_dict):
    """
    Write the SIG/WG leads from an OWNERS_ALIASES file to the csv file.

    Downloads the file at alias_url, parses it as YAML, keeps only the
    aliases whose name starts with 'sig' or 'wg' (committees, etc. are
    filtered out), and calls write_affil_line once per listed username
    with 'NA' as the subproject.

    Parameters
    ----------
    role : str
        Role passed to write_affil_line for every username
        (build_owners_csv passes 'lead').
    alias_url : str
        URL of the OWNERS_ALIASES file to download. It is also passed to
        write_affil_line as the source of each entry.
    csv_file : file object
        Passed through unchanged to write_affil_line.
    affil_dict : object
        Affiliation data passed through unchanged to write_affil_line.
    """
    import yaml
    from common_functions import download_file, write_affil_line

    leads_suffix = '-leads'

    alias_file = download_file(alias_url)
    aliases = yaml.safe_load(alias_file)

    for alias_name, usernames in aliases['aliases'].items():
        # Filter out anything that isn't a SIG/WG (committees, etc.)
        if not alias_name.startswith(('sig', 'wg')):
            continue

        # Strip the trailing '-leads' only when it is really there, so an
        # alias named differently is not truncated by six characters.
        if alias_name.endswith(leads_suffix):
            sig_or_wg = alias_name[:-len(leads_suffix)]
        else:
            sig_or_wg = alias_name

        # An alias with no members parses as None rather than an empty list.
        for username in usernames or []:
            write_affil_line(username, role, sig_or_wg, 'NA', alias_url, csv_file, affil_dict)
def get_sig_list(sigs):
    """
    Get the list of SIGs from sigs.yaml using the dir format sig-name.

    Parameters
    ----------
    sigs : dict
        Parsed sigs.yaml content. Each entry of sigs['sigs'] must have a
        'dir' key.

    Returns
    -------
    list
        The 'dir' value of every entry in sigs['sigs'], in file order.
    """
    # No YAML parsing happens here (the caller passes already-parsed data),
    # so this helper does not need to import yaml.
    return [sig['dir'] for sig in sigs["sigs"]]
def find_sig(sig_name_list, area):
    """
    Determine which SIG an OWNERS_ALIASES area belongs to.

    Compares the sig-name formatted data from sigs.yaml against an alias
    name from OWNERS_ALIASES (for example 'sig-auth-audit-approvers').

    Parameters
    ----------
    sig_name_list : list of str
        SIG names in the sig-name format, as returned by get_sig_list.
    area : str
        Alias name to classify.

    Returns
    -------
    str
        The longest name in sig_name_list that area starts with, or 'NA'
        when no name matches.
    """
    matching_names = [name for name in sig_name_list if area.startswith(name)]
    if not matching_names:
        return 'NA'
    # If several SIG names are prefixes of the area, the most specific
    # (longest) one is the right owner, whatever the list order is.
    return max(matching_names, key=len)
def kk_aliases(sigs, csv_file, affil_dict):
    """
    Write the owners from the k/k OWNERS_ALIASES file to the csv file.

    Reads the OWNERS_ALIASES file from kubernetes/kubernetes and splits
    each alias name (the "area") into SIG, subproject, and role. Areas are
    mostly, but not always, formatted like sig-name-subproject-role.
    Example: sig-auth-audit-approvers. Every username of an area whose SIG
    could be determined is passed to write_affil_line.

    Parameters
    ----------
    sigs : dict
        Parsed sigs.yaml content, passed to get_sig_list.
    csv_file : file object
        Passed through unchanged to write_affil_line.
    affil_dict : object
        Affiliation data passed through unchanged to write_affil_line.
    """
    import yaml
    from common_functions import download_file, write_affil_line

    # Recognized alias-name endings and the role each one maps to.
    role_suffixes = (
        ('approvers', 'approver'),
        ('reviewers', 'reviewer'),
        ('maintainers', 'maintainer'),
        ('maintainer', 'maintainer'),
    )

    sig_name_list = get_sig_list(sigs)
    owners_url = 'https://raw.githubusercontent.com/kubernetes/kubernetes/master/OWNERS_ALIASES'
    k_k_alias_file = download_file(owners_url)
    k_k_aliases = yaml.safe_load(k_k_alias_file)

    for alias_group in k_k_aliases.values():
        for area, usernames in alias_group.items():
            # Work out the role and cut the role suffix, plus the '-' in
            # front of it, off the end of the area. When no known suffix
            # is present nothing is cut, so the last character of the
            # subproject is no longer lost for 'unknown' roles.
            role = 'unknown'
            stem = area
            for suffix, suffix_role in role_suffixes:
                if area.endswith(suffix):
                    role = suffix_role
                    stem = area[:-len(suffix) - 1]
                    break

            if area.startswith('release-engineering'):
                # This area does not carry a sig- prefix; it is mapped to
                # sig-release explicitly.
                sig_name = 'sig-release'
                subproject = 'release-engineering'
            else:
                sig_name = find_sig(sig_name_list, area)
                # What remains after the SIG name and its trailing '-' is
                # the subproject; an empty remainder means there is none.
                subproject = stem[len(sig_name) + 1:] or 'NA'

            # Areas that could not be tied to a SIG are not written.
            if sig_name == 'NA':
                continue

            # An alias with no members parses as None rather than a list.
            for username in usernames or []:
                write_affil_line(username, role, sig_name, subproject, owners_url, csv_file, affil_dict)
def build_owners_csv():
    """This is the primary function that pulls all of this together.

    It gets the list of OWNERS files from sigs.yaml, downloads the
    content of each OWNERS file, and writes details about owners to
    output/owners_data_<YYYY-MM-DD>.csv, creating the output directory
    if it does not exist yet.

    It also reads an optional first command line argument (sys.argv[1])
    containing the full path to a csv file that lists additional owners
    files, one URL in the first column of each row. Owners files from that
    list that do not already appear in the output are appended with
    'NA' as both SIG name and subproject.
    """
    import csv
    import os
    import sys
    from datetime import datetime

    import yaml

    from common_functions import download_file, read_owners_file, files_done
    from common_functions import read_cncf_affiliations

    # Read additional owners file from command line if available
    new_owners_file = str(sys.argv[1]) if len(sys.argv) > 1 else None

    affil_dict = read_cncf_affiliations()

    sig_file = download_file('https://raw.githubusercontent.com/kubernetes/community/master/sigs.yaml')
    sigs = yaml.safe_load(sig_file)

    today = datetime.today().strftime('%Y-%m-%d')
    os.makedirs('output', exist_ok=True)
    outfile_name = 'output/owners_data_' + today + '.csv'

    # Open the CSV file for writing and write the license and header lines.
    # The with block closes the file even if a download or parse step fails.
    with open(outfile_name, 'w') as csv_file:
        csv_file.write("License: Creative Commons Attribution-ShareAlike 4.0 International License\n")
        csv_file.write("License Link: http://creativecommons.org/licenses/by-sa/4.0/\n")
        csv_file.write("Author: Dr. Dawn M. Foster\n")
        csv_file.write("Status: Updated on April 18 2022\n")
        csv_file.write("Source URL: https://github.com/geekygirldawn/k8s_data/datasets\n\n")
        csv_file.write("company,username,status,sig_name,subproject,owners_file\n")

        kk_aliases(sigs, csv_file, affil_dict)

        # Get list of SIG / WG leads and add them to the csv file
        alias_url = 'https://raw.githubusercontent.com/kubernetes/community/master/OWNERS_ALIASES'
        write_aliases('lead', alias_url, csv_file, affil_dict)

        # Gather data for each SIG in sigs.yaml
        # NOTE: WGs don't have OWNERS files in sigs.yaml
        for sig in sigs['sigs']:
            sig_name = sig['dir']
            for subproject_entry in sig['subprojects']:
                subproject = subproject_entry['name']
                for owners_url in subproject_entry['owners']:
                    read_owners_file(owners_url, sig_name, subproject, csv_file, affil_dict)

    # Gather data from an additional list of OWNERS files if available
    if new_owners_file is None:
        return

    # Re-read the output written above to learn which owners files are done.
    # NOTE(review): files_done is assumed to return a pandas DataFrame with
    # an 'owners_file' column, since the .str accessor is used on it below.
    with open(outfile_name, 'r') as csv_file:
        files_doneDF = files_done(csv_file)

    # Open csv with new list of owners files
    with open(new_owners_file, newline='') as f:
        new_owners_list = list(csv.reader(f))

    # Re-open original output file to append data from new owners files
    with open(outfile_name, 'a') as csv_file:
        for owners_url_list in new_owners_list:
            # csv.reader yields an empty list for a blank line.
            if not owners_url_list:
                continue
            owners_url = owners_url_list[0]

            # Only process owners files that weren't done in one of the
            # above steps. regex=False makes this a literal substring
            # test; URLs contain characters such as '.' and '+' that
            # would otherwise be read as regular expression syntax.
            already_done = files_doneDF['owners_file'].str.contains(owners_url, regex=False).any()
            if not already_done:
                read_owners_file(owners_url, 'NA', 'NA', csv_file, affil_dict)
# Script entry point: build the owners CSV when this file is run.
# NOTE(review): this call is not wrapped in an `if __name__ == "__main__":`
# guard, so importing this module also triggers the full build.
build_owners_csv()