8 changed files with 277 additions and 14 deletions
@ -0,0 +1,62 @@
|
||||
#!/usr/bin/env python3 |
||||
# |
||||
# collect_data.py |
||||
# |
||||
# Copyright (C) 2021-2022 Franco Masotti (franco \D\o\T masotti {-A-T-} tutanota \D\o\T com) |
||||
# |
||||
# This file is part of python-packages-source. |
||||
# |
||||
# python-packages-source is free software: you can redistribute it and/or modify |
||||
# it under the terms of the GNU General Public License as published by |
||||
# the Free Software Foundation, either version 3 of the License, or |
||||
# (at your option) any later version. |
||||
# |
||||
# python-packages-source is distributed in the hope that it will be useful, |
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
# GNU General Public License for more details. |
||||
# |
||||
# You should have received a copy of the GNU General Public License |
||||
# along with python-packages-source. If not, see <http://www.gnu.org/licenses/>. |
||||
r"""Get the data to use in the stats.""" |
||||
|
||||
import csv |
||||
import datetime |
||||
import pathlib |
||||
import re |
||||
|
||||
import gitea |
||||
|
||||
# CSV file the collected samples are appended to.
OUTPUT_FILE = '../stats/repo_stats.csv'
# Matches http(s) URLs on github.com (optionally prefixed by "www.") that
# contain at least one more "/" after the host. The dot of the host name is
# escaped so that hosts such as "githubXcom" are not counted as GitHub.
REGEX = r'http(|s)://(|www\.)github\.com/.*/.*'

# Minimum number of whole days between two collected samples
# (1 day = 86400 seconds by default).
PLOT_DAYS_SENSIBILITY = 1
||||
|
||||
def main():
    r"""Collect the repository counters and store them in the stats file.

    The repositories are fetched with ``gitea.get_org_repos``. A new sample
    is written to ``OUTPUT_FILE`` only when at least
    ``PLOT_DAYS_SENSIBILITY`` whole days have passed since the most recent
    date already stored there, or when no previous sample is available.
    """
    now = datetime.datetime.now(datetime.timezone.utc)

    results = gitea.get_org_repos()

    # Without a previous sample, behave as if the last one is just old enough
    # for a new one to be collected right away.
    last_run = now - datetime.timedelta(days=PLOT_DAYS_SENSIBILITY)
    if pathlib.Path(OUTPUT_FILE).is_file():
        # Only the dates matter here: the counters are recomputed below.
        _, _, dates, _ = gitea.read_csv(OUTPUT_FILE)
        # An existing file without any row must not make max() fail.
        if len(dates) > 0:
            last_run = max(dates)

    if (now - last_run).days < PLOT_DAYS_SENSIBILITY:
        # The latest sample is still recent enough: nothing to do.
        return

    date = now.strftime('%Y-%m-%d %H:%M:%S %z')

    # Compile once instead of looking the pattern up for every repository.
    github_url = re.compile(REGEX)

    total = 0
    match = 0
    # NOTE(review): results is iterated as groups of repository dicts
    # (presumably one group per API page) -- confirm against gitea module.
    for page in results:
        total += len(page)
        for repo in page:
            # Count non-empty mirrors whose upstream URL points to GitHub.
            if (repo['mirror'] and not repo['empty']
                    and github_url.match(repo['original_url'])):
                match += 1

    gitea.write_csv(OUTPUT_FILE, total, match, date)


if __name__ == '__main__':
    main()
@ -0,0 +1,109 @@
|
||||
#!/usr/bin/env python3 |
||||
# |
||||
# plot_data.py |
||||
# |
||||
# Copyright (C) 2021-2022 Franco Masotti (franco \D\o\T masotti {-A-T-} tutanota \D\o\T com) |
||||
# |
||||
# This file is part of python-packages-source. |
||||
# |
||||
# python-packages-source is free software: you can redistribute it and/or modify |
||||
# it under the terms of the GNU General Public License as published by |
||||
# the Free Software Foundation, either version 3 of the License, or |
||||
# (at your option) any later version. |
||||
# |
||||
# python-packages-source is distributed in the hope that it will be useful, |
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
# GNU General Public License for more details. |
||||
# |
||||
# You should have received a copy of the GNU General Public License |
||||
# along with python-packages-source. If not, see <http://www.gnu.org/licenses/>. |
||||
r"""Plot stats data.""" |
||||
|
||||
import csv |
||||
import datetime |
||||
import os |
||||
import sys |
||||
|
||||
import gitea |
||||
import matplotlib |
||||
import matplotlib.dates as mdates |
||||
import matplotlib.pyplot as plt |
||||
from matplotlib.pyplot import figure |
||||
|
||||
# CSV file holding the collected samples.
INPUT_FILE = '../stats/repo_stats.csv'
# Destination of the rendered figure: first command line argument.
# None when no argument was given; main() refuses to run in that case
# instead of failing with an IndexError while the module is loaded.
OUTPUT_FILE = sys.argv[1] if len(sys.argv) > 1 else None


def main():
    r"""Plot the collected repository statistics and save the figure.

    Reads the samples from ``INPUT_FILE`` with ``gitea.read_csv`` and draws
    two stacked plots sharing the same dates: the repository counters and
    the GitHub / total ratio. The figure is saved to ``OUTPUT_FILE``.

    :raises ValueError: if there is no data or if two samples fall on the
        same calendar day.
    """
    if OUTPUT_FILE is None:
        sys.exit('usage: plot_data.py OUTPUT_FILE')

    ########
    # Data #
    ########
    total, match, date, _ = gitea.read_csv(INPUT_FILE)

    if len(total) == 0:
        raise ValueError('no data to plot in ' + INPUT_FILE)

    # Normalize date to 00:00 hours.
    days = [datetime.date(year=d.year, month=d.month, day=d.day)
            for d in date]

    # Duplicate dates are forbidden.
    if len(set(days)) != len(days):
        raise ValueError('more than one sample for the same day in '
                         + INPUT_FILE)

    date = days

    # Compute ratio. A sample with no repositories at all has ratio 0
    # instead of raising ZeroDivisionError.
    ratio = [m / t if t else 0.0 for m, t in zip(match, total)]

    ########
    # Plot #
    ########
    # Get and resize graph.
    fig, (frequency_plot, ratio_plot) = plt.subplots(2, figsize=(15, 10),
                                                     dpi=160)

    # Set grids.
    frequency_plot.grid()
    ratio_plot.grid()

    # Y
    frequency_plot.set_ylabel('Repositories')
    peak = max(total)
    # range() rejects a step of 0, which is what int(peak / 10) yields when
    # there are fewer than 10 repositories: use at least 1.
    y_ticks_step = max(1, int(peak / 10))
    y_ticks_range_max = peak + y_ticks_step
    frequency_plot.set_yticks(range(0, y_ticks_range_max, y_ticks_step))

    # X
    frequency_plot.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
    for label in frequency_plot.get_xticklabels(which='major'):
        label.set(rotation=30, horizontalalignment='right')

    frequency_plot.plot(date, total, linestyle='solid', marker='o',
                        label='Total mirrored repositories')
    frequency_plot.plot(date, match, linestyle='solid', marker='o',
                        label='Repositories mirrored from GitHub')
    frequency_plot.legend()

    # Leave 10% of headroom above the highest tick.
    frequency_plot.set_ylim(
        bottom=0, top=y_ticks_range_max + (y_ticks_range_max * 0.10))

    # Y
    # The ratio axis keeps matplotlib's automatic ticks; only its limits
    # are fixed below.
    ratio_plot.set_ylabel('Ratio')

    # X
    ratio_plot.set_xlabel('Date')
    ratio_plot.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
    for label in ratio_plot.get_xticklabels(which='major'):
        label.set(rotation=30, horizontalalignment='right')

    ratio_plot.plot(date, ratio, linestyle='solid', marker='o',
                    label='GitHub / Total repositories')
    ratio_plot.legend()

    # The ratio lies in [0, 1]; keep a little headroom above 1.
    ratio_plot.set_ylim(bottom=0, top=1.1)

    fig.savefig(OUTPUT_FILE)


if __name__ == '__main__':
    main()
Loading…
Reference in new issue