Browse Source

Added and fixed scripts. Fixed readme.

master
Franco Masotti 4 months ago
parent
commit
ecb4ee135d
Signed by: frnmst
GPG Key ID: 24116ED85666780A
  1. 42
      .pre-commit-config.yaml
  2. 5
      Makefile
  3. 1
      Pipfile
  4. 31
      README.md
  5. 62
      scripts/collect_data.py
  6. 40
      scripts/gitea.py
  7. 109
      scripts/plot_data.py
  8. 1
      stats/repo_stats.csv

42
.pre-commit-config.yaml

@ -1,5 +1,5 @@
# Do not deal with submodules.
files: '(README.md|0000_add_submodules.sh|configuration.yaml|Makefile)'
files: '(README.md|configuration.yaml|Makefile|scripts/.*)'
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
@ -26,3 +26,43 @@ repos:
hooks:
- id: licheck
args: ['--configuration-file', '.allowed_licenses.yml']
- repo: https://github.com/pycqa/isort
rev: 5.9.3
hooks:
- id: isort
- repo: https://github.com/PyCQA/bandit
rev: '1.7.0' # Use the sha / tag you want to point at
hooks:
- id: bandit
args: ['--skip', 'B404,B506,B410,B603', '--level', 'LOW']
- repo: https://github.com/twu/skjold
rev: 'v0.4.1'
hooks:
- id: skjold
args: ['-s', 'gemnasium', '-s', 'pyup']
- repo: https://github.com/pycqa/flake8
rev: '3.9.2' # Use the sha / tag you want to point at
hooks:
- id: flake8
args: ['--ignore=E501,W503,W504,F401,D401']
additional_dependencies: [flake8-docstrings]
- repo: https://github.com/pre-commit/mirrors-yapf
rev: 'v0.31.0' # Use the sha / tag you want to point at
hooks:
- id: yapf
args: ['--style', '{based_on_style: pep8; indent_width: 4}']
- repo: local
hooks:
- id: stats
name: stats
language: system
entry: make stats
verbose: true
always_run: true
pass_filenames: false

5
Makefile

@ -22,6 +22,9 @@
default: install-dev
stats:
cd scripts; pipenv run python3 -m collect_data
submodules-add:
cd scripts; ./add_submodules.sh $(SUBMODULES)
@ -47,4 +50,4 @@ update: install-dev
clean:
rm -rf build dist *.egg-info
.PHONY: default install-dev uninstall-dev update clean
.PHONY: default stats submodules-add submodules-add-gitea submodules-update install-dev uninstall-dev update clean

1
Pipfile

@ -7,3 +7,4 @@ name = "pypi"
pre-commit = '>=2,<3'
fpyutils = '>=2,<3'
requests = '>=2.26,<2.27'
matplotlib = '>=3,<4'

31
README.md

@ -15,11 +15,13 @@ Git repository pointers and configurations to build Python packages from source.
- [Add](#add)
- [Important](#important)
- [Comments](#comments)
- [python-cffi](#python-cffi)
- [cryptography](#cryptography)
- [graphviz](#graphviz)
- [babel](#babel)
- [lxml](#lxml)
- [Instructions](#instructions)
- [python-cffi](#python-cffi)
- [cryptography](#cryptography)
- [graphviz](#graphviz)
- [babel](#babel)
- [lxml](#lxml)
- [Problematic repositories](#problematic-repositories)
- [Client configuration](#client-configuration)
- [PIP](#pip)
- [Pipenv](#pipenv)
@ -122,7 +124,9 @@ have been built using the [build_python_packages.py](https://docs.franco.net.eu.
### Comments
#### python-cffi
#### Instructions
##### python-cffi
- see [Installation and Status](https://cffi.readthedocs.io/en/latest/installation.html)
@ -130,7 +134,7 @@ have been built using the [build_python_packages.py](https://docs.franco.net.eu.
apt-get install libffi-dev python3-dev
```
#### cryptography
##### cryptography
- see [Installation -> Rust](https://cryptography.io/en/latest/installation/#rust) and [this stackoverflow answer](https://stackoverflow.com/a/22210069)
@ -138,7 +142,7 @@ have been built using the [build_python_packages.py](https://docs.franco.net.eu.
apt-get install build-essential libssl-dev libffi-dev python3-dev rustc cargo
```
#### graphviz
##### graphviz
- see [Installation](https://graphviz.readthedocs.io/en/stable/manual.html#installation)
@ -146,7 +150,7 @@ have been built using the [build_python_packages.py](https://docs.franco.net.eu.
apt-get install graphviz libgraphviz-dev
```
#### babel
##### babel
- see [installation -> Living on the Edge](http://babel.pocoo.org/en/latest/installation.html#living-on-the-edge)
@ -155,10 +159,17 @@ have been built using the [build_python_packages.py](https://docs.franco.net.eu.
python3 setup.py import_cldr
```
#### lxml
##### lxml
- see [How to build lxml from source](https://lxml.de/build.html)
#### Problematic repositories
- `kiwi`
- `lxml`
- `matplotlib`
- `numpy`
## Client configuration
### PIP

62
scripts/collect_data.py

@ -0,0 +1,62 @@
#!/usr/bin/env python3
#
# collect_data.py
#
# Copyright (C) 2021-2022 Franco Masotti (franco \D\o\T masotti {-A-T-} tutanota \D\o\T com)
#
# This file is part of python-packages-source.
#
# python-packages-source is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# python-packages-source is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with python-packages-source. If not, see <http://www.gnu.org/licenses/>.
r"""Get the data to use in the stats."""
import csv
import datetime
import pathlib
import re
import gitea
OUTPUT_FILE = '../stats/repo_stats.csv'
REGEX = r'http(|s)://(|www\.)github.com/.*/.*'
# Collect data at most once per day (every 86400 seconds) by default.
PLOT_DAYS_SENSIBILITY = 1
if __name__ == '__main__':

    def main():
        r"""Append a new sample to the stats file when the last one is old enough.

        The newest timestamp stored in ``OUTPUT_FILE`` is compared with the
        current UTC time. If at least ``PLOT_DAYS_SENSIBILITY`` days have
        elapsed (or no usable history exists) the repositories are fetched
        through ``gitea.get_org_repos()`` and one row is appended containing:

        - the total number of repositories returned
        - the number of non-empty mirrors whose ``original_url`` matches
          ``REGEX``
        - the current timestamp formatted as ``%Y-%m-%d %H:%M:%S %z``
        """
        now = datetime.datetime.now(datetime.timezone.utc)

        # Without history, pretend the last sample is exactly one sampling
        # period old so that the first sample is always taken, whatever the
        # value of PLOT_DAYS_SENSIBILITY is.
        last_sample = now - datetime.timedelta(days=PLOT_DAYS_SENSIBILITY)
        if pathlib.Path(OUTPUT_FILE).is_file():
            _, _, dates, _ = gitea.read_csv(OUTPUT_FILE)
            # An existing but empty file yields no dates: keep the default
            # instead of calling max() on an empty list.
            if dates:
                last_sample = max(dates)

        if (now - last_sample).days < PLOT_DAYS_SENSIBILITY:
            # Too early for a new sample: do not query the remote instance.
            return

        total = 0
        match = 0
        # NOTE(review): each element of the result is treated as a group
        # (presumably one API page) of repository dicts -- confirm against
        # gitea.get_org_repos().
        for group in gitea.get_org_repos():
            total += len(group)
            for repo in group:
                if (repo['mirror'] and not repo['empty']
                        and re.match(REGEX, repo['original_url'])):
                    match += 1

        gitea.write_csv(OUTPUT_FILE, total, match,
                        now.strftime('%Y-%m-%d %H:%M:%S %z'))

    main()

40
scripts/gitea.py

@ -18,16 +18,22 @@
#
# You should have received a copy of the GNU General Public License
# along with python-packages-source. If not, see <http://www.gnu.org/licenses/>.
r"""Common functions."""
import requests
import csv
import datetime
import json
import urllib
import os
import urllib
import requests
GITEA_BASE_URL = 'software.franco.net.eu.org'
GITEA_ORG = 'mirrors-python'
def get_org_repos() -> list:
r"""Get a list of objects containing repositories."""
results = list()
go = True
@ -58,3 +64,33 @@ def get_org_repos() -> list:
i += 1
return results
def read_csv(file: str) -> tuple:
    r"""Read the statistics CSV file.

    Every non-blank row must contain, in order: an integer total, an
    integer match count and a timestamp formatted as
    ``%Y-%m-%d %H:%M:%S %z``. Blank rows are ignored.

    :param file: the path of the CSV file.
    :returns: a tuple ``(total, match, date, runs)`` of four lists having
        the same length. ``date`` contains timezone-aware ``datetime``
        objects and ``runs`` contains the 1-based row counter
        (``1, 2, 3, ...``).
    :raises ValueError: if a count is not an integer or a timestamp does
        not match the expected format.
    :raises IndexError: if a non-blank row has fewer than three fields.
    """
    total = list()
    match = list()
    date = list()
    # An extra series which is not stored in the file: incrementing
    # integers representing the run number.
    runs = list()

    # newline='' lets the csv module handle line endings itself, as
    # recommended by its documentation.
    with open(file, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='|')
        for row in reader:
            if not row:
                # csv.reader yields an empty list for a blank line.
                continue
            total.append(int(row[0]))
            match.append(int(row[1]))
            date.append(
                datetime.datetime.strptime(row[2], '%Y-%m-%d %H:%M:%S %z'))
            runs.append(len(runs) + 1)

    return total, match, date, runs
def write_csv(file: str, total: int, match: int, date: str):
    r"""Append one row to the statistics CSV file.

    The file is created if it does not exist. Existing rows are never
    modified.

    :param file: the path of the CSV file.
    :param total: the value written in the first column.
    :param match: the value written in the second column.
    :param date: the value written in the third column. ``read_csv``
        expects it to be formatted as ``%Y-%m-%d %H:%M:%S %z``.
    """
    # newline='' prevents the platform newline translation from turning
    # the '\r\n' emitted by the csv writer into '\r\r\n'.
    with open(file, 'a', newline='') as csvfile:
        writer = csv.writer(csvfile,
                            delimiter=',',
                            quotechar='|',
                            quoting=csv.QUOTE_MINIMAL)
        writer.writerow([total, match, date])

109
scripts/plot_data.py

@ -0,0 +1,109 @@
#!/usr/bin/env python3
#
# plot_data.py
#
# Copyright (C) 2021-2022 Franco Masotti (franco \D\o\T masotti {-A-T-} tutanota \D\o\T com)
#
# This file is part of python-packages-source.
#
# python-packages-source is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# python-packages-source is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with python-packages-source. If not, see <http://www.gnu.org/licenses/>.
r"""Plot stats data."""
import csv
import datetime
import os
import sys
import gitea
import matplotlib
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
INPUT_FILE = '../stats/repo_stats.csv'
OUTPUT_FILE = sys.argv[1]
if __name__ == '__main__':

    def main():
        r"""Plot the repository statistics and save the figure.

        The samples are read from ``INPUT_FILE`` and two stacked plots
        sharing the same kind of date axis are written to ``OUTPUT_FILE``:

        - the total and the GitHub-mirrored repository counts
        - the ratio between the two counts

        :raises ValueError: if there are no samples or if two samples fall
            on the same calendar day.
        """
        ########
        # Data #
        ########
        total, match, date, _ = gitea.read_csv(INPUT_FILE)
        if not total:
            raise ValueError('no data to plot in ' + INPUT_FILE)

        # Normalize date to 00:00 hours by dropping the time of day.
        days = [d.date() for d in date]

        # Duplicate dates are forbidden.
        if len(set(days)) != len(days):
            raise ValueError('duplicate dates found in ' + INPUT_FILE)

        # Compute ratio. A sample without repositories has no meaningful
        # ratio: plot it as 0 instead of dividing by zero.
        ratio = [m / t if t else 0.0 for m, t in zip(match, total)]

        ########
        # Plot #
        ########
        # Get and resize graph.
        fig, (frequency_plot, ratio_plot) = plt.subplots(2,
                                                         figsize=(15, 10),
                                                         dpi=160)

        # Set grids.
        frequency_plot.grid()
        ratio_plot.grid()

        # Y
        frequency_plot.set_ylabel('Repositories')
        # About ten ticks; the step must never be 0 (fewer than 10
        # repositories) because range() rejects a zero step.
        y_ticks_step = max(1, max(total) // 10)
        y_ticks_range_max = max(total) + y_ticks_step
        frequency_plot.set_yticks(range(0, y_ticks_range_max, y_ticks_step))

        # X
        frequency_plot.xaxis.set_major_formatter(
            mdates.DateFormatter('%Y-%m-%d'))
        for label in frequency_plot.get_xticklabels(which='major'):
            label.set(rotation=30, horizontalalignment='right')

        frequency_plot.plot(days,
                            total,
                            linestyle='solid',
                            marker='o',
                            label='Total mirrored repositories')
        frequency_plot.plot(days,
                            match,
                            linestyle='solid',
                            marker='o',
                            label='Repositories mirrored from GitHub')
        frequency_plot.legend()
        # Leave 10% of headroom above the highest tick.
        frequency_plot.set_ylim(bottom=0,
                                top=y_ticks_range_max
                                + (y_ticks_range_max * 0.10))

        # Y
        ratio_plot.set_ylabel('Ratio')

        # X
        ratio_plot.set_xlabel('Date')
        ratio_plot.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
        for label in ratio_plot.get_xticklabels(which='major'):
            label.set(rotation=30, horizontalalignment='right')

        ratio_plot.plot(days,
                        ratio,
                        linestyle='solid',
                        marker='o',
                        label='GitHub / Total repositories')
        ratio_plot.legend()
        # The ratio is at most 1: keep a small margin above it.
        ratio_plot.set_ylim(bottom=0, top=1.1)

        fig.savefig(OUTPUT_FILE)

    main()

1
stats/repo_stats.csv

@ -0,0 +1 @@
378,355,2022-01-06 11:33:48 +0000
1 378 355 2022-01-06 11:33:48 +0000
Loading…
Cancel
Save