Describe a Python Package
The import system is easy to satisfy, but the approach has problems.
- Can’t upgrade (there is no version!)
- Manual intervention needed to install/uninstall
To automate the process, we need to provide metadata for tools (e.g. pip) to recognise the package. This is specified in PEP 376.
- A `{name}-{version}.dist-info` directory to describe an installation.
- `METADATA` describes the installed distribution for tools to recognise.
- `RECORD` records installed files, so tools can uninstall them later. Note that we always use `/` in paths.
- `INSTALLER` identifies what tool was used to install the distribution, so tools don’t step on each others’ files.
Let’s write some code to automate the process.
import argparse
import base64
import csv
import email.message
import hashlib
import pathlib
import tempfile
def create_dist_info_dir(container, name, version):
    """Create the ``{name}-{version}.dist-info`` directory inside *container*.

    Args:
        container: Directory (a ``pathlib.Path``) that will hold the new
            ``.dist-info`` directory.
        name: Distribution name used as the first part of the directory name.
        version: Distribution version used as the second part.

    Returns:
        The path of the newly created directory.

    Raises:
        FileExistsError: If the directory already exists (``mkdir`` is called
            without ``exist_ok``).
    """
    folder_name = f"{name}-{version}.dist-info"
    target_dir = container / folder_name
    target_dir.mkdir()
    return target_dir
def write_metadata(dist_info, name, version):
    """Write the ``METADATA`` file into the *dist_info* directory.

    The file is a set of RFC 822 style headers carrying the metadata
    version, the distribution name and the distribution version.

    Args:
        dist_info: The ``.dist-info`` directory (a ``pathlib.Path``).
        name: Value of the ``Name`` header.
        version: Value of the ``Version`` header.
    """
    headers = (
        ("Metadata-Version", "2.1"),
        ("Name", name),
        ("Version", version),
    )
    message = email.message.EmailMessage()  # RFC 822 header container.
    for key, value in headers:
        message[key] = value
    (dist_info / "METADATA").write_bytes(bytes(message))
def _record_row_from_path(path, relative):
    """Build one RECORD row describing the file at *path*.

    Args:
        path: Location of the file on disk; its bytes are read to compute
            the hash and the size.
        relative: Path to record for the file. It is written with ``/``
            separators regardless of platform.

    Returns:
        A three-item list ``[path, hash, size]`` in the column order the
        RECORD format uses. ``hash`` is ``sha256=<digest>`` where the digest
        is URL-safe base64 without ``=`` padding, and ``size`` is the byte
        length as a decimal string.
    """
    file_data = path.read_bytes()
    # sha256 rather than md5: the packaging specs ask for sha256 or better.
    digest = hashlib.sha256(file_data).digest()
    # b64encode returns bytes; decode before formatting so the row does not
    # contain a "b'...'" repr, and drop the padding the format does not use.
    file_hash = base64.urlsafe_b64encode(digest).decode("ascii").rstrip("=")
    return [relative.as_posix(), f"sha256={file_hash}", str(len(file_data))]
def iter_files(roots):
    """Yield ``(path, relative)`` pairs for the files found under *roots*.

    Each root is searched recursively. Directories are skipped, as are
    ``.pyc`` files and anything sitting directly inside a ``__pycache__``
    directory.

    Args:
        roots: Iterable of ``pathlib.Path`` directories to walk.

    Yields:
        Tuples of the file's path and that path made relative to the
        root's parent, so the root's own name stays in the relative part.
    """
    for base in roots:
        anchor = base.parent
        for candidate in base.glob("**/*"):
            is_bytecode = (
                candidate.suffix == ".pyc"
                or candidate.parent.name == "__pycache__"
            )
            if candidate.is_file() and not is_bytecode:
                yield candidate, candidate.relative_to(anchor)
def write_record(dist_info, package):
    """Write the ``RECORD`` file listing every installed file.

    One CSV row is written for each file found under *package* and
    *dist_info*, followed by a final row for ``RECORD`` itself with empty
    hash and size columns.

    Args:
        dist_info: The ``.dist-info`` directory (a ``pathlib.Path``); the
            ``RECORD`` file is created inside it.
        package: The package directory whose files are being recorded.
    """
    record = dist_info.joinpath("RECORD")
    # newline="" lets the csv writer control line endings (otherwise "\n"
    # becomes "\r\n" on Windows); the encoding is pinned so the output does
    # not depend on the locale.
    with record.open("w", encoding="utf-8", newline="") as f:
        w = csv.writer(f, lineterminator="\n")
        for path, relative in iter_files((package, dist_info)):
            # RECORD already exists at this point (it was just opened), so
            # the directory walk finds it. Skip it here: hashing a
            # half-written file would give a wrong entry, and it gets its
            # own unhashed row below.
            if path == record:
                continue
            w.writerow(_record_row_from_path(path, relative))
        w.writerow([f"{dist_info.name}/RECORD", "", ""])
def write_installer(dist_info):
    """Write the ``INSTALLER`` file naming this script as the installing tool.

    Args:
        dist_info: The ``.dist-info`` directory (a ``pathlib.Path``) that
            receives the file.
    """
    tool_name = "home-grown-packager/distinfo.py"
    (dist_info / "INSTALLER").write_text(tool_name)
def _parse_args(argv):
    """Parse the command line and return the resulting namespace.

    Args:
        argv: List of argument strings, or ``None`` to let ``argparse``
            read them from ``sys.argv``.

    Returns:
        A namespace whose ``target`` attribute is the single positional
        argument converted to a ``pathlib.Path``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("target", type=pathlib.Path)
    namespace = cli.parse_args(argv)
    return namespace
# Distribution name: written to METADATA and used in the .dist-info
# directory name.
_NAME = "my_package"
# Distribution version string, used alongside the name.
_VERSION = "0"
# Package directory to install; a relative path, so it is looked up from
# the current working directory.
_PACKAGE = pathlib.Path("my_package")
def main(argv=None):
    """Install the package and its ``.dist-info`` metadata into the target.

    The ``.dist-info`` directory is first built inside a temporary
    directory, then every file of the package and of that directory is
    copied beneath the target directory given on the command line.

    Args:
        argv: Optional list of command-line arguments; ``None`` lets
            ``argparse`` fall back to ``sys.argv``.
    """
    site_packages = _parse_args(argv).target
    with tempfile.TemporaryDirectory() as staging:
        info_dir = create_dist_info_dir(pathlib.Path(staging), _NAME, _VERSION)
        write_metadata(info_dir, _NAME, _VERSION)
        write_installer(info_dir)
        write_record(info_dir, _PACKAGE)
        # Copy while the staging directory still exists; it is removed as
        # soon as the ``with`` block ends.
        for source, relative in iter_files((_PACKAGE, info_dir)):
            destination = site_packages / relative
            destination.parent.mkdir(parents=True, exist_ok=True)
            destination.write_bytes(source.read_bytes())


if __name__ == "__main__":
    main()
Now if we install our package with this script:
$ cd /path/to/example-project
$ py -m packager.distinfo /path/to/site-packages
pip would magically recognise our package!
$ py -m pip list
Package Version
---------- -------
my-package 0
pip 20.2.1
setuptools 46.0.0
wheel 0.34.2
$ py -m pip show my-package
Name: my-package
Version: 0
Summary: None
Home-page: None
Author: None
Author-email: None
License: None
Location: /path/to/site-packages
Requires:
Required-by:
Note how pip shows our example as `my-package`, although we defined
`_NAME` as `my_package`. The two are actually equivalent according to
PEP 503, and the dash form is called the “normalised” name form. In
practice, you should be able to refer to your project however you like—just
remember that pip will conflate the different notations.