#!/usr/bin/python3
# -*- coding: utf-8 -*-
# pylint: disable=line-too-long
# kate: space-indent on; indent-width 4; replace-tabs on; indent-mode python; remove-trailing-space modified;
# vim: expandtab ts=4
# pylint: enable=line-too-long
############################################################################
# Copyright © 2021 José Manuel Santamaría Lema <panfaust@gmail.com> #
# #
# This program is free software; you can redistribute it and/or modify #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation; either version 2 of the License, or #
# (at your option) any later version. #
############################################################################
"""This module just provides the KAWatchFile class"""
import re
class KAWatchFile(): #pylint: disable=too-many-instance-attributes
    """
    Class to represent a debian/watch file.

    The file is read lazily: every accessor that needs the file contents calls
    :meth:`parse_watch` the first time it is used.
    """

    def __init__(self, file_path='debian/watch'):
        """
        Create the object; nothing is read from disk at this point.

        :param file_path: path of the watch file, ``debian/watch`` by default.
        """
        self._file_path = file_path
        self._watch_file_parsed = False
        self._raw_lines = []
        self._real_lines = []
        self._opts = []
        self._uscan_supported_extensions = [
            #This list comes from the uscan man page, quote:
            #
            #  @ARCHIVE_EXT@
            #  This is substituted by the typical archive file extension regex (non-capturing).
            #  (?i)\.(?:tar\.xz|tar\.bz2|tar\.gz|zip|tgz|tbz|txz)
            #
            ".tar.xz",
            ".tar.bz2",
            ".tar.gz",
            ".zip",
            ".tgz",
            ".tbz",
            ".txz",
        ]
        self._uscan_supported_url_protocols = [
            #These come from the uscan man page
            "http", "https", "ftp"
        ]
        self._version = 0
        self._raw_tarball_patterns = []
        #Result caches, keyed by the arguments the result was computed for, so a
        #call with different arguments never gets a stale answer:
        #  _tarball_patterns: (src_pkg_name, src_pkg_uversion) -> list of patterns
        #  _tarball_names:    src_pkg_name -> tarball name
        self._tarball_patterns = {}
        self._tarball_names = {}

    @staticmethod
    def _join_continuation_lines(raw_lines):
        """Drops ``#`` comments and joins lines ending with a backslash into 'real lines'."""
        real_lines = []
        pending = ""
        for raw_line in raw_lines:
            #Everything from the first '#' on is treated as a comment
            raw_line = raw_line.split('#', 1)[0]
            if raw_line.endswith('\\'):
                pending += raw_line.rstrip('\\')
                continue
            real_lines.append(pending + raw_line)
            pending = ""
        #The file may end in the middle of a continuation, don't lose that text
        if pending:
            real_lines.append(pending)
        return real_lines

    @staticmethod
    def _split_opts(real_line):
        """Splits a line starting with ``opts=`` into ``(opts dictionary, rest of the line)``."""
        remainder = real_line[len("opts="):]
        closing_quote = remainder.find('"', 1) if remainder.startswith('"') else -1
        if closing_quote != -1:
            #opts="...": the value runs up to the closing quote and may contain spaces
            opts_string = remainder[1:closing_quote]
            line_url = remainder[closing_quote + 1:]
            if not line_url:
                raise ValueError("Failed to parse watch file")
        else:
            #Unquoted value: it ends at the first whitespace character
            separator = re.search(r'\s', remainder)
            if separator is None:
                #this doesn't happen unless we have a malformed watch file
                raise ValueError("Failed to parse watch file")
            opts_string = remainder[:separator.start()].strip('"')
            line_url = remainder[separator.end():]
        #Process each element of the comma separated list, having this formats:
        #justoptname
        #optname=optvalue
        opts_dict = {}
        for opt_string in opts_string.split(","):
            opt_string = opt_string.strip()
            if not opt_string:
                continue
            opt_name, _, opt_value = opt_string.partition("=")
            opts_dict[opt_name] = opt_value
        return opts_dict, line_url

    @staticmethod
    def _apply_filenamemangle(line_pattern, mangle_expr):
        """Applies a ``s/search/replacement/`` filenamemangle rule as a literal text replacement."""
        if not mangle_expr.startswith("s/"):
            return line_pattern
        #Find the first '/' after the leading "s/" which is not escaped with a backslash
        separator = len(mangle_expr)
        for index in range(2, len(mangle_expr)):
            if mangle_expr[index] == '/' and mangle_expr[index - 1] != '\\':
                separator = index
                break
        search_string = mangle_expr[2:separator].replace("\\/", "/")
        #The last character is dropped assuming it is the closing '/' of the rule
        replacement_string = mangle_expr[separator + 1:-1].replace("\\/", "/")
        if not search_string:
            #Replacing the empty string would insert the replacement between every character
            return line_pattern
        return line_pattern.replace(search_string, replacement_string)

    def _raw_pattern_from_url(self, line_url, opts_dict):
        """Returns the raw tarball pattern of a line, or `None` if its protocol is unsupported."""
        line_pattern = line_url.strip()
        #Check protocol, discard line if protocol is not on the supported list
        protocol = line_pattern.split("://")[0]
        if protocol not in self._uscan_supported_url_protocols:
            return None
        #Discard the part of the URL before the last whitespace
        line_pattern = line_pattern.split()[-1]
        #Do the filenamemangle magic if needed
        if 'filenamemangle' in opts_dict:
            line_pattern = self._apply_filenamemangle(line_pattern, opts_dict['filenamemangle'])
        #Discard the part of the URL before the last slash
        return line_pattern.split("/")[-1]

    def parse_watch(self):
        """
        Parses the watch file, replacing the results of any previous parse.

        :raises OSError: if the watch file can't be opened.
        :raises ValueError: with the message ``Failed to parse watch file`` if a line
            starting with ``opts=`` has nothing after the options.
        """
        #Read the raw lines, the 'with' block closes the file even if parsing fails
        with open(self._file_path, "r", encoding="utf-8") as watch_file:
            raw_lines = watch_file.read().splitlines()
        real_lines = self._join_continuation_lines(raw_lines)
        #Process each line finding out version, opts and urls. The results are stored
        #in local variables first so a failure doesn't leave half parsed data behind.
        version = self._version
        opts = []
        raw_tarball_patterns = []
        for real_line in real_lines:
            if real_line.startswith("version="):
                version = real_line[len("version="):]
                #Keep the opts list indexed by real line number
                opts.append({})
                continue
            if real_line.startswith("opts="):
                opts_dict, line_url = self._split_opts(real_line)
            else:
                #This line has no opts= so the whole line is the url part
                opts_dict, line_url = {}, real_line
            opts.append(opts_dict)
            raw_pattern = self._raw_pattern_from_url(line_url, opts_dict)
            if raw_pattern is not None:
                raw_tarball_patterns.append(raw_pattern)
        #Store the results and drop anything computed from a previous parse
        self._raw_lines = raw_lines
        self._real_lines = real_lines
        self._version = version
        self._opts = opts
        self._raw_tarball_patterns = raw_tarball_patterns
        self._tarball_patterns = {}
        self._tarball_names = {}
        #Mark watch file as parsed
        self._watch_file_parsed = True

    def raw_lines(self):
        """Returns a list with the raw lines of the watch file"""
        if not self._watch_file_parsed:
            self.parse_watch()
        return self._raw_lines

    def real_lines(self):
        r"""
        Returns a list with the real lines of the watch file,
        taking into account those ending with ``\`` and the comments
        starting with ``#``
        """
        if not self._watch_file_parsed:
            self.parse_watch()
        return self._real_lines

    def opts(self):
        """
        Returns a list of dictionaries of 'opts', each dictionary is indexed by option name,
        the whole list of dictionaries is indexed by 'real line' number.
        """
        if not self._watch_file_parsed:
            self.parse_watch()
        return self._opts

    def uscan_supported_extensions(self):
        """Returns a list of the uscan supported extensions"""
        return self._uscan_supported_extensions

    def raw_tarball_patterns(self):
        """
        Returns a list of raw tarball patterns i.e. without replacing ``@PACKAGE@``,
        ``@ANY_VERSION@`` or ``@ARCHIVE_EXT@``
        """
        if not self._watch_file_parsed:
            self.parse_watch()
        return self._raw_tarball_patterns

    def tarball_name(self, src_pkg_name):
        """
        Returns the tarball name.

        The first raw tarball pattern yielding something that looks like a package name
        wins, if there is none `src_pkg_name` is returned.

        :param src_pkg_name: source package name, used to expand ``@PACKAGE@`` and as
            fallback value.
        """
        #Parse watch file if not already parsed
        if not self._watch_file_parsed:
            self.parse_watch()
        #Return the tarball name if we already have it for this source package
        if src_pkg_name in self._tarball_names:
            return self._tarball_names[src_pkg_name]
        #If we don't find a valid tarball name, let's supose the tarball name matches
        #the source package name
        result = src_pkg_name
        #Inspect the raw tarball patterns trying to find a valid tarball name
        for raw_tarball_pattern in self._raw_tarball_patterns:
            candidate = re.sub(r'.*\/', '', raw_tarball_pattern)
            candidate = re.sub(r'-\(.*', '', candidate)
            candidate = re.sub(r'-@ANY_VERSION@.*', '', candidate)
            candidate = candidate.strip()
            if candidate.startswith("@PACKAGE@"):
                candidate = src_pkg_name
            else:
                #Remove anything such as "@VERSION@" or "@ARCHIVE_EXT@"
                candidate = candidate.split('@')[0]
            #Regex inspired by debian policy 5.6.1, quote:
            #[...]
            #Package names (both source and binary, see Package) must consist only of
            #lower case letters (a-z), digits (0-9), plus (+) and minus (-) signs, and
            #periods (.). They must be at least two characters long and must start with
            #an alphanumeric character.
            if re.fullmatch(r'[a-z0-9][a-z0-9][a-z0-9\+\-\.]*', candidate) is not None:
                result = candidate
                break
        self._tarball_names[src_pkg_name] = result
        return result

    def tarball_repacked(self):
        """
        Returns `True` if the tarball is repacked, i.e. if any line of the watch file
        has the ``repacksuffix`` option.
        """
        #Parse the watch file if it wasn't parsed already
        if not self._watch_file_parsed:
            self.parse_watch()
        return any('repacksuffix' in opt for opt in self._opts)

    def tarball_repack_suffix(self):
        """
        Returns the tarball repack suffix or `None` if the tarball is not repacked
        or we couldn't find the suffix.
        """
        #Parse the watch file if it wasn't parsed already
        if not self._watch_file_parsed:
            self.parse_watch()
        #The first line having 'repacksuffix' wins
        for opt in self._opts:
            if 'repacksuffix' in opt:
                return opt['repacksuffix']
        return None

    def tarball_patterns(self, src_pkg_name, src_pkg_uversion):
        """
        Returns a list of tarball patterns after expanding uscan variables ``@PACKAGE@``,
        ``@ANY_VERSION@`` and ``@ARCHIVE_EXT@``

        Each pattern is the tarball name, a separator (``-``, ``_`` or ``-v``), the
        upstream version and one of the uscan supported extensions, prefixed with
        ``.orig`` if the tarball is repacked.

        :param src_pkg_name: source package name, see :meth:`tarball_name`.
        :param src_pkg_uversion: upstream version as a string.
        """
        #Parse the watch file if it wasn't parsed already
        if not self._watch_file_parsed:
            self.parse_watch()
        #If we already calculated the patterns for these arguments let's just return them
        cache_key = (src_pkg_name, src_pkg_uversion)
        if cache_key in self._tarball_patterns:
            return self._tarball_patterns[cache_key]
        #Repacked tarballs have ".orig" before the extension
        if self.tarball_repacked():
            extensions = [".orig" + ext for ext in self._uscan_supported_extensions]
        else:
            extensions = self._uscan_supported_extensions
        #Compose a list of tarball patterns with the tarball name, upstream version and
        #possible extensions
        tarball_name = self.tarball_name(src_pkg_name)
        patterns = [
            tarball_name + separator + src_pkg_uversion + extension
            for separator in ('-', '_', '-v')
            for extension in extensions
        ]
        self._tarball_patterns[cache_key] = patterns
        return patterns