# Source code for libka.pkgedit.ka_watch_file

#!/usr/bin/python3
# -*- coding: utf-8 -*-
# pylint: disable=line-too-long
# kate: space-indent on; indent-width 4; replace-tabs on; indent-mode python; remove-trailing-space modified;
# vim: expandtab ts=4
# pylint: enable=line-too-long

############################################################################
#   Copyright © 2021 José Manuel Santamaría Lema <panfaust@gmail.com>      #
#                                                                          #
#   This program is free software; you can redistribute it and/or modify   #
#   it under the terms of the GNU General Public License as published by   #
#   the Free Software Foundation; either version 2 of the License, or      #
#   (at your option) any later version.                                    #
############################################################################

"""This module just provides the KAWatchFile class"""

import re


# [docs]
class KAWatchFile(): #pylint: disable=too-many-instance-attributes
    """Class to represent a debian/watch file.

    The watch file is parsed lazily: every accessor calls :meth:`parse_watch`
    the first time it is needed. Calling :meth:`parse_watch` explicitly
    re-reads the file and replaces any previously parsed state.
    """

    def __init__(self, file_path='debian/watch'):
        """
        :param file_path: path of the watch file to read, by default
            ``debian/watch`` relative to the current directory.
        """
        self._file_path = file_path
        self._watch_file_parsed = False
        self._raw_lines = []
        self._real_lines = []
        self._opts = []
        self._uscan_supported_extensions = [
            #This list comes from the uscan man page, quote:
            #
            # @ARCHIVE_EXT@
            #     This is substituted by the typical archive file extension regex (non-capturing).
            #     (?i)\.(?:tar\.xz|tar\.bz2|tar\.gz|zip|tgz|tbz|txz)
            #
            ".tar.xz",
            ".tar.bz2",
            ".tar.gz",
            ".zip",
            ".tgz",
            ".tbz",
            ".txz",
        ]
        self._uscan_supported_url_protocols = [
            #These come from the uscan man page
            "http", "https", "ftp"
        ]
        #0 until a "version=" line is parsed, afterwards the string found after "version="
        self._version = 0
        self._raw_tarball_patterns = []
        #Results of tarball_name(), indexed by source package name
        self._tarball_name_cache = {}
        #Results of tarball_patterns(), indexed by (source package name, upstream version)
        self._tarball_patterns_cache = {}

    @staticmethod
    def _join_real_lines(raw_lines):
        """
        Returns the 'real' lines for the given raw lines: everything after a ``#`` is
        dropped and lines ending with a backslash are joined with the following one.
        """
        real_lines = []
        pending = ""
        for raw_line in raw_lines:
            content = raw_line.split('#', 1)[0]
            if content.endswith('\\'):
                pending += content.rstrip('\\')
                continue
            real_lines.append(pending + content)
            pending = ""
        #Don't lose an unterminated continuation at the end of the file
        if pending:
            real_lines.append(pending)
        return real_lines

    @staticmethod
    def _split_opts_line(real_line):
        """
        Splits a real line starting with ``opts=`` and returns the tuple
        ``(opts_string, line_url)``, the quotes around the options are removed.

        :raises ValueError: if the options are not followed by anything or the
            closing quote is missing.
        """
        remainder = real_line.split("opts=", 1)[1]
        if remainder.startswith('"'):
            #Quoted options may contain spaces, so look for the closing quote
            #instead of the first space
            closing_quote = remainder.find('"', 1)
            if closing_quote == -1:
                raise ValueError("Failed to parse watch file")
            opts_string = remainder[1:closing_quote]
            line_url = remainder[closing_quote + 1:]
            if not line_url:
                raise ValueError("Failed to parse watch file")
            return opts_string, line_url
        opts_string, separator, line_url = remainder.partition(" ")
        if not separator: #this doesn't happen unless we have a malformed watch file
            raise ValueError("Failed to parse watch file")
        return opts_string, line_url

    @staticmethod
    def _parse_opts_string(opts_string):
        """
        Returns a dictionary for a comma separated list of options, each option
        having one of these formats: ``justoptname`` or ``optname=optvalue``.
        Options without value are mapped to the empty string.
        """
        opts_dict = {}
        for opt_string in opts_string.split(","):
            #Quoted lists are usually written as "name1=value1, name2=value2"
            opt_string = opt_string.strip()
            if not opt_string:
                continue
            opt_name, _, opt_value = opt_string.partition("=")
            opts_dict[opt_name] = opt_value
        return opts_dict

    @staticmethod
    def _apply_filenamemangle(line_pattern, filenamemangle_ex_str):
        """
        Applies a ``s/search/replacement/`` filenamemangle expression to the pattern
        as a plain text substitution. Expressions not starting with ``s/`` are ignored.
        """
        if not filenamemangle_ex_str.startswith("s/"):
            return line_pattern
        #Find the first '/' not escaped with a backslash after the leading "s/"
        separator_index = len(filenamemangle_ex_str)
        prev_char = ''
        for char_index, char in enumerate(filenamemangle_ex_str):
            if char_index >= 2 and char == '/' and prev_char != '\\':
                separator_index = char_index
                break
            prev_char = char
        string_to_replace = filenamemangle_ex_str[2:separator_index]
        string_to_replace = string_to_replace.replace("\\/", "/")
        #The last character is assumed to be the closing '/' of the expression
        replacement_string = filenamemangle_ex_str[separator_index+1:-1]
        replacement_string = replacement_string.replace("\\/", "/")
        #Replacing the empty string would insert the replacement between every character
        if not string_to_replace:
            return line_pattern
        return line_pattern.replace(string_to_replace, replacement_string)

    def _raw_pattern_from_url(self, line_url, opts_dict):
        """
        Returns the raw tarball pattern for the url part of a real line or ``None``
        if the protocol of the url is not on the list of supported ones.
        """
        line_pattern = line_url.strip()
        #Check protocol, discard line if protocol is not on the supported list
        protocol = line_pattern.split("://")[0]
        if protocol not in self._uscan_supported_url_protocols:
            return None
        #Discard the part of the URL before the last space
        line_pattern = line_pattern.split(" ")[-1]
        #Do the filenamemangle magic if needed
        if 'filenamemangle' in opts_dict:
            line_pattern = self._apply_filenamemangle(line_pattern,
                                                      opts_dict['filenamemangle'])
        #Discard the part of the URL before the last slash
        return line_pattern.split("/")[-1]

    def parse_watch(self):
        """
        Parses the watch file, replacing the result of any previous parsing.

        :raises OSError: if the watch file can't be opened or read.
        :raises ValueError: if a line starting with ``opts=`` is malformed, in that
            case the previously parsed state (if any) is left untouched.
        """
        #Read the raw lines, the file is closed even if reading fails
        with open(self._file_path, "r") as watch_file:
            raw_lines = watch_file.read().splitlines()
        #Find out the real lines taking into account final '\'
        #and dropping comments starting with '#'
        real_lines = self._join_real_lines(raw_lines)
        #Process each line finding out version, opts and urls; work on local
        #variables so parsing twice doesn't duplicate entries
        version = 0
        opts = []
        raw_tarball_patterns = []
        for real_line in real_lines:
            if real_line.startswith("version="):
                #Set the version of this watch file
                version = real_line.split("version=", 1)[1]
                #Append an empty opts dictionary for this line
                opts.append({})
                continue
            if real_line.startswith("opts="):
                opts_string, line_url = self._split_opts_line(real_line)
                opts_dict = self._parse_opts_string(opts_string)
            else:
                #This line has no opts= so the url is the whole line
                opts_dict = {}
                line_url = real_line
            #There is one dictionary per real line, even if the line has no valid url
            opts.append(opts_dict)
            line_pattern = self._raw_pattern_from_url(line_url, opts_dict)
            if line_pattern is not None:
                raw_tarball_patterns.append(line_pattern)
        #Everything went fine, store the results and drop the stale cached values
        self._raw_lines = raw_lines
        self._real_lines = real_lines
        self._version = version
        self._opts = opts
        self._raw_tarball_patterns = raw_tarball_patterns
        self._tarball_name_cache = {}
        self._tarball_patterns_cache = {}
        #Mark watch file as parsed
        self._watch_file_parsed = True

    def raw_lines(self):
        """Returns a list with the raw lines of the watch file"""
        if not self._watch_file_parsed:
            self.parse_watch()
        return self._raw_lines

    def real_lines(self):
        #pylint: disable=anomalous-backslash-in-string
        r"""
        Returns a list with the real lines of the watch file,
        taking into account those ending with ``\`` and the comments
        starting with ``#``
        """
        #pylint: enable=anomalous-backslash-in-string
        if not self._watch_file_parsed:
            self.parse_watch()
        return self._real_lines

    def opts(self):
        """
        Returns a list of dictionaries of 'opts', each dictionary is indexed by option name,
        the whole list of dictionaries is indexed by 'real line' number.
        """
        if not self._watch_file_parsed:
            self.parse_watch()
        return self._opts

    def uscan_supported_extensions(self):
        """Returns a list of the uscan supported extensions"""
        return self._uscan_supported_extensions

    def raw_tarball_patterns(self):
        """
        Returns a list of raw tarball patterns i.e. without replacing ``@PACKAGE@``,
        ``@ANY_VERSION@`` or ``@ARCHIVE_EXT@``
        """
        if not self._watch_file_parsed:
            self.parse_watch()
        return self._raw_tarball_patterns

    def tarball_name(self, src_pkg_name):
        """
        Returns the tarball name.

        The name is taken from the first raw tarball pattern yielding something which
        looks like a package name, ``@PACKAGE@`` being replaced by `src_pkg_name`.
        If no pattern yields a valid name, `src_pkg_name` itself is returned.

        :param src_pkg_name: name of the source package.
        """
        #Parse watch file if not already parsed
        if not self._watch_file_parsed:
            self.parse_watch()
        #Return the tarball name if we already have it for this source package
        if src_pkg_name in self._tarball_name_cache:
            return self._tarball_name_cache[src_pkg_name]
        #If we don't find a valid tarball name, let's suppose the
        #tarball name matches the source package name
        tarball_name = src_pkg_name
        #Inspect the raw tarball patterns trying to find a valid tarball name
        for raw_tarball_pattern in self._raw_tarball_patterns:
            candidate = re.sub(r'.*\/', '', raw_tarball_pattern)
            candidate = re.sub(r'-\(.*', '', candidate)
            candidate = re.sub(r'-@ANY_VERSION@.*', '', candidate)
            candidate = candidate.strip()
            if candidate.startswith("@PACKAGE@"):
                candidate = src_pkg_name
            else:
                #Remove anything such as "@VERSION@" or "@ARCHIVE_EXT@"
                candidate = candidate.split('@')[0]
            #Regex inspired by debian policy 5.6.1, quote:
            #[...]
            #Package names (both source and binary, see Package) must consist only of
            #lower case letters (a-z), digits (0-9), plus (+) and minus (-) signs, and
            #periods (.). They must be at least two characters long and must start with
            #an alphanumeric character.
            if re.fullmatch(r'[a-z0-9][a-z0-9][a-z0-9\+\-\.]*', candidate) is not None:
                tarball_name = candidate
                break
        self._tarball_name_cache[src_pkg_name] = tarball_name
        return tarball_name

    def tarball_repacked(self):
        """
        Returns `True` if the tarball is repacked, i.e. if any line of the
        watch file has the ``repacksuffix`` option.
        """
        #Parse the watch file if it wasn't parsed already
        if not self._watch_file_parsed:
            self.parse_watch()
        return any('repacksuffix' in opt for opt in self._opts)

    def tarball_repack_suffix(self):
        """
        Returns the tarball repack suffix or `None` if the tarball is not repacked
        or we couldn't find the suffix.
        """
        #Parse the watch file if it wasn't parsed already
        if not self._watch_file_parsed:
            self.parse_watch()
        #The first line having 'repacksuffix' wins
        for opt in self._opts:
            if 'repacksuffix' in opt:
                return opt['repacksuffix']
        #If we reach this point, we can conclude the tarball is not repacked or we
        #couldn't find any repack suffix, so let's return None
        return None

    def tarball_patterns(self, src_pkg_name, src_pkg_uversion):
        """
        Returns a list of tarball patterns after expanding uscan variables ``@PACKAGE@``,
        ``@ANY_VERSION@`` and ``@ARCHIVE_EXT@``

        Each element is the tarball name, a separator (``-``, ``_`` or ``-v``), the
        upstream version and one of the uscan supported extensions, the extension
        being prefixed with ``.orig`` if the tarball is repacked.

        :param src_pkg_name: name of the source package.
        :param src_pkg_uversion: upstream version of the source package.
        """
        #Parse the watch file if it wasn't parsed already
        if not self._watch_file_parsed:
            self.parse_watch()
        #If we already calculated the tarball patterns for this package and
        #version let's just return them
        cache_key = (src_pkg_name, src_pkg_uversion)
        if cache_key in self._tarball_patterns_cache:
            return self._tarball_patterns_cache[cache_key]
        #Compose a list of tarball patterns with the tarball name, upstream version and
        #possible extensions
        separators = ['-', '_', '-v']
        if self.tarball_repacked():
            extensions = [".orig" + ext for ext in self._uscan_supported_extensions]
        else:
            extensions = self._uscan_supported_extensions
        tarball_name = self.tarball_name(src_pkg_name)
        tarball_patterns = [
            tarball_name + separator + src_pkg_uversion + extension
            for separator in separators
            for extension in extensions
        ]
        self._tarball_patterns_cache[cache_key] = tarball_patterns
        #Return result
        return tarball_patterns