Source code for libka.qtwebkit_releases_html_parser
# -*- coding: utf-8 -*-
# pylint: disable=line-too-long
# kate: space-indent on; indent-width 4; replace-tabs on; indent-mode python; remove-trailing-space modified;
# vim: expandtab ts=4
# pylint: enable=line-too-long
############################################################################
# Copyright © 2018 José Manuel Santamaría Lema <panfaust@gmail.com> #
# #
# This program is free software; you can redistribute it and/or modify #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation; either version 2 of the License, or #
# (at your option) any later version. #
############################################################################
"""Module providing the QtWebKitReleasesHTMLParser class."""
import re
from html.parser import HTMLParser
import urllib3
import certifi
from debian.debian_support import Version
[docs]
class QtWebKitReleasesHTMLParser(HTMLParser): #pylint: disable=abstract-method
    """
    Parse the HTML of the QtWebKit releases page and find out the latest
    upstream release.

    Feed the parser some HTML (either through :meth:`parse_releases_url` or
    directly through ``feed()``) and then call :meth:`get_latest_version`.
    Every ``<a>`` tag whose ``href`` points to a QtWebKit release download is
    turned into a Debian ``Version`` and the greatest one seen is remembered.
    """

    # Path prefix shared by every QtWebKit release download link.
    _RELEASE_PREFIX = "/annulen/webkit/releases/download/qtwebkit-"
    # Path prefix of the "tp" releases, which are deliberately skipped.
    _DISCARDED_PREFIX = "/annulen/webkit/releases/download/qtwebkit-tp"

    def __init__(self):
        super().__init__()
        # "0" acts as the "nothing found yet" value that any parsed release
        # is compared against.
        self._latest_version = Version("0")

    def parse_releases_url(self):
        """
        Download the QtWebKit releases page and feed it to the parser.

        The page is fetched over HTTPS with certificate verification enabled
        (using the CA bundle provided by ``certifi``) and decoded as UTF-8
        before being parsed.
        """
        releases_url = "https://github.com/annulen/webkit/releases/"
        # Fetch the releases web page
        http = urllib3.PoolManager(cert_reqs='CERT_REQUIRED', ca_certs=certifi.where())
        response = http.request("GET", releases_url)
        html_str = response.data.decode('utf-8')
        self.feed(html_str)

    def handle_starttag(self, tag, attrs):
        """
        Process all HTML tags, detect links which may point to `*.tar.xz`
        source code releases.

        :param tag: lower-cased tag name as provided by ``HTMLParser``.
        :param attrs: list of ``(name, value)`` attribute pairs of the tag.

        Tags that are not links, links without a usable ``href``, links to
        "tp" releases and links whose file name cannot be turned into a valid
        version are ignored without raising.
        """
        if tag != "a":
            return
        # href is not guaranteed to be the first attribute, nor to be present
        # at all, so search the whole attribute list for it.
        rel_url = next((value for name, value in attrs if name == "href"), None)
        # A valueless attribute (<a href>) is reported with a None value.
        if not rel_url:
            return
        if rel_url.startswith(self._DISCARDED_PREFIX):
            # Discard this kind of links
            return
        if not rel_url.startswith(self._RELEASE_PREFIX):
            return
        # The version is taken from the file name (last path component),
        # between the "qtwebkit-" marker and the ".tar.xz" extension.
        pieces = rel_url.split('/')[-1].split('qtwebkit-')
        if len(pieces) < 2:
            # The file name does not carry the "qtwebkit-" marker, so there is
            # no version to extract from it.
            return
        # Dashes are replaced with "~" before building the Debian version.
        version_str = pieces[1].split('.tar.xz')[0].replace('-', '~')
        try:
            version_found = Version(version_str)
        except ValueError:
            # Not a valid version string, so not a release we can compare.
            return
        if version_found > self._latest_version:
            self._latest_version = version_found

    def get_latest_version(self):
        """
        Return the latest QtWebKit version found after parsing the HTML
        releases page.

        :returns: the greatest version seen so far; this is the initial
            ``Version("0")`` when no release link has been parsed.
        """
        return self._latest_version