[compat] Remove deprecated functions from core code

pull/4181/head
pukkandan 5 months ago
parent 54007a45f1
commit 14f25df2b6
No known key found for this signature in database
GPG Key ID: 7EEE9E1E817D0A39
  1. 4
      devscripts/check-porn.py
  2. 28
      test/helper.py
  3. 4
      test/test_YoutubeDL.py
  4. 11
      test/test_compat.py
  5. 3
      test/test_download.py
  6. 7
      test/test_socks.py
  7. 3
      test/test_youtube_signature.py
  8. 14
      yt_dlp/YoutubeDL.py
  9. 5
      yt_dlp/aes.py
  10. 4
      yt_dlp/cookies.py
  11. 2
      yt_dlp/downloader/external.py
  12. 19
      yt_dlp/downloader/f4m.py
  13. 10
      yt_dlp/downloader/hls.py
  14. 3
      yt_dlp/downloader/rtmp.py
  15. 4
      yt_dlp/extractor/abematv.py
  16. 4
      yt_dlp/extractor/audius.py
  17. 39
      yt_dlp/extractor/common.py
  18. 5
      yt_dlp/extractor/commonprotocols.py
  19. 10
      yt_dlp/extractor/curiositystream.py
  20. 4
      yt_dlp/extractor/espn.py
  21. 40
      yt_dlp/extractor/generic.py
  22. 9
      yt_dlp/extractor/giga.py
  23. 6
      yt_dlp/extractor/hitbox.py
  24. 2
      yt_dlp/extractor/lnkgo.py
  25. 7
      yt_dlp/extractor/nrk.py
  26. 7
      yt_dlp/extractor/puls4.py
  27. 2
      yt_dlp/extractor/stv.py
  28. 99
      yt_dlp/extractor/youtube.py
  29. 4
      yt_dlp/postprocessor/sponsorblock.py
  30. 89
      yt_dlp/utils.py

@ -14,10 +14,10 @@ import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import urllib.parse
import urllib.request
from test.helper import gettestcases
from yt_dlp.utils import compat_urllib_parse_urlparse
if len(sys.argv) > 1:
METHOD = 'LIST'
@ -38,7 +38,7 @@ for test in gettestcases():
RESULT = 'porn' in webpage.lower()
elif METHOD == 'LIST':
domain = compat_urllib_parse_urlparse(test['url']).netloc
domain = urllib.parse.urlparse(test['url']).netloc
if not domain:
print('\nFail: {}'.format(test['name']))
continue

@ -9,7 +9,7 @@ import types
import yt_dlp.extractor
from yt_dlp import YoutubeDL
from yt_dlp.compat import compat_os_name, compat_str
from yt_dlp.compat import compat_os_name
from yt_dlp.utils import preferredencoding, write_string
if 'pytest' in sys.modules:
@ -96,29 +96,29 @@ md5 = lambda s: hashlib.md5(s.encode()).hexdigest()
def expect_value(self, got, expected, field):
if isinstance(expected, compat_str) and expected.startswith('re:'):
if isinstance(expected, str) and expected.startswith('re:'):
match_str = expected[len('re:'):]
match_rex = re.compile(match_str)
self.assertTrue(
isinstance(got, compat_str),
f'Expected a {compat_str.__name__} object, but got {type(got).__name__} for field {field}')
isinstance(got, str),
f'Expected a {str.__name__} object, but got {type(got).__name__} for field {field}')
self.assertTrue(
match_rex.match(got),
f'field {field} (value: {got!r}) should match {match_str!r}')
elif isinstance(expected, compat_str) and expected.startswith('startswith:'):
elif isinstance(expected, str) and expected.startswith('startswith:'):
start_str = expected[len('startswith:'):]
self.assertTrue(
isinstance(got, compat_str),
f'Expected a {compat_str.__name__} object, but got {type(got).__name__} for field {field}')
isinstance(got, str),
f'Expected a {str.__name__} object, but got {type(got).__name__} for field {field}')
self.assertTrue(
got.startswith(start_str),
f'field {field} (value: {got!r}) should start with {start_str!r}')
elif isinstance(expected, compat_str) and expected.startswith('contains:'):
elif isinstance(expected, str) and expected.startswith('contains:'):
contains_str = expected[len('contains:'):]
self.assertTrue(
isinstance(got, compat_str),
f'Expected a {compat_str.__name__} object, but got {type(got).__name__} for field {field}')
isinstance(got, str),
f'Expected a {str.__name__} object, but got {type(got).__name__} for field {field}')
self.assertTrue(
contains_str in got,
f'field {field} (value: {got!r}) should contain {contains_str!r}')
@ -142,12 +142,12 @@ def expect_value(self, got, expected, field):
index, field, type_expected, type_got))
expect_value(self, item_got, item_expected, field)
else:
if isinstance(expected, compat_str) and expected.startswith('md5:'):
if isinstance(expected, str) and expected.startswith('md5:'):
self.assertTrue(
isinstance(got, compat_str),
isinstance(got, str),
f'Expected field {field} to be a unicode object, but got value {got!r} of type {type(got)!r}')
got = 'md5:' + md5(got)
elif isinstance(expected, compat_str) and re.match(r'^(?:min|max)?count:\d+', expected):
elif isinstance(expected, str) and re.match(r'^(?:min|max)?count:\d+', expected):
self.assertTrue(
isinstance(got, (list, dict)),
f'Expected field {field} to be a list or a dict, but it is of type {type(got).__name__}')
@ -236,7 +236,7 @@ def expect_info_dict(self, got_dict, expected_dict):
missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
if missing_keys:
def _repr(v):
if isinstance(v, compat_str):
if isinstance(v, str):
return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n')
elif isinstance(v, type):
return v.__name__

@ -14,7 +14,7 @@ import urllib.error
from test.helper import FakeYDL, assertRegexpMatches
from yt_dlp import YoutubeDL
from yt_dlp.compat import compat_os_name, compat_str
from yt_dlp.compat import compat_os_name
from yt_dlp.extractor import YoutubeIE
from yt_dlp.extractor.common import InfoExtractor
from yt_dlp.postprocessor.common import PostProcessor
@ -1185,7 +1185,7 @@ class TestYoutubeDL(unittest.TestCase):
def _entries(self):
for n in range(3):
video_id = compat_str(n)
video_id = str(n)
yield {
'_type': 'url_transparent',
'ie_key': VideoIE.ie_key(),

@ -15,7 +15,6 @@ from yt_dlp import compat
from yt_dlp.compat import (
compat_etree_fromstring,
compat_expanduser,
compat_str,
compat_urllib_parse_unquote,
compat_urllib_parse_urlencode,
)
@ -82,11 +81,11 @@ class TestCompat(unittest.TestCase):
</root>
'''
doc = compat_etree_fromstring(xml.encode())
self.assertTrue(isinstance(doc.attrib['foo'], compat_str))
self.assertTrue(isinstance(doc.attrib['spam'], compat_str))
self.assertTrue(isinstance(doc.find('normal').text, compat_str))
self.assertTrue(isinstance(doc.find('chinese').text, compat_str))
self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str))
self.assertTrue(isinstance(doc.attrib['foo'], str))
self.assertTrue(isinstance(doc.attrib['spam'], str))
self.assertTrue(isinstance(doc.find('normal').text, str))
self.assertTrue(isinstance(doc.find('chinese').text, str))
self.assertTrue(isinstance(doc.find('foo/bar').text, str))
def test_compat_etree_fromstring_doctype(self):
xml = '''<?xml version="1.0"?>

@ -26,7 +26,6 @@ from test.helper import (
)
import yt_dlp.YoutubeDL # isort: split
from yt_dlp.compat import compat_HTTPError
from yt_dlp.extractor import get_info_extractor
from yt_dlp.utils import (
DownloadError,
@ -168,7 +167,7 @@ def generator(test_case, tname):
force_generic_extractor=params.get('force_generic_extractor', False))
except (DownloadError, ExtractorError) as err:
# Check if the exception is not a network related one
if not err.exc_info[0] in (urllib.error.URLError, socket.timeout, UnavailableVideoError, http.client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
if not err.exc_info[0] in (urllib.error.URLError, socket.timeout, UnavailableVideoError, http.client.BadStatusLine) or (err.exc_info[0] == urllib.error.HTTPError and err.exc_info[1].code == 503):
raise
if try_num == RETRIES:

@ -13,7 +13,6 @@ import subprocess
import urllib.request
from test.helper import FakeYDL, get_params, is_download_test
from yt_dlp.compat import compat_str
@is_download_test
@ -102,13 +101,13 @@ class TestSocks(unittest.TestCase):
return ydl.urlopen('http://yt-dl.org/ip').read().decode()
def test_socks4(self):
self.assertTrue(isinstance(self._get_ip('socks4'), compat_str))
self.assertTrue(isinstance(self._get_ip('socks4'), str))
def test_socks4a(self):
self.assertTrue(isinstance(self._get_ip('socks4a'), compat_str))
self.assertTrue(isinstance(self._get_ip('socks4a'), str))
def test_socks5(self):
self.assertTrue(isinstance(self._get_ip('socks5'), compat_str))
self.assertTrue(isinstance(self._get_ip('socks5'), str))
if __name__ == '__main__':

@ -14,7 +14,6 @@ import string
import urllib.request
from test.helper import FakeYDL, is_download_test
from yt_dlp.compat import compat_str
from yt_dlp.extractor import YoutubeIE
from yt_dlp.jsinterp import JSInterpreter
@ -159,7 +158,7 @@ def t_factory(name, sig_func, url_pattern):
def signature(jscode, sig_input):
func = YoutubeIE(FakeYDL())._parse_sig_js(jscode)
src_sig = (
compat_str(string.printable[:sig_input])
str(string.printable[:sig_input])
if isinstance(sig_input, int) else sig_input)
return func(src_sig)

@ -26,7 +26,7 @@ from string import ascii_letters
from .cache import Cache
from .compat import HAS_LEGACY as compat_has_legacy
from .compat import compat_os_name, compat_shlex_quote, compat_str
from .compat import compat_os_name, compat_shlex_quote
from .cookies import load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
@ -791,7 +791,7 @@ class YoutubeDL:
return message
assert hasattr(self, '_output_process')
assert isinstance(message, compat_str)
assert isinstance(message, str)
line_count = message.count('\n') + 1
self._output_process.stdin.write((message + '\n').encode())
self._output_process.stdin.flush()
@ -827,7 +827,7 @@ class YoutubeDL:
def to_stderr(self, message, only_once=False):
"""Print message to stderr"""
assert isinstance(message, compat_str)
assert isinstance(message, str)
if self.params.get('logger'):
self.params['logger'].error(message)
else:
@ -1562,7 +1562,7 @@ class YoutubeDL:
additional_urls = (ie_result or {}).get('additional_urls')
if additional_urls:
# TODO: Improve MetadataParserPP to allow setting a list
if isinstance(additional_urls, compat_str):
if isinstance(additional_urls, str):
additional_urls = [additional_urls]
self.to_screen(
'[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
@ -2355,10 +2355,10 @@ class YoutubeDL:
def sanitize_string_field(info, string_field):
field = info.get(string_field)
if field is None or isinstance(field, compat_str):
if field is None or isinstance(field, str):
return
report_force_conversion(string_field, 'a string', 'string')
info[string_field] = compat_str(field)
info[string_field] = str(field)
def sanitize_numeric_fields(info):
for numeric_field in self._NUMERIC_FIELDS:
@ -2461,7 +2461,7 @@ class YoutubeDL:
sanitize_numeric_fields(format)
format['url'] = sanitize_url(format['url'])
if not format.get('format_id'):
format['format_id'] = compat_str(i)
format['format_id'] = str(i)
else:
# Sanitize format_id from characters used in format selector expression
format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])

@ -1,6 +1,7 @@
import base64
from math import ceil
from .compat import compat_b64decode, compat_ord
from .compat import compat_ord
from .dependencies import Cryptodome_AES
from .utils import bytes_to_intlist, intlist_to_bytes
@ -264,7 +265,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
"""
NONCE_LENGTH_BYTES = 8
data = bytes_to_intlist(compat_b64decode(data))
data = bytes_to_intlist(base64.b64decode(data))
password = bytes_to_intlist(password.encode())
key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))

@ -1,3 +1,4 @@
import base64
import contextlib
import ctypes
import http.cookiejar
@ -18,7 +19,6 @@ from .aes import (
aes_gcm_decrypt_and_verify_bytes,
unpad_pkcs7,
)
from .compat import compat_b64decode
from .dependencies import (
_SECRETSTORAGE_UNAVAILABLE_REASON,
secretstorage,
@ -836,7 +836,7 @@ def _get_windows_v10_key(browser_root, logger):
except KeyError:
logger.error('no encrypted key in Local State')
return None
encrypted_key = compat_b64decode(base64_key)
encrypted_key = base64.b64decode(base64_key)
prefix = b'DPAPI'
if not encrypted_key.startswith(prefix):
logger.error('invalid key')

@ -6,7 +6,7 @@ import sys
import time
from .fragment import FragmentFD
from ..compat import functools # isort: split
from ..compat import functools
from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor
from ..utils import (
Popen,

@ -1,16 +1,13 @@
import base64
import io
import itertools
import struct
import time
import urllib.error
import urllib.parse
from .fragment import FragmentFD
from ..compat import (
compat_b64decode,
compat_etree_fromstring,
compat_urllib_parse_urlparse,
compat_urlparse,
)
from ..compat import compat_etree_fromstring
from ..utils import fix_xml_ampersands, xpath_text
@ -300,12 +297,12 @@ class F4mFD(FragmentFD):
# 1. http://live-1-1.rutube.ru/stream/1024/HDS/SD/C2NKsS85HQNckgn5HdEmOQ/1454167650/S-s604419906/move/four/dirs/upper/1024-576p.f4m
bootstrap_url = node.get('url')
if bootstrap_url:
bootstrap_url = compat_urlparse.urljoin(
bootstrap_url = urllib.parse.urljoin(
base_url, bootstrap_url)
boot_info = self._get_bootstrap_from_url(bootstrap_url)
else:
bootstrap_url = None
bootstrap = compat_b64decode(node.text)
bootstrap = base64.b64decode(node.text)
boot_info = read_bootstrap_info(bootstrap)
return boot_info, bootstrap_url
@ -335,14 +332,14 @@ class F4mFD(FragmentFD):
# Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
man_base_url = get_base_url(doc) or man_url
base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url'])
base_url = urllib.parse.urljoin(man_base_url, media.attrib['url'])
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
boot_info, bootstrap_url = self._parse_bootstrap_node(
bootstrap_node, man_base_url)
live = boot_info['live']
metadata_node = media.find(_add_ns('metadata'))
if metadata_node is not None:
metadata = compat_b64decode(metadata_node.text)
metadata = base64.b64decode(metadata_node.text)
else:
metadata = None
@ -370,7 +367,7 @@ class F4mFD(FragmentFD):
if not live:
write_metadata_tag(dest_stream, metadata)
base_url_parsed = compat_urllib_parse_urlparse(base_url)
base_url_parsed = urllib.parse.urlparse(base_url)
self._start_frag_download(ctx, info_dict)

@ -1,12 +1,12 @@
import binascii
import io
import re
import urllib.parse
from . import get_suitable_downloader
from .external import FFmpegFD
from .fragment import FragmentFD
from .. import webvtt
from ..compat import compat_urlparse
from ..dependencies import Cryptodome_AES
from ..utils import bug_reports_message, parse_m3u8_attributes, update_url_query
@ -140,7 +140,7 @@ class HlsFD(FragmentFD):
extra_query = None
extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
if extra_param_to_segment_url:
extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url)
extra_query = urllib.parse.parse_qs(extra_param_to_segment_url)
i = 0
media_sequence = 0
decrypt_info = {'METHOD': 'NONE'}
@ -162,7 +162,7 @@ class HlsFD(FragmentFD):
frag_url = (
line
if re.match(r'^https?://', line)
else compat_urlparse.urljoin(man_url, line))
else urllib.parse.urljoin(man_url, line))
if extra_query:
frag_url = update_url_query(frag_url, extra_query)
@ -187,7 +187,7 @@ class HlsFD(FragmentFD):
frag_url = (
map_info.get('URI')
if re.match(r'^https?://', map_info.get('URI'))
else compat_urlparse.urljoin(man_url, map_info.get('URI')))
else urllib.parse.urljoin(man_url, map_info.get('URI')))
if extra_query:
frag_url = update_url_query(frag_url, extra_query)
@ -215,7 +215,7 @@ class HlsFD(FragmentFD):
if 'IV' in decrypt_info:
decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32))
if not re.match(r'^https?://', decrypt_info['URI']):
decrypt_info['URI'] = compat_urlparse.urljoin(
decrypt_info['URI'] = urllib.parse.urljoin(
man_url, decrypt_info['URI'])
if extra_query:
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)

@ -4,7 +4,6 @@ import subprocess
import time
from .common import FileDownloader
from ..compat import compat_str
from ..utils import (
Popen,
check_executable,
@ -143,7 +142,7 @@ class RtmpFD(FileDownloader):
if isinstance(conn, list):
for entry in conn:
basic_args += ['--conn', entry]
elif isinstance(conn, compat_str):
elif isinstance(conn, str):
basic_args += ['--conn', conn]
if protocol is not None:
basic_args += ['--protocol', protocol]

@ -7,13 +7,13 @@ import json
import re
import struct
import time
import urllib.parse
import urllib.request
import urllib.response
import uuid
from .common import InfoExtractor
from ..aes import aes_ecb_decrypt
from ..compat import compat_urllib_parse_urlparse
from ..utils import (
ExtractorError,
bytes_to_intlist,
@ -137,7 +137,7 @@ class AbemaLicenseHandler(urllib.request.BaseHandler):
def abematv_license_open(self, url):
url = request_to_url(url)
ticket = compat_urllib_parse_urlparse(url).netloc
ticket = urllib.parse.urlparse(url).netloc
response_data = self._get_videokey_from_ticket(ticket)
return urllib.response.addinfourl(io.BytesIO(response_data), headers={
'Content-Length': len(response_data),

@ -1,8 +1,8 @@
import random
from .common import InfoExtractor
from ..utils import ExtractorError, try_get, compat_str, str_or_none
from ..compat import compat_urllib_parse_unquote
from ..compat import compat_str, compat_urllib_parse_unquote
from ..utils import ExtractorError, str_or_none, try_get
class AudiusBaseIE(InfoExtractor):

@ -13,19 +13,12 @@ import os
import random
import sys
import time
import urllib.parse
import urllib.request
import xml.etree.ElementTree
from ..compat import functools, re # isort: split
from ..compat import (
compat_etree_fromstring,
compat_expanduser,
compat_os_name,
compat_str,
compat_urllib_parse_unquote,
compat_urllib_parse_urlencode,
compat_urlparse,
)
from ..compat import compat_etree_fromstring, compat_expanduser, compat_os_name
from ..downloader import FileDownloader
from ..downloader.f4m import get_base_url, remove_encrypted_media
from ..utils import (
@ -834,7 +827,7 @@ class InfoExtractor:
"""
# Strip hashes from the URL (#1038)
if isinstance(url_or_request, (compat_str, str)):
if isinstance(url_or_request, str):
url_or_request = url_or_request.partition('#')[0]
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query, expected_status=expected_status)
@ -1427,7 +1420,7 @@ class InfoExtractor:
return {}
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
if isinstance(json_ld, compat_str):
if isinstance(json_ld, str):
json_ld = self._parse_json(json_ld, video_id, fatal=fatal)
if not json_ld:
return {}
@ -1517,7 +1510,7 @@ class InfoExtractor:
# both types can have 'name' property(inherited from 'Thing' type). [1]
# however some websites are using 'Text' type instead.
# 1. https://schema.org/VideoObject
'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, compat_str) else None,
'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, str) else None,
'filesize': int_or_none(float_or_none(e.get('contentSize'))),
'tbr': int_or_none(e.get('bitrate')),
'width': int_or_none(e.get('width')),
@ -2166,7 +2159,7 @@ class InfoExtractor:
]), m3u8_doc)
def format_url(url):
return url if re.match(r'^https?://', url) else compat_urlparse.urljoin(m3u8_url, url)
return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url)
if self.get_param('hls_split_discontinuity', False):
def _extract_m3u8_playlist_indices(manifest_url=None, m3u8_doc=None):
@ -2539,7 +2532,7 @@ class InfoExtractor:
})
continue
src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
src_url = src if src.startswith('http') else urllib.parse.urljoin(base, src)
src_url = src_url.strip()
if proto == 'm3u8' or src_ext == 'm3u8':
@ -2562,7 +2555,7 @@ class InfoExtractor:
'plugin': 'flowplayer-3.2.0.1',
}
f4m_url += '&' if '?' in f4m_url else '?'
f4m_url += compat_urllib_parse_urlencode(f4m_params)
f4m_url += urllib.parse.urlencode(f4m_params)
formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
elif src_ext == 'mpd':
formats.extend(self._extract_mpd_formats(
@ -2832,7 +2825,7 @@ class InfoExtractor:
if re.match(r'^https?://', base_url):
break
if mpd_base_url and base_url.startswith('/'):
base_url = compat_urlparse.urljoin(mpd_base_url, base_url)
base_url = urllib.parse.urljoin(mpd_base_url, base_url)
elif mpd_base_url and not re.match(r'^https?://', base_url):
if not mpd_base_url.endswith('/'):
mpd_base_url += '/'
@ -3102,7 +3095,7 @@ class InfoExtractor:
sampling_rate = int_or_none(track.get('SamplingRate'))
track_url_pattern = re.sub(r'{[Bb]itrate}', track.attrib['Bitrate'], url_pattern)
track_url_pattern = compat_urlparse.urljoin(ism_url, track_url_pattern)
track_url_pattern = urllib.parse.urljoin(ism_url, track_url_pattern)
fragments = []
fragment_ctx = {
@ -3121,7 +3114,7 @@ class InfoExtractor:
fragment_ctx['duration'] = (next_fragment_time - fragment_ctx['time']) / fragment_repeat
for _ in range(fragment_repeat):
fragments.append({
'url': re.sub(r'{start[ _]time}', compat_str(fragment_ctx['time']), track_url_pattern),
'url': re.sub(r'{start[ _]time}', str(fragment_ctx['time']), track_url_pattern),
'duration': fragment_ctx['duration'] / stream_timescale,
})
fragment_ctx['time'] += fragment_ctx['duration']
@ -3365,7 +3358,7 @@ class InfoExtractor:
return formats, subtitles
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
query = compat_urlparse.urlparse(url).query
query = urllib.parse.urlparse(url).query
url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
mobj = re.search(
r'(?:(?:http|rtmp|rtsp)(?P<s>s)?:)?(?P<url>//[^?]+)', url)
@ -3471,7 +3464,7 @@ class InfoExtractor:
if not isinstance(track, dict):
continue
track_kind = track.get('kind')
if not track_kind or not isinstance(track_kind, compat_str):
if not track_kind or not isinstance(track_kind, str):
continue
if track_kind.lower() not in ('captions', 'subtitles'):
continue
@ -3544,7 +3537,7 @@ class InfoExtractor:
# Often no height is provided but there is a label in
# format like "1080p", "720p SD", or 1080.
height = int_or_none(self._search_regex(
r'^(\d{3,4})[pP]?(?:\b|$)', compat_str(source.get('label') or ''),
r'^(\d{3,4})[pP]?(?:\b|$)', str(source.get('label') or ''),
'height', default=None))
a_format = {
'url': source_url,
@ -3770,10 +3763,10 @@ class InfoExtractor:
return headers
def _generic_id(self, url):
return compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
return urllib.parse.unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
def _generic_title(self, url):
return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
return urllib.parse.unquote(os.path.splitext(url_basename(url))[0])
@staticmethod
def _availability(is_private=None, needs_premium=None, needs_subscription=None, needs_auth=None, is_unlisted=None):

@ -1,5 +1,6 @@
import urllib.parse
from .common import InfoExtractor
from ..compat import compat_urlparse
class RtmpIE(InfoExtractor):
@ -23,7 +24,7 @@ class RtmpIE(InfoExtractor):
'formats': [{
'url': url,
'ext': 'flv',
'format_id': compat_urlparse.urlparse(url).scheme,
'format_id': urllib.parse.urlparse(url).scheme,
}],
}

@ -1,12 +1,8 @@
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
urlencode_postdata,
compat_str,
ExtractorError,
)
from ..compat import compat_str
from ..utils import ExtractorError, int_or_none, urlencode_postdata
class CuriosityStreamBaseIE(InfoExtractor):
@ -50,7 +46,7 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
IE_NAME = 'curiositystream'
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)'
_TESTS = [{
'url': 'https://app.curiositystream.com/video/2',
'url': 'http://app.curiositystream.com/video/2',
'info_dict': {
'id': '2',
'ext': 'mp4',

@ -3,8 +3,8 @@ import json
import re
import urllib.parse
from .common import InfoExtractor
from .adobepass import AdobePassIE
from .common import InfoExtractor
from .once import OnceIE
from ..utils import (
determine_ext,
@ -197,7 +197,7 @@ class ESPNArticleIE(InfoExtractor):
@classmethod
def suitable(cls, url):
return False if (ESPNIE.suitable(url) or WatchESPNIE.suitable(url)) else super(ESPNArticleIE, cls).suitable(url)
return False if (ESPNIE.suitable(url) or WatchESPNIE.suitable(url)) else super().suitable(url)
def _real_extract(self, url):
video_id = self._match_id(url)

@ -1,5 +1,6 @@
import os
import re
import urllib.parse
import xml.etree.ElementTree
from .ant1newsgr import Ant1NewsGrEmbedIE
@ -106,12 +107,7 @@ from .yapfiles import YapFilesIE
from .youporn import YouPornIE
from .youtube import YoutubeIE
from .zype import ZypeIE
from ..compat import (
compat_etree_fromstring,
compat_str,
compat_urllib_parse_unquote,
compat_urlparse,
)
from ..compat import compat_etree_fromstring
from ..utils import (
KNOWN_EXTENSIONS,
ExtractorError,
@ -2703,7 +2699,7 @@ class GenericIE(InfoExtractor):
title = self._html_search_meta('DC.title', webpage, fatal=True)
camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
camtasia_url = urllib.parse.urljoin(url, camtasia_cfg)
camtasia_cfg = self._download_xml(
camtasia_url, video_id,
note='Downloading camtasia configuration',
@ -2719,7 +2715,7 @@ class GenericIE(InfoExtractor):
entries.append({
'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
'title': f'{title} - {n.tag}',
'url': compat_urlparse.urljoin(url, url_n.text),
'url': urllib.parse.urljoin(url, url_n.text),
'duration': float_or_none(n.find('./duration').text),
})
@ -2771,7 +2767,7 @@ class GenericIE(InfoExtractor):
if url.startswith('//'):
return self.url_result(self.http_scheme() + url)
parsed_url = compat_urlparse.urlparse(url)
parsed_url = urllib.parse.urlparse(url)
if not parsed_url.scheme:
default_search = self.get_param('default_search')
if default_search is None:
@ -2847,7 +2843,7 @@ class GenericIE(InfoExtractor):
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
if m:
self.report_detected('direct video link')
format_id = compat_str(m.group('format_id'))
format_id = str(m.group('format_id'))
subtitles = {}
if format_id.endswith('mpegurl'):
formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
@ -2966,7 +2962,7 @@ class GenericIE(InfoExtractor):
# Unescaping the whole page allows to handle those cases in a generic way
# FIXME: unescaping the whole page may break URLs, commenting out for now.
# There probably should be a second run of generic extractor on unescaped webpage.
# webpage = compat_urllib_parse_unquote(webpage)
# webpage = urllib.parse.unquote(webpage)
# Unescape squarespace embeds to be detected by generic extractor,
# see https://github.com/ytdl-org/youtube-dl/issues/21294
@ -3239,7 +3235,7 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url'))
mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
if mobj is not None:
return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
return self.url_result(urllib.parse.unquote(mobj.group('url')))
# Look for funnyordie embed
matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
@ -3492,7 +3488,7 @@ class GenericIE(InfoExtractor):
r'<iframe[^>]+src="(?:https?:)?(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
if mobj is not None:
return self.url_result(
compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
urllib.parse.urljoin(url, mobj.group('url')), 'UDNEmbed')
# Look for Senate ISVP iframe
senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
@ -3725,7 +3721,7 @@ class GenericIE(InfoExtractor):
if mediasite_urls:
entries = [
self.url_result(smuggle_url(
compat_urlparse.urljoin(url, mediasite_url),
urllib.parse.urljoin(url, mediasite_url),
{'UrlReferrer': url}), ie=MediasiteIE.ie_key())
for mediasite_url in mediasite_urls]
return self.playlist_result(entries, video_id, video_title)
@ -3920,11 +3916,11 @@ class GenericIE(InfoExtractor):
subtitles = {}
for source in sources:
src = source.get('src')
if not src or not isinstance(src, compat_str):
if not src or not isinstance(src, str):
continue
src = compat_urlparse.urljoin(url, src)
src = urllib.parse.urljoin(url, src)
src_type = source.get('type')
if isinstance(src_type, compat_str):
if isinstance(src_type, str):
src_type = src_type.lower()
ext = determine_ext(src).lower()
if src_type == 'video/youtube':
@ -3958,7 +3954,7 @@ class GenericIE(InfoExtractor):
if not src:
continue
subtitles.setdefault(dict_get(sub, ('language', 'srclang')) or 'und', []).append({
'url': compat_urlparse.urljoin(url, src),
'url': urllib.parse.urljoin(url, src),
'name': sub.get('label'),
'http_headers': {
'Referer': full_response.geturl(),
@ -3985,7 +3981,7 @@ class GenericIE(InfoExtractor):
return True
if RtmpIE.suitable(vurl):
return True
vpath = compat_urlparse.urlparse(vurl).path
vpath = urllib.parse.urlparse(vurl).path
vext = determine_ext(vpath, None)
return vext not in (None, 'swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js', 'xml')
@ -4113,7 +4109,7 @@ class GenericIE(InfoExtractor):
if refresh_header:
found = re.search(REDIRECT_REGEX, refresh_header)
if found:
new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
new_url = urllib.parse.urljoin(url, unescapeHTML(found.group(1)))
if new_url != url:
self.report_following_redirect(new_url)
return {
@ -4139,8 +4135,8 @@ class GenericIE(InfoExtractor):
for video_url in orderedSet(found):
video_url = unescapeHTML(video_url)
video_url = video_url.replace('\\/', '/')
video_url = compat_urlparse.urljoin(url, video_url)
video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
video_url = urllib.parse.urljoin(url, video_url)
video_id = urllib.parse.unquote(os.path.basename(video_url))
# Sometimes, jwplayer extraction will result in a YouTube URL
if YoutubeIE.suitable(video_url):

@ -1,13 +1,8 @@
import itertools
from .common import InfoExtractor
from ..utils import (
qualities,
compat_str,
parse_duration,
parse_iso8601,
str_to_int,
)
from ..compat import compat_str
from ..utils import parse_duration, parse_iso8601, qualities, str_to_int
class GigaIE(InfoExtractor):

@ -1,13 +1,13 @@
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
clean_html,
parse_iso8601,
determine_ext,
float_or_none,
int_or_none,
compat_str,
determine_ext,
parse_iso8601,
)

@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
clean_html,
compat_str,
format_field,
int_or_none,
parse_iso8601,

@ -3,18 +3,17 @@ import random
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..compat import compat_HTTPError, compat_str
from ..utils import (
compat_HTTPError,
determine_ext,
ExtractorError,
determine_ext,
int_or_none,
parse_duration,
parse_iso8601,
str_or_none,
try_get,
urljoin,
url_or_none,
urljoin,
)

@ -1,9 +1,6 @@
from .prosiebensat1 import ProSiebenSat1BaseIE
from ..utils import (
unified_strdate,
parse_duration,
compat_str,
)
from ..compat import compat_str
from ..utils import parse_duration, unified_strdate
class Puls4IE(ProSiebenSat1BaseIE):

@ -1,6 +1,6 @@
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
compat_str,
float_or_none,
int_or_none,
smuggle_url,

@ -13,18 +13,11 @@ import sys
import threading
import time
import traceback
import urllib.error
import urllib.parse
from .common import InfoExtractor, SearchInfoExtractor
from ..compat import functools # isort: split
from ..compat import (
compat_HTTPError,
compat_parse_qs,
compat_str,
compat_urllib_parse_urlencode,
compat_urllib_parse_urlparse,
compat_urlparse,
)
from ..compat import functools
from ..jsinterp import JSInterpreter
from ..utils import (
NO_DEFAULT,
@ -381,11 +374,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
pref = {}
if pref_cookie:
try:
pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))
pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
except ValueError:
self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
pref.update({'hl': 'en', 'tz': 'UTC'})
self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))
self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
def _real_initialize(self):
self._initialize_pref()
@ -413,19 +406,19 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _extract_client_name(self, ytcfg, default_client='web'):
return self._ytcfg_get_safe(
ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
def _extract_client_version(self, ytcfg, default_client='web'):
return self._ytcfg_get_safe(
ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
def _select_api_hostname(self, req_api_hostname, default_client=None):
return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
or req_api_hostname or self._get_innertube_host(default_client or 'web'))
def _extract_api_key(self, ytcfg=None, default_client='web'):
return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
def _extract_context(self, ytcfg=None, default_client='web'):
context = get_first(
@ -497,7 +490,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
if ytcfg:
token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)
if token:
return token
if webpage:
@ -513,12 +506,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
"""
for data in args:
# ytcfg includes channel_syncid if on secondary channel
delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], str)
if delegated_sid:
return delegated_sid
sync_ids = (try_get(
data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')
lambda x: x['DATASYNC_ID']), str) or '').split('||')
if len(sync_ids) >= 2 and sync_ids[1]:
# datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
# and just "user_syncid||" for primary channel. We only want the channel_syncid
@ -552,7 +545,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))
headers = {
'X-YouTube-Client-Name': compat_str(
'X-YouTube-Client-Name': str(
self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
'Origin': origin,
@ -612,7 +605,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _extract_continuation_ep_data(cls, continuation_ep: dict):
if isinstance(continuation_ep, dict):
continuation = try_get(
continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
continuation_ep, lambda x: x['continuationCommand']['token'], str)
if not continuation:
return