You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
yt-dlp/yt_dlp/extractor/youtube.py

6041 lines
273 KiB

import base64
import calendar
import copy
import datetime
import hashlib
import itertools
import json
import math
import os.path
import random
import re
import sys
import threading
import time
import traceback
import urllib.error
import urllib.parse
from .common import InfoExtractor, SearchInfoExtractor
from ..compat import functools
from ..jsinterp import JSInterpreter
from ..utils import (
NO_DEFAULT,
ExtractorError,
bug_reports_message,
classproperty,
clean_html,
datetime_from_str,
dict_get,
error_to_compat_str,
float_or_none,
format_field,
get_first,
int_or_none,
is_html,
join_nonempty,
js_to_json,
mimetype2ext,
network_exceptions,
orderedSet,
parse_codecs,
parse_count,
parse_duration,
parse_iso8601,
parse_qs,
qualities,
remove_end,
remove_start,
smuggle_url,
str_or_none,
str_to_int,
strftime_or_none,
traverse_obj,
try_get,
unescapeHTML,
unified_strdate,
unified_timestamp,
unsmuggle_url,
update_url_query,
url_or_none,
urljoin,
variadic,
)
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
'web': {
'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'WEB',
'clientVersion': '2.20211221.00.00',
}
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 1
},
'web_embedded': {
'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'WEB_EMBEDDED_PLAYER',
'clientVersion': '1.20211215.00.01',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 56
},
'web_music': {
'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
'INNERTUBE_HOST': 'music.youtube.com',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'WEB_REMIX',
'clientVersion': '1.20211213.00.00',
}
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
},
'web_creator': {
'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'WEB_CREATOR',
'clientVersion': '1.20211220.02.00',
}
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
},
'android': {
'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'ANDROID',
'clientVersion': '16.49',
}
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
'REQUIRE_JS_PLAYER': False
},
'android_embedded': {
'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'ANDROID_EMBEDDED_PLAYER',
'clientVersion': '16.49',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
'REQUIRE_JS_PLAYER': False
},
'android_music': {
'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'ANDROID_MUSIC',
'clientVersion': '4.57',
}
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
'REQUIRE_JS_PLAYER': False
},
'android_creator': {
'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'ANDROID_CREATOR',
'clientVersion': '21.47',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
'REQUIRE_JS_PLAYER': False
},
# iOS clients have HLS live streams. Setting device model to get 60fps formats.
# See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
'ios': {
'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'IOS',
'clientVersion': '16.46',
'deviceModel': 'iPhone14,3',
}
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
'REQUIRE_JS_PLAYER': False
},
'ios_embedded': {
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'IOS_MESSAGES_EXTENSION',
'clientVersion': '16.46',
'deviceModel': 'iPhone14,3',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
'REQUIRE_JS_PLAYER': False
},
'ios_music': {
'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'IOS_MUSIC',
'clientVersion': '4.57',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
'REQUIRE_JS_PLAYER': False
},
'ios_creator': {
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'IOS_CREATOR',
'clientVersion': '21.47',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
'REQUIRE_JS_PLAYER': False
},
# mweb has 'ultralow' formats
# See: https://github.com/yt-dlp/yt-dlp/pull/557
'mweb': {
'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'MWEB',
'clientVersion': '2.20211221.01.00',
}
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 2
},
# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
# See: https://github.com/zerodytrash/YouTube-Internal-Clients
'tv_embedded': {
'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
'clientVersion': '2.0',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 85
},
}
def _split_innertube_client(client_name):
variant, *base = client_name.rsplit('.', 1)
if base:
return variant, base[0], variant
base, *variant = client_name.split('_', 1)
return client_name, base, variant[0] if variant else None
def build_innertube_clients():
THIRD_PARTY = {
'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
}
BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
priority = qualities(BASE_CLIENTS[::-1])
for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
_, base_client, variant = _split_innertube_client(client)
ytcfg['priority'] = 10 * priority(base_client)
if not variant:
INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
embedscreen['priority'] -= 3
elif variant == 'embedded':
ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
ytcfg['priority'] -= 2
else:
ytcfg['priority'] -= 3
build_innertube_clients()
class YoutubeBaseInfoExtractor(InfoExtractor):
"""Provide base functions for Youtube extractors"""
_RESERVED_NAMES = (
r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
r'browse|oembed|get_video_info|iframe_api|s/player|'
r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
# _NETRC_MACHINE = 'youtube'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False
_INVIDIOUS_SITES = (
# invidious-redirect websites
r'(?:www\.)?redirect\.invidious\.io',
r'(?:(?:www|dev)\.)?invidio\.us',
# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
r'(?:www\.)?invidious\.pussthecat\.org',
r'(?:www\.)?invidious\.zee\.li',
r'(?:www\.)?invidious\.ethibox\.fr',
r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
# youtube-dl invidious instances list
r'(?:(?:www|no)\.)?invidiou\.sh',
r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
r'(?:www\.)?invidious\.kabi\.tk',
r'(?:www\.)?invidious\.mastodon\.host',
r'(?:www\.)?invidious\.zapashcanon\.fr',
r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
r'(?:www\.)?invidious\.tinfoil-hat\.net',
r'(?:www\.)?invidious\.himiko\.cloud',
r'(?:www\.)?invidious\.reallyancient\.tech',
r'(?:www\.)?invidious\.tube',
r'(?:www\.)?invidiou\.site',
r'(?:www\.)?invidious\.site',
r'(?:www\.)?invidious\.xyz',
r'(?:www\.)?invidious\.nixnet\.xyz',
r'(?:www\.)?invidious\.048596\.xyz',
r'(?:www\.)?invidious\.drycat\.fr',
r'(?:www\.)?inv\.skyn3t\.in',
r'(?:www\.)?tube\.poal\.co',
r'(?:www\.)?tube\.connect\.cafe',
r'(?:www\.)?vid\.wxzm\.sx',
r'(?:www\.)?vid\.mint\.lgbt',
r'(?:www\.)?vid\.puffyan\.us',
r'(?:www\.)?yewtu\.be',
r'(?:www\.)?yt\.elukerio\.org',
r'(?:www\.)?yt\.lelux\.fi',
r'(?:www\.)?invidious\.ggc-project\.de',
r'(?:www\.)?yt\.maisputain\.ovh',
r'(?:www\.)?ytprivate\.com',
r'(?:www\.)?invidious\.13ad\.de',
r'(?:www\.)?invidious\.toot\.koeln',
r'(?:www\.)?invidious\.fdn\.fr',
r'(?:www\.)?watch\.nettohikari\.com',
r'(?:www\.)?invidious\.namazso\.eu',
r'(?:www\.)?invidious\.silkky\.cloud',
r'(?:www\.)?invidious\.exonip\.de',
r'(?:www\.)?invidious\.riverside\.rocks',
r'(?:www\.)?invidious\.blamefran\.net',
r'(?:www\.)?invidious\.moomoo\.de',
r'(?:www\.)?ytb\.trom\.tf',
r'(?:www\.)?yt\.cyberhost\.uk',
r'(?:www\.)?kgg2m7yk5aybusll\.onion',
r'(?:www\.)?qklhadlycap4cnod\.onion',
r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
# piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
r'(?:www\.)?piped\.kavin\.rocks',
r'(?:www\.)?piped\.silkky\.cloud',
r'(?:www\.)?piped\.tokhmi\.xyz',
r'(?:www\.)?piped\.moomoo\.me',
r'(?:www\.)?il\.ax',
r'(?:www\.)?piped\.syncpundit\.com',
r'(?:www\.)?piped\.mha\.fi',
r'(?:www\.)?piped\.mint\.lgbt',
r'(?:www\.)?piped\.privacy\.com\.de',
)
def _initialize_consent(self):
cookies = self._get_cookies('https://www.youtube.com/')
if cookies.get('__Secure-3PSID'):
return
consent_id = None
consent = cookies.get('CONSENT')
if consent:
if 'YES' in consent.value:
return
consent_id = self._search_regex(
r'PENDING\+(\d+)', consent.value, 'consent', default=None)
if not consent_id:
consent_id = random.randint(100, 999)
self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
def _initialize_pref(self):
cookies = self._get_cookies('https://www.youtube.com/')
pref_cookie = cookies.get('PREF')
pref = {}
if pref_cookie:
try:
pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
except ValueError:
self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
pref.update({'hl': 'en', 'tz': 'UTC'})
self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
def _real_initialize(self):
self._initialize_pref()
self._initialize_consent()
self._check_login_required()
def _check_login_required(self):
if self._LOGIN_REQUIRED and not self._cookies_passed:
self.raise_login_required('Login details are needed to download this content', method='cookies')
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
def _get_default_ytcfg(self, client='web'):
return copy.deepcopy(INNERTUBE_CLIENTS[client])
def _get_innertube_host(self, client='web'):
return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
# try_get but with fallback to default ytcfg client values when present
_func = lambda y: try_get(y, getter, expected_type)
return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
def _extract_client_name(self, ytcfg, default_client='web'):
return self._ytcfg_get_safe(
ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
def _extract_client_version(self, ytcfg, default_client='web'):
return self._ytcfg_get_safe(
ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
def _select_api_hostname(self, req_api_hostname, default_client=None):
return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
or req_api_hostname or self._get_innertube_host(default_client or 'web'))
def _extract_api_key(self, ytcfg=None, default_client='web'):
return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
def _extract_context(self, ytcfg=None, default_client='web'):
context = get_first(
(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
# Enforce language and tz for extraction
client_context = traverse_obj(context, 'client', expected_type=dict, default={})
client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
return context
_SAPISID = None
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
time_now = round(time.time())
if self._SAPISID is None:
yt_cookies = self._get_cookies('https://www.youtube.com')
# Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
# See: https://github.com/yt-dlp/yt-dlp/issues/393
sapisid_cookie = dict_get(
yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
if sapisid_cookie and sapisid_cookie.value:
self._SAPISID = sapisid_cookie.value
self.write_debug('Extracted SAPISID cookie')
# SAPISID cookie is required if not already present
if not yt_cookies.get('SAPISID'):
self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
self._set_cookie(
'.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
else:
self._SAPISID = False
if not self._SAPISID:
return None
# SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
sapisidhash = hashlib.sha1(
f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
return f'SAPISIDHASH {time_now}_{sapisidhash}'
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
note='Downloading API JSON', errnote='Unable to download API page',
context=None, api_key=None, api_hostname=None, default_client='web'):
data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
data.update(query)
real_headers = self.generate_api_headers(default_client=default_client)
real_headers.update({'content-type': 'application/json'})
if headers:
real_headers.update(headers)
api_key = (self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
or api_key or self._extract_api_key(default_client=default_client))
return self._download_json(
f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}',
video_id=video_id, fatal=fatal, note=note, errnote=errnote,
data=json.dumps(data).encode('utf8'), headers=real_headers,
query={'key': api_key, 'prettyPrint': 'false'})
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
return self._search_json(self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal)
@staticmethod
def _extract_session_index(*data):
"""
Index of current account in account list.
See: https://github.com/yt-dlp/yt-dlp/pull/519
"""
for ytcfg in data:
session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
if session_index is not None:
return session_index
# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
if ytcfg:
token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)
if token:
return token
if webpage:
return self._search_regex(
r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
'identity token', default=None, fatal=False)
@staticmethod
def _extract_account_syncid(*args):
"""
Extract syncId required to download private playlists of secondary channels
@params response and/or ytcfg
"""
for data in args:
# ytcfg includes channel_syncid if on secondary channel
delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], str)
if delegated_sid:
return delegated_sid
sync_ids = (try_get(
data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
lambda x: x['DATASYNC_ID']), str) or '').split('||')
if len(sync_ids) >= 2 and sync_ids[1]:
# datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
# and just "user_syncid||" for primary channel. We only want the channel_syncid
return sync_ids[0]
@staticmethod
def _extract_visitor_data(*args):
"""
Extracts visitorData from an API response or ytcfg
Appears to be used to track session state
"""
return get_first(
args, [('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))],
expected_type=str)
@functools.cached_property
def is_authenticated(self):
return bool(self._generate_sapisidhash_header())
def extract_ytcfg(self, video_id, webpage):
if not webpage:
return {}
return self._parse_json(
self._search_regex(
r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
default='{}'), video_id, fatal=False) or {}
def generate_api_headers(
self, *, ytcfg=None, account_syncid=None, session_index=None,
visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))
headers = {
'X-YouTube-Client-Name': str(
self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
'Origin': origin,
'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
}
if session_index is None:
session_index = self._extract_session_index(ytcfg)
if account_syncid or session_index is not None:
headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
auth = self._generate_sapisidhash_header(origin)
if auth is not None:
headers['Authorization'] = auth
headers['X-Origin'] = origin
return {h: v for h, v in headers.items() if v is not None}
def _download_ytcfg(self, client, video_id):
url = {
'web': 'https://www.youtube.com',
'web_music': 'https://music.youtube.com',
'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
}.get(client)
if not url:
return {}
webpage = self._download_webpage(
url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')
return self.extract_ytcfg(video_id, webpage) or {}
@staticmethod
def _build_api_continuation_query(continuation, ctp=None):
query = {
'continuation': continuation
}
# TODO: Inconsistency with clickTrackingParams.
# Currently we have a fixed ctp contained within context (from ytcfg)
# and a ctp in root query for continuation.
if ctp:
query['clickTracking'] = {'clickTrackingParams': ctp}
return query
@classmethod
def _extract_next_continuation_data(cls, renderer):
next_continuation = try_get(
renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
lambda x: x['continuation']['reloadContinuationData']), dict)
if not next_continuation:
return
continuation = next_continuation.get('continuation')
if not continuation:
return
ctp = next_continuation.get('clickTrackingParams')
return cls._build_api_continuation_query(continuation, ctp)
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
if isinstance(continuation_ep, dict):
continuation = try_get(
continuation_ep, lambda x: x['continuationCommand']['token'], str)
if not continuation:
return
ctp = continuation_ep.get('clickTrackingParams')
return cls._build_api_continuation_query(continuation, ctp)
@classmethod
def _extract_continuation(cls, renderer):
next_continuation = cls._extract_next_continuation_data(renderer)
if next_continuation:
return next_continuation
contents = []
for key in ('contents', 'items'):
contents.extend(try_get(renderer, lambda x: x[key], list) or [])
for content in contents:
if not isinstance(content, dict):
continue
continuation_ep = try_get(
content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
dict)
continuation = cls._extract_continuation_ep_data(continuation_ep)
if continuation:
return continuation
@classmethod
def _extract_alerts(cls, data):
for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
if not isinstance(alert_dict, dict):
continue
for alert in alert_dict.values():
alert_type = alert.get('type')
if not alert_type:
continue
message = cls._get_text(alert, 'text')
if message:
yield alert_type, message
def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
errors = []
warnings = []
for alert_type, alert_message in alerts:
if alert_type.lower() == 'error' and fatal:
errors.append([alert_type, alert_message])
else:
warnings.append([alert_type, alert_message])
for alert_type, alert_message in (warnings + errors[:-1]):
self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
if errors:
raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
def _extract_and_report_alerts(self, data, *args, **kwargs):
return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
def _extract_badges(self, renderer: dict):
badges = set()
for badge in try_get(renderer, lambda x: x['badges'], list) or []:
label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], str)
if label:
badges.add(label.lower())
return badges
@staticmethod
def _get_text(data, *path_list, max_runs=None):
for path in path_list or [None]:
if path is None:
obj = [data]
else:
obj = traverse_obj(data, path, default=[])
if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
obj = [obj]
for item in obj:
text = try_get(item, lambda x: x['simpleText'], str)
if text:
return text
runs = try_get(item, lambda x: x['runs'], list) or []
if not runs and isinstance(item, list):
runs = item
runs = runs[:min(len(runs), max_runs or len(runs))]
text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
if text:
return text
def _get_count(self, data, *path_list):
count_text = self._get_text(data, *path_list) or ''
count = parse_count(count_text)
if count is None:
count = str_to_int(
self._search_regex(r'^([\d,]+)', re.sub(r'\s', '', count_text), 'count', default=None))
return count
@staticmethod
def _extract_thumbnails(data, *path_list):
"""
Extract thumbnails from thumbnails dict
@param path_list: path list to level that contains 'thumbnails' key
"""
thumbnails = []
for path in path_list or [()]:
for thumbnail in traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[]):
thumbnail_url = url_or_none(thumbnail.get('url'))
if not thumbnail_url:
continue
# Sometimes youtube gives a wrong thumbnail URL. See:
# https://github.com/yt-dlp/yt-dlp/issues/233
# https://github.com/ytdl-org/youtube-dl/issues/28023
if 'maxresdefault' in thumbnail_url:
thumbnail_url = thumbnail_url.split('?')[0]
thumbnails.append({
'url': thumbnail_url,
'height': int_or_none(thumbnail.get('height')),
'width': int_or_none(thumbnail.get('width')),
})
return thumbnails
@staticmethod
def extract_relative_time(relative_time_text):
"""
Extracts a relative time from string and converts to dt object
e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'
"""
mobj = re.search(r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
if mobj:
start = mobj.group('start')
if start:
return datetime_from_str(start)
try:
return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
except ValueError:
return None
def _extract_time_text(self, renderer, *path_list):
"""@returns (timestamp, time_text)"""
text = self._get_text(renderer, *path_list) or ''
dt = self.extract_relative_time(text)
timestamp = None
if isinstance(dt, datetime.datetime):
timestamp = calendar.timegm(dt.timetuple())
if timestamp is None:
timestamp = (
unified_timestamp(text) or unified_timestamp(
self._search_regex(
(r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
text.lower(), 'time text', default=None)))
if text and timestamp is None:
self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True)
return timestamp, text
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
default_client='web'):
response = None
last_error = None
count = -1
retries = self.get_param('extractor_retries', 3)
if check_get_keys is None:
check_get_keys = []
while count < retries:
count += 1
if last_error:
self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
try:
response = self._call_api(
ep=ep, fatal=True, headers=headers,
video_id=item_id, query=query,
context=self._extract_context(ytcfg, default_client),
api_key=self._extract_api_key(ytcfg, default_client),
api_hostname=api_hostname, default_client=default_client,
note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
except ExtractorError as e:
if isinstance(e.cause, network_exceptions):
if isinstance(e.cause, urllib.error.HTTPError):
first_bytes = e.cause.read(512)
if not is_html(first_bytes):
yt_error = try_get(
self._parse_json(
self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
lambda x: x['error']['message'], str)
if yt_error:
self._report_alerts([('ERROR', yt_error)], fatal=False)
# Downloading page may result in intermittent 5xx HTTP error
# Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
# We also want to catch all other network exceptions since errors in later pages can be troublesome
# See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429):
last_error = error_to_compat_str(e.cause or e.msg)
if count < retries:
continue
if fatal:
raise
else:
self.report_warning(error_to_compat_str(e))
return
else:
try:
self._extract_and_report_alerts(response, only_once=True)
except ExtractorError as e:
# YouTube servers may return errors we want to retry on in a 200 OK response
# See: https://github.com/yt-dlp/yt-dlp/issues/839
if 'unknown error' in e.msg.lower():
last_error = e.msg
continue
if fatal:
raise
self.report_warning(error_to_compat_str(e))
return
if not check_get_keys or dict_get(response, check_get_keys):
break
# Youtube sometimes sends incomplete data
# See: https://github.com/ytdl-org/youtube-dl/issues/28194
last_error = 'Incomplete data received'
if count >= retries:
if fatal:
raise ExtractorError(last_error)
else:
self.report_warning(last_error)
return
return response
@staticmethod
def is_music_url(url):
return re.match(r'https?://music\.youtube\.com/', url) is not None
def _extract_video(self, renderer):
video_id = renderer.get('videoId')
title = self._get_text(renderer, 'title')
description = self._get_text(renderer, 'descriptionSnippet')
duration = parse_duration(self._get_text(
renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
if duration is None:
duration = parse_duration(self._search_regex(
r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
traverse_obj(renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str),
video_id, default=None, group='duration'))
view_count = self._get_count(renderer, 'viewCountText')
uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
channel_id = traverse_obj(
renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
expected_type=str, get_all=False)
timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
overlay_style = traverse_obj(
renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
get_all=False, expected_type=str)
badges = self._extract_badges(renderer)
thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
expected_type=str)) or ''
url = f'https://www.youtube.com/watch?v={video_id}'
if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
url = f'https://www.youtube.com/shorts/{video_id}'
return {
'_type': 'url',
'ie_key': YoutubeIE.ie_key(),
'id': video_id,
'url': url,
'title': title,
'description': description,
'duration': duration,
'view_count': view_count,
'uploader': uploader,
'channel_id': channel_id,
'thumbnails': thumbnails,
'upload_date': (strftime_or_none(timestamp, '%Y%m%d')
if self._configuration_arg('approximate_date', ie_key='youtubetab')
else None),
'live_status': ('is_upcoming' if scheduled_timestamp is not None
else 'was_live' if 'streamed' in time_text.lower()
else 'is_live' if overlay_style is not None and overlay_style == 'LIVE' or 'live now' in badges
else None),
'release_timestamp': scheduled_timestamp,
'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
}
class YoutubeIE(YoutubeBaseInfoExtractor):
IE_DESC = 'YouTube'
_VALID_URL = r"""(?x)^
(
(?:https?://|//) # http(s):// or protocol-independent URL
(?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
(?:www\.)?deturl\.com/www\.youtube\.com|
(?:www\.)?pwnyoutube\.com|
(?:www\.)?hooktube\.com|
(?:www\.)?yourepeat\.com|
tube\.majestyc\.net|
%(invidious)s|
youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
(?:.*?\#/)? # handle anchor (#/) redirect urls
(?: # the various things that can precede the ID:
(?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
|(?: # or the v= param in all its forms
(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
(?:\?|\#!?) # the params delimiter ? or # or #!
(?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
v=
)
))
|(?:
youtu\.be| # just youtu.be/xxxx
vid\.plus| # or vid.plus/xxxx
zwearz\.com/watch| # or zwearz.com/watch/xxxx
%(invidious)s
)/
|(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
)
)? # all until now is optional -> you can pass the naked ID
(?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
(?(1).+)? # if we found the ID, everything can follow
(?:\#|$)""" % {
'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
}
_PLAYER_INFO_RE = (
r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
)
_formats = {
'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
'13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
'17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
'18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
'22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
'34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
'35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
# itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
'36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
'37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
'38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
'43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
'44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
'45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
'46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
'59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
'78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
# 3D videos
'82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
'83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
'84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
'85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
'100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
'101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
'102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
# Apple HTTP Live Streaming
'91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
'92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
'93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
'94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
'95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
'96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
'132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
# DASH mp4 video
'133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
'134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
'138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
'298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
'299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
'266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
# Dash mp4 audio
'139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
'140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
'141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
'256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
'258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
'325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
'328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
# Dash webm
'167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
'168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
'169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
'170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
'218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
'219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
'278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
'242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
'243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
'244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
'245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
'246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
'247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
'248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
'271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
'272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
'302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
'303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
'308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
'313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
'315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
# Dash webm audio
'171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
'172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
# Dash webm audio with opus inside
'249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
'250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
'251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
# RTMP (unnamed)
'_rtmp': {'protocol': 'rtmp'},
# av01 video only formats sometimes served with "unknown" codecs
'394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
'395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
'396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
'397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
'398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
'399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
'400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
}
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
_GEO_BYPASS = False
IE_NAME = 'youtube'
_TESTS = [
{
'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
'info_dict': {
'id': 'BaW_jenozKc',
'ext': 'mp4',
'title': 'youtube-dl test video "\'/\\ä↭𝕐',
'uploader': 'Philipp Hagemeister',
'uploader_id': 'phihag',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
'channel': 'Philipp Hagemeister',
'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
'upload_date': '20121002',
'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
'categories': ['Science & Technology'],
'tags': ['youtube-dl'],
'duration': 10,
'view_count': int,
'like_count': int,
'availability': 'public',
'playable_in_embed': True,
'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
'live_status': 'not_live',
'age_limit': 0,
'start_time': 1,
'end_time': 9,
'channel_follower_count': int
}
},
{
'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
'note': 'Embed-only video (#1746)',
'info_dict': {
'id': 'yZIXLfi8CZQ',
'ext': 'mp4',
'upload_date': '20120608',
'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
'uploader': 'SET India',
'uploader_id': 'setindia',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
'age_limit': 18,
},
'skip': 'Private video',
},
{
'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
'note': 'Use the first video ID in the URL',
'info_dict': {
'id': 'BaW_jenozKc',
'ext': 'mp4',
'title': 'youtube-dl test video "\'/\\ä↭𝕐',
'uploader': 'Philipp Hagemeister',
'uploader_id': 'phihag',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
'channel': 'Philipp Hagemeister',
'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
'upload_date': '20121002',
'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
'categories': ['Science & Technology'],
'tags': ['youtube-dl'],
'duration': 10,
'view_count': int,
'like_count': int,
'availability': 'public',
'playable_in_embed': True,
'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
'live_status': 'not_live',
'age_limit': 0,
'channel_follower_count': int
},
'params': {
'skip_download': True,
},
},
{
'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
'note': '256k DASH audio (format 141) via DASH manifest',
'info_dict': {
'id': 'a9LDPn-MO4I',
'ext': 'm4a',
'upload_date': '20121002',
'uploader_id': '8KVIDEO',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
'description': '',
'uploader': '8KVIDEO',
'title': 'UHDTV TEST 8K VIDEO.mp4'
},
'params': {
'youtube_include_dash_manifest': True,
'format': '141',
},
'skip': 'format 141 not served anymore',
},
# DASH manifest with encrypted signature
{
'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
'info_dict': {
'id': 'IB3lcPjvWLA',
'ext': 'm4a',
'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
'duration': 244,
'uploader': 'AfrojackVEVO',
'uploader_id': 'AfrojackVEVO',
'upload_date': '20131011',
'abr': 129.495,
'like_count': int,
'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
'playable_in_embed': True,
'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
'view_count': int,
'track': 'The Spark',
'live_status': 'not_live',
'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
'channel': 'Afrojack',
'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',
'tags': 'count:19',
'availability': 'public',
'categories': ['Music'],
'age_limit': 0,
'alt_title': 'The Spark',
'channel_follower_count': int
},
'params': {
'youtube_include_dash_manifest': True,
'format': '141/bestaudio[ext=m4a]',
},
},
# Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
{
'note': 'Embed allowed age-gate video',
'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
'info_dict': {
'id': 'HtVdAasjOgU',
'ext': 'mp4',
'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
'duration': 142,
'uploader': 'The Witcher',
'uploader_id': 'WitcherGame',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
'upload_date': '20140605',
'age_limit': 18,
'categories': ['Gaming'],
'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
'availability': 'needs_auth',
'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
'like_count': int,
'channel': 'The Witcher',
'live_status': 'not_live',
'tags': 'count:17',
'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
'playable_in_embed': True,
'view_count': int,
'channel_follower_count': int
},
},
{
'note': 'Age-gate video with embed allowed in public site',
'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
'info_dict': {
'id': 'HsUATh_Nc2U',
'ext': 'mp4',
'title': 'Godzilla 2 (Official Video)',
'description': 'md5:bf77e03fcae5529475e500129b05668a',
'upload_date': '20200408',
'uploader_id': 'FlyingKitty900',
'uploader': 'FlyingKitty',
'age_limit': 18,
'availability': 'needs_auth',
'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',
'channel': 'FlyingKitty',
'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
'view_count': int,
'categories': ['Entertainment'],
'live_status': 'not_live',
'tags': ['Flyingkitty', 'godzilla 2'],
'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
'like_count': int,
'duration': 177,
'playable_in_embed': True,
'channel_follower_count': int
},
},
{
'note': 'Age-gate video embedable only with clientScreen=EMBED',
'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
'info_dict': {
'id': 'Tq92D6wQ1mg',
'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
'ext': 'mp4',
'upload_date': '20191228',
'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
'uploader': 'Projekt Melody',
'description': 'md5:17eccca93a786d51bc67646756894066',
'age_limit': 18,
'like_count': int,
'availability': 'needs_auth',
'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
'view_count': int,
'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
'channel': 'Projekt Melody',
'live_status': 'not_live',
'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
'playable_in_embed': True,
'categories': ['Entertainment'],
'duration': 106,
'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
'channel_follower_count': int
},
},
{
'note': 'Non-Agegated non-embeddable video',
'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
'info_dict': {
'id': 'MeJVWBSsPAY',
'ext': 'mp4',
'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
'uploader': 'Herr Lurik',
'uploader_id': 'st3in234',
'description': 'Fan Video. Music & Lyrics by OOMPH!.',
'upload_date': '20130730',
'track': 'Such mich find mich',
'age_limit': 0,
'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
'like_count': int,
'playable_in_embed': False,
'creator': 'OOMPH!',
'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
'view_count': int,
'alt_title': 'Such mich find mich',
'duration': 210,
'channel': 'Herr Lurik',
'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
'categories': ['Music'],
'availability': 'public',
'uploader_url': 'http://www.youtube.com/user/st3in234',
'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
'live_status': 'not_live',
'artist': 'OOMPH!',
'channel_follower_count': int
},
},
{
'note': 'Non-bypassable age-gated video',
'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
'only_matching': True,
},
# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
# YouTube Red ad is not captured for creator
{
'url': '__2ABJjxzNo',
'info_dict': {
'id': '__2ABJjxzNo',
'ext': 'mp4',
'duration': 266,
'upload_date': '20100430',
'uploader_id': 'deadmau5',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
'creator': 'deadmau5',
'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
'uploader': 'deadmau5',
'title': 'Deadmau5 - Some Chords (HD)',
'alt_title': 'Some Chords',
'availability': 'public',
'tags': 'count:14',
'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
'view_count': int,
'live_status': 'not_live',
'channel': 'deadmau5',
'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
'like_count': int,
'track': 'Some Chords',
'artist': 'deadmau5',
'playable_in_embed': True,
'age_limit': 0,
'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
'categories': ['Music'],
'album': 'Some Chords',
'channel_follower_count': int
},
'expected_warnings': [
'DASH manifest missing',
]
},
# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
{
'url': 'lqQg6PlCWgI',
'info_dict': {
'id': 'lqQg6PlCWgI',
'ext': 'mp4',
'duration': 6085,
'upload_date': '20150827',
'uploader_id': 'olympic',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
'uploader': 'Olympics',
'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
'like_count': int,
'release_timestamp': 1343767800,
'playable_in_embed': True,
'categories': ['Sports'],
'release_date': '20120731',
'channel': 'Olympics',
'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
'age_limit': 0,
'availability': 'public',
'live_status': 'was_live',
'view_count': int,
'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
'channel_follower_count': int
},
'params': {
'skip_download': 'requires avconv',
}
},
# Non-square pixels
{
'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
'info_dict': {
'id': '_b-2C3KPAM0',
'ext': 'mp4',
'stretched_ratio': 16 / 9.,
'duration': 85,
'upload_date': '20110310',
'uploader_id': 'AllenMeow',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
'uploader': '孫ᄋᄅ',
'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
'playable_in_embed': True,
'channel': '孫ᄋᄅ',
'age_limit': 0,
'tags': 'count:11',
'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
'view_count': int,
'categories': ['People & Blogs'],
'like_count': int,
'live_status': 'not_live',
'availability': 'unlisted',
'channel_follower_count': int
},
},
# url_encoded_fmt_stream_map is empty string
{
'url': 'qEJwOuvDf7I',
'info_dict': {
'id': 'qEJwOuvDf7I',
'ext': 'webm',
'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
'description': '',
'upload_date': '20150404',
'uploader_id': 'spbelect',
'uploader': 'Наблюдатели Петербурга',
},
'params': {
'skip_download': 'requires avconv',
},
'skip': 'This live event has ended.',
},
# Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
{
'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
'info_dict': {
'id': 'FIl7x6_3R5Y',
'ext': 'webm',
'title': 'md5:7b81415841e02ecd4313668cde88737a',
'description': 'md5:116377fd2963b81ec4ce64b542173306',
'duration': 220,
'upload_date': '20150625',
'uploader_id': 'dorappi2000',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
'uploader': 'dorappi2000',
'formats': 'mincount:31',
},
'skip': 'not actual anymore',
},
# DASH manifest with segment_list
{
'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
'md5': '8ce563a1d667b599d21064e982ab9e31',
'info_dict': {
'id': 'CsmdDsKjzN8',
'ext': 'mp4',
'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
'uploader': 'Airtek',
'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
},
'params': {
'youtube_include_dash_manifest': True,
'format': '135', # bestvideo
},
'skip': 'This live event has ended.',
},
{
# Multifeed videos (multiple cameras), URL is for Main Camera
'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
'info_dict': {
'id': 'jvGDaLqkpTg',
'title': 'Tom Clancy Free Weekend Rainbow Whatever',
'description': 'md5:e03b909557865076822aa169218d6a5d',
},
'playlist': [{
'info_dict': {
'id': 'jvGDaLqkpTg',
'ext': 'mp4',
'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
'description': 'md5:e03b909557865076822aa169218d6a5d',
'duration': 10643,
'upload_date': '20161111',
'uploader': 'Team PGP',
'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
},
}, {
'info_dict': {
'id': '3AKt1R1aDnw',
'ext': 'mp4',
'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
'description': 'md5:e03b909557865076822aa169218d6a5d',
'duration': 10991,
'upload_date': '20161111',
'uploader': 'Team PGP',
'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
},
}, {
'info_dict': {
'id': 'RtAMM00gpVc',
'ext': 'mp4',
'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
'description': 'md5:e03b909557865076822aa169218d6a5d',
'duration': 10995,
'upload_date': '20161111',
'uploader': 'Team PGP',
'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
},
}, {
'info_dict': {
'id': '6N2fdlP3C5U',
'ext': 'mp4',
'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
'description': 'md5:e03b909557865076822aa169218d6a5d',
'duration': 10990,
'upload_date': '20161111',
'uploader': 'Team PGP',
'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
},
}],
'params': {
'skip_download': True,
},
'skip': 'Not multifeed anymore',
},
{
# Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
'info_dict': {
'id': 'gVfLd0zydlo',
'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
},
'playlist_count': 2,
'skip': 'Not multifeed anymore',
},
{
'url': 'https://vid.plus/FlRa-iH7PGw',
'only_matching': True,
},
{
'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
'only_matching': True,
},
{
# Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
# Also tests cut-off URL expansion in video description (see
# https://github.com/ytdl-org/youtube-dl/issues/1892,
# https://github.com/ytdl-org/youtube-dl/issues/8164)
'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
'info_dict': {
'id': 'lsguqyKfVQg',
'ext': 'mp4',
'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
'alt_title': 'Dark Walk',
'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
'duration': 133,
'upload_date': '20151119',
'uploader_id': 'IronSoulElf',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
'uploader': 'IronSoulElf',
'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
'track': 'Dark Walk',
'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
'categories': ['Film & Animation'],
'view_count': int,
'live_status': 'not_live',
'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
'tags': 'count:13',
'availability': 'public',
'channel': 'IronSoulElf',
'playable_in_embed': True,
'like_count': int,
'age_limit': 0,
'channel_follower_count': int
},
'params': {
'skip_download': True,
},
},
{
# Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
'only_matching': True,
},
{
# Video with yt:stretch=17:0
'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
'info_dict': {
'id': 'Q39EVAstoRM',
'ext': 'mp4',
'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
'description': 'md5:ee18a25c350637c8faff806845bddee9',
'upload_date': '20151107',
'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
'uploader': 'CH GAMER DROID',
},
'params': {
'skip_download': True,
},
'skip': 'This video does not exist.',
},
{
# Video with incomplete 'yt:stretch=16:'
'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
'only_matching': True,
},
{
# Video licensed under Creative Commons
'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
'info_dict': {
'id': 'M4gD1WSo5mA',
'ext': 'mp4',
'title': 'md5:e41008789470fc2533a3252216f1c1d1',
'description': 'md5:a677553cf0840649b731a3024aeff4cc',
'duration': 721,
'upload_date': '20150128',
'uploader_id': 'BerkmanCenter',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
'uploader': 'The Berkman Klein Center for Internet & Society',
'license': 'Creative Commons Attribution license (reuse allowed)',
'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
'like_count': int,
'age_limit': 0,
'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
'channel': 'The Berkman Klein Center for Internet & Society',
'availability': 'public',
'view_count': int,
'categories': ['Education'],
'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
'live_status': 'not_live',
'playable_in_embed': True,
'channel_follower_count': int
},
'params': {
'skip_download': True,
},
},
{
# Channel-like uploader_url
'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
'info_dict': {
'id': 'eQcmzGIKrzg',
'ext': 'mp4',
'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
'duration': 4060,
'upload_date': '20151120',
'uploader': 'Bernie Sanders',
'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
'license': 'Creative Commons Attribution license (reuse allowed)',
'playable_in_embed': True,
'tags': 'count:12',
'like_count': int,
'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
'age_limit': 0,
'availability': 'public',
'categories': ['News & Politics'],
'channel': 'Bernie Sanders',
'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
'view_count': int,
'live_status': 'not_live',
'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
'channel_follower_count': int
},
'params': {
'skip_download': True,
},
},
{
'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
'only_matching': True,
},
{
# YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
'only_matching': True,
},
{
# Rental video preview
'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
'info_dict': {
'id': 'uGpuVWrhIzE',
'ext': 'mp4',
'title': 'Piku - Trailer',
'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
'upload_date': '20150811',
'uploader': 'FlixMatrix',
'uploader_id': 'FlixMatrixKaravan',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
'license': 'Standard YouTube License',
},
'params': {
'skip_download': True,
},
'skip': 'This video is not available.',
},
{
# YouTube Red video with episode data
'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
'info_dict': {
'id': 'iqKdEhx-dD4',
'ext': 'mp4',
'title': 'Isolation - Mind Field (Ep 1)',
'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
'duration': 2085,
'upload_date': '20170118',
'uploader': 'Vsauce',
'uploader_id': 'Vsauce',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
'series': 'Mind Field',
'season_number': 1,
'episode_number': 1,
'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
'tags': 'count:12',
'view_count': int,
'availability': 'public',
'age_limit': 0,
'channel': 'Vsauce',
'episode': 'Episode 1',
'categories': ['Entertainment'],
'season': 'Season 1',
'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
'like_count': int,
'playable_in_embed': True,
'live_status': 'not_live',
'channel_follower_count': int
},
'params': {
'skip_download': True,
},
'expected_warnings': [
'Skipping DASH manifest',
],
},
{
# The following content has been identified by the YouTube community
# as inappropriate or offensive to some audiences.
'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
'info_dict': {
'id': '6SJNVb0GnPI',
'ext': 'mp4',
'title': 'Race Differences in Intelligence',
'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
'duration': 965,
'upload_date': '20140124',
'uploader': 'New Century Foundation',
'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
},
'params': {
'skip_download': True,
},
'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
},
{
# itag 212
'url': '1t24XAntNCY',
'only_matching': True,
},
{
# geo restricted to JP
'url': 'sJL6WA-aGkQ',
'only_matching': True,
},
{
'url': 'https://invidio.us/watch?v=BaW_jenozKc',
'only_matching': True,
},
{
'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
'only_matching': True,
},
{
# from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
'only_matching': True,
},
{
# DRM protected
'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
'only_matching': True,
},
{
# Video with unsupported adaptive stream type formats
'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
'info_dict': {
'id': 'Z4Vy8R84T1U',
'ext': 'mp4',
'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'duration': 433,
'upload_date': '20130923',
'uploader': 'Amelia Putri Harwita',
'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
'formats': 'maxcount:10',
},
'params': {
'skip_download': True,
'youtube_include_dash_manifest': False,
},
'skip': 'not actual anymore',
},
{
# Youtube Music Auto-generated description
'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
'info_dict': {
'id': 'MgNrAu2pzNs',
'ext': 'mp4',
'title': 'Voyeur Girl',
'description': 'md5:7ae382a65843d6df2685993e90a8628f',
'upload_date': '20190312',
'uploader': 'Stephen - Topic',
'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
'artist': 'Stephen',
'track': 'Voyeur Girl',
'album': 'it\'s too much love to know my dear',
'release_date': '20190313',
'release_year': 2019,
'alt_title': 'Voyeur Girl',
'view_count': int,
'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
'playable_in_embed': True,
'like_count': int,
'categories': ['Music'],
'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
'channel': 'Stephen',
'availability': 'public',
'creator': 'Stephen',
'duration': 169,
'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
'age_limit': 0,
'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
'tags': 'count:11',
'live_status': 'not_live',
'channel_follower_count': int
},
'params': {
'skip_download': True,
},
},
{
'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
'only_matching': True,
},
{
# invalid -> valid video id redirection
'url': 'DJztXj2GPfl',
'info_dict': {
'id': 'DJztXj2GPfk',
'ext': 'mp4',
'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
'description': 'md5:bf577a41da97918e94fa9798d9228825',
'upload_date': '20090125',
'uploader': 'Prochorowka',
'uploader_id': 'Prochorowka',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
'artist': 'Panjabi MC',
'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
'album': 'Beware of the Boys (Mundian To Bach Ke)',
},
'params': {
'skip_download': True,
},
'skip': 'Video unavailable',
},
{
# empty description results in an empty string
'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
'info_dict': {
'id': 'x41yOUIvK2k',
'ext': 'mp4',
'title': 'IMG 3456',
'description': '',
'upload_date': '20170613',
'uploader_id': 'ElevageOrVert',
'uploader': 'ElevageOrVert',
'view_count': int,
'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',
'like_count': int,
'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
'tags': [],
'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
'availability': 'public',
'age_limit': 0,
'categories': ['Pets & Animals'],
'duration': 7,
'playable_in_embed': True,
'live_status': 'not_live',
'channel': 'ElevageOrVert',
'channel_follower_count': int
},
'params': {
'skip_download': True,
},
},
{
# with '};' inside yt initial data (see [1])
# see [2] for an example with '};' inside ytInitialPlayerResponse
# 1. https://github.com/ytdl-org/youtube-dl/issues/27093
# 2. https://github.com/ytdl-org/youtube-dl/issues/27216
'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
'info_dict': {
'id': 'CHqg6qOn4no',
'ext': 'mp4',
'title': 'Part 77 Sort a list of simple types in c#',
'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
'upload_date': '20130831',
'uploader_id': 'kudvenkat',
'uploader': 'kudvenkat',
'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
'like_count': int,
'uploader_url': 'http://www.youtube.com/user/kudvenkat',
'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
'live_status': 'not_live',
'categories': ['Education'],
'availability': 'public',
'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
'tags': 'count:12',
'playable_in_embed': True,
'age_limit': 0,
'view_count': int,
'duration': 522,
'channel': 'kudvenkat',
'channel_follower_count': int
},
'params': {
'skip_download': True,
},
},
{
# another example of '};' in ytInitialData
'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
'only_matching': True,
},
{
'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
'only_matching': True,
},
{
# https://github.com/ytdl-org/youtube-dl/pull/28094
'url': 'OtqTfy26tG0',
'info_dict': {
'id': 'OtqTfy26tG0',
'ext': 'mp4',
'title': 'Burn Out',
'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
'upload_date': '20141120',
'uploader': 'The Cinematic Orchestra - Topic',
'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
'artist': 'The Cinematic Orchestra',
'track': 'Burn Out',
'album': 'Every Day',
'like_count': int,
'live_status': 'not_live',
'alt_title': 'Burn Out',
'duration': 614,
'age_limit': 0,
'view_count': int,
'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
'creator': 'The Cinematic Orchestra',
'channel': 'The Cinematic Orchestra',
'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
'availability': 'public',
'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
'categories': ['Music'],
'playable_in_embed': True,
'channel_follower_count': int
},
'params': {
'skip_download': True,
},
},
{
# controversial video, only works with bpctr when authenticated with cookies
'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
'only_matching': True,
},
{
# controversial video, requires bpctr/contentCheckOk
'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
'info_dict': {
'id': 'SZJvDhaSDnc',
'ext': 'mp4',
'title': 'San Diego teen commits suicide after bullying over embarrassing video',
'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
'uploader': 'CBS Mornings',
'uploader_id': 'CBSThisMorning',
'upload_date': '20140716',
'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
'duration': 170,
'categories': ['News & Politics'],
'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',
'view_count': int,
'channel': 'CBS Mornings',
'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
'age_limit': 18,
'availability': 'needs_auth',
'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
'like_count': int,
'live_status': 'not_live',
'playable_in_embed': True,
'channel_follower_count': int
}
},
{
# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
'url