src/loader/m3u8-parser.ts
- import * as URLToolkit from 'url-toolkit';
-
- import { Fragment, Part } from './fragment';
- import { LevelDetails } from './level-details';
- import { LevelKey } from './level-key';
-
- import { AttrList } from '../utils/attr-list';
- import { logger } from '../utils/logger';
- import type { CodecType } from '../utils/codecs';
- import { isCodecType } from '../utils/codecs';
- import type {
- MediaPlaylist,
- AudioGroup,
- MediaPlaylistType,
- } from '../types/media-playlist';
- import type { PlaylistLevelType } from '../types/loader';
- import type { LevelAttributes, LevelParsed } from '../types/level';
-
// Fragment list built while parsing a level playlist; the parser inserts
// `null` placeholders for segments skipped by an EXT-X-SKIP tag.
type M3U8ParserFragments = Array<Fragment | null>;

// https://regex101.com is your friend

// Matches either an EXT-X-STREAM-INF tag (group 1 => attributes, group 2 => URI line)
// or an EXT-X-SESSION-DATA tag (group 3 => attributes).
const MASTER_PLAYLIST_REGEX = /#EXT-X-STREAM-INF:([^\r\n]*)(?:[\r\n](?:#[^\r\n]*)?)*([^\r\n]+)|#EXT-X-SESSION-DATA:([^\r\n]*)[\r\n]+/g;

// Matches an EXT-X-MEDIA tag, group 1 => attributes.
const MASTER_PLAYLIST_MEDIA_REGEX = /#EXT-X-MEDIA:(.*)/g;

// First-pass matcher for level playlists: the alternatives are OR-ed together
// in the order listed and applied globally.
const LEVEL_PLAYLIST_REGEX_FAST = new RegExp(
  [
    // duration (#EXTINF:<duration>,<title>), group 1 => duration, group 2 => title
    /#EXTINF:\s*(\d*(?:\.\d+)?)(?:,(.*)\s+)?/,
    // segment URI, group 3 => the URI (note newline is not eaten)
    /(?!#) *(\S[\S ]*)/,
    // next segment's byterange, group 4 => range spec (x@y)
    /#EXT-X-BYTERANGE:*(.+)/,
    // next segment's program date/time group 5 => the datetime spec
    /#EXT-X-PROGRAM-DATE-TIME:(.+)/,
    // All other non-segment oriented tags will match with all groups empty
    /#.*/,
  ]
    .map((pattern) => pattern.source)
    .join('|'),
  'g'
);

// Second-pass matcher applied to a single tag line that the fast regex matched
// without filling any group. The first defined group is the tag name, the
// following group(s) hold its value(s).
const LEVEL_PLAYLIST_REGEX_SLOW = new RegExp(
  [
    /#(EXTM3U)/,
    /#EXT-X-(PLAYLIST-TYPE):(.+)/,
    /#EXT-X-(MEDIA-SEQUENCE): *(\d+)/,
    /#EXT-X-(SKIP):(.+)/,
    /#EXT-X-(TARGETDURATION): *(\d+)/,
    /#EXT-X-(KEY):(.+)/,
    /#EXT-X-(START):(.+)/,
    /#EXT-X-(ENDLIST)/,
    /#EXT-X-(DISCONTINUITY-SEQ)UENCE: *(\d+)/,
    /#EXT-X-(DIS)CONTINUITY/,
    /#EXT-X-(VERSION):(\d+)/,
    /#EXT-X-(MAP):(.+)/,
    /#EXT-X-(SERVER-CONTROL):(.+)/,
    /#EXT-X-(PART-INF):(.+)/,
    /#EXT-X-(GAP)/,
    /#EXT-X-(BITRATE):\s*(\d+)/,
    /#EXT-X-(PART):(.+)/,
    /#EXT-X-(PRELOAD-HINT):(.+)/,
    /#EXT-X-(RENDITION-REPORT):(.+)/,
    /(#)([^:]*):(.*)/,
    /(#)(.*)(?:.*)\r?\n?/,
  ]
    .map((pattern) => pattern.source)
    .join('|')
);

// Case-insensitive test for a relative URL ending in an MP4-family extension.
const MP4_REGEX_SUFFIX = /\.(mp4|m4s|m4v|m4a)$/i;
-
/**
 * Static helpers that parse the text of HLS master playlists and media (level)
 * playlists into level, rendition and fragment descriptions.
 */
export default class M3U8Parser {
  /**
   * Finds a group by id.
   *
   * @param groups - groups to search
   * @param mediaGroupId - id compared against each group's `id`
   * @returns the first group whose `id` equals `mediaGroupId`, or `undefined`
   */
  static findGroup(
    groups: Array<AudioGroup>,
    mediaGroupId: string
  ): AudioGroup | undefined {
    for (let i = 0; i < groups.length; i++) {
      const group = groups[i];
      if (group.id === mediaGroupId) {
        return group;
      }
    }
    return undefined;
  }

  /**
   * Convert avc1 codec string from RFC-4281 to RFC-6381 for MediaSource.isTypeSupported.
   *
   * A codec with more than two dot-separated parts (e.g. `avc1.66.30`) has its
   * second part rendered as hex and its third part rendered as hex left-padded
   * to four characters (`avc1.42001e`). Any other codec is returned unchanged.
   *
   * @param codec - codec string to convert
   * @returns the converted codec string, or `codec` itself when no conversion applies
   */
  static convertAVC1ToAVCOTI(codec: string): string {
    const avcdata = codec.split('.');
    if (avcdata.length > 2) {
      const [sampleEntry, profile, level] = avcdata;
      const profileHex = parseInt(profile, 10).toString(16);
      const levelHex = ('000' + parseInt(level, 10).toString(16)).slice(-4);
      return sampleEntry + '.' + profileHex + levelHex;
    }
    return codec;
  }

  /**
   * Resolves `url` against `baseUrl` with url-toolkit, always normalizing.
   *
   * @param url - URL to resolve (may be relative)
   * @param baseUrl - URL the result is resolved against
   * @returns the URL built by `URLToolkit.buildAbsoluteURL`
   */
  static resolve(url: string, baseUrl: string): string {
    return URLToolkit.buildAbsoluteURL(baseUrl, url, { alwaysNormalize: true });
  }

  /**
   * Parses the EXT-X-STREAM-INF and EXT-X-SESSION-DATA tags of a master playlist.
   *
   * @param string - master playlist text
   * @param baseurl - URL that variant URIs are resolved against
   * @returns the parsed levels, plus the session data keyed by DATA-ID
   *          (`null` when no entry with a DATA-ID was found)
   */
  static parseMasterPlaylist(string: string, baseurl: string) {
    const levels: Array<LevelParsed> = [];
    const sessionData: Record<string, AttrList> = {};
    let hasSessionData = false;
    // the regex is global and shared, so restart it from the beginning
    MASTER_PLAYLIST_REGEX.lastIndex = 0;

    let result: RegExpExecArray | null;
    while ((result = MASTER_PLAYLIST_REGEX.exec(string)) != null) {
      if (result[1]) {
        // '#EXT-X-STREAM-INF' is found, parse level tag in group 1
        const attrs = new AttrList(result[1]);
        const level: LevelParsed = {
          attrs,
          // AVERAGE-BANDWIDTH wins over BANDWIDTH when it is present and non-zero
          bitrate:
            attrs.decimalInteger('AVERAGE-BANDWIDTH') ||
            attrs.decimalInteger('BANDWIDTH'),
          name: attrs.NAME,
          url: M3U8Parser.resolve(result[2], baseurl),
        };

        const resolution = attrs.decimalResolution('RESOLUTION');
        if (resolution) {
          level.width = resolution.width;
          level.height = resolution.height;
        }

        setCodecs(
          (attrs.CODECS || '').split(/[ ,]+/).filter((c) => c),
          level
        );

        if (level.videoCodec && level.videoCodec.indexOf('avc1') !== -1) {
          level.videoCodec = M3U8Parser.convertAVC1ToAVCOTI(level.videoCodec);
        }

        levels.push(level);
      } else if (result[3]) {
        // '#EXT-X-SESSION-DATA' is found, parse session data in group 3
        const sessionAttrs = new AttrList(result[3]);
        if (sessionAttrs['DATA-ID']) {
          hasSessionData = true;
          sessionData[sessionAttrs['DATA-ID']] = sessionAttrs;
        }
      }
    }
    return {
      levels,
      sessionData: hasSessionData ? sessionData : null,
    };
  }

  /**
   * Parses the EXT-X-MEDIA tags of a master playlist whose TYPE equals `type`.
   *
   * @param string - master playlist text
   * @param baseurl - URL that rendition URIs are resolved against
   * @param type - only tags with this TYPE attribute are returned
   * @param groups - optional groups used to copy `audioCodec` / `textCodec`
   *                 onto each returned track
   * @returns one entry per matching tag, with ids assigned in order from 0
   */
  static parseMasterPlaylistMedia(
    string: string,
    baseurl: string,
    type: MediaPlaylistType,
    groups: Array<AudioGroup> = []
  ): Array<MediaPlaylist> {
    let result: RegExpExecArray | null;
    const medias: Array<MediaPlaylist> = [];
    let id = 0;
    // the regex is global and shared, so restart it from the beginning
    MASTER_PLAYLIST_MEDIA_REGEX.lastIndex = 0;
    while ((result = MASTER_PLAYLIST_MEDIA_REGEX.exec(string)) !== null) {
      const attrs = new AttrList(result[1]) as LevelAttributes;
      if (attrs.TYPE === type) {
        const media: MediaPlaylist = {
          attrs,
          bitrate: 0,
          id: id++,
          groupId: attrs['GROUP-ID'],
          instreamId: attrs['INSTREAM-ID'],
          name: attrs.NAME || attrs.LANGUAGE || '',
          type,
          default: attrs.bool('DEFAULT'),
          autoselect: attrs.bool('AUTOSELECT'),
          forced: attrs.bool('FORCED'),
          lang: attrs.LANGUAGE,
          url: attrs.URI ? M3U8Parser.resolve(attrs.URI, baseurl) : '',
        };

        if (groups.length) {
          // If there are audio or text groups signalled in the manifest, let's look for a matching codec string for this track
          // If we don't find the track signalled, lets use the first audio groups codec we have
          // Acting as a best guess
          const groupCodec =
            M3U8Parser.findGroup(groups, media.groupId as string) || groups[0];
          assignCodec(media, groupCodec, 'audioCodec');
          assignCodec(media, groupCodec, 'textCodec');
        }

        medias.push(media);
      }
    }
    return medias;
  }

  /**
   * Parses a media (level) playlist into a `LevelDetails`.
   *
   * The text is scanned with `LEVEL_PLAYLIST_REGEX_FAST`; lines it matches
   * without filling a group are re-matched with `LEVEL_PLAYLIST_REGEX_SLOW`
   * and dispatched on the tag name.
   *
   * @param string - playlist text (also stored on the result as `m3u8`)
   * @param baseurl - base URL handed to the created `LevelDetails`, `Fragment`,
   *                  `Part` and `LevelKey` objects
   * @param id - value stored as `level` on every fragment
   * @param type - playlist type handed to every created `Fragment`
   * @param levelUrlId - value stored as `urlId` on every media fragment
   * @returns the populated `LevelDetails`
   */
  static parseLevelPlaylist(
    string: string,
    baseurl: string,
    id: number,
    type: PlaylistLevelType,
    levelUrlId: number
  ): LevelDetails {
    const level = new LevelDetails(baseurl);
    const fragments: M3U8ParserFragments = level.fragments;
    let currentSN = 0;
    let currentPart = 0;
    let totalduration = 0;
    let discontinuityCounter = 0;
    let prevFrag: Fragment | null = null;
    // `frag` is the fragment under construction; tags seen before its URI line
    // are accumulated on it
    let frag: Fragment = new Fragment(type, baseurl);
    let result: RegExpExecArray | RegExpMatchArray | null;
    let i: number;
    let levelkey: LevelKey | undefined;
    let firstPdtIndex = -1;

    LEVEL_PLAYLIST_REGEX_FAST.lastIndex = 0;
    level.m3u8 = string;

    while ((result = LEVEL_PLAYLIST_REGEX_FAST.exec(string)) !== null) {
      const duration = result[1];
      if (duration) {
        // INF
        frag.duration = parseFloat(duration);
        // Group 2 is undefined when the tag has no ",<title>" part; without the
        // guard the concatenation below would yield the literal title "undefined".
        // avoid sliced strings https://github.com/video-dev/hls.js/issues/939
        const title = result[2] ? (' ' + result[2]).slice(1) : '';
        frag.title = title || null;
        frag.tagList.push(title ? ['INF', duration, title] : ['INF', duration]);
      } else if (result[3]) {
        // url
        // a URI line only completes a fragment when a duration was set before it
        if (Number.isFinite(frag.duration)) {
          frag.start = totalduration;
          if (levelkey) {
            frag.levelkey = levelkey;
          }
          frag.sn = currentSN;
          frag.level = id;
          frag.cc = discontinuityCounter;
          frag.urlId = levelUrlId;
          fragments.push(frag);
          // avoid sliced strings https://github.com/video-dev/hls.js/issues/939
          frag.relurl = (' ' + result[3]).slice(1);
          assignProgramDateTime(frag, prevFrag);
          prevFrag = frag;
          totalduration += frag.duration;
          currentSN++;
          currentPart = 0;

          frag = new Fragment(type, baseurl);
          // setup the next fragment for part loading
          frag.start = totalduration;
          frag.sn = currentSN;
          frag.cc = discontinuityCounter;
          frag.level = id;
        }
      } else if (result[4]) {
        // X-BYTERANGE
        const data = (' ' + result[4]).slice(1);
        if (prevFrag) {
          frag.setByteRange(data, prevFrag);
        } else {
          frag.setByteRange(data);
        }
      } else if (result[5]) {
        // PROGRAM-DATE-TIME
        // avoid sliced strings https://github.com/video-dev/hls.js/issues/939
        frag.rawProgramDateTime = (' ' + result[5]).slice(1);
        frag.tagList.push(['PROGRAM-DATE-TIME', frag.rawProgramDateTime]);
        if (firstPdtIndex === -1) {
          // index the fragment under construction will get once it is pushed
          firstPdtIndex = fragments.length;
        }
      } else {
        result = result[0].match(LEVEL_PLAYLIST_REGEX_SLOW);
        if (!result) {
          logger.warn('No matches on slow regex match for level playlist!');
          continue;
        }
        // locate the first group that matched: it holds the tag name
        for (i = 1; i < result.length; i++) {
          if (typeof result[i] !== 'undefined') {
            break;
          }
        }

        // avoid sliced strings https://github.com/video-dev/hls.js/issues/939
        const tag = (' ' + result[i]).slice(1);
        const value1 = (' ' + result[i + 1]).slice(1);
        const value2 = result[i + 2] ? (' ' + result[i + 2]).slice(1) : '';

        switch (tag) {
          case 'PLAYLIST-TYPE':
            level.type = value1.toUpperCase();
            break;
          case 'MEDIA-SEQUENCE':
            currentSN = level.startSN = parseInt(value1);
            break;
          case 'SKIP': {
            const skipAttrs = new AttrList(value1);
            const skippedSegments = skipAttrs.decimalInteger(
              'SKIPPED-SEGMENTS'
            );
            if (Number.isFinite(skippedSegments)) {
              level.skippedSegments = skippedSegments;
              // This will result in fragments[] containing undefined values, which we will fill in with `mergeDetails`
              for (let remaining = skippedSegments; remaining--; ) {
                fragments.unshift(null);
              }
              currentSN += skippedSegments;
            }
            const recentlyRemovedDateranges = skipAttrs.enumeratedString(
              'RECENTLY-REMOVED-DATERANGES'
            );
            if (recentlyRemovedDateranges) {
              level.recentlyRemovedDateranges = recentlyRemovedDateranges.split(
                '\t'
              );
            }
            break;
          }
          case 'TARGETDURATION':
            level.targetduration = parseFloat(value1);
            break;
          case 'VERSION':
            level.version = parseInt(value1);
            break;
          case 'EXTM3U':
            break;
          case 'ENDLIST':
            level.live = false;
            break;
          case '#':
            // unrecognized tag: keep it on the fragment's tag list
            if (value1 || value2) {
              frag.tagList.push(value2 ? [value1, value2] : [value1]);
            }
            break;
          case 'DIS':
            discontinuityCounter++;
          /* falls through */
          case 'GAP':
            frag.tagList.push([tag]);
            break;
          case 'BITRATE':
            frag.tagList.push([tag, value1]);
            break;
          case 'DISCONTINUITY-SEQ':
            discontinuityCounter = parseInt(value1);
            break;
          case 'KEY': {
            // https://tools.ietf.org/html/rfc8216#section-4.3.2.4
            const keyAttrs = new AttrList(value1);
            const decryptmethod = keyAttrs.enumeratedString('METHOD');
            const decrypturi = keyAttrs.URI;
            const decryptiv = keyAttrs.hexadecimalInteger('IV');
            const decryptkeyformatversions = keyAttrs.enumeratedString(
              'KEYFORMATVERSIONS'
            );
            const decryptkeyid = keyAttrs.enumeratedString('KEYID');
            // From RFC: This attribute is OPTIONAL; its absence indicates an implicit value of "identity".
            const decryptkeyformat =
              keyAttrs.enumeratedString('KEYFORMAT') ?? 'identity';

            const unsupportedKnownKeyformatsInManifest = [
              'com.apple.streamingkeydelivery',
              'com.microsoft.playready',
              'urn:uuid:edef8ba9-79d6-4ace-a3c8-27dcd51d21ed', // widevine (v2)
              'com.widevine', // earlier widevine (v1)
            ];

            // `continue` here resumes the outer while loop, i.e. the tag is dropped
            if (
              unsupportedKnownKeyformatsInManifest.indexOf(decryptkeyformat) >
              -1
            ) {
              logger.warn(
                `Keyformat ${decryptkeyformat} is not supported from the manifest`
              );
              continue;
            } else if (decryptkeyformat !== 'identity') {
              // We are supposed to skip keys we don't understand.
              // As we currently only officially support identity keys
              // from the manifest we shouldn't save any other key.
              continue;
            }

            // TODO: multiple keys can be defined on a fragment, and we need to support this
            // for clients that support both playready and widevine
            if (decryptmethod) {
              // TODO: need to determine if the level key is actually a relative URL
              // if it isn't, then we should instead construct the LevelKey using fromURI.
              levelkey = LevelKey.fromURL(baseurl, decrypturi);
              if (
                decrypturi &&
                ['AES-128', 'SAMPLE-AES', 'SAMPLE-AES-CENC'].indexOf(
                  decryptmethod
                ) >= 0
              ) {
                levelkey.method = decryptmethod;
                levelkey.keyFormat = decryptkeyformat;

                if (decryptkeyid) {
                  levelkey.keyID = decryptkeyid;
                }

                if (decryptkeyformatversions) {
                  levelkey.keyFormatVersions = decryptkeyformatversions;
                }

                // Initialization Vector (IV)
                levelkey.iv = decryptiv;
              }
            }
            break;
          }
          case 'START': {
            const startAttrs = new AttrList(value1);
            const startTimeOffset = startAttrs.decimalFloatingPoint(
              'TIME-OFFSET'
            );
            // TIME-OFFSET can be 0
            if (Number.isFinite(startTimeOffset)) {
              level.startTimeOffset = startTimeOffset;
            }
            break;
          }
          case 'MAP': {
            // the fragment under construction becomes the init segment and a
            // fresh fragment takes its place
            const mapAttrs = new AttrList(value1);
            frag.relurl = mapAttrs.URI;
            if (mapAttrs.BYTERANGE) {
              frag.setByteRange(mapAttrs.BYTERANGE);
            }
            frag.level = id;
            frag.sn = 'initSegment';
            if (levelkey) {
              frag.levelkey = levelkey;
            }
            level.initSegment = frag;
            frag = new Fragment(type, baseurl);
            frag.rawProgramDateTime = level.initSegment.rawProgramDateTime;
            break;
          }
          case 'SERVER-CONTROL': {
            const serverControlAttrs = new AttrList(value1);
            level.canBlockReload = serverControlAttrs.bool('CAN-BLOCK-RELOAD');
            level.canSkipUntil = serverControlAttrs.optionalFloat(
              'CAN-SKIP-UNTIL',
              0
            );
            level.canSkipDateRanges =
              level.canSkipUntil > 0 &&
              serverControlAttrs.bool('CAN-SKIP-DATERANGES');
            level.partHoldBack = serverControlAttrs.optionalFloat(
              'PART-HOLD-BACK',
              0
            );
            level.holdBack = serverControlAttrs.optionalFloat('HOLD-BACK', 0);
            break;
          }
          case 'PART-INF': {
            const partInfAttrs = new AttrList(value1);
            level.partTarget = partInfAttrs.decimalFloatingPoint('PART-TARGET');
            break;
          }
          case 'PART': {
            let partList = level.partList;
            if (!partList) {
              partList = level.partList = [];
            }
            // only link to the previous part when it belongs to the same fragment
            const previousFragmentPart =
              currentPart > 0 ? partList[partList.length - 1] : undefined;
            const index = currentPart++;
            const part = new Part(
              new AttrList(value1),
              frag,
              baseurl,
              index,
              previousFragmentPart
            );
            partList.push(part);
            frag.duration += part.duration;
            break;
          }
          case 'PRELOAD-HINT': {
            const preloadHintAttrs = new AttrList(value1);
            level.preloadHint = preloadHintAttrs;
            break;
          }
          case 'RENDITION-REPORT': {
            const renditionReportAttrs = new AttrList(value1);
            level.renditionReports = level.renditionReports || [];
            level.renditionReports.push(renditionReportAttrs);
            break;
          }
          default:
            logger.warn(`line parsed but not handled: ${result}`);
            break;
        }
      }
    }
    if (prevFrag && !prevFrag.relurl) {
      // the last pushed fragment has no URL: drop it from the list again
      fragments.pop();
      totalduration -= prevFrag.duration;
      if (level.partList) {
        level.fragmentHint = prevFrag;
      }
    } else if (level.partList) {
      // parts were seen: expose the fragment still under construction as the hint
      assignProgramDateTime(frag, prevFrag);
      frag.cc = discontinuityCounter;
      level.fragmentHint = frag;
    }
    const fragmentLength = fragments.length;
    const firstFragment = fragments[0];
    const lastFragment = fragments[fragmentLength - 1];
    totalduration += level.skippedSegments * level.targetduration;
    if (totalduration > 0 && fragmentLength && lastFragment) {
      level.averagetargetduration = totalduration / fragmentLength;
      const lastSn = lastFragment.sn;
      level.endSN = lastSn !== 'initSegment' ? lastSn : 0;
      if (firstFragment) {
        level.startCC = firstFragment.cc;
        if (!level.initSegment) {
          // this is a bit lurky but HLS really has no other way to tell us
          // if the fragments are TS or MP4, except if we download them :/
          // but this is to be able to handle SIDX.
          if (
            level.fragments.every((frag) =>
              MP4_REGEX_SUFFIX.test(frag.relurl as string)
            )
          ) {
            logger.warn(
              'MP4 fragments found but no init segment (probably no MAP, incomplete M3U8), trying to fetch SIDX'
            );
            frag = new Fragment(type, baseurl);
            frag.relurl = lastFragment.relurl;
            frag.level = id;
            frag.sn = 'initSegment';
            level.initSegment = frag;
            level.needSidxRanges = true;
          }
        }
      }
    } else {
      level.endSN = 0;
      level.startCC = 0;
    }
    if (level.fragmentHint) {
      totalduration += level.fragmentHint.duration;
    }
    level.totalduration = totalduration;
    level.endCC = discontinuityCounter;

    /**
     * Backfill any missing PDT values
     * "If the first EXT-X-PROGRAM-DATE-TIME tag in a Playlist appears after
     * one or more Media Segment URIs, the client SHOULD extrapolate
     * backward from that tag (using EXTINF durations and/or media
     * timestamps) to associate dates with those segments."
     * We have already extrapolated forward, but all fragments up to the first instance of PDT do not have their PDTs
     * computed.
     */
    if (firstPdtIndex > 0) {
      backfillProgramDateTimes(fragments, firstPdtIndex);
    }

    return level;
  }
}
-
/**
 * Sorts the codec strings of a variant into `videoCodec`, `audioCodec` and
 * `textCodec` on `level`, and stores whatever is left as `unknownCodecs`.
 *
 * For each type, an `avc1` / `mp4a` codec is preferred over other codecs of
 * that type; otherwise the first codec of the type is used.
 *
 * @param codecs - codec strings to classify
 * @param level - level that receives the codec properties
 */
function setCodecs(codecs: Array<string>, level: LevelParsed) {
  let unclaimed = codecs;

  ['video', 'audio', 'text'].forEach((type: CodecType) => {
    const ofType = unclaimed.filter((codec) => isCodecType(codec, type));
    if (ofType.length === 0) {
      return;
    }

    // `lastIndexOf(x, 0) === 0` is a prefix test
    const wellKnown = ofType.filter(
      (codec) =>
        codec.lastIndexOf('avc1', 0) === 0 || codec.lastIndexOf('mp4a', 0) === 0
    );
    level[`${type}Codec`] = wellKnown.length > 0 ? wellKnown[0] : ofType[0];

    // every codec of this type is consumed, not only the one that was picked
    unclaimed = unclaimed.filter((codec) => ofType.indexOf(codec) === -1);
  });

  level.unknownCodecs = unclaimed;
}
-
/**
 * Copies one codec property from a group onto a media track, leaving the track
 * untouched when the group has no truthy value for that property.
 *
 * @param media - object that receives the property
 * @param groupItem - object the property is read from
 * @param codecProperty - name of the property to copy
 */
function assignCodec(media, groupItem, codecProperty) {
  const inherited = groupItem[codecProperty];
  if (!inherited) {
    return;
  }
  media[codecProperty] = inherited;
}
-
/**
 * Extrapolates program date-times backwards from the fragment at
 * `firstPdtIndex` to the fragments before it, subtracting each fragment's
 * duration (in seconds) from the date-time (in milliseconds) of its successor.
 *
 * Nothing is changed when there is no fragment at `firstPdtIndex` or when that
 * fragment has no finite `programDateTime`; there is then no anchor to
 * extrapolate from. The walk stops at the first `null` entry.
 *
 * @param fragments - fragment list, possibly containing `null` placeholders
 * @param firstPdtIndex - index of the fragment used as the anchor
 */
function backfillProgramDateTimes(
  fragments: M3U8ParserFragments,
  firstPdtIndex: number
) {
  const anchor = fragments[firstPdtIndex];
  // Without a usable anchor, subtracting would either throw (missing fragment)
  // or produce meaningless negative dates (null date-time coerced to 0).
  if (!anchor || !Number.isFinite(anchor.programDateTime)) {
    return;
  }
  let fragPrev: Fragment = anchor;
  for (let i = firstPdtIndex; i--; ) {
    const frag = fragments[i];
    // Exit on delta-playlist skipped segments
    if (!frag) {
      return;
    }
    frag.programDateTime =
      (fragPrev.programDateTime as number) - frag.duration * 1000;
    fragPrev = frag;
  }
}
-
/**
 * Sets `frag.programDateTime`.
 *
 * When the fragment carries a raw date string it is parsed with `Date.parse`;
 * otherwise, when the previous fragment has a finite `programDateTime`, the
 * previous fragment's `endProgramDateTime` is used. If the result is not a
 * finite number, both `programDateTime` and `rawProgramDateTime` are reset to
 * `null`.
 *
 * @param frag - fragment to update
 * @param prevFrag - preceding fragment, or a nullish value when there is none
 */
function assignProgramDateTime(frag, prevFrag) {
  if (frag.rawProgramDateTime) {
    frag.programDateTime = Date.parse(frag.rawProgramDateTime);
  } else if (prevFrag && Number.isFinite(prevFrag.programDateTime)) {
    // Finite check instead of truthiness: a date-time of 0 (the Unix epoch)
    // is a valid value and must still propagate to the next fragment.
    frag.programDateTime = prevFrag.endProgramDateTime;
  }

  if (!Number.isFinite(frag.programDateTime)) {
    frag.programDateTime = null;
    frag.rawProgramDateTime = null;
  }
}