Skip to content

Commit

Permalink
Re-work URL joining
Browse files Browse the repository at this point in the history
  • Loading branch information
bbayles authored and mauricioabreu committed May 10, 2023
1 parent ca305e9 commit 8c1a047
Show file tree
Hide file tree
Showing 7 changed files with 28 additions and 50 deletions.
10 changes: 6 additions & 4 deletions m3u8/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,16 @@
import sys
import os

from m3u8.httpclient import DefaultHTTPClient, _parsed_url
from urllib.parse import urljoin, urlsplit

from m3u8.httpclient import DefaultHTTPClient
from m3u8.model import (M3U8, Segment, SegmentList, PartialSegment,
PartialSegmentList, Key, Playlist, IFramePlaylist,
Media, MediaList, PlaylistList, Start,
RenditionReport, RenditionReportList, ServerControl,
Skip, PartInformation, PreloadHint, DateRange,
DateRangeList, ContentSteering)
from m3u8.parser import parse, is_url, ParseError
from m3u8.parser import parse, ParseError


__all__ = ('M3U8', 'Segment', 'SegmentList', 'PartialSegment',
Expand All @@ -33,7 +35,7 @@ def loads(content, uri=None, custom_tags_parser=None):
if uri is None:
return M3U8(content, custom_tags_parser=custom_tags_parser)
else:
base_uri = _parsed_url(uri)
base_uri = urljoin(uri, '.')
return M3U8(content, base_uri=base_uri, custom_tags_parser=custom_tags_parser)


Expand All @@ -42,7 +44,7 @@ def load(uri, timeout=None, headers={}, custom_tags_parser=None, http_client=Def
Retrieves the content from a given URI and returns a M3U8 object.
Raises ValueError if invalid content or IOError if request fails.
'''
if is_url(uri):
if urlsplit(uri).scheme:
content, base_uri = http_client.download(uri, timeout, headers, verify_ssl)
return M3U8(content, base_uri=base_uri, custom_tags_parser=custom_tags_parser)
else:
Expand Down
7 changes: 2 additions & 5 deletions m3u8/httpclient.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
import ssl
import urllib.request
from m3u8.parser import urljoin


def _parsed_url(url):
return urljoin(url, '.')
from urllib.parse import urljoin


class DefaultHTTPClient:
Expand All @@ -18,7 +15,7 @@ def download(self, uri, timeout=None, headers={}, verify_ssl=True):
opener = urllib.request.build_opener(proxy_handler, https_handler)
opener.addheaders = headers.items()
resource = opener.open(uri, timeout=timeout)
base_uri = _parsed_url(resource.geturl())
base_uri = urljoin(resource.geturl(), '.')
content = resource.read().decode(
resource.headers.get_content_charset(failobj="utf-8")
)
Expand Down
31 changes: 12 additions & 19 deletions m3u8/mixins.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,5 @@

import os
from m3u8.parser import is_url, urljoin


def _urijoin(base_uri, path):
    """Join ``path`` onto ``base_uri``, for both URLs and filesystem paths.

    When ``is_url(base_uri)`` is true, ``base_uri`` is treated as a
    directory: a trailing slash is appended if missing, so ``urljoin``
    does not replace its last segment. The result is
    ``urljoin(base_uri, path)``.

    Otherwise ``base_uri`` is treated as a local path: leading and trailing
    slashes are stripped from ``path`` so that ``os.path.join`` cannot
    discard ``base_uri``, and the joined path is normalised.
    """
    if not is_url(base_uri):
        # strip('/') keeps an "absolute" segment path anchored under base_uri.
        return os.path.normpath(os.path.join(base_uri, path.strip('/')))

    if not base_uri.endswith('/'):
        base_uri += '/'
    return urljoin(base_uri, path)
from os.path import dirname
from urllib.parse import urljoin, urlsplit


class BasePathMixin(object):
Expand All @@ -18,18 +8,21 @@ class BasePathMixin(object):
def absolute_uri(self):
if self.uri is None:
return None
if is_url(self.uri):
return self.uri
else:
if self.base_uri is None:
raise ValueError('There can not be `absolute_uri` with no `base_uri` set')
return _urijoin(self.base_uri, self.uri)

ret = urljoin(self.base_uri, self.uri)
if self.base_uri and (not urlsplit(self.base_uri).scheme):
return ret

if not urlsplit(ret).scheme:
raise ValueError('There can not be `absolute_uri` with no `base_uri` set')

return ret

@property
def base_path(self):
if self.uri is None:
return None
return os.path.dirname(self.get_path_from_uri())
return dirname(self.get_path_from_uri())

def get_path_from_uri(self):
"""Some URIs have a slash in the query string."""
Expand Down
14 changes: 0 additions & 14 deletions m3u8/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,20 +594,6 @@ def normalize_attribute(attribute):
return attribute.replace('-', '_').lower().strip()


def is_url(uri):
    """Return True when ``uri`` starts with one of ``URI_PREFIXES``.

    ``URI_PREFIXES`` is a module-level constant defined elsewhere in this
    file; it is passed straight to ``str.startswith``.
    """
    return uri.startswith(URI_PREFIXES)


def urljoin(base, url):
    """Join ``url`` onto ``base`` while preserving repeated slashes.

    Both arguments are rewritten before being handed to ``_urljoin`` so
    that empty path segments survive the join:

    * every ``'://'`` is temporarily replaced by ``'\\1'`` so the scheme
      separator is not mistaken for an empty segment;
    * every remaining ``'//'`` gets a ``'\\0'`` placeholder inserted
      between the slashes, turning the empty segment into a non-empty one.

    After joining, the ``'\\0'`` placeholders are removed from the result.
    """
    def _protect(value):
        # Hide the scheme separator, pad empty segments, then restore it.
        value = value.replace('://', '\1')
        while '//' in value:
            value = value.replace('//', '/\0/')
        return value.replace('\1', '://')

    return _urljoin(_protect(base), _protect(url)).replace('\0', '')


def get_segment_custom_value(state, key, default=None):
"""
Helper function for getting custom values for Segment
Expand Down
2 changes: 1 addition & 1 deletion tests/playlists/relative-playlist.m3u8
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#EXTINF:5220,
entire4.ts
#EXTINF:5220,
//entire5.ts
./entire5.ts
#EXTINF:5220,
.//entire6.ts
#EXT-X-ENDLIST
10 changes: 5 additions & 5 deletions tests/test_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def test_load_should_create_object_from_file_with_relative_segments():
obj = m3u8.load(playlists.RELATIVE_PLAYLIST_FILENAME)
expected_key_abspath = '%s/key.bin' % os.path.dirname(base_uri)
expected_key_path = '../key.bin'
expected_ts1_abspath = '%s/entire1.ts' % base_uri
expected_ts1_abspath = '/entire1.ts'
expected_ts1_path = '/entire1.ts'
expected_ts2_abspath = '%s/entire2.ts' % os.path.dirname(base_uri)
expected_ts2_path = '../entire2.ts'
Expand All @@ -51,7 +51,7 @@ def test_load_should_create_object_from_file_with_relative_segments():
expected_ts4_abspath = '%s/entire4.ts' % base_uri
expected_ts4_path = 'entire4.ts'
expected_ts5_abspath = '%s/entire5.ts' % base_uri
expected_ts5_path = '//entire5.ts'
expected_ts5_path = './entire5.ts'
expected_ts6_abspath = '%s/entire6.ts' % base_uri
expected_ts6_path = './/entire6.ts'

Expand Down Expand Up @@ -87,9 +87,9 @@ def test_load_should_create_object_from_uri_with_relative_segments():
expected_ts3_path = '../../entire3.ts'
expected_ts4_abspath = '%s%sentire4.ts' % (prefix, base_uri + '/')
expected_ts4_path = 'entire4.ts'
expected_ts5_abspath = '%s%sentire5.ts' % (prefix, '//')
expected_ts5_path = '//entire5.ts'
expected_ts6_abspath = '%s%sentire6.ts' % (prefix, os.path.normpath(base_uri + '/.') + '//')
expected_ts5_abspath = '%s%sentire5.ts' % (prefix, base_uri + '/')
expected_ts5_path = './entire5.ts'
expected_ts6_abspath = '%s%sentire6.ts' % (prefix, base_uri + '/')
expected_ts6_path = './/entire6.ts'

assert isinstance(obj, m3u8.M3U8)
Expand Down
4 changes: 2 additions & 2 deletions tests/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -1002,12 +1002,12 @@ def test_m3u8_should_propagate_base_uri_to_segments():
content = f.read()
obj = m3u8.M3U8(content, base_uri='/any/path')
assert '/entire1.ts' == obj.segments[0].uri
assert '/any/path/entire1.ts' == obj.segments[0].absolute_uri
assert '/entire1.ts' == obj.segments[0].absolute_uri
assert 'entire4.ts' == obj.segments[3].uri
assert '/any/path/entire4.ts' == obj.segments[3].absolute_uri
obj.base_uri = '/any/where/'
assert '/entire1.ts' == obj.segments[0].uri
assert '/any/where/entire1.ts' == obj.segments[0].absolute_uri
assert '/entire1.ts' == obj.segments[0].absolute_uri
assert 'entire4.ts' == obj.segments[3].uri
assert '/any/where/entire4.ts' == obj.segments[3].absolute_uri

Expand Down

0 comments on commit 8c1a047

Please sign in to comment.