diff --git a/README.md b/README.md index 2ffd524..2e1576d 100644 --- a/README.md +++ b/README.md @@ -21,8 +21,6 @@ The provider comes in two parts: > This plugin is not ready for general use and is awaiting changes to be merged in yt-dlp for it to be functional. > Follow https://github.com/yt-dlp/yt-dlp/pull/10648 for updates. -Default port number is 4416. If you want to change this, be sure to change it in both the provider and plugin code. - ### Base Requirements If using Docker image for option (a) for the provider, the Docker runtime is required. @@ -96,8 +94,22 @@ This will automatically install [coletdjnz's POT plugin framework](https://githu If using option (a) HTTP Server for the provider, use yt-dlp like normal 🙂. -If using option (b) script for the provider, you need to pass extractor arguments including the path to the generation script for each yt-dlp call. Make sure to point to the transpiled version, `server/build/generate_once.js` +If you want to change the port number used by the provider server, use the `--port` option. + +```shell +node build/main.js --port 8080 +``` + +If changing the port or IP used for the provider server, pass it to yt-dlp via the `getpot_bgutil_baseurl` extractor argument: + +```shell +--extractor-args "youtube:getpot_bgutil_baseurl=http://127.0.0.1:8080" +``` + +--- + +If using option (b) script for the provider, you need to pass the extractor argument `getpot_bgutil_script` to `youtube` for each yt-dlp call. The argument should include the path to the transpiled generation script (`server/build/generate_once.js`). 
```shell -./yt-dlp --extractor-args "youtube:getpot_bgutil_script=/home/user/bgutil-test/bgutil-ytdlp-pot-provider/server/build/generate_once.js" +--extractor-args "youtube:getpot_bgutil_script=$WORKSPACE/bgutil-ytdlp-pot-provider/server/build/generate_once.js" ``` diff --git a/plugin/yt_dlp_plugins/extractor/getpot_bgutil.py b/plugin/yt_dlp_plugins/extractor/getpot_bgutil.py index 14c76af..2ede751 100644 --- a/plugin/yt_dlp_plugins/extractor/getpot_bgutil.py +++ b/plugin/yt_dlp_plugins/extractor/getpot_bgutil.py @@ -1,10 +1,13 @@ import json import subprocess +import os.path +import shutil from yt_dlp import YoutubeDL from yt_dlp.networking.common import Request from yt_dlp.networking.exceptions import RequestError, UnsupportedRequest -from yt_dlp_plugins.extractor.getpot import GetPOTProvider, register_provider, register_preference +from yt_dlp.utils import Popen +from yt_dlp_plugins.extractor.getpot import GetPOTProvider, register_provider @register_provider @@ -14,52 +17,89 @@ class BgUtilPotProviderRH(GetPOTProvider): def _validate_get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data_sync_id=None, player_url=None, **kwargs): if not data_sync_id and not visitor_data: - raise UnsupportedRequest('One of [data_sync_id, visitor_data] must be passed') + raise UnsupportedRequest( + 'One of [data_sync_id, visitor_data] must be passed') def _get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data_sync_id=None, player_url=None, **kwargs) -> str: - generate_pot_script_path = ydl.get_info_extractor('Youtube')._configuration_arg('getpot_bgutil_script', [None], casesense=True)[0] + generate_pot_script_path = ydl.get_info_extractor('Youtube')._configuration_arg( + 'getpot_bgutil_script', [None], casesense=True)[0] + http_base_url = ydl.get_info_extractor('Youtube')._configuration_arg( + 'getpot_bgutil_baseurl', [None], casesense=True)[0] if generate_pot_script_path: - self._logger.info(f"Generating POT via script: {generate_pot_script_path}") - 
po_token = self._get_pot_via_script(generate_pot_script_path, visitor_data, data_sync_id) - return po_token + self._logger.info( + f'Generating POT via script: {generate_pot_script_path}') + po_token = self._get_pot_via_script( + generate_pot_script_path, visitor_data, data_sync_id) else: - self._logger.info(f"Generating POT via HTTP server") - po_token = self._get_pot_via_http(ydl, client, visitor_data, data_sync_id) + self._logger.info('Generating POT via HTTP server') + po_token = self._get_pot_via_http( + ydl, client, visitor_data, data_sync_id, http_base_url) return po_token - - def _get_pot_via_http(self, ydl, client, visitor_data, data_sync_id): - response = ydl.urlopen(Request('http://127.0.0.1:4416/get_pot', data=json.dumps({ - 'client': client, - 'visitor_data': visitor_data, - 'data_sync_id': data_sync_id - }).encode(), headers = {'Content-Type': 'application/json'})) - response_json = json.loads(response.read().decode('utf-8')) + def _get_pot_via_http(self, ydl, client, visitor_data, data_sync_id, base_url): + if base_url is None: + base_url = 'http://127.0.0.1:4416' + try: + response = ydl.urlopen(Request(f'{base_url}/get_pot', data=json.dumps({ + 'client': client, + 'visitor_data': visitor_data, + 'data_sync_id': data_sync_id, + }).encode(), headers={'Content-Type': 'application/json'})) + except Exception as e: + raise RequestError(f'Error reaching POST /get_pot: {str(e)}') + try: + response_json = json.load(response) + except Exception as e: + raise RequestError( + f'Error parsing response JSON(caused by {str(e)}). response = {response.read().decode()}') + + if error_msg := response_json.get('error'): + raise RequestError(error_msg) if 'po_token' not in response_json: raise RequestError('Server did not respond with a po_token') - return response_json["po_token"] + return response_json['po_token'] def _get_pot_via_script(self, script_path, visitor_data, data_sync_id): - # possibly vulnerable to shell injection here? 
but risk is low + if not os.path.isfile(script_path): + raise RequestError(f"Script path doesn't exist: {script_path}") + if os.path.basename(script_path) != 'generate_once.js': + raise RequestError( + 'Incorrect script passed to extractor args. Path to generate_once.js required') + if shutil.which('node') is None: + raise RequestError('node is not in PATH') + command_args = ['node', script_path] if data_sync_id: - command_args.extend(["-d", data_sync_id]) + command_args.extend(['-d', data_sync_id]) elif visitor_data: - command_args.extend(["-v", visitor_data]) + command_args.extend(['-v', visitor_data]) else: - raise RequestError("Unexpected missing visitorData/dataSyncId in _get_pot_via_script") - self._logger.debug(f"Executing command to get POT via script: {' '.join(command_args)}") + raise RequestError( + 'Unexpected missing visitorData and dataSyncId in _get_pot_via_script') + self._logger.debug( + f'Executing command to get POT via script: {" ".join(command_args)}') + + try: + stdout, stderr, returncode = Popen.run( + command_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + except Exception as e: + raise RequestError( + f'_get_pot_via_script failed: Unable to run script(caused by {str(e)})') + + self._logger.debug(f'stdout = {stdout}') + if returncode: + raise RequestError( + f'_get_pot_via_script failed with returncode {returncode}:\n{stderr.strip()}') - result = subprocess.run(command_args,stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) - - self._logger.debug(f"stdout = {result.stdout}") - if result.stderr or result.returncode != 0: - raise RequestError(f"_get_pot_via_script failed with return code {result.returncode}. 
stderr = {result.stderr}") - - # the JSON response is always the last line - script_data_resp = result.stdout.splitlines()[-1] - self._logger.debug(f"_get_pot_via_script response = {script_data_resp}") - return json.loads(script_data_resp)['poToken'] + # The JSON response is always the last line + script_data_resp = stdout.splitlines()[-1] + self._logger.debug( + f'_get_pot_via_script response = {script_data_resp}') + try: + return json.loads(script_data_resp)['poToken'] + except (json.JSONDecodeError, TypeError, KeyError) as e: + raise RequestError( + f'Error parsing JSON response from _get_pot_via_script(caused by {str(e)})') diff --git a/server/src/generate_once.ts b/server/src/generate_once.ts index 96093b6..38735fa 100644 --- a/server/src/generate_once.ts +++ b/server/src/generate_once.ts @@ -39,9 +39,6 @@ const options = program.opts(); } if (verbose) { - console.log( - `Received request for visitor data, grabbing from Innertube`, - ); console.log(`Generated visitor data: ${generatedVisitorData}`); } diff --git a/server/src/main.ts b/server/src/main.ts index 336b539..ef05edd 100644 --- a/server/src/main.ts +++ b/server/src/main.ts @@ -1,8 +1,16 @@ import { SessionManager } from "./session_manager"; +import { Command } from "@commander-js/extra-typings"; import express from "express"; import bodyParser from "body-parser"; -const PORT_NUMBER = 4416; +const program = new Command() + .option("-p, --port <PORT>") + .option("--verbose"); + +program.parse(); +const options = program.opts(); + +const PORT_NUMBER = options.port || 4416; const httpServer = express(); httpServer.use(bodyParser.json()); @@ -14,7 +22,7 @@ httpServer.listen({ console.log(`Started POT server on port ${PORT_NUMBER}`); -const sessionManager = new SessionManager(); +const sessionManager = new SessionManager(options.verbose || false); httpServer.post("/get_pot", async (request, response) => { const visitorData = request.body.visitor_data as string; const dataSyncId = request.body.data_sync_id as 
string; @@ -36,7 +44,7 @@ httpServer.post("/get_pot", async (request, response) => { const generatedVisitorData = await sessionManager.generateVisitorData(); if (!generatedVisitorData) { response.status(500); - response.send("Error generating visitor data"); + response.send({error: "Error generating visitor data"}); return; }