From 9a11a8702bf691824f1b795878101a43864a141b Mon Sep 17 00:00:00 2001
From: Ariana Barzinpour
Date: Mon, 17 Jun 2024 06:57:44 +0000
Subject: [PATCH 1/3] add robots.txt

---
 cubedash/_pages.py                   | 7 ++++++-
 integration_tests/test_page_loads.py | 8 ++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/cubedash/_pages.py b/cubedash/_pages.py
index 87ea3594e..99644f99c 100644
--- a/cubedash/_pages.py
+++ b/cubedash/_pages.py
@@ -8,7 +8,7 @@
 from datacube.model import DatasetType, Range
 from datacube.scripts.dataset import build_dataset_info
 from dateutil import tz
-from flask import abort, redirect, request, url_for
+from flask import abort, redirect, request, send_from_directory, url_for
 from werkzeug.datastructures import MultiDict
 from werkzeug.exceptions import HTTPException
 
@@ -674,6 +674,11 @@ def about_page():
     )
 
 
+@app.route("/robots.txt")
+def robots_txt():
+    return send_from_directory("static", "robots.txt")
+
+
 @app.route("/")
 def default_redirect():
     """Redirect to default starting page."""
diff --git a/integration_tests/test_page_loads.py b/integration_tests/test_page_loads.py
index 0ddc95f9b..ff37e907d 100644
--- a/integration_tests/test_page_loads.py
+++ b/integration_tests/test_page_loads.py
@@ -961,6 +961,14 @@ def check_doc_start_has_hint(hint: str, url: str):
     )
 
 
+def test_get_robots(client: FlaskClient):
+    """
+    Check that robots.txt is correctly served from root
+    """
+    text, rv = get_text_response(client, "/robots.txt")
+    assert "User-Agent: *" in text
+
+
 def test_all_give_404s(client: FlaskClient):
     """
     We should get 404 messages, not exceptions, for missing things.

From 88515ddaf3261480caa8ace92206be9f070d3602 Mon Sep 17 00:00:00 2001
From: Ariana Barzinpour
Date: Mon, 17 Jun 2024 07:01:15 +0000
Subject: [PATCH 2/3] actually add the robots.txt file

---
 cubedash/static/robots.txt | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 cubedash/static/robots.txt

diff --git a/cubedash/static/robots.txt b/cubedash/static/robots.txt
new file mode 100644
index 000000000..7705abc24
--- /dev/null
+++ b/cubedash/static/robots.txt
@@ -0,0 +1,4 @@
+User-Agent: *
+Allow: /
+Disallow: /products/*/*
+Disallow: /stac/**

From d4dd5a979dd2b38ea44171a0162dedc054e44a91 Mon Sep 17 00:00:00 2001
From: Ariana Barzinpour
Date: Mon, 17 Jun 2024 07:28:06 +0000
Subject: [PATCH 3/3] provide robots.txt via config instead of static file

---
 cubedash/_pages.py         | 10 ++++++++--
 cubedash/static/robots.txt |  4 ----
 2 files changed, 8 insertions(+), 6 deletions(-)
 delete mode 100644 cubedash/static/robots.txt

diff --git a/cubedash/_pages.py b/cubedash/_pages.py
index 99644f99c..bb97760e3 100644
--- a/cubedash/_pages.py
+++ b/cubedash/_pages.py
@@ -8,7 +8,7 @@
 from datacube.model import DatasetType, Range
 from datacube.scripts.dataset import build_dataset_info
 from dateutil import tz
-from flask import abort, redirect, request, send_from_directory, url_for
+from flask import abort, redirect, request, url_for
 from werkzeug.datastructures import MultiDict
 from werkzeug.exceptions import HTTPException
 
@@ -51,6 +51,10 @@
 _DEFAULT_ARRIVALS_DAYS: int = app.config.get("CUBEDASH_DEFAULT_ARRIVALS_DAY_COUNT", 14)
 
+_ROBOTS_TXT_DEFAULT = (
+    "User-Agent: *\nAllow: /\nDisallow: /products/*/*\nDisallow: /stac/**"
+)
+
 # Add server timings to http headers.
 if app.config.get("CUBEDASH_SHOW_PERF_TIMES", False):
     _monitoring.init_app_monitoring()
 
@@ -676,7 +680,9 @@ def about_page():
 
 @app.route("/robots.txt")
 def robots_txt():
-    return send_from_directory("static", "robots.txt")
+    content = flask.current_app.config.get("ROBOTS_TXT", _ROBOTS_TXT_DEFAULT)
+    # Serve as plain text: crawlers don't honour robots rules wrapped in JSON.
+    return flask.Response(content, mimetype="text/plain")
 
 
 @app.route("/")
diff --git a/cubedash/static/robots.txt b/cubedash/static/robots.txt
deleted file mode 100644
index 7705abc24..000000000
--- a/cubedash/static/robots.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-User-Agent: *
-Allow: /
-Disallow: /products/*/*
-Disallow: /stac/**
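
Note for deployers: after patch 3/3, the crawl rules come from Flask config with a
built-in fallback, so a deployment can change them without shipping a new file. A
minimal standalone sketch of the same config-with-default pattern (plain Flask, not
cubedash's actual app wiring; the names and the override value are illustrative):

    from flask import Flask, Response, current_app

    app = Flask(__name__)

    # Same default as the patch: allow everything except the deep per-product
    # dataset pages and the STAC endpoints.
    _ROBOTS_TXT_DEFAULT = (
        "User-Agent: *\nAllow: /\nDisallow: /products/*/*\nDisallow: /stac/**"
    )

    @app.route("/robots.txt")
    def robots_txt():
        # Deployment config wins; otherwise serve the bundled default.
        content = current_app.config.get("ROBOTS_TXT", _ROBOTS_TXT_DEFAULT)
        return Response(content, mimetype="text/plain")

    if __name__ == "__main__":
        # Example override for a private instance that should not be indexed:
        app.config["ROBOTS_TXT"] = "User-Agent: *\nDisallow: /"
        app.run()

Driving the rules from config rather than a static file means a single image can
serve a different crawl policy per deployment, which is why patch 3/3 also deletes
cubedash/static/robots.txt.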