Skip to content

Commit

Permalink
[Doc] Autogen nav (#51073)
Browse files Browse the repository at this point in the history
Signed-off-by: DanRoscigno <[email protected]>
(cherry picked from commit 0be4658)

# Conflicts:
#	docs/docusaurus/sidebars.json
#	docs/en/_assets/commonMarkdown/loadMethodIntro.md
#	docs/en/administration/management/resource_management/be_label.md
#	docs/en/data_source/catalog/iceberg_catalog.md
#	docs/en/introduction/Architecture.md
#	docs/en/loading/BrokerLoad.md
#	docs/en/loading/Etl_in_loading.md
#	docs/en/loading/Flink_cdc_load.md
#	docs/en/loading/InsertInto.md
#	docs/en/loading/Load_to_Primary_Key_tables.md
#	docs/en/loading/RoutineLoad.md
#	docs/en/loading/StreamLoad.md
#	docs/en/sql-reference/information_schema/loads.md
#	docs/en/sql-reference/sql-functions/table-functions/files.md
#	docs/en/sql-reference/sql-statements/Resource/CREATE_RESOURCE.md
#	docs/en/sql-reference/sql-statements/loading_unloading/INSERT.md
#	docs/en/sql-reference/sql-statements/loading_unloading/SHOW_LOAD.md
#	docs/en/sql-reference/sql-statements/loading_unloading/STREAM_LOAD.md
#	docs/en/sql-reference/sql-statements/loading_unloading/routine_load/CREATE_ROUTINE_LOAD.md
#	docs/en/sql-reference/sql-statements/materialized_view/CREATE_MATERIALIZED_VIEW.md
#	docs/en/sql-reference/sql-statements/table_bucket_part_index/CREATE_TABLE.md
#	docs/en/sql-reference/sql-statements/table_bucket_part_index/SHOW_CREATE_TABLE.md
#	docs/en/table_design/StarRocks_table_design.md
#	docs/en/table_design/data_distribution/Data_distribution.md
#	docs/en/table_design/hybrid_table.md
#	docs/en/table_design/indexes/Ngram_Bloom_Filter_Index.md
#	docs/en/table_design/indexes/Prefix_index_sort_key.md
#	docs/en/table_design/indexes/inverted_index.md
#	docs/en/table_design/table_design.md
#	docs/en/table_design/table_types/aggregate_table.md
#	docs/en/table_design/table_types/duplicate_key_table.md
#	docs/en/table_design/table_types/primary_key_table.md
#	docs/en/table_design/table_types/unique_key_table.md
#	docs/en/using_starrocks/_category_.yml
#	docs/en/using_starrocks/indexes.md
#	docs/zh/loading/BrokerLoad.md
#	docs/zh/loading/Etl_in_loading.md
#	docs/zh/loading/Flink_cdc_load.md
#	docs/zh/loading/InsertInto.md
#	docs/zh/loading/Load_to_Primary_Key_tables.md
#	docs/zh/loading/RoutineLoad.md
#	docs/zh/loading/StreamLoad.md
#	docs/zh/sql-reference/sql-statements/loading_unloading/INSERT.md
#	docs/zh/sql-reference/sql-statements/materialized_view/CREATE_MATERIALIZED_VIEW.md
#	docs/zh/sql-reference/sql-statements/table_bucket_part_index/ALTER_TABLE.md
#	docs/zh/sql-reference/sql-statements/table_bucket_part_index/CREATE_TABLE.md
#	docs/zh/sql-reference/sql-statements/table_bucket_part_index/SHOW_CREATE_TABLE.md
#	docs/zh/table_design/StarRocks_table_design.md
#	docs/zh/table_design/Table_design.md
#	docs/zh/table_design/data_distribution/Data_distribution.md
#	docs/zh/table_design/data_distribution/Temporary_partition.md
#	docs/zh/table_design/data_distribution/expression_partitioning.md
#	docs/zh/table_design/data_distribution/list_partitioning.md
#	docs/zh/table_design/hybrid_table.md
#	docs/zh/table_design/indexes/Ngram_Bloom_Filter_Index.md
#	docs/zh/table_design/indexes/Prefix_index_sort_key.md
#	docs/zh/table_design/indexes/inverted_index.md
#	docs/zh/table_design/table_design.md
#	docs/zh/table_design/table_types/aggregate_table.md
#	docs/zh/table_design/table_types/duplicate_key_table.md
#	docs/zh/table_design/table_types/primary_key_table.md
#	docs/zh/table_design/table_types/unique_key_table.md
#	docs/zh/using_starrocks/_category_.yml
#	docs/zh/using_starrocks/indexes.md
  • Loading branch information
DanRoscigno authored and mergify[bot] committed Sep 18, 2024
1 parent 4453e15 commit 610aff6
Show file tree
Hide file tree
Showing 120 changed files with 4,220 additions and 2,497 deletions.
11 changes: 6 additions & 5 deletions docs/docusaurus/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@
"dependencies": {
"@algolia/client-search": "^4.20.0",
"@docsearch/react": "3",
"@docusaurus/core": "^3.5.2",
"@docusaurus/preset-classic": "^3.5.2",
"@docusaurus/theme-search-algolia": "^3.5.2",
"@docusaurus/core": "^3.1.1",
"@docusaurus/plugin-client-redirects": "^3.1.1",
"@docusaurus/preset-classic": "^3.1.1",
"@docusaurus/theme-search-algolia": "^3.1.1",
"@mdx-js/react": "^3.0.0",
"clsx": "^2.0.0",
"fs-extra": "^11.1.1",
Expand All @@ -29,8 +30,8 @@
"react-dom": "^18.2.0"
},
"devDependencies": {
"@docusaurus/module-type-aliases": "^3.5.2",
"@docusaurus/types": "^3.5.2"
"@docusaurus/module-type-aliases": "^3.1.1",
"@docusaurus/types": "^3.1.1"
},
"browserslist": {
"production": [
Expand Down
20 changes: 18 additions & 2 deletions docs/docusaurus/sidebars.json
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@
"type": "category",
"label": "Table Design",
"link": {
<<<<<<< HEAD
"type": "doc",
"id": "table_design/StarRocks_table_design"
},
Expand Down Expand Up @@ -138,6 +139,15 @@
},
"table_design/data_compression",
"table_design/Sort_key"
=======
"type": "generated-index"
},
"items": [
{
"type": "autogenerated",
"dirName": "table_design"
}
>>>>>>> 0be46582cf ([Doc] Autogen nav (#51073))
]
},
{
Expand Down Expand Up @@ -407,10 +417,10 @@
"type": "category",
"label": "Resource management",
"link": {
"type": "doc",
"id": "administration/management/resource_management/resource_management"
"type": "generated-index"
},
"items": [
<<<<<<< HEAD
"administration/management/resource_management/resource_group",
"administration/management/resource_management/query_queues",
"administration/management/resource_management/Query_management",
Expand All @@ -420,6 +430,12 @@
"administration/management/resource_management/Replica",
"administration/management/resource_management/Blacklist",
"administration/management/resource_management/filemanager"
=======
{
"type": "autogenerated",
"dirName": "administration/management/resource_management"
}
>>>>>>> 0be46582cf ([Doc] Autogen nav (#51073))
]
}
]
Expand Down
3,608 changes: 1,275 additions & 2,333 deletions docs/docusaurus/yarn.lock

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions docs/en/_assets/commonMarkdown/loadMethodIntro.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,10 @@ Each of these options has its own advantages, which are detailed in the followin

In most cases, we recommend that you use the INSERT+`FILES()` method, which is much easier to use.

<<<<<<< HEAD
However, the INSERT+`FILES()` method currently supports only the Parquet and ORC file formats. Therefore, if you need to load data of other file formats such as CSV, or [perform data changes such as DELETE during data loading](../../loading/Load_to_Primary_Key_tables.md), you can resort to Broker Load.
=======
However, the INSERT+`FILES()` method currently supports only the Parquet, ORC, and CSV file formats. Therefore, if you need to load data of other file formats such as JSON, or perform data changes such as DELETE during data loading, you can resort to Broker Load.

If you need to load a large number of data files with a significant data volume in total (for example, more than 100 GB or even 1 TB), we recommend that you use the Pipe method. Pipe can split the files based on their number or size, breaking down the load job into smaller, sequential tasks. This approach ensures that errors in one file do not impact the entire load job and minimizes the need for retries due to data errors.
>>>>>>> 0be46582cf ([Doc] Autogen nav (#51073))
1 change: 1 addition & 0 deletions docs/en/_assets/commonMarkdown/multi-service-access.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
For the best practices of multi-service access control, see [Multi-service access control](../../administration/user_privs/User_privilege.md#multi-service-access-control).
5 changes: 5 additions & 0 deletions docs/en/_assets/commonMarkdown/quickstart-iceberg-tip.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@

:::tip
This example uses the Local Climatological Data (LCD) dataset featured in the [StarRocks Basics](../../quick_start/shared-nothing.md) Quick Start. You can load the data and try the example yourself.
:::

3 changes: 3 additions & 0 deletions docs/en/_assets/commonMarkdown/quickstart-overview-tip.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
## Learn by doing

Try the [Quick Starts](../../quick_start/quick_start.mdx) to get an overview of using StarRocks with realistic scenarios.
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@

:::tip
Try Routine Load out in this [Quick Start](../../quick_start/routine-load.md).
:::

5 changes: 5 additions & 0 deletions docs/en/_assets/commonMarkdown/quickstart-shared-data.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@

:::tip
Give [shared-data](../../quick_start/shared-data.md) a try using MinIO for object storage.
:::

Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@

:::tip
This example uses the Local Climatological Data (LCD) dataset featured in the [StarRocks Basics](../../quick_start/shared-nothing.md) Quick Start. You can load the data and try the example yourself.
:::

2 changes: 1 addition & 1 deletion docs/en/administration/management/Backup_and_restore.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ StarRocks supports the following remote storage systems:

StarRocks supports FULL backup on the granularity level of database, table, or partition.

If you have stored a large amount of data in a table, we recommend that you back up and restore data by partition. This way, you can reduce the cost of retries in case of job failures. If you need to back up incremental data on a regular basis, you can strategize a [dynamic partitioning](../../table_design/dynamic_partitioning.md) plan (by a certain time interval, for example) for your table, and back up only new partitions each time.
If you have stored a large amount of data in a table, we recommend that you back up and restore data by partition. This way, you can reduce the cost of retries in case of job failures. If you need to back up incremental data on a regular basis, you can strategize a [dynamic partitioning](../../table_design/data_distribution/dynamic_partitioning.md) plan (by a certain time interval, for example) for your table, and back up only new partitions each time.

### Create a repository

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
displayed_sidebar: docs
sidebar_position: 80
---

# Blacklist Management
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
displayed_sidebar: docs
sidebar_position: 60
---

# Load Balancing
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
displayed_sidebar: docs
sidebar_position: 40
---

# Memory Management
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
displayed_sidebar: docs
sidebar_position: 30
---

# Query Management
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
displayed_sidebar: docs
sidebar_position: 70
---

# Replica management
Expand Down
128 changes: 128 additions & 0 deletions docs/en/administration/management/resource_management/be_label.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
---
displayed_sidebar: docs
sidebar_position: 80
---

# Add labels on BEs

Since v3.2.8, StarRocks supports adding labels on BEs. When creating a table or an asynchronous materialized view, you can specify the label of a certain group of BE nodes. This ensures that data replicas are distributed only on the BE nodes associated with that label. Data replicas will be evenly distributed among nodes with the same label, enhancing data high availability and resource isolation.

## Usage

### Add labels on BEs

Suppose that one StarRocks cluster includes six BEs which are distributed evenly across three racks. You can add labels on BEs based on the racks where the BEs are located.

```SQL
ALTER SYSTEM MODIFY BACKEND "172.xx.xx.46:9050" SET ("labels.location" = "rack:rack1");
ALTER SYSTEM MODIFY BACKEND "172.xx.xx.47:9050" SET ("labels.location" = "rack:rack1");
ALTER SYSTEM MODIFY BACKEND "172.xx.xx.48:9050" SET ("labels.location" = "rack:rack2");
ALTER SYSTEM MODIFY BACKEND "172.xx.xx.49:9050" SET ("labels.location" = "rack:rack2");
ALTER SYSTEM MODIFY BACKEND "172.xx.xx.50:9050" SET ("labels.location" = "rack:rack3");
ALTER SYSTEM MODIFY BACKEND "172.xx.xx.51:9050" SET ("labels.location" = "rack:rack3");
```

After adding labels, you can execute `SHOW BACKENDS;` and view the labels of BEs in the `Location` field of the returned result.

If you need to modify the labels of BEs, you can execute `ALTER SYSTEM MODIFY BACKEND "172.xx.xx.48:9050" SET ("labels.location" = "rack:xxx");`.

### Use labels to specify table data distribution on BE nodes

If you need to specify the locations to which a table's data is distributed, for example, distributing a table's data across two racks, rack1 and rack2, you can add labels to the table.

After labels are added, all the replicas of the same tablet in the table are distributed across labels in a Round-Robin approach. Moreover, if multiple replicas of the same tablet exist within the same label, these replicas will be distributed as evenly as possible across different BEs in that label.

:::note

- The total number of BE nodes associated with the labels must be greater than or equal to the number of replicas. Otherwise, an error `Table replication num should be less than or equal to the number of available BE nodes` will occur.
- The label to be associated with a table must already exist. Otherwise, an error `Getting analyzing error. Detail message: Cannot find any backend with location: rack:xxx` will occur.

:::

#### At table creation

You can use the property `"labels.location"` to distribute the table's data across rack 1 and rack 2 at table creation:

```SQL
CREATE TABLE example_table (
order_id bigint NOT NULL,
dt date NOT NULL,
user_id INT NOT NULL,
good_id INT NOT NULL,
cnt int NOT NULL,
revenue int NOT NULL
)
PROPERTIES
("labels.location" = "rack:rack1,rack:rack2");
```

For newly created tables, the default value of the table property `labels.location` is `*`, indicating that replicas are evenly distributed across all labels. If the data distribution of a newly created table does not need to be aware of the geographical locations of servers in the cluster, you can manually set the table property `"labels.location" = ""`.

#### After table creation

If you need to modify the data distribution location of the table after table creation, for example, modify the location to rack 1, rack 2, and rack 3, you can execute the following statement:

```SQL
ALTER TABLE example_table
SET ("labels.location" = "rack:rack1,rack:rack2,rack:rack3");
```

:::note

If you have upgraded StarRocks to version 3.2.8 or later, for historical tables created before the upgrade, data is not distributed based on labels by default. If you need to distribute data of a historical table based on labels, you can execute the following statement to add labels to the historical table:

```SQL
ALTER TABLE example_table1
SET ("labels.location" = "rack:rack1,rack:rack2");
```

:::

### Use labels to specify materialized view data distribution on BE nodes

If you need to specify the locations to which an asynchronous materialized view's data is distributed, for example, distributing data across two racks, rack1 and rack2, you can add labels to the materialized view.

After labels are added, all the replicas of the same tablet in the materialized view are distributed across labels in a Round-Robin approach. Moreover, if multiple replicas of the same tablet exist within the same label, these replicas will be distributed as evenly as possible across different BEs in that label.

:::note

- The total number of BE nodes associated with the labels must be greater than or equal to the number of replicas. Otherwise, an error `Table replication num should be less than or equal to the number of available BE nodes` will occur.
- The labels to be associated with the materialized view must already exist. Otherwise, an error `Getting analyzing error. Detail message: Cannot find any backend with location: rack:xxx` will occur.

:::

#### At materialized view creation

If you want to distribute the materialized view's data across rack 1 and rack 2 while creating it, you can execute the following statement:

```SQL
CREATE MATERIALIZED VIEW mv_example_mv
DISTRIBUTED BY RANDOM
PROPERTIES (
"labels.location" = "rack:rack1,rack:rack2")
as
select order_id, dt from example_table;
```

For newly created materialized views, the default value of the property `labels.location` is `*`, indicating that replicas are evenly distributed across all labels. If the data distribution of a newly created materialized view does not need to be aware of the geographical locations of servers in the cluster, you can manually set the property `"labels.location" = ""`.

#### After materialized view creation

If you need to modify the data distribution location of the materialized view after it is created, for example, modify the location to rack 1, rack 2, and rack 3, you can execute the following statement:

```SQL
ALTER MATERIALIZED VIEW mv_example_mv
SET ("labels.location" = "rack:rack1,rack:rack2,rack:rack3");
```

:::note

If you have upgraded StarRocks to version 3.2.8 or later, for existing materialized views created before the upgrade, data is not distributed based on labels by default. If you need to distribute data of an existing materialized view based on labels, you can execute the following statement to add labels to the materialized view:

```SQL
ALTER TABLE example_mv1
SET ("labels.location" = "rack:rack1,rack:rack2");
```

:::

Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
displayed_sidebar: docs
sidebar_position: 90
---

# File manager
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
displayed_sidebar: docs
sidebar_position: 20
---

# Query queues
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
displayed_sidebar: docs
sidebar_position: 10
---

# Resource group
Expand Down Expand Up @@ -60,15 +61,15 @@ You can specify CPU and memory resource quotas for a resource group on a BE by u

> **NOTE**
>
> The amount of memory that can be used for queries is indicated by the `query_pool` parameter. For more information about the parameter, see [Memory management](Memory_management.md).
> The amount of memory that can be used for queries is indicated by the `query_pool` parameter.
- `concurrency_limit`

This parameter specifies the upper limit of concurrent queries in a resource group. It is used to avoid system overload caused by too many concurrent queries. This parameter takes effect only when it is set greater than 0. Default: 0.

- `max_cpu_cores`

The CPU core threshold for triggering query queue in FE. For more details, refer to [Query queues - Specify resource thresholds for resource group-level query queues](./query_queues.md#specify-resource-thresholds-for-resource-group-level-query-queues). It takes effect only when it is set to greater than `0`. Range: [0, `avg_be_cpu_cores`], where `avg_be_cpu_cores` represents the average number of CPU cores across all BE nodes. Default: 0.
The CPU core threshold for triggering query queue in FE. This only takes effect when it is set to greater than `0`. Range: [0, `avg_be_cpu_cores`], where `avg_be_cpu_cores` represents the average number of CPU cores across all BE nodes. Default: 0.

- `spill_mem_limit_threshold`

Expand Down Expand Up @@ -360,9 +361,9 @@ The following FE metrics only provide statistics within the current FE node:
| starrocks_fe_query_resource_group | Count | Instantaneous | The number of queries historically run in this resource group (including those currently running). |
| starrocks_fe_query_resource_group_latency | ms | Instantaneous | The query latency percentile for this resource group. The label `type` indicates specific percentiles, including `mean`, `75_quantile`, `95_quantile`, `98_quantile`, `99_quantile`, `999_quantile`. |
| starrocks_fe_query_resource_group_err | Count | Instantaneous | The number of queries in this resource group that encountered an error. |
| starrocks_fe_resource_group_query_queue_total | Count | Instantaneous | The total number of queries historically queued in this resource group (including those currently running). This metric is supported from v3.1.4 onwards. It is valid only when query queues are enabled, see [Query Queues](query_queues.md) for details. |
| starrocks_fe_resource_group_query_queue_pending | Count | Instantaneous | The number of queries currently in the queue of this resource group. This metric is supported from v3.1.4 onwards. It is valid only when query queues are enabled, see [Query Queues](query_queues.md) for details. |
| starrocks_fe_resource_group_query_queue_timeout | Count | Instantaneous | The number of queries in this resource group that have timed out while in the queue. This metric is supported from v3.1.4 onwards. It is valid only when query queues are enabled, see [Query Queues](query_queues.md) for details. |
| starrocks_fe_resource_group_query_queue_total | Count | Instantaneous | The total number of queries historically queued in this resource group (including those currently running). This metric is supported from v3.1.4 onwards. It is valid only when query queues are enabled. |
| starrocks_fe_resource_group_query_queue_pending | Count | Instantaneous | The number of queries currently in the queue of this resource group. This metric is supported from v3.1.4 onwards. It is valid only when query queues are enabled. |
| starrocks_fe_resource_group_query_queue_timeout | Count | Instantaneous | The number of queries in this resource group that have timed out while in the queue. This metric is supported from v3.1.4 onwards. It is valid only when query queues are enabled. |

### BE metrics

Expand Down Expand Up @@ -412,11 +413,3 @@ MySQL [(none)]> SHOW USAGE RESOURCE GROUPS;
| wg2 | 0 | 127.0.0.1 | 0.400 | 4 | 8 |
+------------+----+-----------+-----------------+-----------------+------------------+
```

## What to do next

After you configure resource groups, you can manage memory resources and queries. For more information, see the following topics:

- [Memory management](./Memory_management.md)

- [Query management](./Query_management.md)

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
displayed_sidebar: docs
sidebar_position: 50
---

# Spill to disk
Expand Down
11 changes: 11 additions & 0 deletions docs/en/data_source/catalog/iceberg_catalog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,20 @@
displayed_sidebar: docs
toc_max_heading_level: 5
---
<<<<<<< HEAD

# Iceberg catalog

=======
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import QSTip from '../../_assets/commonMarkdown/quickstart-iceberg-tip.mdx'

# Iceberg catalog

<QSTip />

>>>>>>> 0be46582cf ([Doc] Autogen nav (#51073))
An Iceberg catalog is a type of external catalog that is supported by StarRocks from v2.4 onwards. With Iceberg catalogs, you can:

- Directly query data stored in Iceberg without the need to manually create tables.
Expand Down
2 changes: 1 addition & 1 deletion docs/en/deployment/post_deployment_setup.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,4 +90,4 @@ SET PROPERTY FOR '<username>' 'max_user_connections' = '1000';

## What to do next

After deploying and setting up your StarRocks cluster, you can then proceed to design tables that best work for your scenarios. See [Understand StarRocks table design](../table_design/Table_design.md) for detailed instructions on designing a table.
After deploying and setting up your StarRocks cluster, you can then proceed to design tables that best work for your scenarios. See [Understand StarRocks table design](../table_design/table_design.md) for detailed instructions on designing a table.
Loading

0 comments on commit 610aff6

Please sign in to comment.