Skip to content

Commit 6f9bf44

Browse files
authored
Merge pull request #48 from cloudsufi/patch-ftp-xls-ui
[Plugin-1730] Adding XLS UI elements for ftp source
2 parents 26472a3 + d79d330 commit 6f9bf44

File tree

3 files changed

+156
-6
lines changed

3 files changed

+156
-6
lines changed

docs/FTPSource-batchsource.md

+16-3
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,28 @@ Properties
2828
**Password:** Password to use for authentication.
2929

3030
**Format:** Format of the data to read.
31-
The format must be one of 'blob', 'csv', 'delimited', 'json', 'text', 'tsv', or the
31+
The format must be one of 'blob', 'csv', 'delimited', 'json', 'text', 'tsv', 'xls', or the
3232
name of any format plugin that you have deployed to your environment. Note that FTP does
3333
not support seeking in a file, so formats like avro and parquet cannot be used.
3434
If the format is a macro, only the formats listed above can be used.
3535
If the format is 'blob', every input file will be read into a separate record.
3636
The 'blob' format also requires a schema that contains a field named 'body' of type 'bytes'.
3737
If the format is 'text', the schema must contain a field named 'body' of type 'string'.
3838

39-
**Get Schema:** Auto-detects schema from file. Supported formats are: csv, delimited, tsv, blob and text.
39+
**Sample Size:** The maximum number of rows that will be examined for automatic data type detection.
40+
The default value is 1000. This is only used when the format is 'xls'.
41+
42+
**Override:** A list of columns with the corresponding data types for which the automatic data type detection gets
43+
skipped. This is only used when the format is 'xls'.
44+
45+
**Terminate Reading After Empty Row:** Specify whether to stop reading after encountering the first empty row. Defaults to false. When false, the reader reads all rows in the sheet. This is only used when the format is 'xls'.
46+
47+
**Select Sheet Using:** Select the sheet by name or number. Default is 'Sheet Number'. This is only used when the format is 'xls'.
48+
49+
**Sheet Value:** The name/number of the sheet to read from. If not specified, the first sheet will be read.
50+
Sheet numbers are 0-based, i.e. the first sheet is 0. This is only used when the format is 'xls'.
51+
52+
**Get Schema:** Auto-detects schema from file. Supported formats are: csv, delimited, tsv, xls, blob and text.
4053

4154
Blob - is set by default as field named 'body' of type bytes.
4255

@@ -47,7 +60,7 @@ JSON - is not supported. You must manually provide the output schema.
4760
**Delimiter:** Delimiter to use when the format is 'delimited'. This will be ignored for other formats.
4861

4962
**Use First Row as Header:** Whether to use the first line of each file as the column headers. Supported formats are '
50-
text', 'csv', 'tsv', and 'delimited'.
63+
text', 'csv', 'tsv', 'xls', and 'delimited'.
5164

5265
**Enable Quoted Values:** Whether to treat content between quotes as a value. This value will only be used if the format
5366
is 'csv', 'tsv' or 'delimited'. For example, if this is set to true, a line that looks like `1, "a, b, c"` will output

src/main/java/io/cdap/plugin/batch/source/ftp/FTPConfig.java

+36-2
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import com.google.gson.Gson;
2323
import io.cdap.cdap.api.annotation.Description;
2424
import io.cdap.cdap.api.annotation.Macro;
25+
import io.cdap.cdap.api.annotation.Name;
2526
import io.cdap.cdap.api.data.schema.Schema;
2627
import io.cdap.cdap.api.plugin.PluginConfig;
2728
import io.cdap.cdap.etl.api.FailureCollector;
@@ -58,6 +59,9 @@ public class FTPConfig extends PluginConfig implements FileSourceProperties {
5859
private static final Type MAP_STRING_STRING_TYPE = new TypeToken<Map<String, String>>() {
5960
}.getType();
6061
private static final List<String> LOCATION_PROPERTIES = Arrays.asList("type", "host", "path", "user", "password");
62+
private static final String NAME_SHEET = "sheet";
63+
private static final String NAME_SHEET_VALUE = "sheetValue";
64+
private static final String NAME_TERMINATE_IF_EMPTY_ROW = "terminateIfEmptyRow";
6165

6266
@Description("Name be used to uniquely identify this source for lineage, annotating metadata, etc.")
6367
private final String referenceName;
@@ -110,13 +114,24 @@ public class FTPConfig extends PluginConfig implements FileSourceProperties {
110114

111115
@Macro
112116
@Nullable
113-
@Description("Whether to use first row as header. Supported formats are 'text', 'csv', 'tsv', " +
117+
@Description("The maximum number of rows that will get investigated for automatic data type detection.")
118+
private Long sampleSize;
119+
120+
@Macro
121+
@Nullable
122+
@Description("A list of columns with the corresponding data types for whom the automatic data type detection gets" +
123+
" skipped.")
124+
private String override;
125+
126+
@Macro
127+
@Nullable
128+
@Description("Whether to use first row as header. Supported formats are 'text', 'csv', 'tsv', 'xls', " +
114129
"'delimited'. Default value is false.")
115130
private final Boolean skipHeader;
116131

117132
@Macro
118133
@Description("Format of the data to read. Supported formats are 'avro', 'blob', 'csv', 'delimited', 'json', "
119-
+ "'parquet', 'text', or 'tsv'. If no format is given, it will default to 'text'.")
134+
+ "'parquet', 'text', or 'tsv', 'xls'. If no format is given, it will default to 'text'.")
120135
private final String format;
121136

122137
@Macro
@@ -148,6 +163,25 @@ public class FTPConfig extends PluginConfig implements FileSourceProperties {
148163
@Description("Maximum time in milliseconds to wait for connection initialization before time out.")
149164
private final Integer connectTimeout;
150165

166+
@Name(NAME_SHEET)
167+
@Macro
168+
@Nullable
169+
@Description("Select the sheet by name or number. Default is 'Sheet Number'.")
170+
private String sheet;
171+
172+
@Name(NAME_SHEET_VALUE)
173+
@Macro
174+
@Nullable
175+
@Description("The name/number of the sheet to read from. If not specified, the first sheet will be read." +
176+
"Sheet Numbers are 0 based, ie first sheet is 0.")
177+
private String sheetValue;
178+
179+
@Name(NAME_TERMINATE_IF_EMPTY_ROW)
180+
@Macro
181+
@Nullable
182+
@Description("Specify whether to stop reading after encountering the first empty row. Defaults to false.")
183+
private String terminateIfEmptyRow;
184+
151185
@VisibleForTesting
152186
private FTPConfig(@Nullable String referenceName, String type, String host, @Nullable Integer port, String path,
153187
String user, String password, @Nullable String fileSystemProperties,

widgets/FTPSource-batchsource.json

+104-1
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,10 @@
8888
{
8989
"label": "tsv",
9090
"value": "tsv"
91+
},
92+
{
93+
"label": "xls",
94+
"value": "xls"
9195
}
9296
]
9397
}
@@ -96,6 +100,36 @@
96100
"widget-type": "get-schema",
97101
"widget-category": "plugin"
98102
},
103+
{
104+
"widget-type": "number",
105+
"label": "Sample Size",
106+
"name": "sampleSize",
107+
"widget-attributes": {
108+
"default": "1000",
109+
"minimum": "1"
110+
}
111+
},
112+
{
113+
"widget-type": "keyvalue-dropdown",
114+
"label": "Override",
115+
"name": "override",
116+
"widget-attributes": {
117+
"key-placeholder": "Field Name",
118+
"value-placeholder": "Data Type",
119+
"dropdownOptions": [
120+
"boolean",
121+
"bytes",
122+
"double",
123+
"float",
124+
"int",
125+
"long",
126+
"string",
127+
"date",
128+
"time",
129+
"timestamp"
130+
]
131+
}
132+
},
99133
{
100134
"widget-type": "textbox",
101135
"label": "Delimiter",
@@ -151,6 +185,42 @@
151185
"label": "False"
152186
}
153187
}
188+
},
189+
{
190+
"widget-type": "toggle",
191+
"label": "Terminate Reading After Empty Row",
192+
"name": "terminateIfEmptyRow",
193+
"widget-attributes": {
194+
"default": "false",
195+
"on": {
196+
"value": "true",
197+
"label": "True"
198+
},
199+
"off": {
200+
"value": "false",
201+
"label": "False"
202+
}
203+
}
204+
},
205+
{
206+
"widget-type": "select",
207+
"label": "Select Sheet Using",
208+
"name": "sheet",
209+
"widget-attributes": {
210+
"values": [
211+
"Sheet Name",
212+
"Sheet Number"
213+
],
214+
"default": "Sheet Number"
215+
}
216+
},
217+
{
218+
"widget-type": "textbox",
219+
"label": "Sheet Value",
220+
"name": "sheetValue",
221+
"widget-attributes": {
222+
"default": "0"
223+
}
154224
}
155225
]
156226
},
@@ -257,13 +327,46 @@
257327
{
258328
"name": "skipHeader",
259329
"condition": {
260-
"expression": "format == 'delimited' || format == 'csv' || format == 'tsv'"
330+
"expression": "format == 'delimited' || format == 'csv' || format == 'tsv' || format == 'xls'"
261331
},
262332
"show": [
263333
{
264334
"name": "skipHeader"
265335
}
266336
]
337+
},
338+
{
339+
"name": "sheet",
340+
"condition": {
341+
"expression": "format == 'xls'"
342+
},
343+
"show": [
344+
{
345+
"name": "sheet"
346+
}
347+
]
348+
},
349+
{
350+
"name": "sheetValue",
351+
"condition": {
352+
"expression": "format == 'xls'"
353+
},
354+
"show": [
355+
{
356+
"name": "sheetValue"
357+
}
358+
]
359+
},
360+
{
361+
"name": "terminateIfEmptyRow",
362+
"condition": {
363+
"expression": "format == 'xls'"
364+
},
365+
"show": [
366+
{
367+
"name": "terminateIfEmptyRow"
368+
}
369+
]
267370
}
268371
],
269372
"jump-config": {

0 commit comments

Comments
 (0)