Skip to content

Commit

Permalink
Issue #2580: Skip file indexing of object storage with certain tags
Browse files Browse the repository at this point in the history
  • Loading branch information
tcibinan committed May 24, 2023
1 parent a7af587 commit f3775a3
Show file tree
Hide file tree
Showing 9 changed files with 65 additions and 8 deletions.
6 changes: 6 additions & 0 deletions deploy/docker/cp-search/config/application.properties
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ sync.s3-file.bulk.load.tags.size=100
sync.s3-file.tag.value.delimiter=${CP_SEARCH_S3_FILE_TAG_DELIMITER:;}
sync.s3-file.storage.ids=${CP_SEARCH_S3_INDEX_STORAGE_IDS:}
sync.s3-file.storage.skip.ids=${CP_SEARCH_S3_INDEX_SKIP_STORAGE_IDS:}
sync.s3-file.storage.exclude.metadata.key=${CP_SEARCH_S3_FILE_STORAGE_EXCLUDE_METADATA_KEY:Billing status}
sync.s3-file.storage.exclude.metadata.value=${CP_SEARCH_S3_FILE_STORAGE_EXCLUDE_METADATA_VALUE:Exclude}

#NFS Files Settings
sync.nfs-file.disable=${CP_SEARCH_DISABLE_NFS_FILE:false}
Expand All @@ -73,6 +75,8 @@ sync.gs-file.index.mapping=file://${CP_SEARCH_MAPPINGS_LOCATION}/storage_file.js
sync.gs-file.bulk.insert.size=1000
sync.gs-file.bulk.load.tags.size=100
sync.gs-file.tag.value.delimiter=${CP_SEARCH_GS_FILE_TAG_DELIMITER:;}
sync.gs-file.storage.exclude.metadata.key=${CP_SEARCH_GS_FILE_STORAGE_EXCLUDE_METADATA_KEY:Billing status}
sync.gs-file.storage.exclude.metadata.value=${CP_SEARCH_GS_FILE_STORAGE_EXCLUDE_METADATA_VALUE:Exclude}

#GS Storage Settings
sync.gs-storage.disable=${CP_SEARCH_DISABLE_GS_STORAGE:false}
Expand Down Expand Up @@ -105,6 +109,8 @@ sync.az-blob.index.name=az-blob
sync.az-blob.bulk.insert.size=1000
sync.az-blob.bulk.load.tags.size=100
sync.az-file.tag.value.delimiter=${CP_SEARCH_AZ_FILE_TAG_DELIMITER:;}
sync.az-file.storage.exclude.metadata.key=${CP_SEARCH_AZ_FILE_STORAGE_EXCLUDE_METADATA_KEY:Billing status}
sync.az-file.storage.exclude.metadata.value=${CP_SEARCH_AZ_FILE_STORAGE_EXCLUDE_METADATA_VALUE:Exclude}

#Tool Settings
sync.tool.disable=${CP_SEARCH_DISABLE_TOOL:false}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ public class AzureFileSyncConfiguration {
@Value("${sync.az-file.tag.value.delimiter:;}")
private String tagDelimiter;

@Value("${sync.az-file.storage.exclude.metadata.key:Billing status}")
private String storageExcludeKey;

@Value("${sync.az-file.storage.exclude.metadata.value:Exclude}")
private String storageExcludeValue;

@Bean
public ObjectStorageFileManager azFileManager() {
return new AzureBlobManager();
Expand All @@ -69,6 +75,7 @@ public ObjectStorageIndex azFileSynchronizer(
indexSettingsPath, bulkInsertSize, bulkLoadTagsSize,
DataStorageType.AZ,
SearchDocumentType.AZ_BLOB_FILE,
tagDelimiter, false);
tagDelimiter, false,
storageExcludeKey, storageExcludeValue);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@ public class GSFileSyncConfiguration {
@Value("${sync.gs-file.tag.value.delimiter:;}")
private String tagDelimiter;

@Value("${sync.gs-file.storage.exclude.metadata.key:Billing status}")
private String storageExcludeKey;

@Value("${sync.gs-file.storage.exclude.metadata.value:Exclude}")
private String storageExcludeValue;

@Bean
public ObjectStorageFileManager gsFileManager() {
return new GsBucketFileManager();
Expand All @@ -65,7 +71,8 @@ public ObjectStorageIndex gsFileSynchronizer(
indexSettingsPath, bulkInsertSize, bulkLoadTagsSize,
DataStorageType.GS,
SearchDocumentType.GS_FILE,
tagDelimiter, false);
tagDelimiter, false,
storageExcludeKey, storageExcludeValue);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ public class S3FileSyncConfiguration {
private String storageIds;
@Value("${sync.s3-file.storage.skip.ids:}")
private String skipStorageIds;
@Value("${sync.s3-file.storage.exclude.metadata.key:Billing status}")
private String storageExcludeKey;
@Value("${sync.s3-file.storage.exclude.metadata.value:Exclude}")
private String storageExcludeValue;

@Bean
public ObjectStorageFileManager s3FileManager() {
Expand All @@ -76,7 +80,8 @@ public ObjectStorageIndex s3FileSynchronizer(
DataStorageType.S3,
SearchDocumentType.S3_FILE,
tagDelimiter,
includeVersions);
includeVersions,
storageExcludeKey, storageExcludeValue);
if (StringUtils.isNotBlank(storageIds)) {
service.setStorageIds(parseIds(storageIds));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -241,4 +241,8 @@ public List<StorageFileSearchMask> getStorageSearchMasks() {
/**
 * Loads a file share mount by executing the {@code loadShareMount} API call
 * for the given id.
 *
 * @param id identifier of the file share mount to load
 * @return the value produced by executing the API call
 */
public FileShareMount loadFileShareMount(final Long id) {
return executor.execute(cloudPipelineAPI.loadShareMount(id));
}

/**
 * Searches for entities of the given class by a metadata key and value,
 * delegating to the {@code searchMetadata} API call.
 *
 * @param entityClass class of the entities to search for
 * @param key metadata key passed to the search
 * @param value metadata value passed to the search
 * @return the entities produced by executing the API call
 */
public List<EntityVO> searchEntriesByMetadata(final AclClass entityClass, final String key, final String value) {
// The API call takes its arguments in (key, value, entityClass) order, unlike this method.
return executor.execute(cloudPipelineAPI.searchMetadata(key, value, entityClass));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,12 @@
import com.epam.pipeline.entity.datastorage.lifecycle.restore.StorageRestoreAction;
import com.epam.pipeline.entity.datastorage.lifecycle.restore.StorageRestorePathType;
import com.epam.pipeline.entity.datastorage.lifecycle.restore.StorageRestoreStatus;
import com.epam.pipeline.entity.security.acl.AclClass;
import com.epam.pipeline.entity.utils.DateUtils;
import com.epam.pipeline.utils.StreamUtils;
import com.epam.pipeline.entity.search.SearchDocumentType;
import com.epam.pipeline.vo.EntityPermissionVO;
import com.epam.pipeline.vo.EntityVO;
import com.epam.pipeline.vo.data.storage.DataStorageTagLoadBatchRequest;
import com.epam.pipeline.vo.data.storage.DataStorageTagLoadRequest;
import lombok.Getter;
Expand Down Expand Up @@ -95,6 +97,8 @@ public class ObjectStorageIndexImpl implements ObjectStorageIndex {
private final SearchDocumentType documentType;
private final String tagDelimiter;
private final boolean includeVersions;
private final String storageExcludeKey;
private final String storageExcludeValue;

private Set<Long> storageIds;
private Set<Long> skipStorageIds;
Expand All @@ -105,17 +109,29 @@ public class ObjectStorageIndexImpl implements ObjectStorageIndex {
public void synchronize(final LocalDateTime lastSyncTime, final LocalDateTime syncStart) {
    // Indexes every data storage that passes all of the filters below.
    // Neither lastSyncTime nor syncStart is read by this method.
    log.debug("Started {} files synchronization", getStorageType());
    fileMapper.updateSearchMasks(cloudPipelineAPIClient, log);
    // Ids of storages carrying the configured exclude metadata key/value pair.
    final Set<Long> excludeStorageIds = loadExcludedStorageIds();
    final List<AbstractDataStorage> allStorages = cloudPipelineAPIClient.loadAllDataStorages();
    allStorages
            .stream()
            // Drop storages excluded via metadata; an empty set excludes nothing.
            .filter(dataStorage -> CollectionUtils.isEmpty(excludeStorageIds)
                    || !excludeStorageIds.contains(dataStorage.getId()))
            // Drop storages from the skip list; an empty skip list skips nothing.
            .filter(dataStorage -> CollectionUtils.isEmpty(skipStorageIds)
                    || !skipStorageIds.contains(dataStorage.getId()))
            // Keep only storages from the allow list; an empty allow list keeps all.
            // This check was previously applied twice in a row; once is sufficient.
            .filter(dataStorage -> CollectionUtils.isEmpty(storageIds)
                    || storageIds.contains(dataStorage.getId()))
            // Keep only storages of the type handled by this synchronizer.
            .filter(dataStorage -> dataStorage.getType() == getStorageType())
            .filter(dataStorage -> isNotSharedOrChild(dataStorage, allStorages))
            .forEach(this::indexStorage);
}

/**
 * Collects the ids of data storages that must not be indexed.
 *
 * Queries the API client for {@link AclClass#DATA_STORAGE} entities by the
 * configured exclude metadata key and value; a {@code null} response is
 * treated as an empty list.
 *
 * @return set of entity ids returned by the metadata search, possibly empty
 */
private Set<Long> loadExcludedStorageIds() {
    final List<EntityVO> taggedEntities = cloudPipelineAPIClient.searchEntriesByMetadata(
            AclClass.DATA_STORAGE, storageExcludeKey, storageExcludeValue);
    final List<EntityVO> safeEntities = ListUtils.emptyIfNull(taggedEntities);
    return safeEntities.stream()
            .map(entity -> entity.getEntityId())
            .collect(Collectors.toSet());
}

@Override
@SuppressWarnings("PMD.AvoidCatchingGenericException")
public void indexStorage(final AbstractDataStorage dataStorage) {
Expand Down
6 changes: 6 additions & 0 deletions elasticsearch-agent/src/main/resources/application.properties
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ sync.az-blob.index.mapping=classpath:/templates/storage_file.json
sync.az-blob.index.name=az-blob
sync.az-blob.bulk.insert.size=1000
sync.az-blob.bulk.load.tags.size=100
sync.az-file.storage.exclude.metadata.key=${CP_SEARCH_AZ_FILE_STORAGE_EXCLUDE_METADATA_KEY:Billing status}
sync.az-file.storage.exclude.metadata.value=${CP_SEARCH_AZ_FILE_STORAGE_EXCLUDE_METADATA_VALUE:Exclude}

#S3 Files Settings
#sync.s3-file.disable=true
Expand All @@ -69,13 +71,17 @@ sync.s3-file.index.include.versions=false
sync.s3-file.enable.tags=false
sync.s3-file.bulk.insert.size=1000
sync.s3-file.bulk.load.tags.size=100
sync.s3-file.storage.exclude.metadata.key=${CP_SEARCH_S3_FILE_STORAGE_EXCLUDE_METADATA_KEY:Billing status}
sync.s3-file.storage.exclude.metadata.value=${CP_SEARCH_S3_FILE_STORAGE_EXCLUDE_METADATA_VALUE:Exclude}

#GS Files Settings
#sync.gs-file.disable=true
sync.gs-file.index.name=gs-file
sync.gs-file.index.mapping=classpath:/templates/storage_file.json
sync.gs-file.bulk.insert.size=1000
sync.gs-file.bulk.load.tags.size=100
sync.gs-file.storage.exclude.metadata.key=${CP_SEARCH_GS_FILE_STORAGE_EXCLUDE_METADATA_KEY:Billing status}
sync.gs-file.storage.exclude.metadata.value=${CP_SEARCH_GS_FILE_STORAGE_EXCLUDE_METADATA_VALUE:Exclude}

#GS Storage Settings
#sync.gs-storage.disable=true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,9 @@ public class ObjectStorageIndexTest {

private static final String TEST_BLOB_NAME_1 = "1";
private static final String TEST_BLOB_NAME_2 = "2";
public static final int BULK_SIZE = 1000;
private static final int BULK_SIZE = 1000;
private static final String EXCLUDE_KEY = "key";
private static final String EXCLUDE_VALUE = "value";

private final AbstractDataStorage dataStorage = new GSBucketStorage(
1L, "storage", "storage", new StoragePolicy(), null
Expand Down Expand Up @@ -79,7 +81,8 @@ public void init() {
BULK_SIZE,
DataStorageType.GS,
SearchDocumentType.GS_FILE,
";", false)
";", false,
EXCLUDE_KEY, EXCLUDE_VALUE)
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ public class ObjectStorageIndexVersionsTest {

private static final String TEST_BLOB_NAME_1 = "1";
private static final String TEST_BLOB_NAME_2 = "2";
public static final int BULK_SIZE = 1000;
private static final int BULK_SIZE = 1000;
private static final String EXCLUDE_KEY = "key";
private static final String EXCLUDE_VALUE = "value";

private final Supplier<TemporaryCredentials> temporaryCredentials = () ->
TemporaryCredentials.builder().region("").build();
Expand Down Expand Up @@ -77,7 +79,8 @@ public void init() {
BULK_SIZE,
DataStorageType.S3,
SearchDocumentType.S3_FILE,
";", true)
";", true,
EXCLUDE_KEY, EXCLUDE_VALUE)
);
}

Expand Down

0 comments on commit f3775a3

Please sign in to comment.