From d9f6545d63a53cd8e6f6c44713bff8bb2dbbaf49 Mon Sep 17 00:00:00 2001 From: Gilbert Chen Date: Sun, 24 Oct 2021 23:34:49 -0400 Subject: [PATCH] Rewrite the backup procedure to reduce memory usage Main changes: * Change the listing order of files/directories so that the local and remote snapshots can be compared on-the-fly. * Introduce a new struct called EntryList that maintains a list of files/directories, which are kept in memory when there are few of them, and serialized into a file when there are too many. * EntryList can also be turned into an on-disk incomplete snapshot quickly, to support fast-resume on the next run. * ChunkOperator can now download and upload chunks, thus replacing the original ChunkDownloader and ChunkUploader. The new ChunkDownloader is only used to prefetch chunks during the restore operation. --- duplicacy/duplicacy_main.go | 26 +- src/duplicacy_backupmanager.go | 1134 ++++++++--------- src/duplicacy_backupmanager_test.go | 12 +- src/duplicacy_benchmark.go | 36 +- src/duplicacy_chunk.go | 12 +- src/duplicacy_chunkdownloader.go | 316 +---- src/duplicacy_chunkmaker.go | 189 +-- src/duplicacy_chunkmaker_test.go | 74 +- src/duplicacy_chunkoperator.go | 408 +++++- ...est.go => duplicacy_chunkoperator_test.go} | 26 +- src/duplicacy_chunkuploader.go | 151 --- src/duplicacy_entry.go | 430 ++++++- src/duplicacy_entry_test.go | 72 +- src/duplicacy_entrylist.go | 574 +++++++++ src/duplicacy_entrylist_test.go | 179 +++ src/duplicacy_snapshot.go | 261 ++-- src/duplicacy_snapshotmanager.go | 456 ++++--- src/duplicacy_snapshotmanager_test.go | 19 +- src/duplicacy_utils.go | 14 + src/duplicacy_utils_others.go | 10 +- 20 files changed, 2706 insertions(+), 1693 deletions(-) rename src/{duplicacy_chunkuploader_test.go => duplicacy_chunkoperator_test.go} (77%) delete mode 100644 src/duplicacy_chunkuploader.go create mode 100644 src/duplicacy_entrylist.go create mode 100644 src/duplicacy_entrylist_test.go diff --git a/duplicacy/duplicacy_main.go 
b/duplicacy/duplicacy_main.go index 9fe4ce0d..5ac26356 100644 --- a/duplicacy/duplicacy_main.go +++ b/duplicacy/duplicacy_main.go @@ -147,6 +147,10 @@ func setGlobalOptions(context *cli.Context) { duplicacy.SetLoggingLevel(duplicacy.DEBUG) } + if context.GlobalBool("print-memory-usage") { + go duplicacy.PrintMemoryUsage() + } + ScriptEnabled = true if context.GlobalBool("no-script") { ScriptEnabled = false @@ -781,7 +785,10 @@ func backupRepository(context *cli.Context) { backupManager.SetupSnapshotCache(preference.Name) backupManager.SetDryRun(dryRun) - backupManager.Backup(repository, quickMode, threads, context.String("t"), showStatistics, enableVSS, vssTimeout, enumOnly) + + metadataChunkSize := context.Int("metadata-chunk-size") + maximumInMemoryEntries := context.Int("max-in-memory-entries") + backupManager.Backup(repository, quickMode, threads, context.String("t"), showStatistics, enableVSS, vssTimeout, enumOnly, metadataChunkSize, maximumInMemoryEntries) runScript(context, preference.Name, "post") } @@ -1506,6 +1513,19 @@ func main() { Name: "enum-only", Usage: "enumerate the repository recursively and then exit", }, + cli.IntFlag{ + Name: "metadata-chunk-size", + Value: 1024 * 1024, + Usage: "the average size of metadata chunks (defaults to 1M)", + Argument: "", + }, + cli.IntFlag{ + Name: "max-in-memory-entries", + Value: 1024 * 1024, + Usage: "the maximum number of entries kept in memory (defaults to 1M)", + Argument: "", + }, + }, Usage: "Save a snapshot of the repository to the storage", ArgsUsage: " ", @@ -2180,6 +2200,10 @@ func main() { Usage: "suppress logs with the specified id", Argument: "", }, + cli.BoolFlag{ + Name: "print-memory-usage", + Usage: "print memory usage every second", + }, } app.HideVersion = true diff --git a/src/duplicacy_backupmanager.go b/src/duplicacy_backupmanager.go index d4f4c4df..6ad94ca9 100644 --- a/src/duplicacy_backupmanager.go +++ b/src/duplicacy_backupmanager.go @@ -20,6 +20,8 @@ import ( "sync" "sync/atomic" "time" 
+ + "github.com/vmihailenco/msgpack" ) // BackupManager performs the two major operations, backup and restore, and passes other operations, mostly related to @@ -35,11 +37,10 @@ type BackupManager struct { config *Config // contains a number of options nobackupFile string // don't backup directory when this file name is found + filtersFile string // the path to the filters file + excludeByAttribute bool // don't backup file based on file attribute - filtersFile string // the path to the filters file - - excludeByAttribute bool // don't backup file based on file attribute - + cachePath string } func (manager *BackupManager) SetDryRun(dryRun bool) { @@ -111,74 +112,20 @@ func (manager *BackupManager) SetupSnapshotCache(storageName string) bool { } } + manager.cachePath = path.Join(preferencePath, "cache", storageName) + storage.SetDefaultNestingLevels([]int{1}, 1) manager.snapshotCache = storage manager.SnapshotManager.snapshotCache = storage return true } - -// setEntryContent sets the 4 content pointers for each entry in 'entries'. 'offset' indicates the value -// to be added to the StartChunk and EndChunk points, used when intending to append 'entries' to the -// original unchanged entry list. -// -// This function assumes the Size field of each entry is equal to the length of the chunk content that belong -// to the file. -func setEntryContent(entries []*Entry, chunkLengths []int, offset int) { - if len(entries) == 0 { - return - } - - // The following code works by iterating over 'entries' and 'chunkLength' and keeping track of the - // accumulated total file size and the accumulated total chunk size. 
- i := 0 - totalChunkSize := int64(0) - totalFileSize := entries[i].Size - entries[i].StartChunk = 0 + offset - entries[i].StartOffset = 0 - for j, length := range chunkLengths { - - for totalChunkSize+int64(length) >= totalFileSize { - entries[i].EndChunk = j + offset - entries[i].EndOffset = int(totalFileSize - totalChunkSize) - - i++ - if i >= len(entries) { - break - } - - // If the current file ends at the end of the current chunk, the next file will - // start at the next chunk - if totalChunkSize+int64(length) == totalFileSize { - entries[i].StartChunk = j + 1 + offset - entries[i].StartOffset = 0 - } else { - entries[i].StartChunk = j + offset - entries[i].StartOffset = int(totalFileSize - totalChunkSize) - } - - totalFileSize += entries[i].Size - } - - if i >= len(entries) { - break - } - totalChunkSize += int64(length) - } - - // If there are some unvisited entries (which happens when saving an incomplete snapshot), - // set their sizes to -1 so they won't be saved to the incomplete snapshot - for j := i; j < len(entries); j++ { - entries[j].Size = -1 - } -} - // Backup creates a snapshot for the repository 'top'. If 'quickMode' is true, only files with different sizes // or timestamps since last backup will be uploaded (however the snapshot is still a full snapshot that shares // unmodified files with last backup). Otherwise (or if this is the first backup), the entire repository will // be scanned to create the snapshot. 'tag' is the tag assigned to the new snapshot. 
func (manager *BackupManager) Backup(top string, quickMode bool, threads int, tag string, - showStatistics bool, shadowCopy bool, shadowCopyTimeout int, enumOnly bool) bool { + showStatistics bool, shadowCopy bool, shadowCopyTimeout int, enumOnly bool, metadataChunkSize int, maximumInMemoryEntries int) bool { var err error top, err = filepath.Abs(top) @@ -206,222 +153,203 @@ func (manager *BackupManager) Backup(top string, quickMode bool, threads int, ta remoteSnapshot := manager.SnapshotManager.downloadLatestSnapshot(manager.snapshotID) if remoteSnapshot == nil { - remoteSnapshot = CreateEmptySnapshot(manager.snapshotID) LOG_INFO("BACKUP_START", "No previous backup found") + remoteSnapshot = CreateEmptySnapshot(manager.snapshotID) } else { LOG_INFO("BACKUP_START", "Last backup at revision %d found", remoteSnapshot.Revision) } - shadowTop := CreateShadowCopy(top, shadowCopy, shadowCopyTimeout) - defer DeleteShadowCopy() - - LOG_INFO("BACKUP_INDEXING", "Indexing %s", top) - localSnapshot, skippedDirectories, skippedFiles, err := CreateSnapshotFromDirectory(manager.snapshotID, shadowTop, - manager.nobackupFile, manager.filtersFile, manager.excludeByAttribute) - if err != nil { - LOG_ERROR("SNAPSHOT_LIST", "Failed to list the directory %s: %v", top, err) - return false - } - - if enumOnly { - return true - } - - if len(localSnapshot.Files) == 0 { - LOG_ERROR("SNAPSHOT_EMPTY", "No files under the repository to be backed up") - return false - } + hashMode := remoteSnapshot.Revision == 0 || !quickMode // This cache contains all chunks referenced by last snasphot. Any other chunks will lead to a call to // UploadChunk. 
chunkCache := make(map[string]bool) - var incompleteSnapshot *Snapshot - // A revision number of 0 means this is the initial backup if remoteSnapshot.Revision > 0 { + manager.SnapshotManager.DownloadSnapshotSequences(remoteSnapshot) // Add all chunks in the last snapshot to the cache for _, chunkID := range manager.SnapshotManager.GetSnapshotChunks(remoteSnapshot, true) { chunkCache[chunkID] = true } - } else { - - // In quick mode, attempt to load the incomplete snapshot from last incomplete backup if there is one. - if quickMode { - incompleteSnapshot = LoadIncompleteSnapshot() - } + } - // If the listing operation is fast or there is an incomplete snapshot, list all chunks and - // put them in the cache. - if manager.storage.IsFastListing() || incompleteSnapshot != nil { - LOG_INFO("BACKUP_LIST", "Listing all chunks") - allChunks, _ := manager.SnapshotManager.ListAllFiles(manager.storage, "chunks/") + var incompleteSnapshot *EntryList + if hashMode { + incompleteSnapshot = loadIncompleteSnapshot(manager.snapshotID, manager.cachePath) + } - for _, chunk := range allChunks { - if len(chunk) == 0 || chunk[len(chunk)-1] == '/' { - continue - } + // If the listing operation is fast and this is an initial backup, list all chunks and + // put them in the cache. 
+ if (manager.storage.IsFastListing() && remoteSnapshot.Revision == 0) { + LOG_INFO("BACKUP_LIST", "Listing all chunks") + allChunks, _ := manager.SnapshotManager.ListAllFiles(manager.storage, "chunks/") - if strings.HasSuffix(chunk, ".fsl") { - continue - } - - chunk = strings.Replace(chunk, "/", "", -1) - chunkCache[chunk] = true + for _, chunk := range allChunks { + if len(chunk) == 0 || chunk[len(chunk)-1] == '/' { + continue } - } - - if incompleteSnapshot != nil { - // This is the last chunk from the incomplete snapshot that can be found in the cache - lastCompleteChunk := -1 - for i, chunkHash := range incompleteSnapshot.ChunkHashes { - chunkID := manager.config.GetChunkIDFromHash(chunkHash) - if _, ok := chunkCache[chunkID]; ok { - lastCompleteChunk = i - } else { - break - } + if strings.HasSuffix(chunk, ".fsl") { + continue } - LOG_DEBUG("CHUNK_INCOMPLETE", "The incomplete snapshot contains %d files and %d chunks", len(incompleteSnapshot.Files), len(incompleteSnapshot.ChunkHashes)) - LOG_DEBUG("CHUNK_INCOMPLETE", "Last chunk in the incomplete snapshot that exist in the storage: %d", lastCompleteChunk) - - // Only keep those files whose chunks exist in the cache - var files []*Entry - for _, file := range incompleteSnapshot.Files { - if file.StartChunk <= lastCompleteChunk && file.EndChunk <= lastCompleteChunk { - files = append(files, file) - } else { - break - } - } - incompleteSnapshot.Files = files + chunk = strings.Replace(chunk, "/", "", -1) + chunkCache[chunk] = true + } - // Remove incomplete chunks (they may not have been uploaded) - incompleteSnapshot.ChunkHashes = incompleteSnapshot.ChunkHashes[:lastCompleteChunk+1] - incompleteSnapshot.ChunkLengths = incompleteSnapshot.ChunkLengths[:lastCompleteChunk+1] - remoteSnapshot = incompleteSnapshot - LOG_INFO("FILE_SKIP", "Skipped %d files from previous incomplete backup", len(files)) + // Make sure that all chunks in the incomplete snapshot must exist in the storage + if incompleteSnapshot != nil && 
!incompleteSnapshot.CheckChunks(manager.config, chunkCache) { + LOG_WARN("INCOMPLETE_DISCARD", "The incomplete snapshot can't be used as it contains chunks not in the storage") + incompleteSnapshot = nil } + } + // Copy over chunks from the incomplete snapshot + if incompleteSnapshot != nil { + remoteSnapshot.ChunkHashes = append(incompleteSnapshot.PreservedChunkHashes, incompleteSnapshot.UploadedChunkHashes...) + remoteSnapshot.ChunkLengths = append(incompleteSnapshot.PreservedChunkLengths, incompleteSnapshot.UploadedChunkLengths...) } - var numberOfNewFileChunks int64 // number of new file chunks - var totalUploadedFileChunkLength int64 // total length of uploaded file chunks - var totalUploadedFileChunkBytes int64 // how many actual bytes have been uploaded + shadowTop := CreateShadowCopy(top, shadowCopy, shadowCopyTimeout) + defer DeleteShadowCopy() - var totalUploadedSnapshotChunkLength int64 // size of uploaded snapshot chunks - var totalUploadedSnapshotChunkBytes int64 // how many actual bytes have been uploaded + var totalModifiedFileSize int64 // total size of modified files + var uploadedModifiedFileSize int64 // portions that have been uploaded (including cache hits) + var preservedFileSize int64 // total size of unmodified files + localSnapshot := CreateEmptySnapshot(manager.snapshotID) localSnapshot.Revision = remoteSnapshot.Revision + 1 - var totalModifiedFileSize int64 // total size of modified files - var uploadedModifiedFileSize int64 // portions that have been uploaded (including cache hits) + localListingChannel := make(chan *Entry) + remoteListingChannel := make(chan *Entry) + chunkOperator := CreateChunkOperator(manager.config, manager.storage, manager.snapshotCache, showStatistics, threads, false) - var modifiedEntries []*Entry // Files that has been modified or newly created - var preservedEntries []*Entry // Files unchanges + var skippedDirectories []string + var skippedFiles []string + + LOG_INFO("BACKUP_INDEXING", "Indexing %s", top) + go 
func() { + // List local files + defer CatchLogException() + localSnapshot.ListLocalFiles(shadowTop, manager.nobackupFile, manager.filtersFile, manager.excludeByAttribute, localListingChannel, &skippedDirectories, &skippedFiles) + } () - // If the quick mode is disable and there isn't an incomplete snapshot from last (failed) backup, - // we simply treat all files as if they were new, and break them into chunks. - // Otherwise, we need to find those that are new or recently modified + go func() { + // List remote files + defer CatchLogException() - if (remoteSnapshot.Revision == 0 || !quickMode) && incompleteSnapshot == nil { - modifiedEntries = localSnapshot.Files - for _, entry := range modifiedEntries { - totalModifiedFileSize += entry.Size + if incompleteSnapshot != nil { + // If there is an incomplete snapshot, always use it + incompleteSnapshot.ReadEntries(func(entry *Entry) error { + remoteListingChannel <- entry + return nil + }) + } else if hashMode { + // No need to list remote files for a hash mode backup + } else { + // List remote files in the previous snapshot + remoteSnapshot.ListRemoteFiles(manager.config, chunkOperator, func(entry *Entry) bool { + remoteListingChannel <- entry + return true + }) } - } else { + close(remoteListingChannel) + } () - var i, j int - for i < len(localSnapshot.Files) { + // Create the local file list + localEntryList, err := CreateEntryList(manager.snapshotID, manager.cachePath, maximumInMemoryEntries) + if err != nil { + LOG_ERROR("BACKUP_CREATE", "Failed to create the entry list: %v", err) + return false + } + lastPreservedChunk := -1 - local := localSnapshot.Files[i] + // Now compare local files with remote files one by one + var remoteEntry *Entry + remoteListingOK := true + for { + localEntry := <- localListingChannel + if localEntry == nil { + break + } - if !local.IsFile() || local.Size == 0 { - i++ - continue + // compareResult < 0: local entry has no remote counterpart + // compareResult == 0: local entry may or 
may not be the same as the remote one + // compareResult > 0: remote entry is extra - skip it and get the next remote entry while keeping the same local entry + var compareResult int + for { + if remoteEntry != nil { + compareResult = localEntry.Compare(remoteEntry) + } else { + if remoteListingOK { + remoteEntry, remoteListingOK = <- remoteListingChannel + } + if !remoteListingOK { + compareResult = -1 + break + } + compareResult = localEntry.Compare(remoteEntry) } - var remote *Entry - if j >= len(remoteSnapshot.Files) { - totalModifiedFileSize += local.Size - modifiedEntries = append(modifiedEntries, local) - i++ - } else if remote = remoteSnapshot.Files[j]; !remote.IsFile() { - j++ - } else if local.Path == remote.Path { - if local.IsSameAs(remote) { - local.Hash = remote.Hash - local.StartChunk = remote.StartChunk - local.StartOffset = remote.StartOffset - local.EndChunk = remote.EndChunk - local.EndOffset = remote.EndOffset - preservedEntries = append(preservedEntries, local) - } else { - totalModifiedFileSize += local.Size - modifiedEntries = append(modifiedEntries, local) - } - i++ - j++ - } else if local.Compare(remote) < 0 { - totalModifiedFileSize += local.Size - modifiedEntries = append(modifiedEntries, local) - i++ - } else { - j++ + if compareResult <= 0 { + break } + remoteEntry = nil } - // Must sort files by their 'StartChunk', so the chunk indices form a monotonically increasing sequence - sort.Sort(ByChunk(preservedEntries)) - } - - var preservedChunkHashes []string - var preservedChunkLengths []int + if compareResult == 0 { + // No need to check if it is in hash mode -- in that case remote listing is nil + if localEntry.IsSameAs(remoteEntry) && localEntry.IsFile() { - // For each preserved file, adjust the StartChunk and EndChunk pointers. This is done by finding gaps - // between these indices and subtracting the number of deleted chunks. 
- last := -1 - deletedChunks := 0 - for _, entry := range preservedEntries { + localEntry.Hash = remoteEntry.Hash + localEntry.StartOffset = remoteEntry.StartOffset + localEntry.EndOffset = remoteEntry.EndOffset + delta := remoteEntry.StartChunk - len(localEntryList.PreservedChunkHashes) + if lastPreservedChunk != remoteEntry.StartChunk { + lastPreservedChunk = remoteEntry.StartChunk + localEntryList.AddPreservedChunk(remoteSnapshot.ChunkHashes[lastPreservedChunk], remoteSnapshot.ChunkLengths[lastPreservedChunk]) + } else { + delta++ + } - if entry.StartChunk > last { - deletedChunks += entry.StartChunk - last - 1 - } + for i := remoteEntry.StartChunk + 1; i <= remoteEntry.EndChunk; i++ { + localEntryList.AddPreservedChunk(remoteSnapshot.ChunkHashes[i], remoteSnapshot.ChunkLengths[i]) + lastPreservedChunk = i + } - for i := entry.StartChunk; i <= entry.EndChunk; i++ { - if i == last { - continue + localEntry.StartChunk = remoteEntry.StartChunk - delta + localEntry.EndChunk = remoteEntry.EndChunk - delta + preservedFileSize += localEntry.Size + } else { + totalModifiedFileSize += localEntry.Size + if localEntry.Size > 0 { + localEntry.Size = -1 + } + } + remoteEntry = nil + } else { + // compareResult must be < 0; the local file is new + totalModifiedFileSize += localEntry.Size + if localEntry.Size > 0 { + // A size of -1 indicates this is a modified file that will be uploaded + localEntry.Size = -1 } - preservedChunkHashes = append(preservedChunkHashes, remoteSnapshot.ChunkHashes[i]) - preservedChunkLengths = append(preservedChunkLengths, remoteSnapshot.ChunkLengths[i]) } - last = entry.EndChunk - - entry.StartChunk -= deletedChunks - entry.EndChunk -= deletedChunks + localEntryList.AddEntry(localEntry) } - var uploadedEntries []*Entry - var uploadedChunkHashes []string - var uploadedChunkLengths []int - var uploadedChunkLock = &sync.Mutex{} - - // Set all file sizes to -1 to indicate they haven't been processed. 
This must be done before creating the file - // reader because the file reader may skip inaccessible files on construction. - for _, entry := range modifiedEntries { - entry.Size = -1 + if enumOnly { + return true } - // the file reader implements the Reader interface. When an EOF is encounter, it opens the next file unless it - // is the last file. - fileReader := CreateFileReader(shadowTop, modifiedEntries) - - startUploadingTime := time.Now().Unix() + if localEntryList.NumberOfEntries == 0 { + LOG_ERROR("SNAPSHOT_EMPTY", "No files under the repository to be backed up") + return false + } - lastUploadingTime := time.Now().Unix() + fileChunkMaker := CreateFileChunkMaker(manager.config, false) keepUploadAlive := int64(1800) @@ -430,7 +358,7 @@ func (manager *BackupManager) Backup(top string, quickMode bool, threads int, ta if value < 10 { value = 10 } - LOG_INFO("UPLOAD_KEEPALIVE", "Setting KeepUploadAlive to %d", value) + LOG_INFO("UPLOAD_KEEPALIVE", "Setting KeepUploadAlive to %d second", value) keepUploadAlive = int64(value) } @@ -441,203 +369,129 @@ func (manager *BackupManager) Backup(top string, quickMode bool, threads int, ta LOG_INFO("SNAPSHOT_FAIL", "Will abort the backup on chunk %d", chunkToFail) } - chunkMaker := CreateChunkMaker(manager.config, false) - chunkUploader := CreateChunkUploader(manager.config, manager.storage, nil, threads, nil) + if incompleteSnapshot != nil { + incompleteSnapshot.CloseOnDiskFile() + } - localSnapshotReady := false var once sync.Once - - if remoteSnapshot.Revision == 0 { - // In case an error occurs during the initial backup, save the incomplete snapshot + if hashMode { + // In case an error occurs during a hash mode backup, save the incomplete snapshot RunAtError = func() { - once.Do( - func() { - if !localSnapshotReady { - // Lock it to gain exclusive access to uploadedChunkHashes and uploadedChunkLengths - uploadedChunkLock.Lock() - setEntryContent(uploadedEntries, uploadedChunkLengths, len(preservedChunkHashes)) - if 
len(preservedChunkHashes) > 0 { - //localSnapshot.Files = preservedEntries - //localSnapshot.Files = append(preservedEntries, uploadedEntries...) - localSnapshot.ChunkHashes = preservedChunkHashes - localSnapshot.ChunkHashes = append(localSnapshot.ChunkHashes, uploadedChunkHashes...) - localSnapshot.ChunkLengths = preservedChunkLengths - localSnapshot.ChunkLengths = append(localSnapshot.ChunkLengths, uploadedChunkLengths...) - } else { - //localSnapshot.Files = uploadedEntries - localSnapshot.ChunkHashes = uploadedChunkHashes - localSnapshot.ChunkLengths = uploadedChunkLengths - } - uploadedChunkLock.Unlock() - } - SaveIncompleteSnapshot(localSnapshot) - }) + once.Do(func() { + localEntryList.SaveIncompleteSnapshot() + }) } } - if fileReader.CurrentFile != nil { + startUploadingTime := time.Now().Unix() + lastUploadingTime := time.Now().Unix() - LOG_TRACE("PACK_START", "Packing %s", fileReader.CurrentEntry.Path) + var numberOfNewFileChunks int64 // number of new file chunks + var totalUploadedFileChunkLength int64 // total length of uploaded file chunks + var totalUploadedFileChunkBytes int64 // how many actual bytes have been uploaded - chunkIndex := 0 - if threads < 1 { - threads = 1 - } - if threads > 1 { - LOG_INFO("BACKUP_THREADS", "Use %d uploading threads", threads) - } + // This function is called when a chunk has been uploaded + uploadChunkCompletionFunc := func(chunk *Chunk, chunkIndex int, inCache bool, chunkSize int, uploadSize int) { - var numberOfCollectedChunks int64 + localEntryList.AddUploadedChunk(chunkIndex, chunk.GetHash(), chunkSize) - completionFunc := func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) { - action := "Skipped" - if skipped { - LOG_DEBUG("CHUNK_CACHE", "Skipped chunk %s in cache", chunk.GetID()) + action := "Skipped" + if inCache { + LOG_DEBUG("CHUNK_CACHE", "Skipped chunk %s in cache", chunk.GetID()) + } else { + if uploadSize > 0 { + atomic.AddInt64(&numberOfNewFileChunks, 1) + 
atomic.AddInt64(&totalUploadedFileChunkLength, int64(chunkSize)) + atomic.AddInt64(&totalUploadedFileChunkBytes, int64(uploadSize)) + action = "Uploaded" } else { - if uploadSize > 0 { - atomic.AddInt64(&numberOfNewFileChunks, 1) - atomic.AddInt64(&totalUploadedFileChunkLength, int64(chunkSize)) - atomic.AddInt64(&totalUploadedFileChunkBytes, int64(uploadSize)) - action = "Uploaded" - } else { - LOG_DEBUG("CHUNK_EXIST", "Skipped chunk %s in the storage", chunk.GetID()) - } + LOG_DEBUG("CHUNK_EXIST", "Skipped chunk %s in the storage", chunk.GetID()) } + } - uploadedModifiedFileSize := atomic.AddInt64(&uploadedModifiedFileSize, int64(chunkSize)) + uploadedModifiedFileSize := atomic.AddInt64(&uploadedModifiedFileSize, int64(chunkSize)) - if (IsTracing() || showStatistics) && totalModifiedFileSize > 0 { - now := time.Now().Unix() - if now <= startUploadingTime { - now = startUploadingTime + 1 - } - speed := uploadedModifiedFileSize / (now - startUploadingTime) - remainingTime := int64(0) - if speed > 0 { - remainingTime = (totalModifiedFileSize-uploadedModifiedFileSize)/speed + 1 - } - percentage := float32(uploadedModifiedFileSize * 1000 / totalModifiedFileSize) - LOG_INFO("UPLOAD_PROGRESS", "%s chunk %d size %d, %sB/s %s %.1f%%", action, chunkIndex, - chunkSize, PrettySize(speed), PrettyTime(remainingTime), percentage/10) + if (IsTracing() || showStatistics) && totalModifiedFileSize > 0 { + now := time.Now().Unix() + if now <= startUploadingTime { + now = startUploadingTime + 1 } - - atomic.AddInt64(&numberOfCollectedChunks, 1) - manager.config.PutChunk(chunk) + speed := uploadedModifiedFileSize / (now - startUploadingTime) + remainingTime := int64(0) + if speed > 0 { + remainingTime = (totalModifiedFileSize-uploadedModifiedFileSize)/speed + 1 + } + percentage := float32(uploadedModifiedFileSize * 1000 / totalModifiedFileSize) + LOG_INFO("UPLOAD_PROGRESS", "%s chunk %d size %d, %sB/s %s %.1f%%", action, chunkIndex, + chunkSize, PrettySize(speed), 
PrettyTime(remainingTime), percentage/10) } - chunkUploader.completionFunc = completionFunc - chunkUploader.Start() - - // Break files into chunks - chunkMaker.ForEachChunk( - fileReader.CurrentFile, - func(chunk *Chunk, final bool) { - - hash := chunk.GetHash() - chunkID := chunk.GetID() - chunkSize := chunk.GetLength() - - if chunkSize == 0 { - LOG_DEBUG("CHUNK_EMPTY", "Ignored chunk %s of size 0", chunkID) - return - } - - chunkIndex++ - - _, found := chunkCache[chunkID] - if found { - if time.Now().Unix()-lastUploadingTime > keepUploadAlive { - LOG_INFO("UPLOAD_KEEPALIVE", "Skip chunk cache to keep connection alive") - found = false - } - } - - if found { - completionFunc(chunk, chunkIndex, true, chunkSize, 0) - } else { - lastUploadingTime = time.Now().Unix() - chunkCache[chunkID] = true - - chunkUploader.StartChunk(chunk, chunkIndex) - } - - // Must lock it because the RunAtError function called by other threads may access these two slices - uploadedChunkLock.Lock() - uploadedChunkHashes = append(uploadedChunkHashes, hash) - uploadedChunkLengths = append(uploadedChunkLengths, chunkSize) - uploadedChunkLock.Unlock() - if len(uploadedChunkHashes) == chunkToFail { - LOG_ERROR("SNAPSHOT_FAIL", "Artificially fail the chunk %d for testing purposes", chunkToFail) - } - - }, - func(fileSize int64, hash string) (io.Reader, bool) { + manager.config.PutChunk(chunk) + } - // Must lock here because the RunAtError function called by other threads may access uploadedEntries - uploadedChunkLock.Lock() - defer uploadedChunkLock.Unlock() + chunkOperator.UploadCompletionFunc = uploadChunkCompletionFunc - // This function is called when a new file is needed - entry := fileReader.CurrentEntry - entry.Hash = hash - entry.Size = fileSize - uploadedEntries = append(uploadedEntries, entry) + chunkIndex := -1 + // This function is called when the chunk maker generates a new chunk + uploadChunkFunc := func(chunk *Chunk) { + chunkID := chunk.GetID() + chunkSize := chunk.GetLength() - if 
!showStatistics || IsTracing() || RunInBackground { - LOG_INFO("PACK_END", "Packed %s (%d)", entry.Path, entry.Size) - } + chunkIndex++ - fileReader.NextFile() + _, found := chunkCache[chunkID] + if found { + if time.Now().Unix() - lastUploadingTime > keepUploadAlive { + LOG_INFO("UPLOAD_KEEPALIVE", "Skip chunk cache to keep connection alive") + found = false + } + } - if fileReader.CurrentFile != nil { - LOG_TRACE("PACK_START", "Packing %s", fileReader.CurrentEntry.Path) - return fileReader.CurrentFile, true - } - return nil, false - }) + if found { + uploadChunkCompletionFunc(chunk, chunkIndex, true, chunkSize, 0) + } else { + lastUploadingTime = time.Now().Unix() + chunkCache[chunkID] = true - chunkUploader.Stop() + chunkOperator.Upload(chunk, chunkIndex, false) + } - // We can't set the offsets in the ForEachChunk loop because in that loop, when switching to a new file, the - // data in the buffer may not have been pushed into chunks; it may happen that new chunks can be created - // aftwards, before reaching the end of the current file. - // - // Therefore, we saved uploaded entries and then do a loop here to set offsets for them. - setEntryContent(uploadedEntries, uploadedChunkLengths, len(preservedChunkHashes)) + if chunkIndex == chunkToFail { + LOG_ERROR("SNAPSHOT_FAIL", "Artificially fail the chunk %d for testing purposes", chunkToFail) + } } - if len(preservedChunkHashes) > 0 { - localSnapshot.ChunkHashes = preservedChunkHashes - localSnapshot.ChunkHashes = append(localSnapshot.ChunkHashes, uploadedChunkHashes...) - localSnapshot.ChunkLengths = preservedChunkLengths - localSnapshot.ChunkLengths = append(localSnapshot.ChunkLengths, uploadedChunkLengths...) 
- } else { - localSnapshot.ChunkHashes = uploadedChunkHashes - localSnapshot.ChunkLengths = uploadedChunkLengths + // These are files to be uploaded; directories and links are excluded + for i := range localEntryList.ModifiedEntries { + entry := &localEntryList.ModifiedEntries[i] + LOG_TRACE("PACK_START", "Packing %s", entry.Path) + fullPath := joinPath(shadowTop, entry.Path) + file, err := os.OpenFile(fullPath, os.O_RDONLY, 0) + if err != nil { + LOG_WARN("OPEN_FAILURE", "Failed to open file for reading: %v", err) + skippedFiles = append(skippedFiles, entry.Path) + continue + } + entry.Size, entry.Hash = fileChunkMaker.AddData(file, uploadChunkFunc) + if !showStatistics || IsTracing() || RunInBackground { + LOG_INFO("PACK_END", "Packed %s (%d)", entry.Path, entry.Size) + } + file.Close() + } - localSnapshotReady = true + // This flushes the chunk maker (forcing all remaining data to be sent in chunks) + fileChunkMaker.AddData(nil, uploadChunkFunc) + chunkOperator.WaitForCompletion() localSnapshot.EndTime = time.Now().Unix() - err = manager.SnapshotManager.CheckSnapshot(localSnapshot) - if err != nil { - RunAtError = func() {} // Don't save the incomplete snapshot - LOG_ERROR("SNAPSHOT_CHECK", "The snapshot contains an error: %v", err) - return false - } - localSnapshot.Tag = tag localSnapshot.Options = "" - if !quickMode || remoteSnapshot.Revision == 0 { + if hashMode { localSnapshot.Options = "-hash" } - if _, found := os.LookupEnv("DUPLICACY_FAIL_SNAPSHOT"); found { - LOG_ERROR("SNAPSHOT_FAIL", "Artificially fail the backup for testing purposes") - return false - } - if shadowCopy { if localSnapshot.Options == "" { localSnapshot.Options = "-vss" @@ -646,28 +500,32 @@ func (manager *BackupManager) Backup(top string, quickMode bool, threads int, ta } } - var preservedFileSize int64 var uploadedFileSize int64 var totalFileChunkLength int64 - for _, file := range preservedEntries { - preservedFileSize += file.Size + for _, entry := range 
localEntryList.ModifiedEntries { + uploadedFileSize += entry.Size } - for _, file := range uploadedEntries { - uploadedFileSize += file.Size + for _, length := range localEntryList.PreservedChunkLengths { + totalFileChunkLength += int64(length) } - for _, length := range localSnapshot.ChunkLengths { + for _, length := range localEntryList.UploadedChunkLengths { totalFileChunkLength += int64(length) } localSnapshot.FileSize = preservedFileSize + uploadedFileSize - localSnapshot.NumberOfFiles = int64(len(preservedEntries) + len(uploadedEntries)) + localSnapshot.NumberOfFiles = localEntryList.NumberOfEntries - int64(len(skippedFiles)) + localSnapshot.ChunkHashes = append(localEntryList.PreservedChunkHashes, localEntryList.UploadedChunkHashes...) + localSnapshot.ChunkLengths = append(localEntryList.PreservedChunkLengths, localEntryList.UploadedChunkLengths...) - totalSnapshotChunkLength, numberOfNewSnapshotChunks, - totalUploadedSnapshotChunkLength, totalUploadedSnapshotChunkBytes := - manager.UploadSnapshot(chunkMaker, chunkUploader, top, localSnapshot, chunkCache) + totalMetadataChunkLength, numberOfNewMetadataChunks, + totalUploadedMetadataChunkLength, totalUploadedMetadataChunkBytes := + manager.UploadSnapshot(chunkOperator, top, localSnapshot, localEntryList, chunkCache, metadataChunkSize) if showStatistics && !RunInBackground { - for _, entry := range uploadedEntries { + for _, entry := range localEntryList.ModifiedEntries { + if entry.Size < 0 { + continue + } LOG_INFO("UPLOAD_FILE", "Uploaded %s (%d)", entry.Path, entry.Size) } } @@ -676,10 +534,9 @@ func (manager *BackupManager) Backup(top string, quickMode bool, threads int, ta LOG_WARN("SKIP_DIRECTORY", "Subdirectory %s cannot be listed", dir) } - for _, file := range fileReader.SkippedFiles { + for _, file := range skippedFiles { LOG_WARN("SKIP_FILE", "File %s cannot be opened", file) } - skippedFiles = append(skippedFiles, fileReader.SkippedFiles...) 
if !manager.config.dryRun { manager.SnapshotManager.CleanSnapshotCache(localSnapshot, nil) @@ -687,16 +544,16 @@ func (manager *BackupManager) Backup(top string, quickMode bool, threads int, ta LOG_INFO("BACKUP_END", "Backup for %s at revision %d completed", top, localSnapshot.Revision) RunAtError = func() {} - RemoveIncompleteSnapshot() + deleteIncompleteSnapshot(manager.cachePath) - totalSnapshotChunks := len(localSnapshot.FileSequence) + len(localSnapshot.ChunkSequence) + + totalMetadataChunks := len(localSnapshot.FileSequence) + len(localSnapshot.ChunkSequence) + len(localSnapshot.LengthSequence) if showStatistics { LOG_INFO("BACKUP_STATS", "Files: %d total, %s bytes; %d new, %s bytes", - len(preservedEntries)+len(uploadedEntries), + localEntryList.NumberOfEntries - int64(len(skippedFiles)), PrettyNumber(preservedFileSize+uploadedFileSize), - len(uploadedEntries), PrettyNumber(uploadedFileSize)) + len(localEntryList.ModifiedEntries), PrettyNumber(uploadedFileSize)) LOG_INFO("BACKUP_STATS", "File chunks: %d total, %s bytes; %d new, %s bytes, %s bytes uploaded", len(localSnapshot.ChunkHashes), PrettyNumber(totalFileChunkLength), @@ -704,16 +561,16 @@ func (manager *BackupManager) Backup(top string, quickMode bool, threads int, ta PrettyNumber(totalUploadedFileChunkBytes)) LOG_INFO("BACKUP_STATS", "Metadata chunks: %d total, %s bytes; %d new, %s bytes, %s bytes uploaded", - totalSnapshotChunks, PrettyNumber(totalSnapshotChunkLength), - numberOfNewSnapshotChunks, PrettyNumber(totalUploadedSnapshotChunkLength), - PrettyNumber(totalUploadedSnapshotChunkBytes)) + totalMetadataChunks, PrettyNumber(totalMetadataChunkLength), + numberOfNewMetadataChunks, PrettyNumber(totalUploadedMetadataChunkLength), + PrettyNumber(totalUploadedMetadataChunkBytes)) LOG_INFO("BACKUP_STATS", "All chunks: %d total, %s bytes; %d new, %s bytes, %s bytes uploaded", - len(localSnapshot.ChunkHashes)+totalSnapshotChunks, - PrettyNumber(totalFileChunkLength+totalSnapshotChunkLength), - 
int(numberOfNewFileChunks)+numberOfNewSnapshotChunks, - PrettyNumber(totalUploadedFileChunkLength+totalUploadedSnapshotChunkLength), - PrettyNumber(totalUploadedFileChunkBytes+totalUploadedSnapshotChunkBytes)) + len(localSnapshot.ChunkHashes)+totalMetadataChunks, + PrettyNumber(totalFileChunkLength+totalMetadataChunkLength), + int(numberOfNewFileChunks)+numberOfNewMetadataChunks, + PrettyNumber(totalUploadedFileChunkLength+totalUploadedMetadataChunkLength), + PrettyNumber(totalUploadedFileChunkBytes+totalUploadedMetadataChunkBytes)) now := time.Now().Unix() if now == startTime { @@ -753,6 +610,8 @@ func (manager *BackupManager) Backup(top string, quickMode bool, threads int, ta LOG_WARN("BACKUP_SKIPPED", skipped) } + chunkOperator.Stop() + return true } @@ -789,91 +648,99 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu } } - // How will behave restore when repo created using -repo-dir ,?? err = os.Mkdir(path.Join(top, DUPLICACY_DIRECTORY), 0744) if err != nil && !os.IsExist(err) { LOG_ERROR("RESTORE_MKDIR", "Failed to create the preference directory: %v", err) return 0 } - remoteSnapshot := manager.SnapshotManager.DownloadSnapshot(manager.snapshotID, revision) - manager.SnapshotManager.DownloadSnapshotContents(remoteSnapshot, patterns, true) + // local files that don't exist in the remote snapshot + var extraFiles []string - localSnapshot, _, _, err := CreateSnapshotFromDirectory(manager.snapshotID, top, manager.nobackupFile, - manager.filtersFile, manager.excludeByAttribute) - if err != nil { - LOG_ERROR("SNAPSHOT_LIST", "Failed to list the repository: %v", err) - return 0 - } + // These will store files/directories to be downloaded. 
+ fileEntries := make([]*Entry, 0) + directoryEntries := make([]*Entry, 0) - LOG_INFO("RESTORE_START", "Restoring %s to revision %d", top, revision) + var totalFileSize int64 + var downloadedFileSize int64 + var failedFileCount int + var skippedFileSize int64 + var skippedFileCount int64 + var downloadedFiles []*Entry - var includedFiles []*Entry + localSnapshot := CreateEmptySnapshot(manager.snapshotID) - // Include/exclude some files if needed - if len(patterns) > 0 { - for _, file := range remoteSnapshot.Files { + localListingChannel := make(chan *Entry) + remoteListingChannel := make(chan *Entry) + chunkOperator := CreateChunkOperator(manager.config, manager.storage, manager.snapshotCache, showStatistics, threads, false) - if MatchPath(file.Path, patterns) { - includedFiles = append(includedFiles, file) - } - } + LOG_INFO("RESTORE_INDEXING", "Indexing %s", top) + go func() { + // List local files + defer CatchLogException() + localSnapshot.ListLocalFiles(top, manager.nobackupFile, manager.filtersFile, manager.excludeByAttribute, localListingChannel, nil, nil) + } () - remoteSnapshot.Files = includedFiles - } + remoteSnapshot := manager.SnapshotManager.DownloadSnapshot(manager.snapshotID, revision) + manager.SnapshotManager.DownloadSnapshotSequences(remoteSnapshot) + go func() { + // List remote files + defer CatchLogException() + remoteSnapshot.ListRemoteFiles(manager.config, chunkOperator, func(entry *Entry) bool { + remoteListingChannel <- entry + return true + }) + close(remoteListingChannel) + } () - // local files that don't exist in the remote snapshot - var extraFiles []string + var localEntry *Entry + localListingOK := true - // These will store files to be downloaded. 
- fileEntries := make([]*Entry, 0, len(remoteSnapshot.Files)/2) + for remoteEntry := range remoteListingChannel { - var totalFileSize int64 - var downloadedFileSize int64 - var failedFiles int - var skippedFileSize int64 - var skippedFiles int64 - - var downloadedFiles []*Entry + if len(patterns) > 0 && !MatchPath(remoteEntry.Path, patterns) { + continue + } - i := 0 - for _, entry := range remoteSnapshot.Files { - - skipped := false - // Find local files that don't exist in the remote snapshot - for i < len(localSnapshot.Files) { - local := localSnapshot.Files[i] - compare := entry.Compare(local) - if compare > 0 { - extraFiles = append(extraFiles, local.Path) - i++ - continue + // remoteEntry is valid; now find the matching localEntry + var compareResult int + + for { + if localEntry == nil && localListingOK { + localEntry, localListingOK = <- localListingChannel + } + if localEntry == nil { + compareResult = 1 } else { - if compare == 0 { - i++ - if quickMode && local.IsSameAs(entry) { - LOG_TRACE("RESTORE_SKIP", "File %s unchanged (by size and timestamp)", local.Path) - skippedFileSize += entry.Size - skippedFiles++ - skipped = true - } + compareResult = localEntry.Compare(remoteEntry) + if compareResult < 0 { + extraFiles = append(extraFiles, localEntry.Path) + localEntry = nil + continue } - break } + break } - if skipped { - continue + if compareResult == 0 { + if quickMode && localEntry.IsFile() && localEntry.IsSameAs(remoteEntry) { + LOG_TRACE("RESTORE_SKIP", "File %s unchanged (by size and timestamp)", localEntry.Path) + skippedFileSize += localEntry.Size + skippedFileCount++ + localEntry = nil + continue + } + localEntry = nil } - fullPath := joinPath(top, entry.Path) - if entry.IsLink() { + fullPath := joinPath(top, remoteEntry.Path) + if remoteEntry.IsLink() { stat, err := os.Lstat(fullPath) if stat != nil { if stat.Mode()&os.ModeSymlink != 0 { isRegular, link, err := Readlink(fullPath) - if err == nil && link == entry.Link && !isRegular { - 
entry.RestoreMetadata(fullPath, nil, setOwner) + if err == nil && link == remoteEntry.Link && !isRegular { + remoteEntry.RestoreMetadata(fullPath, nil, setOwner) continue } } @@ -881,14 +748,15 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu os.Remove(fullPath) } - err = os.Symlink(entry.Link, fullPath) + err = os.Symlink(remoteEntry.Link, fullPath) if err != nil { - LOG_ERROR("RESTORE_SYMLINK", "Can't create symlink %s: %v", entry.Path, err) + LOG_ERROR("RESTORE_SYMLINK", "Can't create symlink %s: %v", remoteEntry.Path, err) return 0 } - entry.RestoreMetadata(fullPath, nil, setOwner) - LOG_TRACE("DOWNLOAD_DONE", "Symlink %s updated", entry.Path) - } else if entry.IsDir() { + remoteEntry.RestoreMetadata(fullPath, nil, setOwner) + LOG_TRACE("DOWNLOAD_DONE", "Symlink %s updated", remoteEntry.Path) + } else if remoteEntry.IsDir() { + stat, err := os.Stat(fullPath) if err == nil && !stat.IsDir() { @@ -905,25 +773,53 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu return 0 } } + directoryEntries = append(directoryEntries, remoteEntry) } else { // We can't download files here since fileEntries needs to be sorted - fileEntries = append(fileEntries, entry) - totalFileSize += entry.Size + fileEntries = append(fileEntries, remoteEntry) + totalFileSize += remoteEntry.Size + } + } + + if localEntry != nil { + extraFiles = append(extraFiles, localEntry.Path) + } + + for localListingOK { + localEntry, localListingOK = <- localListingChannel + if localEntry != nil { + extraFiles = append(extraFiles, localEntry.Path) } } - for i < len(localSnapshot.Files) { - extraFiles = append(extraFiles, localSnapshot.Files[i].Path) - i++ + LOG_INFO("RESTORE_START", "Restoring %s to revision %d", top, revision) + + // The same chunk may appear in the chunk list multiple times. 
This is to find the first + // occurrence for each chunk + chunkMap := make(map[string]int) + for i, chunk := range remoteSnapshot.ChunkHashes { + if _, found := chunkMap[chunk]; !found { + chunkMap[chunk] = i + } + } + + // For small files that span only one chunk, use the first chunk instead + for _, file := range fileEntries { + if file.StartChunk == file.EndChunk { + first := chunkMap[remoteSnapshot.ChunkHashes[file.StartChunk]] + file.StartChunk = first + file.EndChunk = first + } } // Sort entries by their starting chunks in order to linearize the access to the chunk chain. sort.Sort(ByChunk(fileEntries)) - chunkDownloader := CreateChunkDownloader(manager.config, manager.storage, nil, showStatistics, threads, allowFailures) + chunkDownloader := CreateChunkDownloader(chunkOperator) + chunkDownloader.AddFiles(remoteSnapshot, fileEntries) - chunkMaker := CreateChunkMaker(manager.config, true) + chunkMaker := CreateFileChunkMaker(manager.config, true) startDownloadingTime := time.Now().Unix() @@ -937,7 +833,7 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu if file.IsSameAsFileInfo(stat) { LOG_TRACE("RESTORE_SKIP", "File %s unchanged (by size and timestamp)", file.Path) skippedFileSize += file.Size - skippedFiles++ + skippedFileCount++ continue } } @@ -945,7 +841,7 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu if file.Size == 0 && file.IsSameAsFileInfo(stat) { LOG_TRACE("RESTORE_SKIP", "File %s unchanged (size 0)", file.Path) skippedFileSize += file.Size - skippedFiles++ + skippedFileCount++ continue } } else { @@ -980,7 +876,7 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu if err != nil { // RestoreFile returned an error; if allowFailures is false RestoerFile would error out and not return so here // we just need to show a warning - failedFiles++ + failedFileCount++ LOG_WARN("DOWNLOAD_FAIL", "Failed to restore %s: %v", file.Path, err) continue } @@ 
-993,7 +889,7 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu } else { // No error, file was skipped skippedFileSize += file.Size - skippedFiles++ + skippedFileCount++ } file.RestoreMetadata(fullPath, nil, setOwner) } @@ -1008,11 +904,9 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu } } - for _, entry := range remoteSnapshot.Files { - if entry.IsDir() && !entry.IsLink() { - dir := joinPath(top, entry.Path) - entry.RestoreMetadata(dir, nil, setOwner) - } + for _, entry := range directoryEntries { + dir := joinPath(top, entry.Path) + entry.RestoreMetadata(dir, nil, setOwner) } if showStatistics { @@ -1021,8 +915,8 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu } } - if failedFiles > 0 { - return failedFiles + if failedFileCount > 0 { + return failedFileCount } LOG_INFO("RESTORE_END", "Restored %s to revision %d", top, revision) @@ -1030,7 +924,7 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu LOG_INFO("RESTORE_STATS", "Files: %d total, %s bytes", len(fileEntries), PrettySize(totalFileSize)) LOG_INFO("RESTORE_STATS", "Downloaded %d file, %s bytes, %d chunks", len(downloadedFiles), PrettySize(downloadedFileSize), chunkDownloader.numberOfDownloadedChunks) - LOG_INFO("RESTORE_STATS", "Skipped %d file, %s bytes", skippedFiles, PrettySize(skippedFileSize)) + LOG_INFO("RESTORE_STATS", "Skipped %d file, %s bytes", skippedFileCount, PrettySize(skippedFileSize)) } runningTime := time.Now().Unix() - startTime @@ -1040,7 +934,7 @@ func (manager *BackupManager) Restore(top string, revision int, inPlace bool, qu LOG_INFO("RESTORE_STATS", "Total running time: %s", PrettyTime(runningTime)) - chunkDownloader.Stop() + chunkOperator.Stop() return 0 } @@ -1096,60 +990,113 @@ func (encoder *fileEncoder) NextFile() (io.Reader, bool) { // UploadSnapshot uploads the specified snapshot to the storage. 
It turns Files, ChunkHashes, and ChunkLengths into // sequences of chunks, and uploads these chunks, and finally the snapshot file. -func (manager *BackupManager) UploadSnapshot(chunkMaker *ChunkMaker, uploader *ChunkUploader, top string, snapshot *Snapshot, - chunkCache map[string]bool) (totalSnapshotChunkSize int64, - numberOfNewSnapshotChunks int, totalUploadedSnapshotChunkSize int64, - totalUploadedSnapshotChunkBytes int64) { - - uploader.snapshotCache = manager.snapshotCache - - completionFunc := func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) { - if skipped { - LOG_DEBUG("CHUNK_CACHE", "Skipped snapshot chunk %s in cache", chunk.GetID()) +func (manager *BackupManager) UploadSnapshot(chunkOperator *ChunkOperator, top string, snapshot *Snapshot, + entryList *EntryList, chunkCache map[string]bool, metadataChunkSize int) (totalMetadataChunkSize int64, + numberOfNewMetadataChunks int, totalUploadedMetadataChunkSize int64, + totalUploadedMetadataChunkBytes int64) { + + uploadCompletionFunc := func(chunk *Chunk, chunkIndex int, inCache bool, chunkSize int, uploadSize int) { + if inCache { + LOG_DEBUG("CHUNK_CACHE", "Skipped metadata chunk %s in cache", chunk.GetID()) } else { if uploadSize > 0 { - numberOfNewSnapshotChunks++ - totalUploadedSnapshotChunkSize += int64(chunkSize) - totalUploadedSnapshotChunkBytes += int64(uploadSize) + numberOfNewMetadataChunks++ + totalUploadedMetadataChunkSize += int64(chunkSize) + totalUploadedMetadataChunkBytes += int64(uploadSize) } else { - LOG_DEBUG("CHUNK_EXIST", "Skipped snapshot chunk %s in the storage", chunk.GetID()) + LOG_DEBUG("CHUNK_EXIST", "Skipped metadata chunk %s in the storage", chunk.GetID()) } } manager.config.PutChunk(chunk) } - uploader.completionFunc = completionFunc - uploader.Start() + chunkOperator.UploadCompletionFunc = uploadCompletionFunc - // uploadSequenceFunc uploads chunks read from 'reader'. 
- uploadSequenceFunc := func(reader io.Reader, - nextReader func(size int64, hash string) (io.Reader, bool)) (sequence []string) { + chunkIndex := -1 + var chunkSequence []string - chunkMaker.ForEachChunk(reader, - func(chunk *Chunk, final bool) { - totalSnapshotChunkSize += int64(chunk.GetLength()) - chunkID := chunk.GetID() - if _, found := chunkCache[chunkID]; found { - completionFunc(chunk, 0, true, chunk.GetLength(), 0) - } else { - uploader.StartChunk(chunk, len(sequence)) - } - sequence = append(sequence, chunk.GetHash()) - }, - nextReader) + uploadChunkFunc := func(chunk *Chunk) { + hash := chunk.GetHash() + chunkID := chunk.GetID() + chunkSize := chunk.GetLength() + + chunkIndex++ + totalMetadataChunkSize += int64(chunkSize) + + _, found := chunkCache[chunkID] + if found { + uploadCompletionFunc(chunk, chunkIndex, true, chunkSize, 0) + } else { + chunkCache[chunkID] = true + chunkOperator.Upload(chunk, chunkIndex, true) + } + + chunkSequence = append(chunkSequence, hash) + } + + buffer := new(bytes.Buffer) + encoder := msgpack.NewEncoder(buffer) + metadataChunkMaker := CreateMetaDataChunkMaker(manager.config, metadataChunkSize) + + var chunkHashes []string + var chunkLengths []int + lastChunk := -1 + + lastEndChunk := 0 + + uploadEntryInfoFunc := func(entry *Entry) error { + + delta := entry.StartChunk - len(chunkHashes) + 1 + if entry.StartChunk != lastChunk { + chunkHashes = append(chunkHashes, snapshot.ChunkHashes[entry.StartChunk]) + chunkLengths = append(chunkLengths, snapshot.ChunkLengths[entry.StartChunk]) + delta-- + } + + for i := entry.StartChunk + 1; i <= entry.EndChunk; i++ { + chunkHashes = append(chunkHashes, snapshot.ChunkHashes[i]) + chunkLengths = append(chunkLengths, snapshot.ChunkLengths[i]) + } + + lastChunk = entry.EndChunk + entry.StartChunk -= delta + entry.EndChunk -= delta + + if entry.IsFile() { + delta := entry.EndChunk - entry.StartChunk + entry.StartChunk -= lastEndChunk + lastEndChunk = entry.EndChunk + entry.EndChunk = delta 
+ } - return sequence + buffer.Reset() + err := encoder.Encode(entry) + if err != nil { + LOG_ERROR("SNAPSHOT_UPLOAD", "Metadata for %s can't be encoded: %v", entry.Path, err) + return err + } + + metadataChunkMaker.AddData(buffer, uploadChunkFunc) + return nil } - sequences := []string{"chunks", "lengths"} - // The file list is assumed not to be too large when fixed-size chunking is used - if chunkMaker.minimumChunkSize == chunkMaker.maximumChunkSize { - sequences = append(sequences, "files") + err := entryList.ReadEntries(uploadEntryInfoFunc) + if err != nil { + LOG_ERROR("SNAPSHOT_UPLOAD", "The file list contains an error: %v", err) + return 0, 0, 0, 0 } + snapshot.ChunkHashes = chunkHashes + snapshot.ChunkLengths = chunkLengths + + metadataChunkMaker.AddData(nil, uploadChunkFunc) + snapshot.SetSequence("files", chunkSequence) + // Chunk and length sequences can be encoded and loaded into memory directly - for _, sequenceType := range sequences { + for _, sequenceType := range []string{"chunks", "lengths"} { + + chunkSequence = nil contents, err := snapshot.MarshalSequence(sequenceType) if err != nil { @@ -1158,33 +1105,20 @@ func (manager *BackupManager) UploadSnapshot(chunkMaker *ChunkMaker, uploader *C return int64(0), 0, int64(0), int64(0) } - sequence := uploadSequenceFunc(bytes.NewReader(contents), - func(fileSize int64, hash string) (io.Reader, bool) { - return nil, false - }) - snapshot.SetSequence(sequenceType, sequence) - } + metadataChunkMaker = CreateMetaDataChunkMaker(manager.config, metadataChunkSize) + metadataChunkMaker.AddData(bytes.NewBuffer(contents), uploadChunkFunc) + metadataChunkMaker.AddData(nil, uploadChunkFunc) - // File sequence may be too big to fit into the memory. So we encode files one by one and take advantages of - // the multi-reader capability of the chunk maker. 
- if chunkMaker.minimumChunkSize != chunkMaker.maximumChunkSize { - encoder := fileEncoder{ - top: top, - readAttributes: snapshot.discardAttributes, - files: snapshot.Files, - currentIndex: -1, - buffer: new(bytes.Buffer), - } + snapshot.SetSequence(sequenceType, chunkSequence) - encoder.buffer.Write([]byte("[")) - sequence := uploadSequenceFunc(encoder, - func(fileSize int64, hash string) (io.Reader, bool) { - return encoder.NextFile() - }) - snapshot.SetSequence("files", sequence) } - uploader.Stop() + chunkOperator.WaitForCompletion() + + if _, found := os.LookupEnv("DUPLICACY_FAIL_SNAPSHOT"); found { + LOG_ERROR("SNAPSHOT_FAIL", "Artificially fail the backup for testing purposes") + return 0, 0, 0, 0 + } description, err := snapshot.MarshalJSON() if err != nil { @@ -1196,7 +1130,7 @@ func (manager *BackupManager) UploadSnapshot(chunkMaker *ChunkMaker, uploader *C if !manager.config.dryRun { manager.SnapshotManager.UploadFile(path, path, description) } - return totalSnapshotChunkSize, numberOfNewSnapshotChunks, totalUploadedSnapshotChunkSize, totalUploadedSnapshotChunkBytes + return totalMetadataChunkSize, numberOfNewMetadataChunks, totalUploadedMetadataChunkSize, totalUploadedMetadataChunkBytes } // Restore downloads a file from the storage. If 'inPlace' is false, the download file is saved first to a temporary @@ -1386,21 +1320,20 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun } else { // If it is not inplace, we want to reuse any chunks in the existing file regardless their offets, so // we run the chunk maker to split the original file. 
- chunkMaker.ForEachChunk( - existingFile, - func(chunk *Chunk, final bool) { - hash := chunk.GetHash() - chunkSize := chunk.GetLength() - existingChunks = append(existingChunks, hash) - existingLengths = append(existingLengths, chunkSize) - offsetMap[hash] = offset - lengthMap[hash] = chunkSize - offset += int64(chunkSize) - }, - func(fileSize int64, hash string) (io.Reader, bool) { - fileHash = hash - return nil, false - }) + + offset := int64(0) + chunkFunc := func(chunk *Chunk) { + hash := chunk.GetHash() + chunkSize := chunk.GetLength() + existingChunks = append(existingChunks, hash) + existingLengths = append(existingLengths, chunkSize) + offsetMap[hash] = offset + lengthMap[hash] = chunkSize + offset += int64(chunkSize) + } + + chunkMaker.AddData(existingFile, chunkFunc) + chunkMaker.AddData(nil, chunkFunc) } // This is an additional check comparing fileHash to entry.Hash above, so this should no longer occur @@ -1507,7 +1440,7 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun hash := hex.EncodeToString(hasher.Sum(nil)) if hash != entry.Hash && hash != "" && entry.Hash != "" && !strings.HasPrefix(entry.Hash, "#") { LOG_WERROR(allowFailures, "DOWNLOAD_HASH", "File %s has a mismatched hash: %s instead of %s (in-place)", - fullPath, "", entry.Hash) + fullPath, hash, entry.Hash) return false, fmt.Errorf("file corrupt (hash mismatch)") } @@ -1523,7 +1456,7 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun hasher := manager.config.NewFileHasher() var localChunk *Chunk - defer chunkDownloader.config.PutChunk(localChunk) + defer chunkDownloader.operator.config.PutChunk(localChunk) var offset int64 for i := entry.StartChunk; i <= entry.EndChunk; i++ { @@ -1538,7 +1471,7 @@ func (manager *BackupManager) RestoreFile(chunkDownloader *ChunkDownloader, chun length := lengthMap[hash] existingFile.Seek(offset, 0) if localChunk == nil { - localChunk = chunkDownloader.config.GetChunk() + localChunk = 
chunkDownloader.operator.config.GetChunk() } localChunk.Reset(true) _, err = io.CopyN(localChunk, existingFile, int64(length)) @@ -1712,7 +1645,7 @@ func (manager *BackupManager) CopySnapshots(otherManager *BackupManager, snapsho } // These two maps store hashes of chunks in the source and destination storages, respectively. Note that - // the value of 'chunks' is used to indicated if the chunk is a snapshot chunk, while the value of 'otherChunks' + // the value of 'chunks' is used to indicate if the chunk is a snapshot chunk, while the value of 'otherChunks' // is not used. chunks := make(map[string]bool) otherChunks := make(map[string]bool) @@ -1781,47 +1714,46 @@ func (manager *BackupManager) CopySnapshots(otherManager *BackupManager, snapsho LOG_INFO("SNAPSHOT_COPY", "Chunks to copy: %d, to skip: %d, total: %d", len(chunksToCopy), len(chunks) - len(chunksToCopy), len(chunks)) - chunkDownloader := CreateChunkDownloader(manager.config, manager.storage, nil, false, downloadingThreads, false) + chunkDownloader := CreateChunkOperator(manager.config, manager.storage, nil, false, downloadingThreads, false) var uploadedBytes int64 startTime := time.Now() copiedChunks := 0 - chunkUploader := CreateChunkUploader(otherManager.config, otherManager.storage, nil, uploadingThreads, - func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) { - action := "Skipped" - if !skipped { - copiedChunks++ - action = "Copied" - } - - atomic.AddInt64(&uploadedBytes, int64(chunkSize)) - - elapsedTime := time.Now().Sub(startTime).Seconds() - speed := int64(float64(atomic.LoadInt64(&uploadedBytes)) / elapsedTime) - remainingTime := int64(float64(len(chunksToCopy) - chunkIndex - 1) / float64(chunkIndex + 1) * elapsedTime) - percentage := float64(chunkIndex + 1) / float64(len(chunksToCopy)) * 100.0 - LOG_INFO("COPY_PROGRESS", "%s chunk %s (%d/%d) %sB/s %s %.1f%%", - action, chunk.GetID(), chunkIndex + 1, len(chunksToCopy), - PrettySize(speed), PrettyTime(remainingTime), 
percentage) - otherManager.config.PutChunk(chunk) - }) + chunkUploader := CreateChunkOperator(otherManager.config, otherManager.storage, nil, false, uploadingThreads, false) + chunkUploader.UploadCompletionFunc = func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) { + action := "Skipped" + if !skipped { + copiedChunks++ + action = "Copied" + } - chunkUploader.Start() + atomic.AddInt64(&uploadedBytes, int64(chunkSize)) - for _, chunkHash := range chunksToCopy { - chunkDownloader.AddChunk(chunkHash) + elapsedTime := time.Now().Sub(startTime).Seconds() + speed := int64(float64(atomic.LoadInt64(&uploadedBytes)) / elapsedTime) + remainingTime := int64(float64(len(chunksToCopy) - chunkIndex - 1) / float64(chunkIndex + 1) * elapsedTime) + percentage := float64(chunkIndex + 1) / float64(len(chunksToCopy)) * 100.0 + LOG_INFO("COPY_PROGRESS", "%s chunk %s (%d/%d) %sB/s %s %.1f%%", + action, chunk.GetID(), chunkIndex + 1, len(chunksToCopy), + PrettySize(speed), PrettyTime(remainingTime), percentage) + otherManager.config.PutChunk(chunk) } + for i, chunkHash := range chunksToCopy { chunkID := manager.config.GetChunkIDFromHash(chunkHash) newChunkID := otherManager.config.GetChunkIDFromHash(chunkHash) + + chunkDownloader.DownloadAsync(chunkHash, i, chunks[chunkHash], func(chunk *Chunk, chunkIndex int) { + newChunk := otherManager.config.GetChunk() + newChunk.Reset(true) + newChunk.Write(chunk.GetBytes()) + newChunk.isMetadata = chunks[chunk.GetHash()] + chunkUploader.Upload(newChunk, chunkIndex, newChunk.isMetadata) + manager.config.PutChunk(chunk) + }) + LOG_DEBUG("SNAPSHOT_COPY", "Copying chunk %s to %s", chunkID, newChunkID) - chunk := chunkDownloader.WaitForChunk(i) - newChunk := otherManager.config.GetChunk() - newChunk.Reset(true) - newChunk.Write(chunk.GetBytes()) - newChunk.isSnapshot = chunks[chunkHash] - chunkUploader.StartChunk(newChunk, i) } chunkDownloader.Stop() diff --git a/src/duplicacy_backupmanager_test.go 
b/src/duplicacy_backupmanager_test.go index 988bf43f..cd3798c5 100644 --- a/src/duplicacy_backupmanager_test.go +++ b/src/duplicacy_backupmanager_test.go @@ -257,7 +257,7 @@ func TestBackupManager(t *testing.T) { backupManager.SetupSnapshotCache("default") SetDuplicacyPreferencePath(testDir + "/repository1/.duplicacy") - backupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "first", false, false, 0, false) + backupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "first", false, false, 0, false, 1024, 1024) time.Sleep(time.Duration(delay) * time.Second) SetDuplicacyPreferencePath(testDir + "/repository2/.duplicacy") failedFiles := backupManager.Restore(testDir+"/repository2", threads /*inPlace=*/, false /*quickMode=*/, false, threads /*overwrite=*/, true, @@ -282,7 +282,7 @@ func TestBackupManager(t *testing.T) { modifyFile(testDir+"/repository1/dir1/file3", 0.3) SetDuplicacyPreferencePath(testDir + "/repository1/.duplicacy") - backupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "second", false, false, 0, false) + backupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "second", false, false, 0, false, 1024, 1024) time.Sleep(time.Duration(delay) * time.Second) SetDuplicacyPreferencePath(testDir + "/repository2/.duplicacy") failedFiles = backupManager.Restore(testDir+"/repository2", 2 /*inPlace=*/, true /*quickMode=*/, true, threads /*overwrite=*/, true, @@ -303,7 +303,7 @@ func TestBackupManager(t *testing.T) { os.Mkdir(testDir+"/repository1/dir2/dir3", 0700) os.Mkdir(testDir+"/repository1/dir4", 0700) SetDuplicacyPreferencePath(testDir + "/repository1/.duplicacy") - backupManager.Backup(testDir+"/repository1" /*quickMode=*/, false, threads, "third", false, false, 0, false) + backupManager.Backup(testDir+"/repository1" /*quickMode=*/, false, threads, "third", false, false, 0, false, 1024, 1024) time.Sleep(time.Duration(delay) * time.Second) // Create some directories and files under 
repository2 that will be deleted during restore @@ -368,7 +368,7 @@ func TestBackupManager(t *testing.T) { } backupManager.SnapshotManager.CheckSnapshots( /*snapshotID*/ "host1" /*revisions*/, []int{2, 3} /*tag*/, "", /*showStatistics*/ false /*showTabular*/, false /*checkFiles*/, false /*checkChunks*/, false /*searchFossils*/, false /*resurrect*/, false, 1 /*allowFailures*/, false) - backupManager.Backup(testDir+"/repository1" /*quickMode=*/, false, threads, "fourth", false, false, 0, false) + backupManager.Backup(testDir+"/repository1" /*quickMode=*/, false, threads, "fourth", false, false, 0, false, 1024, 1024) backupManager.SnapshotManager.PruneSnapshots("host1", "host1" /*revisions*/, nil /*tags*/, nil /*retentions*/, nil, /*exhaustive*/ false /*exclusive=*/, true /*ignoredIDs*/, nil /*dryRun*/, false /*deleteOnly*/, false /*collectOnly*/, false, 1) numberOfSnapshots = backupManager.SnapshotManager.ListSnapshots( /*snapshotID*/ "host1" /*revisionsToList*/, nil /*tag*/, "" /*showFiles*/, false /*showChunks*/, false) @@ -533,7 +533,7 @@ func TestPersistRestore(t *testing.T) { unencBackupManager.SetupSnapshotCache("default") SetDuplicacyPreferencePath(testDir + "/repository1/.duplicacy") - unencBackupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "first", false, false, 0, false) + unencBackupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "first", false, false, 0, false, 1024, 1024) time.Sleep(time.Duration(delay) * time.Second) @@ -543,7 +543,7 @@ func TestPersistRestore(t *testing.T) { encBackupManager.SetupSnapshotCache("default") SetDuplicacyPreferencePath(testDir + "/repository1/.duplicacy") - encBackupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "first", false, false, 0, false) + encBackupManager.Backup(testDir+"/repository1" /*quickMode=*/, true, threads, "first", false, false, 0, false, 1024, 1024) time.Sleep(time.Duration(delay) * time.Second) diff --git a/src/duplicacy_benchmark.go 
b/src/duplicacy_benchmark.go index 0851c825..ffe46f53 100644 --- a/src/duplicacy_benchmark.go +++ b/src/duplicacy_benchmark.go @@ -29,29 +29,29 @@ func benchmarkSplit(reader *bytes.Reader, fileSize int64, chunkSize int, compres config.HashKey = DEFAULT_KEY config.IDKey = DEFAULT_KEY - maker := CreateChunkMaker(config, false) + maker := CreateFileChunkMaker(config, false) startTime := float64(time.Now().UnixNano()) / 1e9 numberOfChunks := 0 reader.Seek(0, os.SEEK_SET) - maker.ForEachChunk(reader, - func(chunk *Chunk, final bool) { - if compression { - key := "" - if encryption { - key = "0123456789abcdef0123456789abcdef" - } - err := chunk.Encrypt([]byte(key), "", false) - if err != nil { - LOG_ERROR("BENCHMARK_ENCRYPT", "Failed to encrypt the chunk: %v", err) - } + + chunkFunc := func(chunk *Chunk) { + if compression { + key := "" + if encryption { + key = "0123456789abcdef0123456789abcdef" } - config.PutChunk(chunk) - numberOfChunks++ - }, - func(size int64, hash string) (io.Reader, bool) { - return nil, false - }) + err := chunk.Encrypt([]byte(key), "", false) + if err != nil { + LOG_ERROR("BENCHMARK_ENCRYPT", "Failed to encrypt the chunk: %v", err) + } + } + config.PutChunk(chunk) + numberOfChunks++ + } + + maker.AddData(reader, chunkFunc) + maker.AddData(nil, chunkFunc) runningTime := float64(time.Now().UnixNano())/1e9 - startTime speed := int64(float64(fileSize) / runningTime) diff --git a/src/duplicacy_chunk.go b/src/duplicacy_chunk.go index 860036f0..8ed3a740 100644 --- a/src/duplicacy_chunk.go +++ b/src/duplicacy_chunk.go @@ -65,8 +65,8 @@ type Chunk struct { config *Config // Every chunk is associated with a Config object. Which hashing algorithm to use is determined // by the config - isSnapshot bool // Indicates if the chunk is a snapshot chunk (instead of a file chunk). This is only used by RSA - // encryption, where a snapshot chunk is not encrypted by RSA + isMetadata bool // Indicates if the chunk is a metadata chunk (instead of a file chunk). 
This is primarily used by RSA + // encryption, where a metadata chunk is not encrypted by RSA isBroken bool // Indicates the chunk did not download correctly. This is only used for -persist (allowFailures) mode } @@ -127,7 +127,7 @@ func (chunk *Chunk) Reset(hashNeeded bool) { chunk.hash = nil chunk.id = "" chunk.size = 0 - chunk.isSnapshot = false + chunk.isMetadata = false chunk.isBroken = false } @@ -186,7 +186,7 @@ func (chunk *Chunk) VerifyID() { // Encrypt encrypts the plain data stored in the chunk buffer. If derivationKey is not nil, the actual // encryption key will be HMAC-SHA256(encryptionKey, derivationKey). -func (chunk *Chunk) Encrypt(encryptionKey []byte, derivationKey string, isSnapshot bool) (err error) { +func (chunk *Chunk) Encrypt(encryptionKey []byte, derivationKey string, isMetadata bool) (err error) { var aesBlock cipher.Block var gcm cipher.AEAD @@ -203,8 +203,8 @@ func (chunk *Chunk) Encrypt(encryptionKey []byte, derivationKey string, isSnapsh key := encryptionKey usingRSA := false - // Enable RSA encryption only when the chunk is not a snapshot chunk - if chunk.config.rsaPublicKey != nil && !isSnapshot && !chunk.isSnapshot { + // Enable RSA encryption only when the chunk is not a metadata chunk + if chunk.config.rsaPublicKey != nil && !isMetadata && !chunk.isMetadata { randomKey := make([]byte, 32) _, err := rand.Read(randomKey) if err != nil { diff --git a/src/duplicacy_chunkdownloader.go b/src/duplicacy_chunkdownloader.go index f48cd0f5..62865b5e 100644 --- a/src/duplicacy_chunkdownloader.go +++ b/src/duplicacy_chunkdownloader.go @@ -5,7 +5,6 @@ package duplicacy import ( - "io" "sync/atomic" "time" ) @@ -20,78 +19,47 @@ type ChunkDownloadTask struct { isDownloading bool // 'true' means the chunk has been downloaded or is being downloaded } -// ChunkDownloadCompletion represents the nofication when a chunk has been downloaded. 
type ChunkDownloadCompletion struct { - chunkIndex int // The index of this chunk in the chunk list - chunk *Chunk // The chunk that has been downloaded + chunk *Chunk + chunkIndex int } -// ChunkDownloader is capable of performing multi-threaded downloading. Chunks to be downloaded are first organized +// ChunkDownloader is a wrapper of ChunkOperator and is only used by the restore procedure.capable of performing multi-threaded downloading. Chunks to be downloaded are first organized // as a list of ChunkDownloadTasks, with only the chunkHash field initialized. When a chunk is needed, the // corresponding ChunkDownloadTask is sent to the dowloading goroutine. Once a chunk is downloaded, it will be // inserted in the completed task list. type ChunkDownloader struct { + + operator *ChunkOperator + totalChunkSize int64 // Total chunk size downloadedChunkSize int64 // Downloaded chunk size - config *Config // Associated config - storage Storage // Download from this storage - snapshotCache *FileStorage // Used as cache if not nil; usually for downloading snapshot chunks - showStatistics bool // Show a stats log for each chunk if true - threads int // Number of threads - allowFailures bool // Whether to failfast on download error, or continue - taskList []ChunkDownloadTask // The list of chunks to be downloaded completedTasks map[int]bool // Store downloaded chunks lastChunkIndex int // a monotonically increasing number indicating the last chunk to be downloaded - taskQueue chan ChunkDownloadTask // Downloading goroutines are waiting on this channel for input - stopChannel chan bool // Used to stop the dowloading goroutines completionChannel chan ChunkDownloadCompletion // A downloading goroutine sends back the chunk via this channel after downloading startTime int64 // The time it starts downloading numberOfDownloadedChunks int // The number of chunks that have been downloaded numberOfDownloadingChunks int // The number of chunks still being downloaded 
numberOfActiveChunks int // The number of chunks that is being downloaded or has been downloaded but not reclaimed - - NumberOfFailedChunks int // The number of chunks that can't be downloaded } -func CreateChunkDownloader(config *Config, storage Storage, snapshotCache *FileStorage, showStatistics bool, threads int, allowFailures bool) *ChunkDownloader { +func CreateChunkDownloader(operator *ChunkOperator) *ChunkDownloader { downloader := &ChunkDownloader{ - config: config, - storage: storage, - snapshotCache: snapshotCache, - showStatistics: showStatistics, - threads: threads, - allowFailures: allowFailures, + operator: operator, taskList: nil, completedTasks: make(map[int]bool), lastChunkIndex: 0, - taskQueue: make(chan ChunkDownloadTask, threads), - stopChannel: make(chan bool), completionChannel: make(chan ChunkDownloadCompletion), startTime: time.Now().Unix(), } - // Start the downloading goroutines - for i := 0; i < downloader.threads; i++ { - go func(threadIndex int) { - defer CatchLogException() - for { - select { - case task := <-downloader.taskQueue: - downloader.Download(threadIndex, task) - case <-downloader.stopChannel: - return - } - } - }(i) - } - return downloader } @@ -129,26 +97,6 @@ func (downloader *ChunkDownloader) AddFiles(snapshot *Snapshot, files []*Entry) } } -// AddChunk adds a single chunk the download list. 
-func (downloader *ChunkDownloader) AddChunk(chunkHash string) int { - - task := ChunkDownloadTask{ - chunkIndex: len(downloader.taskList), - chunkHash: chunkHash, - chunkLength: 0, - needed: true, - isDownloading: false, - } - downloader.taskList = append(downloader.taskList, task) - if downloader.numberOfActiveChunks < downloader.threads { - downloader.taskQueue <- task - downloader.numberOfDownloadingChunks++ - downloader.numberOfActiveChunks++ - downloader.taskList[len(downloader.taskList)-1].isDownloading = true - } - return len(downloader.taskList) - 1 -} - // Prefetch adds up to 'threads' chunks needed by a file to the download list func (downloader *ChunkDownloader) Prefetch(file *Entry) { @@ -159,20 +107,22 @@ func (downloader *ChunkDownloader) Prefetch(file *Entry) { task := &downloader.taskList[i] if task.needed { if !task.isDownloading { - if downloader.numberOfActiveChunks >= downloader.threads { + if downloader.numberOfActiveChunks >= downloader.operator.threads { return } LOG_DEBUG("DOWNLOAD_PREFETCH", "Prefetching %s chunk %s", file.Path, - downloader.config.GetChunkIDFromHash(task.chunkHash)) - downloader.taskQueue <- *task + downloader.operator.config.GetChunkIDFromHash(task.chunkHash)) + downloader.operator.DownloadAsync(task.chunkHash, i, false, func (chunk *Chunk, chunkIndex int) { + downloader.completionChannel <- ChunkDownloadCompletion { chunk: chunk, chunkIndex: chunkIndex } + }) task.isDownloading = true downloader.numberOfDownloadingChunks++ downloader.numberOfActiveChunks++ } } else { LOG_DEBUG("DOWNLOAD_PREFETCH", "%s chunk %s is not needed", file.Path, - downloader.config.GetChunkIDFromHash(task.chunkHash)) + downloader.operator.config.GetChunkIDFromHash(task.chunkHash)) } } } @@ -186,7 +136,7 @@ func (downloader *ChunkDownloader) Reclaim(chunkIndex int) { for i := range downloader.completedTasks { if i < chunkIndex && downloader.taskList[i].chunk != nil { - downloader.config.PutChunk(downloader.taskList[i].chunk) + 
downloader.operator.config.PutChunk(downloader.taskList[i].chunk) downloader.taskList[i].chunk = nil delete(downloader.completedTasks, i) downloader.numberOfActiveChunks-- @@ -222,8 +172,10 @@ func (downloader *ChunkDownloader) WaitForChunk(chunkIndex int) (chunk *Chunk) { // If we haven't started download the specified chunk, download it now if !downloader.taskList[chunkIndex].isDownloading { LOG_DEBUG("DOWNLOAD_FETCH", "Fetching chunk %s", - downloader.config.GetChunkIDFromHash(downloader.taskList[chunkIndex].chunkHash)) - downloader.taskQueue <- downloader.taskList[chunkIndex] + downloader.operator.config.GetChunkIDFromHash(downloader.taskList[chunkIndex].chunkHash)) + downloader.operator.DownloadAsync(downloader.taskList[chunkIndex].chunkHash, chunkIndex, false, func (chunk *Chunk, chunkIndex int) { + downloader.completionChannel <- ChunkDownloadCompletion { chunk: chunk, chunkIndex: chunkIndex } + }) downloader.taskList[chunkIndex].isDownloading = true downloader.numberOfDownloadingChunks++ downloader.numberOfActiveChunks++ @@ -231,7 +183,7 @@ func (downloader *ChunkDownloader) WaitForChunk(chunkIndex int) (chunk *Chunk) { // We also need to look ahead and prefetch other chunks as many as permitted by the number of threads for i := chunkIndex + 1; i < len(downloader.taskList); i++ { - if downloader.numberOfActiveChunks >= downloader.threads { + if downloader.numberOfActiveChunks >= downloader.operator.threads { break } task := &downloader.taskList[i] @@ -240,8 +192,10 @@ func (downloader *ChunkDownloader) WaitForChunk(chunkIndex int) (chunk *Chunk) { } if !task.isDownloading { - LOG_DEBUG("DOWNLOAD_PREFETCH", "Prefetching chunk %s", downloader.config.GetChunkIDFromHash(task.chunkHash)) - downloader.taskQueue <- *task + LOG_DEBUG("DOWNLOAD_PREFETCH", "Prefetching chunk %s", downloader.operator.config.GetChunkIDFromHash(task.chunkHash)) + downloader.operator.DownloadAsync(task.chunkHash, task.chunkIndex, false, func (chunk *Chunk, chunkIndex int) { + 
downloader.completionChannel <- ChunkDownloadCompletion { chunk: chunk, chunkIndex: chunkIndex } + }) task.isDownloading = true downloader.numberOfDownloadingChunks++ downloader.numberOfActiveChunks++ @@ -255,9 +209,6 @@ func (downloader *ChunkDownloader) WaitForChunk(chunkIndex int) (chunk *Chunk) { downloader.taskList[completion.chunkIndex].chunk = completion.chunk downloader.numberOfDownloadedChunks++ downloader.numberOfDownloadingChunks-- - if completion.chunk.isBroken { - downloader.NumberOfFailedChunks++ - } } return downloader.taskList[chunkIndex].chunk } @@ -281,13 +232,10 @@ func (downloader *ChunkDownloader) WaitForCompletion() { // Wait for a completion event first if downloader.numberOfActiveChunks > 0 { completion := <-downloader.completionChannel - downloader.config.PutChunk(completion.chunk) + downloader.operator.config.PutChunk(completion.chunk) downloader.numberOfActiveChunks-- downloader.numberOfDownloadedChunks++ downloader.numberOfDownloadingChunks-- - if completion.chunk.isBroken { - downloader.NumberOfFailedChunks++ - } } // Pass the tasks one by one to the download queue @@ -297,7 +245,9 @@ func (downloader *ChunkDownloader) WaitForCompletion() { downloader.lastChunkIndex++ continue } - downloader.taskQueue <- *task + downloader.operator.DownloadAsync(task.chunkHash, task.chunkIndex, false, func (chunk *Chunk, chunkIndex int) { + downloader.completionChannel <- ChunkDownloadCompletion { chunk: chunk, chunkIndex: chunkIndex } + }) task.isDownloading = true downloader.numberOfDownloadingChunks++ downloader.numberOfActiveChunks++ @@ -306,213 +256,3 @@ func (downloader *ChunkDownloader) WaitForCompletion() { } } -// Stop terminates all downloading goroutines -func (downloader *ChunkDownloader) Stop() { - for downloader.numberOfDownloadingChunks > 0 { - completion := <-downloader.completionChannel - downloader.completedTasks[completion.chunkIndex] = true - downloader.taskList[completion.chunkIndex].chunk = completion.chunk - 
downloader.numberOfDownloadedChunks++ - downloader.numberOfDownloadingChunks-- - if completion.chunk.isBroken { - downloader.NumberOfFailedChunks++ - } -} - - for i := range downloader.completedTasks { - downloader.config.PutChunk(downloader.taskList[i].chunk) - downloader.taskList[i].chunk = nil - downloader.numberOfActiveChunks-- - } - - for i := 0; i < downloader.threads; i++ { - downloader.stopChannel <- true - } -} - -// Download downloads a chunk from the storage. -func (downloader *ChunkDownloader) Download(threadIndex int, task ChunkDownloadTask) bool { - - cachedPath := "" - chunk := downloader.config.GetChunk() - chunkID := downloader.config.GetChunkIDFromHash(task.chunkHash) - - if downloader.snapshotCache != nil && downloader.storage.IsCacheNeeded() { - - var exist bool - var err error - - // Reset the chunk with a hasher -- we're reading from the cache where chunk are not encrypted or compressed - chunk.Reset(true) - - cachedPath, exist, _, err = downloader.snapshotCache.FindChunk(threadIndex, chunkID, false) - if err != nil { - LOG_WARN("DOWNLOAD_CACHE", "Failed to find the cache path for the chunk %s: %v", chunkID, err) - } else if exist { - err = downloader.snapshotCache.DownloadFile(0, cachedPath, chunk) - if err != nil { - LOG_WARN("DOWNLOAD_CACHE", "Failed to load the chunk %s from the snapshot cache: %v", chunkID, err) - } else { - actualChunkID := chunk.GetID() - if actualChunkID != chunkID { - LOG_WARN("DOWNLOAD_CACHE_CORRUPTED", - "The chunk %s load from the snapshot cache has a hash id of %s", chunkID, actualChunkID) - } else { - LOG_DEBUG("CHUNK_CACHE", "Chunk %s has been loaded from the snapshot cache", chunkID) - - downloader.completionChannel <- ChunkDownloadCompletion{chunk: chunk, chunkIndex: task.chunkIndex} - return false - } - } - } - } - - // Reset the chunk without a hasher -- the downloaded content will be encrypted and/or compressed and the hasher - // will be set up before the encryption - chunk.Reset(false) - - // If failures 
are allowed, complete the task properly - completeFailedChunk := func(chunk *Chunk) { - if downloader.allowFailures { - chunk.isBroken = true - downloader.completionChannel <- ChunkDownloadCompletion{chunk: chunk, chunkIndex: task.chunkIndex} - } - } - - const MaxDownloadAttempts = 3 - for downloadAttempt := 0; ; downloadAttempt++ { - - // Find the chunk by ID first. - chunkPath, exist, _, err := downloader.storage.FindChunk(threadIndex, chunkID, false) - if err != nil { - completeFailedChunk(chunk) - LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CHUNK", "Failed to find the chunk %s: %v", chunkID, err) - return false - } - - if !exist { - // No chunk is found. Have to find it in the fossil pool again. - fossilPath, exist, _, err := downloader.storage.FindChunk(threadIndex, chunkID, true) - if err != nil { - completeFailedChunk(chunk) - LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CHUNK", "Failed to find the chunk %s: %v", chunkID, err) - return false - } - - if !exist { - - retry := false - - // Retry for Hubic or WebDAV as it may return 404 even when the chunk exists - if _, ok := downloader.storage.(*HubicStorage); ok { - retry = true - } - - if _, ok := downloader.storage.(*WebDAVStorage); ok { - retry = true - } - - if retry && downloadAttempt < MaxDownloadAttempts { - LOG_WARN("DOWNLOAD_RETRY", "Failed to find the chunk %s; retrying", chunkID) - continue - } - - completeFailedChunk(chunk) - // A chunk is not found. This is a serious error and hopefully it will never happen. - if err != nil { - LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CHUNK", "Chunk %s can't be found: %v", chunkID, err) - } else { - LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CHUNK", "Chunk %s can't be found", chunkID) - } - return false - } - - // We can't download the fossil directly. We have to turn it back into a regular chunk and try - // downloading again. 
- err = downloader.storage.MoveFile(threadIndex, fossilPath, chunkPath) - if err != nil { - completeFailedChunk(chunk) - LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CHUNK", "Failed to resurrect chunk %s: %v", chunkID, err) - return false - } - - LOG_WARN("DOWNLOAD_RESURRECT", "Fossil %s has been resurrected", chunkID) - continue - } - - err = downloader.storage.DownloadFile(threadIndex, chunkPath, chunk) - if err != nil { - _, isHubic := downloader.storage.(*HubicStorage) - // Retry on EOF or if it is a Hubic backend as it may return 404 even when the chunk exists - if (err == io.ErrUnexpectedEOF || isHubic) && downloadAttempt < MaxDownloadAttempts { - LOG_WARN("DOWNLOAD_RETRY", "Failed to download the chunk %s: %v; retrying", chunkID, err) - chunk.Reset(false) - continue - } else { - completeFailedChunk(chunk) - LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CHUNK", "Failed to download the chunk %s: %v", chunkID, err) - return false - } - } - - err = chunk.Decrypt(downloader.config.ChunkKey, task.chunkHash) - if err != nil { - if downloadAttempt < MaxDownloadAttempts { - LOG_WARN("DOWNLOAD_RETRY", "Failed to decrypt the chunk %s: %v; retrying", chunkID, err) - chunk.Reset(false) - continue - } else { - completeFailedChunk(chunk) - LOG_WERROR(downloader.allowFailures, "DOWNLOAD_DECRYPT", "Failed to decrypt the chunk %s: %v", chunkID, err) - return false - } - } - - actualChunkID := chunk.GetID() - if actualChunkID != chunkID { - if downloadAttempt < MaxDownloadAttempts { - LOG_WARN("DOWNLOAD_RETRY", "The chunk %s has a hash id of %s; retrying", chunkID, actualChunkID) - chunk.Reset(false) - continue - } else { - completeFailedChunk(chunk) - LOG_WERROR(downloader.allowFailures, "DOWNLOAD_CORRUPTED", "The chunk %s has a hash id of %s", chunkID, actualChunkID) - return false - } - } - - break - } - - if len(cachedPath) > 0 { - // Save a copy to the local snapshot cache - err := downloader.snapshotCache.UploadFile(threadIndex, cachedPath, chunk.GetBytes()) - if err 
!= nil { - LOG_WARN("DOWNLOAD_CACHE", "Failed to add the chunk %s to the snapshot cache: %v", chunkID, err) - } - } - - downloadedChunkSize := atomic.AddInt64(&downloader.downloadedChunkSize, int64(chunk.GetLength())) - - if (downloader.showStatistics || IsTracing()) && downloader.totalChunkSize > 0 { - - now := time.Now().Unix() - if now <= downloader.startTime { - now = downloader.startTime + 1 - } - speed := downloadedChunkSize / (now - downloader.startTime) - remainingTime := int64(0) - if speed > 0 { - remainingTime = (downloader.totalChunkSize-downloadedChunkSize)/speed + 1 - } - percentage := float32(downloadedChunkSize * 1000 / downloader.totalChunkSize) - LOG_INFO("DOWNLOAD_PROGRESS", "Downloaded chunk %d size %d, %sB/s %s %.1f%%", - task.chunkIndex+1, chunk.GetLength(), - PrettySize(speed), PrettyTime(remainingTime), percentage/10) - } else { - LOG_DEBUG("CHUNK_DOWNLOAD", "Chunk %s has been downloaded", chunkID) - } - - downloader.completionChannel <- ChunkDownloadCompletion{chunk: chunk, chunkIndex: task.chunkIndex} - return true -} diff --git a/src/duplicacy_chunkmaker.go b/src/duplicacy_chunkmaker.go index 7c3e32a4..5e6f93b8 100644 --- a/src/duplicacy_chunkmaker.go +++ b/src/duplicacy_chunkmaker.go @@ -25,15 +25,20 @@ type ChunkMaker struct { bufferSize int bufferStart int + minimumReached bool + hashSum uint64 + chunk *Chunk + config *Config hashOnly bool hashOnlyChunk *Chunk + } // CreateChunkMaker creates a chunk maker. 'randomSeed' is used to generate the character-to-integer table needed by // buzhash. 
-func CreateChunkMaker(config *Config, hashOnly bool) *ChunkMaker { +func CreateFileChunkMaker(config *Config, hashOnly bool) *ChunkMaker { size := 1 for size*2 <= config.AverageChunkSize { size *= 2 @@ -67,6 +72,33 @@ func CreateChunkMaker(config *Config, hashOnly bool) *ChunkMaker { } maker.buffer = make([]byte, 2*config.MinimumChunkSize) + maker.bufferStart = 0 + maker.bufferSize = 0 + + maker.startNewChunk() + + return maker +} + +// CreateMetaDataChunkMaker creates a chunk maker that always uses the variable-sized chunking algorithm +func CreateMetaDataChunkMaker(config *Config, chunkSize int) *ChunkMaker { + + size := 1 + for size*2 <= chunkSize { + size *= 2 + } + + if size != chunkSize { + LOG_FATAL("CHUNK_SIZE", "Invalid metadata chunk size: %d is not a power of 2", chunkSize) + return nil + } + + maker := CreateFileChunkMaker(config, false) + maker.hashMask = uint64(chunkSize - 1) + maker.maximumChunkSize = chunkSize * 4 + maker.minimumChunkSize = chunkSize / 4 + maker.bufferCapacity = 2 * maker.minimumChunkSize + maker.buffer = make([]byte, maker.bufferCapacity) return maker } @@ -90,62 +122,50 @@ func (maker *ChunkMaker) buzhashUpdate(sum uint64, out byte, in byte, length int return rotateLeftByOne(sum) ^ rotateLeft(maker.randomTable[out], uint(length)) ^ maker.randomTable[in] } -// ForEachChunk reads data from 'reader'. If EOF is encountered, it will call 'nextReader' to ask for next file. If -// 'nextReader' returns false, it will process remaining data in the buffer and then quit. When a chunk is identified, -// it will call 'endOfChunk' to return the chunk size and a boolean flag indicating if it is the last chunk. 
-func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *Chunk, final bool), - nextReader func(size int64, hash string) (io.Reader, bool)) { - - maker.bufferStart = 0 - maker.bufferSize = 0 +func (maker *ChunkMaker) startNewChunk() (chunk *Chunk) { + maker.hashSum = 0 + maker.minimumReached = false + if maker.hashOnly { + maker.chunk = maker.hashOnlyChunk + maker.chunk.Reset(true) + } else { + maker.chunk = maker.config.GetChunk() + maker.chunk.Reset(true) + } + return +} - var minimumReached bool - var hashSum uint64 - var chunk *Chunk +func (maker *ChunkMaker) AddData(reader io.Reader, sendChunk func(*Chunk)) (int64, string) { + isEOF := false fileSize := int64(0) fileHasher := maker.config.NewFileHasher() - // Start a new chunk. - startNewChunk := func() { - hashSum = 0 - minimumReached = false - if maker.hashOnly { - chunk = maker.hashOnlyChunk - chunk.Reset(true) - } else { - chunk = maker.config.GetChunk() - chunk.Reset(true) - } - } - // Move data from the buffer to the chunk. 
fill := func(count int) { + if maker.bufferStart+count < maker.bufferCapacity { - chunk.Write(maker.buffer[maker.bufferStart : maker.bufferStart+count]) + maker.chunk.Write(maker.buffer[maker.bufferStart : maker.bufferStart+count]) maker.bufferStart += count maker.bufferSize -= count } else { - chunk.Write(maker.buffer[maker.bufferStart:]) - chunk.Write(maker.buffer[:count-(maker.bufferCapacity-maker.bufferStart)]) + maker.chunk.Write(maker.buffer[maker.bufferStart:]) + maker.chunk.Write(maker.buffer[:count-(maker.bufferCapacity-maker.bufferStart)]) maker.bufferStart = count - (maker.bufferCapacity - maker.bufferStart) maker.bufferSize -= count } } - startNewChunk() - var err error - isEOF := false - if maker.minimumChunkSize == maker.maximumChunkSize { - if maker.bufferCapacity < maker.minimumChunkSize { - maker.buffer = make([]byte, maker.minimumChunkSize) + if reader == nil { + return 0, "" } for { + maker.startNewChunk() maker.bufferStart = 0 for maker.bufferStart < maker.minimumChunkSize && !isEOF { count, err := reader.Read(maker.buffer[maker.bufferStart:maker.minimumChunkSize]) @@ -153,7 +173,7 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C if err != nil { if err != io.EOF { LOG_ERROR("CHUNK_MAKER", "Failed to read %d bytes: %s", count, err.Error()) - return + return 0, "" } else { isEOF = true } @@ -161,26 +181,15 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C maker.bufferStart += count } - fileHasher.Write(maker.buffer[:maker.bufferStart]) - fileSize += int64(maker.bufferStart) - chunk.Write(maker.buffer[:maker.bufferStart]) + if maker.bufferStart > 0 { + fileHasher.Write(maker.buffer[:maker.bufferStart]) + fileSize += int64(maker.bufferStart) + maker.chunk.Write(maker.buffer[:maker.bufferStart]) + sendChunk(maker.chunk) + } if isEOF { - var ok bool - reader, ok = nextReader(fileSize, hex.EncodeToString(fileHasher.Sum(nil))) - if !ok { - endOfChunk(chunk, true) - return - } else { - 
endOfChunk(chunk, false) - startNewChunk() - fileSize = 0 - fileHasher = maker.config.NewFileHasher() - isEOF = false - } - } else { - endOfChunk(chunk, false) - startNewChunk() + return fileSize, hex.EncodeToString(fileHasher.Sum(nil)) } } @@ -189,7 +198,7 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C for { // If the buffer still has some space left and EOF is not seen, read more data. - for maker.bufferSize < maker.bufferCapacity && !isEOF { + for maker.bufferSize < maker.bufferCapacity && !isEOF && reader != nil { start := maker.bufferStart + maker.bufferSize count := maker.bufferCapacity - start if start >= maker.bufferCapacity { @@ -201,7 +210,7 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C if err != nil && err != io.EOF { LOG_ERROR("CHUNK_MAKER", "Failed to read %d bytes: %s", count, err.Error()) - return + return 0, "" } maker.bufferSize += count @@ -210,54 +219,55 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C // if EOF is seen, try to switch to next file and continue if err == io.EOF { - var ok bool - reader, ok = nextReader(fileSize, hex.EncodeToString(fileHasher.Sum(nil))) - if !ok { - isEOF = true - } else { - fileSize = 0 - fileHasher = maker.config.NewFileHasher() - isEOF = false - } + isEOF = true + break } } // No eough data to meet the minimum chunk size requirement, so just return as a chunk. if maker.bufferSize < maker.minimumChunkSize { - fill(maker.bufferSize) - endOfChunk(chunk, true) - return + if reader == nil { + fill(maker.bufferSize) + if maker.chunk.GetLength() > 0 { + sendChunk(maker.chunk) + } + return 0, "" + } else if isEOF { + return fileSize, hex.EncodeToString(fileHasher.Sum(nil)) + } else { + continue + } } // Minimum chunk size has been reached. Calculate the buzhash for the minimum size chunk. 
- if !minimumReached { + if !maker.minimumReached { bytes := maker.minimumChunkSize if maker.bufferStart+bytes < maker.bufferCapacity { - hashSum = maker.buzhashSum(0, maker.buffer[maker.bufferStart:maker.bufferStart+bytes]) + maker.hashSum = maker.buzhashSum(0, maker.buffer[maker.bufferStart:maker.bufferStart+bytes]) } else { - hashSum = maker.buzhashSum(0, maker.buffer[maker.bufferStart:]) - hashSum = maker.buzhashSum(hashSum, + maker.hashSum = maker.buzhashSum(0, maker.buffer[maker.bufferStart:]) + maker.hashSum = maker.buzhashSum(maker.hashSum, maker.buffer[:bytes-(maker.bufferCapacity-maker.bufferStart)]) } - if (hashSum & maker.hashMask) == 0 { + if (maker.hashSum & maker.hashMask) == 0 { // This is a minimum size chunk fill(bytes) - endOfChunk(chunk, false) - startNewChunk() + sendChunk(maker.chunk) + maker.startNewChunk() continue } - minimumReached = true + maker.minimumReached = true } // Now check the buzhash of the data in the buffer, shifting one byte at a time. bytes := maker.bufferSize - maker.minimumChunkSize - isEOC := false - maxSize := maker.maximumChunkSize - chunk.GetLength() - for i := 0; i < maker.bufferSize-maker.minimumChunkSize; i++ { + isEOC := false // chunk boundary found + maxSize := maker.maximumChunkSize - maker.chunk.GetLength() + for i := 0; i < bytes; i++ { out := maker.bufferStart + i if out >= maker.bufferCapacity { out -= maker.bufferCapacity @@ -267,8 +277,8 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C in -= maker.bufferCapacity } - hashSum = maker.buzhashUpdate(hashSum, maker.buffer[out], maker.buffer[in], maker.minimumChunkSize) - if (hashSum&maker.hashMask) == 0 || i == maxSize-maker.minimumChunkSize-1 { + maker.hashSum = maker.buzhashUpdate(maker.hashSum, maker.buffer[out], maker.buffer[in], maker.minimumChunkSize) + if (maker.hashSum&maker.hashMask) == 0 || i == maxSize-maker.minimumChunkSize-1 { // A chunk is completed. 
bytes = i + 1 + maker.minimumChunkSize isEOC = true @@ -277,21 +287,20 @@ func (maker *ChunkMaker) ForEachChunk(reader io.Reader, endOfChunk func(chunk *C } fill(bytes) - if isEOC { - if isEOF && maker.bufferSize == 0 { - endOfChunk(chunk, true) - return + sendChunk(maker.chunk) + maker.startNewChunk() + } else { + if reader == nil { + fill(maker.minimumChunkSize) + sendChunk(maker.chunk) + maker.startNewChunk() + return 0, "" } - endOfChunk(chunk, false) - startNewChunk() - continue } if isEOF { - fill(maker.bufferSize) - endOfChunk(chunk, true) - return + return fileSize, hex.EncodeToString(fileHasher.Sum(nil)) } } } diff --git a/src/duplicacy_chunkmaker_test.go b/src/duplicacy_chunkmaker_test.go index 0ff5f525..5e35c834 100644 --- a/src/duplicacy_chunkmaker_test.go +++ b/src/duplicacy_chunkmaker_test.go @@ -7,14 +7,12 @@ package duplicacy import ( "bytes" crypto_rand "crypto/rand" - "io" "math/rand" "sort" "testing" ) -func splitIntoChunks(content []byte, n, averageChunkSize, maxChunkSize, minChunkSize, - bufferCapacity int) ([]string, int) { +func splitIntoChunks(content []byte, n, averageChunkSize, maxChunkSize, minChunkSize int) ([]string, int) { config := CreateConfig() @@ -27,14 +25,12 @@ func splitIntoChunks(content []byte, n, averageChunkSize, maxChunkSize, minChunk config.HashKey = DEFAULT_KEY config.IDKey = DEFAULT_KEY - maker := CreateChunkMaker(config, false) + maker := CreateFileChunkMaker(config, false) var chunks []string totalChunkSize := 0 totalFileSize := int64(0) - //LOG_INFO("CHUNK_SPLIT", "bufferCapacity: %d", bufferCapacity) - buffers := make([]*bytes.Buffer, n) sizes := make([]int, n) sizes[0] = 0 @@ -42,7 +38,7 @@ func splitIntoChunks(content []byte, n, averageChunkSize, maxChunkSize, minChunk same := true for same { same = false - sizes[i] = rand.Int() % n + sizes[i] = rand.Int() % len(content) for j := 0; j < i; j++ { if sizes[i] == sizes[j] { same = true @@ -59,22 +55,17 @@ func splitIntoChunks(content []byte, n, averageChunkSize, 
maxChunkSize, minChunk } buffers[n-1] = bytes.NewBuffer(content[sizes[n-1]:]) - i := 0 - - maker.ForEachChunk(buffers[0], - func(chunk *Chunk, final bool) { - //LOG_INFO("CHUNK_SPLIT", "i: %d, chunk: %s, size: %d", i, chunk.GetHash(), size) - chunks = append(chunks, chunk.GetHash()) - totalChunkSize += chunk.GetLength() - }, - func(size int64, hash string) (io.Reader, bool) { - totalFileSize += size - i++ - if i >= len(buffers) { - return nil, false - } - return buffers[i], true - }) + chunkFunc := func(chunk *Chunk) { + chunks = append(chunks, chunk.GetHash()) + totalChunkSize += chunk.GetLength() + config.PutChunk(chunk) + } + + for _, buffer := range buffers { + fileSize, _ := maker.AddData(buffer, chunkFunc) + totalFileSize += fileSize + } + maker.AddData(nil, chunkFunc) if totalFileSize != int64(totalChunkSize) { LOG_ERROR("CHUNK_SPLIT", "total chunk size: %d, total file size: %d", totalChunkSize, totalFileSize) @@ -96,35 +87,28 @@ func TestChunkMaker(t *testing.T) { continue } - chunkArray1, totalSize1 := splitIntoChunks(content, 10, 32, 64, 16, 32) + chunkArray1, totalSize1 := splitIntoChunks(content, 10, 32, 64, 16) - capacities := [...]int{32, 33, 34, 61, 62, 63, 64, 65, 66, 126, 127, 128, 129, 130, - 255, 256, 257, 511, 512, 513, 1023, 1024, 1025, - 32, 48, 64, 128, 256, 512, 1024, 2048} - //capacities := [...]int { 32 } + for _, n := range [...]int{6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} { + chunkArray2, totalSize2 := splitIntoChunks(content, n, 32, 64, 16) - for _, capacity := range capacities { - - for _, n := range [...]int{6, 7, 8, 9, 10} { - chunkArray2, totalSize2 := splitIntoChunks(content, n, 32, 64, 16, capacity) - - if totalSize1 != totalSize2 { - t.Errorf("[size %d, capacity %d] total size is %d instead of %d", - size, capacity, totalSize2, totalSize1) - } + if totalSize1 != totalSize2 { + t.Errorf("[size %d] total size is %d instead of %d", + size, totalSize2, totalSize1) + } - if len(chunkArray1) != len(chunkArray2) { - t.Errorf("[size %d, 
capacity %d] number of chunks is %d instead of %d", - size, capacity, len(chunkArray2), len(chunkArray1)) - } else { - for i := 0; i < len(chunkArray1); i++ { - if chunkArray1[i] != chunkArray2[i] { - t.Errorf("[size %d, capacity %d, chunk %d] chunk is different", size, capacity, i) - } + if len(chunkArray1) != len(chunkArray2) { + t.Errorf("[size %d] number of chunks is %d instead of %d", + size, len(chunkArray2), len(chunkArray1)) + } else { + for i := 0; i < len(chunkArray1); i++ { + if chunkArray1[i] != chunkArray2[i] { + t.Errorf("[size %d, chunk %d] chunk is different", size, i) } } } + } } diff --git a/src/duplicacy_chunkoperator.go b/src/duplicacy_chunkoperator.go index f42742ab..09b7919b 100644 --- a/src/duplicacy_chunkoperator.go +++ b/src/duplicacy_chunkoperator.go @@ -5,6 +5,7 @@ package duplicacy import ( + "io" "sync" "sync/atomic" "time" @@ -12,42 +13,69 @@ import ( // These are operations that ChunkOperator will perform. const ( - ChunkOperationFind = 0 - ChunkOperationDelete = 1 - ChunkOperationFossilize = 2 - ChunkOperationResurrect = 3 + ChunkOperationDownload = 0 + ChunkOperationUpload = 1 + ChunkOperationDelete = 2 + ChunkOperationFossilize = 3 + ChunkOperationResurrect = 4 + ChunkOperationFind = 5 ) -// ChunkOperatorTask is used to pass parameters for different kinds of chunk operations. -type ChunkOperatorTask struct { - operation int // The type of operation - chunkID string // The chunk id - filePath string // The path of the chunk file; it may be empty +// ChunkTask is used to pass parameters for different kinds of chunk operations. +type ChunkTask struct { + operation int // The type of operation + chunkID string // The chunk id + chunkHash string // The chunk hash + chunkIndex int // The chunk index + filePath string // The path of the chunk file; it may be empty + + isMetadata bool + chunk *Chunk + + completionFunc func(chunk *Chunk, chunkIndex int) } // ChunkOperator is capable of performing multi-threaded operations on chunks. 
type ChunkOperator struct { - numberOfActiveTasks int64 // The number of chunks that are being operated on - storage Storage // This storage - threads int // Number of threads - taskQueue chan ChunkOperatorTask // Operating goroutines are waiting on this channel for input - stopChannel chan bool // Used to stop all the goroutines - - fossils []string // For fossilize operation, the paths of the fossils are stored in this slice - fossilsLock *sync.Mutex // The lock for 'fossils' + config *Config // Associated config + storage Storage // This storage + snapshotCache *FileStorage + showStatistics bool + threads int // Number of threads + taskQueue chan ChunkTask // Operating goroutines are waiting on this channel for input + stopChannel chan bool // Used to stop all the goroutines + + numberOfActiveTasks int64 // The number of chunks that are being operated on + + fossils []string // For fossilize operation, the paths of the fossils are stored in this slice + collectionLock *sync.Mutex // The lock for accessing 'fossils' + + startTime int64 // The time it starts downloading + totalChunkSize int64 // Total chunk size + downloadedChunkSize int64 // Downloaded chunk size + + allowFailures bool // Whether to fail on download error, or continue + NumberOfFailedChunks int64 // The number of chunks that can't be downloaded + + UploadCompletionFunc func(chunk *Chunk, chunkIndex int, inCache bool, chunkSize int, uploadSize int) } // CreateChunkOperator creates a new ChunkOperator. 
-func CreateChunkOperator(storage Storage, threads int) *ChunkOperator { +func CreateChunkOperator(config *Config, storage Storage, snapshotCache *FileStorage, showStatistics bool, threads int, allowFailures bool) *ChunkOperator { + operator := &ChunkOperator{ + config: config, storage: storage, + snapshotCache: snapshotCache, + showStatistics: showStatistics, threads: threads, - taskQueue: make(chan ChunkOperatorTask, threads*4), + taskQueue: make(chan ChunkTask, threads), stopChannel: make(chan bool), - fossils: make([]string, 0), - fossilsLock: &sync.Mutex{}, + collectionLock: &sync.Mutex{}, + + allowFailures: allowFailures, } // Start the operator goroutines @@ -84,38 +112,78 @@ func (operator *ChunkOperator) Stop() { atomic.AddInt64(&operator.numberOfActiveTasks, int64(-1)) } -func (operator *ChunkOperator) AddTask(operation int, chunkID string, filePath string) { +func (operator *ChunkOperator) WaitForCompletion() { - task := ChunkOperatorTask{ - operation: operation, - chunkID: chunkID, - filePath: filePath, + for atomic.LoadInt64(&operator.numberOfActiveTasks) > 0 { + time.Sleep(100 * time.Millisecond) + } +} + +func (operator *ChunkOperator) AddTask(operation int, chunkID string, chunkHash string, filePath string, chunkIndex int, chunk *Chunk, isMetadata bool, completionFunc func(*Chunk, int)) { + + task := ChunkTask { + operation: operation, + chunkID: chunkID, + chunkHash: chunkHash, + chunkIndex: chunkIndex, + filePath: filePath, + chunk: chunk, + isMetadata: isMetadata, + completionFunc: completionFunc, } + operator.taskQueue <- task atomic.AddInt64(&operator.numberOfActiveTasks, int64(1)) + + return } -func (operator *ChunkOperator) Find(chunkID string) { - operator.AddTask(ChunkOperationFind, chunkID, "") +func (operator *ChunkOperator) Download(chunkHash string, chunkIndex int, isMetadata bool) *Chunk { + chunkID := operator.config.GetChunkIDFromHash(chunkHash) + completionChannel := make(chan *Chunk) + completionFunc := func(chunk *Chunk, 
chunkIndex int) { + completionChannel <- chunk + } + operator.AddTask(ChunkOperationDownload, chunkID, chunkHash, "", chunkIndex, nil, isMetadata, completionFunc) + return <- completionChannel +} + +func (operator *ChunkOperator) DownloadAsync(chunkHash string, chunkIndex int, isMetadata bool, completionFunc func(*Chunk, int)) { + chunkID := operator.config.GetChunkIDFromHash(chunkHash) + operator.AddTask(ChunkOperationDownload, chunkID, chunkHash, "", chunkIndex, nil, isMetadata, completionFunc) +} + +func (operator *ChunkOperator) Upload(chunk *Chunk, chunkIndex int, isMetadata bool) { + chunkHash := chunk.GetHash() + chunkID := operator.config.GetChunkIDFromHash(chunkHash) + operator.AddTask(ChunkOperationUpload, chunkID, chunkHash, "", chunkIndex, chunk, isMetadata, nil) } func (operator *ChunkOperator) Delete(chunkID string, filePath string) { - operator.AddTask(ChunkOperationDelete, chunkID, filePath) + operator.AddTask(ChunkOperationDelete, chunkID, "", filePath, 0, nil, false, nil) } func (operator *ChunkOperator) Fossilize(chunkID string, filePath string) { - operator.AddTask(ChunkOperationFossilize, chunkID, filePath) + operator.AddTask(ChunkOperationFossilize, chunkID, "", filePath, 0, nil, false, nil) } func (operator *ChunkOperator) Resurrect(chunkID string, filePath string) { - operator.AddTask(ChunkOperationResurrect, chunkID, filePath) + operator.AddTask(ChunkOperationResurrect, chunkID, "", filePath, 0, nil, false, nil) } -func (operator *ChunkOperator) Run(threadIndex int, task ChunkOperatorTask) { +func (operator *ChunkOperator) Run(threadIndex int, task ChunkTask) { defer func() { atomic.AddInt64(&operator.numberOfActiveTasks, int64(-1)) }() + if task.operation == ChunkOperationDownload { + operator.DownloadChunk(threadIndex, task) + return + } else if task.operation == ChunkOperationUpload { + operator.UploadChunk(threadIndex, task) + return + } + // task.filePath may be empty. If so, find the chunk first. 
if task.operation == ChunkOperationDelete || task.operation == ChunkOperationFossilize { if task.filePath == "" { @@ -132,9 +200,9 @@ func (operator *ChunkOperator) Run(threadIndex int, task ChunkOperatorTask) { fossilPath, exist, _, _ := operator.storage.FindChunk(threadIndex, task.chunkID, true) if exist { LOG_WARN("CHUNK_FOSSILIZE", "Chunk %s is already a fossil", task.chunkID) - operator.fossilsLock.Lock() + operator.collectionLock.Lock() operator.fossils = append(operator.fossils, fossilPath) - operator.fossilsLock.Unlock() + operator.collectionLock.Unlock() } else { LOG_ERROR("CHUNK_FIND", "Chunk %s does not exist in the storage", task.chunkID) } @@ -175,17 +243,17 @@ func (operator *ChunkOperator) Run(threadIndex int, task ChunkOperatorTask) { if err == nil { LOG_TRACE("CHUNK_DELETE", "Deleted chunk file %s as the fossil already exists", task.chunkID) } - operator.fossilsLock.Lock() + operator.collectionLock.Lock() operator.fossils = append(operator.fossils, fossilPath) - operator.fossilsLock.Unlock() + operator.collectionLock.Unlock() } else { LOG_ERROR("CHUNK_DELETE", "Failed to fossilize the chunk %s: %v", task.chunkID, err) } } else { LOG_TRACE("CHUNK_FOSSILIZE", "The chunk %s has been marked as a fossil", task.chunkID) - operator.fossilsLock.Lock() + operator.collectionLock.Lock() operator.fossils = append(operator.fossils, fossilPath) - operator.fossilsLock.Unlock() + operator.collectionLock.Unlock() } } else if task.operation == ChunkOperationResurrect { chunkPath, exist, _, err := operator.storage.FindChunk(threadIndex, task.chunkID, false) @@ -207,3 +275,267 @@ func (operator *ChunkOperator) Run(threadIndex int, task ChunkOperatorTask) { } } } + +// Download downloads a chunk from the storage. 
+func (operator *ChunkOperator) DownloadChunk(threadIndex int, task ChunkTask) { + + cachedPath := "" + chunk := operator.config.GetChunk() + chunk.isMetadata = task.isMetadata + chunkID := task.chunkID + + defer func() { + if chunk != nil { + operator.config.PutChunk(chunk) + } + } () + + if task.isMetadata && operator.snapshotCache != nil { + + var exist bool + var err error + + // Reset the chunk with a hasher -- we're reading from the cache where chunk are not encrypted or compressed + chunk.Reset(true) + + cachedPath, exist, _, err = operator.snapshotCache.FindChunk(threadIndex, chunkID, false) + if err != nil { + LOG_WARN("DOWNLOAD_CACHE", "Failed to find the cache path for the chunk %s: %v", chunkID, err) + } else if exist { + err = operator.snapshotCache.DownloadFile(0, cachedPath, chunk) + if err != nil { + LOG_WARN("DOWNLOAD_CACHE", "Failed to load the chunk %s from the snapshot cache: %v", chunkID, err) + } else { + actualChunkID := chunk.GetID() + if actualChunkID != chunkID { + LOG_WARN("DOWNLOAD_CACHE_CORRUPTED", + "The chunk %s load from the snapshot cache has a hash id of %s", chunkID, actualChunkID) + } else { + LOG_DEBUG("CHUNK_CACHE", "Chunk %s has been loaded from the snapshot cache", chunkID) + + task.completionFunc(chunk, task.chunkIndex) + chunk = nil + return + } + } + } + } + + // Reset the chunk without a hasher -- the downloaded content will be encrypted and/or compressed and the hasher + // will be set up before the encryption + chunk.Reset(false) + chunk.isMetadata = task.isMetadata + + // If failures are allowed, complete the task properly + completeFailedChunk := func() { + + atomic.AddInt64(&operator.NumberOfFailedChunks, 1) + if operator.allowFailures { + task.completionFunc(chunk, task.chunkIndex) + } + } + + const MaxDownloadAttempts = 3 + for downloadAttempt := 0; ; downloadAttempt++ { + + // Find the chunk by ID first. 
+ chunkPath, exist, _, err := operator.storage.FindChunk(threadIndex, chunkID, false) + if err != nil { + completeFailedChunk() + LOG_WERROR(operator.allowFailures, "DOWNLOAD_CHUNK", "Failed to find the chunk %s: %v", chunkID, err) + return + } + + if !exist { + // No chunk is found. Have to find it in the fossil pool again. + fossilPath, exist, _, err := operator.storage.FindChunk(threadIndex, chunkID, true) + if err != nil { + completeFailedChunk() + LOG_WERROR(operator.allowFailures, "DOWNLOAD_CHUNK", "Failed to find the chunk %s: %v", chunkID, err) + return + } + + if !exist { + + retry := false + + // Retry for Hubic or WebDAV as it may return 404 even when the chunk exists + if _, ok := operator.storage.(*HubicStorage); ok { + retry = true + } + + if _, ok := operator.storage.(*WebDAVStorage); ok { + retry = true + } + + if retry && downloadAttempt < MaxDownloadAttempts { + LOG_WARN("DOWNLOAD_RETRY", "Failed to find the chunk %s; retrying", chunkID) + continue + } + + // A chunk is not found. This is a serious error and hopefully it will never happen. + completeFailedChunk() + if err != nil { + LOG_WERROR(operator.allowFailures, "DOWNLOAD_CHUNK", "Chunk %s can't be found: %v", chunkID, err) + } else { + LOG_WERROR(operator.allowFailures, "DOWNLOAD_CHUNK", "Chunk %s can't be found", chunkID) + } + return + } + + // We can't download the fossil directly. We have to turn it back into a regular chunk and try + // downloading again. 
+ err = operator.storage.MoveFile(threadIndex, fossilPath, chunkPath) + if err != nil { + completeFailedChunk() + LOG_WERROR(operator.allowFailures, "DOWNLOAD_CHUNK", "Failed to resurrect chunk %s: %v", chunkID, err) + return + } + + LOG_WARN("DOWNLOAD_RESURRECT", "Fossil %s has been resurrected", chunkID) + continue + } + + err = operator.storage.DownloadFile(threadIndex, chunkPath, chunk) + if err != nil { + _, isHubic := operator.storage.(*HubicStorage) + // Retry on EOF or if it is a Hubic backend as it may return 404 even when the chunk exists + if (err == io.ErrUnexpectedEOF || isHubic) && downloadAttempt < MaxDownloadAttempts { + LOG_WARN("DOWNLOAD_RETRY", "Failed to download the chunk %s: %v; retrying", chunkID, err) + chunk.Reset(false) + chunk.isMetadata = task.isMetadata + continue + } else { + completeFailedChunk() + LOG_WERROR(operator.allowFailures, "DOWNLOAD_CHUNK", "Failed to download the chunk %s: %v", chunkID, err) + return + } + } + + err = chunk.Decrypt(operator.config.ChunkKey, task.chunkHash) + if err != nil { + if downloadAttempt < MaxDownloadAttempts { + LOG_WARN("DOWNLOAD_RETRY", "Failed to decrypt the chunk %s: %v; retrying", chunkID, err) + chunk.Reset(false) + chunk.isMetadata = task.isMetadata + continue + } else { + completeFailedChunk() + LOG_WERROR(operator.allowFailures, "DOWNLOAD_DECRYPT", "Failed to decrypt the chunk %s: %v", chunkID, err) + return + } + } + + actualChunkID := chunk.GetID() + if actualChunkID != chunkID { + if downloadAttempt < MaxDownloadAttempts { + LOG_WARN("DOWNLOAD_RETRY", "The chunk %s has a hash id of %s; retrying", chunkID, actualChunkID) + chunk.Reset(false) + chunk.isMetadata = task.isMetadata + continue + } else { + completeFailedChunk() + LOG_WERROR(operator.allowFailures, "DOWNLOAD_CORRUPTED", "The chunk %s has a hash id of %s", chunkID, actualChunkID) + return + } + } + + break + } + + if chunk.isMetadata && len(cachedPath) > 0 { + // Save a copy to the local snapshot cache + err := 
operator.snapshotCache.UploadFile(threadIndex, cachedPath, chunk.GetBytes()) + if err != nil { + LOG_WARN("DOWNLOAD_CACHE", "Failed to add the chunk %s to the snapshot cache: %v", chunkID, err) + } + } + + downloadedChunkSize := atomic.AddInt64(&operator.downloadedChunkSize, int64(chunk.GetLength())) + + if (operator.showStatistics || IsTracing()) && operator.totalChunkSize > 0 { + + now := time.Now().Unix() + if now <= operator.startTime { + now = operator.startTime + 1 + } + speed := downloadedChunkSize / (now - operator.startTime) + remainingTime := int64(0) + if speed > 0 { + remainingTime = (operator.totalChunkSize-downloadedChunkSize)/speed + 1 + } + percentage := float32(downloadedChunkSize * 1000 / operator.totalChunkSize) + LOG_INFO("DOWNLOAD_PROGRESS", "Downloaded chunk %d size %d, %sB/s %s %.1f%%", + task.chunkIndex+1, chunk.GetLength(), + PrettySize(speed), PrettyTime(remainingTime), percentage/10) + } else { + LOG_DEBUG("CHUNK_DOWNLOAD", "Chunk %s has been downloaded", chunkID) + } + + task.completionFunc(chunk, task.chunkIndex) + chunk = nil + return +} + +// UploadChunk is called by the task goroutines to perform the actual uploading +func (operator *ChunkOperator) UploadChunk(threadIndex int, task ChunkTask) bool { + + chunk := task.chunk + chunkID := task.chunkID + chunkSize := chunk.GetLength() + + // For a snapshot chunk, verify that its chunk id is correct + if task.isMetadata { + chunk.VerifyID() + } + + if task.isMetadata && operator.storage.IsCacheNeeded() { + // Save a copy to the local snapshot. 
+ chunkPath, exist, _, err := operator.snapshotCache.FindChunk(threadIndex, chunkID, false) + if err != nil { + LOG_WARN("UPLOAD_CACHE", "Failed to find the cache path for the chunk %s: %v", chunkID, err) + } else if exist { + LOG_DEBUG("CHUNK_CACHE", "Chunk %s already exists in the snapshot cache", chunkID) + } else if err = operator.snapshotCache.UploadFile(threadIndex, chunkPath, chunk.GetBytes()); err != nil { + LOG_WARN("UPLOAD_CACHE", "Failed to save the chunk %s to the snapshot cache: %v", chunkID, err) + } else { + LOG_DEBUG("CHUNK_CACHE", "Chunk %s has been saved to the snapshot cache", chunkID) + } + } + + // This returns the path the chunk file should be at. + chunkPath, exist, _, err := operator.storage.FindChunk(threadIndex, chunkID, false) + if err != nil { + LOG_ERROR("UPLOAD_CHUNK", "Failed to find the path for the chunk %s: %v", chunkID, err) + return false + } + + if exist { + // Chunk deduplication by name in effect here. + LOG_DEBUG("CHUNK_DUPLICATE", "Chunk %s already exists", chunkID) + + operator.UploadCompletionFunc(chunk, task.chunkIndex, false, chunkSize, 0) + return false + } + + // Encrypt the chunk only after we know that it must be uploaded. 
+ err = chunk.Encrypt(operator.config.ChunkKey, chunk.GetHash(), task.isMetadata) + if err != nil { + LOG_ERROR("UPLOAD_CHUNK", "Failed to encrypt the chunk %s: %v", chunkID, err) + return false + } + + if !operator.config.dryRun { + err = operator.storage.UploadFile(threadIndex, chunkPath, chunk.GetBytes()) + if err != nil { + LOG_ERROR("UPLOAD_CHUNK", "Failed to upload the chunk %s: %v", chunkID, err) + return false + } + LOG_DEBUG("CHUNK_UPLOAD", "Chunk %s has been uploaded", chunkID) + } else { + LOG_DEBUG("CHUNK_UPLOAD", "Uploading was skipped for chunk %s", chunkID) + } + + operator.UploadCompletionFunc(chunk, task.chunkIndex, false, chunkSize, chunk.GetLength()) + return true +} \ No newline at end of file diff --git a/src/duplicacy_chunkuploader_test.go b/src/duplicacy_chunkoperator_test.go similarity index 77% rename from src/duplicacy_chunkuploader_test.go rename to src/duplicacy_chunkoperator_test.go index c31c1c18..e636defb 100644 --- a/src/duplicacy_chunkuploader_test.go +++ b/src/duplicacy_chunkoperator_test.go @@ -15,11 +15,11 @@ import ( "math/rand" ) -func TestUploaderAndDownloader(t *testing.T) { +func TestChunkOperator(t *testing.T) { rand.Seed(time.Now().UnixNano()) setTestingT(t) - SetLoggingLevel(INFO) + SetLoggingLevel(DEBUG) defer func() { if r := recover(); r != nil { @@ -87,35 +87,25 @@ func TestUploaderAndDownloader(t *testing.T) { totalFileSize += chunk.GetLength() } - completionFunc := func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) { + chunkOperator := CreateChunkOperator(config, storage, nil, false, testThreads, false) + chunkOperator.UploadCompletionFunc = func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) { t.Logf("Chunk %s size %d (%d/%d) uploaded", chunk.GetID(), chunkSize, chunkIndex, len(chunks)) } - chunkUploader := CreateChunkUploader(config, storage, nil, testThreads, nil) - chunkUploader.completionFunc = completionFunc - chunkUploader.Start() - for i, chunk := 
range chunks { - chunkUploader.StartChunk(chunk, i) + chunkOperator.Upload(chunk, i, false) } - chunkUploader.Stop() - - chunkDownloader := CreateChunkDownloader(config, storage, nil, true, testThreads, false) - chunkDownloader.totalChunkSize = int64(totalFileSize) - - for _, chunk := range chunks { - chunkDownloader.AddChunk(chunk.GetHash()) - } + chunkOperator.WaitForCompletion() for i, chunk := range chunks { - downloaded := chunkDownloader.WaitForChunk(i) + downloaded := chunkOperator.Download(chunk.GetHash(), i, false) if downloaded.GetID() != chunk.GetID() { t.Errorf("Uploaded: %s, downloaded: %s", chunk.GetID(), downloaded.GetID()) } } - chunkDownloader.Stop() + chunkOperator.Stop() for _, file := range listChunks(storage) { err = storage.DeleteFile(0, "chunks/"+file) diff --git a/src/duplicacy_chunkuploader.go b/src/duplicacy_chunkuploader.go deleted file mode 100644 index b983fe0d..00000000 --- a/src/duplicacy_chunkuploader.go +++ /dev/null @@ -1,151 +0,0 @@ -// Copyright (c) Acrosync LLC. All rights reserved. -// Free for personal use and commercial trial -// Commercial use requires per-user licenses available from https://duplicacy.com - -package duplicacy - -import ( - "sync/atomic" - "time" -) - -// ChunkUploadTask represents a chunk to be uploaded. -type ChunkUploadTask struct { - chunk *Chunk - chunkIndex int -} - -// ChunkUploader uploads chunks to the storage using one or more uploading goroutines. Chunks are added -// by the call to StartChunk(), and then passed to the uploading goroutines. The completion function is -// called when the downloading is completed. 
Note that ChunkUploader does not release chunks to the -// chunk pool; instead -type ChunkUploader struct { - config *Config // Associated config - storage Storage // Download from this storage - snapshotCache *FileStorage // Used as cache if not nil; usually for uploading snapshot chunks - threads int // Number of uploading goroutines - taskQueue chan ChunkUploadTask // Uploading goroutines are listening on this channel for upload jobs - stopChannel chan bool // Used to terminate uploading goroutines - - numberOfUploadingTasks int32 // The number of uploading tasks - - // Uploading goroutines call this function after having downloaded chunks - completionFunc func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) -} - -// CreateChunkUploader creates a chunk uploader. -func CreateChunkUploader(config *Config, storage Storage, snapshotCache *FileStorage, threads int, - completionFunc func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int)) *ChunkUploader { - uploader := &ChunkUploader{ - config: config, - storage: storage, - snapshotCache: snapshotCache, - threads: threads, - taskQueue: make(chan ChunkUploadTask, 1), - stopChannel: make(chan bool), - completionFunc: completionFunc, - } - - return uploader -} - -// Starts starts uploading goroutines. -func (uploader *ChunkUploader) Start() { - for i := 0; i < uploader.threads; i++ { - go func(threadIndex int) { - defer CatchLogException() - for { - select { - case task := <-uploader.taskQueue: - uploader.Upload(threadIndex, task) - case <-uploader.stopChannel: - return - } - } - }(i) - } -} - -// StartChunk sends a chunk to be uploaded to a waiting uploading goroutine. It may block if all uploading goroutines are busy. 
-func (uploader *ChunkUploader) StartChunk(chunk *Chunk, chunkIndex int) { - atomic.AddInt32(&uploader.numberOfUploadingTasks, 1) - uploader.taskQueue <- ChunkUploadTask{ - chunk: chunk, - chunkIndex: chunkIndex, - } -} - -// Stop stops all uploading goroutines. -func (uploader *ChunkUploader) Stop() { - for atomic.LoadInt32(&uploader.numberOfUploadingTasks) > 0 { - time.Sleep(100 * time.Millisecond) - } - for i := 0; i < uploader.threads; i++ { - uploader.stopChannel <- false - } -} - -// Upload is called by the uploading goroutines to perform the actual uploading -func (uploader *ChunkUploader) Upload(threadIndex int, task ChunkUploadTask) bool { - - chunk := task.chunk - chunkSize := chunk.GetLength() - chunkID := chunk.GetID() - - // For a snapshot chunk, verify that its chunk id is correct - if uploader.snapshotCache != nil { - chunk.VerifyID() - } - - if uploader.snapshotCache != nil && uploader.storage.IsCacheNeeded() { - // Save a copy to the local snapshot. - chunkPath, exist, _, err := uploader.snapshotCache.FindChunk(threadIndex, chunkID, false) - if err != nil { - LOG_WARN("UPLOAD_CACHE", "Failed to find the cache path for the chunk %s: %v", chunkID, err) - } else if exist { - LOG_DEBUG("CHUNK_CACHE", "Chunk %s already exists in the snapshot cache", chunkID) - } else if err = uploader.snapshotCache.UploadFile(threadIndex, chunkPath, chunk.GetBytes()); err != nil { - LOG_WARN("UPLOAD_CACHE", "Failed to save the chunk %s to the snapshot cache: %v", chunkID, err) - } else { - LOG_DEBUG("CHUNK_CACHE", "Chunk %s has been saved to the snapshot cache", chunkID) - } - } - - // This returns the path the chunk file should be at. - chunkPath, exist, _, err := uploader.storage.FindChunk(threadIndex, chunkID, false) - if err != nil { - LOG_ERROR("UPLOAD_CHUNK", "Failed to find the path for the chunk %s: %v", chunkID, err) - return false - } - - if exist { - // Chunk deduplication by name in effect here. 
- LOG_DEBUG("CHUNK_DUPLICATE", "Chunk %s already exists", chunkID) - - uploader.completionFunc(chunk, task.chunkIndex, true, chunkSize, 0) - atomic.AddInt32(&uploader.numberOfUploadingTasks, -1) - return false - } - - // Encrypt the chunk only after we know that it must be uploaded. - err = chunk.Encrypt(uploader.config.ChunkKey, chunk.GetHash(), uploader.snapshotCache != nil) - if err != nil { - LOG_ERROR("UPLOAD_CHUNK", "Failed to encrypt the chunk %s: %v", chunkID, err) - return false - } - - if !uploader.config.dryRun { - err = uploader.storage.UploadFile(threadIndex, chunkPath, chunk.GetBytes()) - if err != nil { - LOG_ERROR("UPLOAD_CHUNK", "Failed to upload the chunk %s: %v", chunkID, err) - return false - } - LOG_DEBUG("CHUNK_UPLOAD", "Chunk %s has been uploaded", chunkID) - } else { - LOG_DEBUG("CHUNK_UPLOAD", "Uploading was skipped for chunk %s", chunkID) - } - - uploader.completionFunc(chunk, task.chunkIndex, false, chunkSize, chunk.GetLength()) - atomic.AddInt32(&uploader.numberOfUploadingTasks, -1) - return true -} diff --git a/src/duplicacy_entry.go b/src/duplicacy_entry.go index 7d2849f7..fa56b865 100644 --- a/src/duplicacy_entry.go +++ b/src/duplicacy_entry.go @@ -16,6 +16,11 @@ import ( "strconv" "strings" "time" + "bytes" + "crypto/sha256" + + "github.com/vmihailenco/msgpack" + ) // This is the hidden directory in the repository for storing various files. @@ -45,7 +50,7 @@ type Entry struct { EndChunk int EndOffset int - Attributes map[string][]byte + Attributes *map[string][]byte } // CreateEntry creates an entry from file properties. 
@@ -93,6 +98,27 @@ func CreateEntryFromFileInfo(fileInfo os.FileInfo, directory string) *Entry { return entry } +func (entry *Entry) Copy() *Entry { + return &Entry{ + Path: entry.Path, + Size: entry.Size, + Time: entry.Time, + Mode: entry.Mode, + Link: entry.Link, + Hash: entry.Hash, + + UID: entry.UID, + GID: entry.GID, + + StartChunk: entry.StartChunk, + StartOffset: entry.StartOffset, + EndChunk: entry.EndChunk, + EndOffset: entry.EndOffset, + + Attributes: entry.Attributes, + } +} + // CreateEntryFromJSON creates an entry from a json description. func (entry *Entry) UnmarshalJSON(description []byte) (err error) { @@ -175,17 +201,17 @@ func (entry *Entry) UnmarshalJSON(description []byte) (err error) { if attributes, ok := value.(map[string]interface{}); !ok { return fmt.Errorf("Attributes are invalid for file '%s' in the snapshot", entry.Path) } else { - entry.Attributes = make(map[string][]byte) + entry.Attributes = &map[string][]byte{} for name, object := range attributes { if object == nil { - entry.Attributes[name] = []byte("") + (*entry.Attributes)[name] = []byte("") } else if attributeInBase64, ok := object.(string); !ok { return fmt.Errorf("Attribute '%s' is invalid for file '%s' in the snapshot", name, entry.Path) } else if attribute, err := base64.StdEncoding.DecodeString(attributeInBase64); err != nil { return fmt.Errorf("Failed to decode attribute '%s' for file '%s' in the snapshot: %v", name, entry.Path, err) } else { - entry.Attributes[name] = attribute + (*entry.Attributes)[name] = attribute } } } @@ -244,7 +270,7 @@ func (entry *Entry) convertToObject(encodeName bool) map[string]interface{} { object["gid"] = entry.GID } - if len(entry.Attributes) > 0 { + if entry.Attributes != nil && len(*entry.Attributes) > 0 { object["attributes"] = entry.Attributes } @@ -259,6 +285,197 @@ func (entry *Entry) MarshalJSON() ([]byte, error) { return description, err } +var _ msgpack.CustomEncoder = (*Entry)(nil) +var _ msgpack.CustomDecoder = (*Entry)(nil) + 
+func (entry *Entry) EncodeMsgpack(encoder *msgpack.Encoder) error { + + err := encoder.EncodeString(entry.Path) + if err != nil { + return err + } + + err = encoder.EncodeInt(entry.Size) + if err != nil { + return err + } + + err = encoder.EncodeInt(entry.Time) + if err != nil { + return err + } + + err = encoder.EncodeInt(int64(entry.Mode)) + if err != nil { + return err + } + + err = encoder.EncodeString(entry.Link) + if err != nil { + return err + } + + err = encoder.EncodeString(entry.Hash) + if err != nil { + return err + } + + err = encoder.EncodeInt(int64(entry.StartChunk)) + if err != nil { + return err + } + + err = encoder.EncodeInt(int64(entry.StartOffset)) + if err != nil { + return err + } + + err = encoder.EncodeInt(int64(entry.EndChunk)) + if err != nil { + return err + } + + err = encoder.EncodeInt(int64(entry.EndOffset)) + if err != nil { + return err + } + + err = encoder.EncodeInt(int64(entry.UID)) + if err != nil { + return err + } + + err = encoder.EncodeInt(int64(entry.GID)) + if err != nil { + return err + } + + var numberOfAttributes int64 + if entry.Attributes != nil { + numberOfAttributes = int64(len(*entry.Attributes)) + } + + err = encoder.EncodeInt(numberOfAttributes) + if err != nil { + return err + } + + if entry.Attributes != nil { + attributes := make([]string, numberOfAttributes) + i := 0 + for attribute := range *entry.Attributes { + attributes[i] = attribute + i++ + } + sort.Strings(attributes) + for _, attribute := range attributes { + err = encoder.EncodeString(attribute) + if err != nil { + return err + } + err = encoder.EncodeString(string((*entry.Attributes)[attribute])) + if err != nil { + return err + } + } + } + + return nil +} + +func (entry *Entry) DecodeMsgpack(decoder *msgpack.Decoder) error { + + var err error + + entry.Path, err = decoder.DecodeString() + if err != nil { + return err + } + + entry.Size, err = decoder.DecodeInt64() + if err != nil { + return err + } + + entry.Time, err = decoder.DecodeInt64() + if 
err != nil { + return err + } + + mode, err := decoder.DecodeInt64() + if err != nil { + return err + } + entry.Mode = uint32(mode) + + entry.Link, err = decoder.DecodeString() + if err != nil { + return err + } + + entry.Hash, err = decoder.DecodeString() + if err != nil { + return err + } + + startChunk, err := decoder.DecodeInt() + if err != nil { + return err + } + entry.StartChunk = int(startChunk) + + startOffset, err := decoder.DecodeInt() + if err != nil { + return err + } + entry.StartOffset = int(startOffset) + + endChunk, err := decoder.DecodeInt() + if err != nil { + return err + } + entry.EndChunk = int(endChunk) + + endOffset, err := decoder.DecodeInt() + if err != nil { + return err + } + entry.EndOffset = int(endOffset) + + uid, err := decoder.DecodeInt() + if err != nil { + return err + } + entry.UID = int(uid) + + gid, err := decoder.DecodeInt() + if err != nil { + return err + } + entry.GID = int(gid) + + numberOfAttributes, err := decoder.DecodeInt() + if err != nil { + return err + } + + if numberOfAttributes > 0 { + entry.Attributes = &map[string][]byte{} + for i := 0; i < numberOfAttributes; i++ { + attribute, err := decoder.DecodeString() + if err != nil { + return err + } + value, err := decoder.DecodeString() + if err != nil { + return err + } + (*entry.Attributes)[attribute] = []byte(value) + } + } + return nil +} + func (entry *Entry) IsFile() bool { return entry.Mode&uint32(os.ModeType) == 0 } @@ -271,10 +488,27 @@ func (entry *Entry) IsLink() bool { return entry.Mode&uint32(os.ModeSymlink) != 0 } +func (entry *Entry) IsComplete() bool { + return entry.Size >= 0 +} + func (entry *Entry) GetPermissions() os.FileMode { return os.FileMode(entry.Mode) & fileModeMask } +func (entry *Entry) GetParent() string { + path := entry.Path + if path != "" && path[len(path) - 1] == '/' { + path = path[:len(path) - 1] + } + i := strings.LastIndex(path, "/") + if i == -1 { + return "" + } else { + return path[:i] + } +} + func (entry *Entry) 
IsSameAs(other *Entry) bool { return entry.Size == other.Size && entry.Time <= other.Time+1 && entry.Time >= other.Time-1 } @@ -326,7 +560,7 @@ func (entry *Entry) RestoreMetadata(fullPath string, fileInfo *os.FileInfo, setO } } - if len(entry.Attributes) > 0 { + if entry.Attributes != nil && len(*entry.Attributes) > 0 { entry.SetAttributesToFile(fullPath) } @@ -335,47 +569,62 @@ func (entry *Entry) RestoreMetadata(fullPath string, fileInfo *os.FileInfo, setO // Return -1 if 'left' should appear before 'right', 1 if opposite, and 0 if they are the same. // Files are always arranged before subdirectories under the same parent directory. -func (left *Entry) Compare(right *Entry) int { - - path1 := left.Path - path2 := right.Path +func ComparePaths(left string, right string) int { p := 0 - for ; p < len(path1) && p < len(path2); p++ { - if path1[p] != path2[p] { + for ; p < len(left) && p < len(right); p++ { + if left[p] != right[p] { break } } - // c1, c2 is the first byte that differs + // c1, c2 are the first bytes that differ var c1, c2 byte - if p < len(path1) { - c1 = path1[p] + if p < len(left) { + c1 = left[p] } - if p < len(path2) { - c2 = path2[p] + if p < len(right) { + c2 = right[p] } - // c3, c4 indicates how the current component ends - // c3 == '/': the current component is a directory - // c3 != '/': the current component is the last one + // c3, c4 indicate how the current component ends + // c3 == '/': the current component is a directory; c3 != '/': the current component is the last one c3 := c1 - for i := p; c3 != '/' && i < len(path1); i++ { - c3 = path1[i] + + // last1, last2 means if the current compoent is the last component + last1 := true + for i := p; i < len(left); i++ { + c3 = left[i] + if c3 == '/' { + last1 = i == len(left) - 1 + break + } } c4 := c2 - for i := p; c4 != '/' && i < len(path2); i++ { - c4 = path2[i] + last2 := true + for i := p; i < len(right); i++ { + c4 = right[i] + if c4 == '/' { + last2 = i == len(right) - 1 + break + 
} + } + + if last1 != last2 { + if last1 { + return -1 + } else { + return 1 + } } if c3 == '/' { if c4 == '/' { // We are comparing two directory components if c1 == '/' { - // left is shorter - // Note that c2 maybe smaller than c1 but c1 is '/' which is counted - // as 0 + // left is shorter; note that c2 maybe smaller than c1 but c1 should be treated as 0 therefore + // this is a special case that must be handled separately return -1 } else if c2 == '/' { // right is shorter @@ -397,6 +646,10 @@ func (left *Entry) Compare(right *Entry) int { } } +func (left *Entry) Compare(right *Entry) int { + return ComparePaths(left.Path, right.Path) +} + // This is used to sort entries by their names. type ByName []*Entry @@ -443,7 +696,7 @@ func (files FileInfoCompare) Less(i, j int) bool { // ListEntries returns a list of entries representing file and subdirectories under the directory 'path'. Entry paths // are normalized as relative to 'top'. 'patterns' are used to exclude or include certain files. 
-func ListEntries(top string, path string, fileList *[]*Entry, patterns []string, nobackupFile string, discardAttributes bool, excludeByAttribute bool) (directoryList []*Entry, +func ListEntries(top string, path string, patterns []string, nobackupFile string, excludeByAttribute bool, listingChannel chan *Entry) (directoryList []*Entry, skippedFiles []string, err error) { LOG_DEBUG("LIST_ENTRIES", "Listing %s", path) @@ -478,8 +731,6 @@ func ListEntries(top string, path string, fileList *[]*Entry, patterns []string, sort.Sort(FileInfoCompare(files)) - entries := make([]*Entry, 0, 4) - for _, f := range files { if f.Name() == DUPLICACY_DIRECTORY { continue @@ -520,11 +771,9 @@ func ListEntries(top string, path string, fileList *[]*Entry, patterns []string, } } - if !discardAttributes { - entry.ReadAttributes(top) - } + entry.ReadAttributes(top) - if excludeByAttribute && excludedByAttribute(entry.Attributes) { + if excludeByAttribute && entry.Attributes != nil && excludedByAttribute(*entry.Attributes) { LOG_DEBUG("LIST_EXCLUDE", "%s is excluded by attribute", entry.Path) continue } @@ -535,20 +784,20 @@ func ListEntries(top string, path string, fileList *[]*Entry, patterns []string, continue } - entries = append(entries, entry) + if entry.IsDir() { + directoryList = append(directoryList, entry) + } else { + listingChannel <- entry + } } // For top level directory we need to sort again because symlinks may have been changed if path == "" { - sort.Sort(ByName(entries)) + sort.Sort(ByName(directoryList)) } - for _, entry := range entries { - if entry.IsDir() { - directoryList = append(directoryList, entry) - } else { - *fileList = append(*fileList, entry) - } + for _, entry := range directoryList { + listingChannel <- entry } for i, j := 0, len(directoryList)-1; i < j; i, j = i+1, j-1 { @@ -597,3 +846,100 @@ func (entry *Entry) Diff(chunkHashes []string, chunkLengths []int, return modifiedLength } + +func (entry *Entry) EncodeWithHash(encoder *msgpack.Encoder) error { + 
entryBytes, err := msgpack.Marshal(entry) + if err != nil { + return err + } + hash := sha256.Sum256(entryBytes) + err = encoder.EncodeBytes(entryBytes) + if err != nil { + return err + } + err = encoder.EncodeBytes(hash[:]) + if err != nil { + return err + } + return nil +} + +func DecodeEntryWithHash(decoder *msgpack.Decoder) (*Entry, error) { + entryBytes, err := decoder.DecodeBytes() + if err != nil { + return nil, err + } + hashBytes, err := decoder.DecodeBytes() + if err != nil { + return nil, err + } + expectedHash := sha256.Sum256(entryBytes) + if bytes.Compare(expectedHash[:], hashBytes) != 0 { + return nil, fmt.Errorf("corrupted file metadata") + } + + var entry Entry + err = msgpack.Unmarshal(entryBytes, &entry) + if err != nil { + return nil, err + } + return &entry, nil +} + +func (entry *Entry) check(chunkLengths []int) error { + + if entry.Size < 0 { + return fmt.Errorf("The file %s hash an invalid size (%d)", entry.Path, entry.Size) + } + + if !entry.IsFile() || entry.Size == 0 { + return nil + } + + if entry.StartChunk < 0 { + return fmt.Errorf("The file %s starts at chunk %d", entry.Path, entry.StartChunk) + } + + if entry.EndChunk >= len(chunkLengths) { + return fmt.Errorf("The file %s ends at chunk %d while the number of chunks is %d", + entry.Path, entry.EndChunk, len(chunkLengths)) + } + + if entry.EndChunk < entry.StartChunk { + return fmt.Errorf("The file %s starts at chunk %d and ends at chunk %d", + entry.Path, entry.StartChunk, entry.EndChunk) + } + + if entry.StartOffset >= chunkLengths[entry.StartChunk] { + return fmt.Errorf("The file %s starts at offset %d of chunk %d of length %d", + entry.Path, entry.StartOffset, entry.StartChunk, chunkLengths[entry.StartChunk]) + } + + if entry.EndOffset > chunkLengths[entry.EndChunk] { + return fmt.Errorf("The file %s ends at offset %d of chunk %d of length %d", + entry.Path, entry.EndOffset, entry.EndChunk, chunkLengths[entry.EndChunk]) + } + + fileSize := int64(0) + + for i := entry.StartChunk; i 
<= entry.EndChunk; i++ { + + start := 0 + if i == entry.StartChunk { + start = entry.StartOffset + } + end := chunkLengths[i] + if i == entry.EndChunk { + end = entry.EndOffset + } + + fileSize += int64(end - start) + } + + if entry.Size != fileSize { + return fmt.Errorf("The file %s has a size of %d but the total size of chunks is %d", + entry.Path, entry.Size, fileSize) + } + + return nil +} diff --git a/src/duplicacy_entry_test.go b/src/duplicacy_entry_test.go index 57d76346..670a16b3 100644 --- a/src/duplicacy_entry_test.go +++ b/src/duplicacy_entry_test.go @@ -13,8 +13,11 @@ import ( "sort" "strings" "testing" + "bytes" + "encoding/json" "github.com/gilbertchen/xattr" + "github.com/vmihailenco/msgpack" ) func TestEntrySort(t *testing.T) { @@ -27,19 +30,19 @@ func TestEntrySort(t *testing.T) { "\xBB\xDDfile", "\xFF\xDDfile", "ab/", + "ab-/", + "ab0/", + "ab1/", "ab/c", "ab+/c-", "ab+/c0", "ab+/c/", - "ab+/c/d", "ab+/c+/", - "ab+/c+/d", "ab+/c0/", + "ab+/c/d", + "ab+/c+/d", "ab+/c0/d", - "ab-/", "ab-/c", - "ab0/", - "ab1/", "ab1/c", "ab1/\xBB\xDDfile", "ab1/\xFF\xDDfile", @@ -86,7 +89,7 @@ func TestEntrySort(t *testing.T) { } } -func TestEntryList(t *testing.T) { +func TestEntryOrder(t *testing.T) { testDir := filepath.Join(os.TempDir(), "duplicacy_test") os.RemoveAll(testDir) @@ -98,16 +101,16 @@ func TestEntryList(t *testing.T) { "ab0", "ab1", "ab+/", + "ab2/", + "ab3/", "ab+/c", "ab+/c+", "ab+/c1", "ab+/c-/", - "ab+/c-/d", "ab+/c0/", + "ab+/c-/d", "ab+/c0/d", - "ab2/", "ab2/c", - "ab3/", "ab3/c", } @@ -172,18 +175,24 @@ func TestEntryList(t *testing.T) { directories = append(directories, CreateEntry("", 0, 0, 0)) entries := make([]*Entry, 0, 4) + entryChannel := make(chan *Entry, 1024) + entries = append(entries, CreateEntry("", 0, 0, 0)) for len(directories) > 0 { directory := directories[len(directories)-1] directories = directories[:len(directories)-1] - entries = append(entries, directory) - subdirectories, _, err := ListEntries(testDir, directory.Path, 
&entries, nil, "", false, false) + subdirectories, _, err := ListEntries(testDir, directory.Path, nil, "", false, entryChannel) if err != nil { t.Errorf("ListEntries(%s, %s) returned an error: %s", testDir, directory.Path, err) } directories = append(directories, subdirectories...) } + close(entryChannel) + for entry := range entryChannel { + entries = append(entries, entry) + } + entries = entries[1:] for _, entry := range entries { @@ -274,18 +283,25 @@ func TestEntryExcludeByAttribute(t *testing.T) { directories = append(directories, CreateEntry("", 0, 0, 0)) entries := make([]*Entry, 0, 4) + entryChannel := make(chan *Entry, 1024) + entries = append(entries, CreateEntry("", 0, 0, 0)) for len(directories) > 0 { directory := directories[len(directories)-1] directories = directories[:len(directories)-1] - entries = append(entries, directory) - subdirectories, _, err := ListEntries(testDir, directory.Path, &entries, nil, "", false, excludeByAttribute) + subdirectories, _, err := ListEntries(testDir, directory.Path, nil, "", excludeByAttribute, entryChannel) if err != nil { t.Errorf("ListEntries(%s, %s) returned an error: %s", testDir, directory.Path, err) } directories = append(directories, subdirectories...) 
} + close(entryChannel) + + for entry := range entryChannel { + entries = append(entries, entry) + } + entries = entries[1:] for _, entry := range entries { @@ -327,3 +343,33 @@ func TestEntryExcludeByAttribute(t *testing.T) { } } + +func TestEntryEncoding(t *testing.T) { + buffer := new(bytes.Buffer) + encoder := msgpack.NewEncoder(buffer) + + entry1 := CreateEntry("abcd", 1, 2, 0700) + err := encoder.Encode(entry1) + if err != nil { + t.Errorf("Failed to encode the entry: %v", err) + return + } + + t.Logf("msgpack size: %d\n", len(buffer.Bytes())) + decoder := msgpack.NewDecoder(buffer) + + description, _ := json.Marshal(entry1) + t.Logf("json size: %d\n", len(description)) + + var entry2 Entry + err = decoder.Decode(&entry2) + if err != nil { + t.Errorf("Failed to decode the entry: %v", err) + return + } + + if entry1.Path != entry2.Path || entry1.Size != entry2.Size || entry1.Time != entry2.Time { + t.Error("Decoded entry is different than the original one") + } + +} \ No newline at end of file diff --git a/src/duplicacy_entrylist.go b/src/duplicacy_entrylist.go new file mode 100644 index 00000000..bc0890d8 --- /dev/null +++ b/src/duplicacy_entrylist.go @@ -0,0 +1,574 @@ +// Copyright (c) Acrosync LLC. All rights reserved. +// Free for personal use and commercial trial +// Commercial use requires per-user licenses available from https://duplicacy.com + +package duplicacy + +import ( + "encoding/hex" + "encoding/binary" + "fmt" + "os" + "io" + "path" + "crypto/sha256" + "crypto/rand" + "sync" + + "github.com/vmihailenco/msgpack" +) + +// This struct stores information about a file entry that has been modified +type ModifiedEntry struct { + Path string + Size int64 + Hash string +} + +// EntryList is basically a list of entries, which can be kept in the memory, or serialized to a disk file, +// depending on if maximumInMemoryEntries is reached. +// +// The idea behind the on-disk entry list is that entries are written to a disk file as they are coming in. 
+// Entries that have been modified and thus need to be uploaded will have their Incomplete bit set (i.e., +// with a size of -1). When the limit is reached, entries are moved to a disk file but ModifiedEntries and +// UploadedChunks are still kept in memory. When later entries are read from the entry list, incomplete +// entries are back-annotated with info from ModifiedEntries and UploadedChunk* before sending them out. + +type EntryList struct { + onDiskFile *os.File // the file to store entries + encoder *msgpack.Encoder // msgpack encoder for entry serialization + entries []*Entry // in-memory entry list + + SnapshotID string // the snapshot id + Token string // this unique random token makes sure we read/write + // the same entry list + ModifiedEntries []ModifiedEntry // entries that will be uploaded + + UploadedChunkHashes []string // chunks from entries that have been uploaded + UploadedChunkLengths []int // chunk lengths from entries that have been uploaded + uploadedChunkLock sync.Mutex // lock for UploadedChunkHashes and UploadedChunkLengths + + PreservedChunkHashes []string // chunks from entries not changed + PreservedChunkLengths []int // chunk lengths from entries not changed + + Checksum string // checksum of all entries to detect disk corruption + + maximumInMemoryEntries int // max in-memory entries + NumberOfEntries int64 // number of entries (not including directories and links) + cachePath string // the directory for the on-disk file + + // These 3 variables are used in entry infomation back-annotation + modifiedEntryIndex int // points to the current modified entry + uploadedChunkIndex int // counter for upload chunks + uploadedChunkOffset int // the start offset for the current modified entry + +} + +// Create a new entry list +func CreateEntryList(snapshotID string, cachePath string, maximumInMemoryEntries int) (*EntryList, error) { + + token := make([]byte, 16) + _, err := rand.Read(token) + if err != nil { + return nil, fmt.Errorf("Failed 
to create a random token: %v", err) + } + + entryList := &EntryList { + SnapshotID: snapshotID, + maximumInMemoryEntries: maximumInMemoryEntries, + cachePath: cachePath, + Token: string(token), + } + + return entryList, nil + +} + +// Create the on-disk entry list file +func (entryList *EntryList)createOnDiskFile() error { + file, err := os.OpenFile(path.Join(entryList.cachePath, "incomplete_files"), os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0600) + if err != nil { + return fmt.Errorf("Failed to create on disk entry list: %v", err) + } + + entryList.onDiskFile = file + entryList.encoder = msgpack.NewEncoder(file) + + err = entryList.encoder.EncodeString(entryList.Token) + if err != nil { + return fmt.Errorf("Failed to create on disk entry list: %v", err) + } + + for _, entry := range entryList.entries { + err = entry.EncodeWithHash(entryList.encoder) + if err != nil { + return err + } + } + return nil +} + +// Add an entry to the entry list +func (entryList *EntryList)AddEntry(entry *Entry) error { + + if !entry.IsDir() && !entry.IsLink() { + entryList.NumberOfEntries++ + } + + if !entry.IsComplete() { + if entry.IsDir() || entry.IsLink() { + entry.Size = 0 + } else { + modifiedEntry := ModifiedEntry { + Path: entry.Path, + Size: -1, + } + + entryList.ModifiedEntries = append(entryList.ModifiedEntries, modifiedEntry) + } + } + + if entryList.onDiskFile != nil { + return entry.EncodeWithHash(entryList.encoder) + } else { + entryList.entries = append(entryList.entries, entry) + if entryList.maximumInMemoryEntries >= 0 && len(entryList.entries) > entryList.maximumInMemoryEntries { + err := entryList.createOnDiskFile() + if err != nil { + return err + } + } + } + + return nil +} + +// Add a preserved chunk that belongs to files that have not been modified +func (entryList *EntryList)AddPreservedChunk(chunkHash string, chunkSize int) { + entryList.PreservedChunkHashes = append(entryList.PreservedChunkHashes, chunkHash) + entryList.PreservedChunkLengths = 
append(entryList.PreservedChunkLengths, chunkSize) +} + +// Add a chunk just uploaded (that belongs to files that have been modified) +func (entryList *EntryList)AddUploadedChunk(chunkIndex int, chunkHash string, chunkSize int) { + entryList.uploadedChunkLock.Lock() + + for len(entryList.UploadedChunkHashes) <= chunkIndex { + entryList.UploadedChunkHashes = append(entryList.UploadedChunkHashes, "") + } + + for len(entryList.UploadedChunkLengths) <= chunkIndex { + entryList.UploadedChunkLengths = append(entryList.UploadedChunkLengths, 0) + } + + entryList.UploadedChunkHashes[chunkIndex] = chunkHash + entryList.UploadedChunkLengths[chunkIndex] = chunkSize + entryList.uploadedChunkLock.Unlock() +} + +// Close the on-disk file +func (entryList *EntryList) CloseOnDiskFile() error { + + if entryList.onDiskFile == nil { + return nil + } + + err := entryList.onDiskFile.Sync() + if err != nil { + return err + } + + err = entryList.onDiskFile.Close() + if err != nil { + return err + } + + entryList.onDiskFile = nil + return nil +} + +// Return the length of the `index`th chunk +func (entryList *EntryList) getChunkLength(index int) int { + if index < len(entryList.PreservedChunkLengths) { + return entryList.PreservedChunkLengths[index] + } else { + return entryList.UploadedChunkLengths[index - len(entryList.PreservedChunkLengths)] + } +} + +// Sanity check for each entry +func (entryList *EntryList) checkEntry(entry *Entry) error { + + if entry.Size < 0 { + return fmt.Errorf("the file %s hash an invalid size (%d)", entry.Path, entry.Size) + } + + if !entry.IsFile() || entry.Size == 0 { + return nil + } + + numberOfChunks := len(entryList.PreservedChunkLengths) + len(entryList.UploadedChunkLengths) + + if entry.StartChunk < 0 { + return fmt.Errorf("the file %s starts at chunk %d", entry.Path, entry.StartChunk) + } + + if entry.EndChunk >= numberOfChunks { + return fmt.Errorf("the file %s ends at chunk %d while the number of chunks is %d", + entry.Path, entry.EndChunk, 
numberOfChunks) + } + + if entry.EndChunk < entry.StartChunk { + return fmt.Errorf("the file %s starts at chunk %d and ends at chunk %d", + entry.Path, entry.StartChunk, entry.EndChunk) + } + + if entry.StartOffset >= entryList.getChunkLength(entry.StartChunk) { + return fmt.Errorf("the file %s starts at offset %d of chunk %d with a length of %d", + entry.Path, entry.StartOffset, entry.StartChunk, entryList.getChunkLength(entry.StartChunk)) + } + + if entry.EndOffset > entryList.getChunkLength(entry.EndChunk) { + return fmt.Errorf("the file %s ends at offset %d of chunk %d with a length of %d", + entry.Path, entry.EndOffset, entry.EndChunk, entryList.getChunkLength(entry.EndChunk)) + } + + fileSize := int64(0) + + for i := entry.StartChunk; i <= entry.EndChunk; i++ { + + start := 0 + if i == entry.StartChunk { + start = entry.StartOffset + } + end := entryList.getChunkLength(i) + if i == entry.EndChunk { + end = entry.EndOffset + } + + fileSize += int64(end - start) + } + + if entry.Size != fileSize { + return fmt.Errorf("the file %s has a size of %d but the total size of chunks is %d", + entry.Path, entry.Size, fileSize) + } + + return nil +} + +// An incomplete entry (with a size of -1) does not have 'startChunk', 'startOffset', 'endChunk', and 'endOffset'. This function +// is to fill in these information before sending the entry out. 
+func (entryList *EntryList) fillAndSendEntry(entry *Entry, entryOut func(*Entry)error) (skipped bool, err error) { + + if entry.IsComplete() { + err := entryList.checkEntry(entry) + if err != nil { + return false, err + } + return false, entryOut(entry) + } + + if entryList.modifiedEntryIndex >= len(entryList.ModifiedEntries) { + return false, fmt.Errorf("Unexpected file index %d (%d modified files)", entryList.modifiedEntryIndex, len(entryList.ModifiedEntries)) + } + + modifiedEntry := &entryList.ModifiedEntries[entryList.modifiedEntryIndex] + entryList.modifiedEntryIndex++ + + if modifiedEntry.Path != entry.Path { + return false, fmt.Errorf("Unexpected file path %s when expecting %s", modifiedEntry.Path, entry.Path) + } + + if modifiedEntry.Size <= 0 { + return true, nil + } + + entry.Size = modifiedEntry.Size + entry.Hash = modifiedEntry.Hash + + entry.StartChunk = entryList.uploadedChunkIndex + len(entryList.PreservedChunkHashes) + entry.StartOffset = entryList.uploadedChunkOffset + entry.EndChunk = entry.StartChunk + endOffset := int64(entry.StartOffset) + entry.Size + + for entryList.uploadedChunkIndex < len(entryList.UploadedChunkLengths) && endOffset > int64(entryList.UploadedChunkLengths[entryList.uploadedChunkIndex]) { + endOffset -= int64(entryList.UploadedChunkLengths[entryList.uploadedChunkIndex]) + entry.EndChunk++ + entryList.uploadedChunkIndex++ + } + + if entryList.uploadedChunkIndex >= len(entryList.UploadedChunkLengths) { + return false, fmt.Errorf("File %s has not been completely uploaded", entry.Path) + } + + entry.EndOffset = int(endOffset) + entryList.uploadedChunkOffset = entry.EndOffset + if entry.EndOffset == entryList.UploadedChunkLengths[entryList.uploadedChunkIndex] { + entryList.uploadedChunkIndex++ + entryList.uploadedChunkOffset = 0 + } + + err = entryList.checkEntry(entry) + if err != nil { + return false, err + } + + return false, entryOut(entry) +} + +// Iterate through the entries in this entry list +func (entryList *EntryList) 
ReadEntries(entryOut func(*Entry)error) (error) { + + entryList.modifiedEntryIndex = 0 + entryList.uploadedChunkIndex = 0 + entryList.uploadedChunkOffset = 0 + + if entryList.onDiskFile == nil { + for _, entry := range entryList.entries { + skipped, err := entryList.fillAndSendEntry(entry.Copy(), entryOut) + if err != nil { + return err + } + if skipped { + continue + } + } + } else { + _, err := entryList.onDiskFile.Seek(0, os.SEEK_SET) + if err != nil { + return err + } + decoder := msgpack.NewDecoder(entryList.onDiskFile) + + _, err = decoder.DecodeString() + if err != nil { + return err + } + + for _, err = decoder.PeekCode(); err == nil; _, err = decoder.PeekCode() { + entry, err := DecodeEntryWithHash(decoder) + if err != nil { + return err + } + skipped, err := entryList.fillAndSendEntry(entry, entryOut) + if err != nil { + return err + } + if skipped { + continue + } + } + + if err != io.EOF { + return err + } + } + + return nil +} + +// When saving an incomplete snapshot, the on-disk entry list ('incomplete_files') is renamed to +// 'incomplete_snapshot', and this EntryList struct is saved as 'incomplete_chunks'. 
+func (entryList *EntryList) SaveIncompleteSnapshot() { + entryList.uploadedChunkLock.Lock() + defer entryList.uploadedChunkLock.Unlock() + + if entryList.onDiskFile == nil { + err := entryList.createOnDiskFile() + if err != nil { + LOG_WARN("INCOMPLETE_SAVE", "Failed to create the incomplete snapshot file: %v", err) + return + } + + for _, entry := range entryList.entries { + + err = entry.EncodeWithHash(entryList.encoder) + if err != nil { + LOG_WARN("INCOMPLETE_SAVE", "Failed to save the entry %s: %v", entry.Path, err) + return + } + } + } + + err := entryList.onDiskFile.Close() + if err != nil { + LOG_WARN("INCOMPLETE_SAVE", "Failed to close the on-disk file: %v", err) + return + } + + filePath := path.Join(entryList.cachePath, "incomplete_snapshot") + if _, err := os.Stat(filePath); err == nil { + err = os.Remove(filePath) + if err != nil { + LOG_WARN("INCOMPLETE_REMOVE", "Failed to remove previous incomplete snapshot: %v", err) + } + } + + err = os.Rename(path.Join(entryList.cachePath, "incomplete_files"), filePath) + if err != nil { + LOG_WARN("INCOMPLETE_SAVE", "Failed to rename the incomplete snapshot file: %v", err) + return + } + + chunkFile := path.Join(entryList.cachePath, "incomplete_chunks") + file, err := os.OpenFile(chunkFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0600) + if err != nil { + LOG_WARN("INCOMPLETE_SAVE", "Failed to create the incomplete chunk file: %v", err) + return + } + + defer file.Close() + encoder := msgpack.NewEncoder(file) + + entryList.Checksum = entryList.CalculateChecksum() + + err = encoder.Encode(entryList) + if err != nil { + LOG_WARN("INCOMPLETE_SAVE", "Failed to save the incomplete snapshot: %v", err) + return + } + + LOG_INFO("INCOMPLETE_SAVE", "Incomplete snapshot saved to %s", filePath) +} + +// Calculate a checksum for this entry list +func (entryList *EntryList) CalculateChecksum() string{ + + hasher := sha256.New() + for _, s := range entryList.UploadedChunkHashes { + hasher.Write([]byte(s)) + } + + buffer := 
make([]byte, 8) + for _, i := range entryList.UploadedChunkLengths { + binary.LittleEndian.PutUint64(buffer, uint64(i)) + hasher.Write(buffer) + } + + for _, s := range entryList.PreservedChunkHashes { + hasher.Write([]byte(s)) + } + + for _, i := range entryList.PreservedChunkLengths { + binary.LittleEndian.PutUint64(buffer, uint64(i)) + hasher.Write(buffer) + } + + for _, entry := range entryList.ModifiedEntries { + binary.LittleEndian.PutUint64(buffer, uint64(entry.Size)) + hasher.Write(buffer) + hasher.Write([]byte(entry.Hash)) + } + + return hex.EncodeToString(hasher.Sum(nil)) +} + +// Check if all chunks exist in 'chunkCache' +func (entryList *EntryList) CheckChunks(config *Config, chunkCache map[string]bool) bool { + for _, chunkHash := range entryList.UploadedChunkHashes { + chunkID := config.GetChunkIDFromHash(chunkHash) + if _, ok := chunkCache[chunkID]; !ok { + return false + } + } + for _, chunkHash := range entryList.PreservedChunkHashes { + chunkID := config.GetChunkIDFromHash(chunkHash) + if _, ok := chunkCache[chunkID]; !ok { + return false + } + } + + return true + +} + +// Recover the on disk file from 'incomplete_snapshot', and restore the EntryList struct +// from 'incomplete_chunks' +func loadIncompleteSnapshot(snapshotID string, cachePath string) *EntryList { + + onDiskFilePath := path.Join(cachePath, "incomplete_snapshot") + entryListFilePath := path.Join(cachePath, "incomplete_chunks") + + if _, err := os.Stat(onDiskFilePath); os.IsNotExist(err) { + return nil + } + + if _, err := os.Stat(entryListFilePath); os.IsNotExist(err) { + return nil + } + + entryList := &EntryList {} + entryListFile, err := os.OpenFile(entryListFilePath, os.O_RDONLY, 0600) + if err != nil { + LOG_WARN("INCOMPLETE_LOAD", "Failed to open the incomplete snapshot: %v", err) + return nil + } + + defer entryListFile.Close() + decoder := msgpack.NewDecoder(entryListFile) + err = decoder.Decode(&entryList) + if err != nil { + LOG_WARN("INCOMPLETE_LOAD", "Failed to load the 
incomplete snapshot: %v", err) + return nil + } + + checksum := entryList.CalculateChecksum() + if checksum != entryList.Checksum { + LOG_WARN("INCOMPLETE_LOAD", "Failed to load the incomplete snapshot: checksum mismatched") + return nil + } + + onDiskFile, err := os.OpenFile(onDiskFilePath, os.O_RDONLY, 0600) + if err != nil { + LOG_WARN("INCOMPLETE_LOAD", "Failed to open the on disk file for the incomplete snapshot: %v", err) + return nil + } + + decoder = msgpack.NewDecoder(onDiskFile) + token, err := decoder.DecodeString() + if err != nil { + LOG_WARN("INCOMPLETE_LOAD", "Failed to read the token for the incomplete snapshot: %v", err) + onDiskFile.Close() + return nil + } + + if token != entryList.Token { + LOG_WARN("INCOMPLETE_LOAD", "Mismatched tokens in the incomplete snapshot") + onDiskFile.Close() + return nil + } + + entryList.onDiskFile = onDiskFile + + for i, hash := range entryList.UploadedChunkHashes { + if len(hash) == 0 { + // An empty hash means the chunk has not been uploaded in previous run + entryList.UploadedChunkHashes = entryList.UploadedChunkHashes[0:i] + entryList.UploadedChunkLengths = entryList.UploadedChunkLengths[0:i] + break + } + } + + LOG_INFO("INCOMPLETE_LOAD", "Previous incomlete backup contains %d files and %d chunks", + entryList.NumberOfEntries, len(entryList.PreservedChunkLengths) + len(entryList.UploadedChunkHashes)) + + return entryList +} + +// Delete the two incomplete files. 
+func deleteIncompleteSnapshot(cachePath string) { + + for _, file := range []string{"incomplete_snapshot", "incomplete_chunks"} { + filePath := path.Join(cachePath, file) + if _, err := os.Stat(filePath); err == nil { + err = os.Remove(filePath) + if err != nil { + LOG_WARN("INCOMPLETE_REMOVE", "Failed to remove the incomplete snapshot: %v", err) + return + } + } + } + + +} \ No newline at end of file diff --git a/src/duplicacy_entrylist_test.go b/src/duplicacy_entrylist_test.go new file mode 100644 index 00000000..1695f4a3 --- /dev/null +++ b/src/duplicacy_entrylist_test.go @@ -0,0 +1,179 @@ +// Copyright (c) Acrosync LLC. All rights reserved. +// Free for personal use and commercial trial +// Commercial use requires per-user licenses available from https://duplicacy.com + +package duplicacy + +import ( + "os" + "path" + "time" + "testing" + "math/rand" +) + + +func generateRandomString(length int) string { + var letters = []rune("abcdefghijklmnopqrstuvwxyz") + b := make([]rune, length) + for i := range b { + b[i] = letters[rand.Intn(len(letters))] + } + return string(b) +} + +var fileSizeGenerator = rand.NewZipf(rand.New(rand.NewSource(time.Now().UnixNano())), 1.2, 1.0, 1024) + +func generateRandomFileSize() int64 { + return int64(fileSizeGenerator.Uint64() + 1) +} + +func generateRandomChunks(totalFileSize int64) (chunks []string, lengths []int) { + + totalChunkSize := int64(0) + + for totalChunkSize < totalFileSize { + chunks = append(chunks, generateRandomString(64)) + chunkSize := int64(1 + (rand.Int() % 64)) + if chunkSize + totalChunkSize > totalFileSize { + chunkSize = totalFileSize - totalChunkSize + } + lengths = append(lengths, int(chunkSize)) + totalChunkSize += chunkSize + } + return chunks, lengths + +} + +func getPreservedChunks(entries []*Entry, chunks []string, lengths []int) (preservedChunks []string, preservedChunkLengths []int) { + lastPreservedChunk := -1 + for i := range entries { + if entries[i].Size < 0 { + continue + } + delta := 
entries[i].StartChunk - len(chunks) + if lastPreservedChunk != entries[i].StartChunk { + lastPreservedChunk = entries[i].StartChunk + preservedChunks = append(preservedChunks, chunks[entries[i].StartChunk]) + preservedChunkLengths = append(preservedChunkLengths, lengths[entries[i].StartChunk]) + delta++ + } + for j := entries[i].StartChunk + 1; i <= entries[i].EndChunk; i++ { + preservedChunks = append(preservedChunks, chunks[j]) + preservedChunkLengths = append(preservedChunkLengths, lengths[j]) + lastPreservedChunk = j + } + } + + return +} + +func testEntryList(t *testing.T, numberOfEntries int, maximumInMemoryEntries int) { + + entries := make([]*Entry, 0, numberOfEntries) + entrySizes := make([]int64, 0) + + for i := 0; i < numberOfEntries; i++ { + entry:= CreateEntry(generateRandomString(16), -1, 0, 0700) + entries = append(entries, entry) + entrySizes = append(entrySizes, generateRandomFileSize()) + } + + totalFileSize := int64(0) + for _, size := range entrySizes { + totalFileSize += size + } + + testDir := path.Join(os.TempDir(), "duplicacy_test") + os.RemoveAll(testDir) + os.MkdirAll(testDir, 0700) + + os.MkdirAll(testDir + "/list1", 0700) + os.MkdirAll(testDir + "/list2", 0700) + os.MkdirAll(testDir + "/list3", 0700) + os.MkdirAll(testDir + "/list1", 0700) + + // For the first entry list, all entries are new + entryList, _ := CreateEntryList("test", testDir + "/list1", maximumInMemoryEntries) + for _, entry := range entries { + entryList.AddEntry(entry) + } + uploadedChunks, uploadedChunksLengths := generateRandomChunks(totalFileSize) + for i, chunk := range uploadedChunks { + entryList.AddUploadedChunk(i, chunk, uploadedChunksLengths[i]) + } + + for i := range entryList.ModifiedEntries { + entryList.ModifiedEntries[i].Size = entrySizes[i] + } + + totalEntries := 0 + err := entryList.ReadEntries(func(entry *Entry) error { + totalEntries++ + return nil + }) + + if err != nil { + t.Errorf("ReadEntries returned an error: %s", err) + return + } + + if 
totalEntries != numberOfEntries { + t.Errorf("EntryList contains %d entries instead of %d", totalEntries, numberOfEntries) + return + } + + // For the second entry list, half of the entries are new + for i := range entries { + if rand.Int() % 1 == 0 { + entries[i].Size = -1 + } else { + entries[i].Size = entrySizes[i] + } + } + + preservedChunks, preservedChunkLengths := getPreservedChunks(entries, uploadedChunks, uploadedChunksLengths) + entryList, _ = CreateEntryList("test", testDir + "/list2", maximumInMemoryEntries) + for _, entry := range entries { + entryList.AddEntry(entry) + } + for i, chunk := range preservedChunks { + entryList.AddPreservedChunk(chunk, preservedChunkLengths[i]) + } + + totalFileSize = 0 + for i := range entryList.ModifiedEntries { + fileSize := generateRandomFileSize() + entryList.ModifiedEntries[i].Size = fileSize + totalFileSize += fileSize + } + + uploadedChunks, uploadedChunksLengths = generateRandomChunks(totalFileSize) + for i, chunk := range uploadedChunks { + entryList.AddUploadedChunk(i, chunk, uploadedChunksLengths[i]) + } + + totalEntries = 0 + err = entryList.ReadEntries(func(entry *Entry) error { + totalEntries++ + return nil + }) + + if err != nil { + t.Errorf("ReadEntries returned an error: %s", err) + return + } + + if totalEntries != numberOfEntries { + t.Errorf("EntryList contains %d entries instead of %d", totalEntries, numberOfEntries) + return + } + +} + + +func TestEntryList(t *testing.T) { + testEntryList(t, 1024, 1024) + testEntryList(t, 1024, 512) + testEntryList(t, 1024, 0) +} diff --git a/src/duplicacy_snapshot.go b/src/duplicacy_snapshot.go index 07b46915..4fe8df79 100644 --- a/src/duplicacy_snapshot.go +++ b/src/duplicacy_snapshot.go @@ -8,17 +8,22 @@ import ( "encoding/hex" "encoding/json" "fmt" + "io" "io/ioutil" "os" - "path" "path/filepath" - "strconv" "strings" "time" + "sort" + "bytes" + + "github.com/vmihailenco/msgpack" + ) // Snapshot represents a backup of the repository. 
type Snapshot struct { + Version int ID string // the snapshot id; must be different for different repositories Revision int // the revision number Options string // options used to create this snapshot (some not included) @@ -37,14 +42,11 @@ type Snapshot struct { // A sequence of chunks whose aggregated content is the json representation of 'ChunkLengths'. LengthSequence []string - Files []*Entry // list of files and subdirectories - ChunkHashes []string // a sequence of chunks representing the file content ChunkLengths []int // the length of each chunk Flag bool // used to mark certain snapshots for deletion or copy - discardAttributes bool } // CreateEmptySnapshot creates an empty snapshot. @@ -56,16 +58,14 @@ func CreateEmptySnapshot(id string) (snapshto *Snapshot) { } } -// CreateSnapshotFromDirectory creates a snapshot from the local directory 'top'. Only 'Files' -// will be constructed, while 'ChunkHashes' and 'ChunkLengths' can only be populated after uploading. -func CreateSnapshotFromDirectory(id string, top string, nobackupFile string, filtersFile string, excludeByAttribute bool) (snapshot *Snapshot, skippedDirectories []string, - skippedFiles []string, err error) { +type DirectoryListing struct { + directory string + files *[]Entry +} - snapshot = &Snapshot{ - ID: id, - Revision: 0, - StartTime: time.Now().Unix(), - } +func (snapshot *Snapshot) ListLocalFiles(top string, nobackupFile string, + filtersFile string, excludeByAttribute bool, listingChannel chan *Entry, + skippedDirectories *[]string, skippedFiles *[]string) { var patterns []string @@ -77,45 +77,128 @@ func CreateSnapshotFromDirectory(id string, top string, nobackupFile string, fil directories := make([]*Entry, 0, 256) directories = append(directories, CreateEntry("", 0, 0, 0)) - snapshot.Files = make([]*Entry, 0, 256) - - attributeThreshold := 1024 * 1024 - if attributeThresholdValue, found := os.LookupEnv("DUPLICACY_ATTRIBUTE_THRESHOLD"); found && attributeThresholdValue != "" { - 
attributeThreshold, _ = strconv.Atoi(attributeThresholdValue) - } - for len(directories) > 0 { directory := directories[len(directories)-1] directories = directories[:len(directories)-1] - snapshot.Files = append(snapshot.Files, directory) - subdirectories, skipped, err := ListEntries(top, directory.Path, &snapshot.Files, patterns, nobackupFile, snapshot.discardAttributes, excludeByAttribute) + subdirectories, skipped, err := ListEntries(top, directory.Path, patterns, nobackupFile, excludeByAttribute, listingChannel) if err != nil { if directory.Path == "" { LOG_ERROR("LIST_FAILURE", "Failed to list the repository root: %v", err) - return nil, nil, nil, err + return } LOG_WARN("LIST_FAILURE", "Failed to list subdirectory %s: %v", directory.Path, err) - skippedDirectories = append(skippedDirectories, directory.Path) + if skippedDirectories != nil { + *skippedDirectories = append(*skippedDirectories, directory.Path) + } continue } directories = append(directories, subdirectories...) - skippedFiles = append(skippedFiles, skipped...) - if !snapshot.discardAttributes && len(snapshot.Files) > attributeThreshold { - LOG_INFO("LIST_ATTRIBUTES", "Discarding file attributes") - snapshot.discardAttributes = true - for _, file := range snapshot.Files { - file.Attributes = nil - } + if skippedFiles != nil { + *skippedFiles = append(*skippedFiles, skipped...) 
} + + } + close(listingChannel) +} + +func (snapshot *Snapshot)ListRemoteFiles(config *Config, chunkOperator *ChunkOperator, entryOut func(*Entry) bool) { + + var chunks []string + for _, chunkHash := range snapshot.FileSequence { + chunks = append(chunks, chunkOperator.config.GetChunkIDFromHash(chunkHash)) } - // Remove the root entry - snapshot.Files = snapshot.Files[1:] + var chunk *Chunk + reader := sequenceReader{ + sequence: snapshot.FileSequence, + buffer: new(bytes.Buffer), + refillFunc: func(chunkHash string) []byte { + if chunk != nil { + config.PutChunk(chunk) + } + chunk = chunkOperator.Download(chunkHash, 0, true) + return chunk.GetBytes() + }, + } + + if snapshot.Version == 0 { + LOG_INFO("SNAPSHOT_VERSION", "snapshot %s at revision %d is encoded in an old version format", snapshot.ID, snapshot.Revision) + files := make([]*Entry, 0) + decoder := json.NewDecoder(&reader) + + // read open bracket + _, err := decoder.Token() + if err != nil { + LOG_ERROR("SNAPSHOT_PARSE", "Failed to open the snapshot %s at revision %d: not a list of entries", + snapshot.ID, snapshot.Revision) + return + } + + for decoder.More() { + var entry Entry + err = decoder.Decode(&entry) + if err != nil { + LOG_ERROR("SNAPSHOT_PARSE", "Failed to load files specified in the snapshot %s at revision %d: %v", + snapshot.ID, snapshot.Revision, err) + return + } + files = append(files, &entry) + } + + sort.Sort(ByName(files)) + + for _, file := range files { + if !entryOut(file) { + return + } + } + } else if snapshot.Version == 1 { + decoder := msgpack.NewDecoder(&reader) + + lastEndChunk := 0 + + // while the array contains values + for _, err := decoder.PeekCode(); err != io.EOF; _, err = decoder.PeekCode() { + if err != nil { + LOG_ERROR("SNAPSHOT_PARSE", "Failed to parse the snapshot %s at revision %d: %v", + snapshot.ID, snapshot.Revision, err) + return + } + var entry Entry + err = decoder.Decode(&entry) + if err != nil { + LOG_ERROR("SNAPSHOT_PARSE", "Failed to load the snapshot 
%s at revision %d: %v", + snapshot.ID, snapshot.Revision, err) + return + } + + if entry.IsFile() { + entry.StartChunk += lastEndChunk + entry.EndChunk += entry.StartChunk + lastEndChunk = entry.EndChunk + } + + err = entry.check(snapshot.ChunkLengths) + if err != nil { + LOG_ERROR("SNAPSHOT_ENTRY", "Failed to load the snapshot %s at revision %d: %v", + snapshot.ID, snapshot.Revision, err) + return + } + + if !entryOut(&entry) { + return + } + } + + } else { + LOG_ERROR("SNAPSHOT_VERSION", "snapshot %s at revision %d is encoded in unsupported version %d format", + snapshot.ID, snapshot.Revision, snapshot.Version) + return + } - return snapshot, skippedDirectories, skippedFiles, nil } func AppendPattern(patterns []string, new_pattern string) (new_patterns []string) { @@ -215,100 +298,6 @@ func ProcessFilterLines(patternFileLines []string, includedFiles []string) (patt return patterns } -// This is the struct used to save/load incomplete snapshots -type IncompleteSnapshot struct { - Files []*Entry - ChunkHashes []string - ChunkLengths []int -} - -// LoadIncompleteSnapshot loads the incomplete snapshot if it exists -func LoadIncompleteSnapshot() (snapshot *Snapshot) { - snapshotFile := path.Join(GetDuplicacyPreferencePath(), "incomplete") - description, err := ioutil.ReadFile(snapshotFile) - if err != nil { - LOG_DEBUG("INCOMPLETE_LOCATE", "Failed to locate incomplete snapshot: %v", err) - return nil - } - - var incompleteSnapshot IncompleteSnapshot - - err = json.Unmarshal(description, &incompleteSnapshot) - if err != nil { - LOG_DEBUG("INCOMPLETE_PARSE", "Failed to parse incomplete snapshot: %v", err) - return nil - } - - var chunkHashes []string - for _, chunkHash := range incompleteSnapshot.ChunkHashes { - hash, err := hex.DecodeString(chunkHash) - if err != nil { - LOG_DEBUG("INCOMPLETE_DECODE", "Failed to decode incomplete snapshot: %v", err) - return nil - } - chunkHashes = append(chunkHashes, string(hash)) - } - - snapshot = &Snapshot{ - Files: 
incompleteSnapshot.Files, - ChunkHashes: chunkHashes, - ChunkLengths: incompleteSnapshot.ChunkLengths, - } - LOG_INFO("INCOMPLETE_LOAD", "Incomplete snapshot loaded from %s", snapshotFile) - return snapshot -} - -// SaveIncompleteSnapshot saves the incomplete snapshot under the preference directory -func SaveIncompleteSnapshot(snapshot *Snapshot) { - var files []*Entry - for _, file := range snapshot.Files { - // All unprocessed files will have a size of -1 - if file.Size >= 0 { - file.Attributes = nil - files = append(files, file) - } else { - break - } - } - var chunkHashes []string - for _, chunkHash := range snapshot.ChunkHashes { - chunkHashes = append(chunkHashes, hex.EncodeToString([]byte(chunkHash))) - } - - incompleteSnapshot := IncompleteSnapshot{ - Files: files, - ChunkHashes: chunkHashes, - ChunkLengths: snapshot.ChunkLengths, - } - - description, err := json.MarshalIndent(incompleteSnapshot, "", " ") - if err != nil { - LOG_WARN("INCOMPLETE_ENCODE", "Failed to encode the incomplete snapshot: %v", err) - return - } - - snapshotFile := path.Join(GetDuplicacyPreferencePath(), "incomplete") - err = ioutil.WriteFile(snapshotFile, description, 0644) - if err != nil { - LOG_WARN("INCOMPLETE_WRITE", "Failed to save the incomplete snapshot: %v", err) - return - } - - LOG_INFO("INCOMPLETE_SAVE", "Incomplete snapshot saved to %s", snapshotFile) -} - -func RemoveIncompleteSnapshot() { - snapshotFile := path.Join(GetDuplicacyPreferencePath(), "incomplete") - if stat, err := os.Stat(snapshotFile); err == nil && !stat.IsDir() { - err = os.Remove(snapshotFile) - if err != nil { - LOG_INFO("INCOMPLETE_SAVE", "Failed to remove ncomplete snapshot: %v", err) - } else { - LOG_INFO("INCOMPLETE_SAVE", "Removed incomplete snapshot %s", snapshotFile) - } - } -} - // CreateSnapshotFromDescription creates a snapshot from json decription. 
func CreateSnapshotFromDescription(description []byte) (snapshot *Snapshot, err error) { @@ -321,6 +310,14 @@ func CreateSnapshotFromDescription(description []byte) (snapshot *Snapshot, err snapshot = &Snapshot{} + if value, ok := root["version"]; !ok { + snapshot.Version = 0 + } else if version, ok := value.(float64); !ok { + return nil, fmt.Errorf("Invalid version is specified in the snapshot") + } else { + snapshot.Version = int(version) + } + if value, ok := root["id"]; !ok { return nil, fmt.Errorf("No id is specified in the snapshot") } else if snapshot.ID, ok = value.(string); !ok { @@ -437,6 +434,7 @@ func (snapshot *Snapshot) MarshalJSON() ([]byte, error) { object := make(map[string]interface{}) + object["version"] = 1 object["id"] = snapshot.ID object["revision"] = snapshot.Revision object["options"] = snapshot.Options @@ -458,9 +456,7 @@ func (snapshot *Snapshot) MarshalJSON() ([]byte, error) { // MarshalSequence creates a json represetion for the specified chunk sequence. func (snapshot *Snapshot) MarshalSequence(sequenceType string) ([]byte, error) { - if sequenceType == "files" { - return json.Marshal(snapshot.Files) - } else if sequenceType == "chunks" { + if sequenceType == "chunks" { return json.Marshal(encodeSequence(snapshot.ChunkHashes)) } else { return json.Marshal(snapshot.ChunkLengths) @@ -489,3 +485,4 @@ func encodeSequence(sequence []string) []string { return sequenceInHex } + diff --git a/src/duplicacy_snapshotmanager.go b/src/duplicacy_snapshotmanager.go index d5a47be5..7d3d4c3e 100644 --- a/src/duplicacy_snapshotmanager.go +++ b/src/duplicacy_snapshotmanager.go @@ -20,6 +20,8 @@ import ( "strings" "text/tabwriter" "time" + "sync" + "sync/atomic" "github.com/aryann/difflib" ) @@ -189,7 +191,6 @@ type SnapshotManager struct { fileChunk *Chunk snapshotCache *FileStorage - chunkDownloader *ChunkDownloader chunkOperator *ChunkOperator } @@ -268,72 +269,26 @@ func (reader *sequenceReader) Read(data []byte) (n int, err error) { return 
reader.buffer.Read(data) } -func (manager *SnapshotManager) CreateChunkDownloader() { - if manager.chunkDownloader == nil { - manager.chunkDownloader = CreateChunkDownloader(manager.config, manager.storage, manager.snapshotCache, false, 1, false) +func (manager *SnapshotManager) CreateChunkOperator(resurrect bool, threads int, allowFailures bool) { + if manager.chunkOperator == nil { + manager.chunkOperator = CreateChunkOperator(manager.config, manager.storage, manager.snapshotCache, resurrect, threads, allowFailures) } } // DownloadSequence returns the content represented by a sequence of chunks. func (manager *SnapshotManager) DownloadSequence(sequence []string) (content []byte) { - manager.CreateChunkDownloader() + manager.CreateChunkOperator(false, 1, false) for _, chunkHash := range sequence { - i := manager.chunkDownloader.AddChunk(chunkHash) - chunk := manager.chunkDownloader.WaitForChunk(i) + chunk := manager.chunkOperator.Download(chunkHash, 0, true) content = append(content, chunk.GetBytes()...) 
+ manager.config.PutChunk(chunk) } return content } -func (manager *SnapshotManager) DownloadSnapshotFileSequence(snapshot *Snapshot, patterns []string, attributesNeeded bool) bool { - - manager.CreateChunkDownloader() - - reader := sequenceReader{ - sequence: snapshot.FileSequence, - buffer: new(bytes.Buffer), - refillFunc: func(chunkHash string) []byte { - i := manager.chunkDownloader.AddChunk(chunkHash) - chunk := manager.chunkDownloader.WaitForChunk(i) - return chunk.GetBytes() - }, - } - - files := make([]*Entry, 0) - decoder := json.NewDecoder(&reader) - - // read open bracket - _, err := decoder.Token() - if err != nil { - LOG_ERROR("SNAPSHOT_PARSE", "Failed to load files specified in the snapshot %s at revision %d: not a list of entries", - snapshot.ID, snapshot.Revision) - return false - } - - // while the array contains values - for decoder.More() { - var entry Entry - err = decoder.Decode(&entry) - if err != nil { - LOG_ERROR("SNAPSHOT_PARSE", "Failed to load files specified in the snapshot %s at revision %d: %v", - snapshot.ID, snapshot.Revision, err) - return false - } - - // If we don't need the attributes or the file isn't included we clear the attributes to save memory - if !attributesNeeded || (len(patterns) != 0 && !MatchPath(entry.Path, patterns)) { - entry.Attributes = nil - } - - files = append(files, &entry) - } - snapshot.Files = files - return true -} - // DownloadSnapshotSequence downloads the content represented by a sequence of chunks, and then unmarshal the content -// using the specified 'loadFunction'. It purpose is to decode the chunk sequences representing chunk hashes or chunk lengths +// using the specified 'loadFunction'. Its purpose is to decode the chunk sequences representing chunk hashes or chunk lengths // in a snapshot. 
func (manager *SnapshotManager) DownloadSnapshotSequence(snapshot *Snapshot, sequenceType string) bool { @@ -362,30 +317,21 @@ func (manager *SnapshotManager) DownloadSnapshotSequence(snapshot *Snapshot, seq return true } -// DownloadSnapshotContents loads all chunk sequences in a snapshot. A snapshot, when just created, only contains -// some metadata and theree sequence representing files, chunk hashes, and chunk lengths. This function must be called -// for the actual content of the snapshot to be usable. -func (manager *SnapshotManager) DownloadSnapshotContents(snapshot *Snapshot, patterns []string, attributesNeeded bool) bool { +// DownloadSnapshotSequences loads all chunk sequences in a snapshot. A snapshot, when just created, only contains +// some metadata and three sequence representing files, chunk hashes, and chunk lengths. This function must be called +// for the chunk hash sequence and chunk length sequence to be usable. +func (manager *SnapshotManager) DownloadSnapshotSequences(snapshot *Snapshot) bool { - manager.DownloadSnapshotFileSequence(snapshot, patterns, attributesNeeded) manager.DownloadSnapshotSequence(snapshot, "chunks") manager.DownloadSnapshotSequence(snapshot, "lengths") - err := manager.CheckSnapshot(snapshot) - if err != nil { - LOG_ERROR("SNAPSHOT_CHECK", "The snapshot %s at revision %d contains an error: %v", - snapshot.ID, snapshot.Revision, err) - return false - } - return true } -// ClearSnapshotContents removes contents loaded by DownloadSnapshotContents -func (manager *SnapshotManager) ClearSnapshotContents(snapshot *Snapshot) { +// ClearSnapshotContents removes sequences loaded by DownloadSnapshotSequences +func (manager *SnapshotManager) ClearSnapshotSequences(snapshot *Snapshot) { snapshot.ChunkHashes = nil snapshot.ChunkLengths = nil - snapshot.Files = nil } // CleanSnapshotCache removes all files not referenced by the specified 'snapshot' in the snapshot cache. 
@@ -577,10 +523,6 @@ func (manager *SnapshotManager) downloadLatestSnapshot(snapshotID string) (remot remote = manager.DownloadSnapshot(snapshotID, latest) } - if remote != nil { - manager.DownloadSnapshotContents(remote, nil, false) - } - return remote } @@ -712,6 +654,12 @@ func (manager *SnapshotManager) ListSnapshots(snapshotID string, revisionsToList LOG_DEBUG("LIST_PARAMETERS", "id: %s, revisions: %v, tag: %s, showFiles: %t, showChunks: %t", snapshotID, revisionsToList, tag, showFiles, showChunks) + manager.CreateChunkOperator(false, 1, false) + defer func() { + manager.chunkOperator.Stop() + manager.chunkOperator = nil + }() + var snapshotIDs []string var err error @@ -749,14 +697,16 @@ func (manager *SnapshotManager) ListSnapshots(snapshotID string, revisionsToList if len(snapshot.Tag) > 0 { tagWithSpace = snapshot.Tag + " " } - LOG_INFO("SNAPSHOT_INFO", "Snapshot %s revision %d created at %s %s%s", - snapshotID, revision, creationTime, tagWithSpace, snapshot.Options) - - if showFiles { - manager.DownloadSnapshotFileSequence(snapshot, nil, false) + options := snapshot.Options + if snapshot.Version == 0 { + options += " (0)" } + LOG_INFO("SNAPSHOT_INFO", "Snapshot %s revision %d created at %s %s%s", + snapshotID, revision, creationTime, tagWithSpace, options) if showFiles { + // We need to fill in ChunkHashes and ChunkLengths to verify that each entry is valid + manager.DownloadSnapshotSequences(snapshot) if snapshot.NumberOfFiles > 0 { LOG_INFO("SNAPSHOT_STATS", "Files: %d", snapshot.NumberOfFiles) @@ -768,7 +718,7 @@ func (manager *SnapshotManager) ListSnapshots(snapshotID string, revisionsToList totalFileSize := int64(0) lastChunk := 0 - for _, file := range snapshot.Files { + snapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func(file *Entry)bool { if file.IsFile() { totalFiles++ totalFileSize += file.Size @@ -780,17 +730,18 @@ func (manager *SnapshotManager) ListSnapshots(snapshotID string, revisionsToList lastChunk = file.EndChunk } } - } 
+ return true + }) - for _, file := range snapshot.Files { + snapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func(file *Entry)bool { if file.IsFile() { LOG_INFO("SNAPSHOT_FILE", "%s", file.String(maxSizeDigits)) } - } + return true + }) metaChunks := len(snapshot.FileSequence) + len(snapshot.ChunkSequence) + len(snapshot.LengthSequence) - LOG_INFO("SNAPSHOT_STATS", "Files: %d, total size: %d, file chunks: %d, metadata chunks: %d", - totalFiles, totalFileSize, lastChunk+1, metaChunks) + LOG_INFO("SNAPSHOT_STATS", "Total size: %d, file chunks: %d, metadata chunks: %d", totalFileSize, lastChunk+1, metaChunks) } if showChunks { @@ -807,11 +758,15 @@ func (manager *SnapshotManager) ListSnapshots(snapshotID string, revisionsToList } -// ListSnapshots shows the information about a snapshot. +// CheckSnapshots checks if there is any problem with a snapshot. func (manager *SnapshotManager) CheckSnapshots(snapshotID string, revisionsToCheck []int, tag string, showStatistics bool, showTabular bool, checkFiles bool, checkChunks, searchFossils bool, resurrect bool, threads int, allowFailures bool) bool { - manager.chunkDownloader = CreateChunkDownloader(manager.config, manager.storage, manager.snapshotCache, false, threads, allowFailures) + manager.CreateChunkOperator(resurrect, threads, allowFailures) + defer func() { + manager.chunkOperator.Stop() + manager.chunkOperator = nil + }() LOG_DEBUG("LIST_PARAMETERS", "id: %s, revisions: %v, tag: %s, showStatistics: %t, showTabular: %t, checkFiles: %t, searchFossils: %t, resurrect: %t", snapshotID, revisionsToCheck, tag, showStatistics, showTabular, checkFiles, searchFossils, resurrect) @@ -911,9 +866,9 @@ func (manager *SnapshotManager) CheckSnapshots(snapshotID string, revisionsToChe for _, snapshot := range snapshotMap[snapshotID] { if checkFiles { - manager.DownloadSnapshotContents(snapshot, nil, false) + manager.DownloadSnapshotSequences(snapshot) manager.VerifySnapshot(snapshot) - 
manager.ClearSnapshotContents(snapshot) + manager.ClearSnapshotSequences(snapshot) continue } @@ -1026,6 +981,7 @@ func (manager *SnapshotManager) CheckSnapshots(snapshotID string, revisionsToChe // .duplicacy/cache/storage/verified_chunks. Note that it contains the chunk ids not chunk // hashes. verifiedChunks := make(map[string]int64) + var verifiedChunksLock sync.Mutex verifiedChunksFile := "verified_chunks" manager.fileChunk.Reset(false) @@ -1061,16 +1017,11 @@ func (manager *SnapshotManager) CheckSnapshots(snapshotID string, revisionsToChe defer saveVerifiedChunks() RunAtError = saveVerifiedChunks - manager.chunkDownloader.snapshotCache = nil LOG_INFO("SNAPSHOT_VERIFY", "Verifying %d chunks", len(*allChunkHashes)) startTime := time.Now() var chunkHashes []string - // The index of the first chunk to add to the downloader, which may have already downloaded - // some metadata chunks so the index doesn't start with 0. - chunkIndex := -1 - skippedChunks := 0 for chunkHash := range *allChunkHashes { if len(verifiedChunks) > 0 { @@ -1081,38 +1032,65 @@ func (manager *SnapshotManager) CheckSnapshots(snapshotID string, revisionsToChe } } chunkHashes = append(chunkHashes, chunkHash) - if chunkIndex == -1 { - chunkIndex = manager.chunkDownloader.AddChunk(chunkHash) - } else { - manager.chunkDownloader.AddChunk(chunkHash) - } } if skippedChunks > 0 { LOG_INFO("SNAPSHOT_VERIFY", "Skipped %d chunks that have already been verified before", skippedChunks) } - var downloadedChunkSize int64 - totalChunks := len(chunkHashes) - for i := 0; i < totalChunks; i++ { - chunk := manager.chunkDownloader.WaitForChunk(i + chunkIndex) - chunkID := manager.config.GetChunkIDFromHash(chunkHashes[i]) - if chunk.isBroken { - continue - } - verifiedChunks[chunkID] = startTime.Unix() - downloadedChunkSize += int64(chunk.GetLength()) + var totalDownloadedChunkSize int64 + var totalDownloadedChunks int64 + totalChunks := int64(len(chunkHashes)) + + chunkChannel := make(chan int, threads) + var wg 
sync.WaitGroup + + wg.Add(threads) + for i := 0; i < threads; i++ { + go func() { + defer CatchLogException() - elapsedTime := time.Now().Sub(startTime).Seconds() - speed := int64(float64(downloadedChunkSize) / elapsedTime) - remainingTime := int64(float64(totalChunks - i - 1) / float64(i + 1) * elapsedTime) - percentage := float64(i + 1) / float64(totalChunks) * 100.0 - LOG_INFO("VERIFY_PROGRESS", "Verified chunk %s (%d/%d), %sB/s %s %.1f%%", - chunkID, i + 1, totalChunks, PrettySize(speed), PrettyTime(remainingTime), percentage) + for { + chunkIndex, ok := <- chunkChannel + if !ok { + wg.Done() + return + } + + chunk := manager.chunkOperator.Download(chunkHashes[chunkIndex], chunkIndex, false) + if chunk == nil { + continue + } + chunkID := manager.config.GetChunkIDFromHash(chunkHashes[chunkIndex]) + verifiedChunksLock.Lock() + verifiedChunks[chunkID] = startTime.Unix() + verifiedChunksLock.Unlock() + + downloadedChunkSize := atomic.AddInt64(&totalDownloadedChunkSize, int64(chunk.GetLength())) + downloadedChunks := atomic.AddInt64(&totalDownloadedChunks, 1) + + elapsedTime := time.Now().Sub(startTime).Seconds() + speed := int64(float64(downloadedChunkSize) / elapsedTime) + remainingTime := int64(float64(totalChunks - downloadedChunks) / float64(downloadedChunks) * elapsedTime) + percentage := float64(downloadedChunks) / float64(totalChunks) * 100.0 + LOG_INFO("VERIFY_PROGRESS", "Verified chunk %s (%d/%d), %sB/s %s %.1f%%", + chunkID, downloadedChunks, totalChunks, PrettySize(speed), PrettyTime(remainingTime), percentage) + + manager.config.PutChunk(chunk) + } + } () } - if manager.chunkDownloader.NumberOfFailedChunks > 0 { - LOG_ERROR("SNAPSHOT_VERIFY", "%d out of %d chunks are corrupted", manager.chunkDownloader.NumberOfFailedChunks, len(*allChunkHashes)) + for chunkIndex := range chunkHashes { + chunkChannel <- chunkIndex + } + + close(chunkChannel) + wg.Wait() + manager.chunkOperator.WaitForCompletion() + + if manager.chunkOperator.NumberOfFailedChunks > 0 { + 
LOG_ERROR("SNAPSHOT_VERIFY", "%d out of %d chunks are corrupted", manager.chunkOperator.NumberOfFailedChunks, len(*allChunkHashes)) } else { LOG_INFO("SNAPSHOT_VERIFY", "All %d chunks have been successfully verified", len(*allChunkHashes)) } @@ -1280,14 +1258,6 @@ func (manager *SnapshotManager) PrintSnapshot(snapshot *Snapshot) bool { object["chunks"] = manager.ConvertSequence(snapshot.ChunkHashes) object["lengths"] = snapshot.ChunkLengths - // By default the json serialization of a file entry contains the path in base64 format. This is - // to convert every file entry into an object which include the path in a more readable format. - var files []map[string]interface{} - for _, file := range snapshot.Files { - files = append(files, file.convertToObject(false)) - } - object["files"] = files - description, err := json.MarshalIndent(object, "", " ") if err != nil { @@ -1296,8 +1266,24 @@ func (manager *SnapshotManager) PrintSnapshot(snapshot *Snapshot) bool { return false } - fmt.Printf("%s\n", string(description)) + // Don't print the ending bracket + fmt.Printf("%s", string(description[:len(description) - 2])) + fmt.Printf(",\n \"files\": [\n") + isFirstFile := true + snapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func (file *Entry) bool { + + fileDescription, _ := json.MarshalIndent(file.convertToObject(false), "", " ") + + if isFirstFile { + fmt.Printf("%s", fileDescription) + isFirstFile = false + } else { + fmt.Printf(",\n%s", fileDescription) + } + return true + }) + fmt.Printf(" ]\n}\n") return true } @@ -1313,17 +1299,20 @@ func (manager *SnapshotManager) VerifySnapshot(snapshot *Snapshot) bool { return false } - files := make([]*Entry, 0, len(snapshot.Files)/2) - for _, file := range snapshot.Files { + files := make([]*Entry, 0) + snapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func (file *Entry) bool { if file.IsFile() && file.Size != 0 { + file.Attributes = nil files = append(files, file) } - } + return true + }) 
sort.Sort(ByChunk(files)) corruptedFiles := 0 + var lastChunk *Chunk for _, file := range files { - if !manager.RetrieveFile(snapshot, file, func([]byte) {}) { + if !manager.RetrieveFile(snapshot, file, &lastChunk, func([]byte) {}) { corruptedFiles++ } LOG_TRACE("SNAPSHOT_VERIFY", "%s", file.Path) @@ -1341,21 +1330,13 @@ func (manager *SnapshotManager) VerifySnapshot(snapshot *Snapshot) bool { } // RetrieveFile retrieves the file in the specified snapshot. -func (manager *SnapshotManager) RetrieveFile(snapshot *Snapshot, file *Entry, output func([]byte)) bool { +func (manager *SnapshotManager) RetrieveFile(snapshot *Snapshot, file *Entry, lastChunk **Chunk, output func([]byte)) bool { if file.Size == 0 { return true } - manager.CreateChunkDownloader() - - // Temporarily disable the snapshot cache of the download so that downloaded file chunks won't be saved - // to the cache. - snapshotCache := manager.chunkDownloader.snapshotCache - manager.chunkDownloader.snapshotCache = nil - defer func() { - manager.chunkDownloader.snapshotCache = snapshotCache - }() + manager.CreateChunkOperator(false, 1, false) fileHasher := manager.config.NewFileHasher() alternateHash := false @@ -1376,12 +1357,19 @@ func (manager *SnapshotManager) RetrieveFile(snapshot *Snapshot, file *Entry, ou } hash := snapshot.ChunkHashes[i] - lastChunk, lastChunkHash := manager.chunkDownloader.GetLastDownloadedChunk() - if lastChunkHash != hash { - i := manager.chunkDownloader.AddChunk(hash) - chunk = manager.chunkDownloader.WaitForChunk(i) + if lastChunk == nil { + chunk = manager.chunkOperator.Download(hash, 0, false) + } else if *lastChunk == nil { + chunk = manager.chunkOperator.Download(hash, 0, false) + *lastChunk = chunk } else { - chunk = lastChunk + if (*lastChunk).GetHash() == hash { + chunk = *lastChunk + } else { + manager.config.PutChunk(*lastChunk) + chunk = manager.chunkOperator.Download(hash, 0, false) + *lastChunk = chunk + } } output(chunk.GetBytes()[start:end]) @@ -1405,10 +1393,18 
@@ func (manager *SnapshotManager) RetrieveFile(snapshot *Snapshot, file *Entry, ou // FindFile returns the file entry that has the given file name. func (manager *SnapshotManager) FindFile(snapshot *Snapshot, filePath string, suppressError bool) *Entry { - for _, entry := range snapshot.Files { + + var found *Entry + snapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func (entry *Entry) bool { if entry.Path == filePath { - return entry + found = entry + return false } + return true + }) + + if found != nil { + return found } if !suppressError { @@ -1440,13 +1436,8 @@ func (manager *SnapshotManager) PrintFile(snapshotID string, revision int, path return false } - patterns := []string{} - if path != "" { - patterns = []string{path} - } - - // If no path is specified, we're printing the snapshot so we need all attributes - if !manager.DownloadSnapshotContents(snapshot, patterns, path == "") { + // If no path is specified, we're printing the snapshot + if !manager.DownloadSnapshotSequences(snapshot) { return false } @@ -1456,7 +1447,7 @@ func (manager *SnapshotManager) PrintFile(snapshotID string, revision int, path } file := manager.FindFile(snapshot, path, false) - if !manager.RetrieveFile(snapshot, file, func(chunk []byte) { + if !manager.RetrieveFile(snapshot, file, nil, func(chunk []byte) { fmt.Printf("%s", chunk) }) { LOG_ERROR("SNAPSHOT_RETRIEVE", "File %s is corrupted in snapshot %s at revision %d", @@ -1474,22 +1465,38 @@ func (manager *SnapshotManager) Diff(top string, snapshotID string, revisions [] LOG_DEBUG("DIFF_PARAMETERS", "top: %s, id: %s, revision: %v, path: %s, compareByHash: %t", top, snapshotID, revisions, filePath, compareByHash) + manager.CreateChunkOperator(false, 1, false) + defer func() { + manager.chunkOperator.Stop() + manager.chunkOperator = nil + } () + var leftSnapshot *Snapshot var rightSnapshot *Snapshot - var err error + + leftSnapshotFiles := make([]*Entry, 0, 1024) + rightSnapshotFiles := make([]*Entry, 0, 1024) // If 
no or only one revision is specified, use the on-disk version for the right-hand side. if len(revisions) <= 1 { // Only scan the repository if filePath is not provided if len(filePath) == 0 { - rightSnapshot, _, _, err = CreateSnapshotFromDirectory(snapshotID, top, nobackupFile, filtersFile, excludeByAttribute) - if err != nil { - LOG_ERROR("SNAPSHOT_LIST", "Failed to list the directory %s: %v", top, err) - return false + rightSnapshot = CreateEmptySnapshot(snapshotID) + localListingChannel := make(chan *Entry) + go func() { + defer CatchLogException() + rightSnapshot.ListLocalFiles(top, nobackupFile, filtersFile, excludeByAttribute, localListingChannel, nil, nil) + } () + + for entry := range localListingChannel { + entry.Attributes = nil // attributes are not compared + rightSnapshotFiles = append(rightSnapshotFiles, entry) } + } } else { rightSnapshot = manager.DownloadSnapshot(snapshotID, revisions[1]) + manager.DownloadSnapshotSequences(rightSnapshot) } // If no revision is specified, use the latest revision as the left-hand side. @@ -1503,15 +1510,11 @@ func (manager *SnapshotManager) Diff(top string, snapshotID string, revisions [] leftSnapshot = manager.DownloadSnapshot(snapshotID, revisions[0]) } + manager.DownloadSnapshotSequences(leftSnapshot) if len(filePath) > 0 { - manager.DownloadSnapshotContents(leftSnapshot, nil, false) - if rightSnapshot != nil && rightSnapshot.Revision != 0 { - manager.DownloadSnapshotContents(rightSnapshot, nil, false) - } - var leftFile []byte - if !manager.RetrieveFile(leftSnapshot, manager.FindFile(leftSnapshot, filePath, false), func(content []byte) { + if !manager.RetrieveFile(leftSnapshot, manager.FindFile(leftSnapshot, filePath, false), nil, func(content []byte) { leftFile = append(leftFile, content...) 
}) { LOG_ERROR("SNAPSHOT_DIFF", "File %s is corrupted in snapshot %s at revision %d", @@ -1521,7 +1524,7 @@ func (manager *SnapshotManager) Diff(top string, snapshotID string, revisions [] var rightFile []byte if rightSnapshot != nil { - if !manager.RetrieveFile(rightSnapshot, manager.FindFile(rightSnapshot, filePath, false), func(content []byte) { + if !manager.RetrieveFile(rightSnapshot, manager.FindFile(rightSnapshot, filePath, false), nil, func(content []byte) { rightFile = append(rightFile, content...) }) { LOG_ERROR("SNAPSHOT_DIFF", "File %s is corrupted in snapshot %s at revision %d", @@ -1582,24 +1585,32 @@ func (manager *SnapshotManager) Diff(top string, snapshotID string, revisions [] return true } - // We only need to decode the 'files' sequence, not 'chunkhashes' or 'chunklengthes' - manager.DownloadSnapshotFileSequence(leftSnapshot, nil, false) - if rightSnapshot != nil && rightSnapshot.Revision != 0 { - manager.DownloadSnapshotFileSequence(rightSnapshot, nil, false) + leftSnapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func(entry *Entry) bool { + entry.Attributes = nil + leftSnapshotFiles = append(leftSnapshotFiles, entry) + return true + }) + + if rightSnapshot.Revision != 0 { + rightSnapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func(entry *Entry) bool { + entry.Attributes = nil + rightSnapshotFiles = append(rightSnapshotFiles, entry) + return true + }) } maxSize := int64(9) maxSizeDigits := 1 // Find the max Size value in order for pretty alignment. 
- for _, file := range leftSnapshot.Files { + for _, file := range leftSnapshotFiles { for !file.IsDir() && file.Size > maxSize { maxSize = maxSize*10 + 9 maxSizeDigits += 1 } } - for _, file := range rightSnapshot.Files { + for _, file := range rightSnapshotFiles { for !file.IsDir() && file.Size > maxSize { maxSize = maxSize*10 + 9 maxSizeDigits += 1 @@ -1609,22 +1620,22 @@ func (manager *SnapshotManager) Diff(top string, snapshotID string, revisions [] buffer := make([]byte, 32*1024) var i, j int - for i < len(leftSnapshot.Files) || j < len(rightSnapshot.Files) { + for i < len(leftSnapshotFiles) || j < len(rightSnapshotFiles) { - if i >= len(leftSnapshot.Files) { - if rightSnapshot.Files[j].IsFile() { - LOG_INFO("SNAPSHOT_DIFF", "+ %s", rightSnapshot.Files[j].String(maxSizeDigits)) + if i >= len(leftSnapshotFiles) { + if rightSnapshotFiles[j].IsFile() { + LOG_INFO("SNAPSHOT_DIFF", "+ %s", rightSnapshotFiles[j].String(maxSizeDigits)) } j++ - } else if j >= len(rightSnapshot.Files) { - if leftSnapshot.Files[i].IsFile() { - LOG_INFO("SNAPSHOT_DIFF", "- %s", leftSnapshot.Files[i].String(maxSizeDigits)) + } else if j >= len(rightSnapshotFiles) { + if leftSnapshotFiles[i].IsFile() { + LOG_INFO("SNAPSHOT_DIFF", "- %s", leftSnapshotFiles[i].String(maxSizeDigits)) } i++ } else { - left := leftSnapshot.Files[i] - right := rightSnapshot.Files[j] + left := leftSnapshotFiles[i] + right := rightSnapshotFiles[j] if !left.IsFile() { i++ @@ -1679,6 +1690,12 @@ func (manager *SnapshotManager) ShowHistory(top string, snapshotID string, revis LOG_DEBUG("HISTORY_PARAMETERS", "top: %s, id: %s, revisions: %v, path: %s, showLocalHash: %t", top, snapshotID, revisions, filePath, showLocalHash) + manager.CreateChunkOperator(false, 1, false) + defer func() { + manager.chunkOperator.Stop() + manager.chunkOperator = nil + } () + var err error if len(revisions) == 0 { @@ -1693,7 +1710,7 @@ func (manager *SnapshotManager) ShowHistory(top string, snapshotID string, revis sort.Ints(revisions) for 
_, revision := range revisions { snapshot := manager.DownloadSnapshot(snapshotID, revision) - manager.DownloadSnapshotFileSequence(snapshot, nil, false) + manager.DownloadSnapshotSequences(snapshot) file := manager.FindFile(snapshot, filePath, true) if file != nil { @@ -1801,8 +1818,11 @@ func (manager *SnapshotManager) PruneSnapshots(selfID string, snapshotID string, LOG_WARN("DELETE_OPTIONS", "Tags or retention policy will be ignored if at least one revision is specified") } - manager.chunkOperator = CreateChunkOperator(manager.storage, threads) - defer manager.chunkOperator.Stop() + manager.CreateChunkOperator(false, threads, false) + defer func() { + manager.chunkOperator.Stop() + manager.chunkOperator = nil + } () prefPath := GetDuplicacyPreferencePath() logDir := path.Join(prefPath, "logs") @@ -2184,7 +2204,7 @@ func (manager *SnapshotManager) PruneSnapshots(selfID string, snapshotID string, return false } - manager.chunkOperator.Stop() + manager.chunkOperator.WaitForCompletion() for _, fossil := range manager.chunkOperator.fossils { collection.AddFossil(fossil) } @@ -2265,6 +2285,7 @@ func (manager *SnapshotManager) PruneSnapshots(selfID string, snapshotID string, } else { manager.CleanSnapshotCache(nil, allSnapshots) } + manager.chunkOperator.WaitForCompletion() return true } @@ -2477,8 +2498,6 @@ func (manager *SnapshotManager) pruneSnapshotsExhaustive(referencedFossils map[s // CheckSnapshot performs sanity checks on the given snapshot. 
func (manager *SnapshotManager) CheckSnapshot(snapshot *Snapshot) (err error) { - lastChunk := 0 - lastOffset := 0 var lastEntry *Entry numberOfChunks := len(snapshot.ChunkHashes) @@ -2488,57 +2507,39 @@ func (manager *SnapshotManager) CheckSnapshot(snapshot *Snapshot) (err error) { numberOfChunks, len(snapshot.ChunkLengths)) } - entries := make([]*Entry, len(snapshot.Files)) - copy(entries, snapshot.Files) - sort.Sort(ByChunk(entries)) + snapshot.ListRemoteFiles(manager.config, manager.chunkOperator, func (entry *Entry) bool { - for _, entry := range snapshot.Files { if lastEntry != nil && lastEntry.Compare(entry) >= 0 && !strings.Contains(lastEntry.Path, "\ufffd") { - return fmt.Errorf("The entry %s appears before the entry %s", lastEntry.Path, entry.Path) + err = fmt.Errorf("The entry %s appears before the entry %s", lastEntry.Path, entry.Path) + return false } lastEntry = entry - } - - for _, entry := range entries { if !entry.IsFile() || entry.Size == 0 { - continue + return true } if entry.StartChunk < 0 { - return fmt.Errorf("The file %s starts at chunk %d", entry.Path, entry.StartChunk) + err = fmt.Errorf("The file %s starts at chunk %d", entry.Path, entry.StartChunk) + return false } if entry.EndChunk >= numberOfChunks { - return fmt.Errorf("The file %s ends at chunk %d while the number of chunks is %d", + err = fmt.Errorf("The file %s ends at chunk %d while the number of chunks is %d", entry.Path, entry.EndChunk, numberOfChunks) + return false } if entry.EndChunk < entry.StartChunk { - return fmt.Errorf("The file %s starts at chunk %d and ends at chunk %d", + fmt.Errorf("The file %s starts at chunk %d and ends at chunk %d", entry.Path, entry.StartChunk, entry.EndChunk) + return false } - if entry.StartOffset > 0 { - if entry.StartChunk < lastChunk { - return fmt.Errorf("The file %s starts at chunk %d while the last chunk is %d", - entry.Path, entry.StartChunk, lastChunk) - } - - if entry.StartChunk > lastChunk+1 { - return fmt.Errorf("The file %s starts 
at chunk %d while the last chunk is %d", - entry.Path, entry.StartChunk, lastChunk) - } - - if entry.StartChunk == lastChunk && entry.StartOffset < lastOffset { - return fmt.Errorf("The file %s starts at offset %d of chunk %d while the last file ends at offset %d", - entry.Path, entry.StartOffset, entry.StartChunk, lastOffset) - } - - if entry.StartChunk == entry.EndChunk && entry.StartOffset > entry.EndOffset { - return fmt.Errorf("The file %s starts at offset %d and ends at offset %d of the same chunk %d", - entry.Path, entry.StartOffset, entry.EndOffset, entry.StartChunk) - } + if entry.StartChunk == entry.EndChunk && entry.StartOffset > entry.EndOffset { + err = fmt.Errorf("The file %s starts at offset %d and ends at offset %d of the same chunk %d", + entry.Path, entry.StartOffset, entry.EndOffset, entry.StartChunk) + return false } fileSize := int64(0) @@ -2558,22 +2559,13 @@ func (manager *SnapshotManager) CheckSnapshot(snapshot *Snapshot) (err error) { } if entry.Size != fileSize { - return fmt.Errorf("The file %s has a size of %d but the total size of chunks is %d", + err = fmt.Errorf("The file %s has a size of %d but the total size of chunks is %d", entry.Path, entry.Size, fileSize) + return false } - lastChunk = entry.EndChunk - lastOffset = entry.EndOffset - } - - if len(entries) > 0 && entries[0].StartChunk != 0 { - return fmt.Errorf("The first file starts at chunk %d", entries[0].StartChunk) - } - - // There may be a last chunk whose size is 0 so we allow this to happen - if lastChunk < numberOfChunks-2 { - return fmt.Errorf("The last file ends at chunk %d but the number of chunks is %d", lastChunk, numberOfChunks) - } + return true + }) return nil } diff --git a/src/duplicacy_snapshotmanager_test.go b/src/duplicacy_snapshotmanager_test.go index 0bb9c2d5..6e84c067 100644 --- a/src/duplicacy_snapshotmanager_test.go +++ b/src/duplicacy_snapshotmanager_test.go @@ -116,19 +116,18 @@ func createTestSnapshotManager(testDir string) *SnapshotManager { func 
uploadTestChunk(manager *SnapshotManager, content []byte) string { - completionFunc := func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) { + chunkOperator := CreateChunkOperator(manager.config, manager.storage, nil, false, testThreads, false) + chunkOperator.UploadCompletionFunc = func(chunk *Chunk, chunkIndex int, skipped bool, chunkSize int, uploadSize int) { LOG_INFO("UPLOAD_CHUNK", "Chunk %s size %d uploaded", chunk.GetID(), chunkSize) } - chunkUploader := CreateChunkUploader(manager.config, manager.storage, nil, testThreads, nil) - chunkUploader.completionFunc = completionFunc - chunkUploader.Start() - chunk := CreateChunk(manager.config, true) chunk.Reset(true) chunk.Write(content) - chunkUploader.StartChunk(chunk, 0) - chunkUploader.Stop() + + chunkOperator.Upload(chunk, 0, false) + chunkOperator.WaitForCompletion() + chunkOperator.Stop() return chunk.GetHash() } @@ -180,6 +179,12 @@ func createTestSnapshot(manager *SnapshotManager, snapshotID string, revision in func checkTestSnapshots(manager *SnapshotManager, expectedSnapshots int, expectedFossils int) { + manager.CreateChunkOperator(false, 1, false) + defer func() { + manager.chunkOperator.Stop() + manager.chunkOperator = nil + }() + var snapshotIDs []string var err error diff --git a/src/duplicacy_utils.go b/src/duplicacy_utils.go index 3b3cc5af..e1f2b88f 100644 --- a/src/duplicacy_utils.go +++ b/src/duplicacy_utils.go @@ -14,6 +14,7 @@ import ( "strconv" "strings" "time" + "runtime" "github.com/gilbertchen/gopass" "golang.org/x/crypto/pbkdf2" @@ -460,3 +461,16 @@ func AtoSize(sizeString string) int { return size } + +func PrintMemoryUsage() { + + for { + var m runtime.MemStats + runtime.ReadMemStats(&m) + + LOG_INFO("MEMORY_STATS", "Currently allocated: %s, total allocated: %s, system memory: %s, number of GCs: %d", + PrettySize(int64(m.Alloc)), PrettySize(int64(m.TotalAlloc)), PrettySize(int64(m.Sys)), m.NumGC) + + time.Sleep(time.Second) + } +} \ No newline at end of 
file diff --git a/src/duplicacy_utils_others.go b/src/duplicacy_utils_others.go index 1ed1462a..71f7a1e8 100644 --- a/src/duplicacy_utils_others.go +++ b/src/duplicacy_utils_others.go @@ -52,11 +52,11 @@ func (entry *Entry) ReadAttributes(top string) { fullPath := filepath.Join(top, entry.Path) attributes, _ := xattr.List(fullPath) if len(attributes) > 0 { - entry.Attributes = make(map[string][]byte) + entry.Attributes = &map[string][]byte{} for _, name := range attributes { attribute, err := xattr.Get(fullPath, name) if err == nil { - entry.Attributes[name] = attribute + (*entry.Attributes)[name] = attribute } } } @@ -68,19 +68,19 @@ func (entry *Entry) SetAttributesToFile(fullPath string) { for _, name := range names { - newAttribute, found := entry.Attributes[name] + newAttribute, found := (*entry.Attributes)[name] if found { oldAttribute, _ := xattr.Get(fullPath, name) if !bytes.Equal(oldAttribute, newAttribute) { xattr.Set(fullPath, name, newAttribute) } - delete(entry.Attributes, name) + delete(*entry.Attributes, name) } else { xattr.Remove(fullPath, name) } } - for name, attribute := range entry.Attributes { + for name, attribute := range *entry.Attributes { xattr.Set(fullPath, name, attribute) }