Source Code for Module CedarBackup3.actions.collect

# -*- coding: iso-8859-1 -*-
# vim: set ft=python ts=3 sw=3 expandtab:
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
#              C E D A R
#          S O L U T I O N S       "Software done right."
#           S O F T W A R E
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
# Copyright (c) 2004-2008,2011,2015 Kenneth J. Pronovici.
# All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License,
# Version 2, as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Copies of the GNU General Public License are available from
# the Free Software Foundation website, http://www.gnu.org/.
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
# Author   : Kenneth J. Pronovici <pronovic@ieee.org>
# Language : Python 3 (>= 3.4)
# Project  : Cedar Backup, release 3
# Purpose  : Implements the standard 'collect' action.
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

########################################################################
# Module documentation
########################################################################

"""
Implements the standard 'collect' action.
@sort: executeCollect
@author: Kenneth J. Pronovici <pronovic@ieee.org>
"""


########################################################################
# Imported modules
########################################################################

# System modules
import os
import logging
import pickle

# Cedar Backup modules
from CedarBackup3.filesystem import BackupFileList, FilesystemList
from CedarBackup3.util import isStartOfWeek, changeOwnership, displayBytes, buildNormalizedPath
from CedarBackup3.actions.constants import DIGEST_EXTENSION, COLLECT_INDICATOR
from CedarBackup3.actions.util import writeIndicatorFile


########################################################################
# Module-wide constants and variables
########################################################################

logger = logging.getLogger("CedarBackup3.log.actions.collect")


########################################################################
# Public functions
########################################################################

############################
# executeCollect() function
############################

def executeCollect(configPath, options, config):
   """
   Executes the collect backup action.

   @note: When the collect action is complete, we will write a collect
   indicator to the collect directory, so it's obvious that the collect action
   has completed. The stage process uses this indicator to decide whether a
   peer is ready to be staged.

   @param configPath: Path to configuration file on disk.
   @type configPath: String representing a path on disk.

   @param options: Program command-line options.
   @type options: Options object.

   @param config: Program configuration.
   @type config: Config object.

   @raise ValueError: Under many generic error conditions
   @raise TarError: If there is a problem creating a tar file
   """
   logger.debug("Executing the 'collect' action.")
   if config.options is None or config.collect is None:
      raise ValueError("Collect configuration is not properly filled in.")
   if ((config.collect.collectFiles is None or len(config.collect.collectFiles) < 1) and
       (config.collect.collectDirs is None or len(config.collect.collectDirs) < 1)):
      raise ValueError("There must be at least one collect file or collect directory.")
   fullBackup = options.full
   logger.debug("Full backup flag is [%s]", fullBackup)
   todayIsStart = isStartOfWeek(config.options.startingDay)
   resetDigest = fullBackup or todayIsStart
   logger.debug("Reset digest flag is [%s]", resetDigest)
   if config.collect.collectFiles is not None:
      for collectFile in config.collect.collectFiles:
         logger.debug("Working with collect file [%s]", collectFile.absolutePath)
         collectMode = _getCollectMode(config, collectFile)
         archiveMode = _getArchiveMode(config, collectFile)
         digestPath = _getDigestPath(config, collectFile.absolutePath)
         tarfilePath = _getTarfilePath(config, collectFile.absolutePath, archiveMode)
         if fullBackup or (collectMode in ['daily', 'incr', ]) or (collectMode == 'weekly' and todayIsStart):
            logger.debug("File meets criteria to be backed up today.")
            _collectFile(config, collectFile.absolutePath, tarfilePath,
                         collectMode, archiveMode, resetDigest, digestPath)
         else:
            logger.debug("File will not be backed up, per collect mode.")
         logger.info("Completed collecting file [%s]", collectFile.absolutePath)
   if config.collect.collectDirs is not None:
      for collectDir in config.collect.collectDirs:
         logger.debug("Working with collect directory [%s]", collectDir.absolutePath)
         collectMode = _getCollectMode(config, collectDir)
         archiveMode = _getArchiveMode(config, collectDir)
         ignoreFile = _getIgnoreFile(config, collectDir)
         linkDepth = _getLinkDepth(collectDir)
         dereference = _getDereference(collectDir)
         recursionLevel = _getRecursionLevel(collectDir)
         (excludePaths, excludePatterns) = _getExclusions(config, collectDir)
         if fullBackup or (collectMode in ['daily', 'incr', ]) or (collectMode == 'weekly' and todayIsStart):
            logger.debug("Directory meets criteria to be backed up today.")
            _collectDirectory(config, collectDir.absolutePath,
                              collectMode, archiveMode, ignoreFile, linkDepth, dereference,
                              resetDigest, excludePaths, excludePatterns, recursionLevel)
         else:
            logger.debug("Directory will not be backed up, per collect mode.")
         logger.info("Completed collecting directory [%s]", collectDir.absolutePath)
   writeIndicatorFile(config.collect.targetDir, COLLECT_INDICATOR,
                      config.options.backupUser, config.options.backupGroup)
   logger.info("Executed the 'collect' action successfully.")
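
# ----------------------------------------------------------------------
# Illustrative sketch (not part of the original module): the scheduling
# rule applied above to each collect file and collect directory, restated
# as a stand-alone predicate for clarity.  The helper name is hypothetical.
# ----------------------------------------------------------------------
def _exampleShouldCollectToday(fullBackup, todayIsStart, collectMode):
   """Hedged restatement of the collect-scheduling rule used by executeCollect()."""
   # Full backups collect everything; 'daily' and 'incr' items are collected
   # every day; 'weekly' items are collected only on the configured starting day.
   return fullBackup or collectMode in ['daily', 'incr'] or (collectMode == 'weekly' and todayIsStart)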


########################################################################
# Private utility functions
########################################################################

##########################
# _collectFile() function
##########################

def _collectFile(config, absolutePath, tarfilePath, collectMode, archiveMode, resetDigest, digestPath):
   """
   Collects a configured collect file.

   The indicated collect file is collected into the indicated tarfile.
   For files that are collected incrementally, we'll use the indicated
   digest path and pay attention to the reset digest flag (basically, the reset
   digest flag ignores any existing digest, but a new digest is always
   rewritten).

   The caller must decide what the collect and archive modes are, since they
   can be on both the collect configuration and the collect file itself.

   @param config: Config object.
   @param absolutePath: Absolute path of file to collect.
   @param tarfilePath: Path to tarfile that should be created.
   @param collectMode: Collect mode to use.
   @param archiveMode: Archive mode to use.
   @param resetDigest: Reset digest flag.
   @param digestPath: Path to digest file on disk, if needed.
   """
   backupList = BackupFileList()
   backupList.addFile(absolutePath)
   _executeBackup(config, backupList, absolutePath, tarfilePath, collectMode, archiveMode, resetDigest, digestPath)


###############################
# _collectDirectory() function
###############################

def _collectDirectory(config, absolutePath, collectMode, archiveMode,
                      ignoreFile, linkDepth, dereference, resetDigest,
                      excludePaths, excludePatterns, recursionLevel):
   """
   Collects a configured collect directory.

   The indicated collect directory is collected into a tarfile whose path is
   derived from the directory's absolute path and archive mode.
   For directories that are collected incrementally, we'll use the indicated
   digest path and pay attention to the reset digest flag (basically, the reset
   digest flag ignores any existing digest, but a new digest is always
   rewritten).

   The caller must decide what the collect and archive modes are, since they
   can be on both the collect configuration and the collect directory itself.

   @param config: Config object.
   @param absolutePath: Absolute path of directory to collect.
   @param collectMode: Collect mode to use.
   @param archiveMode: Archive mode to use.
   @param ignoreFile: Ignore file to use.
   @param linkDepth: Link depth value to use.
   @param dereference: Dereference flag to use.
   @param resetDigest: Reset digest flag.
   @param excludePaths: List of absolute paths to exclude.
   @param excludePatterns: List of patterns to exclude.
   @param recursionLevel: Recursion level (zero for no recursion)
   """
   if recursionLevel == 0:
      # Collect the actual directory because we're at recursion level 0
      logger.info("Collecting directory [%s]", absolutePath)
      tarfilePath = _getTarfilePath(config, absolutePath, archiveMode)
      digestPath = _getDigestPath(config, absolutePath)

      backupList = BackupFileList()
      backupList.ignoreFile = ignoreFile
      backupList.excludePaths = excludePaths
      backupList.excludePatterns = excludePatterns
      backupList.addDirContents(absolutePath, linkDepth=linkDepth, dereference=dereference)

      _executeBackup(config, backupList, absolutePath, tarfilePath, collectMode, archiveMode, resetDigest, digestPath)
   else:
      # Find all of the immediate subdirectories
      subdirs = FilesystemList()
      subdirs.excludeFiles = True
      subdirs.excludeLinks = True
      subdirs.excludePaths = excludePaths
      subdirs.excludePatterns = excludePatterns
      subdirs.addDirContents(path=absolutePath, recursive=False, addSelf=False)

      # Back up the subdirectories separately
      for subdir in subdirs:
         _collectDirectory(config, subdir, collectMode, archiveMode,
                           ignoreFile, linkDepth, dereference, resetDigest,
                           excludePaths, excludePatterns, recursionLevel-1)
         excludePaths.append(subdir)  # this directory is already backed up, so exclude it

      # Back up everything that hasn't previously been backed up
      _collectDirectory(config, absolutePath, collectMode, archiveMode,
                        ignoreFile, linkDepth, dereference, resetDigest,
                        excludePaths, excludePatterns, 0)
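
# ----------------------------------------------------------------------
# Illustrative example (hypothetical layout, not part of the original
# module): with recursionLevel=1, a collect directory /home containing
# subdirectories user1/ and user2/ is split into one backup per immediate
# subdirectory, plus a final pass at recursion level 0 that picks up any
# remaining top-level files with the already-collected subdirectories
# excluded.  Roughly:
#
#    _collectDirectory(config, "/home/user1", ..., recursionLevel=0)
#    _collectDirectory(config, "/home/user2", ..., recursionLevel=0)
#    _collectDirectory(config, "/home", ..., recursionLevel=0)   # excludes /home/user1, /home/user2
#
# The resulting tarfile names come from _getTarfilePath(), so each
# subdirectory ends up in its own archive in the collect target directory.
# ----------------------------------------------------------------------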


############################
# _executeBackup() function
############################

def _executeBackup(config, backupList, absolutePath, tarfilePath, collectMode, archiveMode, resetDigest, digestPath):
   """
   Execute the backup process for the indicated backup list.

   This function exists mainly to consolidate functionality between the
   L{_collectFile} and L{_collectDirectory} functions.  Those functions build
   the backup list; this function causes the backup to execute properly and
   also manages usage of the digest file on disk as explained in their
   comments.

   For collect files, the digest file will always just contain the single file
   that is being backed up.  This might be a little wasteful in terms of the
   number of files that we keep around, but it's consistent and easy to understand.

   @param config: Config object.
   @param backupList: List to execute backup for
   @param absolutePath: Absolute path of directory or file to collect.
   @param tarfilePath: Path to tarfile that should be created.
   @param collectMode: Collect mode to use.
   @param archiveMode: Archive mode to use.
   @param resetDigest: Reset digest flag.
   @param digestPath: Path to digest file on disk, if needed.
   """
   if collectMode != 'incr':
      logger.debug("Collect mode is [%s]; no digest will be used.", collectMode)
      if len(backupList) == 1 and backupList[0] == absolutePath:  # special case for individual file
         logger.info("Backing up file [%s] (%s).", absolutePath, displayBytes(backupList.totalSize()))
      else:
         logger.info("Backing up %d files in [%s] (%s).", len(backupList), absolutePath, displayBytes(backupList.totalSize()))
      if len(backupList) > 0:
         backupList.generateTarfile(tarfilePath, archiveMode, True)
         changeOwnership(tarfilePath, config.options.backupUser, config.options.backupGroup)
   else:
      if resetDigest:
         logger.debug("Based on resetDigest flag, digest will be cleared.")
         oldDigest = {}
      else:
         logger.debug("Based on resetDigest flag, digest will be loaded from disk.")
         oldDigest = _loadDigest(digestPath)
      (removed, newDigest) = backupList.removeUnchanged(oldDigest, captureDigest=True)
      logger.debug("Removed %d unchanged files based on digest values.", removed)
      if len(backupList) == 1 and backupList[0] == absolutePath:  # special case for individual file
         logger.info("Backing up file [%s] (%s).", absolutePath, displayBytes(backupList.totalSize()))
      else:
         logger.info("Backing up %d files in [%s] (%s).", len(backupList), absolutePath, displayBytes(backupList.totalSize()))
      if len(backupList) > 0:
         backupList.generateTarfile(tarfilePath, archiveMode, True)
         changeOwnership(tarfilePath, config.options.backupUser, config.options.backupGroup)
      _writeDigest(config, newDigest, digestPath)


#########################
# _loadDigest() function
#########################

def _loadDigest(digestPath):
   """
   Loads the indicated digest path from disk into a dictionary.

   If we can't load the digest successfully (either because it doesn't exist or
   for some other reason), then an empty dictionary will be returned - but the
   condition will be logged.

   @param digestPath: Path to the digest file on disk.

   @return: Dictionary representing contents of digest path.
   """
   if not os.path.isfile(digestPath):
      digest = {}
      logger.debug("Digest [%s] does not exist on disk.", digestPath)
   else:
      try:
         with open(digestPath, "rb") as f:
            digest = pickle.load(f, fix_imports=True)  # be compatible with Python 2
         logger.debug("Loaded digest [%s] from disk: %d entries.", digestPath, len(digest))
      except Exception as e:
         digest = {}
         logger.error("Failed loading digest [%s] from disk: %s", digestPath, e)
   return digest


##########################
# _writeDigest() function
##########################

def _writeDigest(config, digest, digestPath):
   """
   Writes the digest dictionary to the indicated digest path on disk.

   If we can't write the digest successfully for any reason, we'll log the
   condition but won't throw an exception.

   @param config: Config object.
   @param digest: Digest dictionary to write to disk.
   @param digestPath: Path to the digest file on disk.
   """
   try:
      with open(digestPath, "wb") as f:
         pickle.dump(digest, f, 0, fix_imports=True)  # be compatible with Python 2
      changeOwnership(digestPath, config.options.backupUser, config.options.backupGroup)
      logger.debug("Wrote new digest [%s] to disk: %d entries.", digestPath, len(digest))
   except Exception as e:
      logger.error("Failed to write digest [%s] to disk: %s", digestPath, e)
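
# ----------------------------------------------------------------------
# Illustrative sketch (not part of the original module): the digest is
# simply a pickled dictionary, assumed to map absolute file paths to the
# content digests captured by BackupFileList.removeUnchanged(captureDigest=True).
# The helper name, example path, and example entry below are hypothetical.
# ----------------------------------------------------------------------
def _exampleDigestRoundTrip(config):
   """Hedged demonstration of the _loadDigest()/_writeDigest() round trip."""
   digestPath = os.path.join(config.options.workingDir, "example.%s" % DIGEST_EXTENSION)
   digest = _loadDigest(digestPath)            # empty dictionary if the file is missing
   digest["/etc/hosts"] = "0123456789abcdef"   # hypothetical path-to-digest entry
   _writeDigest(config, digest, digestPath)    # logs (but does not raise) on failure
   return digest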


########################################################################
# Private attribute "getter" functions
########################################################################

#############################
# _getCollectMode() function
#############################

def _getCollectMode(config, item):
   """
   Gets the collect mode that should be used for a collect directory or file.
   If possible, use the one on the file or directory, otherwise take from collect section.
   @param config: Config object.
   @param item: C{CollectFile} or C{CollectDir} object
   @return: Collect mode to use.
   """
   if item.collectMode is None:
      collectMode = config.collect.collectMode
   else:
      collectMode = item.collectMode
   logger.debug("Collect mode is [%s]", collectMode)
   return collectMode


#############################
# _getArchiveMode() function
#############################

def _getArchiveMode(config, item):
   """
   Gets the archive mode that should be used for a collect directory or file.
   If possible, use the one on the file or directory, otherwise take from collect section.
   @param config: Config object.
   @param item: C{CollectFile} or C{CollectDir} object
   @return: Archive mode to use.
   """
   if item.archiveMode is None:
      archiveMode = config.collect.archiveMode
   else:
      archiveMode = item.archiveMode
   logger.debug("Archive mode is [%s]", archiveMode)
   return archiveMode


############################
# _getIgnoreFile() function
############################

def _getIgnoreFile(config, item):
   """
   Gets the ignore file that should be used for a collect directory or file.
   If possible, use the one on the file or directory, otherwise take from collect section.
   @param config: Config object.
   @param item: C{CollectFile} or C{CollectDir} object
   @return: Ignore file to use.
   """
   if item.ignoreFile is None:
      ignoreFile = config.collect.ignoreFile
   else:
      ignoreFile = item.ignoreFile
   logger.debug("Ignore file is [%s]", ignoreFile)
   return ignoreFile


###########################
# _getLinkDepth() function
###########################

def _getLinkDepth(item):
   """
   Gets the link depth that should be used for a collect directory.
   If possible, use the one on the directory, otherwise set a value of 0 (zero).
   @param item: C{CollectDir} object
   @return: Link depth to use.
   """
   if item.linkDepth is None:
      linkDepth = 0
   else:
      linkDepth = item.linkDepth
   logger.debug("Link depth is [%d]", linkDepth)
   return linkDepth


#############################
# _getDereference() function
#############################

def _getDereference(item):
   """
   Gets the dereference flag that should be used for a collect directory.
   If possible, use the one on the directory, otherwise set a value of False.
   @param item: C{CollectDir} object
   @return: Dereference flag to use.
   """
   if item.dereference is None:
      dereference = False
   else:
      dereference = item.dereference
   logger.debug("Dereference flag is [%s]", dereference)
   return dereference


################################
# _getRecursionLevel() function
################################

def _getRecursionLevel(item):
   """
   Gets the recursion level that should be used for a collect directory.
   If possible, use the one on the directory, otherwise set a value of 0 (zero).
   @param item: C{CollectDir} object
   @return: Recursion level to use.
   """
   if item.recursionLevel is None:
      recursionLevel = 0
   else:
      recursionLevel = item.recursionLevel
   logger.debug("Recursion level is [%d]", recursionLevel)
   return recursionLevel


############################
# _getDigestPath() function
############################

def _getDigestPath(config, absolutePath):
   """
   Gets the digest path associated with a collect directory or file.
   @param config: Config object.
   @param absolutePath: Absolute path to generate digest for
   @return: Absolute path to the digest associated with the collect directory or file.
   """
   normalized = buildNormalizedPath(absolutePath)
   filename = "%s.%s" % (normalized, DIGEST_EXTENSION)
   digestPath = os.path.join(config.options.workingDir, filename)
   logger.debug("Digest path is [%s]", digestPath)
   return digestPath


#############################
# _getTarfilePath() function
#############################

def _getTarfilePath(config, absolutePath, archiveMode):
   """
   Gets the tarfile path (including correct extension) associated with a collect directory.
   @param config: Config object.
   @param absolutePath: Absolute path to generate tarfile for
   @param archiveMode: Archive mode to use for this tarfile.
   @return: Absolute path to the tarfile associated with the collect directory.
   """
   if archiveMode == 'tar':
      extension = "tar"
   elif archiveMode == 'targz':
      extension = "tar.gz"
   elif archiveMode == 'tarbz2':
      extension = "tar.bz2"
   normalized = buildNormalizedPath(absolutePath)
   filename = "%s.%s" % (normalized, extension)
   tarfilePath = os.path.join(config.collect.targetDir, filename)
   logger.debug("Tarfile path is [%s]", tarfilePath)
   return tarfilePath
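
# ----------------------------------------------------------------------
# Illustrative example (assumption, not part of the original module):
# buildNormalizedPath() is assumed to flatten an absolute path into a
# filesystem-safe file name, so a collect directory such as /usr/local/share
# with archiveMode 'targz' would produce a tarfile path along the lines of:
#
#    os.path.join(config.collect.targetDir, "usr-local-share.tar.gz")
#
# and a matching digest path in config.options.workingDir with the
# DIGEST_EXTENSION suffix.  The exact normalized name depends on
# buildNormalizedPath() in CedarBackup3.util.
# ----------------------------------------------------------------------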


############################
# _getExclusions() function
############################

def _getExclusions(config, collectDir):
   """
   Gets exclusions (files and patterns) associated with a collect directory.

   The returned files value is a list of absolute paths to be excluded from the
   backup for a given directory.  It is derived from the collect configuration
   absolute exclude paths and the collect directory's absolute and relative
   exclude paths.

   The returned patterns value is a list of patterns to be excluded from the
   backup for a given directory.  It is derived from the list of patterns from
   the collect configuration and from the collect directory itself.

   @param config: Config object.
   @param collectDir: Collect directory object.

   @return: Tuple (files, patterns) indicating what to exclude.
   """
   paths = []
   if config.collect.absoluteExcludePaths is not None:
      paths.extend(config.collect.absoluteExcludePaths)
   if collectDir.absoluteExcludePaths is not None:
      paths.extend(collectDir.absoluteExcludePaths)
   if collectDir.relativeExcludePaths is not None:
      for relativePath in collectDir.relativeExcludePaths:
         paths.append(os.path.join(collectDir.absolutePath, relativePath))
   patterns = []
   if config.collect.excludePatterns is not None:
      patterns.extend(config.collect.excludePatterns)
   if collectDir.excludePatterns is not None:
      patterns.extend(collectDir.excludePatterns)
   logger.debug("Exclude paths: %s", paths)
   logger.debug("Exclude patterns: %s", patterns)
   return (paths, patterns)
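
# ----------------------------------------------------------------------
# Illustrative sketch (not part of the original module): _getExclusions()
# only reads attributes, so its merging behaviour can be demonstrated with
# simple stand-in objects.  The helper name and all paths/patterns below
# are hypothetical.
# ----------------------------------------------------------------------
def _exampleExclusionMerge():
   """Hedged demonstration of how _getExclusions() merges configuration."""
   from types import SimpleNamespace
   config = SimpleNamespace(collect=SimpleNamespace(absoluteExcludePaths=["/var/cache"],
                                                    excludePatterns=[r".*\.tmp$"]))
   collectDir = SimpleNamespace(absolutePath="/home/user",
                                absoluteExcludePaths=None,
                                relativeExcludePaths=[".cache"],
                                excludePatterns=None)
   # Expected result: (["/var/cache", "/home/user/.cache"], [".*\\.tmp$"])
   return _getExclusions(config, collectDir)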