Jay McCanta
2013-05-12 23:36:54 UTC
I'm unsure of the proper protocol for proposing patches for the group. My apologies if this isn't the correct way.
We keep the md5 checksum of files in an extended attribute. When running s3cmd with --check-md5, this patch allows one to use the --xattr option and specify the name of the extended attribute to use for local md5sums. If the attribute is not there, a regular check is done, so no changes to existing procedures would be necessary. For us, this drastically improves check time since we are not re-reading every local file and recalculating every md5sum.
Also, there was a typo in the 'help' field of the --acl-revoke option definition in s3cmd. It's corrected in this patch.
diff -ru s3cmd-1.5.0-alpha3/s3cmd s3cmd-1.5.0-alpha3.jtm/s3cmd
--- s3cmd-1.5.0-alpha3/s3cmd 2013-03-10 17:06:33.000000000 -0700
+++ s3cmd-1.5.0-alpha3.jtm/s3cmd 2013-05-12 15:20:44.013584123 -0700
@@ -32,6 +32,13 @@
from logging import debug, info, warning, error
from distutils.spawn import find_executable
+try:
+ hasXattr = False
+ import xattr
+ hasXattr = True
+except:
+ pass
+
def output(message):
sys.stdout.write(message + "\n")
sys.stdout.flush()
@@ -1141,7 +1148,7 @@
def _invalidate_on_cf(destination_base_uri):
cf = CloudFront(cfg)
- default_index_file = None
+ efault_index_file = None
if cfg.invalidate_default_index_on_cf or cfg.invalidate_default_index_root_on_cf:
info_response = s3.website_info(destination_base_uri, cfg.bucket_location)
if info_response:
@@ -1507,9 +1514,9 @@
ret_enc = gpg_encrypt(filename)
ret_dec = gpg_decrypt(ret_enc[1], ret_enc[2], False)
hash = [
- Utils.hash_file_md5(filename),
- Utils.hash_file_md5(ret_enc[1]),
- Utils.hash_file_md5(ret_dec[1]),
+ Utils.hash_file_md5(filename, options.xattr),
+ Utils.hash_file_md5(ret_enc[1], options.xattr),
+ Utils.hash_file_md5(ret_dec[1], options.xattr),
]
os.unlink(filename)
os.unlink(ret_enc[1])
@@ -1762,11 +1769,13 @@
optparser.add_option( "--skip-existing", dest="skip_existing", action="store_true", help="Skip over files that exist at the destination (only for [get] and [sync] commands).")
optparser.add_option("-r", "--recursive", dest="recursive", action="store_true", help="Recursive upload, download or removal.")
optparser.add_option( "--check-md5", dest="check_md5", action="store_true", help="Check MD5 sums when comparing files for [sync]. (default)")
+ if hasXattr:
+ optparser.add_option ("--xattr", dest="xattr", action="store", help="If possible, use extended file attribute named (default:%default) instead of calculating it [sync].")
optparser.add_option( "--no-check-md5", dest="check_md5", action="store_false", help="Do not check MD5 sums when comparing files for [sync]. Only size will be compared. May significantly speed up transfer but may also miss some changed files.")
optparser.add_option("-P", "--acl-public", dest="acl_public", action="store_true", help="Store objects with ACL allowing read for anyone.")
optparser.add_option( "--acl-private", dest="acl_public", action="store_false", help="Store objects with default ACL allowing access for you only.")
optparser.add_option( "--acl-grant", dest="acl_grants", type="s3acl", action="append", metavar="PERMISSION:EMAIL or USER_CANONICAL_ID", help="Grant stated permission to a given amazon user. Permission is one of: read, write, read_acp, write_acp, full_control, all")
- optparser.add_option( "--acl-revoke", dest="acl_revokes", type="s3acl", action="append", metavar="PERMISSION:USER_CANONICAL_ID", help="Revoke stated permission for a given amazon user. Permission is one of: read, write, read_acp, wr ite_acp, full_control, all")
+ optparser.add_option( "--acl-revoke", dest="acl_revokes", type="s3acl", action="append", metavar="PERMISSION:USER_CANONICAL_ID", help="Revoke stated permission for a given amazon user. Permission is one of: read, write, read_acp, write_acp, full_control, all")
optparser.add_option( "--delete-removed", dest="delete_removed", action="store_true", help="Delete remote objects with no corresponding local file [sync]")
optparser.add_option( "--no-delete-removed", dest="delete_removed", action="store_false", help="Don't delete remote objects.")
Only in s3cmd-1.5.0-alpha3.jtm: s3cmd.orig
Jay McCanta | Senior Unix Administrator | F5 Networks, Inc.
We keep the md5 checksum of files in an extended attribute. When running s3cmd with --check-md5, this patch allows one to use the --xattr option and specify the name of the extended attribute to use for local md5sums. If the attribute is not there, a regular check is done, so no changes to existing procedures would be necessary. For us, this drastically improves check time since we are not re-reading every local file and recalculating every md5sum.
Also, there was a typo in the 'help' field of the --acl-revoke option definition in s3cmd. It's corrected in this patch.
diff -ru s3cmd-1.5.0-alpha3/s3cmd s3cmd-1.5.0-alpha3.jtm/s3cmd
--- s3cmd-1.5.0-alpha3/s3cmd 2013-03-10 17:06:33.000000000 -0700
+++ s3cmd-1.5.0-alpha3.jtm/s3cmd 2013-05-12 15:20:44.013584123 -0700
@@ -32,6 +32,13 @@
from logging import debug, info, warning, error
from distutils.spawn import find_executable
+try:
+ hasXattr = False
+ import xattr
+ hasXattr = True
+except:
+ pass
+
def output(message):
sys.stdout.write(message + "\n")
sys.stdout.flush()
@@ -1141,7 +1148,7 @@
def _invalidate_on_cf(destination_base_uri):
cf = CloudFront(cfg)
- default_index_file = None
+ efault_index_file = None
if cfg.invalidate_default_index_on_cf or cfg.invalidate_default_index_root_on_cf:
info_response = s3.website_info(destination_base_uri, cfg.bucket_location)
if info_response:
@@ -1507,9 +1514,9 @@
ret_enc = gpg_encrypt(filename)
ret_dec = gpg_decrypt(ret_enc[1], ret_enc[2], False)
hash = [
- Utils.hash_file_md5(filename),
- Utils.hash_file_md5(ret_enc[1]),
- Utils.hash_file_md5(ret_dec[1]),
+ Utils.hash_file_md5(filename, options.xattr),
+ Utils.hash_file_md5(ret_enc[1], options.xattr),
+ Utils.hash_file_md5(ret_dec[1], options.xattr),
]
os.unlink(filename)
os.unlink(ret_enc[1])
@@ -1762,11 +1769,13 @@
optparser.add_option( "--skip-existing", dest="skip_existing", action="store_true", help="Skip over files that exist at the destination (only for [get] and [sync] commands).")
optparser.add_option("-r", "--recursive", dest="recursive", action="store_true", help="Recursive upload, download or removal.")
optparser.add_option( "--check-md5", dest="check_md5", action="store_true", help="Check MD5 sums when comparing files for [sync]. (default)")
+ if hasXattr:
+ optparser.add_option ("--xattr", dest="xattr", action="store", help="If possible, use extended file attribute named (default:%default) instead of calculating it [sync].")
optparser.add_option( "--no-check-md5", dest="check_md5", action="store_false", help="Do not check MD5 sums when comparing files for [sync]. Only size will be compared. May significantly speed up transfer but may also miss some changed files.")
optparser.add_option("-P", "--acl-public", dest="acl_public", action="store_true", help="Store objects with ACL allowing read for anyone.")
optparser.add_option( "--acl-private", dest="acl_public", action="store_false", help="Store objects with default ACL allowing access for you only.")
optparser.add_option( "--acl-grant", dest="acl_grants", type="s3acl", action="append", metavar="PERMISSION:EMAIL or USER_CANONICAL_ID", help="Grant stated permission to a given amazon user. Permission is one of: read, write, read_acp, write_acp, full_control, all")
- optparser.add_option( "--acl-revoke", dest="acl_revokes", type="s3acl", action="append", metavar="PERMISSION:USER_CANONICAL_ID", help="Revoke stated permission for a given amazon user. Permission is one of: read, write, read_acp, wr ite_acp, full_control, all")
+ optparser.add_option( "--acl-revoke", dest="acl_revokes", type="s3acl", action="append", metavar="PERMISSION:USER_CANONICAL_ID", help="Revoke stated permission for a given amazon user. Permission is one of: read, write, read_acp, write_acp, full_control, all")
optparser.add_option( "--delete-removed", dest="delete_removed", action="store_true", help="Delete remote objects with no corresponding local file [sync]")
optparser.add_option( "--no-delete-removed", dest="delete_removed", action="store_false", help="Don't delete remote objects.")
Only in s3cmd-1.5.0-alpha3.jtm: s3cmd.orig
Jay McCanta | Senior Unix Administrator | F5 Networks, Inc.