it-swarm.com.de

Analysieren der Manifest.mbdb-Datei in einem iOS 4.0-iTunes-Backup

In iOS 4.0 hat Apple den Sicherungsprozess neu gestaltet.

iTunes hat früher eine Liste mit Dateinamen gespeichert, die mit Sicherungsdateien in der Datei "Manifest.plist" verknüpft sind. In iOS 4.0 wurden diese Informationen jedoch in eine Datei "Manifest.mbdb" verschoben

Sie können ein Beispiel für diese Datei anzeigen, indem Sie mit Ihren iOS 4.0-Geräten eine Sicherungskopie erstellen und in Ihrem Ordner ~/Library/Application Support/MobileSync/Backup nachsehen.

Hier ist ein Screenshot davon, wie die Datei in einem Texteditor aussieht:

alt text
(Quelle: supercrazyawesome.com )

Wie kann ich diese Datei in einer Cocoa-Anwendung analysieren (parsen), damit ich meine (kostenlose) iPhone Backup Extractor-App (http://supercrazyawesome.com) für iOS 4.0 aktualisieren kann?

82
Padraig

Vielen Dank, user374559 und reneD - dieser Code und diese Beschreibung sind sehr hilfreich.

Mein Versuch in Python, die Datei zu analysieren und die Informationen in einem Format ähnlich der Ausgabe von Unix „ls -l“ auszugeben:

#!/usr/bin/env python
import sys

def getint(data, offset, intsize):
    """Read a big-endian integer out of ``data``.

    Consumes ``intsize`` characters starting at ``offset`` (most significant
    first) and returns a ``(value, new_offset)`` tuple, where ``new_offset``
    points just past the last character consumed.
    """
    total = 0
    for _ in range(intsize):
        # Shift what we have up by one byte and append the next one.
        total = total * 256 + ord(data[offset])
        offset += 1
    return total, offset

def getstring(data, offset):
    """Read a length-prefixed string out of ``data``.

    A pair of 0xFF characters at ``offset`` stands for a blank string and
    takes up two characters. Otherwise the first two characters are a
    big-endian length, followed by that many characters of payload.

    Returns a ``(string, new_offset)`` tuple.
    """
    blank_marker = chr(0xFF)
    if data[offset] == blank_marker and data[offset + 1] == blank_marker:
        return '', offset + 2  # Blank string
    size, start = getint(data, offset, 2)  # 2-byte length
    end = start + size
    return data[start:end], end

def process_mbdb_file(filename):
    """Parse a Manifest.mbdb file and return its records.

    The file is read whole, checked for the "mbdb" magic, and then decoded
    record by record until the end of the data.

    Args:
        filename: path of the Manifest.mbdb file to read.

    Returns:
        A dict mapping the start offset of each record in the file to a dict
        describing that record: its string fields, its integer fields and a
        'properties' dict holding the extra name/value pairs.

    Raises:
        Exception: if the data does not begin with "mbdb".
    """
    # Every record starts with these five strings, in this order ...
    string_fields = ('domain', 'filename', 'linktarget', 'datahash', 'unknown1')
    # ... followed by these big-endian integers (field name, size in bytes).
    int_fields = (
        ('mode', 2), ('unknown2', 4), ('unknown3', 4),
        ('userid', 4), ('groupid', 4),
        ('mtime', 4), ('atime', 4), ('ctime', 4),
        ('filelen', 8), ('flag', 1), ('numprops', 1),
    )
    mbdb = {} # Map offset of info in this file => file info
    # The manifest is binary data: open it in binary mode so that no
    # text-mode translation can alter or truncate it, and close the handle
    # as soon as the content has been read.
    with open(filename, 'rb') as manifest:
        data = manifest.read()
    if data[0:4] != "mbdb":
        raise Exception("This does not look like an MBDB file")
    offset = 4
    offset = offset + 2 # value x05 x00, not sure what this is
    size = len(data)
    while offset < size:
        fileinfo = {'start_offset': offset}
        for key in string_fields:
            fileinfo[key], offset = getstring(data, offset)
        for key, width in int_fields:
            fileinfo[key], offset = getint(data, offset, width)
        # 'numprops' name/value string pairs trail the fixed part of a record.
        properties = {}
        for _ in range(fileinfo['numprops']):
            propname, offset = getstring(data, offset)
            propval, offset = getstring(data, offset)
            properties[propname] = propval
        fileinfo['properties'] = properties
        mbdb[fileinfo['start_offset']] = fileinfo
    return mbdb

def process_mbdx_file(filename):
    """Parse a Manifest.mbdx index file.

    Args:
        filename: path of the Manifest.mbdx file to read.

    Returns:
        A dict mapping a record offset in the MBDB file (already adjusted by
        the 6-byte MBDB prolog) to the record's fileID as a hex string.

    Raises:
        Exception: if the data does not begin with "mbdx".
    """
    mbdx = {} # Map offset of info in the MBDB file => fileID string
    # Binary data: read it in binary mode and close the handle right away.
    with open(filename, 'rb') as index:
        data = index.read()
    if data[0:4] != "mbdx":
        raise Exception("This does not look like an MBDX file")
    offset = 4
    offset = offset + 2 # value 0x02 0x00, not sure what this is
    # 4-byte count of records; not needed because we read until end of data.
    _, offset = getint(data, offset, 4)
    size = len(data)
    while offset < size:
        # 26 byte record, made up of ...
        fileID = data[offset:offset+20] # 20 bytes of fileID
        fileID_string = ''.join('%02x' % ord(b) for b in fileID)
        offset = offset + 20
        mbdb_offset, offset = getint(data, offset, 4) # 4-byte offset field
        mbdb_offset = mbdb_offset + 6 # Add 6 to get past prolog
        _, offset = getint(data, offset, 2) # 2-byte mode field, skipped
        mbdx[mbdb_offset] = fileID_string
    return mbdx

def modestr(val):
    """Render the low nine permission bits of ``val`` as an 'rwxrwxrwx' string.

    Each group of three bits (owner, group, other) becomes three characters,
    with '-' standing in for a bit that is not set.
    """
    def triplet(bits):
        # Only the lowest three bits of ``bits`` are looked at.
        flags = (('r', 0x4), ('w', 0x2), ('x', 0x1))
        return ''.join(letter if bits & mask else '-' for letter, mask in flags)
    return ''.join(triplet(val >> shift) for shift in (6, 3, 0))

def fileinfo_str(f, verbose=False):
    """Format one file record as a single line of text.

    Args:
        f: record dict; must have 'fileID', 'domain' and 'filename', and for
            the verbose form also 'mode', 'userid', 'groupid', 'filelen',
            'mtime', 'atime', 'ctime', 'linktarget' and 'properties'.
        verbose: when false, return the short "(fileID)domain::filename"
            form; when true, return an ls -l style line.

    Returns:
        The formatted string. A record whose type bits are not recognised is
        shown with type '?' and a warning is written to stderr.
    """
    if not verbose:
        return "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])
    kind_bits = f['mode'] & 0xE000
    if kind_bits == 0xA000:
        kind = 'l' # symlink
    elif kind_bits == 0x8000:
        kind = '-' # file
    elif kind_bits == 0x4000:
        kind = 'd' # dir
    else:
        sys.stderr.write("Unknown file type %04x for %s\n"
                         % (f['mode'], fileinfo_str(f, False)))
        kind = '?' # unknown
    parts = ["%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" %
             (kind, modestr(f['mode'] & 0x0FFF), f['userid'], f['groupid'], f['filelen'],
              f['mtime'], f['atime'], f['ctime'], f['fileID'], f['domain'], f['filename'])]
    if kind == 'l':
        parts.append(' -> ' + f['linktarget']) # symlink destination
    for name, value in f['properties'].items(): # extra properties
        parts.append(' ' + name + '=' + repr(value))
    return ''.join(parts)

# When True, records are printed in the long ls -l style format.
verbose = True
if __name__ == '__main__':
    # Parse both manifest files from the current directory and print one
    # line per record, pairing each record with its fileID via its offset.
    mbdb = process_mbdb_file("Manifest.mbdb")
    mbdx = process_mbdx_file("Manifest.mbdx")
    for offset, fileinfo in mbdb.items():
        if offset in mbdx:
            fileinfo['fileID'] = mbdx[offset]
        else:
            fileinfo['fileID'] = "<nofileID>"
            sys.stderr.write("No fileID found for %s\n" % fileinfo_str(fileinfo))
        print(fileinfo_str(fileinfo, verbose))
83
galloglass

In iOS 5 wurde die Manifest.mbdx-Datei entfernt. Für den Zweck dieses Artikels war sie ohnehin redundant, da sich die Domäne und der Pfad in Manifest.mbdb befinden und der ID-Hash mit SHA1 generiert werden kann.

Hier ist mein Update des Galloglass-Codes, damit er mit Sicherungen von iOS 5-Geräten funktioniert. Die einzigen Änderungen sind das Entfernen von process_mbdx_file () und das Hinzufügen einiger Zeilen in process_mbdb_file ().

Getestet mit Sicherungen eines iPhone 4S und eines iPad 1, beide mit zahlreichen Apps und Dateien.

#!/usr/bin/env python
import sys
import hashlib

# Module-level map: start offset of a record in Manifest.mbdb => fileID
# (SHA-1 hex digest). Filled in by process_mbdb_file().
mbdx = {}

def getint(data, offset, intsize):
    """Decode ``intsize`` characters of ``data`` at ``offset`` as a big-endian integer.

    Returns ``(value, new_offset)``; ``new_offset`` is the position right
    after the characters that were read.
    """
    number = 0
    for _ in range(intsize):
        # Most significant byte comes first, so earlier bytes move up.
        number = number * 256 + ord(data[offset])
        offset += 1
    return number, offset

def getstring(data, offset):
    """Decode a length-prefixed string from ``data`` at ``offset``.

    Two 0xFF characters denote a blank string (two characters consumed).
    Any other value is a 2-byte big-endian length followed by the string.

    Returns ``(string, new_offset)``.
    """
    ff = chr(0xFF)
    is_blank = data[offset] == ff and data[offset + 1] == ff
    if is_blank:
        return '', offset + 2  # Blank string
    count, body_start = getint(data, offset, 2)  # 2-byte length
    body_end = body_start + count
    return data[body_start:body_end], body_end

def process_mbdb_file(filename):
    """Parse a Manifest.mbdb file and return its records.

    As a side effect, the module-level ``mbdx`` dict receives, for every
    record, the SHA-1 hex digest of "<domain>-<filename>" keyed by the
    record's start offset.

    Args:
        filename: path of the Manifest.mbdb file to read.

    Returns:
        A dict mapping the start offset of each record in the file to a dict
        describing that record.

    Raises:
        Exception: if the data does not begin with "mbdb".
    """
    # Every record starts with these five strings, in this order ...
    string_fields = ('domain', 'filename', 'linktarget', 'datahash', 'unknown1')
    # ... followed by these big-endian integers (field name, size in bytes).
    int_fields = (
        ('mode', 2), ('unknown2', 4), ('unknown3', 4),
        ('userid', 4), ('groupid', 4),
        ('mtime', 4), ('atime', 4), ('ctime', 4),
        ('filelen', 8), ('flag', 1), ('numprops', 1),
    )
    mbdb = {} # Map offset of info in this file => file info
    # The manifest is binary data: open it in binary mode so that no
    # text-mode translation can alter or truncate it, and close the handle
    # as soon as the content has been read.
    with open(filename, 'rb') as manifest:
        data = manifest.read()
    if data[0:4] != "mbdb":
        raise Exception("This does not look like an MBDB file")
    offset = 4
    offset = offset + 2 # value x05 x00, not sure what this is
    size = len(data)
    while offset < size:
        fileinfo = {'start_offset': offset}
        for key in string_fields:
            fileinfo[key], offset = getstring(data, offset)
        for key, width in int_fields:
            fileinfo[key], offset = getint(data, offset, width)
        # 'numprops' name/value string pairs trail the fixed part of a record.
        properties = {}
        for _ in range(fileinfo['numprops']):
            propname, offset = getstring(data, offset)
            propval, offset = getstring(data, offset)
            properties[propname] = propval
        fileinfo['properties'] = properties
        mbdb[fileinfo['start_offset']] = fileinfo
        # The fileID is the SHA-1 of "<domain>-<filename>".
        fullpath = fileinfo['domain'] + '-' + fileinfo['filename']
        mbdx[fileinfo['start_offset']] = hashlib.sha1(fullpath).hexdigest()
    return mbdb

def modestr(val):
    """Turn the low nine bits of ``val`` into an 'rwxrwxrwx' style string.

    Bits that are not set are shown as '-'.
    """
    letters = 'rwx'
    masks = (0x4, 0x2, 0x1)

    def group(bits):
        # One rwx group from the three lowest bits of ``bits``.
        out = ''
        for letter, mask in zip(letters, masks):
            out += letter if bits & mask else '-'
        return out

    owner = group(val >> 6)
    grp = group(val >> 3)
    other = group(val)
    return owner + grp + other

def fileinfo_str(f, verbose=False):
    """Format one file record as a single line of text.

    Args:
        f: record dict; must have 'fileID', 'domain' and 'filename', and for
            the verbose form also 'mode', 'userid', 'groupid', 'filelen',
            'mtime', 'atime', 'ctime', 'linktarget' and 'properties'.
        verbose: when false, return the short "(fileID)domain::filename"
            form; when true, return an ls -l style line.

    Returns:
        The formatted string. A record whose type bits are not recognised is
        shown with type '?' and a warning is written to stderr.
    """
    if not verbose:
        return "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])
    kind_bits = f['mode'] & 0xE000
    if kind_bits == 0xA000:
        kind = 'l' # symlink
    elif kind_bits == 0x8000:
        kind = '-' # file
    elif kind_bits == 0x4000:
        kind = 'd' # dir
    else:
        sys.stderr.write("Unknown file type %04x for %s\n"
                         % (f['mode'], fileinfo_str(f, False)))
        kind = '?' # unknown
    parts = ["%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" %
             (kind, modestr(f['mode'] & 0x0FFF), f['userid'], f['groupid'], f['filelen'],
              f['mtime'], f['atime'], f['ctime'], f['fileID'], f['domain'], f['filename'])]
    if kind == 'l':
        parts.append(' -> ' + f['linktarget']) # symlink destination
    for name, value in f['properties'].items(): # extra properties
        parts.append(' ' + name + '=' + repr(value))
    return ''.join(parts)

# When True, records are printed in the long ls -l style format.
verbose = True
if __name__ == '__main__':
    # Parse Manifest.mbdb from the current directory; process_mbdb_file()
    # also fills the module-level mbdx map with the computed fileIDs.
    mbdb = process_mbdb_file("Manifest.mbdb")
    for offset, fileinfo in mbdb.items():
        if offset in mbdx:
            fileinfo['fileID'] = mbdx[offset]
        else:
            fileinfo['fileID'] = "<nofileID>"
            sys.stderr.write("No fileID found for %s\n" % fileinfo_str(fileinfo))
        print(fileinfo_str(fileinfo, verbose))
29
Robert Munafo

Ich habe meine Arbeit an diesem Thema abgeschlossen – das heißt, das Update meiner Backup-Decoder-Bibliothek für Python auf iOS 4 + iTunes 9.2: http://www.iki.fi/fingon/iphonebackupdb.py

Es macht, was ich brauche, wenig Dokumentation, kann aber gerne Ideen von dort kopieren ;-)

(Scheint zumindest mit meinen Backups gut zu funktionieren.)

18
user374559

Informationen und eine kleine Beschreibung des MBDB/MBDX-Formats finden Sie hier:

http://code.google.com/p/iphonebackupbrowser/

Dies ist meine Anwendung zum Durchsuchen der Sicherungsdateien. Ich habe versucht, das Format der neuen Dateien zu dokumentieren, die mit iTunes 9.2 geliefert werden.

10
reneD

Dieses python Skript ist großartig.

Hier ist meine Ruby Version davon (mit geringfügigen Verbesserungen) und Suchfunktionen. (Für iOS 5)

# encoding: utf-8
require 'fileutils'
require 'digest/sha1'

class ManifestParser
  def initialize(mbdb_filename, verbose = false)
    @verbose = verbose
    process_mbdb_file(mbdb_filename)
  end

  # Returns the numbers of records in the Manifest files.
  def record_number
    @mbdb.size
  end

  # Returns a huge string containing the parsing of the Manifest files.
  def to_s
    s = ''
    @mbdb.each do |v|
      s += "#{fileinfo_str(v)}\n"
    end
    s
  end

  def to_file(filename)
    File.open(filename, 'w') do |f|
      @mbdb.each do |v|
        f.puts fileinfo_str(v)
      end
    end
  end

  # Copy the backup files to their real path/name.
  # * domain_match Can be a regexp to restrict the files to copy.
  # * filename_match Can be a regexp to restrict the files to copy.
  def rename_files(domain_match = nil, filename_match = nil)
    @mbdb.each do |v|
      if v[:type] == '-' # Only rename files.
        if (domain_match.nil? or v[:domain] =~ domain_match) and (filename_match.nil? or v[:filename] =~ filename_match)
          dst = "#{v[:domain]}/#{v[:filename]}"
          puts "Creating: #{dst}"
          FileUtils.mkdir_p(File.dirname(dst))
          FileUtils.cp(v[:fileID], dst)
        end
      end
    end
  end

  # Return the filename that math the given regexp.
  def search(regexp)
    result = Array.new
    @mbdb.each do |v|
      if "#{v[:domain]}::#{v[:filename]}" =~ regexp
        result << v
      end
    end
    result
  end

  private
  # Retrieve an integer (big-endian) and new offset from the current offset
  def getint(data, offset, intsize)
    value = 0
    while intsize > 0
      value = (value<<8) + data[offset].ord
      offset += 1
      intsize -= 1
    end
    return value, offset
  end

  # Retrieve a string and new offset from the current offset into the data
  def getstring(data, offset)
    return '', offset + 2 if data[offset] == 0xFF.chr and data[offset + 1] == 0xFF.chr # Blank string
    length, offset = getint(data, offset, 2) # 2-byte length
    value = data[offset...(offset + length)]
    return value, (offset + length)
  end

  def process_mbdb_file(filename)
    @mbdb = Array.new
    data = File.open(filename, 'rb') { |f| f.read }
    puts "MBDB file read. Size: #{data.size}"
    raise 'This does not look like an MBDB file' if data[0...4] != 'mbdb'
    offset = 4
    offset += 2 # value x05 x00, not sure what this is
    while offset < data.size
      fileinfo = Hash.new
      fileinfo[:start_offset] = offset
      fileinfo[:domain], offset = getstring(data, offset)
      fileinfo[:filename], offset = getstring(data, offset)
      fileinfo[:linktarget], offset = getstring(data, offset)
      fileinfo[:datahash], offset = getstring(data, offset)
      fileinfo[:unknown1], offset = getstring(data, offset)
      fileinfo[:mode], offset = getint(data, offset, 2)
      if (fileinfo[:mode] & 0xE000) == 0xA000 # Symlink
        fileinfo[:type] = 'l'
      elsif (fileinfo[:mode] & 0xE000) == 0x8000 # File
        fileinfo[:type] = '-'
      elsif (fileinfo[:mode] & 0xE000) == 0x4000 # Dir
        fileinfo[:type] = 'd'
      else
        # $stderr.puts "Unknown file type %04x for #{fileinfo_str(f, false)}" % f['mode']
        fileinfo[:type] = '?'
      end
      fileinfo[:unknown2], offset = getint(data, offset, 4)
      fileinfo[:unknown3], offset = getint(data, offset, 4)
      fileinfo[:userid], offset = getint(data, offset, 4)
      fileinfo[:groupid], offset = getint(data, offset, 4)
      fileinfo[:mtime], offset = getint(data, offset, 4)
      fileinfo[:atime], offset = getint(data, offset, 4)
      fileinfo[:ctime], offset = getint(data, offset, 4)
      fileinfo[:filelen], offset = getint(data, offset, 8)
      fileinfo[:flag], offset = getint(data, offset, 1)
      fileinfo[:numprops], offset = getint(data, offset, 1)
      fileinfo[:properties] = Hash.new
      (0...(fileinfo[:numprops])).each do |ii|
        propname, offset = getstring(data, offset)
        propval, offset = getstring(data, offset)
        fileinfo[:properties][propname] = propval
      end
      # Compute the ID of the file.
      fullpath = fileinfo[:domain] + '-' + fileinfo[:filename]
      fileinfo[:fileID] = Digest::SHA1.hexdigest(fullpath)
      # We add the file to the list of files.
      @mbdb << fileinfo
    end
    @mbdb
  end

  def modestr(val)
    def mode(val)
      r = (val & 0x4) ? 'r' : '-'
      w = (val & 0x2) ? 'w' : '-'
      x = (val & 0x1) ? 'x' : '-'
      r + w + x
    end
    mode(val >> 6) + mode(val >> 3) + mode(val)
  end

  def fileinfo_str(f)
    return "(#{f[:fileID]})#{f[:domain]}::#{f[:filename]}" unless @verbose
    data = [f[:type], modestr(f[:mode]), f[:userid], f[:groupid], f[:filelen], f[:mtime], f[:atime], f[:ctime], f[:fileID], f[:domain], f[:filename]]
    info = "%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" % data
    info += ' -> ' + f[:linktarget] if f[:type] == 'l' # Symlink destination
    f[:properties].each do |k, v|
      info += " #{k}=#{v.inspect}"
    end
    info
  end
end

# Script entry point: parse Manifest.mbdb from the current directory in
# verbose mode and write the listing to filenames.txt.
if $PROGRAM_NAME == __FILE__
  parser = ManifestParser.new('Manifest.mbdb', true)
  parser.to_file('filenames.txt')
end
7
Balzard

Mir gefiel der Code von galloglass, und ich habe die Hauptfunktion so geändert, dass eine sortierte Liste der Gesamtgröße pro Anwendung angezeigt wird:

# When True, records are printed in the long ls -l style format.
verbose = True
if __name__ == '__main__':
    mbdb = process_mbdb_file("Manifest.mbdb")
    mbdx = process_mbdx_file("Manifest.mbdx")
    sizes = {} # Map domain => total length of its regular files
    for offset, fileinfo in mbdb.items():
        if offset in mbdx:
            fileinfo['fileID'] = mbdx[offset]
        else:
            fileinfo['fileID'] = "<nofileID>"
            sys.stderr.write("No fileID found for %s\n" % fileinfo_str(fileinfo))
        print(fileinfo_str(fileinfo, verbose))
        # Only regular files (type bits 0x8000) count towards the totals.
        if (fileinfo['mode'] & 0xE000) == 0x8000:
            sizes[fileinfo['domain']] = sizes.get(fileinfo['domain'], 0) + fileinfo['filelen']
    # Smallest domain first, so the biggest consumers end up at the bottom.
    for domain in sorted(sizes, key=sizes.get):
        print("%-60s %11d (%dMB)" % (domain, sizes[domain], sizes[domain] // 1024 // 1024))

Auf diese Weise können Sie herausfinden, welche Anwendung den gesamten Speicherplatz belegt.

4
w00t

Für diejenigen, die eine Java Implementierung eines MBDB-Dateireader suchen, gibt es mehrere:

2
david_p

Danke für die Antwort von galloglass. Der Code funktioniert hervorragend mit Python 2.7. Ich möchte nur eines ergänzen: Wenn Sie die Datei Manifest.mbdb lesen, sollten Sie sie im Binärmodus öffnen. Andernfalls wird nicht der gesamte Inhalt gelesen.

Ich habe auch ein paar kleinere Änderungen vorgenommen, damit der Code mit Python 3.4 funktioniert. Hier ist der Code.

#!/usr/bin/env python
import sys
import hashlib

# Module-level map: start offset of a record in Manifest.mbdb => fileID
# (SHA-1 hex digest). Filled in by process_mbdb_file().
mbdx = {}

def getint(data, offset, intsize):
    """Decode ``intsize`` bytes of ``data`` at ``offset`` as a big-endian integer.

    Returns ``(value, new_offset)``; ``new_offset`` is the position right
    after the bytes that were read.
    """
    number = 0
    for _ in range(intsize):
        # Most significant byte comes first, so earlier bytes move up.
        number = number * 256 + data[offset]
        offset += 1
    return number, offset

def getstring(data, offset):
    """Decode a length-prefixed string from the bytes ``data`` at ``offset``.

    Two 0xFF bytes denote a blank string (two bytes consumed). Any other
    value is a 2-byte big-endian length followed by the payload, which is
    returned decoded as latin-1.

    Returns ``(string, new_offset)``.
    """
    if data[offset] == 0xFF and data[offset + 1] == 0xFF:
        return '', offset + 2  # Blank string
    count, body_start = getint(data, offset, 2)  # 2-byte length
    body_end = body_start + count
    raw = data[body_start:body_end]
    return raw.decode(encoding='latin-1'), body_end

def process_mbdb_file(filename):
    """Parse a Manifest.mbdb file and return its records.

    As a side effect, the module-level ``mbdx`` dict receives, for every
    record, the SHA-1 hex digest of "<domain>-<filename>" keyed by the
    record's start offset.

    Args:
        filename: path of the Manifest.mbdb file to read.

    Returns:
        A dict mapping the start offset of each record in the file to a dict
        describing that record.

    Raises:
        Exception: if the data does not begin with the bytes b"mbdb".
    """
    # Every record starts with these five strings, in this order ...
    string_fields = ('domain', 'filename', 'linktarget', 'datahash', 'unknown1')
    # ... followed by these big-endian integers (field name, size in bytes).
    int_fields = (
        ('mode', 2), ('unknown2', 4), ('unknown3', 4),
        ('userid', 4), ('groupid', 4),
        ('mtime', 4), ('atime', 4), ('ctime', 4),
        ('filelen', 8), ('flag', 1), ('numprops', 1),
    )
    mbdb = {}  # Map offset of info in this file => file info
    # 'b' is needed to read all content at once; the context manager closes
    # the handle as soon as the data has been read.
    with open(filename, 'rb') as manifest:
        data = manifest.read()
    # Compare bytes with bytes: decoding the magic first could fail with a
    # UnicodeDecodeError on arbitrary input instead of the error below.
    if data[0:4] != b"mbdb":
        raise Exception("This does not look like an MBDB file")
    offset = 4
    offset = offset + 2  # value x05 x00, not sure what this is
    size = len(data)
    while offset < size:
        fileinfo = {'start_offset': offset}
        for key in string_fields:
            fileinfo[key], offset = getstring(data, offset)
        for key, width in int_fields:
            fileinfo[key], offset = getint(data, offset, width)
        # 'numprops' name/value string pairs trail the fixed part of a record.
        properties = {}
        for _ in range(fileinfo['numprops']):
            propname, offset = getstring(data, offset)
            propval, offset = getstring(data, offset)
            properties[propname] = propval
        fileinfo['properties'] = properties
        mbdb[fileinfo['start_offset']] = fileinfo
        fullpath = fileinfo['domain'] + '-' + fileinfo['filename']
        # getstring() decodes with latin-1, so encoding with latin-1 restores
        # the exact bytes read from the manifest. The default UTF-8 encoding
        # would hash different bytes for any name containing non-ASCII
        # characters.
        digest = hashlib.sha1(fullpath.encode('latin-1'))
        mbdx[fileinfo['start_offset']] = digest.hexdigest()
    return mbdb

def modestr(val):
    """Render the low nine permission bits of ``val`` as an 'rwxrwxrwx' string.

    Bits that are not set are shown as '-'.
    """
    def mode(bits):
        # One rwx group from the three lowest bits of ``bits``.
        return (('r' if bits & 0x4 else '-')
                + ('w' if bits & 0x2 else '-')
                + ('x' if bits & 0x1 else '-'))
    return ''.join(mode(val >> shift) for shift in (6, 3, 0))

def fileinfo_str(f, verbose=False):
    """Format one file record as a single line of text.

    Args:
        f: record dict; must have 'fileID', 'domain' and 'filename', and for
            the verbose form also 'mode', 'userid', 'groupid', 'filelen',
            'mtime', 'atime', 'ctime', 'linktarget' and 'properties'.
        verbose: when false, return the short "(fileID)domain::filename"
            form; when true, return an ls -l style line.

    Returns:
        The formatted string. A record whose type bits are not recognised is
        shown with type '?' and a warning is printed to stderr.
    """
    if not verbose:
        return "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])
    kind_bits = f['mode'] & 0xE000
    if kind_bits == 0xA000:
        kind = 'l'  # symlink
    elif kind_bits == 0x8000:
        kind = '-'  # file
    elif kind_bits == 0x4000:
        kind = 'd'  # dir
    else:
        # print() with file=...: the Python 2 "print >> stream" form is a
        # TypeError at runtime on Python 3.
        print("Unknown file type %04x for %s" % (f['mode'], fileinfo_str(f, False)),
              file=sys.stderr)
        kind = '?'  # unknown
    parts = ["%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" %
             (kind, modestr(f['mode'] & 0x0FFF), f['userid'], f['groupid'], f['filelen'],
              f['mtime'], f['atime'], f['ctime'], f['fileID'], f['domain'], f['filename'])]
    if kind == 'l':
        parts.append(' -> ' + f['linktarget'])  # symlink destination
    for name, value in f['properties'].items():  # extra properties
        parts.append(' ' + name + '=' + repr(value))
    return ''.join(parts)

# When True, records are printed in the long ls -l style format.
verbose = True
if __name__ == '__main__':
    # Parse Manifest.mbdb from the current directory; process_mbdb_file()
    # also fills the module-level mbdx map with the computed fileIDs.
    mbdb = process_mbdb_file(
        r"Manifest.mbdb")
    for offset, fileinfo in mbdb.items():
        if offset in mbdx:
            fileinfo['fileID'] = mbdx[offset]
        else:
            fileinfo['fileID'] = "<nofileID>"
            # print() with file=...: "print >> sys.stderr" is a TypeError at
            # runtime on Python 3.
            print("No fileID found for %s" % fileinfo_str(fileinfo), file=sys.stderr)
        print(fileinfo_str(fileinfo, verbose))
0
Oseack