#!/usr/bin/env python
#
# This parser parses the output from Phil Harvey's exiftool (version 9.02)
# and converts it to XML format. It reads exiftool's output from stdin and
# writes the XML to stdout.
#
# In order to get the raw information from exiftool, we need to enable the
# verbose flag (-v2) of exiftool.
#
# Usage:
#      exiftool -v2 img.jpg | ./parser.py >> output.xml
#
#

import os
import sys
import re
from xml.sax.saxutils import escape

XML_HEADER = """<?xml version="1.0" encoding="utf-8"?>"""

# Placeholder emitted for sub-directory pointers and truncated values.
NO_VALUE = "NO_VALUE"

# Whitespace written in front of every <tag> element (cosmetic only).
TAG_INDENT = "    "

# Find the following two groups of strings:
#
# 1. tag (groups 1 and 2: whole entry, "| " nesting prefix):
#
#    | | | x) name = value
#    | | |     - Tag 0x1234
#
# 2. IFD indicator (groups 3 and 4: whole indicator, "| " nesting prefix):
#
#    | | | + [xxx directory with xx entries]
#
ENTRY_RE = re.compile(
    r"(((?:\| )+)[0-9]*\)(?:(?:.*? = .*?)|(?:.*? \(SubDirectory\) -->))"
    r"\n.*?- Tag 0x[0-9a-f]{4})"
    r"|"
    r"(((?:\| )*)\+ \[.*? directory with [0-9]+ entries]$)",
    re.M)

# "x) name = " or "x) name (SubDirectory) -->" at the start of a tag entry.
NAME_RE = re.compile(r"[0-9]*?\).*?(?:(?: = )|(?: \(SubDirectory\) -->))")

# Raw value printed in parentheses at the end of the first line of an entry.
PAREN_VALUE_RE = re.compile(r"\(.*\)\n")

# Everything after the first "=" on the first line of an entry.
PLAIN_VALUE_RE = re.compile(r"=.*\n")

SUBDIRECTORY_MARK = "(SubDirectory) -->"

# Every tag entry matched by ENTRY_RE ends with "0x" plus four hex digits.
TAG_ID_LENGTH = 6


def _pop_levels(stack, count):
    """Pop `count` directory names from `stack` (no-op when count <= 0)."""
    for _ in range(count):
        stack.pop()


def _extract_value(entry, name):
    """Return the value text of one tag entry, or NO_VALUE.

    The parenthesised raw value is preferred when present, except for the
    'Model' tag, whose text after "=" is always used. Sub-directory
    pointers and values containing "[snip]" yield NO_VALUE.
    """
    if SUBDIRECTORY_MARK in entry:
        return NO_VALUE
    raw = PAREN_VALUE_RE.search(entry)
    if name != 'Model' and raw:
        # Strip the opening "(" and the trailing ")\n".
        value = raw.group(0)[1:-2]
    else:
        # Strip the leading "= " and the trailing "\n".
        value = PLAIN_VALUE_RE.search(entry).group(0)[2:-1]
    if "[snip]" in value:
        return NO_VALUE
    return value


def _attr(text):
    """Escape `text` for use inside a double-quoted XML attribute."""
    return escape(text, {'"': "&quot;"})


def parse_tags(text):
    """Yield (ifd, tag_id, name, value) for every tag found in `text`.

    `text` is the verbose (-v2) output of exiftool. Directory indicators
    maintain a stack of IFD names; each tag is reported with the name on
    top of that stack.

    Raises IndexError if a tag entry is encountered while the directory
    stack is empty (e.g. a tag before any directory indicator).
    """
    depth = 0
    ifd_stack = []

    for match in ENTRY_RE.finditer(text):
        entry, entry_prefix, directory, dir_prefix = match.groups()

        if directory:
            # Each nesting level contributes one two-character "| " prefix.
            new_depth = len(dir_prefix) // 2 + 1
            # Skip the prefix and the "+ [" marker to reach the IFD name.
            ifd_name = directory[len(dir_prefix) + 3:].split()[0]
            if new_depth > depth:
                ifd_stack.append(ifd_name)
            else:
                _pop_levels(ifd_stack, depth - new_depth)
                ifd_stack[-1] = ifd_name
            depth = new_depth
            continue

        new_depth = len(entry_prefix) // 2
        if new_depth < depth:
            _pop_levels(ifd_stack, depth - new_depth)
            depth = new_depth

        # Take the ID from the trailing "- Tag 0x...." part; searching the
        # whole entry would pick up hex-looking text inside the value.
        tag_id = entry[-TAG_ID_LENGTH:]
        name = NAME_RE.search(entry).group(0).split()[1]
        value = _extract_value(entry, name)

        yield ifd_stack[-1], tag_id, name, value


def xml_lines(text):
    """Yield the lines (without newlines) of the XML document for `text`."""
    yield XML_HEADER
    yield "<exif>"
    for ifd, tag_id, name, value in parse_tags(text):
        # Escape everything that originates from exiftool's output so the
        # document stays well-formed.
        yield '{0}<tag ifd="{1}" id="{2}" name="{3}">{4}</tag>'.format(
            TAG_INDENT, _attr(ifd), tag_id, _attr(name), escape(value))
    yield "</exif>"


def convert(text):
    """Return the complete XML document for exiftool output `text`."""
    return "\n".join(xml_lines(text)) + "\n"


def main():
    """Read exiftool output from stdin and write the XML to stdout."""
    text = sys.stdin.read()
    for line in xml_lines(text):
        sys.stdout.write(line + "\n")


if __name__ == "__main__":
    main()