mercredi 22 juin 2016

Obtain multiple slices efficiently from Objectified XML


If I have an XML document that I have parsed with lxml's objectify, how do I efficiently get slices of it, i.e. only a chosen subset of each element's attributes?

My script.

# from lxml import etree
from lxml import objectify
import argparse
import os

# Command line: one or more paths (only the first one is used below) and an
# optional file extension used to select which files get parsed.
parser = argparse.ArgumentParser()
parser.add_argument("path", type=str, nargs="+")
parser.add_argument('-e',
                    '--extension',
                    default='',
                    help='File extension to filter by.')

args = parser.parse_args()
name_pattern = "*" + args.extension
my_dir = args.path[0]

# Collect the names of the files that sit directly inside my_dir and end with
# the requested extension (the default '' keeps every file).
# Only the top level is listed because getsMeet() rebuilds each path by
# prefixing the bare name with my_dir, which would be wrong for files found in
# sub-directories. The loop uses its own variable names so that name_pattern
# keeps the glob pattern computed above.
file_list = []  # stays empty when my_dir does not exist or cannot be walked
for dir_path, subdir_list, names in os.walk(my_dir):
    file_list = [name for name in names if name.endswith(args.extension)]
    break


def getsMeet(file_list):
    """Yield the path of each file in ``file_list``, in sorted name order.

    Args:
        file_list: Iterable of bare file names located in ``my_dir``.

    Yields:
        Each name joined onto the module-level ``my_dir``.
    """
    for filename in sorted(file_list):
        # os.path.join adds the separator when my_dir has no trailing one
        # ("data" works as well as "data/"), unlike plain concatenation.
        yield os.path.join(my_dir, filename)

def parseXML():
    """Objectify every collected XML file and print its first nomination.

    For each path yielded by ``getsMeet(file_list)`` the file content is
    objectified, a dump of the first ``race.nomination`` element is printed,
    and then a boolean is printed telling whether the ``.race.nomination``
    path exists under the root.
    """
    # The object path does not depend on the file, so build it once.
    find = objectify.ObjectPath(".race.nomination")
    for xml_path in getsMeet(file_list):
        # Read bytes rather than text: lxml refuses str input that carries an
        # XML encoding declaration, and decodes bytes itself from that
        # declaration.
        with open(xml_path, "rb") as f:
            xml = f.read()

        # The file is already closed here; parsing only needs the content.
        root = objectify.fromstring(xml)
        print(objectify.dump(root.race.nomination[0]))
        print(find.hasattr(root))


# Script entry point: parse and print every file collected above.
parseXML()

The XML is structured as follows: meeting (the root) contains club and race elements, and each race contains condition and nomination elements.

So this print shows the structure of the first nomination:

print(objectify.dump(root.race.nomination[0]))

(pyxml) [sayth@localhost pyxml]$ python xrace.py data/ -e .xml
nomination = '' [StringElement]
  * number = '8'
  * saddlecloth = '8'
  * horse = 'Chipanda'
  * id = '198926'
  * idnumber = ''
  * regnumber = ''
  * blinkers = '0'
  * trainernumber = '235'
  * trainersurname = "O'Shea"
  * trainerfirstname = 'John'
  * trainertrack = 'Agnes Banks/Hawkesbury'
  * rsbtrainername = "John O'Shea"
  * jockeynumber = '84015'
  * jockeysurname = 'Avdulla'
  * jockeyfirstname = 'Brenton'
  * barrier = '5'
  * weight = '54'
  * rating = '0'
  * description = 'B F 2 Sepoy x Lobola (Anabaa(USA))'
  * colours = 'Royal Blue'
  * owners = 'Godolphin '
  * dob = '2013-10-08T00:00:00'
  * age = '3'
  * sex = 'F'
  * career = '2-0-0-2 $30225.00'
  * thistrack = '1-0-0-1 $15000.00'
  * thisdistance = '0-0-0-0'
  * goodtrack = '0-0-0-0'
  * heavytrack = '0-0-0-0'
  * slowtrack = ''
  * deadtrack = ''
  * fasttrack = '0-0-0-0'
  * firstup = '2-0-0-2 $30225.00'
  * secondup = '0-0-0-0'
  * mindistancewin = '0'
  * maxdistancewin = '0'
  * finished = '1'
  * weightvariation = '0'
  * variedweight = '54'
  * decimalmargin = '0.00'
  * penalty = '0'
  * pricestarting = '$3.50'
  * sectional200 = '0'
  * sectional400 = '0'
  * sectional600 = '0'
  * sectional800 = '0'
  * sectional1200 = '0'
  * bonusindicator = 'E'
True

If I want to return only these attributes, how should I do it?

  * number = '8'
  * saddlecloth = '8'
  * horse = 'Chipanda'
  * id = '198926'
  * barrier = '5'
  * weight = '54'
  * rating = '0'
  * description = 'B F 2 Sepoy x Lobola (Anabaa(USA))'
  * colours = 'Royal Blue'
  * owners = 'Godolphin '
  * dob = '2013-10-08T00:00:00'
  * age = '3'
  * sex = 'F'
  * career = '2-0-0-2 $30225.00'
  * thistrack = '1-0-0-1 $15000.00'
  * thisdistance = '0-0-0-0'
  * goodtrack = '0-0-0-0'
  * heavytrack = '0-0-0-0'
  * finished = '1'
  * weightvariation = '0'
  * variedweight = '54'
  * decimalmargin = '0.00'
  * penalty = '0'
  * pricestarting = '$3.50'

Sample XML

<meeting id="42977" barriertrial="0" venue="Rosehill Gardens" date="2016-05-21T00:00:00" gearchanges="-1" stewardsreport="-1" gearlist="-1" racebook="0" postracestewards="0" meetingtype="TAB" rail="Timing - Electronic : Rail - +6m" weather="Fine      " trackcondition="Good 3    " nomsdeadline="2016-05-16T11:00:00" weightsdeadline="2016-05-17T16:00:00" acceptdeadline="2016-05-18T09:00:00" jockeydeadline="2016-05-18T12:00:00">
  <club abbrevname="Australian Turf Club" code="56398" associationclass="1" website="http://" />
  <race id="215411" number="1" nomnumber="9" division="0" name="LES CARLYON AC PLATE" mediumname="2Y-SWP" shortname="2Y-SWP" stage="Results" distance="1200" minweight="0" raisedweight="0" class="~         " age="2         " grade="0" weightcondition="SWP       " trophy="0" owner="0" trainer="0" jockey="0" strapper="0" totalprize="85000" first="48750" second="16750" third="8350" fourth="4150" fifth="2000" time="2016-05-21T11:25:00" bonustype="BOB7      " nomsfee="0" acceptfee="0" trackcondition="Good 3    " timingmethod="Electronic" fastesttime="1-10.22   " sectionaltime="600/34.78 " formavailable="0" racebookprize="Of $85000. First $48750, second $16750, third $8350, fourth $4150, fifth $2000, sixth $1000, seventh $1000, eighth $1000, ninth $1000, tenth $1000">
    <condition line="1">Of $85000. First $48750, second $16750, third $8350, fourth $4150, fifth $2000, sixth $1000, seventh $1000, eighth $1000, ninth $1000, tenth $1000</condition>
    <condition line="2">Starter Subsidy: $200 for non-prize earning runners.</condition>
    <condition line="3">No class restriction, Set Weights plus Penalties, For Two-Years-Old, No sex restriction</condition>
    <condition line="4">BOBS Bonus available: $20,000</condition>
    <condition line="5">Apprentices can claim. Field Limit: 16 + 4 EM</condition>
    <nomination number="8" saddlecloth="8" horse="Chipanda" id="198926" idnumber="" regnumber="" blinkers="0" trainernumber="235" trainersurname="O'Shea" trainerfirstname="John" trainertrack="Agnes Banks/Hawkesbury" rsbtrainername="John O'Shea" jockeynumber="84015" jockeysurname="Avdulla" jockeyfirstname="Brenton" barrier="5" weight="54" rating="0" description="B F 2 Sepoy x Lobola (Anabaa(USA))" colours="Royal Blue" owners="Godolphin " dob="2013-10-08T00:00:00" age="3" sex="F" career="2-0-0-2 $30225.00" thistrack="1-0-0-1 $15000.00" thisdistance="0-0-0-0" goodtrack="0-0-0-0" heavytrack="0-0-0-0" slowtrack="" deadtrack="" fasttrack="0-0-0-0" firstup="2-0-0-2 $30225.00" secondup="0-0-0-0" mindistancewin="0" maxdistancewin="0" finished="1" weightvariation="0" variedweight="54" decimalmargin="0.00" penalty="0" pricestarting="$3.50" sectional200="0" sectional400="0" sectional600="0" sectional800="0" sectional1200="0" bonusindicator="E" />
    <nomination number="1" saddlecloth="1" horse="Legerity" id="200769" idnumber="" regnumber="" blinkers="0" trainernumber="77974" trainersurname="Hawkes" trainerfirstname="Michael" trainertrack="Rosehill" rsbtrainername="Michael, Wayne &amp; John Hawkes" jockeynumber="2687" jockeysurname="Reith" jockeyfirstname="Christian" barrier="1" weight="57.5" rating="0" description="B C 2 Snitzel x Simply Spiteful(USA) (Speightstown(USA))" colours="Purple, Gold Checks, Quartered Cap" owners="Highgrove Stud Syndicate (Mgr: R T Gilbert)" dob="2013-08-30T00:00:00" age="3" sex="C" career="4-1-1-1 $85075.00" thistrack="1-1-0-0 $68750.00" thisdistance="0-0-0-0" goodtrack="3-1-0-1 $77150.00" heavytrack="0-0-0-0" slowtrack="" deadtrack="" fasttrack="0-0-0-0" firstup="2-0-1-1 $15125.00" secondup="2-1-0-0 $69950.00" mindistancewin="0" maxdistancewin="0" finished="2" weightvariation="0" variedweight="57.5" decimalmargin="0.50" penalty="0" pricestarting="$2.50F" sectional200="0" sectional400="0" sectional600="0" sectional800="0" sectional1200="0" bonusindicator="E" />
</race>
</meeting>

I can get all the values out with this defaultdict; however, it seems I am not using objectify properly.

# Group the attribute values of every nomination by attribute name: d maps
# each attribute name to the list of its values, one entry per <nomination>
# element that carries that attribute.
d = defaultdict(list)
# nomItems = ['id', 'horse']
for sample in root.xpath('//race/nomination'):
    for attr_name, attr_value in sample.items():
        d[attr_name].append(attr_value)

pprint(dict(d))

Aucun commentaire:

Enregistrer un commentaire