Getting A Memory Error When Parsing A Large XML File In Python
My XML file looks like this: ... ... I h
Solution 1:
Try the following code:
lxml.etree
import lxml.etree
from gzip import open as gopen


class GroupDictTarget(object):
    """Parser target that gathers link targets per group into a dict.

    Each ``group`` start tag creates a fresh list stored in ``d`` under the
    key ``(attrib['from'], attrib['to'])``; each ``link`` start tag appends
    its ``target`` attribute to the list of the most recently started group.
    All other tags are ignored.
    """

    def __init__(self, d):
        # The caller-supplied dict is filled in place while parsing.
        self.d = d

    def start(self, tag, attrib):
        """Handle the start of an element named *tag* with *attrib*."""
        if tag == 'group':
            # Keep a reference to the new list so later links can extend it.
            self.group = self.d[attrib['from'], attrib['to']] = []
        elif tag == 'link':
            self.group.append(attrib['target'])

    def close(self):
        """Finish parsing; there is nothing to finalize or return."""
        pass


def extractTargets(fin):
    """Parse the gzip-compressed XML file *fin* and return the targets dict.

    The result maps ``(from, to)`` attribute pairs of ``group`` elements to
    the list of ``target`` attributes of the ``link`` elements that follow.
    """
    with gopen(fin) as xml:
        targets = {}
        # A target parser feeds events to GroupDictTarget as it reads the file.
        parser = lxml.etree.XMLParser(target=GroupDictTarget(targets))
        lxml.etree.parse(xml, parser)
        return targets
xml.parsers.expat
import xml.parsers.expat
from gzip import open as gopen


class GroupDictTarget(object):
    """Start-tag handler that gathers link targets per group into a dict.

    Each ``group`` start tag creates a fresh list stored in ``d`` under the
    key ``(attrib['from'], attrib['to'])``; each ``link`` start tag appends
    its ``target`` attribute to the list of the most recently started group.
    All other tags are ignored.
    """

    def __init__(self, d):
        # The caller-supplied dict is filled in place while parsing.
        self.d = d

    def start(self, tag, attrib):
        """Handle the start of an element named *tag* with *attrib*."""
        if tag == 'group':
            # Keep a reference to the new list so later links can extend it.
            self.group = self.d[attrib['from'], attrib['to']] = []
        elif tag == 'link':
            self.group.append(attrib['target'])

    def close(self):
        """Finish parsing; there is nothing to finalize or return."""
        pass


def extractTargets(fin):
    """Parse the gzip-compressed XML file *fin* and return the targets dict.

    The result maps ``(from, to)`` attribute pairs of ``group`` elements to
    the list of ``target`` attributes of the ``link`` elements that follow.
    """
    targets = {}
    p = xml.parsers.expat.ParserCreate()
    # Only start tags matter here, so a single handler is installed.
    p.StartElementHandler = GroupDictTarget(targets).start
    with gopen(fin) as f:
        p.ParseFile(f)
    return targets
xml.sax
import xml.sax
from gzip import open as gopen


class GroupDictTarget(object):
    """Start-tag handler that gathers link targets per group into a dict.

    Each ``group`` start tag creates a fresh list stored in ``d`` under the
    key ``(attrib['from'], attrib['to'])``; each ``link`` start tag appends
    its ``target`` attribute to the list of the most recently started group.
    All other tags are ignored.
    """

    def __init__(self, d):
        # The caller-supplied dict is filled in place while parsing.
        self.d = d

    def start(self, tag, attrib):
        """Handle the start of an element named *tag* with *attrib*."""
        if tag == 'group':
            # Keep a reference to the new list so later links can extend it.
            self.group = self.d[attrib['from'], attrib['to']] = []
        elif tag == 'link':
            self.group.append(attrib['target'])

    def close(self):
        """Finish parsing; there is nothing to finalize or return."""
        pass


def extractTargets(fin):
    """Parse the gzip-compressed XML file *fin* and return the targets dict.

    The result maps ``(from, to)`` attribute pairs of ``group`` elements to
    the list of ``target`` attributes of the ``link`` elements that follow.
    """
    targets = {}
    handler = xml.sax.handler.ContentHandler()
    # Override only the start-element callback; the rest stay as no-ops.
    handler.startElement = GroupDictTarget(targets).start
    with gopen(fin) as f:
        xml.sax.parse(f, handler)
    return targets
Solution 2:
I had the same problem today, and for me it worked after I deleted the "tag" parameter:
# NOTE(review): this fragment assumes `etree` is lxml.etree, `xml` is the
# input file and `targets` is an existing dict -- none are defined here.
context = etree.iterparse(xml)
for event, elem in context:
    # Without a `tag` filter every element is yielded, so filter by hand.
    if elem.tag == "group":
        targets[(elem.get("from"), elem.get("to"))] = elem.xpath("link/@target")
        # Drop the processed group's content and all siblings before it so
        # the already-handled part of the tree does not stay in memory.
        elem.clear()
        while elem.getprevious() is not None:
            del elem.getparent()[0]
del context
Post a Comment for "Getting A Memory Error When Parsing A Large XML File In Python"