Openpyxl Corrupts Xlsx On Save. Even When No Changes Were Made
TL;DR; Using Openpyxl to save changes to a large excel file results in a corrupted xlsx file The Excel file is made of several tabs with graphs, formulae, and images, and tables.
Solution 1:
As discussed in comments on original post: some graphics and other items are not supported by openpyxl, even if they are in worksheets not modified by your code. This is not a full workaround, but works when the unsupported objects are in other worksheets only.
I made an example .xlsx workbook with two worksheets, 'TWC' and 'UV240 Results'. This code assumes that any worksheet whose title ends in 'Results' contains the unsupported images, and creates two temporary files - imageoutput contains the unsupported images, and outputtemp contains the worksheets that may be modified without corruption by openpyxl. Then they're stitched together at the end.
It may be a inefficient in parts; please edit or comment with improvements!
import os
import shutil
import win32com.client
from openpyxl import load_workbook
name = 'spreadsheet.xlsx'
outputfile = 'output.xlsx'
outputtemp = 'outputtemp.xlsx'
shutil.copyfile(name, 'output.xlsx')
wb = load_workbook('output.xlsx')
ws = wb['TWC']
# TWC doesn't have images. Anything ending with 'Results' has unsupported images etc# Create new file with only openpyxl-unsupported worksheets
imageworksheets = [ws if ws.title.endswith('Results') else''for ws in wb.worksheets]
if [ws for ws in wb if ws.title != 'TWC']:
imageoutput = 'output2.xlsx'
imagefilewritten = Falsewhilenot imagefilewritten:
try:
shutil.copy(name, imageoutput)
except PermissionError as error:
# Catch an exception here - I usually have a GUI functionpasselse:
imagefilewritten = True
excel = win32com.client.Dispatch('Excel.Application')
excel.Visible = False
imagewb = excel.Workbooks.Open(os.path.join(os.getcwd(), imageoutput))
excel.DisplayAlerts = Falsefor i, ws inenumerate(imageworksheets[::-1]): # Go backwards to avoid reindexingifnot ws:
wsindex = len(imageworksheets) - i
imagewb.Worksheets(wsindex).Delete()
imagefileupdated = Falsewhilenot imagefileupdated:
try:
imagewb.Save()
imagewb.Close(SaveChanges = True)
print('Temp image workbook saved.')
except PermissionError as error:
# Catch exceptionpasselse:
imagefileupdated = True# Remove the unsupported worksheets in openpyxlfor ws in wb.worksheets:
if ws in imageworksheets:
wb.remove(ws)
wb.save(outputtemp)
print('Temp output workbook saved.')
''' Do your desired openpyxl manipulations on the remaining supported worksheet '''# Merge the outputtemp and imageoutput into outputfile
wb1 = excel.Workbooks.Open(os.path.join(os.getcwd(), outputtemp))
wb2 = excel.Workbooks.Open(os.path.join(os.getcwd(), imageoutput))
for ws in wb1.Sheets:
ws.Copy(wb2.Sheets(1))
wb2.SaveAs(os.path.join(os.getcwd(), outputfile))
wb1.Close(SaveChanges = True)
wb2.Close(SaveChanges = True)
print(f'Output workbook saved as {outputfile}.')
excel.Visible = True
excel.DisplayAlerts = True
Post a Comment for "Openpyxl Corrupts Xlsx On Save. Even When No Changes Were Made"