長期以來都想用python對Excel進行一些列的操作,但由於某種神秘的力量控制著我,一直未果,今天有幸用requests模塊和BeautifulSoup模塊進行爬蟲練習,拿到了一大批數據,照我以前,都只是用字典啊、列表啊,或者文本文件存放,之前沒覺得哪裡不好,但今天的我很奇怪,怎麼看怎麼不爽,而且 ...
長期以來都想用python對Excel進行一些列的操作,但由於某種神秘的力量控制著我,一直未果,今天有幸用requests模塊和BeautifulSoup模塊進行爬蟲練習,拿到了一大批數據,照我以前,都只是用字典啊、列表啊,或者文本文件存放,之前沒覺得哪裡不好,但今天的我很奇怪,怎麼看怎麼不爽,而且,如果我把我爬取的數據給別人看的話,確實有點難堪,沒有臉給別人看。於是,我拼盡平生所有能力掙脫某種神秘力量的束縛,粗略的學習了下用python如何操作Excel,在此跟大家分享下,鄙人水平有限,不夠深入或有問題的地方還請大佬指教。
一、用xlrd模塊對Excel進行讀取
import xlrd
#打開Excel文件讀取數據 data=xlrd.open_workbook('myexcel.xls')
#獲取一個工作表 table = data.sheets()[0] #通過索引順序獲取 table = data.sheet_by_index(0) #通過索引順序獲取 table = data.sheet_by_name(u'Sheet1')#通過名稱獲取
獲取整行或整列的值(數組)
table.row_values(num) table.col_values(num)
獲取行數和列數 table.nrows table.ncols
獲取單元格 table.cell(num1,num2).value
二、用xlwt模塊對Excel進行寫入
1,基本寫入操作
import xlwt
#創建workbook,也就是新建一個Excel文件 workbook=xlwt.Workbook(encoding='ascii')
#在剛纔創建的Excel文件下創建一個sheet表 worksheet=workbook.add_sheet('mysheet')
#往單元格裡寫入數據 worksheet.write(num,num,label='xxxxxx')
#保存Excel文件 workbook.save('myworkbook.xls')
2,font設置
font = xlwt.Font() # Create the Font 大概就是創建一個font對象,然後對其進行設置 font.name = 'Times New Roman' font.bold = True font.underline = True font.italic = True style = xlwt.XFStyle() # Create the Style #創建一個style對象,然後把font對象賦給它 style.font = font # Apply the Font to the Style worksheet.write(0, 0, label = 'Unformatted value') worksheet.write(1, 0, label = 'Formatted value', style) # Apply the Style to the Cell
下麵的就不是很懂,大概也是font對象的一些設置 font.struck_out = True # May be: True, False font.underline = xlwt.Font.UNDERLINE_SINGLE # May be: UNDERLINE_NONE, UNDERLINE_SINGLE, UNDERLINE_SINGLE_ACC, UNDERLINE_DOUBLE, UNDERLINE_DOUBLE_ACC font.escapement = xlwt.Font.ESCAPEMENT_SUPERSCRIPT # May be: ESCAPEMENT_NONE, ESCAPEMENT_SUPERSCRIPT, ESCAPEMENT_SUBSCRIPT font.family = xlwt.Font.FAMILY_ROMAN # May be: FAMILY_NONE, FAMILY_ROMAN, FAMILY_SWISS, FAMILY_MODERN, FAMILY_SCRIPT, FAMILY_DECORATIVE font.charset = xlwt.Font.CHARSET_ANSI_LATIN # May be: CHARSET_ANSI_LATIN, CHARSET_SYS_DEFAULT, CHARSET_SYMBOL, CHARSET_APPLE_ROMAN, CHARSET_ANSI_JAP_SHIFT_JIS, CHARSET_ANSI_KOR_HANGUL,
CHARSET_ANSI_KOR_JOHAB, CHARSET_ANSI_CHINESE_GBK, CHARSET_ANSI_CHINESE_BIG5, CHARSET_ANSI_GREEK, CHARSET_ANSI_TURKISH, CHARSET_ANSI_VIETNAMESE, CHARSET_ANSI_HEBREW, CHARSET_ANSI_ARABIC,
CHARSET_ANSI_BALTIC, CHARSET_ANSI_CYRILLIC, CHARSET_ANSI_THAI, CHARSET_ANSI_LATIN_II, CHARSET_OEM_LATIN_I font.colour_index = ? font.get_biff_record = ? font.height = 0x00C8 # C8 in Hex (in decimal) = 10 points in height. font.name = ? font.outline = ? font.shadow = ?
3,每列的寬度設置
worksheet.col(0).width = 3333 #這是第0列
4,往單元格裡插入時間
style = xlwt.XFStyle() style.num_format_str = 'M/D/YY' # Other options: D-MMM-YY, D-MMM, MMM-YY, h:mm, h:mm:ss, h:mm, h:mm:ss, M/D/YY h:mm, mm:ss, [h]:mm:ss, mm:ss.0 worksheet.write(0, 0, datetime.datetime.now(), style)
5,在單元格插入公式
worksheet.write(0, 0, 5) # Outputs 5 worksheet.write(0, 1, 2) # Outputs 2 worksheet.write(1, 0, xlwt.Formula('A1*B1')) # Should output "10" (A1[5] * A2[2]) worksheet.write(1, 1, xlwt.Formula('SUM(A1,B1)')) # Should output "7" (A1[5] + A2[2])
6,單元格插連接
worksheet.write(0, 0, xlwt.Formula('HYPERLINK("http://www.google.com";"Google")')) # Outputs the text "Google" linking to http://www.google.com
7,合併行和列
import xlwt workbook = xlwt.Workbook() worksheet = workbook.add_sheet('My Sheet') worksheet.write_merge(0, 0, 0, 3, 'First Merge') # Merges row 0's columns 0 through 3.合併第0列到底3列 font = xlwt.Font() # Create Font font.bold = True # Set font to Bold style = xlwt.XFStyle() # Create Style style.font = font # Add Bold Font to Style worksheet.write_merge(1, 2, 0, 3, 'Second Merge', style) # Merges row 1 through 2's columns 0 through 3.合併第1行到第2列和第0列到第3列 workbook.save('Excel_Workbook.xls')
8,Alignment
Setting the Alignment for the Contents of a Cell import xlwt workbook = xlwt.Workbook() worksheet = workbook.add_sheet('My Sheet') alignment = xlwt.Alignment() # Create Alignment alignment.horz = xlwt.Alignment.HORZ_CENTER # May be: HORZ_GENERAL, HORZ_LEFT, HORZ_CENTER, HORZ_RIGHT, HORZ_FILLED, HORZ_JUSTIFIED, HORZ_CENTER_ACROSS_SEL, HORZ_DISTRIBUTED alignment.vert = xlwt.Alignment.VERT_CENTER # May be: VERT_TOP, VERT_CENTER, VERT_BOTTOM, VERT_JUSTIFIED, VERT_DISTRIBUTED style = xlwt.XFStyle() # Create Style style.alignment = alignment # Add Alignment to Style worksheet.write(0, 0, 'Cell Contents', style) workbook.save('Excel_Workbook.xls')
9,Borders
Adding Borders to a Cell # Please note: While I was able to find these constants within the source code, on my system (using LibreOffice,)
I was only presented with a solid line, varying from thin to thick; no dotted or dashed lines. import xlwt workbook = xlwt.Workbook() worksheet = workbook.add_sheet('My Sheet') borders = xlwt.Borders() # Create Borders borders.left = xlwt.Borders.DASHED # May be: NO_LINE, THIN, MEDIUM, DASHED, DOTTED, THICK, DOUBLE, HAIR, MEDIUM_DASHED,
THIN_DASH_DOTTED, MEDIUM_DASH_DOTTED, THIN_DASH_DOT_DOTTED, MEDIUM_DASH_DOT_DOTTED, SLANTED_MEDIUM_DASH_DOTTED,
or 0x00 through 0x0D. borders.right = xlwt.Borders.DASHED borders.top = xlwt.Borders.DASHED borders.bottom = xlwt.Borders.DASHED borders.left_colour = 0x40 borders.right_colour = 0x40 borders.top_colour = 0x40 borders.bottom_colour = 0x40 style = xlwt.XFStyle() # Create Style style.borders = borders # Add Borders to Style worksheet.write(0, 0, 'Cell Contents', style) workbook.save('Excel_Workbook.xls')
10,Background Color
Setting the Background Color of a Cell import xlwt workbook = xlwt.Workbook() worksheet = workbook.add_sheet('My Sheet') pattern = xlwt.Pattern() # Create the Pattern pattern.pattern = xlwt.Pattern.SOLID_PATTERN # May be: NO_PATTERN, SOLID_PATTERN, or 0x00 through 0x12 pattern.pattern_fore_colour = 5 # May be: 8 through 63. 0 = Black, 1 = White, 2 = Red, 3 = Green, 4 = Blue, 5 = Yellow, 6 = Magenta,
7 = Cyan, 16 = Maroon, 17 = Dark Green, 18 = Dark Blue, 19 = Dark Yellow , almost brown), 20 = Dark Magenta, 21 = Teal, 22 = Light Gray,
23 = Dark Gray, the list goes on... style = xlwt.XFStyle() # Create the Pattern style.pattern = pattern # Add Pattern to Style worksheet.write(0, 0, 'Cell Contents', style) workbook.save('Excel_Workbook.xls')
純屬借鑒大佬的筆記,在此感謝大佬
三、小應用
from bs4 import BeautifulSoup import requests import xlwt import re headers={ 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36' } workbook = xlwt.Workbook(encoding='ascii') worksheet = workbook.add_sheet('My Worksheet') worksheet.col(0).width = 14000 worksheet.col(5).width = 24000 url='https://sz.58.com/ershoufang/?utm_source=market&spm=u-2d2yxv86y3v43nkddh1.BDPCPZ_BT&PGTID=0d30000c-0000-4591-0324-370565eccba8&ClickID=1' res=requests.get(url=url,headers=headers) con=res.text soup=BeautifulSoup(con,'lxml') ss=soup.find('ul',class_='house-list-wrap') li_list=ss.find_all('li') patter=re.compile(r'\s',re.S) for num in range(len(li_list)): worksheet.write(num, 0, label=li_list[num].find('a',tongji_label="listclick").text.strip()) p1=li_list[num].find_all('p')[0] span_list=p1.find_all('span') worksheet.write(num, 1, label=patter.sub('',span_list[0].text)) worksheet.write(num, 2, label=patter.sub('',span_list[1].text)) worksheet.write(num, 3, label=patter.sub('',span_list[2].text)) worksheet.write(num, 4, label=patter.sub('',span_list[3].text)) worksheet.write(num,5, label=patter.sub('',li_list[num].find('div',class_='jjrinfo').text)) worksheet.write(num, 6, label=li_list[num].find('p',class_='sum').text) worksheet.write(num, 7, label=li_list[num].find('p',class_='unit').text) workbook.save('myWorkbook.xls')