python文件排序的方法总结

在本篇内容里小编给各位整理的是一篇关于python文件排序的方法总结，有需要的朋友们可以参考下。

Python 提供两种排序方式：内置函数 sorted() 返回一个排好序的新列表，列表方法 sort() 则在原列表上就地排序。二者默认都按元素本身进行比较，因此文件名（字符串）会按字符顺序排列；如果读取文件后想按文件名中的数字排序，需要先进行处理：通过 key 参数把文件名转换成数字（把文件后缀名“屏蔽”掉）。

（1）首先：我测试的文件夹是/img/，里面的文件都是图片，如下图所示：

（2）测试库函数sorted()，直接贴出代码：

 # Print the path of every file in ./img/, ordered by file name as a string.
import numpy as np
import os

img_path = './img/'

# sorted() compares the names character by character, so '10.jpg' is listed
# before '2.jpg'.
img_list = sorted(os.listdir(img_path))  # file names in alphabetical order

for img_file in img_list:
    img_name = img_path + img_file
    print(img_name)

运行效果如下：

从图片可以清晰的看出，文件名是按字符排序的。

（3）测试函数sort()，代码：

 # Print the path of every file in ./img/, ordered by the number in its name.
import numpy as np
import os

img_path = './img/'

img_list = os.listdir(img_path)

# Lexicographic pre-sort: list.sort() is stable, so names whose numeric value
# is equal (e.g. '01.jpg' and '1.jpg') keep this order after the next sort.
img_list.sort()

# Sort file names numerically. x[:-4] drops the last four characters, i.e. a
# four-character suffix such as '.jpg'; every name must therefore look like
# '<digits>.<3-letter extension>', otherwise int() raises ValueError.
# os.path.splitext(x)[0] would handle extensions of any length.
img_list.sort(key=lambda x: int(x[:-4]))

for img_file in img_list:
    img_name = img_path + img_file
    print(img_name)

运行效果如下：

可以看出，文件名是按数字排序的；顺便提下，sort函数中用到了匿名函数(key = lambda x:int(x[:-4]))，其作用是将后缀名“.jpg”“屏蔽”（因为“.jpg”是4个字符，所以[:-4]的含义是从文件名开头取到倒数第四个字符之前，也就是去掉最后4个字符），具体看python的匿名函数和数组（切片）取值方式。

实例扩展：

 # Two Python 2 listings (they use the `print` statement and the `file()` built-in).
# The first splits a large text file into sorted chunk files; the second merges the
# chunk files of two inputs and writes their differences.
# NOTE(review): line structure below is reconstructed from a listing that had been
# collapsed onto one line; indentation is inferred from the statements themselves.
import gzip
import os
from multiprocessing import Process, Queue, Pipe, current_process, freeze_support
from datetime import datetime

def sort_worker(input,output):
    """Sort chunks taken from the `input` queue and put the results on `output`.

    For every chunk, the first space-separated field of each line is collected
    as a unique key; the keys are sorted and put on `output` joined by newlines.
    Returns as soon as a line equal to 'STOP' (after strip) is seen; nothing is
    put on `output` for that chunk.
    """
    while True:
        lines = input.get().splitlines()
        # dict used as a set: only the keys matter, every value is ''
        element_set = {}
        for line in lines:
            if line.strip() == 'STOP':
                return
            try:
                element = line.split(' ')[0]
                if not element_set.get(element): element_set[element] = ''
            except:
                # NOTE(review): bare except silently skips any line that fails here
                pass
        sorted_element = sorted(element_set)
        #print sorted_element
        output.put('\n'.join(sorted_element))

def write_worker(input, pre):
    """Write each chunk taken from the `input` queue to its own file under `pre`.

    The directory `pre` is created with a shell `mkdir`; chunks are written to
    `pre/0`, `pre/1`, ... in arrival order. Returns when a chunk equal to 'STOP'
    (after strip) is received.
    """
    os.system('mkdir %s'%pre)
    i = 0
    while True:
        content = input.get()
        if content.strip() == 'STOP':
            return
        write_sorted_bulk(content, '%s/%s'%(pre, i))
        i += 1

def write_sorted_bulk(content, filename):
    """Write `content` to `filename`, replacing any existing file."""
    # file() is the Python 2 built-in; it does not exist in Python 3
    f = file(filename, 'w')
    f.write(content)
    f.close()

def split_sort_file(filename, num_sort = 3, buf_size = 65536*64*4):
    """Split `filename` into sorted chunk files and return the number of chunks queued.

    The file is read `buf_size` characters at a time (default 65536*64*4 =
    16777216); each chunk is cut at its last newline and handed to one of
    `num_sort` sort_worker processes. A single write_worker process stores the
    sorted chunks in a directory named after `filename` without its extension.
    Files ending in '.gz' are opened with gzip.
    """
    t = datetime.now()
    pre, ext = os.path.splitext(filename)
    if ext == '.gz':
        file_file = gzip.open(filename, 'rb')
    else:
        file_file = open(filename)
    # both queues hold at most 10 items, so the reader blocks when workers fall behind
    bulk_queue = Queue(10)
    sorted_queue = Queue(10)
    NUM_SORT = num_sort
    sort_worker_pool = []
    for i in range(NUM_SORT):
        sort_worker_pool.append( Process(target=sort_worker, args=(bulk_queue, sorted_queue)) )
        sort_worker_pool[i].start()
    NUM_WRITE = 1
    write_worker_pool = []
    for i in range(NUM_WRITE):
        write_worker_pool.append( Process(target=write_worker, args=(sorted_queue, pre)) )
        write_worker_pool[i].start()
    buf = file_file.read(buf_size)
    sorted_count = 0
    while len(buf):
        end_line = buf.rfind('\n')
        #print buf[:end_line+1]
        # queue everything up to and including the last newline in the buffer
        bulk_queue.put(buf[:end_line+1])
        sorted_count += 1
        if end_line != -1:
            # carry the incomplete last line over to the next chunk
            buf = buf[end_line+1:] + file_file.read(buf_size)
        else:
            # NOTE(review): with no newline in buf, the slice above is '' and buf is
            # replaced here, so the buffered text (e.g. a final line without a
            # trailing newline) appears to be dropped - confirm.
            buf = file_file.read(buf_size)
    # one 'STOP' per sort worker; the writer is stopped only after all sorters
    # have been joined
    for i in range(NUM_SORT):
        bulk_queue.put('STOP')
    for i in range(NUM_SORT):
        sort_worker_pool[i].join()
    for i in range(NUM_WRITE):
        sorted_queue.put('STOP')
    for i in range(NUM_WRITE):
        write_worker_pool[i].join()
    print 'elasped ', datetime.now() - t
    return sorted_count

# ---- second listing (imports split_sort_file from a module named file_split) ----
from heapq import heappush, heappop
from datetime import datetime
from multiprocessing import Process, Queue, Pipe, current_process, freeze_support
import os

class file_heap:
    # Heap keyed on the next element of each chunk file found in a directory.
    def __init__(self, dir, idx = 0, count = 1):
        """Open the files in `dir` selected by `idx`/`count` and seed the heap.

        Only files with hash(name) % count == idx are opened.
        NOTE(review): presumably this shards the directory across several
        consumers - confirm.
        """
        files = os.listdir(dir)
        self.heap = []
        self.files = {}
        self.bulks = {}
        self.pre_element = None
        for i in range(len(files)):
            file = files[i]
            if hash(file) % count != idx:
                continue
            input = open(os.path.join(dir, file))
            self.files[i] = input
            self.bulks[i] = ''
            # heap entries are (element, file index) pairs
            heappush(self.heap, (self.get_next_element_buffered(i), i))

    def get_next_element_buffered(self, i):
        # NOTE(review): the text between `len(self.bulks[i])` and `1024` was lost when
        # this listing was extracted (everything from a '<' to a later '>' is gone).
        # The rest of this method and the definitions that followed it are missing,
        # including heappoppush2 and queue_buffer, which diff_file uses. The two
        # statements below use self.q / self.wbuf, which file_heap.__init__ never
        # sets, so they presumably belong to another class - recover from the
        # original source.
        if len(self.bulks[i])  1024:
            self.q.put(self.wbuf)
            self.wbuf = []

def diff_file(file_old, file_new, file_diff, buf = 268435456):
    """Write the differences between two files to `file_diff`.

    Both inputs are split into sorted chunk directories with split_sort_file
    (5 sort workers, chunk size `buf`); two processes running heappoppush2
    then feed elements through queue_buffer objects, and the loop below walks
    both streams in parallel. Existing chunk directories and `file_diff` are
    removed first with shell `rm`.
    """
    print 'buffer size', buf
    from file_split import split_sort_file
    os.system('rm -rf '+ os.path.splitext(file_old)[0] )
    os.system('rm -rf '+ os.path.splitext(file_new)[0] )
    t = datetime.now()
    split_sort_file(file_old,5,buf)
    split_sort_file(file_new,5,buf)
    print 'split elasped ', datetime.now() - t
    # print the total number of lines in each chunk directory
    os.system('cat %s/* | wc -l'%os.path.splitext(file_old)[0])
    os.system('cat %s/* | wc -l'%os.path.splitext(file_new)[0])
    os.system('rm -f '+file_diff)
    t = datetime.now()
    # NOTE(review): zdiff is never closed in the visible code
    zdiff = open(file_diff, 'a')
    old_q = Queue(1024)
    new_q = Queue(1024)
    old_queue = queue_buffer(old_q)
    new_queue = queue_buffer(new_q)
    h1 = Process(target=heappoppush2, args=(os.path.splitext(file_old)[0], old_queue, 3))
    h2 = Process(target=heappoppush2, args=(os.path.splitext(file_new)[0], new_queue, 3))
    h1.start(), h2.start()
    old = old_queue.get()
    new = new_queue.get()
    old_count, new_count = 0, 0
    # None marks the end of a stream; loop until both are exhausted
    while old is not None or new is not None:
        if old > new or old is None:
            # element present only in the new file
            zdiff.write('<'+new+'\n')
            new = new_queue.get()
            new_count +=1
        # NOTE(review): the text between `elif old` and `'+old+'\n')` was lost in
        # extraction; presumably this branch compared old with new and wrote the
        # old-only element with a '>' prefix - recover from the original source.
        elif old  '+old+'\n')
            old = old_queue.get()
            old_count +=1
        else:
            # same element in both streams: advance both, write nothing
            old = old_queue.get()
            new = new_queue.get()
    print 'new_count:', new_count
    print 'old_count:', old_count
    print 'diff elasped ', datetime.now() - t
    h1.join(), h2.join()

到此，这篇关于python文件排序的方法总结的文章就介绍到这了。更多与python文件排序方法相关的内容，请搜索gaodaima搞代码网以前的文章，或继续浏览下面的相关文章。希望大家以后多多支持gaodaima搞代码网！

以上就是python文件排序的方法总结的详细内容，更多请关注gaodaima搞代码网其它相关文章！

搞代码网（gaodaima.com）提供的所有资源部分来自互联网，如果有侵犯您的版权或其他权益，请说明详细缘由并提供版权或权益证明然后发送到邮箱[email protected]‍，我们会在看到邮件的第一时间内为您处理，或直接联系QQ：872152909。本网站采用BY-NC-SA协议进行授权
转载请注明原文链接：python文件排序的方法总结

Hi，您需要填写昵称和邮箱！