python批量修改文件编码

在网上下载了一份代码,然后发现中文全部乱码,因为编码格式是gb2312,所以写了个脚本批量修改编码格式

import os
import sys
import chardet
import codecs

def get_encoding_type(fileName):
    """Detect the encoding of a file with chardet.

    The whole file is read as raw bytes and handed to ``chardet.detect``.

    Args:
        fileName: Path of the file to inspect.

    Returns:
        The value returned by ``chardet.detect`` for the file's bytes. The
        caller in this file reads its ``'encoding'`` key.
    """
    with open(fileName, 'rb') as raw:
        return chardet.detect(raw.read())

def findAllFile(base):
    """Collect the paths of all files found under ``base``, recursively.

    Args:
        base: Directory to traverse with ``os.walk``.

    Returns:
        A list of file paths, each formed by joining the directory being
        visited with the file name. Directories are not included.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(base)
        for name in names
    ]


def convert_encoding_type(filename_in, filename_out, encode_in="gbk", encode_out="utf-8"):
    """Re-encode a text file from one character encoding to another.

    The input is read and decoded completely, and its handle is closed,
    before the output is opened. This makes it safe to pass the same path
    for both arguments (in-place conversion): if decoding fails, the file
    has not yet been truncated.

    Args:
        filename_in: Path of the file to read.
        filename_out: Path of the file to write; may equal ``filename_in``.
            Its parent directory is created if it does not exist.
        encode_in: Codec used to decode the input (default ``"gbk"``).
        encode_out: Codec used to encode the output (default ``"utf-8"``).

    Raises:
        UnicodeDecodeError: If the input is not valid ``encode_in`` data.
        OSError: If a file cannot be read, created or written.
    """
    # newline='' turns off newline translation, so the text is read exactly
    # as stored (CRLF stays CRLF).
    with open(filename_in, mode='r', encoding=encode_in, newline='') as fi:
        data = fi.read()

    # dirname is '' for a bare file name; makedirs('') would raise.
    outdir = os.path.dirname(filename_out)
    if outdir:
        os.makedirs(outdir, exist_ok=True)

    # newline='' again: write line endings unchanged instead of expanding
    # '\n' to os.linesep, which would turn '\r\n' into '\r\r\n' on Windows.
    with open(filename_out, mode='w', encoding=encode_out, newline='') as fo:
        fo.write(data)


def main(base='./123/'):
    """Convert every file under ``base`` detected as GB2312, in place.

    Each file found by ``findAllFile`` is passed to ``get_encoding_type``.
    When the reported ``'encoding'`` is exactly ``'GB2312'``, the file name
    and the detection result are printed and the file is overwritten via
    ``convert_encoding_type`` with that function's default codecs.

    Args:
        base: Directory to scan recursively. Defaults to ``'./123/'``, the
            directory the script originally had hard-coded.
    """
    for fileName in findAllFile(base):
        encoding_type = get_encoding_type(fileName)
        if encoding_type['encoding'] == 'GB2312':
            print(fileName + "  " + str(encoding_type))
            # Same path for input and output: the file is replaced in place.
            convert_encoding_type(fileName, fileName)


if __name__ == '__main__':
    # An optional first command-line argument overrides the default directory.
    if len(sys.argv) > 1:
        main(sys.argv[1])
    else:
        main()
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容