第5章：Linux系统管理

1.文件读写

1).Python内置的open函数

# Mode 'w' creates data.txt, or truncates it if it already exists.
f = open('data.txt', 'w')

f.write('hello, world')

# Close explicitly so buffered data is flushed and the file handle is released.
f.close()

2).避免文件句柄泄露

# Open the file *before* entering the try block: if open() itself fails there
# is no handle to close, and touching `f` in `finally` would raise NameError
# and hide the real error.
f = open('data.txt')
try:
    print(f.read())
finally:
    # Runs whether or not the read/print raised, so the handle never leaks.
    f.close()

# The with statement closes the file when the block exits, even if an exception is raised.
with open('data.txt') as f:

       print(f.read())

3).常见的文件操作函数

Python提供了三个读相关的函数，分别是read、readline和readlines

read：读取文件中的所有内容

readline：一次读取一行

readlines：将文件内容存到一个列表中，列表中的每一个元素对应于文件中的一行

f = open('data.txt')

# Read the whole file as one string; the file position is now at end-of-file.
f.read()

# Rewind to the beginning so the next call has something to read.
f.seek(0)

# Read a single line, including its trailing newline if there is one.
f.readline()

f.seek(0)

# Read every remaining line into a list of strings.
f.readlines()

Python提供了两个写函数，分别是write和writelines

write：写字符串到文件中，并返回写入的字符数

writelines：写一个字符串列表到文件中

# Use a with block so the data is flushed and the handle closed on exit.
with open('data.txt', 'w') as f:
    f.write('Beautiful is better than ugly')
    # writelines() takes ONE iterable of strings; passing two separate lists
    # raised TypeError. It adds no line separators, so the strings are written
    # back to back exactly as given.
    f.writelines(['Explicit is better than implicit', 'Simple is better than complex'])

4).Python的文件是一个可迭代对象

使用for循环遍历文件内容的代码如下：

with open('data.txt') as inf:

       # Iterating a file object yields one line at a time.
       for line in inf:

             # Each line keeps its trailing newline, so print() adds a second one.
             print(line.upper())

5).案例：将文件中所有单词的首字母变成大写

# Variant 1: rebuild each line by hand and write it with an explicit newline.
with open('data.txt') as inf, open('out.txt', 'w') as outf:

       for line in inf:

             # split() with no argument drops the newline and collapses runs of
             # whitespace; capitalize() upper-cases the first letter of a word
             # and lower-cases the rest.
             outf.write(" ".join([word.capitalize() for word in line.split()]))

             outf.write("\n")

# Variant 2: same output, letting print() insert the spaces and the newline.
with open('data.txt') as inf, open('out.txt', 'w') as outf:

      for line in inf:

            # The * unpacks the list into separate print() arguments; file=outf
            # redirects the output to the file (print as a function).
            print( * [word.capitalize() for word in line.split()], file=outf)

2.文件与文件路径管理

1).使用os.path进行路径和文件管理

拆分路径

import os

path = "/var/log/yum.log"

# -> ('/var/log', 'yum.log')   (directory part, last component)
os.path.split(path)

# -> '/var/log'
os.path.dirname(path)

# -> 'yum.log'
os.path.basename(path)

# -> ('/var/log/yum', '.log')  (everything before the extension, extension)
os.path.splitext(path)

构建路径

import os

# Current working directory of the process.
os.getcwd()

# Home directory of the current user.
os.path.expanduser('~')

# Home directory of the user "mysql"; the string is returned unchanged if the expansion fails.
os.path.expanduser('~mysql')

# Only the leading ~user part is expanded; the rest of the path is kept.
os.path.expanduser('~allen/data.txt')

# Absolute, normalized form of the current directory.
os.path.abspath('.')

# -> '~/t/a.py' : join() only concatenates, it does NOT expand '~'.
os.path.join('~', 't', 'a.py')

# Expand first, then join, to get a real path under mysql's home directory.
os.path.join(os.path.expanduser('~mysql'), 't', 'a.py')

# -> True
os.path.isabs('/home/mysql/t/a.py')

# -> False
os.path.isabs('.')

在Python代码中，可以使用__file__这个特殊的变量表示当前代码所在的源文件

import os

print("current directory : ", os.getcwd())

# abspath() turns __file__ (which may be a relative path) into an absolute one.
path = os.path.abspath(__file__)

print("full path of current file : ", path)

获取文件属性

os.path模块包含了若干函数用来获取文件的属性

getatime

getmtime

getctime

getsize

判断文件类型

os.path模块包含了若干判断类函数

exists

isfile

isdir

islink

ismount

2).使用os模块管理文件和目录

import os

os.getcwd()

# Change the process's working directory; raises OSError if the target does not exist.
os.chdir(os.path.expanduser('~mysql'))

# Now reports the directory that was just entered.
os.getcwd()

下面的程序演示了chmod和access函数的用法。首先通过命令行读取文件的名称，先判断文件是否存在，如果不存在，直接退出

然后判断文件是否具有读权限，如果没有，则将文件赋予所有用户都有读、写、执行权限，如果文件有读权限，读取文件内容

import os

import sys

def main():
    """Print the file named on the command line, or open up its permissions.

    The file name is taken from ``sys.argv[1]``.

    * If no name is given, or it does not name a regular file, exit with an
      error message.
    * If the file is not readable by the current process, chmod it to 0o777
      (read/write/execute for everyone) and return without reading it.
    * Otherwise print the file's contents.

    Raises:
        SystemExit: no file name was given, or it is not a regular file.
    """
    # Check the argument count explicitly instead of appending a placeholder
    # to sys.argv, which mutated global state and produced a confusing message.
    if len(sys.argv) < 2:
        raise SystemExit('usage: {0} <filename>'.format(sys.argv[0]))
    filename = sys.argv[1]

    if not os.path.isfile(filename):
        raise SystemExit(filename + ' does not exist')

    if not os.access(filename, os.R_OK):
        # 0o777 is the octal spelling valid on Python 2.6+ and Python 3;
        # the legacy literal 0777 is a SyntaxError on Python 3.
        os.chmod(filename, 0o777)
    else:
        with open(filename) as f:
            print(f.read())

if __name__ == '__main__':

    main()

3).案例：打印最常用的10条Linux命令

import os

from collections import Counter

# Tally the first word (the command name) of every non-blank history line.
history_path = os.path.expanduser('~/.bash_history')
with open(history_path) as f:
    # split() with no argument already ignores leading/trailing whitespace,
    # so blank lines produce an empty list and are filtered out below.
    tokenized = (line.split() for line in f)
    c = Counter(words[0] for words in tokenized if words)

print(c.most_common(10))

3.查找文件

1).使用fnmatch找到特定的文件

有fnmatch、fnmatchcase、filter和translate函数

import os

import fnmatch

names = os.listdir('.')

# fnmatch() tests ONE name against a pattern.
for name in names:
    if fnmatch.fnmatch(name, '*.py'):
        print(name)

# filter() takes a LIST of names and returns those that match. Passing it a
# single name made it iterate over that name's characters, so it reported any
# file containing the letter "a" rather than files starting with "a".
for name in fnmatch.filter(names, 'a*'):
    print(name)

2).使用glob找到特定的文件

glob的作用相当于os.listdir加上fnmatch，使用glob以后，不需要调用os.listdir获取文件列表，直接通过模式匹配即可

import os

import glob

# Names in the current directory ending in .py (a list, possibly empty).
name = glob.glob('*.py')

print(name)

# Names in the current directory starting with "o".
name1 = glob.glob('o*')

print(name1)

3).使用os.walk遍历目录树

walk函数遍历某个目录及其子目录，对于每一个目录，walk返回一个三元组(dirpath,dirnames,filenames)

dirpath保存的是当前目录，dirnames是当前目录下的子目录列表，filenames是当前目录下的文件列表

import os

import fnmatch

images = ['*.jpg', '*.jpeg', '*.png']
matches = []

# Walk the whole home directory tree; for each directory collect the matching
# files pattern by pattern, as full paths.
home = os.path.expanduser("~")
for root, dirnames, filenames in os.walk(home):
    matches.extend(
        os.path.join(root, filename)
        for extensions in images
        for filename in fnmatch.filter(filenames, extensions)
    )

print(matches)

4).案例：找到目录下最大(或最老)的十个文件

import os

import fnmatch

def is_file_match(filename, patterns):
    """Return True if ``filename`` matches at least one glob in ``patterns``.

    Matching uses ``fnmatch.fnmatch``; an empty ``patterns`` gives False.
    """
    return any(fnmatch.fnmatch(filename, pattern) for pattern in patterns)

def find_specific_files(root, patterns=('*',), exclude_dirs=()):
    """Yield the path of every file under ``root`` whose name matches ``patterns``.

    Args:
        root: directory to walk, top-down, with ``os.walk``.
        patterns: iterable of ``fnmatch`` globs tested against the bare file
            name; the default matches every file.
        exclude_dirs: directory *names* (not paths) that are not descended
            into, at any depth.

    Yields:
        ``os.path.join(dirpath, filename)`` for each matching file.
    """
    # Defaults are immutable tuples rather than shared mutable lists, and the
    # loop variable no longer shadows the ``root`` parameter.
    for dirpath, dirnames, filenames in os.walk(root):
        # Prune in place: os.walk only skips a subdirectory if it has been
        # removed from this very list before the walk resumes.
        dirnames[:] = [d for d in dirnames if d not in exclude_dirs]
        for filename in filenames:
            if is_file_match(filename, patterns):
                yield os.path.join(dirpath, filename)

# Every file under /home/mysql.
for item in find_specific_files("/home/mysql"):
    print(item)

# Only image files under /root.
patterns = ['*.jpg', '*.jpeg', '*.png', '*.tif', '*.tiff']
for item in find_specific_files("/root", patterns):
    print(item)

# Map each file under /root to its size in bytes.
files = {name : os.path.getsize(name) for name in find_specific_files("/root")}

# Sort the (path, size) pairs by size, largest first, and keep the top ten.
by_size_desc = sorted(files.items(), key=lambda d:d[1], reverse=True)
result = by_size_desc[:10]

# Print rank, path and size, with ranks starting at 1.
for i, (path, size) in enumerate(result, 1):
    print(i, path, size)

首先通过字典推导创建一个字典，字典的key是找到的文件，字典的value是文件的大小

构建出字典以后，使用Python内置的sorted函数对字典进行逆序排序

排序完成以后即可获取最大的十个文件

4.高级文件处理接口shutil

os模块是对操作系统的接口进行封装，主要作用是跨平台

shutil模块包含复制、移动、重命名和删除文件及目录的函数，主要作用是管理文件和目录

1).复制文件和文件夹

copy和copytree

import shutil

# Copy one file; the destination may be a file name or an existing directory.
shutil.copy('data.txt','mydata.txt')

# Recursively copy a directory tree; by default the destination must not exist yet.
shutil.copytree('/py','/mypy')

2).文件和文件夹的移动与改名

import shutil

# Rename a.py to b.py.
shutil.move('a.py', 'b.py')

# If dir1 is an existing directory, b.py is moved into it; otherwise b.py is renamed to dir1.
shutil.move('b.py', 'dir1')

3).删除目录

shutil.rmtree不管目录是否为空，都直接删除整个目录

import shutil

# Deletes dir1 and everything beneath it; there is no undo.
shutil.rmtree('dir1')

5.文件内容管理

1).文件比较

filecmp模块包含了比较目录和文件的操作

filecmp模块最简单的函数是cmp函数，该函数用来比较两个文件是否相同

import filecmp

# Returns True if the two files seem equal. With the default shallow=True, files whose
# os.stat() signatures (type, size, modification time) are identical are taken to be
# equal without reading them; pass shallow=False to force a content comparison.
filecmp.cmp('a.txt', 'b.txt')

2).MD5校验

import hashlib

d = hashlib.md5()

# Hash the raw bytes: hashlib objects reject str on Python 3, and binary mode
# keeps decoding and newline translation from changing what gets hashed.
with open('/etc/passwd', 'rb') as f:
    for line in f:
        d.update(line)

print(d.hexdigest())

3).案例：找到目录下的重复文件

import hashlib

import sys

import os

import fnmatch

CHUNK_SIZE = 8192

def is_file_match(filename, patterns):
    """Return True if ``filename`` matches at least one glob in ``patterns``.

    Matching uses ``fnmatch.fnmatch``; an empty ``patterns`` gives False.
    """
    return any(fnmatch.fnmatch(filename, pattern) for pattern in patterns)

def find_specific_files(root, patterns=('*',), exclude_dirs=()):
    """Yield the path of every file under ``root`` whose name matches ``patterns``.

    Args:
        root: directory to walk, top-down, with ``os.walk``.
        patterns: iterable of ``fnmatch`` globs tested against the bare file
            name; the default matches every file.
        exclude_dirs: directory *names* (not paths) that are not descended
            into, at any depth.

    Yields:
        ``os.path.join(dirpath, filename)`` for each matching file.
    """
    # Defaults are immutable tuples rather than shared mutable lists, and the
    # loop variable no longer shadows the ``root`` parameter.
    for dirpath, dirnames, filenames in os.walk(root):
        # Prune in place: os.walk only skips a subdirectory if it has been
        # removed from this very list before the walk resumes.
        dirnames[:] = [d for d in dirnames if d not in exclude_dirs]
        for filename in filenames:
            if is_file_match(filename, patterns):
                yield os.path.join(dirpath, filename)

def get_chunk(filename):
    """Yield the contents of ``filename`` as successive chunks of bytes.

    The file is opened in binary mode so that the chunks are ``bytes``, which
    is what ``hashlib`` requires on Python 3, and so that files which are not
    valid text can still be read. Each read asks for ``CHUNK_SIZE`` bytes.
    """
    with open(filename, 'rb') as f:
        while True:
            chunk = f.read(CHUNK_SIZE)
            if not chunk:
                # An empty read means end of file.
                break
            yield chunk

def get_file_checksum(filename):
    """Return the hexadecimal MD5 digest of the file ``filename``.

    The file is consumed piece by piece through ``get_chunk``.
    """
    digest = hashlib.md5()
    for piece in get_chunk(filename):
        digest.update(piece)
    return digest.hexdigest()

def main():
    """Report files with identical content under the directory in ``sys.argv[1]``.

    Every file returned by ``find_specific_files`` is hashed with
    ``get_file_checksum``. When a checksum has been seen before, the file is
    printed next to the first file that produced that checksum.

    Raises:
        SystemExit: no argument was given, or it is not a directory.
    """
    # Check the argument count explicitly instead of appending an empty
    # placeholder to sys.argv, which mutated global state and produced the
    # message " is not a directory".
    if len(sys.argv) < 2:
        raise SystemExit("usage: {0} <directory>".format(sys.argv[0]))
    directory = sys.argv[1]

    if not os.path.isdir(directory):
        raise SystemExit("{0} is not a directory".format(directory))

    record = {}  # checksum -> first path seen with that checksum
    for item in find_specific_files(directory):
        checksum = get_file_checksum(item)
        if checksum in record:
            print('find duplicate file : {0} vs {1}'.format(record[checksum], item))
        else:
            record[checksum] = item

if __name__ == '__main__':

    main()

6.使用Python管理压缩包

1).使用tarfile库读取与创建tar包

读取tar包

import tarfile

with tarfile.open('tarfile_add.tar') as t:
    # getnames() returns the member names as plain strings...
    for member_name in t.getnames():
        print(member_name)

    # ...while getmembers() returns member objects whose .name is that string.
    for member in t.getmembers():
        print(member.name)

使用tarfile库读取与创建tar包

创建tar包

import tarfile

# mode='w' creates an uncompressed tar archive, replacing any existing file of that name.
with tarfile.open('tarfile_add.tar', mode='w') as out:

    # Add data.txt to the archive.
    out.add('data.txt')

getmembers：获取tar包中的文件列表

getnames：获取tar包中的文件名列表

extract：提取单个文件

extractall：提取所有文件

2).使用tarfile库读取与创建压缩包

读取一个用gzip算法压缩的tar包：

# 'r:gz' opens a gzip-compressed tar archive for reading. A with header needs
# a body to be valid Python, so list the member names as a minimal example.
with tarfile.open('tarfile_add.tar', mode='r:gz') as out:
    print(out.getnames())

创建一个用bzip2算法压缩的tar包：

# 'w:bz2' creates a bzip2-compressed tar archive. A with header needs a body
# to be valid Python, so add one file as a minimal example.
with tarfile.open('tarfile_add.tar', mode='w:bz2') as out:
    out.add('data.txt')

3).案例：备份指定文件到压缩包

import os

import fnmatch

import tarfile

import datetime

def is_file_match(filename, patterns):
    """Return True if ``filename`` matches at least one glob in ``patterns``."""
    return any(fnmatch.fnmatch(filename, pattern) for pattern in patterns)


def find_specific_files(root, patterns=('*',), exclude_dirs=()):
    """Yield the path of every file under ``root`` whose name matches ``patterns``.

    Args:
        root: directory to walk, top-down, with ``os.walk``.
        patterns: iterable of ``fnmatch`` globs tested against the bare file
            name; the default matches every file.
        exclude_dirs: directory *names* (not paths) that are not descended
            into, at any depth.

    Yields:
        ``os.path.join(dirpath, filename)`` for each matching file.
    """
    for dirpath, dirnames, filenames in os.walk(root):
        # The pruning must happen INSIDE the walk loop. It used to sit after
        # the loop, where it ran only once the walk was over, so excluded
        # directories were still visited. os.walk skips a subdirectory only
        # if it is removed from this list before the walk resumes.
        dirnames[:] = [d for d in dirnames if d not in exclude_dirs]
        for filename in filenames:
            if is_file_match(filename, patterns):
                yield os.path.join(dirpath, filename)

def main():
    """Archive every image file under /root into a timestamped .tar.gz.

    The archive is written to the current working directory and is named
    ``all_images_<YYYY_mm_dd_HH_MM_SS>.tar.gz``.
    """
    image_globs = ['*.jpg', '*.jpeg', '*.png', '*.tif', '*.tiff']
    stamp = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
    archive_name = "all_images_{0}.tar.gz".format(stamp)

    with tarfile.open(archive_name, 'w:gz') as archive:
        for image_path in find_specific_files("/root", image_globs):
            archive.add(image_path)

if __name__ == '__main__':

    main()

4).使用zipfile库创建和读取zip压缩包

读取zip文件

import zipfile

# Opens the archive for reading (the mode defaults to 'r').
example_zip = zipfile.ZipFile('example.zip')

# List of the names of the archive's members.
example_zip.namelist()

创建zip文件

import zipfile

# Mode 'w' creates new.zip, truncating it if it already exists.
newZip = zipfile.ZipFile('new.zip', 'w')

# With no compression argument the member is stored uncompressed (ZIP_STORED).
newZip.write('spam.txt')

# close() writes the archive's final records; without it the zip file is incomplete.
newZip.close()

使用Python的zipfile模块提供的命令行接口，创建、查看和提取zip格式压缩包

# -c: create monty.zip from the listed files
python -m zipfile -c monty.zip spam.txt eggs.txt

# -e: extract monty.zip into the given directory (here the filesystem root "/")
# NOTE(review): extracting into "/" can overwrite system files; confirm this target is intended.
python -m zipfile -e monty.zip /

# -l: list the files contained in monty.zip
python -m zipfile -l monty.zip

5).案例：暴力破解zip压缩包的密码

import zipfile
import zlib

# NOTE(review): the snippet used `f` without ever opening an archive;
# 'example.zip' is assumed to be the encrypted archive. Adjust as needed.
with zipfile.ZipFile('example.zip') as f, open('passwords.txt') as pf:
    for line in pf:
        password = line.strip()
        try:
            # pwd must be bytes on Python 3.
            f.extractall(pwd=password.encode())
        except (RuntimeError, zipfile.BadZipFile, zlib.error):
            # Wrong password. zipfile raises RuntimeError when its quick check
            # rejects the guess; a guess that slips past that check fails
            # later with a CRC error (BadZipFile) or a decompression error
            # (zlib.error). Anything else is a real problem and is not hidden.
            continue
        print("password is {0}".format(password))
        # Stop at the first password that extracts cleanly.
        break

6).使用shutil创建和读取压缩包

import shutil

import tarfile

# Create backup.tar.gz in the current directory from the contents of /py.
shutil.make_archive('backup','gztar',root_dir='/py')

# Open the result in a with block so the archive handle is closed after use.
with tarfile.open('backup.tar.gz', 'r:gz') as f:
    print(f.getnames())

在Python3中使用shutil读取压缩包

import shutil

# The format is inferred from the file name; with no extract_dir the archive
# is unpacked into the current working directory.
shutil.unpack_archive('backup.tar.gz')

7.Python中执行外部命令

1).subprocess模块简介

subprocess模块用来创建和管理子进程，它提供了一个名为Popen的类来启动和设置子进程的参数

2).subprocess模块的便利函数

call函数

import subprocess

# Argument-list form: the program is run directly, with no shell; call() returns its exit status.
subprocess.call(['ls', '-l'])

# shell=True passes the whole string to the shell; never build such a string from untrusted input.
subprocess.call('ls -l', shell=True)

check_call函数

与call函数类似，区别在于命令执行失败（返回码非零）时的处理方式不同：call函数只是返回非零的返回码，而check_call函数会抛出CalledProcessError异常

check_output函数

check_output函数返回命令的输出内容，便于对获取的结果进行进一步的处理，或者将命令的输出打印到日志文件中

import subprocess

# universal_newlines=True makes check_output return text (str) instead of
# bytes on Python 3, so ordinary string methods work on the result.
output = subprocess.check_output('ls -l', shell=True, universal_newlines=True)

# splitlines() gives one entry per output line, with no trailing empty string.
lines = output.splitlines()

print(lines)

3).subprocess模块的Popen类

Popen对象创建后，子进程便会运行

Popen类提供了若干方法来控制子进程的运行：

wait：等待子进程结束

poll：检查子进程状态

kill：终止子进程

send_signal：向子进程发送信号

communicate：与子进程交互

def execute_cmd(cmd):
    """Run ``cmd`` through the shell and return ``(returncode, output)``.

    ``output`` is the captured stderr when the command exits with a non-zero
    status, and the captured stdout otherwise.
    """
    pipe = subprocess.PIPE
    proc = subprocess.Popen(cmd, shell=True, stdin=pipe, stdout=pipe, stderr=pipe)
    # communicate() waits for the process to finish and collects both streams.
    out, err = proc.communicate()
    status = proc.returncode
    return (status, err) if status != 0 else (status, out)

8.综合案例：使用Python部署MongoDB

import os

import shutil

import tarfile

import subprocess

def unpackage_mongo(package, home_dir):
    """Unpack the gzip-compressed tarball ``package`` and install it as ``home_dir``.

    The code assumes the archive contains one top-level directory named after
    the archive minus its final extension (``foo.tgz`` -> ``foo``); that
    directory is what gets moved to ``home_dir``. An existing extraction
    directory and an existing ``home_dir`` are deleted first.
    """
    unpackage_dir = os.path.splitext(package)[0]

    # Start from a clean slate: remove leftovers of a previous run.
    for stale in (unpackage_dir, home_dir):
        if os.path.exists(stale):
            shutil.rmtree(stale)

    # Extract next to the archive so the files land at ``unpackage_dir`` even
    # when ``package`` includes a directory part; for a bare file name this is
    # still the current directory.
    extract_to = os.path.dirname(package) or '.'
    # The with block closes the archive handle, which used to be left open.
    with tarfile.open(package, 'r:gz') as t:
        t.extractall(extract_to)

    shutil.move(unpackage_dir, home_dir)

def create_datadir(data_dir):
    """Create an empty directory at ``data_dir``.

    An existing directory at that path is deleted together with its contents.
    Missing parent directories are created as well.
    """
    if os.path.exists(data_dir):
        shutil.rmtree(data_dir)
    # makedirs rather than mkdir, so a missing parent directory does not
    # abort the deployment.
    os.makedirs(data_dir)

def format_mongod_command(package_dir, data_dir, logfile):
    """Build the command line that starts mongod with ``--fork``.

    Args:
        package_dir: MongoDB installation directory; the binary is taken
            from its ``bin`` subdirectory.
        data_dir: value passed to ``--dbpath``.
        logfile: value passed to ``--logpath``.

    Returns:
        The command as a single string.
    """
    binary = os.path.join(package_dir, 'bin', 'mongod')
    return """{0} --fork --dbpath {1} --logpath {2}""".format(binary, data_dir, logfile)

def start_mongod(cmd):
    """Run the mongod start command ``cmd`` and abort if it fails.

    Raises:
        SystemExit: the command exited with a non-zero status; the message
            carries the command and the output returned by ``execute_cmd``.
    """
    returncode, out = execute_cmd(cmd)
    if returncode == 0:
        print("execute command ({0}) successful".format(cmd))
        return
    raise SystemExit('execute {0} error : {1}'.format(cmd, out))

def execute_cmd(cmd):
    """Run ``cmd`` through the shell and return ``(returncode, output)``.

    ``output`` is the captured stderr when the command exits with a non-zero
    status, and the captured stdout otherwise.
    """
    pipe = subprocess.PIPE
    proc = subprocess.Popen(cmd, shell=True, stdin=pipe, stdout=pipe, stderr=pipe)
    # communicate() waits for the process to finish and collects both streams.
    out, err = proc.communicate()
    status = proc.returncode
    return (status, err) if status != 0 else (status, out)

def main():
    """Deploy MongoDB from the tarball in the current directory and start it.

    The installation goes to /app/mongo, the data directory is /app/mongodata
    and the log file is mongod.log inside the data directory.

    Raises:
        SystemExit: the tarball is not found.
    """
    package = 'mongodb-linux-x86_64-rhel62-3.4.2.tgz'
    base_dir = os.path.abspath('/app')
    home_dir, data_dir = (os.path.join(base_dir, leaf) for leaf in ('mongo', 'mongodata'))
    logfile = os.path.join(data_dir, 'mongod.log')

    # Show the layout that is about to be created.
    for label, value in (("base_dir = ", base_dir),
                         ("home_dir = ", home_dir),
                         ("data_dir = ", data_dir)):
        print(label, value)

    if not os.path.exists(package):
        raise SystemExit("{0} not found".format(package))

    unpackage_mongo(package, home_dir)
    create_datadir(data_dir)
    command = format_mongod_command(home_dir, data_dir, logfile)
    start_mongod(command)

if __name__ == '__main__':

    main()

巴特西

第5章：Linux系统管理

最新文章

热门文章