#!/usr/bin/env python3

 # -*- coding: UTF-8 -*-

import operator
import os
import re
import shutil

from bs4 import BeautifulSoup
# Hostnames of interest: a form qualifies only when one of its host fields
# mentions at least one of these names.  Defined at module level (under its
# original name) so the IsIntersect* checks also work when this file is
# imported, not only when it is run as a script.
target_hosts = {
    'cmszsoaa', 'cmszsoab', 'cmszdcss', 'cmszicss', 'cmsznpsa', 'cmsznpsb',
    'cmszinta', 'cmszintb', 'cmszdpsa', 'cmszdpsb', 'mcbsoaa', 'mcbsoab',
    'mcbinta', 'mcbintb', 'mcbdpsa', 'mcbdpsb', 'mcbnpsa', 'mcbnpsb',
    'mcbdcss', 'mcbicss', 'newdcss', 'newicss',
}

# Separators between host names inside a form field, and the test used to
# decide whether a fragment is a host name.  Compiled once for all files.
_HOST_SEPARATORS = re.compile('[、:\n]')
_LOWERCASE_LETTER = re.compile('[a-z]')


def processhtml(item):
    """Parse the HTML file at path *item* and return its BeautifulSoup tree.

    Raises whatever ``open`` raises when the file cannot be read.
    """
    # NOTE(review): the file is opened with the platform default encoding,
    # exactly as before; confirm the input files match that encoding.
    with open(item) as fp:
        return BeautifulSoup(fp, "html.parser")


def _input_value(soup, field_name):
    """Return the ``value`` of the <input> named *field_name*, or None."""
    field = soup.find('input', {'name': field_name})
    if field is None:
        return None
    return field.get('value')


def IsComputer(soup_arg):
    """Return True when the form's '资源类型' input has the value '主机'.

    Prints the detected resource type.  Returns False without printing when
    the form has no such input.
    """
    field = soup_arg.find('input', {'name': '资源类型'})
    if field is None:
        return False
    value = field.get('value')
    if value == '主机':
        print('资源类型:主机')
        return True
    if value == '数据库':
        print('资源类型:数据库')
    else:
        print('资源类型:其他')
    return False


def IsAgree(soup_arg):
    """Return True when a '帐号管理人员处理' row carries the opinion '同意'.

    Only table rows with exactly four <td> cells whose second cell contains
    a <font> element are considered.  The first cell must hold the step name
    and the <font> text is the opinion; every opinion found on a matching
    row is printed.
    """
    target = ['帐号管理人员处理']
    result = False
    for row in soup_arg.find_all('tr'):
        cells = row.find_all('td')
        if len(cells) != 4 or cells[1].find('font') is None:
            continue
        label = cells[0].string
        # .string is None when the cell has mixed content; such a cell can
        # never equal the target label, so skip it instead of crashing.
        if label is None or label.split() != target:
            continue
        opinion = cells[1].font.string
        print(opinion)
        if opinion == '同意':
            print("满足条件为:%s && 审批意见(同意)" % target[0])
            result = True
    return result


def _mentions_target_host(soup, field_name, echo=False):
    """Return True when the input *field_name* names a host in target_hosts.

    The field value is split on the separator characters and only fragments
    containing a lowercase ASCII letter are kept as host names.  With
    *echo* set, each kept fragment is printed.  Returns False without
    printing when the input or its value is missing.
    """
    value = _input_value(soup, field_name)
    if value is None:
        return False
    hosts = set()
    for fragment in _HOST_SEPARATORS.split(value):
        if _LOWERCASE_LETTER.search(fragment):
            if echo:
                print(fragment)
            hosts.add(fragment)
    if target_hosts.intersection(hosts):
        print('非空,有交集')
        return True
    print("空,无交集")
    return False


def IsIntersect(soup_arg):
    """Check the '239385_资源名称' input for hosts listed in target_hosts.

    Prints every host name found in the field, then whether there was an
    overlap.  Returns True on overlap, False otherwise.
    """
    return _mentions_target_host(soup_arg, '239385_资源名称', echo=True)


def IsIntersect2(soup_arg):
    """Check the '所在的硬件设备/软件平台' input for hosts in target_hosts.

    Prints whether there was an overlap.  Returns True on overlap, False
    otherwise.
    """
    return _mentions_target_host(soup_arg, '所在的硬件设备/软件平台')


def main(work_dir='/root/XmlOut/', target_dir='/root/AccountOut/'):
    """Copy every qualifying form under *work_dir* into *target_dir*.

    A file qualifies when IsComputer and IsAgree both hold and at least one
    of IsIntersect / IsIntersect2 holds.
    """
    # shutil.copy needs an existing directory to copy *into*.
    os.makedirs(target_dir, exist_ok=True)
    for parent, _dirnames, filenames in os.walk(work_dir, followlinks=True):
        for filename in filenames:
            file_path = os.path.join(parent, filename)
            print("filename with full path: %s" % file_path)
            soup = processhtml(file_path)
            # Run every check before combining them so that each one
            # prints its diagnostics regardless of the others' results.
            flag1 = IsComputer(soup)
            flag2 = IsAgree(soup)
            flag3 = IsIntersect(soup)
            flag4 = IsIntersect2(soup)
            if flag1 and flag2 and (flag3 or flag4):
                print('%s, ok----' % (file_path))
                shutil.copy(file_path, target_dir)


if __name__ == '__main__':
    main()

最新文章

  1. Linux查看物理CPU个数、核数、逻辑CPU个数
  2. Extjs TabPanel 选项卡延迟加载
  3. [原创]Matlab获取当前时间信息
  4. 菜鸟学JS(五)——window.onload与$(document).ready()
  5. Java并发之:生产者消费者问题
  6. ECMAScript 5.1中对属性的操作
  7. Oracle 10g 数据文件的第一个数据块结构
  8. POJ 3667 Hotel (线段树区间合并)
  9. xampp
  10. python连接zookeeper的日志问题
  11. Win32 GDI 非矩形区域剪裁,双缓冲技术
  12. spring3.0事务的配置
  13. android SDK和ADT的更新
  14. MongoDB学习笔记-命令
  15. 第三次冲刺spring会议(第五次会议)
  16. 上传图文{"errcode":40007,"errmsg":"invalid media_id"}解决方案
  17. 数据定义语言(DDL Data Definition Language)基础学习笔记
  18. PythonStudy——三种字符串 Three strings
  19. 用excel批量生成insert语句
  20. 【16】命令模式(Command Pattern)

热门文章

  1. STL_string用法总结
  2. js中关于new Object时传参的一些细节分析
  3. CWnd* pParent
  4. 类 Fabric 主机管理程序开发
  5. c++ map: 使用struct或者数组做value
  6. Bequeath Connection and SYS Logon
  7. case....when ...多重判断
  8. Linux基础入门
  9. BZOJ 1617 Usaco 2008 Mar. River Crossing渡河问题
  10. TypeError: CleanWebpackPlugin is not a constructor