master

分支 (1)

管理

管理

master

regex
/
01.py

# coding:utf8
"""
参考文档
http://www.cnblogs.com/chuxiuhong/p/5885073.html
http://www.cnblogs.com/chuxiuhong/p/5907484.html
"""

import re

# Demo: find a literal substring with a compiled pattern.
key = r"javapythonhtmlvhdl"  # source text to search in
p1 = r"python"
pattern1 = re.compile(p1)
# search() returns the first match object, or None when nothing matches.
matcher1 = pattern1.search(key)
# Single-argument print(...) behaves the same on Python 2 and Python 3,
# whereas the bare `print x` statement is a SyntaxError on Python 3.
print(matcher1.group(0))

# Demo: ".+" matches one or more of any character between the two tags.
key = r"<h1>hello world<h1>"  # source text
p1 = r"<h1>.+<h1>"  # r"<h1>.*<h1>" would additionally accept an empty body
pattern1 = re.compile(p1)
# pattern1.findall(key) is equivalent to re.findall(pattern1, key).
# print(...) with one argument works on both Python 2 and Python 3.
print(pattern1.findall(key))

# Demo: "." is a metacharacter, so a literal dot must be written as "\.".
key = r"afiouwehrfuichuxiuhong@hit.edu.cnaskdjhfiosueh"
# Both dots are escaped; an unescaped "." would match any character
# (e.g. "eduXcn"), not just a literal dot.
p1 = r"afiouwehrfuichuxiuhong@hit\.edu\.cn"
pattern1 = re.compile(p1)
# print(...) with one argument works on both Python 2 and Python 3.
print(pattern1.findall(key))

# Demo: the "*" quantifier (zero or more repetitions of the preceding item).
key = r"http://www.nsfbuhwe.com and https://www.auhfisna.com"  # made-up URLs
# Note the star: "s*" accepts zero or more "s", so both "http://" and
# "https://" match (as would "httpss://"); "https?://" would allow at most one.
p1 = r"https*://"
pattern1 = re.compile(p1)
# print(...) with one argument works on both Python 2 and Python 3.
print(pattern1.findall(key))

# Demo: character classes, where "[Hh]" matches either "H" or "h".
key = r"lalala<hTml>hello</Html>heiheihei"
# Each letter of the tag name is matched case-insensitively via a class;
# ".+?" is the lazy form, so it stops at the first closing tag.
p1 = r"<[Hh][Tt][Mm][Ll]>.+?</[Hh][Tt][Mm][Ll]>"
pattern1 = re.compile(p1)
# print(...) with one argument works on both Python 2 and Python 3.
print(pattern1.findall(key))

# Demo: negated character class, where "[^p]" matches any character except "p".
key = r"mat cat hat pat"
p1 = r"[^p]at"  # excludes "pat"
pattern1 = re.compile(p1)
# The compiled pattern was previously never used; show the result like the
# other demos do. print(...) with one argument works on Python 2 and 3.
print(pattern1.findall(key))

"""在你使用"+","*"的时候，一定先想好到底是用贪婪型还是懒惰型，尤其是当你用到范围较大的项目上时，因为很有可能它就多匹配字符回来给你！！！**"""
# Demo: greedy matching, where ".+" grabs as much as possible before the last ".".
key = r"chuxiuhong@hit.edu.cn"
# Intent: capture from "@" up to the next "."; being greedy, ".+" runs to
# the LAST dot, so the result is "@hit.edu." rather than "@hit.".
p1 = r"@.+\."
pattern1 = re.compile(p1)
# print(...) with one argument works on both Python 2 and Python 3.
print(pattern1.findall(key))

# Demo: lazy matching, where ".+?" stops at the first position that lets the
# rest of the pattern match.
key = r"chuxiuhong@hit.edu.cn"
# Intent: capture from "@" up to the next "."; the lazy quantifier yields
# exactly "@hit.".
p1 = r"@.+?\."
pattern1 = re.compile(p1)
# print(...) with one argument works on both Python 2 and Python 3.
print(pattern1.findall(key))

"""
为了能够准确的控制重复次数，正则表达式还提供
{a,b}(代表a<=匹配次数<=b)
"""

# Demo: bounded repetition, where "a{1,2}" matches one or two consecutive "a".
key = r"ss saas and sas and saaas"
# Variants: "{,2}" means at most two, "{1,}" means at least one.
p1 = r"sa{1,2}s"
pattern1 = re.compile(p1)
# "ss" (no "a") and "saaas" (three "a") are rejected.
# print(...) with one argument works on both Python 2 and Python 3.
print(pattern1.findall(key))

# Demo: anchors, where "^" and "$" force the pattern to span the whole string.
key = r"192.168.1.1"
# Four dot-separated digit groups; "\d+" only checks for digits, it does
# not validate that each group is in the 0-255 range.
p1 = r"^\d+\.\d+\.\d+\.\d+$"
pattern1 = re.compile(p1)
# print(...) with one argument works on both Python 2 and Python 3.
print(pattern1.findall(key))

# Demo: lookbehind "(?<=...)" and lookahead "(?=...)" assert context without
# including it in the match.
key = r"<html><body><h1>hello world</h1></body></html>"  # text to search
# Match the text preceded by "<h1>" and followed by "</h1>", tags excluded.
p1 = r"(?<=<h1>).+?(?=</h1>)"
pattern1 = re.compile(p1)  # compile the regular expression
matcher1 = pattern1.search(key)  # find the first matching part of the text
# print(...) with one argument works on both Python 2 and Python 3.
print(matcher1.group(0))

# Demo: backreference, where "\1" must repeat whatever group 1 captured.
key = r"<h1>hello world</h1>"
# The closing tag's level must equal the opening tag's level (1-6).
p1 = r"<h([1-6])>.*?</h\1>"
pattern1 = re.compile(p1)
m1 = pattern1.search(key)
# This matches because both tags are "h1". If the source ended in a
# different level (e.g. "</h3>"), search() would return None and the
# .group(0) call below would raise AttributeError.
# print(...) with one argument works on both Python 2 and Python 3.
print(m1.group(0))


# key = "hello world , and python is very good, isn't ?"
# p1 = r".+"
# pattern1 = re.compile(p1)
# print pattern1.findall(key)