今天来学习下R中字符串处理操作,主要是stringr包中的字符串处理函数的用法。

先导入stringr包:library(stringr)或require(stringr)均可加载;也可以不加载包,直接用stringr::函数名的方式调用。

一、检测是否匹配

我们先定义一个字符串和变量,在此基础上演示各个函数基本用法。

  1 library(stringr)
2 animal<-c("cow","dog","sheep","goat","pig","monkey","cat","cat")
3 str1<-"I love cat, cat cat !"
4 str2<-"lovelovelove"
5
6 str_detect(animal,"cow") #匹配到指定字符串返回TRUE,否则返回FALSE
7 [1] TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
8
9 str_detect(str1,"love")
10 [1] TRUE
11
12 str_which(animal,"dog") #返回匹配元素在向量中的下标
13 [1] 2
14
15 str_which(animal,"cat")
16 [1] 7 8
17
18 str_which(str2,"love") #返回的是向量下标而非字符位置;str2只有一个元素,匹配成功即返回1
19 [1] 1
20
21 str_count(animal,"cat") #返回匹配次数
22 [1] 0 0 0 0 0 0 1 1
23
24 str_count(str1,"cat")
25 [1] 3
26
27 str_locate(animal,"cat") #返回第一个匹配的起止位置,未匹配到返回NA
28 start end
29 [1,] NA NA
30 [2,] NA NA
31 [3,] NA NA
32 [4,] NA NA
33 [5,] NA NA
34 [6,] NA NA
35 [7,] 1 3
36 [8,] 1 3
37
38 str_locate(str1,"cat")
39 start end
40 [1,] 8 10
41
42 str_locate(str2,"love") #连续重复时只返回第一个匹配的起止位置(全部位置可用str_locate_all)
43 start end
44 [1,] 1 4
45

二、子串提取

  1 str_sub(str1,1,3) # 后面两个参数为起始,结束位置
2 [1] "I l"
3
4 str_sub(str1,1) # 可以只跟起始位置,默认到结束位置
5 [1] "I love cat, cat cat !"
6
7 str_sub(str1,3)
8 [1] "love cat, cat cat !"
9
10 str_sub(str1,-5) #位置还可以为负数
11 [1] "cat !"
12
13 str_sub(str1,-5,-1)
14 [1] "cat !"
15
16 str_subset(str1,"a") #匹配到指定字符就返回整个字符串
17 [1] "I love cat, cat cat !"
18
19 str_subset(str1,"x") #匹配不到则返回空
20 character(0)
21
22 str_extract(str1,"cat") #返回第一个匹配到字符串
23 [1] "cat"
24 str_extract(str1,"ca")
25 [1] "ca"
26
27 str_extract_all(str1,"cat") #返回所有匹配到字符串 列表形式返回
28 [[1]]
29 [1] "cat" "cat" "cat"
30
31 str_extract_all(str1,"[aoe]") #模式支持正则表达式,返回所有匹配到的字符 列表形式返回
32 [[1]]
33 [1] "o" "e" "a" "a" "a"
34
35 str_match(str1,"cat") #返回第一个匹配到字符串 矩阵形式返回
36 [,1]
37 [1,] "cat"
38
39 str_match_all(str1,"cat") #返回所有匹配到字符串 以矩阵列表形式返回
40 [[1]]
41 [,1]
42 [1,] "cat"
43 [2,] "cat"
44 [3,] "cat"
45
46 str_match_all(str2,"love")
47 [[1]]
48 [,1]
49 [1,] "love"
50 [2,] "love"
51 [3,] "love"
52
53 str_match(str2,"love")
54 [,1]
55 [1,] "love"
56
57 str_match_all(str1,"(I|cat)") #可以多个匹配;返回矩阵的第1列是完整匹配,第2列起依次是各捕获组(括号)匹配到的内容,此处整个模式就是一个捕获组,所以两列相同
58 [[1]]
59 [,1] [,2]
60 [1,] "I" "I"
61 [2,] "cat" "cat"
62 [3,] "cat" "cat"
63 [4,] "cat" "cat"

三、字符串长度处理

  1 str_length(str2) # 返回字符串长度
2 [1] 12
3
4 str_length("good job !") # 空格也算一个字符长度
5 [1] 10
6
7 str_trunc(str2,4) #将字符串截断到指定宽度,被截掉的部分用省略符(默认"...")代替
8 [1] "l..."
9
10 str_trunc(str2,4,ellipsis = "*") #ellipsis 指定替换符
11 [1] "lov*"
12
13 str_trunc(str2,width = 8,ellipsis = "#") #width指定截断后的总长度(含替换符),此处为前7个字符加"#"
14 [1] "lovelov#"
15
16 str_trunc(str2,width = 8,side = c("left"),ellipsis = "#") # side指定方向(right,center,left)
17 [1] "#ovelove"
18
19 str_trim("sssss\n") # 去掉字符串首尾空字符,换行,空格等;字符串内部空字符无法去除
20 [1] "sssss"
21 str_trim(" sssss\n")
22 [1] "sssss"

四、字符串替换

  1 str1
2 [1] "I love cat, cat cat !"
3
4 str_sub(str1,1,6) #提取子串
5 [1] "I love"
6
7 str_sub(str1,1,6)<-"she love" #子串替换
8 str1
9 [1] "she love cat, cat cat !"
10
11 str_sub(animal,1,1)<-"F" #向量替换也可以
12 animal
13 [1] "Fow" "Fog" "Fheep" "Foat" "Fig" "Fonkey" "Fat"
14 [8] "Fat"
15
16 str1<-"I love cat, cat cat !"
17
18 str_replace(str1,"cat","dog") #替换第一个匹配项
19 [1] "I love dog, cat cat !"
20
21 str_replace_all(str1,"cat","dog") # 替换所有匹配项
22 [1] "I love dog, dog dog !"
23
24 str_to_lower(str1) # 全部转为小写字母
25 [1] "i love cat, cat cat !"
26
27 str_to_upper(str1) # 全部转为大写字母
28 [1] "I LOVE CAT, CAT CAT !"
29
30 str_to_title(str1) # 单词首字母转为大写
31 [1] "I Love Cat, Cat Cat !"
32
33 str_to_title(str2)
34 [1] "Lovelovelove"
35

五、字符串分割和连接

  1 str_c(str1,str2,sep="+") # 字符串连接
2 [1] "I love cat, cat cat !+lovelovelove"
3
4 str_c(animal,str2,sep="+") #向量各元素依次连接字符串
5 [1] "Fow+lovelovelove" "Fog+lovelovelove" "Fheep+lovelovelove"
6 [4] "Foat+lovelovelove" "Fig+lovelovelove" "Fonkey+lovelovelove"
7 [7] "Fat+lovelovelove" "Fat+lovelovelove"
8
9 str_c(animal,sep="",collapse = "+") # 向量字符串连接
10 [1] "Fow+Fog+Fheep+Foat+Fig+Fonkey+Fat+Fat"
11
12 str_dup(str1,2) #字符串重复,数字代表次数
13 [1] "I love cat, cat cat !I love cat, cat cat !"
14 str_dup(str2,3)
15 [1] "lovelovelovelovelovelovelovelovelove"
16
17 str_split_fixed(animal,"",n=2) #分割字符串,分隔符,n=分割份数,返回矩阵
18 [,1] [,2]
19 [1,] "F" "ow"
20 [2,] "F" "og"
21 [3,] "F" "heep"
22 [4,] "F" "oat"
23 [5,] "F" "ig"
24 [6,] "F" "onkey"
25 [7,] "F" "at"
26 [8,] "F" "at"
27
28 str_split_fixed(str2,"",n=4)
29 [,1] [,2] [,3] [,4]
30 [1,] "l" "o" "v" "elovelove"
31
32 str_split(str2,"",4) #分割字符串,分隔符,n=分割份数,返回列表
33 [[1]]
34 [1] "l" "o" "v" "elovelove"
35
36 str_glue("pi is {str1}") # 字符串连接变量,{}花括号内可以是任意R表达式(变量,函数调用等)
37 pi is I love cat, cat cat !
38
39 str_glue("pi is {pi}")
40 pi is 3.14159265358979
41
42 str_glue("log2(8) is {log2(8)}")
43 log2(8) is 3
44
45 str_glue_data(mtcars, "{rownames(mtcars)} has {hp} hp") #数据框或列表对应行连接字符串
46 Mazda RX4 has 110 hp
47 Mazda RX4 Wag has 110 hp
48 Datsun 710 has 93 hp
49 Hornet 4 Drive has 110 hp
50 Hornet Sportabout has 175 hp
51 Valiant has 105 hp
52
53 str_glue_data(mtcars, "{rownames(mtcars)} has {hp*1000} hp") # 还可以做相应计算
54 Mazda RX4 has 110000 hp
55 Mazda RX4 Wag has 110000 hp
56 Datsun 710 has 93000 hp
57 Hornet 4 Drive has 110000 hp
58
59 str_glue_data(mtcars, "{rownames(mtcars)} has {substr(wt,1,2)} wt") # 子串提取
60 Mazda RX4 has 2. wt
61 Mazda RX4 Wag has 2. wt
62 Datsun 710 has 2. wt
63 Hornet 4 Drive has 3. wt

六、字符串排序

  1 str2
2 [1] "lovelovelove"
3 str_order(str2,decreasing = T) # 返回排序后各元素在原向量中的下标
4 [1] 1
5
6 animal
7 [1] "Fow" "Fog" "Fheep" "Foat" "Fig" "Fonkey" "Fat"
8 [8] "Fat"
9 animal[str_order(animal,decreasing = T)]
10 [1] "Fow" "Fonkey" "Fog" "Foat" "Fig" "Fheep" "Fat"
11 [8] "Fat"
12
13 animal
14 [1] "Fow" "Fog" "Fheep" "Foat" "Fig" "Fonkey" "Fat"
15 [8] "Fat"
16 str_sort(animal) #直接对向量字符串排序
17 [1] "Fat" "Fat" "Fheep" "Fig" "Foat" "Fog" "Fonkey"
18 [8] "Fow"
19

最新文章

  1. KETTLE实现数据的删除和更新
  2. 深入Java虚拟机
  3. MongoDB学习笔记-06 数据库命令、固定集合、GridFS、javascript脚本
  4. Android应用如何监听自己是否被卸载及卸载反馈功能的实现
  5. apscheduler 排程
  6. SQL替换语句之批量修改、增加、删除字段内容
  7. dispatch_async 子线程,主线程的简单用法
  8. 使用PHP抓取网站ico图标
  9. [wikioi]线段覆盖 2
  10. swift 截取字符串
  11. find中的-print0和xargs中-0的区别
  12. Tomcat下使用C3P0配置JNDI数据源(在项目的META-INF目录下创建context.xml的文件)
  13. TensorFlow实战Google深度学习框架10-12章学习笔记
  14. jdk settings
  15. 新建 Spring Mvc Web + Maven 的 maven 错误 (二)
  16. 论文笔记之:End-to-End Localization and Ranking for Relative Attributes
  17. 如何用python轻松破解wifi密码( 源码 )
  18. 关于javascript的各种高宽
  19. 【转载】图说C++对象模型:对象内存布局详解
  20. code1319 玩具装箱

热门文章

  1. [对对子队]Scrum Meeting 博客汇总
  2. the Agiles Scrum Meeting 9
  3. BUAA_2020_软件工程_提问回顾与总结
  4. Noip模拟7 2021.6.11
  5. 线路由器频段带宽是是20M好还是40M好
  6. hdu 1028 Ignatius and the Princess III(母函数)
  7. 『学了就忘』Linux基础命令 — 20、文件操作的相关命令
  8. 三、其他主机安装zabbix-agent加入到zabbix
  9. laravel路由导出和参数加密
  10. 【AI测试】人工智能 (AI) 测试--第二篇