拆解脚本

# Split a PDF into raw pieces: the header, the xref table, the trailer, and
# one file per xref entry (written under objects/).
#
# Usage: <this script> file.pdf
#   Run from the directory that contains file.pdf; every output name is derived
#   from the input name by stripping a trailing ".pdf".
#
# Outputs (for input NAME.pdf):
#   tdis_NAME_header.bin          first 15 bytes of the file
#   tdis_NAME_xref.bin            bytes from the "xref" keyword up to "trailer"
#   tdis_NAME_trailer.bin         bytes from the "trailer" keyword to end of file
#   tdis_tdis_NAME_xref.bin       "offset objnum" pairs sorted by offset
#   tdis_metrics_tdis_NAME_xref.bin  "offset length objnum" triples
#   objects/tdis_NAME_obj_NNN.bin one file per object
#
# NOTE(review): assumes the file contains exactly one "xref" and one "trailer"
# keyword; if `strings` reports several matches the offsets below become
# multi-line values and the arithmetic fails — confirm for your inputs.

pdf=${1:?usage: $0 file.pdf}
# Parameter expansion instead of sed: if the name has no ".pdf" suffix the
# output names still differ from the input, so dd can never overwrite it.
base=${pdf%.pdf}

header_start=0
header_len=15

# `strings -t d` prefixes every printable run with its decimal byte offset;
# the first column of the matching line is the offset of the keyword.
xref_start=$(strings -a -t d -- "$pdf" | grep -e '\bxref\b' | awk '{print $1}')
trailer_start=$(strings -a -t d -- "$pdf" | grep -e '\btrailer\b' | awk '{print $1}')
xref_len=$(( trailer_start - xref_start ))

header_dump="tdis_${base}_header.bin"
dd if="$pdf" of="$header_dump" bs=1 skip="$header_start" count="$header_len"

xref_dump="tdis_${base}_xref.bin"
dd if="$pdf" of="$xref_dump" bs=1 skip="$xref_start" count="$xref_len"

trailer_dump="tdis_${base}_trailer.bin"
dd if="$pdf" of="$trailer_dump" bs=1 skip="$trailer_start"

# Keep only the three-field xref entries, drop the first one (entry 0), and
# emit "offset objnum" pairs sorted by offset.
xref_index="tdis_${xref_dump}"
awk 'NF==3' "$xref_dump" \
  | awk 'NR!=1{printf("%08d %08d\n", $1, NR-1);}' \
  | sort > "$xref_index"

# Append the xref offset as an end marker so the last object gets a length.
printf '%08d %08d\n' "$xref_start" 0 >> "$xref_index"

# Turn consecutive offsets into "start length objnum" rows. The first row
# describes the span before the first object and is discarded.
xref_metrics="tdis_metrics_${xref_dump}"
awk 'BEGIN{loffset=0;lobjnum=0;}{printf("%3d %3d %3d\n", loffset, $1-loffset, lobjnum);loffset=$1;lobjnum=$2;}' "$xref_index" \
  | awk 'NR!=1' > "$xref_metrics"

mkdir -p objects

while read -r offset len objn; do
  obj_name=$(printf 'objects/tdis_%s_obj_%03d.bin' "$base" "$objn")
  dd if="$pdf" of="$obj_name" bs=1 skip="$offset" count="$len"
done < "$xref_metrics"

  

组合脚本

# Reassemble a PDF from a *header.bin file, the files under objects/, and a
# *trailer.bin file found in the current directory.
#
# Usage: <this script> output.pdf
#
# Layout written to output.pdf:
#   15 header bytes, every file in objects/ (in glob/sorted name order),
#   a freshly generated xref table, the first two lines of the trailer file,
#   then "startxref", the xref offset and "%%EOF".
# Side effect: writes the generated xref entries to
#   tas_generated_<output.pdf>_xref.bin in the current directory.

target=${1:?usage: $0 output.pdf}
xref_tmp="tas_generated_${target}_xref.bin"

# Use globs rather than parsing `ls`; take the first match of each kind.
header_files=(*header.bin)
trailer_files=(*trailer.bin)
if [[ ! -f ${header_files[0]} || ! -f ${trailer_files[0]} ]]; then
  printf 'error: need a *header.bin and a *trailer.bin in the current directory\n' >&2
  exit 1
fi

dd if="${header_files[0]}" of="$target" bs=1 count=15
obj_offset=15
obj_nums=0

# Start from an empty entry list so a rerun does not duplicate old entries.
: > "$xref_tmp"

for obj_file in objects/*; do
  [[ -f $obj_file ]] || continue   # unmatched glob or non-regular file
  obj_len=$(( $(wc -c < "$obj_file") ))
  dd if="$obj_file" of="$target" bs=1 count="$obj_len" seek="$obj_offset"
  # An xref entry must be exactly 20 bytes including its end-of-line, so a
  # single LF is preceded by a space.
  printf '%010d %05d n \n' "$obj_offset" 0 >> "$xref_tmp"
  obj_offset=$(( obj_offset + obj_len ))
  obj_nums=$(( obj_nums + 1 ))
done

# After the loop obj_offset is the byte offset at which "xref" is written.
{
  printf 'xref\n'
  # The subsection count includes the free entry for object 0.
  printf '0 %d\n' "$(( obj_nums + 1 ))"
  printf '0000000000 65535 f \n'
  cat -- "$xref_tmp"
  awk 'NR<=2' "${trailer_files[0]}"
  printf 'startxref\n'
  printf '%s\n' "$obj_offset"
  printf '%s\n' '%%EOF'
} >> "$target"

这样，我们就可以对解析出来的每个 PDF 对象单独进行操作了。

手动找出包含图形操作符（graphic operators）stream 的对象，然后使用下面的脚本解压该 stream（参数为 objects 目录下文件名中的对象编号）：

# Extract and decompress the stream of one object file under objects/.
#
# Usage: <this script> NNN
#   NNN is the object number exactly as it appears in the file name
#   objects/*_obj_NNN.bin.
#
# Outputs (current directory):
#   tdeflate_stream.bin  the two computed byte offsets "start end "
#   flated.bin           the raw (compressed) stream bytes
#   deflated.bin         the result of `zlib-flate -uncompress`

objn=${1:?usage: $0 object-number}

# Glob instead of parsing `ls | grep`; use the first matching file.
matches=(objects/*_obj_"$objn".bin)
if [[ ! -f ${matches[0]} ]]; then
  printf 'error: no objects/*_obj_%s.bin found\n' "$objn" >&2
  exit 1
fi
target=${matches[0]}

# grep -b -o prints the byte offset of every "stream" match. The first match
# is taken as the "stream" keyword (+7 skips the keyword and one EOL byte) and
# the second as the "stream" inside "endstream".
# NOTE(review): the -10 adjustment is kept from the original; it depends on
# the exact bytes preceding "endstream" in the input — confirm for your files.
grep -Ubo --binary-files=text stream "$target" \
  | sed -e 's/:/ /g' \
  | awk 'NR==1{printf("%d ",$1+7);}NR==2{printf("%d ", $1-10);}' > tdeflate_stream.bin

# The file has no trailing newline, so read's exit status is ignored and the
# values are validated explicitly instead.
read -r xstart xend < tdeflate_stream.bin || true
if [[ -z $xstart || -z $xend ]]; then
  printf 'error: could not locate stream/endstream in %s\n' "$target" >&2
  exit 1
fi

dd if="$target" of=flated.bin bs=1 skip="$xstart" count="$(( xend - xstart ))"
zlib-flate -uncompress < flated.bin > deflated.bin

编辑 deflated.bin 文件后，再使用下面的脚本重新压缩，并替换 objects 目录下对应的对象文件：

# Compress deflated.bin and wrap it as a PDF stream object, then replace the
# matching object file under objects/ with the result.
#
# Usage: <this script> NNN
#   NNN is the object number exactly as it appears in the file name
#   objects/*_obj_NNN.bin (leading zeros allowed).
#
# The rebuilt object's dictionary contains only /Length and /Filter; any other
# entries the original object dictionary had are not carried over.

objn=${1:?usage: $0 object-number}
out="tflate_${objn}.bin"

# Locate the object to replace before producing any output.
matches=(objects/*_obj_"$objn".bin)
if [[ ! -f ${matches[0]} ]]; then
  printf 'error: no objects/*_obj_%s.bin found\n' "$objn" >&2
  exit 1
fi
target=${matches[0]}

# Compress into a temporary file first so the real byte count can be written
# into /Length (the original printf had no argument and always wrote 0).
packed=$(mktemp) || exit 1
trap 'rm -f -- "$packed"' EXIT
zlib-flate -compress < deflated.bin > "$packed" || exit 1
length=$(( $(wc -c < "$packed") ))

{
  # 10# forces base 10 so zero-padded numbers such as 008 or 010 are not
  # interpreted as octal.
  printf '%d 0 obj\n' "$(( 10#$objn ))"
  printf '<</Length %d/Filter/FlateDecode>>stream\n' "$length"
  cat -- "$packed"
  printf '\nendstream\nendobj\n'
} > "$out"

mv -f -- "$out" "$target"

最新文章

  1. 【GOF23设计模式】工厂模式
  2. 使用Maven Profile实现多环境构建
  3. 关于AjaxPro的用法
  4. IIS 之 查看并发连接数
  5. LINUX 系统备份
  6. Ubuntu15.10 编译VLC Android(安卓)过程记录
  7. Python每日一练(2):找出html中的所有链接(Xpath、正则两个版本)
  8. Qt4.8在Windows下的三种编程环境搭建
  9. 关于grub的那些事(二)
  10. [POI 2007]ZAP-Queries
  11. Mongodb字段自增长
  12. mysql索引及优化
  13. MVC分部视图@Html.Partial
  14. PairProject——结对编程
  15. rdlc报表的导出及预览时表头
  16. 重学C语言---05运算符、表达式和语句
  17. 洛谷P1133 教主的花园
  18. Spring Security构建Rest服务-1205-Spring Security OAuth开发APP认证框架之Token处理
  19. 说一说HTTP
  20. C++练习 | 在递增序列中查找最后一个小于等于指定数的元素

热门文章

  1. maven配置本地仓库、maven配置阿里中央仓库、eclipse配置maven
  2. pipenv虚拟环境
  3. ubuntu server 12.04安装任何软件都出现the following packages have unmet dependencies的解决方法
  4. nginx+lua+redis实现灰度发布_test
  5. 【学习总结】Eclipse常用快捷键
  6. CSS3中的弹性盒子模型
  7. Git--02 Devops介绍及git安装部署
  8. vue 防止xss攻击
  9. 零基础python教程-Python解释器是什么?
  10. cocos2D-X 线程注意事项