<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	xmlns:georss="http://www.georss.org/georss" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:media="http://search.yahoo.com/mrss/"
	>

<channel>
	<title>PDF Hacks</title>
	<atom:link href="http://pdfhacks.wordpress.com/feed/" rel="self" type="application/rss+xml" />
	<link>http://pdfhacks.wordpress.com</link>
	<description>PDF Hacks</description>
	<lastBuildDate>Fri, 19 Nov 2010 08:51:32 +0000</lastBuildDate>
	<language>en</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>http://wordpress.com/</generator>
<cloud domain='pdfhacks.wordpress.com' port='80' path='/?rsscloud=notify' registerProcedure='' protocol='http-post' />
<image>
		<url>http://s2.wp.com/i/buttonw-com.png</url>
		<title>PDF Hacks</title>
		<link>http://pdfhacks.wordpress.com</link>
	</image>
	<atom:link rel="search" type="application/opensearchdescription+xml" href="http://pdfhacks.wordpress.com/osd.xml" title="PDF Hacks" />
	<atom:link rel='hub' href='http://pdfhacks.wordpress.com/?pushpress=hub'/>
		<item>
		<title>diffpdf-Free Cross Platform Software to compare PDF</title>
		<link>http://pdfhacks.wordpress.com/2010/11/19/diffpdf-free-cross-platform-free-software-to-compare-pdf/</link>
		<comments>http://pdfhacks.wordpress.com/2010/11/19/diffpdf-free-cross-platform-free-software-to-compare-pdf/#comments</comments>
		<pubDate>Fri, 19 Nov 2010 08:51:32 +0000</pubDate>
		<dc:creator>rubypdf</dc:creator>
				<category><![CDATA[Open Source]]></category>
		<category><![CDATA[Software]]></category>
		<category><![CDATA[Windows]]></category>
		<category><![CDATA[compare pdf]]></category>
		<category><![CDATA[pdfdiff]]></category>
		<category><![CDATA[Qt4]]></category>

		<guid isPermaLink="false">http://pdfhacks.wordpress.com/?p=142</guid>
		<description><![CDATA[DiffPDF can compare two PDF files. It offers two comparison modes: Text and Appearance. By default the comparison is of the text on each pair of pages, but comparing the appearance of pages is also supported (for example, if a diagram is changed or if a paragraph is reformatted). It is also possible to compare [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pdfhacks.wordpress.com&amp;blog=8662317&amp;post=142&amp;subd=pdfhacks&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p><a title="diffpdf windows 32" href="http://soft.rubypdf.com/software/diffpdf" target="_blank">DiffPDF</a> can compare two PDF files. It offers two comparison modes: <strong>Text </strong>and <strong>Appearance</strong>.</p>
<p>By default the comparison is of the text on each pair of pages, but  comparing the appearance of pages is also supported (for example, if a  diagram is changed or if a paragraph is reformatted). It is also  possible to compare particular pages or page ranges. For example, if  there are two versions of a PDF file, one with pages 1-12 and the other  with pages 1-13 because of an extra page having been added as page 4,  they can be compared by specifying two page ranges, 1-12 for the first  and 1-3, 5-13 for the second. This will make DiffPDF compare pages in  the pairs (1, 1), (2, 2), (3, 3), (4, 5), (5, 6), and so on, to (12,  13).</p>
<p>&nbsp;</p>
<p>Reference,</p>
<p><a rel="prev" href="http://blog.rubypdf.com/2010/08/31/diffpdf-free-software-to-compare-two-pdf-files-textually-or-visually/">diffpdf-free software to compare two PDF files textually or visually</a><a title="Permanent Link to Free software to Compare the appearance difference of two PDF" rel="bookmark" href="http://blog.rubypdf.com/2010/11/19/free-software-to-compare-the-appearance-difference-of-two-pdf/"></a></p>
<p><a title="Permanent Link to Free software to Compare the appearance difference of two PDF" rel="bookmark" href="http://blog.rubypdf.com/2010/11/19/free-software-to-compare-the-appearance-difference-of-two-pdf/">Free software to Compare the appearance difference of two PDF</a></p>
<p><a title="diffpdf windows 32" href="http://soft.rubypdf.com/software/diffpdf" target="_blank">diffpdf windows 32 version download address</a></p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/pdfhacks.wordpress.com/142/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/pdfhacks.wordpress.com/142/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/pdfhacks.wordpress.com/142/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/pdfhacks.wordpress.com/142/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/pdfhacks.wordpress.com/142/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/pdfhacks.wordpress.com/142/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/pdfhacks.wordpress.com/142/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/pdfhacks.wordpress.com/142/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/pdfhacks.wordpress.com/142/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/pdfhacks.wordpress.com/142/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/pdfhacks.wordpress.com/142/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/pdfhacks.wordpress.com/142/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/pdfhacks.wordpress.com/142/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/pdfhacks.wordpress.com/142/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pdfhacks.wordpress.com&amp;blog=8662317&amp;post=142&amp;subd=pdfhacks&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://pdfhacks.wordpress.com/2010/11/19/diffpdf-free-cross-platform-free-software-to-compare-pdf/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/488a21ba3ef0629402539af1d2abf9d1?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">rubypdf</media:title>
		</media:content>
	</item>
		<item>
		<title>Google Docs support OCR for PDF and Images</title>
		<link>http://pdfhacks.wordpress.com/2010/07/16/google-docs-support-ocr-for-pdf-and-images/</link>
		<comments>http://pdfhacks.wordpress.com/2010/07/16/google-docs-support-ocr-for-pdf-and-images/#comments</comments>
		<pubDate>Thu, 15 Jul 2010 22:46:44 +0000</pubDate>
		<dc:creator>rubypdf</dc:creator>
				<category><![CDATA[PDF News]]></category>
		<category><![CDATA[adobe pdf]]></category>
		<category><![CDATA[OCR Google Docs]]></category>

		<guid isPermaLink="false">http://pdfhacks.wordpress.com/?p=135</guid>
		<description><![CDATA[This feature only works for the following languages: English, French, Italian, German and Spanish. &#8220;For the technically curious: we&#8217;re using Optical Character Recognition (OCR) that our friends from Google Books helped us set up. OCR works best with high-resolution images, and not all formatting may be preserved.&#8221;, Google Docs Blog says. for details, please visit [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pdfhacks.wordpress.com&amp;blog=8662317&amp;post=135&amp;subd=pdfhacks&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>This feature only works for the following  languages: English, French, Italian, German and Spanish. &#8220;For the  technically curious: we&#8217;re using Optical Character Recognition (OCR)  that our friends from Google Books helped us set up. OCR works best with  high-resolution images, and not all formatting may be preserved.&#8221;, <a href="http://googledocs.blogspot.com/2010/06/optical-character-recognition-ocr-in.html">Google   Docs Blog</a> says.</p>
<p>for details, please visit <a href="http://blog.rubypdf.com/2010/07/16/google-docs-add-ocr-support-to-pdf-and-images/">Google  Docs add OCR support to PDF and Images</a>.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/pdfhacks.wordpress.com/135/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/pdfhacks.wordpress.com/135/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/pdfhacks.wordpress.com/135/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/pdfhacks.wordpress.com/135/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/pdfhacks.wordpress.com/135/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/pdfhacks.wordpress.com/135/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/pdfhacks.wordpress.com/135/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/pdfhacks.wordpress.com/135/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/pdfhacks.wordpress.com/135/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/pdfhacks.wordpress.com/135/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/pdfhacks.wordpress.com/135/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/pdfhacks.wordpress.com/135/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/pdfhacks.wordpress.com/135/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/pdfhacks.wordpress.com/135/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pdfhacks.wordpress.com&amp;blog=8662317&amp;post=135&amp;subd=pdfhacks&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://pdfhacks.wordpress.com/2010/07/16/google-docs-support-ocr-for-pdf-and-images/feed/</wfw:commentRss>
		<slash:comments>1</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/488a21ba3ef0629402539af1d2abf9d1?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">rubypdf</media:title>
		</media:content>
	</item>
		<item>
		<title>Freely Rotate PDF Page Online-Google App Engine Application</title>
		<link>http://pdfhacks.wordpress.com/2010/01/12/freely-rotate-pdf-page-online-google-app-engine-application/</link>
		<comments>http://pdfhacks.wordpress.com/2010/01/12/freely-rotate-pdf-page-online-google-app-engine-application/#comments</comments>
		<pubDate>Tue, 12 Jan 2010 03:12:19 +0000</pubDate>
		<dc:creator>rubypdf</dc:creator>
				<category><![CDATA[Uncategorized]]></category>
		<category><![CDATA[Adobe Acrobat]]></category>
		<category><![CDATA[Adobe Reader]]></category>
		<category><![CDATA[GAE]]></category>
		<category><![CDATA[GAE/J]]></category>
		<category><![CDATA[Google App Engine Application]]></category>
		<category><![CDATA[iText#]]></category>
		<category><![CDATA[java]]></category>
		<category><![CDATA[PDF Converter Online]]></category>
		<category><![CDATA[pdfrotate]]></category>

		<guid isPermaLink="false">http://pdfhacks.wordpress.com/?p=131</guid>
		<description><![CDATA[Rotate PDF Page Online(PdfRotate) RubyPDF release the 3rd Google App Engine Application, PDFRotate Online, with it, you can freely rotate PDF page online, the rotate angles support 90, 180 and 270 degrees. Rotate PDF Page Online(PdfRotate) If you want offline version, please check pdfrotate.<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pdfhacks.wordpress.com&amp;blog=8662317&amp;post=131&amp;subd=pdfhacks&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<h2 style="text-align:center;"><a href="http://rubypdf.appspot.com/pdfrotate.html">Rotate PDF Page Online(PdfRotate)</a></h2>
<p><a href="http://soft.rubypdf.com">RubyPDF </a>release the 3rd Google App Engine Application, PDFRotate Online, with it, you can freely rotate PDF page online, the rotate angles support 90, 180 and 270 <span style="color:#6e6e6e;">degrees</span>.</p>
<h2 style="text-align:center;"><a href="http://rubypdf.appspot.com/pdfrotate.html">Rotate PDF Page  Online(PdfRotate)</a></h2>
<p>If you want offline version, please check <a href="http://soft.rubypdf.com/softwares/pdfrotate">pdfrotate</a>.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/pdfhacks.wordpress.com/131/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/pdfhacks.wordpress.com/131/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/pdfhacks.wordpress.com/131/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/pdfhacks.wordpress.com/131/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/pdfhacks.wordpress.com/131/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/pdfhacks.wordpress.com/131/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/pdfhacks.wordpress.com/131/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/pdfhacks.wordpress.com/131/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/pdfhacks.wordpress.com/131/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/pdfhacks.wordpress.com/131/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/pdfhacks.wordpress.com/131/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/pdfhacks.wordpress.com/131/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/pdfhacks.wordpress.com/131/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/pdfhacks.wordpress.com/131/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pdfhacks.wordpress.com&amp;blog=8662317&amp;post=131&amp;subd=pdfhacks&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://pdfhacks.wordpress.com/2010/01/12/freely-rotate-pdf-page-online-google-app-engine-application/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/488a21ba3ef0629402539af1d2abf9d1?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">rubypdf</media:title>
		</media:content>
	</item>
		<item>
		<title>Free Divide PDF Page Online-Another Google App Engine Application</title>
		<link>http://pdfhacks.wordpress.com/2010/01/06/free-divide-pdf-page-online-another-google-app-engine-application/</link>
		<comments>http://pdfhacks.wordpress.com/2010/01/06/free-divide-pdf-page-online-another-google-app-engine-application/#comments</comments>
		<pubDate>Wed, 06 Jan 2010 03:58:42 +0000</pubDate>
		<dc:creator>rubypdf</dc:creator>
				<category><![CDATA[Uncategorized]]></category>
		<category><![CDATA[Acrobat]]></category>
		<category><![CDATA[Adobe Acrobat]]></category>
		<category><![CDATA[adobe pdf]]></category>
		<category><![CDATA[GAE]]></category>
		<category><![CDATA[Google App Engine]]></category>
		<category><![CDATA[Google Application Engine]]></category>
		<category><![CDATA[iText in Action]]></category>
		<category><![CDATA[java]]></category>

		<guid isPermaLink="false">http://pdfhacks.wordpress.com/?p=129</guid>
		<description><![CDATA[Today, RubyPDF released another Google App Engine Application, Freely Divide PDF Page Online, also bases on iText. the main feature is Split a PDF page to two half size Pdf Page, for example, Split a A3 Page to two A4 pages. btw, RubyPDF also released desktop version before.<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pdfhacks.wordpress.com&amp;blog=8662317&amp;post=129&amp;subd=pdfhacks&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>Today, <a href="http://soft.rubypdf.com">RubyPDF </a>released another Google App Engine Application, Freely <a href="http://rubypdf.appspot.com/pagedivide.html">Divide PDF Page Online</a>, also bases on iText.</p>
<p>the main feature is <span style="color:#6e6e6e;">Split a PDF page to two half size Pdf Page, for  example, <a href="http://blog.rubypdf.com/2007/02/19/split-an-a3-pagegenerated-by-n-up-application-to-double-a4-pages/">Split  a A3 Page to two A4 pages</a>.</span></p>
<p><span style="color:#6e6e6e;">btw, <a href="http://soft.rubypdf.com">RubyPDF</a> also released </span><span style="color:#6e6e6e;"><a href="http://soft.rubypdf.com/softwares/pdf-pagedivide">desktop version</a> before.<br />
</span></p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/pdfhacks.wordpress.com/129/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/pdfhacks.wordpress.com/129/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/pdfhacks.wordpress.com/129/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/pdfhacks.wordpress.com/129/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/pdfhacks.wordpress.com/129/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/pdfhacks.wordpress.com/129/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/pdfhacks.wordpress.com/129/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/pdfhacks.wordpress.com/129/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/pdfhacks.wordpress.com/129/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/pdfhacks.wordpress.com/129/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/pdfhacks.wordpress.com/129/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/pdfhacks.wordpress.com/129/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/pdfhacks.wordpress.com/129/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/pdfhacks.wordpress.com/129/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pdfhacks.wordpress.com&amp;blog=8662317&amp;post=129&amp;subd=pdfhacks&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://pdfhacks.wordpress.com/2010/01/06/free-divide-pdf-page-online-another-google-app-engine-application/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/488a21ba3ef0629402539af1d2abf9d1?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">rubypdf</media:title>
		</media:content>
	</item>
		<item>
		<title>How to download the big files through Google App Engine UrlFetch API Call</title>
		<link>http://pdfhacks.wordpress.com/2009/12/25/how-to-download-the-big-files-through-google-app-engine-urlfetch-api-call/</link>
		<comments>http://pdfhacks.wordpress.com/2009/12/25/how-to-download-the-big-files-through-google-app-engine-urlfetch-api-call/#comments</comments>
		<pubDate>Fri, 25 Dec 2009 01:44:28 +0000</pubDate>
		<dc:creator>rubypdf</dc:creator>
				<category><![CDATA[Tutorials]]></category>
		<category><![CDATA[GAE]]></category>
		<category><![CDATA[GAE/J]]></category>
		<category><![CDATA[Google App Engine]]></category>
		<category><![CDATA[UrlFetch API Call]]></category>

		<guid isPermaLink="false">http://pdfhacks.wordpress.com/?p=125</guid>
		<description><![CDATA[I offer the UrlFetch function in my PDF Password Remover Online application, but I do not want to let it only manipulate no more 1M PDF, after some study, I got the solution, let UrlFetch API download no more 1M data each time, but repeat many times until all data downloaded, of course, there still [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pdfhacks.wordpress.com&amp;blog=8662317&amp;post=125&amp;subd=pdfhacks&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>I offer the UrlFetch function in my <a href="http://rubypdf.appspot.com/pdfdecrypt.html">PDF Password Remover Online</a> application, but I do not want to let it only manipulate no more 1M PDF, after some study, I got the solution, let UrlFetch API download no more 1M data each time, but repeat many times until all data downloaded, of course, there still a limit, 30 second request limit.<br />
For details, please visit</p>
<h2><a title="Permanent Link to How to Use Google App Engine  UrlFetch API to download the files over 1M" rel="bookmark" href="http://blog.rubypdf.com/2009/12/24/how-to-use-google-app-engine-urlfetch-api-to-download-the-files-over-1m/">How to Use Google App Engine  UrlFetch API to download the files over 1M</a></h2>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/pdfhacks.wordpress.com/125/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/pdfhacks.wordpress.com/125/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/pdfhacks.wordpress.com/125/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/pdfhacks.wordpress.com/125/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/pdfhacks.wordpress.com/125/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/pdfhacks.wordpress.com/125/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/pdfhacks.wordpress.com/125/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/pdfhacks.wordpress.com/125/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/pdfhacks.wordpress.com/125/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/pdfhacks.wordpress.com/125/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/pdfhacks.wordpress.com/125/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/pdfhacks.wordpress.com/125/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/pdfhacks.wordpress.com/125/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/pdfhacks.wordpress.com/125/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pdfhacks.wordpress.com&amp;blog=8662317&amp;post=125&amp;subd=pdfhacks&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://pdfhacks.wordpress.com/2009/12/25/how-to-download-the-big-files-through-google-app-engine-urlfetch-api-call/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/488a21ba3ef0629402539af1d2abf9d1?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">rubypdf</media:title>
		</media:content>
	</item>
		<item>
		<title>First PDF Password Remover application hosted on Google App Engine</title>
		<link>http://pdfhacks.wordpress.com/2009/12/23/first-pdf-password-remover-application-hosted-on-google-app-engine/</link>
		<comments>http://pdfhacks.wordpress.com/2009/12/23/first-pdf-password-remover-application-hosted-on-google-app-engine/#comments</comments>
		<pubDate>Wed, 23 Dec 2009 01:53:34 +0000</pubDate>
		<dc:creator>rubypdf</dc:creator>
				<category><![CDATA[PDF News]]></category>
		<category><![CDATA[Software]]></category>
		<category><![CDATA[cloud computing]]></category>
		<category><![CDATA[GAE]]></category>
		<category><![CDATA[Google App Engine]]></category>
		<category><![CDATA[iText#]]></category>
		<category><![CDATA[pdfdecrypt]]></category>
		<category><![CDATA[pdfunlock]]></category>

		<guid isPermaLink="false">http://pdfhacks.wordpress.com/?p=123</guid>
		<description><![CDATA[RubyPDF Software released the First PDF Password Remover application hosted on Google App Engine, bases on iText(version 2.1.7, but with many modification). with it, you can easily remove the user password or owner password online, and it is free. remove restrictions on any secured PDF document (you should have the right to do it, for [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pdfhacks.wordpress.com&amp;blog=8662317&amp;post=123&amp;subd=pdfhacks&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p><a href="http://soft.rubypdf.com">RubyPDF Software</a> released the First <a href="http://rubypdf.appspot.com/pdfdecrypt.html">PDF Password Remover</a> application hosted on Google App Engine, bases on iText(version 2.1.7, but with many modification). with it, you can easily remove the user password or owner password online, and it is free.</p>
<ul>
<li>remove restrictions on any secured PDF document (you should have the right to do it, for example, if you forgot the password). Any Acrobat version up to 9 is supported, even with <strong>128-bit AES</strong> or <strong>128-bit RC4</strong> encryption. PDF restrictions removal is an instant process. Unlocked file can be opened in any PDF viewer without any restrictions so you may edit/copy/print it.</li>
<li>remove the PDF open password. Decryption of the file with password for opening is guaranteed for PDF files Any Acrobat version up to 9 is supported, even with 128-bit AES or 128-bit RC4 encryption,but you must know the password first.</li>
</ul>
<p>For details, please visit <a href="http://rubypdf.appspot.com/pdfdecrypt.html">RubyPDF PDF Password Remover Online</a>.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/pdfhacks.wordpress.com/123/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/pdfhacks.wordpress.com/123/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/pdfhacks.wordpress.com/123/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/pdfhacks.wordpress.com/123/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/pdfhacks.wordpress.com/123/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/pdfhacks.wordpress.com/123/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/pdfhacks.wordpress.com/123/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/pdfhacks.wordpress.com/123/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/pdfhacks.wordpress.com/123/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/pdfhacks.wordpress.com/123/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/pdfhacks.wordpress.com/123/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/pdfhacks.wordpress.com/123/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/pdfhacks.wordpress.com/123/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/pdfhacks.wordpress.com/123/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pdfhacks.wordpress.com&amp;blog=8662317&amp;post=123&amp;subd=pdfhacks&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://pdfhacks.wordpress.com/2009/12/23/first-pdf-password-remover-application-hosted-on-google-app-engine/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/488a21ba3ef0629402539af1d2abf9d1?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">rubypdf</media:title>
		</media:content>
	</item>
		<item>
		<title>how to Optimize and Reduce PDF File Size with the Help of Adobe Acrobat</title>
		<link>http://pdfhacks.wordpress.com/2009/10/30/how-to-optimize-and-reduce-pdf-file-size-with-the-help-of-adobe-acrobat/</link>
		<comments>http://pdfhacks.wordpress.com/2009/10/30/how-to-optimize-and-reduce-pdf-file-size-with-the-help-of-adobe-acrobat/#comments</comments>
		<pubDate>Thu, 29 Oct 2009 18:13:30 +0000</pubDate>
		<dc:creator>rubypdf</dc:creator>
				<category><![CDATA[Hacks]]></category>
		<category><![CDATA[Tutorials]]></category>
		<category><![CDATA[Adobe Acrobat]]></category>
		<category><![CDATA[adobe pdf]]></category>
		<category><![CDATA[PDF optimizer]]></category>
		<category><![CDATA[Reduce PDF File Size]]></category>

		<guid isPermaLink="false">http://pdfhacks.wordpress.com/?p=121</guid>
		<description><![CDATA[I noticed How to use Adobe Acrobat to Optimize and Reduce PDF File Size lists two PDF version tutorials, PDF version tutorial of Adobe Acrobat 6 solution to optimize and reduce file size, http://www.adobe.com/designcenter/acrobat/articles/acr6optimize/acr6optimize.pdf PDF version tutorial of Adobe Acrobat 7 solution to optimize and reduce file size, http://www.adobe.com/designcenter/acrobat/articles/acr7optimize/acr7optimize.pdf and I just wonder why they [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pdfhacks.wordpress.com&amp;blog=8662317&amp;post=121&amp;subd=pdfhacks&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>I noticed <a href="http://blog.rubypdf.com/2009/10/30/how-to-use-adobe-acrobat-to-optimize-and-reduce-pdf-file-size/">How to use Adobe Acrobat to Optimize and Reduce PDF File Size</a> lists two PDF version tutorials,<br />
PDF version tutorial of <strong>Adobe Acrobat 6</strong> solution to optimize and reduce file size,<br />
<a href="http://www.adobe.com/designcenter/acrobat/articles/acr6optimize/acr6optimize.pdf">http://www.adobe.com/designcenter/acrobat/articles/acr6optimize/acr6optimize.pdf</a><br />
PDF version tutorial of <strong>Adobe Acrobat 7</strong> solution to optimize and reduce file size,<br />
<a href="http://www.adobe.com/designcenter/acrobat/articles/acr7optimize/acr7optimize.pdf">http://www.adobe.com/designcenter/acrobat/articles/acr7optimize/acr7optimize.pdf</a><br />
and I just wonder why they do not release the tutorials for <strong>Adobe Acrobat 8</strong> and <strong>Adobe Acrobat 9</strong>.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/pdfhacks.wordpress.com/121/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/pdfhacks.wordpress.com/121/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/pdfhacks.wordpress.com/121/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/pdfhacks.wordpress.com/121/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/pdfhacks.wordpress.com/121/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/pdfhacks.wordpress.com/121/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/pdfhacks.wordpress.com/121/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/pdfhacks.wordpress.com/121/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/pdfhacks.wordpress.com/121/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/pdfhacks.wordpress.com/121/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/pdfhacks.wordpress.com/121/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/pdfhacks.wordpress.com/121/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/pdfhacks.wordpress.com/121/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/pdfhacks.wordpress.com/121/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pdfhacks.wordpress.com&amp;blog=8662317&amp;post=121&amp;subd=pdfhacks&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://pdfhacks.wordpress.com/2009/10/30/how-to-optimize-and-reduce-pdf-file-size-with-the-help-of-adobe-acrobat/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/488a21ba3ef0629402539af1d2abf9d1?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">rubypdf</media:title>
		</media:content>
	</item>
		<item>
		<title>using pdfsizeopt to Optimize &amp; Reduce PDF File Size</title>
		<link>http://pdfhacks.wordpress.com/2009/10/30/using-pdfsizeopt-to-optimize-reduce-pdf-file-size/</link>
		<comments>http://pdfhacks.wordpress.com/2009/10/30/using-pdfsizeopt-to-optimize-reduce-pdf-file-size/#comments</comments>
		<pubDate>Thu, 29 Oct 2009 17:40:21 +0000</pubDate>
		<dc:creator>rubypdf</dc:creator>
				<category><![CDATA[Hacks]]></category>
		<category><![CDATA[Linux]]></category>
		<category><![CDATA[Open Source]]></category>
		<category><![CDATA[Software]]></category>
		<category><![CDATA[Tutorials]]></category>
		<category><![CDATA[Windows]]></category>

		<guid isPermaLink="false">http://pdfhacks.wordpress.com/?p=119</guid>
		<description><![CDATA[pdfsizeopt is open source project hosting on Google Code, the main feature is PDF file size optimizer. it bases on the following tools, pdfsizeopt.py Python Ghostscript Java sam2p jbig2 png22pnm pngtopnm Multivalent.jar PNGOUT pdfsizeopt is a collection of best practices and scripts for Unix to optimize the size of PDF files, with focus on PDFs [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pdfhacks.wordpress.com&amp;blog=8662317&amp;post=119&amp;subd=pdfhacks&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>pdfsizeopt is an open source project hosted on Google Code; its main feature is a PDF file size optimizer. It is based on the following tools:</p>
<ul>
<li>pdfsizeopt.py</li>
<li>Python</li>
<li>Ghostscript</li>
<li>Java</li>
<li>sam2p</li>
<li>jbig2</li>
<li>png22pnm</li>
<li>pngtopnm</li>
<li>Multivalent.jar</li>
<li>PNGOUT</li>
</ul>
<p>pdfsizeopt is a collection of best practices and scripts for Unix to optimize the size of PDF files, with focus on PDFs created from TeX and LaTeX documents. pdfsizeopt is developed on a Linux system, and it depends on existing tools such as Python 2.4, Ghostscript 8.50, jbig2enc (optional), sam2p, pngtopnm, pngout (optional), and the Multivalent PDF compressor (optional) written in Java.</p>
<p>for details, please visit <a title="pdfsizeopt-a Free and Open Source PDF Manipulation Tool to Reduce PDF File Size" rel="bookmark" href="http://blog.rubypdf.com/2009/10/30/pdfsizeopt-a-free-and-open-source-pdf-manipulation-tool-to-reduce-pdf-file-size/">pdfsizeopt-a Free and Open Source PDF Manipulation Tool to Reduce PDF File Size</a></p>
<p>references,</p>
<p><a href="http://code.google.com/p/pdfsizeopt/">pdfsizeopt home page</a><br />
<a title="Convert JBIG2 to PDF with free and open source software agl’s jbig2enc" href="http://blog.rubypdf.com/2009/10/29/convert-jbig2-to-pdf-with-free-and-open-source-software-agls-jbig2enc/">Convert JBIG2 to PDF with free and open source software agl’s jbig2enc</a><br />
<a href="http://soft.rubypdf.com/softwares/windows-version-jbig2-encoder-jbig2-exe">Windows version JBIG2 Encoder-Jbig2.exe</a></p>
<p>&nbsp;</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/pdfhacks.wordpress.com/119/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/pdfhacks.wordpress.com/119/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/pdfhacks.wordpress.com/119/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/pdfhacks.wordpress.com/119/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/pdfhacks.wordpress.com/119/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/pdfhacks.wordpress.com/119/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/pdfhacks.wordpress.com/119/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/pdfhacks.wordpress.com/119/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/pdfhacks.wordpress.com/119/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/pdfhacks.wordpress.com/119/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/pdfhacks.wordpress.com/119/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/pdfhacks.wordpress.com/119/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/pdfhacks.wordpress.com/119/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/pdfhacks.wordpress.com/119/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pdfhacks.wordpress.com&amp;blog=8662317&amp;post=119&amp;subd=pdfhacks&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://pdfhacks.wordpress.com/2009/10/30/using-pdfsizeopt-to-optimize-reduce-pdf-file-size/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/488a21ba3ef0629402539af1d2abf9d1?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">rubypdf</media:title>
		</media:content>
	</item>
		<item>
		<title>PDFMiner-Python PDF parser and analyzer</title>
		<link>http://pdfhacks.wordpress.com/2009/10/19/pdfminer-python-pdf-parser-and-analyzer/</link>
		<comments>http://pdfhacks.wordpress.com/2009/10/19/pdfminer-python-pdf-parser-and-analyzer/#comments</comments>
		<pubDate>Mon, 19 Oct 2009 08:47:43 +0000</pubDate>
		<dc:creator>rubypdf</dc:creator>
				<category><![CDATA[Linux]]></category>
		<category><![CDATA[Open Source]]></category>
		<category><![CDATA[Software]]></category>
		<category><![CDATA[Windows]]></category>
		<category><![CDATA[dumpdf]]></category>
		<category><![CDATA[pdf parser and analyzer]]></category>
		<category><![CDATA[pdf2html]]></category>
		<category><![CDATA[pdf2text]]></category>
		<category><![CDATA[pdf2txt]]></category>
		<category><![CDATA[pdftohtml]]></category>
		<category><![CDATA[pdftotext]]></category>
		<category><![CDATA[pdftotxt]]></category>
		<category><![CDATA[Python]]></category>
		<category><![CDATA[toc]]></category>

		<guid isPermaLink="false">http://pdfhacks.wordpress.com/?p=117</guid>
		<description><![CDATA[PDFMiner is a suite of programs that help extracting and analyzing text data of PDF documents. Unlike other PDF-related tools, it allows to obtain the exact location of texts in a page, as well as other extra information such as font information or ruled lines. It includes a PDF converter that can transform PDF files [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pdfhacks.wordpress.com&amp;blog=8662317&amp;post=117&amp;subd=pdfhacks&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>PDFMiner is a suite of programs that help extracting and analyzing text data of PDF documents. Unlike other PDF-related tools, it allows to obtain the exact location of texts in a page, as well as  other extra information such as font information or ruled lines. It includes a PDF converter that can transform PDF files into other text formats (such as HTML). It has an extensible PDF parser that can be used for other purposes instead of text analysis.</p>
<p><strong>Features:</strong></p>
<ul>
<li> Written entirely in Python. (for version 2.4 or newer)</li>
<li> PDF-1.7 specification support. (well, almost)</li>
<li> Non-ASCII languages and vertical writing scripts support.</li>
<li> Various font types (Type1, TrueType, Type3, and CID) support.</li>
<li> Basic encryption (RC4) support.</li>
<li> PDF to HTML conversion (with a sample converter web app).</li>
<li> Outline (TOC) extraction.</li>
<li> Tagged contents extraction.</li>
<li> Infer text running by using clustering technique.</li>
</ul>
<p><a name="source"></a> Download and For details, please visit<strong> </strong><a title="PDFMiner" href="http://www.unixuser.org/~euske/python/pdfminer/index.html" target="_blank">http://www.unixuser.org/~euske/python/pdfminer/index.html</a>.</p>
<p>btw,</p>
<p>PDFMiner comes with two handy tools: <strong><code>pdf2txt.py</code></strong> and <strong><code>dumppdf.py</code></strong>.</p>
<p><strong><code>pdf2txt.py</code></strong> extracts text contents from a PDF file. It extracts all the texts that are to be rendered programmatically. It cannot recognize texts drawn as images that would require optical character recognition. It also extracts the corresponding locations, font names, font sizes, writing direction (horizontal or vertical) for each text portion. You need to provide a password for protected PDF documents when its access is restricted. You cannot extract any text from a PDF document which does not have extraction permission.</p>
<p>For non-ASCII languages, you can specify the output encoding  (such as UTF-8).</p>
<p><strong><code>dumppdf.py</code></strong> dumps the internal contents of a PDF file in pseudo-XML format. This program is primarily for debugging purpose, but it&#8217;s also possible to extract some meaningful contents (such as images).</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/pdfhacks.wordpress.com/117/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/pdfhacks.wordpress.com/117/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/pdfhacks.wordpress.com/117/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/pdfhacks.wordpress.com/117/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/pdfhacks.wordpress.com/117/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/pdfhacks.wordpress.com/117/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/pdfhacks.wordpress.com/117/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/pdfhacks.wordpress.com/117/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/pdfhacks.wordpress.com/117/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/pdfhacks.wordpress.com/117/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/pdfhacks.wordpress.com/117/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/pdfhacks.wordpress.com/117/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/pdfhacks.wordpress.com/117/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/pdfhacks.wordpress.com/117/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pdfhacks.wordpress.com&amp;blog=8662317&amp;post=117&amp;subd=pdfhacks&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://pdfhacks.wordpress.com/2009/10/19/pdfminer-python-pdf-parser-and-analyzer/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/488a21ba3ef0629402539af1d2abf9d1?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">rubypdf</media:title>
		</media:content>
	</item>
		<item>
		<title>Some PDF Tools developed in Python</title>
		<link>http://pdfhacks.wordpress.com/2009/10/19/some-pdf-tools-developed-in-python/</link>
		<comments>http://pdfhacks.wordpress.com/2009/10/19/some-pdf-tools-developed-in-python/#comments</comments>
		<pubDate>Sun, 18 Oct 2009 17:21:18 +0000</pubDate>
		<dc:creator>rubypdf</dc:creator>
				<category><![CDATA[Linux]]></category>
		<category><![CDATA[Open Source]]></category>
		<category><![CDATA[Software]]></category>
		<category><![CDATA[Tutorials]]></category>
		<category><![CDATA[Windows]]></category>
		<category><![CDATA[adobe pdf]]></category>
		<category><![CDATA[PDF Parser]]></category>
		<category><![CDATA[pdf tools]]></category>
		<category><![CDATA[Python]]></category>

		<guid isPermaLink="false">http://pdfhacks.wordpress.com/?p=111</guid>
		<description><![CDATA[When Search WordPress.com, I noticed a article  PDF Tools, it introduces some small PDF tools and all developed in Python. pdf-parser.py This tool will parse a PDF document to identify the fundamental elements used in the analyzed file. It will not render a PDF document. The code of the parser is quick-and-dirty, I’m not recommending this [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pdfhacks.wordpress.com&amp;blog=8662317&amp;post=111&amp;subd=pdfhacks&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>While searching WordPress.com, I noticed an article, <a rel="bookmark" href="http://blog.didierstevens.com/programs/pdf-tools/">PDF Tools</a>, which introduces some small PDF tools, all developed in Python.</p>
<div>
<p><a name="pdf-parser"><strong>pdf-parser.py</strong></a></p>
<p>This tool will parse a PDF document to identify the <a href="http://blog.didierstevens.com/2008/04/09/quickpost-about-the-physical-and-logical-structure-of-pdf-files/" target="_self">fundamental elements</a> used in the analyzed file. It will not render a PDF document. The code of the parser is quick-and-dirty, I’m not recommending this as text book case for PDF parsers, but it gets the job done.</p>
<p>You can see the parser in action in <a href="http://blog.didierstevens.com/2008/10/20/analyzing-a-malicious-pdf-file/" target="_self">this screencast</a>.</p>
<p><img title="20081030" src="http://didierstevens.files.wordpress.com/2008/10/20081030.png?w=628&#038;h=260&#038;h=260" alt="" width="628" height="260" /></p>
<p>The stats option display statistics of the objects found in the PDF document. Use this to identify PDF documents with unusual/unexpected objects, or to classify PDF documents. For example, I generated statistics for 2 malicious PDF files, and although they were very different in content and size, the statistics were identical, proving that they used the same attack vector and shared the same origin.</p>
<p>The search option searches for a string in indirect objects (not inside the stream of indirect objects). The search is not case-sensitive, and is susceptible to the <a href="http://blog.didierstevens.com/2008/04/29/pdf-let-me-count-the-ways/" target="_self">obfuscation techniques I documented</a> (as I’ve yet to encounter these obfuscation techniques in the wild, I decided not to resort to canonicalization).</p>
<p>filter option applies the filter(s) to the stream. For the moment, only FlateDecode is supported (e.g. zlib decompression).</p>
<p>The raw option makes pdf-parser output raw data (e.g. not the printable Python representation).</p>
<p>objects outputs the data of the indirect object whose ID was specified. This ID is not version dependent. If more than one object has the same ID (disregarding the version), all these objects will be output.</p>
<p>reference allows you to select all objects referencing the specified indirect object. This ID is not version dependent.</p>
<p>type allows you to select all objects of a given type. The type is a Name and as such is case-sensitive and must start with a slash-character (/).</p>
<p>Download:</p>
<p><a href="http://www.didierstevens.com/files/software/pdf-parser_V0_3_1.zip">pdf-parser_V0_3_1.zip</a> (<a href="https://www.didierstevens.com/files/software/pdf-parser_V0_3_1.zip">https</a>)</p>
<p>MD5: 07CDA54844CD6567473CBF2B0DFC601C</p>
<p>SHA256: 7614AEC453502EEF43F9EA04A82092C4ACDD32AB86D1C4D744B7B590C74152EC</p>
<p><a name="make-pdf"><strong>make-pdf tools</strong></a><br />
make-pdf-javascript.py allows one to create a simple PDF document with embedded JavaScript that will execute upon opening of the PDF document. It’s essentially glue-code for the mPDF.py module which contains a class with methods to create headers, indirect objects, stream objects, trailers and XREFs.</p>
<p><img title="20081109-134003" src="http://didierstevens.files.wordpress.com/2008/11/20081109-134003.png?w=650&#038;h=227&#038;h=227" alt="20081109-134003" width="650" height="227" /></p>
<p>If you execute it without options, it will generate a PDF document with JavaScript to display a message box (calling app.alert).</p>
<p>To provide your own JavaScript, use option –javascript for a script on the command line, or –javascriptfile for a script contained in a file.</p>
<p>Download:</p>
<p><a href="http://www.didierstevens.com/files/software/make-pdf_V0_1_1.zip">make-pdf_V0_1_1.zip</a> (<a href="https://www.didierstevens.com/files/software/make-pdf_V0_1_1.zip">https</a>)</p>
<p>MD5: 9AF2E343B78553021C989E8E22355531</p>
<p>SHA256: C604679ABEB0469C1463159E02E74F12487B2755A6096B416A8F4F638DEB8AA9</p>
<p><a name="pdfid"><strong>pdfid.py</strong></a><br />
This tool is not a PDF parser, but it will scan a file to look for certain PDF keywords, allowing you to identify PDF documents that contain (for example) JavaScript or execute an action when opened. PDFiD will also handle <a href="http://blog.didierstevens.com/2008/04/29/pdf-let-me-count-the-ways/" target="_self">name obfuscation</a>.</p>
<p>The idea is to use this tool first to triage PDF documents, and then <a href="http://blog.didierstevens.com/2008/10/20/analyzing-a-malicious-pdf-file/" target="_self">analyze the suspicious ones with my pdf-parser</a>.</p>
<p>An important design criterion for this program is simplicity. Parsing a PDF document completely requires a very complex program, and hence it is bound to contain many (security) bugs. To avoid the risk of getting exploited, I decided to keep this program very simple (it is even simpler than pdf-parser.py).</p>
<p><img title="20090330-214223" src="http://didierstevens.files.wordpress.com/2008/10/20090330-214223.png?w=423&#038;h=514&#038;h=514" alt="20090330-214223" width="423" height="514" /></p>
<p>PDFiD will scan a PDF document for a given list of strings and count the occurrences (total and obfuscated) of each word:</p>
<ul>
<li> obj</li>
<li> endobj</li>
<li> stream</li>
<li> endstream</li>
<li> xref</li>
<li> trailer</li>
<li> startxref</li>
<li> /Page</li>
<li> /Encrypt</li>
<li>/ObjStm</li>
<li> /JS</li>
<li> /JavaScript</li>
<li> /AA</li>
<li> /OpenAction</li>
<li> /JBIG2Decode</li>
</ul>
<p>Almost every PDF document will contain the first 7 words (obj through startxref), and to a lesser extent stream and endstream. I’ve found a couple of PDF documents without xref or trailer, but these are rare (BTW, this is not an indication of a malicious PDF document).</p>
<p>/Page gives an indication of the number of pages in the PDF document. Most malicious PDF documents have only one page.</p>
<p>/Encrypt indicates that the PDF document has DRM or needs a password to be read.</p>
<p>/ObjStm counts the number of object streams. An object stream is a stream object that can contain other objects, and can therefore be used to obfuscate objects (by using different filters).</p>
<p>/JS and /JavaScript indicate that the PDF document contains JavaScript. Almost all malicious PDF documents that I’ve found in the wild contain JavaScript (to exploit a JavaScript vulnerability and/or to execute a heap spray). Of course, you can also find JavaScript in PDF documents without malicious intent.</p>
<p>/AA and /OpenAction indicate an automatic action to be performed when the page/document is viewed. All malicious PDF documents with JavaScript I’ve seen in the wild had an automatic action to launch the JavaScript without user interaction.</p>
<p>The combination of automatic action  and JavaScript makes a PDF document very suspicious.</p>
<p>/JBIG2Decode indicates if the PDF document uses JBIG2 compression. This is not necessarily an indication of a malicious PDF document, but requires further investigation.</p>
<p>A number that appears between parentheses after the counter represents the number of obfuscated occurrences. For example, /JBIG2Decode 1(1) tells you that the PDF document contains the name /JBIG2Decode and that it was obfuscated (using hexcodes, e.g. /JBIG#32Decode).</p>
<p>BTW, all the counters can be skewed if the PDF document is saved with <a href="http://blog.didierstevens.com/2008/05/07/solving-a-little-pdf-puzzle/" target="_self">incremental updates</a>.</p>
<p>Because PDFiD is just a string scanner (supporting name obfuscation), it will also generate false positives. For example, a simple text file starting with %PDF-1.1 and containing words from the list will also be identified as a PDF document.</p>
<p>Download:</p>
<p><a href="http://www.didierstevens.com/files/software/pdfid_v0_0_9.zip">pdfid_v0_0_9.zip</a> (<a href="https://www.didierstevens.com/files/software/pdfid_v0_0_9.zip">https</a>)</p>
<p>MD5: 1C731D6204C09AAFF219876A8FB5E834</p>
<p>SHA256: 24A9B16E67A84E85488A16879CB611128B2E5921044E48EFB60D784BD785CBD0</p>
</div>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/pdfhacks.wordpress.com/111/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/pdfhacks.wordpress.com/111/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/pdfhacks.wordpress.com/111/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/pdfhacks.wordpress.com/111/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/pdfhacks.wordpress.com/111/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/pdfhacks.wordpress.com/111/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/pdfhacks.wordpress.com/111/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/pdfhacks.wordpress.com/111/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/pdfhacks.wordpress.com/111/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/pdfhacks.wordpress.com/111/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/pdfhacks.wordpress.com/111/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/pdfhacks.wordpress.com/111/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/pdfhacks.wordpress.com/111/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/pdfhacks.wordpress.com/111/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=pdfhacks.wordpress.com&amp;blog=8662317&amp;post=111&amp;subd=pdfhacks&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://pdfhacks.wordpress.com/2009/10/19/some-pdf-tools-developed-in-python/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/488a21ba3ef0629402539af1d2abf9d1?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">rubypdf</media:title>
		</media:content>

		<media:content url="http://didierstevens.files.wordpress.com/2008/10/20081030.png?w=628&#38;h=260" medium="image">
			<media:title type="html">20081030</media:title>
		</media:content>

		<media:content url="http://didierstevens.files.wordpress.com/2008/11/20081109-134003.png?w=650&#38;h=227" medium="image">
			<media:title type="html">20081109-134003</media:title>
		</media:content>

		<media:content url="http://didierstevens.files.wordpress.com/2008/10/20090330-214223.png?w=423&#38;h=514" medium="image">
			<media:title type="html">20090330-214223</media:title>
		</media:content>
	</item>
	</channel>
</rss>
