Class Origami::Parser
In: sources/parser/parser.rb
Parent: Object

Class representing a PDF file parser.

Methods

new   parse  

Constants

VERBOSE_QUIET = 0   Do not output debug information.
VERBOSE_INFO = 1   Output some useful information.
VERBOSE_DEBUG = 2   Output debug information.
VERBOSE_INSANE = 3   Output every object read.
PPK = Adobe::AddressBook::PPK.new(o.PPK)
User = Adobe::AddressBook::UserList.new(o.PPK.User)
AddressBook = Adobe::AddressBook::AddressList.new(o.PPK.AddressBook)

Attributes

options  [RW] 

Public Class methods

Creates a new PDF file Parser.

options:A hash of options modifying the parser behavior.

[Source]

     # File sources/parser/parser.rb, line 123
123:     def initialize(options = {})
124:       
125:       #Default options values
126:       @options = 
127:       { 
128:         :verbosity => VERBOSE_INFO, # Verbose level.
129:         :ignore_errors => true,    # Try to keep on parsing when errors occur.
130:         :callback => Proc.new {},   # Callback procedure whenever a structure is read.
131:         :prompt_password => Proc.new { print "Password: "; gets.chomp } #Callback procedure to prompt password when document is encrypted.
132:       }
133:      
134:       @options.update(options)
135:     end

Public Instance methods

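Parses the given file and returns a PDF object (or an Adobe::AddressBook object when an address book header is recognized). If parsing fails, the error is logged and the exception is re-raised.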

file:The path to the file to parse, or an object responding to read (such as an open IO).

[Source]

     # File sources/parser/parser.rb, line 141
# Parses the given file and returns the resulting document object.
#
# file:: Either the path of the file to parse, or an object responding
#        to +read+ whose whole contents are parsed.
#
# Returns a PDF object when a PDF header is recognized, or an
# Adobe::AddressBook object when an address book header is recognized.
# Raises InvalidHeader when no registered file type matches. Any error
# raised while parsing is reported through +error+ and +debug+ and then
# re-raised to the caller.
def parse(file)
  # Read the whole input into memory.
  if file.respond_to?(:read)
    filename = nil
    data = file.read
  else
    filename = file
    # The block form closes the file handle as soon as the data is read.
    data = File.open(filename, "rb") { |fd| fd.read }
  end

  stream = StringScanner.new(data)

  info "...Start parsing file ..."
  info "...Reading header..."

  # Try each registered file type in turn; the first header that parses wins.
  hdr = nil
  @@file_types.each do |file_type|
    begin
      hdr = file_type::Header.parse(stream)
      break
    rescue SystemExit, SignalException
      # Never swallow an exit request or a signal (e.g. Ctrl-C) while probing.
      raise
    rescue Exception
      # NOTE(review): kept broad because the hierarchy of the errors raised
      # by Header.parse is not visible from here -- narrow once confirmed.
      next
    end
  end

  case hdr
  when PDF::Header
    pdf = PDF.new(false)
    pdf.header = hdr
    pdf.filename = filename
    @options[:callback].call(pdf.header)

    parse_pdf_file(pdf, stream)

    info "...End parsing file..."
    info

    pdf

  when Adobe::AddressBook::Header
    addrbk = Adobe::AddressBook.new
    addrbk.header = hdr
    addrbk.filename = filename
    @options[:callback].call(addrbk.header)

    parse_addressbook(addrbk, stream)

    info "...End parsing file..."
    info

    addrbk

  else
    raise InvalidHeader, "No file type was recognized"
  end

rescue SystemExit
  # An explicit exit is not a parsing error: propagate it without logging.
  raise

rescue Exception => e
  error "An error occured while parsing."

  debug "#{e.message} (#{e.class})"
  #debug e.backtrace.join("\n")
  debug

  # Re-raise so that the caller still sees the original exception.
  raise
end

[Validate]