Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 25 additions & 4 deletions pdfparser/poppler.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -337,12 +337,17 @@ cdef class FontInfo:
unicode name
double size
Color color
PyBool isbold
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should rather be of type bool, to delay type coercion to a Python boolean until it's really required.

PyBool isitalic

def __cinit__(self, unicode name, double size, Color color):
nparts=name.split('+',1)
self.name=nparts[-1]
def __cinit__(self, unicode name, double size, Color color, PyBool isbold, PyBool isitalic):
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use the Python naming convention - snake case - e.g. is_bold, is_italic.

#nparts=name.split('+',1)
#self.name=nparts[-1]
self.name=name
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changing the content of name could break existing implementations. I guess if the full name is needed, it should be a new property, full_name.

self.size=size
self.color=color
self.isbold=isbold
self.isitalic=isitalic

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

snake case

property name:
def __get__(self):
Expand All @@ -361,6 +366,18 @@ cdef class FontInfo:
return self.color
def __set__(self, Color val):
self.color=val

# Read/write property for the isbold field: the getter returns the stored
# value and the setter accepts a PyBool and stores it unchanged.
property isbold:
def __get__(self):
return self.isbold
def __set__(self, PyBool val):
self.isbold=val

# Read/write property for the isitalic field: the getter returns the stored
# value and the setter accepts a PyBool and stores it unchanged.
property isitalic:
def __get__(self):
return self.isitalic
def __set__(self, PyBool val):
self.isitalic=val

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

snake_case

def __richcmp__(x, y, op):
if isinstance(x, FontInfo) and isinstance(y, FontInfo) and (op == Py_EQ or op == Py_NE):
Expand Down Expand Up @@ -458,6 +475,7 @@ cdef class Line:
BBox last_bbox
FontInfo last_font
double r,g,b
TextFontInfo *textfontinfo

w=self.line.getWords()
while w:
Expand All @@ -475,9 +493,12 @@ cdef class Line:
self._bboxes.append(last_bbox)
w.getColor(&r, &g, &b)
font_name=w.getFontName(i)
textfontinfo = w.getFontInfo(i)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Definitely need to handle the case when w.getFontInfo returns null.

last_font=FontInfo(font_name.getCString().decode('UTF-8', 'replace') if <unsigned long>font_name != 0 else u"unknown", # In rare cases font name is not UTF-8 or font name is NULL
w.getFontSize(),
Color(r,g,b)
Color(r,g,b),
textfontinfo.isBold(),
textfontinfo.isItalic()
)
self._fonts.append(last_font)
#and then text as UTF-8 bytes
Expand Down