Skip to content

Commit 5571d70

Browse files
committed
upload of v1.13.18
1 parent 48cb065 commit 5571d70

File tree

11 files changed

+270
-42
lines changed

11 files changed

+270
-42
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# PyMuPDF 1.13.17
1+
# PyMuPDF 1.13.18
22

33
![logo](https://github.com/rk700/PyMuPDF/blob/master/demo/pymupdf.jpg)
44

@@ -13,7 +13,7 @@ On **[PyPI](https://pypi.org/project/PyMuPDF)** since August 2016: [![](http://p
1313

1414
# Introduction
1515

16-
This is **version 1.13.17 of PyMuPDF (formerly python-fitz)**, a Python binding with support for [MuPDF 1.13.0](http://mupdf.com/) - "a lightweight PDF, XPS, and E-book viewer".
16+
This is **version 1.13.18 of PyMuPDF (formerly python-fitz)**, a Python binding with support for [MuPDF 1.13.0](http://mupdf.com/) - "a lightweight PDF, XPS, and E-book viewer".
1717

1818
MuPDF can access files in PDF, XPS, OpenXPS, CBZ, EPUB and FB2 (e-books) formats, and it is known for its top performance and high rendering quality.
1919

doc/PyMuPDF.pdf

104 KB
Binary file not shown.

doc/html.zip

63.5 KB
Binary file not shown.

fitz/fitz.i

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6393,7 +6393,7 @@ struct fz_stext_page_s {
63936393
fz_print_stext_page_as_html(gctx, out, $self);
63946394
break;
63956395
case(2):
6396-
text = JM_stext_page_as_dict(gctx, $self);
6396+
text = JM_stext_page_as_dict(gctx, $self, 0);
63976397
break;
63986398
case(3):
63996399
fz_print_stext_page_as_xml(gctx, out, $self);
@@ -6402,7 +6402,10 @@ struct fz_stext_page_s {
64026402
fz_print_stext_page_as_xhtml(gctx, out, $self);
64036403
break;
64046404
case(5):
6405-
text = JM_stext_page_as_dict(gctx, $self);
6405+
text = JM_stext_page_as_dict(gctx, $self, 0);
6406+
break;
6407+
case(6):
6408+
text = JM_stext_page_as_dict(gctx, $self, 1);
64066409
break;
64076410
default:
64086411
JM_print_stext_page_as_text(gctx, out, $self);
@@ -6438,6 +6441,9 @@ struct fz_stext_page_s {
64386441

64396442
def extractDICT(self):
64406443
return self._extractText(5)
6444+
6445+
def extractRAWDICT(self):
6446+
return self._extractText(6)
64416447
%}
64426448
}
64436449
};

fitz/fitz.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -102,9 +102,9 @@ class _object:
102102

103103

104104
VersionFitz = "1.13.0"
105-
VersionBind = "1.13.17"
106-
VersionDate = "2018-08-18 20:39:19"
107-
version = (VersionBind, VersionFitz, "20180818203919")
105+
VersionBind = "1.13.18"
106+
VersionDate = "2018-08-24 10:34:09"
107+
version = (VersionBind, VersionFitz, "20180824103409")
108108

109109

110110
#------------------------------------------------------------------------------
@@ -3797,6 +3797,9 @@ def extractXHTML(self):
37973797
def extractDICT(self):
37983798
return self._extractText(5)
37993799

3800+
def extractRAWDICT(self):
3801+
return self._extractText(6)
3802+
38003803
TextPage_swigregister = _fitz.TextPage_swigregister
38013804
TextPage_swigregister(TextPage)
38023805

fitz/fitz_wrap.c

Lines changed: 140 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3169,10 +3169,19 @@ PyObject *JM_fitz_config()
31693169
//----------------------------------------------------------------------------
31703170
PyObject *JM_BinFromBuffer(fz_context *ctx, fz_buffer *buffer)
31713171
{
3172-
if (!buffer) return NULL;
3172+
PyObject *bytes = NULL;
31733173
char *c = NULL;
3174-
size_t len = fz_buffer_storage(gctx, buffer, &c);
3175-
return PyBytes_FromStringAndSize(c, (Py_ssize_t) len);
3174+
if (buffer)
3175+
{
3176+
size_t len = fz_buffer_storage(gctx, buffer, &c);
3177+
bytes = PyBytes_FromStringAndSize(c, (Py_ssize_t) len);
3178+
}
3179+
else
3180+
{
3181+
bytes = PyBytes_FromString("");
3182+
}
3183+
Py_INCREF(bytes);
3184+
return bytes;
31763185
}
31773186

31783187
//----------------------------------------------------------------------------
@@ -3296,7 +3305,7 @@ void hexlify(int n, unsigned char *in, unsigned char *out)
32963305
}
32973306

32983307
//----------------------------------------------------------------------------
3299-
// Turn Python a bytes or bytearray object into char* string
3308+
// Turn a bytes or bytearray object into char* string
33003309
// using the "_AsString" functions. Returns string size or 0 on error.
33013310
//----------------------------------------------------------------------------
33023311
size_t JM_CharFromBytesOrArray(PyObject *stream, char **data)
@@ -3317,6 +3326,31 @@ size_t JM_CharFromBytesOrArray(PyObject *stream, char **data)
33173326
return len;
33183327
}
33193328

3329+
//----------------------------------------------------------------------------
3330+
// Return fz_buffer from a PyBytes or PyByteArray object
3331+
//----------------------------------------------------------------------------
3332+
fz_buffer *JM_BufferFromBytes(fz_context *ctx, PyObject *stream)
3333+
{
3334+
if (!stream) return NULL;
3335+
char *c = NULL;
3336+
size_t len = JM_CharFromBytesOrArray(stream, &c);
3337+
if (!c) return NULL;
3338+
fz_buffer *res = NULL;
3339+
fz_var(res);
3340+
fz_try(ctx)
3341+
{
3342+
res = fz_new_buffer(ctx, len);
3343+
fz_append_data(ctx, res, c, len);
3344+
fz_terminate_buffer(ctx, res);
3345+
}
3346+
fz_catch(ctx)
3347+
{
3348+
fz_drop_buffer(ctx, res);
3349+
fz_rethrow(ctx);
3350+
}
3351+
return res;
3352+
}
3353+
33203354
//----------------------------------------------------------------------------
33213355
// Modified copy of SWIG_Python_str_AsChar
33223356
// If Py3, the SWIG original v3.0.12 does *not* deliver NULL for a
@@ -4169,12 +4203,91 @@ JM_style_begin_dict(fz_context *ctx, PyObject *span, fz_font *font, float size,
41694203
void
41704204
JM_style_end_dict(fz_context *ctx, fz_buffer *buff, PyObject *span, PyObject *spanlist)
41714205
{
4172-
PyDict_SetItemString(span, "text", JM_StrFromBuffer(ctx, buff));
4206+
if (buff)
4207+
PyDict_SetItemString(span, "text", JM_StrFromBuffer(ctx, buff));
41734208
PyList_Append(spanlist, span);
41744209
}
41754210

41764211
PyObject *
4177-
JM_extract_stext_textblock_as_dict(fz_context *ctx, fz_stext_block *block)
4212+
JM_extract_stext_textchar_as_dict(fz_context *ctx, fz_stext_char *ch)
4213+
{
4214+
PyObject *chardict = NULL;
4215+
4216+
chardict = PyDict_New();
4217+
PyDict_SetItemString(chardict, "c", Py_BuildValue("C", ch->c));
4218+
PyDict_SetItemString(chardict, "origin", Py_BuildValue("ff", ch->origin.x, ch->origin.y));
4219+
PyDict_SetItemString(chardict, "bbox", Py_BuildValue("ffff",
4220+
ch->bbox.x0, ch->bbox.y0,
4221+
ch->bbox.x1, ch->bbox.y1));
4222+
return chardict;
4223+
}
4224+
4225+
PyObject *
4226+
JM_extract_stext_textline_as_dict(fz_context *ctx, fz_stext_line *line)
4227+
{
4228+
fz_stext_char *ch;
4229+
fz_font *font = NULL;
4230+
fz_buffer *buff = NULL;
4231+
float size = 0;
4232+
int sup = 0;
4233+
PyObject *span=NULL, *spanlist = NULL, *linedict = NULL, *charlist;
4234+
PyObject *chardict;
4235+
4236+
linedict = PyDict_New();
4237+
fz_rect *linerect = JM_empty_rect();
4238+
PyDict_SetItemString(linedict, "wmode", Py_BuildValue("i", line->wmode));
4239+
PyDict_SetItemString(linedict, "dir", Py_BuildValue("ff", line->dir.x, line->dir.y));
4240+
spanlist = PyList_New(0);
4241+
font = NULL;
4242+
size = 0;
4243+
4244+
for (ch = line->first_char; ch; ch = ch->next)
4245+
{
4246+
JM_join_rect(linerect, &ch->bbox, ch->size);
4247+
4248+
int ch_sup = detect_super_script(line, ch);
4249+
if (ch->font != font || ch->size != size)
4250+
{ // start new span
4251+
if (font) // must finish old span first
4252+
{
4253+
PyDict_SetItemString(span, "chars", charlist);
4254+
Py_CLEAR(charlist);
4255+
JM_style_end_dict(ctx, NULL, span, spanlist);
4256+
Py_CLEAR(span);
4257+
font = NULL;
4258+
}
4259+
font = ch->font;
4260+
size = ch->size;
4261+
sup = ch_sup;
4262+
charlist = PyList_New(0);
4263+
span = PyDict_New();
4264+
JM_style_begin_dict(ctx, span, font, size, sup);
4265+
}
4266+
chardict = JM_extract_stext_textchar_as_dict(ctx, ch);
4267+
PyList_Append(charlist, chardict);
4268+
Py_CLEAR(chardict);
4269+
}
4270+
if (font)
4271+
{
4272+
PyDict_SetItemString(span, "chars", charlist);
4273+
Py_CLEAR(charlist);
4274+
JM_style_end_dict(ctx, NULL, span, spanlist);
4275+
Py_CLEAR(span);
4276+
font = NULL;
4277+
}
4278+
4279+
PyDict_SetItemString(linedict, "spans", spanlist);
4280+
Py_CLEAR(spanlist);
4281+
PyDict_SetItemString(linedict, "bbox", Py_BuildValue("ffff",
4282+
linerect->x0, linerect->y0,
4283+
linerect->x1, linerect->y1));
4284+
4285+
free(linerect);
4286+
return linedict;
4287+
}
4288+
4289+
PyObject *
4290+
JM_extract_stext_textblock_as_dict(fz_context *ctx, fz_stext_block *block, int rawdict)
41784291
{
41794292
fz_stext_line *line;
41804293
fz_stext_char *ch;
@@ -4190,6 +4303,15 @@ JM_extract_stext_textblock_as_dict(fz_context *ctx, fz_stext_block *block)
41904303

41914304
for (line = block->u.t.first_line; line; line = line->next)
41924305
{
4306+
if (rawdict != 0)
4307+
{
4308+
linedict = JM_extract_stext_textline_as_dict(ctx, line);
4309+
PyList_Append(linelist, linedict);
4310+
Py_CLEAR(linedict);
4311+
JM_join_rect(blockrect, &line->bbox, 0.0f);
4312+
continue;
4313+
}
4314+
41934315
linedict = PyDict_New();
41944316
fz_rect *linerect = JM_empty_rect();
41954317
PyDict_SetItemString(linedict, "wmode", Py_BuildValue("i", line->wmode));
@@ -4226,6 +4348,8 @@ JM_extract_stext_textblock_as_dict(fz_context *ctx, fz_stext_block *block)
42264348
{
42274349
JM_style_end_dict(ctx, buff, span, spanlist);
42284350
Py_CLEAR(span);
4351+
fz_drop_buffer(ctx, buff);
4352+
buff = NULL;
42294353
font = NULL;
42304354
}
42314355

@@ -4313,7 +4437,7 @@ JM_extract_stext_imageblock_as_dict(fz_context *ctx, fz_stext_block *block)
43134437
}
43144438

43154439
PyObject *
4316-
JM_stext_page_as_dict(fz_context *ctx, fz_stext_page *page)
4440+
JM_stext_page_as_dict(fz_context *ctx, fz_stext_page *page, int rawdict)
43174441
{
43184442
PyObject *dict = PyDict_New();
43194443
PyObject *blocklist = PyList_New(0);
@@ -4327,7 +4451,7 @@ JM_stext_page_as_dict(fz_context *ctx, fz_stext_page *page)
43274451
if (block->type == FZ_STEXT_BLOCK_IMAGE)
43284452
PyList_Append(blocklist, JM_extract_stext_imageblock_as_dict(ctx, block));
43294453
else
4330-
PyList_Append(blocklist, JM_extract_stext_textblock_as_dict(ctx, block));
4454+
PyList_Append(blocklist, JM_extract_stext_textblock_as_dict(ctx, block, rawdict));
43314455
}
43324456
PyDict_SetItemString(dict, "blocks", blocklist);
43334457
Py_CLEAR(blocklist);
@@ -7155,24 +7279,19 @@ SWIGINTERN PyObject *fz_document_s__updateStream(struct fz_document_s *self,int
71557279
fz_var(obj);
71567280
fz_buffer *res = NULL;
71577281
fz_var(res);
7158-
size_t len = 0;
7159-
char *c = NULL;
71607282
pdf_document *pdf = pdf_specifics(gctx, self); // get pdf doc
71617283
fz_try(gctx)
71627284
{
71637285
assert_PDF(pdf);
71647286
int xreflen = pdf_xref_len(gctx, pdf);
71657287
if (!INRANGE(xref, 1, xreflen-1))
71667288
THROWMSG("xref out of range");
7167-
len = JM_CharFromBytesOrArray(stream, &c);
7168-
if (!c) THROWMSG("stream must be bytes or bytearray");
71697289
// get the object
71707290
obj = pdf_new_indirect(gctx, pdf, xref, 0);
7171-
if (new == 0 && !pdf_is_stream(gctx, obj))
7291+
if (!new && !pdf_is_stream(gctx, obj))
71727292
THROWMSG("xref not a stream object");
7173-
res = fz_new_buffer(gctx, len);
7174-
fz_append_data(gctx, res, c, len);
7175-
fz_terminate_buffer(gctx, res);
7293+
res = JM_BufferFromBytes(gctx, stream);
7294+
if (!res) THROWMSG("stream must be bytes or bytearray");
71767295
JM_update_stream(gctx, pdf, obj, res);
71777296

71787297
}
@@ -9984,7 +10103,7 @@ SWIGINTERN PyObject *fz_stext_page_s__extractText(struct fz_stext_page_s *self,i
998410103
fz_print_stext_page_as_html(gctx, out, self);
998510104
break;
998610105
case(2):
9987-
text = JM_stext_page_as_dict(gctx, self);
10106+
text = JM_stext_page_as_dict(gctx, self, 0);
998810107
break;
998910108
case(3):
999010109
fz_print_stext_page_as_xml(gctx, out, self);
@@ -9993,7 +10112,10 @@ SWIGINTERN PyObject *fz_stext_page_s__extractText(struct fz_stext_page_s *self,i
999310112
fz_print_stext_page_as_xhtml(gctx, out, self);
999410113
break;
999510114
case(5):
9996-
text = JM_stext_page_as_dict(gctx, self);
10115+
text = JM_stext_page_as_dict(gctx, self, 0);
10116+
break;
10117+
case(6):
10118+
text = JM_stext_page_as_dict(gctx, self, 1);
999710119
break;
999810120
default:
999910121
JM_print_stext_page_as_text(gctx, out, self);

fitz/helper-other.i

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,10 +93,19 @@ PyObject *JM_fitz_config()
9393
//----------------------------------------------------------------------------
9494
PyObject *JM_BinFromBuffer(fz_context *ctx, fz_buffer *buffer)
9595
{
96-
if (!buffer) return NULL;
96+
PyObject *bytes = NULL;
9797
char *c = NULL;
98-
size_t len = fz_buffer_storage(gctx, buffer, &c);
99-
return PyBytes_FromStringAndSize(c, (Py_ssize_t) len);
98+
if (buffer)
99+
{
100+
size_t len = fz_buffer_storage(gctx, buffer, &c);
101+
bytes = PyBytes_FromStringAndSize(c, (Py_ssize_t) len);
102+
}
103+
else
104+
{
105+
bytes = PyBytes_FromString("");
106+
}
107+
Py_INCREF(bytes);
108+
return bytes;
100109
}
101110

102111
//----------------------------------------------------------------------------

0 commit comments

Comments
 (0)