5
5
import shutil
6
6
import traceback
7
7
from xml .dom import minidom
8
+ from bs4 import BeautifulSoup
8
9
import bdd
9
10
from common import *
11
+ from vars import *
10
12
import PIL
11
13
from PIL import Image
12
14
import base64
15
+ import zipfile
13
16
14
17
15
18
def create_thumbnail (path : str ):
@@ -32,6 +35,100 @@ def create_thumbnail(path: str):
32
35
return 'data:image/jpeg;base64,' + base64 .b64encode (buffer .getvalue ()).decode ()
33
36
34
37
38
def _first_text(doc, tag: str):
    """Return the text of the first ``tag`` element in ``doc``, or None.

    None is returned when the element is absent or has no text child, which
    lets callers treat every metadata field as optional.
    """
    try:
        return doc.getElementsByTagName(tag)[0].firstChild.data
    except (IndexError, AttributeError):
        return None


def _parse_zip_xml(archive, name: str):
    """Read member ``name`` from the open zip ``archive`` and parse it as XML."""
    with archive.open(name) as handle:
        return minidom.parseString(handle.read())


def _extract_cover(archive, member: str):
    """Extract ``member`` into the temporary directory and return its thumbnail."""
    tmpdir = appDir + '/tmp'  # directory used for temporary file extraction
    os.makedirs(tmpdir, exist_ok=True)
    return create_thumbnail(archive.extract(member, tmpdir))


def getEpubIfo(path: str):
    """Read metadata, cover and table of contents from an EPUB file.

    Args:
        path: file system path of the EPUB archive.

    Returns:
        A dict with the keys ``guid``, ``title``, ``authors``, ``serie``,
        ``tags``, ``cover``, ``toc`` and ``chapters``. Fields that cannot be
        found stay ``None`` (``chapters`` stays an empty list). ``None`` is
        returned instead of the dict when ``path`` is not a file or when the
        archive cannot be parsed; in the latter case the traceback is printed.
    """
    ret = {
        'guid': None,
        'title': None,
        'authors': None,
        'serie': None,
        'tags': None,
        'cover': None,
        'toc': None,
        'chapters': list()
    }
    try:
        if not os.path.isfile(path):
            return None

        # The context manager closes the archive even when parsing fails.
        with zipfile.ZipFile(path, 'r') as myzip:
            container = _parse_zip_xml(myzip, 'META-INF/container.xml')
            rootfile = container.getElementsByTagName('rootfile')[0]
            opf_path = rootfile.attributes['full-path'].value

            # Manifest hrefs are resolved against the directory of the OPF file.
            head, sep, _ = opf_path.rpartition('/')
            base = head + sep

            opf = _parse_zip_xml(myzip, opf_path)

            ret['guid'] = _first_text(opf, 'dc:identifier')
            ret['title'] = _first_text(opf, 'dc:title')
            ret['authors'] = _first_text(opf, 'dc:creator')
            ret['serie'] = _first_text(opf, 'dc:subject')

            cov_id = ''
            for meta in opf.getElementsByTagName('meta'):
                # getAttribute() yields '' for a missing attribute, so <meta>
                # elements without a "name" (e.g. property-based ones) are
                # skipped instead of aborting the whole parse.
                name = meta.getAttribute('name')
                if not meta.hasAttribute('content'):
                    continue
                if name == 'cover':
                    cov_id = meta.getAttribute('content')
                elif name == 'calibre:series':
                    ret['serie'] = meta.getAttribute('content')

            # The spine "toc" attribute is optional; without it no chapter
            # list is produced.
            spines = opf.getElementsByTagName('spine')
            toc_id = spines[0].getAttribute('toc') if spines else ''

            # Scan the full manifest: the TOC entry may be listed after the
            # cover entry, so the loop must not stop at the first cover match.
            cover_href = None
            for itm in opf.getElementsByTagName('item'):
                item_id = itm.getAttribute('id')
                href = itm.getAttribute('href')
                if toc_id and item_id == toc_id:
                    ret['toc'] = href
                if cover_href is not None:
                    continue
                if cov_id:
                    if item_id == cov_id:
                        cover_href = href
                elif itm.getAttribute('media-type') in ('image/jpeg', 'image/png'):
                    # No declared cover: fall back to the first image item.
                    cover_href = href

            if cover_href:
                # The cover is best effort: a failure here must not discard
                # the metadata that was already read.
                try:
                    ret['cover'] = _extract_cover(myzip, base + cover_href)
                except Exception:
                    traceback.print_exc()

            if ret['toc']:
                ncx = _parse_zip_xml(myzip, base + ret['toc'])
                for ref in ncx.getElementsByTagName('navPoint'):
                    ret['chapters'].append({
                        'id': ref.attributes['id'].value,
                        'name': ref.getElementsByTagName('text')[0].firstChild.data,
                        'src': base + ref.getElementsByTagName('content')[0].attributes['src'].value
                    })

        return ret
    except Exception:
        traceback.print_exc()
        return None
130
+
131
+
35
132
def insertBook (tools : dict , database : bdd .BDD , file_name_template : str , file_name_separator : str , file : str ):
36
133
if os .path .isfile (file ) is True :
37
134
# list of var for future injection into database
@@ -64,49 +161,13 @@ def insertBook(tools: dict, database: bdd.BDD, file_name_template: str, file_nam
64
161
65
162
if ext in ['.epub' , '.epub2' , '.epub3' ]: # section for EPUB files
66
163
tmp_guid = uid () # assign random guid for CBZ and CBR books
67
- list_args = list () # create list argument for external command execution
68
- list_args .append (tools ['7zip' ][os .name ]['path' ]) # insert executable path
69
- temp_args = tools ['7zip' ][os .name ]['params_deflate' ].split (' ' ) # create table of raw command arguments
70
- for var in temp_args : # parse table of raw command arguments
71
- # insert parsed param
72
- list_args .append (var .replace ('%input%' , file ).replace ('%output%' , tmpdir ))
73
- print (list_args )
74
- process = subprocess .Popen (list_args , shell = False ) # execute the command
75
- process .wait ()
76
- # print(process.returncode)
77
-
78
- try :
79
- metainfo_file = tmpdir + '/META-INF/container.xml'
80
- mydoc = minidom .parse (metainfo_file )
81
- item = mydoc .getElementsByTagName ('rootfile' )[0 ]
82
- print ( item .attributes ['full-path' ].value )
83
-
84
- metadata_file = tmpdir + '/' + item .attributes ['full-path' ].value
85
- mydoc = minidom .parse (metadata_file )
86
- try : tmp_guid = mydoc .getElementsByTagName ('dc:identifier' )[0 ].firstChild .data
87
- except Exception : {}
88
- try : tmp_title = mydoc .getElementsByTagName ('dc:title' )[0 ].firstChild .data
89
- except Exception : {}
90
- try : tmp_authors = mydoc .getElementsByTagName ('dc:creator' )[0 ].firstChild .data
91
- except Exception : {}
92
- try : tmp_serie = mydoc .getElementsByTagName ('dc:subject' )[0 ].firstChild .data
93
- except Exception : {}
94
- metas = mydoc .getElementsByTagName ('meta' )
95
- cov_id = ''
96
- for meta in metas :
97
- if meta .attributes ['name' ].value == 'cover' :
98
- cov_id = meta .attributes ['content' ].value
99
- if meta .attributes ['name' ].value == 'calibre:series' :
100
- tmp_serie = meta .attributes ['content' ].value
101
- print ("cov_id = " .format (cov_id ))
102
-
103
- items = mydoc .getElementsByTagName ('item' )
104
- for itm in items :
105
- if itm .attributes ['id' ].value == cov_id :
106
- print ('cover = {}' .format (itm .attributes ['href' ].value ))
107
- tmp_cover = create_thumbnail (tmpdir + '/' + itm .attributes ['href' ].value )
108
- except Exception :
109
- traceback .print_exc ()
164
+ infos = getEpubIfo (file )
165
+ print (infos )
166
+ if infos ['guid' ] is not None : tmp_guid = infos ['guid' ]
167
+ tmp_title = infos ['title' ]
168
+ tmp_authors = infos ['authors' ]
169
+ tmp_serie = infos ['serie' ]
170
+ tmp_cover = infos ['cover' ]
110
171
111
172
if len (database .getBooks (tmp_guid )) > 0 :
112
173
tmp_guid = uid ()
0 commit comments