Skip to content

Commit

Permalink
Merge fb7a03d from jgm
Browse files Browse the repository at this point in the history
Conflicts:
	CONTRIBUTING.md
	scholdoc.cabal
	scholdoc.hs
	src/Text/Pandoc/Options.hs
	src/Text/Pandoc/Writers/Custom.hs
	src/Text/Pandoc/Writers/HTML.hs
	tests/Tests/Readers/Docx.hs
	tests/html-reader.html
	tests/html-reader.native
  • Loading branch information
timtylin committed Feb 4, 2015
2 parents 7ce60b6 + fb7a03d commit 9a0d8dc
Show file tree
Hide file tree
Showing 25 changed files with 205 additions and 130 deletions.
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,4 +68,4 @@ All bug fixes and pull requests are requested to provide appropriate addition to


[pandoc-pr-guidelines]: http://johnmacfarlane.net/pandoc/CONTRIBUTING.html#patches-and-pull-requests
[pandoc-contrib-tech]: http://johnmacfarlane.net/pandoc/CONTRIBUTING.html#tests
[pandoc-contrib-tech]: http://johnmacfarlane.net/pandoc/CONTRIBUTING.html#tests
4 changes: 4 additions & 0 deletions README-pandoc
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,10 @@ General options
`epub.css`, `templates`, `slidy`, `slideous`, or `s5` directory
placed in this directory will override pandoc's normal defaults.

`--verbose`
: Give verbose debugging output. Currently this only has an effect
with PDF output.

`-v`, `--version`
: Print version.

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ templates][scholdoc-templates] to ensure output compatibility with
See the [Pandoc Guide][pandocReadme] for more about Pandoc, its usage, and the
Markdown dialect that Pandoc (and hence Scholdoc) [understands][pandocMarkdown].

Scholdoc is currently up to date with [Pandoc][pandoc] version 1.13.2
Scholdoc is currently up to date with [Pandoc][pandoc] version 1.13.2 (up to commit fb7a03dcda)

### Installing Scholdoc

Expand Down
Binary file modified data/reference.docx
Binary file not shown.
1 change: 1 addition & 0 deletions deb/control.in
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ Version: VERSION
Section: text
Priority: optional
Architecture: ARCHITECTURE
Installed-Size: INSTALLED_SIZE
Depends: libc6 (>= 2.11), libgmp10, zlib1g (>= 1:1.1.4)
Maintainer: John MacFarlane <[email protected]>
Description: general markup converter
Expand Down
5 changes: 4 additions & 1 deletion make_deb.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,12 @@ echo "pandoc-citeproc" >> $COPYRIGHT
cat $PANDOC_CITEPROC_PATH/LICENSE >> $COPYRIGHT
rm -rf make_binary_package.tmp.$$

INSTALLED_SIZE=$(du -B 1024 -s $DEST | awk '{print $1}')
mkdir $DIST/DEBIAN
perl -pe "s/VERSION/$DEBVER/" deb/control.in | \
perl -pe "s/ARCHITECTURE/$ARCHITECTURE/" > $DIST/DEBIAN/control
perl -pe "s/ARCHITECTURE/$ARCHITECTURE/" | \
perl -pe "s/INSTALLED_SIZE/$INSTALLED_SIZE/" \
> $DIST/DEBIAN/control

fakeroot dpkg-deb --build $DIST
rm -rf $DIST
1 change: 1 addition & 0 deletions scholdoc.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ Library
hslua >= 0.3 && < 0.4,
binary >= 0.5 && < 0.8,
SHA >= 1.6 && < 1.7,
haddock-library >= 1.1 && < 1.3,
old-time,
JuicyPixels >= 3.1.6.1 && < 3.3
if flag(old-locale)
Expand Down
14 changes: 12 additions & 2 deletions scholdoc.hs
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ data Opt = Opt
, optTOCDepth :: Int -- ^ Number of levels to include in TOC
, optDumpArgs :: Bool -- ^ Output command-line arguments
, optIgnoreArgs :: Bool -- ^ Ignore command-line arguments
, optVerbose :: Bool -- ^ Verbose diagnostic output
, optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
, optWrapText :: Bool -- ^ Wrap text
, optColumns :: Int -- ^ Line length in characters
Expand Down Expand Up @@ -252,6 +253,7 @@ defaultOpts = Opt
, optTOCDepth = 3
, optDumpArgs = False
, optIgnoreArgs = False
, optVerbose = False
, optReferenceLinks = False
, optWrapText = True
, optColumns = 72
Expand Down Expand Up @@ -897,6 +899,11 @@ options =
(\opt -> return opt))
"" -- "Make defaults same as Pandoc (picked up in rawArgs)"

, Option "" ["verbose"]
(NoArg
(\opt -> return opt { optVerbose = True }))
"" -- "Verbose diagnostic output."

, Option "v" ["version"]
(NoArg
(\_ -> do
Expand Down Expand Up @@ -1081,6 +1088,7 @@ main = do
, optTOCDepth = epubTOCDepth
, optDumpArgs = dumpArgs
, optIgnoreArgs = ignoreArgs
, optVerbose = verbose
, optReferenceLinks = referenceLinks
, optWrapText = wrap
, optColumns = columns
Expand Down Expand Up @@ -1330,7 +1338,8 @@ main = do
Right (bs,_) -> return $ UTF8.toString bs

let readFiles [] = error "Cannot read archive from stdin"
readFiles (x:_) = B.readFile x
readFiles [x] = B.readFile x
readFiles (x:xs) = mapM (warn . ("Ignoring: " ++)) xs >> B.readFile x

let convertTabs = tabFilter (if preserveTabs || readerName' == "t2t"
then 0
Expand Down Expand Up @@ -1386,7 +1395,8 @@ main = do
writerReferenceODT = referenceODT,
writerReferenceDocx = referenceDocx,
writerScholarly = scholarlyMode,
writerMediaBag = media
writerMediaBag = media,
writerVerbose = verbose
}


Expand Down
3 changes: 0 additions & 3 deletions src/Text/Pandoc.hs
Original file line number Diff line number Diff line change
Expand Up @@ -164,9 +164,6 @@ mkStringReaderWithWarnings r = StringReader $ \o s -> do
mapM_ warn warnings
return doc

mkBSReader :: (ReaderOptions -> BL.ByteString -> (Pandoc, MediaBag)) -> Reader
mkBSReader r = ByteStringReader (\o s -> return $ r o s)

-- | Association list of formats and readers.
readers :: [(String, Reader)]
readers = [ ("native" , StringReader $ \_ s -> return $ readNative s)
Expand Down
2 changes: 2 additions & 0 deletions src/Text/Pandoc/Options.hs
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,7 @@ data WriterOptions = WriterOptions
, writerReferenceDocx :: Maybe FilePath -- ^ Path to reference DOCX if specified
, writerMediaBag :: MediaBag -- ^ Media collected by docx or epub reader
, writerScholarly :: Bool -- ^ Rendering a ScholMD document
, writerVerbose :: Bool -- ^ Verbose debugging output
} deriving Show

instance Default WriterOptions where
Expand Down Expand Up @@ -411,6 +412,7 @@ instance Default WriterOptions where
, writerReferenceDocx = Nothing
, writerMediaBag = mempty
, writerScholarly = False
, writerVerbose = False
}

-- | Returns True if the given extension is enabled.
Expand Down
40 changes: 29 additions & 11 deletions src/Text/Pandoc/PDF.hs
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,11 @@ import qualified Data.ByteString.Lazy.Char8 as BC
import qualified Data.ByteString as BS
import System.Exit (ExitCode (..))
import System.FilePath
import System.IO (stderr, stdout)
import System.Directory
import Data.Digest.Pure.SHA (showDigest, sha1)
import System.Environment
import Control.Monad (unless, (<=<))
import Control.Monad (unless, when, (<=<))
import qualified Control.Exception as E
import Control.Applicative ((<$))
import Data.List (isInfixOf)
Expand Down Expand Up @@ -70,7 +71,7 @@ makePDF :: String -- ^ pdf creator (pdflatex, lualatex, xelatex)
makePDF program writer opts doc = withTempDir "tex2pdf." $ \tmpdir -> do
doc' <- handleImages opts tmpdir doc
let source = writer opts doc'
tex2pdf' tmpdir program source
tex2pdf' (writerVerbose opts) tmpdir program source

handleImages :: WriterOptions
-> FilePath -- ^ temp dir to store images
Expand Down Expand Up @@ -130,22 +131,24 @@ convertImage tmpdir fname =
mime = getMimeType fname
doNothing = return (Right fname)

tex2pdf' :: FilePath -- ^ temp directory for output
tex2pdf' :: Bool -- ^ Verbose output
-> FilePath -- ^ temp directory for output
-> String -- ^ tex program
-> String -- ^ tex source
-> IO (Either ByteString ByteString)
tex2pdf' tmpDir program source = do
tex2pdf' verbose tmpDir program source = do
let numruns = if "\\tableofcontents" `isInfixOf` source
then 3 -- to get page numbers
else 2 -- 1 run won't give you PDF bookmarks
(exit, log', mbPdf) <- runTeXProgram program numruns tmpDir source
(exit, log', mbPdf) <- runTeXProgram verbose program 1 numruns tmpDir source
case (exit, mbPdf) of
(ExitFailure _, _) -> do
let logmsg = extractMsg log'
let extramsg =
case logmsg of
x | "! Package inputenc Error" `BC.isPrefixOf` x ->
"\nTry running pandoc with --latex-engine=xelatex."
x | ("! Package inputenc Error" `BC.isPrefixOf` x
&& program /= "xelatex")
-> "\nTry running pandoc with --latex-engine=xelatex."
_ -> ""
return $ Left $ logmsg <> extramsg
(ExitSuccess, Nothing) -> return $ Left ""
Expand All @@ -170,9 +173,9 @@ extractMsg log' = do
-- Run a TeX program on an input bytestring and return (exit code,
-- contents of stdout, contents of produced PDF if any). Rerun
-- a fixed number of times to resolve references.
runTeXProgram :: String -> Int -> FilePath -> String
runTeXProgram :: Bool -> String -> Int -> Int -> FilePath -> String
-> IO (ExitCode, ByteString, Maybe ByteString)
runTeXProgram program runsLeft tmpDir source = do
runTeXProgram verbose program runNumber numRuns tmpDir source = do
let file = tmpDir </> "input.tex"
exists <- doesFileExist file
unless exists $ UTF8.writeFile file source
Expand All @@ -192,9 +195,24 @@ runTeXProgram program runsLeft tmpDir source = do
$ lookup "TEXINPUTS" env'
let env'' = ("TEXINPUTS", texinputs) :
[(k,v) | (k,v) <- env', k /= "TEXINPUTS"]
when (verbose && runNumber == 1) $ do
putStrLn $ "[makePDF] Command line:"
putStrLn $ program ++ " " ++ unwords (map show programArgs)
putStr "\n"
putStrLn $ "[makePDF] Environment:"
mapM_ print env''
putStr "\n"
putStrLn $ "[makePDF] Contents of " ++ file' ++ ":"
B.readFile file' >>= B.putStr
putStr "\n"
(exit, out, err) <- pipeProcess (Just env'') program programArgs BL.empty
if runsLeft > 1
then runTeXProgram program (runsLeft - 1) tmpDir source
when verbose $ do
putStrLn $ "[makePDF] Run #" ++ show runNumber
B.hPutStr stdout out
B.hPutStr stderr err
putStr "\n"
if runNumber <= numRuns
then runTeXProgram verbose program (runNumber + 1) numRuns tmpDir source
else do
let pdfFile = replaceDirectory (replaceExtension file ".pdf") tmpDir
pdfExists <- doesFileExist pdfFile
Expand Down
33 changes: 22 additions & 11 deletions src/Text/Pandoc/Readers/HTML.hs
Original file line number Diff line number Diff line change
Expand Up @@ -374,12 +374,20 @@ pTable = try $ do
caption <- option mempty $ pInTags "caption" inline <* skipMany pBlank
-- TODO actually read these and take width information from them
widths' <- pColgroup <|> many pCol
head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th")
skipMany pBlank
rows <- pOptInTag "tbody"
$ many1 $ try $ skipMany pBlank >> pInTags "tr" (pCell "td")
skipMany pBlank
let pTh = option [] $ pInTags "tr" (pCell "th")
pTr = try $ skipMany pBlank >> pInTags "tr" (pCell "td" <|> pCell "th")
pTBody = do pOptInTag "tbody" $ many1 pTr
head'' <- pOptInTag "thead" pTh
head' <- pOptInTag "tbody" $ do
if null head''
then pTh
else return head''
rowsLs <- many pTBody
rows' <- pOptInTag "tfoot" $ many pTr
TagClose _ <- pSatisfy (~== TagClose "table")
let rows = (concat rowsLs) ++ rows'
-- fail on empty table
guard $ not $ null head' && null rows
let isSinglePlain x = case B.toList x of
[Plain _] -> True
_ -> False
Expand Down Expand Up @@ -625,14 +633,17 @@ pInTags tagtype parser = try $ do
pSatisfy (~== TagOpen tagtype [])
mconcat <$> manyTill parser (pCloses tagtype <|> eof)

pOptInTag :: String -> TagParser a
-> TagParser a
pOptInTag tagtype parser = try $ do
open <- option False (pSatisfy (~== TagOpen tagtype []) >> return True)
-- parses p, preceeded by an optional opening tag
-- and followed by an optional closing tags
pOptInTag :: String -> TagParser a -> TagParser a
pOptInTag tagtype p = try $ do
skipMany pBlank
optional $ pSatisfy (~== TagOpen tagtype [])
skipMany pBlank
x <- p
skipMany pBlank
x <- parser
optional $ pSatisfy (~== TagClose tagtype)
skipMany pBlank
when open $ pCloses tagtype
return x

pCloses :: String -> TagParser ()
Expand Down
22 changes: 15 additions & 7 deletions src/Text/Pandoc/Readers/LaTeX.hs
Original file line number Diff line number Diff line change
Expand Up @@ -592,7 +592,7 @@ inNote ils =

unescapeURL :: String -> String
unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs
where isEscapable c = c `elem` "#$%&~_^\\{}"
where isEscapable c = c `elem` ("#$%&~_^\\{}" :: String)
unescapeURL (x:xs) = x:unescapeURL xs
unescapeURL [] = ""

Expand Down Expand Up @@ -898,6 +898,12 @@ backslash' = string "\\"
braced' :: IncludeParser
braced' = try $ char '{' *> manyTill (satisfy (/='}')) (char '}')

maybeAddExtension :: String -> FilePath -> FilePath
maybeAddExtension ext fp =
if null (takeExtension fp)
then addExtension fp ext
else fp

include' :: IncludeParser
include' = do
fs' <- try $ do
Expand All @@ -909,8 +915,8 @@ include' = do
skipMany $ try $ char '[' *> (manyTill anyChar (char ']'))
fs <- (map trim . splitBy (==',')) <$> braced'
return $ if name == "usepackage"
then map (flip replaceExtension ".sty") fs
else map (flip replaceExtension ".tex") fs
then map (maybeAddExtension ".sty") fs
else map (maybeAddExtension ".tex") fs
pos <- getPosition
containers <- getState
let fn = case containers of
Expand Down Expand Up @@ -1018,7 +1024,8 @@ environments = M.fromList
, ("center", env "center" blocks)
, ("table", env "table" $
resetCaption *> skipopts *> blocks >>= addTableCaption)
, ("tabular", env "tabular" simpTable)
, ("tabular*", env "tabular" $ simpTable True)
, ("tabular", env "tabular" $ simpTable False)
, ("quote", blockQuote <$> env "quote" blocks)
, ("quotation", blockQuote <$> env "quotation" blocks)
, ("verse", blockQuote <$> env "verse" blocks)
Expand Down Expand Up @@ -1224,7 +1231,7 @@ citationLabel = optional sp *>
<* optional sp
<* optional (char ',')
<* optional sp)
where isBibtexKeyChar c = isAlphaNum c || c `elem` ".:;?!`'()/*@_+=-[]*"
where isBibtexKeyChar c = isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]*" :: String)

cites :: CitationMode -> Bool -> LP [Citation]
cites mode multi = try $ do
Expand Down Expand Up @@ -1304,8 +1311,9 @@ parseTableRow cols = try $ do
spaces
return cells''

simpTable :: LP Blocks
simpTable = try $ do
simpTable :: Bool -> LP Blocks
simpTable hasWidthParameter = try $ do
when hasWidthParameter $ () <$ (spaces >> tok)
spaces
aligns <- parseAligns
let cols = length aligns
Expand Down
5 changes: 4 additions & 1 deletion src/Text/Pandoc/SelfContained.hs
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,10 @@ convertTag media sourceURL t@(TagOpen "script" as) =
[] -> return t
src -> do
(raw, mime) <- getRaw media sourceURL (fromAttrib "type" t) src
let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw)
let mime' = if ';' `elem` mime
then mime -- mime type already has charset
else mime ++ ";charset=utf-8"
let enc = "data:" ++ mime' ++ "," ++ escapeURIString isOk (toString raw)
return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"])
convertTag media sourceURL t@(TagOpen "link" as) =
case fromAttrib "href" t of
Expand Down
4 changes: 2 additions & 2 deletions src/Text/Pandoc/Templates.hs
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ getDefaultTemplate :: (Maybe FilePath) -- ^ User data directory to search first
-> String -- ^ Name of writer
-> IO (Either E.IOException String)
getDefaultTemplate user writer = do
let format = takeWhile (`notElem` "+-") writer -- strip off extensions
let format = takeWhile (`notElem` ("+-" :: String)) writer -- strip off extensions
case format of
"native" -> return $ Right ""
"json" -> return $ Right ""
Expand Down Expand Up @@ -288,7 +288,7 @@ reservedWords :: [Text]
reservedWords = ["else","endif","for","endfor","sep"]

skipEndline :: Parser ()
skipEndline = P.try $ P.skipMany (P.satisfy (`elem` " \t")) >> P.char '\n' >> return ()
skipEndline = P.try $ P.skipMany (P.satisfy (`elem` (" \t" :: String))) >> P.char '\n' >> return ()

pConditional :: Parser Template
pConditional = do
Expand Down
Loading

0 comments on commit 9a0d8dc

Please sign in to comment.