-
Notifications
You must be signed in to change notification settings - Fork 403
Expand file tree
/
Copy path__main__.py
More file actions
251 lines (221 loc) · 7.28 KB
/
Copy path__main__.py
File metadata and controls
251 lines (221 loc) · 7.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
# coding=utf-8
# Copyright 2018 The Google Research Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Main module for ``arxiv_latex_cleaner``.
.. code-block:: bash
$ python -m arxiv_latex_cleaner --help
"""
import argparse
import json
import logging
import yaml
from ._version import __version__
from .arxiv_latex_cleaner import merge_args_into_config
from .arxiv_latex_cleaner import run_arxiv_cleaner
# Command-line interface for ``python -m arxiv_latex_cleaner``. Options are
# listed by ``--help`` in the order in which they are registered below.
PARSER = argparse.ArgumentParser(
    prog="arxiv_latex_cleaner",
    description=(
        "Clean the LaTeX code of your paper to submit to arXiv. "
        "Check the README for more information on the use."
    ),
)

# The only positional argument.
PARSER.add_argument(
    "input_folder",
    type=str,
    help="Input folder or zip archive containing the LaTeX code.",
)

# --- Image and PDF size reduction -------------------------------------------
PARSER.add_argument(
    "--resize_images",
    action="store_true",
    help="Resize images.",
)

PARSER.add_argument(
    "--im_size",
    default=500,
    type=int,
    help=(
        "Size of the output images (in pixels, longest side). Fine tune this "
        "to get as close to 10MB as possible."
    ),
)

PARSER.add_argument(
    "--compress_pdf",
    action="store_true",
    help="Compress PDF images using ghostscript (Linux and Mac only).",
)

PARSER.add_argument(
    "--pdf_im_resolution",
    default=500,
    type=int,
    help="Resolution (in dpi) to which the tool resamples the PDF images.",
)

# The value is decoded with ``json.loads``, so it must be a JSON object given
# as a single command-line token.
PARSER.add_argument(
    "--images_allowlist",
    default={},
    type=json.loads,
    help=(
        # Each fragment ends with a space: adjacent literals are concatenated
        # verbatim, and a missing space glues two words together in --help.
        "Images (and PDFs) that won't be resized to the default resolution, "
        "but the one provided here. Value is pixel for images, and dpi for "
        "PDFs, as in --im_size and --pdf_im_resolution, respectively. Format "
        "is a dictionary as: '{\"path/to/im.jpg\": 1000}'"
    ),
)

# --- Content removal --------------------------------------------------------
PARSER.add_argument(
    "--keep_bib",
    action="store_true",
    help="Avoid deleting the *.bib files.",
)

PARSER.add_argument(
    "--commands_to_delete",
    nargs="+",
    default=[],
    required=False,
    help=(
        "LaTeX commands that will be deleted. Useful for e.g. user-defined "
        "\\todo commands. For example, to delete all occurrences of \\todo1{} "
        "and \\todo2{}, run the tool with `--commands_to_delete todo1 todo2`. "
        "Please note that the positional argument `input_folder` cannot come "
        "immediately after `commands_to_delete`, as the parser does not have "
        "any way to know if it's another command to delete."
    ),
)

PARSER.add_argument(
    "--commands_only_to_delete",
    nargs="+",
    default=[],
    required=False,
    help=(
        "LaTeX commands that will be deleted but the text wrapped in the"
        " commands will be retained. Useful for commands that change text"
        " formats and colors, which you may want to remove but keep the text"
        " within. Usages are exactly the same as commands_to_delete. Note that"
        " if the commands listed here duplicate that after commands_to_delete,"
        " the default action will be retaining the wrapped text."
    ),
)

PARSER.add_argument(
    "--environments_to_delete",
    nargs="+",
    default=[],
    required=False,
    help=(
        "LaTeX environments that will be deleted. Useful for e.g. user-"
        "defined comment environments. For example, to delete all occurrences "
        "of \\begin{note} ... \\end{note}, run the tool with "
        "`--environments_to_delete note`. Please note that the positional "
        "argument `input_folder` cannot come immediately after "
        "`environments_to_delete`, as the parser does not have any way to "
        "know if it's another environment to delete."
    ),
)
def if_prefixed(orig_string: str) -> str:
    """Validate a conditional name and return it without a leading backslash.

    Used as the argparse ``type=`` converter for ``--if_exceptions``.

    Args:
      orig_string: Name as typed by the user, with or without one leading
        backslash.

    Returns:
      The name with a single leading backslash removed, if one was present.

    Raises:
      argparse.ArgumentTypeError: If the name, after removing the backslash,
        does not start with ``if``.
    """
    # Only one leading backslash is dropped; anything after it is kept as is.
    string = orig_string[1:] if orig_string.startswith("\\") else orig_string
    if not string.startswith("if"):
        # The message quotes the value exactly as the user typed it.
        raise argparse.ArgumentTypeError(
            f"Expected a string starting with 'if', got '{orig_string}'!"
        )
    return string
# --- TeX conditionals -------------------------------------------------------
# Each value passes through ``if_prefixed`` before being stored.
PARSER.add_argument(
    "--if_exceptions",
    type=if_prefixed,
    nargs="+",
    required=False,
    default=[],
    help=(
        "Constant TeX primitive conditionals (\\iffalse, \\iftrue, etc.) are "
        "simplified, i.e., true branches are kept, false branches deleted. "
        "To parse the conditional constructs correctly, all commands starting "
        "with `\\if` are assumed to be TeX primitive conditionals (e.g., "
        "declared by \\newif\\ifvar). Some known exceptions to this rule are "
        "already included (e.g., \\iff, \\ifthenelse, etc.), but you can add "
        "custom exceptions using `--if_exceptions iffalt`."
    ),
)

# --- Externally generated figures -------------------------------------------
PARSER.add_argument(
    "--use_external_tikz",
    type=str,
    help=(
        "Folder (relative to input folder) containing externalized tikz "
        "figures in PDF format."
    ),
)

# ``nargs="?"`` with ``const``: the flag may be given with no value, in which
# case "svg-inkscape" is stored.
PARSER.add_argument(
    "--svg_inkscape",
    type=str,
    nargs="?",
    const="svg-inkscape",
    help=(
        "Include PDF files generated by Inkscape via the `\\includesvg` "
        "command from the `svg` package. This is done by replacing the "
        "`\\includesvg` calls with `\\includeinkscape` calls pointing to the "
        "generated `.pdf_tex` files. By default, these files and the "
        "generated PDFs are located under `./svg-inkscape` (relative to the "
        "input folder), but a different path (relative to the input folder) "
        "can be provided in case a different `inkscapepath` was set when "
        "loading the `svg` package."
    ),
)

# --- PNG to JPG conversion --------------------------------------------------
PARSER.add_argument(
    "--convert_png_to_jpg",
    action="store_true",
    help=(
        "Convert PNG images to JPG format to reduce file size. Note that this "
        "will override --resize_images for PNG files."
    ),
)

PARSER.add_argument(
    "--png_quality",
    default=50,
    type=int,
    help="JPG quality for PNG conversion (0-100, default: 50)",
)

PARSER.add_argument(
    "--png_size_threshold",
    default=0.5,
    type=float,
    help=(
        "Minimum PNG file size in MB to apply quality reduction "
        "(default: 0.5)"
    ),
)

# --- General ----------------------------------------------------------------
PARSER.add_argument(
    "--config",
    type=str,
    required=False,
    help=(
        "Read settings from `.yaml` config file. If command line arguments "
        "are provided additionally, the config file parameters are updated "
        "with the command line parameters."
    ),
)

PARSER.add_argument(
    "--verbose",
    action="store_true",
    help="Enable detailed output.",
)
# Parse the command line into a plain dict so it can be merged with the
# settings loaded from a config file.
ARGS = vars(PARSER.parse_args())

if ARGS["config"] is not None:
    try:
        with open(ARGS["config"], "r") as config_file:
            config_params = yaml.safe_load(config_file)
        # An empty YAML document loads as None; treat it as "no settings" so
        # the merge below always receives a mapping.
        if config_params is None:
            config_params = {}
        final_args = merge_args_into_config(ARGS, config_params)
    except FileNotFoundError:
        # ARGS is a dict (see vars() above), so the path has to be read by
        # key; attribute access would raise AttributeError here and hide the
        # actual problem from the user.
        print(f"config file {ARGS['config']} not found.")
        # Fall back to the command-line values alone and drop the unusable
        # config path.
        final_args = ARGS
        final_args.pop("config", None)
else:
    final_args = ARGS

# --verbose switches logging from errors only to informational messages.
logging.basicConfig(
    level=logging.INFO if final_args.get("verbose", False) else logging.ERROR
)

run_arxiv_cleaner(final_args)

# Exit explicitly with status 0. SystemExit is raised directly because the
# ``exit`` builtin is installed by the ``site`` module for interactive use and
# is not guaranteed to exist (e.g. under ``python -S``).
raise SystemExit(0)