|
69 | 69 | "warnings.simplefilter('always', AffiliationWarning)" |
70 | 70 | ] |
71 | 71 | }, |
72 | | - { |
73 | | - "cell_type": "code", |
74 | | - "execution_count": null, |
75 | | - "id": "14622700", |
76 | | - "metadata": {}, |
77 | | - "outputs": [], |
78 | | - "source": [ |
79 | | - "# additional CSS to consider. \n", |
80 | | - "# TODO: Current into each exported file. \n", |
81 | | - "# This should be set once into the webpages directly.\n", |
82 | | - "# With only the class/id definitions in the .md files.\n", |
83 | | - "\n", |
84 | | - "debug_html = \"\"\"\n", |
85 | | - "<style>\n", |
86 | | - "#wrap{ overflow:auto; }\n", |
87 | | - "#fig1{ background:yellow; width:100%; float:left; padding:5px; }\n", |
88 | | - "#fig2{ background:red; width:50%; float:left; clear:left; padding:5px; }\n", |
89 | | - "#fig3{ background:green; width:50%; float:left; padding:5px; }\n", |
90 | | - ".macros{ background:yellow; visibility:visible;}\n", |
91 | | - "h1 {margin: 0 0 0 0;}\n", |
92 | | - "mark {background-color:#fff3b6;}\n", |
93 | | - "img {object-fit:contain; max-height:250px; display:inline-block; text-align: center;}\n", |
94 | | - "</style>\n", |
95 | | - "\"\"\" \n", |
96 | | - "html = \"\"\"\n", |
97 | | - "<style>\n", |
98 | | - "#wrap{ overflow:auto; }\n", |
99 | | - "#fig1{ width:100%; float:left; padding: 5px; }\n", |
100 | | - "#fig2{ width:50%; float:left; clear:left; padding: 5px; }\n", |
101 | | - "#fig3{ width:50%; float:left; padding: 5px; }\n", |
102 | | - ".macros{ visibility:hidden; height:0px; }\n", |
103 | | - "h1 {margin: 0em 0 0 0;}\n", |
104 | | - "mark {background-color:#fff3b6;}\n", |
105 | | - "img {object-fit:contain; max-height:250px; display:inline-block; text-align: center;}\n", |
106 | | - "</style>\n", |
107 | | - "\"\"\"" |
108 | | - ] |
109 | | - }, |
110 | 72 | { |
111 | 73 | "cell_type": "markdown", |
112 | 74 | "id": "14bd6310", |
|
128 | 90 | "# get list from MPIA website\n", |
129 | 91 | "# it automatically filters identified non-scientists :func:`mpia.filter_non_scientists`\n", |
130 | 92 | "mpia_authors = mpia.get_mpia_mitarbeiter_list()\n", |
| 93 | + "normed_mpia_authors = [k[1] for k in mpia_authors] # initials + fullname\n", |
131 | 94 | "new_papers = get_new_papers()\n", |
132 | 95 | "# add manual references\n", |
133 | 96 | "add_paper_refs = []\n", |
134 | 97 | "new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])\n", |
135 | 98 | "\n", |
136 | | - "# select only papers with matching author names and highlight authors\n", |
137 | | - "hl_list = [k[0] for k in mpia_authors]\n", |
138 | | - "\n", |
139 | 99 | "candidates = []\n", |
140 | 100 | "for paperk in new_papers:\n", |
141 | | - " hl_authors = highlight_authors_in_list(paperk['authors'], hl_list)\n", |
| 101 | + " # Check author list with their initials\n", |
| 102 | + " normed_author_list = [mpia.get_initials(k) for k in paperk['authors']]\n", |
| 103 | + " hl_authors = highlight_authors_in_list(normed_author_list, normed_mpia_authors, verbose=True)\n", |
142 | 104 | " matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]\n", |
143 | 105 | " paperk['authors'] = hl_authors\n", |
144 | 106 | " if matches:\n", |
| 107 | + " # only select paper if an author matched our list\n", |
145 | 108 | " candidates.append(paperk)\n", |
146 | 109 | "print(\"\"\"Arxiv has {0:,d} new papers today\"\"\".format(len(new_papers))) \n", |
147 | 110 | "print(\"\"\" {0:,d} with possible author matches\"\"\".format(len(candidates)))" |
|
201 | 164 | " doc.comment = (get_markdown_badge(paper_id) + \n", |
202 | 165 | " \"<mark>Appeared on: \" + paper['date'] + \"</mark> - \" +\n", |
203 | 166 | " \"_\" + paper['comments'] + \"_\")\n", |
204 | | - " doc.highlight_authors_in_list(hl_list)\n", |
| 167 | + " # highlight authors (FIXME: doc.highlight_authors)\n", |
| 168 | + " doc._authors = highlight_authors_in_list(\n", |
| 169 | + " [mpia.get_initials(k) for k in doc.authors], \n", |
| 170 | + " normed_mpia_authors, verbose=True)\n", |
205 | 171 | "\n", |
206 | 172 | " full_md = doc.generate_markdown_text()\n", |
207 | 173 | " \n", |
|
373 | 339 | }, |
374 | 340 | { |
375 | 341 | "cell_type": "code", |
376 | | - "execution_count": 4, |
| 342 | + "execution_count": null, |
377 | 343 | "id": "cf665672", |
378 | 344 | "metadata": {}, |
379 | | - "outputs": [ |
380 | | - { |
381 | | - "name": "stdout", |
382 | | - "output_type": "stream", |
383 | | - "text": [ |
384 | | - "184 publications files modified in the last 7 days.\n" |
385 | | - ] |
386 | | - } |
387 | | - ], |
| 345 | + "outputs": [], |
388 | 346 | "source": [ |
389 | 347 | "from datetime import datetime, timedelta, timezone\n", |
390 | 348 | "from glob import glob\n", |
|
408 | 366 | }, |
409 | 367 | { |
410 | 368 | "cell_type": "code", |
411 | | - "execution_count": 5, |
| 369 | + "execution_count": null, |
412 | 370 | "id": "015de740", |
413 | 371 | "metadata": {}, |
414 | | - "outputs": [ |
415 | | - { |
416 | | - "name": "stdout", |
417 | | - "output_type": "stream", |
418 | | - "text": [ |
419 | | - "3 publications in the last 7 days.\n" |
420 | | - ] |
421 | | - } |
422 | | - ], |
| 372 | + "outputs": [], |
423 | 373 | "source": [ |
424 | 374 | "import datetime\n", |
425 | 375 | "from glob import glob\n", |
|
462 | 412 | }, |
463 | 413 | { |
464 | 414 | "cell_type": "code", |
465 | | - "execution_count": 6, |
| 415 | + "execution_count": null, |
466 | 416 | "id": "52ca0208", |
467 | 417 | "metadata": { |
468 | 418 | "scrolled": false |
|
495 | 445 | }, |
496 | 446 | { |
497 | 447 | "cell_type": "code", |
498 | | - "execution_count": 7, |
| 448 | + "execution_count": null, |
499 | 449 | "id": "a6eac5b6", |
500 | 450 | "metadata": { |
501 | 451 | "scrolled": false |
|
519 | 469 | }, |
520 | 470 | { |
521 | 471 | "cell_type": "code", |
522 | | - "execution_count": 10, |
| 472 | + "execution_count": null, |
523 | 473 | "id": "adc1a1ec", |
524 | 474 | "metadata": {}, |
525 | | - "outputs": [ |
526 | | - { |
527 | | - "name": "stdout", |
528 | | - "output_type": "stream", |
529 | | - "text": [ |
530 | | - "1 publications in the last day.\n" |
531 | | - ] |
532 | | - } |
533 | | - ], |
| 475 | + "outputs": [], |
534 | 476 | "source": [ |
535 | 477 | "# redo for today\n", |
536 | 478 | "days = 1\n", |
|
557 | 499 | }, |
558 | 500 | { |
559 | 501 | "cell_type": "code", |
560 | | - "execution_count": 29, |
| 502 | + "execution_count": null, |
561 | 503 | "id": "00eece82", |
562 | 504 | "metadata": { |
563 | 505 | "scrolled": false |
564 | 506 | }, |
565 | | - "outputs": [ |
566 | | - { |
567 | | - "name": "stdout", |
568 | | - "output_type": "stream", |
569 | | - "text": [ |
570 | | - "6 6 publications selected.\n" |
571 | | - ] |
572 | | - } |
573 | | - ], |
| 507 | + "outputs": [], |
574 | 508 | "source": [ |
575 | 509 | "# Create the flat grid of the last N papers (fixed number regardless of dates)\n", |
576 | 510 | "from itertools import islice \n", |
|
598 | 532 | }, |
599 | 533 | { |
600 | 534 | "cell_type": "code", |
601 | | - "execution_count": 20, |
| 535 | + "execution_count": null, |
602 | 536 | "id": "cab45692", |
603 | 537 | "metadata": {}, |
604 | 538 | "outputs": [], |
|
629 | 563 | "name": "python", |
630 | 564 | "nbconvert_exporter": "python", |
631 | 565 | "pygments_lexer": "ipython3", |
632 | | - "version": "3.11.2" |
| 566 | + "version": "3.10.10" |
633 | 567 | }, |
634 | 568 | "vscode": { |
635 | 569 | "interpreter": { |
|
0 commit comments