Browse Source

Reiniciar repositorio para que quede más limpio

drymer 3 years ago
commit
eff77b4324
100 changed files with 55993 additions and 0 deletions
  1. 6
    0
      .gitignore
  2. 7
    0
      AUTHORS
  3. 84
    0
      LICENCE
  4. 20
    0
      README
  5. 41
    0
      THANKS
  6. 7
    0
      TODO
  7. 141
    0
      admin_cb.py
  8. 78
    0
      docs/admin.html
  9. 177
    0
      docs/config.html
  10. 65
    0
      docs/contributing.html
  11. 100
    0
      docs/docs.css
  12. 54
    0
      docs/docs.js
  13. 48
    0
      docs/etiquette.html
  14. 105
    0
      docs/filters.html
  15. BIN
      docs/img/shadowAlpha.png
  16. 56
    0
      docs/index.html
  17. 129
    0
      docs/installation.html
  18. 42
    0
      docs/migration.html
  19. 107
    0
      docs/normalization.html
  20. 190
    0
      docs/templates.html
  21. 109
    0
      docs/venus.svg
  22. 23
    0
      emacses.ini
  23. 82
    0
      examples/filters/categories/categories.xslt
  24. 37
    0
      examples/filters/guess-language/README
  25. 15131
    0
      examples/filters/guess-language/en.data
  26. 22710
    0
      examples/filters/guess-language/fr.data
  27. 58
    0
      examples/filters/guess-language/guess-language.py
  28. 25
    0
      examples/filters/guess-language/learn-language.py
  29. 188
    0
      examples/filters/guess-language/trigram.py
  30. 55
    0
      examples/filters/xpath-sifter/xpath-sifter.ini
  31. 47
    0
      examples/foaf-based.ini
  32. BIN
      examples/images/edd.png
  33. BIN
      examples/images/jdub.png
  34. BIN
      examples/images/keybuk.png
  35. BIN
      examples/images/thom.png
  36. 57
    0
      examples/opml-top100.ini
  37. 78
    0
      examples/planet-schmanet.ini
  38. 17
    0
      expunge.py
  39. 79
    0
      favicon.py
  40. 30
    0
      filters/addsearch.genshi
  41. 70
    0
      filters/addsearch.xslt
  42. 18
    0
      filters/coral_cdn_filter.py
  43. 29
    0
      filters/delDupName/byline_author.xslt
  44. 17
    0
      filters/delDupName/p_by_name.xslt
  45. 15
    0
      filters/delDupName/p_from.xslt
  46. 25
    0
      filters/detitle.xslt
  47. 109
    0
      filters/excerpt.py
  48. 30
    0
      filters/h1title.xslt
  49. 6
    0
      filters/html2xhtml.plugin
  50. 508
    0
      filters/mememe.plugin
  51. 36
    0
      filters/minhead.py
  52. 6
    0
      filters/notweets.py
  53. 44
    0
      filters/regexp_sifter.py
  54. 1
    0
      filters/stripAd/feedburner.sed
  55. 1
    0
      filters/stripAd/google_ad_map.sed
  56. 1
    0
      filters/stripAd/yahoo.sed
  57. 31
    0
      filters/xhtml2html.plugin
  58. 23
    0
      filters/xpath_sifter.py
  59. 97
    0
      planet.py
  60. 42
    0
      planet/__init__.py
  61. 413
    0
      planet/config.py
  62. 30
    0
      planet/csv_config.py
  63. 67
    0
      planet/expunge.py
  64. 197
    0
      planet/foaf.py
  65. 99
    0
      planet/idindex.py
  66. 154
    0
      planet/opml.py
  67. 26
    0
      planet/publish.py
  68. 370
    0
      planet/reconstitute.py
  69. 151
    0
      planet/scrub.py
  70. 67
    0
      planet/shell/__init__.py
  71. 147
    0
      planet/shell/_genshi.py
  72. 51
    0
      planet/shell/dj.py
  73. 64
    0
      planet/shell/plugin.py
  74. 22
    0
      planet/shell/py.py
  75. 19
    0
      planet/shell/sed.py
  76. 276
    0
      planet/shell/tmpl.py
  77. 78
    0
      planet/shell/xslt.py
  78. 496
    0
      planet/spider.py
  79. 191
    0
      planet/splice.py
  80. 1196
    0
      planet/vendor/compat_logging/__init__.py
  81. 299
    0
      planet/vendor/compat_logging/config.py
  82. 728
    0
      planet/vendor/compat_logging/handlers.py
  83. 3689
    0
      planet/vendor/feedparser.py
  84. 17
    0
      planet/vendor/html5lib/__init__.py
  85. 1171
    0
      planet/vendor/html5lib/constants.py
  86. 0
    0
      planet/vendor/html5lib/filters/__init__.py
  87. 10
    0
      planet/vendor/html5lib/filters/_base.py
  88. 127
    0
      planet/vendor/html5lib/filters/formfiller.py
  89. 63
    0
      planet/vendor/html5lib/filters/inject_meta_charset.py
  90. 88
    0
      planet/vendor/html5lib/filters/lint.py
  91. 202
    0
      planet/vendor/html5lib/filters/optionaltags.py
  92. 8
    0
      planet/vendor/html5lib/filters/sanitizer.py
  93. 41
    0
      planet/vendor/html5lib/filters/whitespace.py
  94. 2637
    0
      planet/vendor/html5lib/html5parser.py
  95. 177
    0
      planet/vendor/html5lib/ihatexml.py
  96. 789
    0
      planet/vendor/html5lib/inputstream.py
  97. 258
    0
      planet/vendor/html5lib/sanitizer.py
  98. 17
    0
      planet/vendor/html5lib/serializer/__init__.py
  99. 266
    0
      planet/vendor/html5lib/serializer/htmlserializer.py
  100. 0
    0
      planet/vendor/html5lib/serializer/xhtmlserializer.py

+ 6
- 0
.gitignore View File

@@ -0,0 +1,6 @@
1
+*~
2
+tmpl/*.tmplc
3
+output/*
4
+log
5
+*.pyc
6
+cache/*

+ 7
- 0
AUTHORS View File

@@ -0,0 +1,7 @@
1
+Sam Ruby <rubys@intertwingly.net>
2
+
3
+This codebase represents a radical refactoring of Planet 2.0, which lists
4
+the following authors:
5
+
6
+Scott James Remnant <scott@netsplit.com>
7
+Jeff Waugh <jdub@perkypants.org>

+ 84
- 0
LICENCE View File

@@ -0,0 +1,84 @@
1
+Planet is released under the same licence as Python, here it is:
2
+
3
+
4
+A. HISTORY OF THE SOFTWARE
5
+==========================
6
+
7
+Python was created in the early 1990s by Guido van Rossum at Stichting Mathematisch Centrum (CWI) in the Netherlands as a successor of a language called ABC. Guido is Python's principal author, although it includes many contributions from others. The last version released from CWI was Python 1.2. In 1995, Guido continued his work on Python at the Corporation for National Research Initiatives (CNRI) in Reston, Virginia where he released several versions of the software. Python 1.6 was the last of the versions released by CNRI. In 2000, Guido and the Python core development team moved to BeOpen.com to form the BeOpen PythonLabs team. Python 2.0 was the first and only release from BeOpen.com.
8
+
9
+Following the release of Python 1.6, and after Guido van Rossum left CNRI to work with commercial software developers, it became clear that the ability to use Python with software available under the GNU Public License (GPL) was very desirable. CNRI and the Free Software Foundation (FSF) interacted to develop enabling wording changes to the Python license. Python 1.6.1 is essentially the same as Python 1.6, with a few minor bug fixes, and with a different license that enables later versions to be GPL-compatible. Python 2.1 is a derivative work of Python 1.6.1, as well as of Python 2.0.
10
+
11
+After Python 2.0 was released by BeOpen.com, Guido van Rossum and the other PythonLabs developers joined Digital Creations. All intellectual property added from this point on, starting with Python 2.1 and its alpha and beta releases, is owned by the Python Software Foundation (PSF), a non-profit modeled after the Apache Software Foundation. See http://www.python.org/psf/ for more information about the PSF.
12
+
13
+Thanks to the many outside volunteers who have worked under Guido's direction to make these releases possible.
14
+
15
+B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON
16
+===============================================================
17
+
18
+PSF LICENSE AGREEMENT
19
+---------------------
20
+
21
+1. This LICENSE AGREEMENT is between the Python Software Foundation ("PSF"), and the Individual or Organization ("Licensee") accessing and otherwise using Python 2.1.1 software in source or binary form and its associated documentation.
22
+
23
+2. Subject to the terms and conditions of this License Agreement, PSF hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use Python 2.1.1 alone or in any derivative version, provided, however, that PSF's License Agreement and PSF's notice of copyright, i.e., "Copyright (c) 2001 Python Software Foundation; All Rights Reserved" are retained in Python 2.1.1 alone or in any derivative version prepared by Licensee.
24
+
25
+3. In the event Licensee prepares a derivative work that is based on or incorporates Python 2.1.1 or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to Python 2.1.1.
26
+
27
+4. PSF is making Python 2.1.1 available to Licensee on an "AS IS" basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 2.1.1 WILL NOT INFRINGE ANY THIRD PARTY RIGHTS.
28
+
29
+5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON 2.1.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 2.1.1, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
30
+
31
+6. This License Agreement will automatically terminate upon a material breach of its terms and conditions.
32
+
33
+7. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between PSF and Licensee. This License Agreement does not grant permission to use PSF trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party.
34
+
35
+8. By copying, installing or otherwise using Python 2.1.1, Licensee agrees to be bound by the terms and conditions of this License Agreement.
36
+
37
+BEOPEN.COM TERMS AND CONDITIONS FOR PYTHON 2.0
38
+----------------------------------------------
39
+
40
+BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1
41
+
42
+1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the Individual or Organization ("Licensee") accessing and otherwise using this software in source or binary form and its associated documentation ("the Software").
43
+
44
+2. Subject to the terms and conditions of this BeOpen Python License Agreement, BeOpen hereby grants Licensee a non-exclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use the Software alone or in any derivative version, provided, however, that the BeOpen Python License is retained in the Software, alone or in any derivative version prepared by Licensee.
45
+
46
+3. BeOpen is making the Software available to Licensee on an "AS IS" basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT INFRINGE ANY THIRD PARTY RIGHTS.
47
+
48
+4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
49
+
50
+5. This License Agreement will automatically terminate upon a material breach of its terms and conditions.
51
+
52
+6. This License Agreement shall be governed by and interpreted in all respects by the law of the State of California, excluding conflict of law provisions. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between BeOpen and Licensee. This License Agreement does not grant permission to use BeOpen trademarks or trade names in a trademark sense to endorse or promote products or services of Licensee, or any third party. As an exception, the "BeOpen Python" logos available at http://www.pythonlabs.com/logos.html may be used according to the permissions granted on that web page.
53
+
54
+7. By copying, installing or otherwise using the software, Licensee agrees to be bound by the terms and conditions of this License Agreement.
55
+
56
+CNRI OPEN SOURCE GPL-COMPATIBLE LICENSE AGREEMENT
57
+-------------------------------------------------
58
+
59
+1. This LICENSE AGREEMENT is between the Corporation for National Research Initiatives, having an office at 1895 Preston White Drive, Reston, VA 20191 ("CNRI"), and the Individual or Organization ("Licensee") accessing and otherwise using Python 1.6.1 software in source or binary form and its associated documentation.
60
+
61
+2. Subject to the terms and conditions of this License Agreement, CNRI hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use Python 1.6.1 alone or in any derivative version, provided, however, that CNRI's License Agreement and CNRI's notice of copyright, i.e., "Copyright (c) 1995-2001 Corporation for National Research Initiatives; All Rights Reserved" are retained in Python 1.6.1 alone or in any derivative version prepared by Licensee. Alternately, in lieu of CNRI's License Agreement, Licensee may substitute the following text (omitting the quotes): "Python 1.6.1 is made available subject to the terms and conditions in CNRI's License Agreement. This Agreement together with Python 1.6.1 may be located on the Internet using the following unique, persistent identifier (known as a handle): 1895.22/1013. This Agreement may also be obtained from a proxy server on the Internet using the following URL: http://hdl.handle.net/1895.22/1013".
62
+
63
+3. In the event Licensee prepares a derivative work that is based on or incorporates Python 1.6.1 or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to Python 1.6.1.
64
+
65
+4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS" basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT INFRINGE ANY THIRD PARTY RIGHTS.
66
+
67
+5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON 1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
68
+
69
+6. This License Agreement will automatically terminate upon a material breach of its terms and conditions.
70
+
71
+7. This License Agreement shall be governed by the federal intellectual property law of the United States, including without limitation the federal copyright law, and, to the extent such U.S. federal law does not apply, by the law of the Commonwealth of Virginia, excluding Virginia's conflict of law provisions. Notwithstanding the foregoing, with regard to derivative works based on Python 1.6.1 that incorporate non-separable material that was previously distributed under the GNU General Public License (GPL), the law of the Commonwealth of Virginia shall govern this License Agreement only as to issues arising under or with respect to Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between CNRI and Licensee. This License Agreement does not grant permission to use CNRI trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party.
72
+
73
+8. By clicking on the "ACCEPT" button where indicated, or by copying, installing or otherwise using Python 1.6.1, Licensee agrees to be bound by the terms and conditions of this License Agreement.
74
+
75
+        ACCEPT
76
+
77
+CWI PERMISSIONS STATEMENT AND DISCLAIMER
78
+----------------------------------------
79
+
80
+Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam, The Netherlands. All rights reserved.
81
+
82
+Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that copyright notice and this permission notice appear in supporting documentation, and that the name of Stichting Mathematisch Centrum or CWI not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission.
83
+
84
+STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

+ 20
- 0
README View File

@@ -0,0 +1,20 @@
1
+Planet
2
+------
3
+
4
+Planet is a flexible feed aggregator. It downloads news feeds published by
5
+web sites and aggregates their content together into a single combined feed,
6
+latest news first.  This version of Planet is named Venus as it is the
7
+second major version.  The first version is still in wide use and is
8
+also actively being maintained.
9
+
10
+It uses Mark Pilgrim's Universal Feed Parser to read from CDF, RDF, RSS and
11
+Atom feeds; Leonard Richardson's Beautiful Soup to correct markup issues;
12
+and either Tomas Styblo's templating engine or Daniel Veillard's implementation
13
+of XSLT to output static files in any format you can dream up.
14
+
15
+To get started, check out the documentation in the docs directory.  If you have
16
+any questions or comments, please don't hesitate to use the planet mailing list:
17
+
18
+  http://lists.planetplanet.org/mailman/listinfo/devel
19
+
20
+Keywords: feed, blog, aggregator, RSS, RDF, Atom, OPML, Python

+ 41
- 0
THANKS View File

@@ -0,0 +1,41 @@
1
+DeWitt Clinton  - Mac OSX
2
+Mary Gardiner   - PythonPath
3
+Elias Torres    - FOAF OnlineAccounts
4
+Jacques Distler - Template patches
5
+Michael Koziarski - HTTP Auth fix
6
+Brian Ewins     - Win32 / Portalocker
7
+Joe Gregorio    - python versioning for filters, verbose tests, spider_threads
8
+Harry Fuecks    - Pipe characters in file names, filter bug
9
+Eric van der Vlist - Filters to add language, category information
10
+Chris Dolan     - mkdir cache; default template_dirs; fix xsltproc
11
+David Sifry     - rss 2.0 xslt template based on http://atom.geekhood.net/
12
+Morten Frederiksen - Support WordPress LinkManager OPML
13
+Harry Fuecks    - default item date to feed date
14
+Antonio Cavedoni - Django templates
15
+Morten Frederiksen - expungeCache
16
+Lenny Domnitser - Coral CDN support for URLs with non-standard ports
17
+Amit Chakradeo  - Allow read-only files to be overwritten
18
+Matt Brubeck    - fix new_channel
19
+Aristotle Pagaltzis - ensure byline_author filter doesn't drop foreign markup
20
+
21
+This codebase represents a radical refactoring of Planet 2.0, which lists
22
+the following contributors:
23
+
24
+Patches and Bug Fixes
25
+---------------------
26
+
27
+Chris Dolan - fixes, exclude filtering, duplicate culling
28
+David Edmondson - filtering
29
+Lucas Nussbaum - locale configuration
30
+David Pashley - cache code profiling and recursion fixing
31
+Gediminas Paulauskas - days per page
32
+
33
+
34
+Spycyroll Maintainers
35
+---------------------
36
+
37
+Vattekkat Satheesh Babu
38
+Richard Jones
39
+Garth Kidd
40
+Eliot Landrum
41
+Bryan Richard

+ 7
- 0
TODO View File

@@ -0,0 +1,7 @@
1
+TODO
2
+====
3
+
4
+  * Allow display normalisation to specified timezone
5
+
6
+    Some Planet admins would like their feed to be displayed in the local
7
+    timezone, instead of UTC.

+ 141
- 0
admin_cb.py View File

@@ -0,0 +1,141 @@
1
+#!/usr/bin/env python
2
+# -*- coding: utf-8 -*-
3
+
4
+import cgi
5
+import cgitb
6
+cgitb.enable()
7
+
8
+from urllib import unquote
9
+import sys, os
10
+
11
+# Modify this to point to where you usually run planet.
12
+BASE_DIR = '/var/www/emacses/'
13
+
14
+# Modify this to point to your venus installation dir, relative to planet dir above.
15
+VENUS_INSTALL = "venus"
16
+
17
+# Config file, relative to planet dir above
18
+CONFIG_FILE = "emacses.ini"
19
+
20
+# Admin page URL, relative to this script's URL
21
+ADMIN_URL = "admin.html"
22
+
23
+
24
+# chdir to planet dir - config may be relative from there
25
+os.chdir(os.path.abspath(BASE_DIR))
26
+
27
+# Add venus to path.
28
+sys.path.append(VENUS_INSTALL)
29
+
30
+# Add shell dir to path - auto detection does not work
31
+sys.path.append(os.path.join(VENUS_INSTALL, "planet", "shell"))
32
+
33
+# import necessary planet items 
34
+from planet import config
35
+from planet.spider import filename
36
+
37
+
38
+# Load config
39
+config.load(CONFIG_FILE)
40
+
41
+# parse query parameters
42
+form = cgi.FieldStorage()
43
+
44
+
45
+# Start HTML output at once
46
+print "Content-Type: text/html;charset=utf-8"     # HTML is following
47
+print                                             # blank line, end of headers
48
+
49
+
50
+print '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">'
51
+print '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="sv"><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" /><title>Admin results</title></head><body>'
52
+print '<div>'
53
+
54
+# Cache and blacklist dirs
55
+
56
+cache = config.cache_directory()
57
+blacklist = config.cache_blacklist_directory()
58
+
59
+# Must have command parameter
60
+if not "command" in form:
61
+  print "<p>Unknown command</p>"
62
+
63
+elif form['command'].value == "blacklist":
64
+
65
+
66
+  # Create the blacklist dir if it does not exist
67
+  if not os.path.exists(blacklist):
68
+    os.mkdir(blacklist)
69
+    print "<p>Created directory %s</p>" % blacklist
70
+  
71
+  # find list of urls, in the form bl[n]=url
72
+
73
+  for key in form.keys():
74
+
75
+    if not key.startswith("bl"): continue
76
+
77
+    url = unquote(form[key].value)
78
+
79
+    # find corresponding files
80
+    cache_file = filename(cache, url)
81
+    blacklist_file = filename(blacklist, url)
82
+
83
+    # move to blacklist if found
84
+    if os.path.exists(cache_file):
85
+
86
+      os.rename(cache_file, blacklist_file)
87
+
88
+      print "<p>Blacklisted <a href='%s'>%s</a></p>" % (url, url)
89
+
90
+    else:
91
+
92
+      print "<p>Unknown file: %s</p>" % cache_file
93
+
94
+    print """
95
+<p>Note that blacklisting does not automatically 
96
+refresh the planet. You will need to either wait for
97
+a scheduled planet run, or refresh manually from the admin interface.</p>
98
+"""
99
+
100
+
101
+elif form['command'].value == "run":
102
+
103
+  # run spider and refresh
104
+
105
+  from planet import spider, splice
106
+  try:
107
+     spider.spiderPlanet(only_if_new=False)
108
+     print "<p>Successfully ran spider</p>"
109
+  except Exception, e:
110
+     print e
111
+
112
+  doc = splice.splice()
113
+  splice.apply(doc.toxml('utf-8'))
114
+
115
+elif form['command'].value == "refresh":
116
+
117
+  # only refresh
118
+
119
+  from planet import splice
120
+
121
+  doc = splice.splice()
122
+  splice.apply(doc.toxml('utf-8'))
123
+
124
+  print "<p>Successfully refreshed</p>"
125
+
126
+elif form['command'].value == "expunge":
127
+
128
+  # only expunge
129
+  from planet import expunge
130
+  expunge.expungeCache()
131
+
132
+  print "<p>Successfully expunged</p>"
133
+
134
+
135
+
136
+
137
+print "<p><strong><a href='" + ADMIN_URL + "'>Return</a> to admin interface</strong></p>"
138
+
139
+
140
+
141
+print "</body></html>"

+ 78
- 0
docs/admin.html View File

@@ -0,0 +1,78 @@
1
+<!DOCTYPE html PUBLIC
2
+    "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
3
+    "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">
4
+<html xmlns="http://www.w3.org/1999/xhtml">
5
+<head>
6
+<script type="text/javascript" src="docs.js"></script>
7
+<link rel="stylesheet" type="text/css" href="docs.css"/>
8
+<title>Administration interface</title>
9
+</head>
10
+<body>
11
+<h2>Administration interface</h2>
12
+<p>Venus comes with a basic administration interface, allowing you to manually run planet, do a refresh from cache, expunge the cache or blacklist individual entries from the planet.</p>
13
+
14
+<h3>Using the administration interface</h3>
15
+
16
+<p>The administration interface allows you to manage the everyday tasks related to your venus installation.</p>
17
+
18
+<ul><li><strong>Running planet</strong>. By clicking the "Run planet" button, you can do a full run of the planet script, rechecking all the feeds and recreating the generated files. This corresponds to running <code>python planet.py config.ini</code> with no arguments. Note that, depending on the number of feeds, this operation may take some time.</li> 
19
+<li><strong>Refreshing planet</strong>. By clicking the "Refresh planet" button, you can do an "offline" run of the planet script, without rechecking all the feeds but still recreating the generated files. This corresponds to running <code>python planet.py -o config.ini</code>.</li>
20
+<li><strong>Expunging the planet cache</strong>. By clicking the "Expunge cache" button, you can clean the cache from outdated entries. This corresponds to running <code>python planet.py -x config.ini</code>.</li>
21
+<li><strong>Blacklisting</strong>. By selecting one or more of the entries in the list of entries, and clicking the "Blacklist" button, you can stop these items from displaying on the planet. This is very useful for quickly blocking inappropriate or malformed content from your planet. <i>Note that blacklisting does not take effect until you refresh or rerun the planet</i>. (Blacklisting can also be done manually on the server by moving files from the cache directory to the blacklist directory.)</li>
22
+</ul>
23
+
24
+<p>Installing the administration interface securely requires some knowledge of web server configuration.</p>
25
+
26
+<p>The admin interface consists of two parts: the admin template file and the server callback script. Both must be correctly installed for the administration interface to work.</p>
27
+
28
+<h3>Installing the admin template</h3> 
29
+
30
+<p>The admin page template is found in <code>themes/common/admin.html.tmpl</code>. This template needs to be added to your config file along with your other templates, and optionally customized. Make sure that <code>action="admin_cb.py"</code> found in several places in the file points to the URL (or relative URL) of the admin callback script below.</p>
31
+
32
+<h3>Installing the admin callback script</h3>
33
+
34
+<p>The admin callback script, admin_cb.py, needs to be copied to somewhere among your web server files. Depending on the details of your web server, your permissions, etc., this can be done in several different ways and in different places. There are three steps involved:</p>
35
+<ol><li>Configuring the script</li>
36
+<li>Enabling CGI</li>
37
+<li>Secure access</li></ol>
38
+
39
+
40
+<h4>Configuring the script</h4>
41
+
42
+<p>At the top of the script, there are four variables you must customize. The correct values of the first three variables can be found by analyzing how you normally run the <code>planet.py</code> script. If you typically run planet from within the working directory <code>BASE_DIR</code>, using a command like <blockquote><code>python [VENUS_INSTALL]/planet.py [CONFIG_FILE]</code></blockquote> you know all three values.</p>
43
+
44
+<dl><dt><code>BASE_DIR</code></dt><dd>
45
+This variable must contain the directory from where you usually run the planet.py script, to ensure that relative file names in the config files work correctly.</dd>
46
+<dt><code>VENUS_INSTALL</code></dt><dd>
47
+This variable must contain your venus installation directory, relative to BASE_DIR above.</dd>
48
+<dt><code>CONFIG_FILE</code></dt><dd>
49
+This variable must contain your configuration file, relative to BASE_DIR above.</dd>
50
+<dt><code>ADMIN_URL</code></dt><dd>
51
+This variable must contain the URL (or relative URL) of the administration page, relative to this script's URL.</dd>
52
+</dl>
53
+
54
+<h4>Enabling CGI</h4>
55
+
56
+<p>You will need to ensure that <code>admin_cb.py</code> can be run as a CGI script. This is done differently on different web server platforms, but there are at least three common patterns:</p>
57
+
58
+<ul><li><b>Apache with <code>.htaccess</code></b>. If your server allows you to use <code>.htaccess</code> files, you can simply add
59
+<blockquote><code>Options +ExecCGI<br />
60
+AddHandler cgi-script .py</code></blockquote>
61
+in an .htaccess file in the planet output directory to enable the server to run the script. In this case, the admin_cb.py file can be put alongside the rest of the planet output files.
62
+</li>
63
+<li><b>Apache without <code>.htaccess</code></b>. If your server does not allow you to add CGI handlers to <code>.htaccess</code> files, you can add
64
+<blockquote><code>Options +ExecCGI<br />
65
+AddHandler cgi-script .py</code></blockquote>
66
+to the relevant part of the central apache configuration files.
67
+</li>
68
+<li><b>Apache with cgi-bin</b>. If your server only allows CGI handlers in pre-defined directories, you can place the <code>admin_cb.py</code> file there, and make sure to update the <code>action="admin_cb.py"</code> code in the template file <code>admin.html.tmpl</code>, as well as the <code>ADMIN_URL</code> in the callback script.
69
+</li>
70
+</ul>
71
+
72
+<p>In all cases, it is necessary to make sure that the script is executed as the same user that owns the planet output files and the cache. Either the planet output is owned by the apache user (usually <code>www-data</code>), or Apache's <a href="http://httpd.apache.org/docs/2.0/suexec.html">suexec</a> feature can be used to run the script as the right user.</p> 
73
+
74
+<h4>Securing the admin interface</h4>
75
+<p>If you don't want every user to be able to administer your planet, you must secure at least the <code>admin_cb.py</code> file, and preferably the <code>admin.html</code> file as well. This can be done using your web server's regular access control features. See <a href="http://httpd.apache.org/docs/2.0/howto/auth.html">here</a> for Apache documentation.</p>
76
+
77
+</body>
78
+</html>

+ 177
- 0
docs/config.html View File

@@ -0,0 +1,177 @@
1
+<!DOCTYPE html PUBLIC
2
+    "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
3
+    "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">
4
+<html xmlns="http://www.w3.org/1999/xhtml">
5
+<head>
6
+<script type="text/javascript" src="docs.js"></script>
7
+<link rel="stylesheet" type="text/css" href="docs.css"/>
8
+<title>Venus Configuration</title>
9
+</head>
10
+<body>
11
+
12
+<h2>Configuration</h2>
13
+<p>Configuration files are in <a href="http://docs.python.org/lib/module-
14
+ConfigParser.html">ConfigParser</a> format which basically means the same
15
+format as INI files, i.e., they consist of a series of
16
+<code>[sections]</code>, in square brackets, with each section containing a
17
+list of <code>name:value</code> pairs (or <code>name=value</code> pairs, if
18
+you prefer).</p>
19
+<p>You are welcome to place your entire configuration into one file.
20
+Alternately, you may factor out the templating into a "theme", and
21
+the list of subscriptions into one or more "reading lists".</p>
22
+<h3 id="planet"><code>[planet]</code></h3>
23
+<p>This is the only required section, which is a bit odd as none of the
24
+parameters listed below are required.  Even so, you really do want to 
25
+provide many of these, especially ones that identify your planet and
26
+either (or both) of <code>template_files</code> and <code>theme</code>.</p>
27
+<p>Below is a complete list of predefined planet configuration parameters,
28
+including <del>ones not (yet) implemented by Venus</del> and <ins>ones that
29
+are either new or implemented differently by Venus</ins>.</p>
30
+
31
+<blockquote>
32
+<dl class="compact code">
33
+<dt>name</dt>
34
+<dd>Your planet's name</dd>
35
+<dt>link</dt>
36
+<dd>Link to the main page</dd>
37
+<dt>owner_name</dt>
38
+<dd>Your name</dd>
39
+<dt>owner_email</dt>
40
+<dd>Your e-mail address</dd>
41
+
42
+</dl>
43
+<dl class="compact code">
44
+
45
+<dt>cache_directory</dt>
46
+<dd>Where cached feeds are stored</dd>
47
+<dt>output_dir</dt>
48
+<dd>Directory to place output files</dd>
49
+
50
+</dl>
51
+<dl class="compact code">
52
+
53
+<dt><ins>output_theme</ins></dt>
54
+<dd>Directory containing a <code>config.ini</code> file which is merged
55
+with this one.  This is typically used to specify templating and bill of
56
+material information.</dd>
57
+<dt>template_files</dt>
58
+<dd>Space-separated list of output template files</dd>
59
+<dt><ins>template_directories</ins></dt>
60
+<dd>Space-separated list of directories in which <code>template_files</code>
61
+can be found</dd>
62
+<dt><ins>bill_of_materials</ins></dt>
63
+<dd>Space-separated list of files to be copied as is directly from the <code>template_directories</code> to the <code>output_dir</code></dd>
64
+<dt>filter</dt>
65
+<dd>Regular expression that must be found in the textual portion of the entry</dd>
66
+<dt>exclude</dt>
67
+<dd>Regular expression that must <b>not</b> be found in the textual portion of the entry</dd>
68
+<dt><ins>filters</ins></dt>
69
+<dd>Space-separated list of <a href="filters.html">filters</a> to apply to
70
+each entry</dd>
71
+<dt><ins>filter_directories</ins></dt>
72
+<dd>Space-separated list of directories in which <code>filters</code>
73
+can be found</dd>
74
+
75
+</dl>
76
+<dl class="compact code">
77
+
78
+<dt>items_per_page</dt>
79
+<dd>How many items to put on each page.  <ins>Whereas Planet 2.0 allows this to
80
+be overridden on a per template basis, Venus currently takes the maximum value
81
+for this across all templates.</ins></dd>
82
+<dt><del>days_per_page</del></dt>
83
+<dd>How many complete days of posts to put on each page.  This is the absolute, hard limit (over the item limit)</dd>
84
+<dt>date_format</dt>
85
+<dd><a href="http://docs.python.org/lib/module-time.html#l2h-2816">strftime</a> format for the default 'date' template variable</dd>
86
+<dt>new_date_format</dt>
87
+<dd><a href="http://docs.python.org/lib/module-time.html#l2h-2816">strftime</a> format for the 'new_date' template variable <ins>only applies to htmltmpl templates</ins></dd>
88
+<dt><del>encoding</del></dt>
89
+<dd>Output encoding for the file, Python 2.3+ users can use the special "xml" value to output ASCII with XML character references</dd>
90
+<dt><del>locale</del></dt>
91
+<dd>Locale to use for (e.g.) strings in dates, default is taken from your system</dd>
92
+<dt>activity_threshold</dt>
93
+<dd>If non-zero, all feeds which have not been updated in the indicated
94
+number of days will be marked as inactive</dd>
95
+
96
+</dl>
97
+<dl class="compact code">
98
+
99
+<dt>log_level</dt>
100
+<dd>One of <code>DEBUG</code>, <code>INFO</code>, <code>WARNING</code>, <code>ERROR</code> or <code>CRITICAL</code></dd>
101
+<dt><ins>log_format</ins></dt>
102
+<dd><a href="http://docs.python.org/lib/node422.html">format string</a> to
103
+use for logging output.  Note: this configuration value is processed
104
+<a href="http://docs.python.org/lib/ConfigParser-objects.html">raw</a></dd>
105
+<dt>feed_timeout</dt>
106
+<dd>Number of seconds to wait for any given feed</dd>
107
+<dt>new_feed_items</dt>
108
+<dd>Maximum number of items to include in the output from any one feed</dd>
109
+<dt><ins>spider_threads</ins></dt>
110
+<dd>The number of threads to use when spidering. When set to 0, the default, 
111
+no threads are used and spidering follows the traditional algorithm.</dd>
112
+<dt><ins>http_cache_directory</ins></dt>
113
+<dd>If <code>spider_threads</code> is specified, you can also specify a
114
+directory to be used for an additional HTTP cache to front end the Venus
115
+cache.  If specified as a relative path, it is evaluated relative to the
116
+<code>cache_directory</code>.</dd>
117
+<dt><ins>cache_keep_entries</ins></dt>
118
+<dd>Used by <code>expunge</code> to determine how many entries should be
119
+kept for each source when expunging old entries from the cache directory.
120
+This may be overridden on a per subscription feed basis.</dd>
121
+<dt><ins>pubsubhubbub_hub</ins></dt>
122
+<dd>URL to a PubSubHubbub hub, for example <a
123
+href="http://pubsubhubbub.appspot.com">http://pubsubhubbub.appspot.com</a>.
124
+Used by <code>publish</code> to ping the
125
+hub when feeds are published, speeding delivery of updates to
126
+subscribers.  See
127
+the <a href="http://code.google.com/p/pubsubhubbub/"> PubSubHubbub
128
+home page</a> for more information.</dd>
129
+<dt><ins>pubsubhubbub_feeds</ins></dt>
130
+<dd>List of feeds to publish.  Defaults to <code>atom.xml rss10.xml
131
+rss20.xml</code>.</dd>
132
+<dt id="django_autoescape"><ins>django_autoescape</ins></dt>
133
+<dd>Control <a href="http://docs.djangoproject.com/en/dev/ref/templates/builtins/#autoescape">autoescaping</a> behavior of django templates.  Defaults to <code>on</code>.</dd>
134
+</dl>
135
+<p>Additional options can be found in
136
+<a href="normalization.html#overrides">normalization level overrides</a>.</p>
137
+</blockquote>
138
+
139
+<h3 id="default"><code>[DEFAULT]</code></h3>
140
+<p>Values placed in this section are used as default values for all sections.
141
+While it is true that few values make sense in all sections; in most cases
142
+unused parameters cause few problems.</p>
143
+
144
+<h3 id="subscription"><code>[</code><em>subscription</em><code>]</code></h3>
145
+<p>All sections other than <code>planet</code>, <code>DEFAULT</code>, or those
146
+named in <code>[planet]</code>'s <code>filters</code> or
147
+<code>template_files</code> parameters
148
+are treated as subscriptions and typically take the form of a
149
+<acronym title="Uniform Resource Identifier">URI</acronym>.</p>
150
+<p>Parameters placed in this section are passed to templates.  While
151
+you are free to include as few or as many parameters as you like, most of
152
+the predefined themes presume that at least <code>name</code> is defined.</p>
153
+<p>The <code>content_type</code> parameter can be defined to indicate that
154
+this subscription is a <em>reading list</em>, i.e., is an external list
155
+of subscriptions.  At the moment, four formats of reading lists are supported:
156
+<code>opml</code>, <code>foaf</code>, <code>csv</code>, and
157
+<code>config</code>.  In the future,
158
+support for formats like <code>xoxo</code> could be added.</p>
159
+<p><a href="normalization.html#overrides">Normalization overrides</a> can
160
+also be defined here.</p>
161
+
162
+<h3 id="template"><code>[</code><em>template</em><code>]</code></h3>
163
+<p>Sections which are listed in <code>[planet] template_files</code> are
164
+processed as <a href="templates.html">templates</a>.  With Planet 2.0,
165
+it is possible to override parameters like <code>items_per_page</code>
166
+on a per template basis, but at the current time Planet Venus doesn't
167
+implement this.</p>
168
+<p><ins><a href="filters.html">Filters</a> can be defined on a per-template basis, and will be used to post-process the output of the template.</ins></p>
169
+
170
+<h3 id="filter"><code>[</code><em>filter</em><code>]</code></h3>
171
+<p>Sections which are listed in <code>[planet] filters</code> are
172
+processed as <a href="filters.html">filters</a>.</p>
173
+<p>Parameters which are listed in this section are passed to the filter
174
+in a language specific manner.  Given the way defaults work, filters
175
+should be prepared to ignore parameters that they didn't expect.</p>
176
+</body>
177
+</html>

+ 65
- 0
docs/contributing.html View File

@@ -0,0 +1,65 @@
1
+<!DOCTYPE html>
2
+<html xmlns="http://www.w3.org/1999/xhtml">
3
+<head>
4
+<script type="text/javascript" src="docs.js"></script>
5
+<link rel="stylesheet" type="text/css" href="docs.css"/>
6
+<title>Contributing</title>
7
+</head>
8
+<body>
9
+<h2>Contributing</h2>
10
+<p>If you make changes to Venus, you have no obligation to share them.
11
+And unlike systems based on <code>CVS</code> or <code>subversion</code>,
12
+there is no notion of &ldquo;committers&rdquo; &mdash; everybody is
13
+a peer.</p>
14
+<p>If you should choose to share your changes, the steps outlined below may
15
+increase the chances of your code being picked up.</p>
16
+
17
+<h3>Documentation and Tests</h3>
18
+<p>For best results, include both documentation and tests in your
19
+contribution.</p>
20
+<p>Documentation can be found in the <code>docs</code> directory.  It is
21
+straight XHTML.</p>
22
+<p>Test cases can be found in the
23
+<a href="http://intertwingly.net/code/venus/tests/">tests</a> directory, and
24
+make use of the
25
+<a href="http://docs.python.org/lib/module-unittest.html">Python Unit testing framework</a>.  To run them, simply enter:</p>
26
+<blockquote><pre>python runtests.py</pre></blockquote>
27
+
28
+<h3>Git</h3>
29
+<p>If you have done a <a href="index.html">git pull</a>, you have already set up
30
+a repository.  The only additional step you might need to do is to introduce
31
+yourself to <a href="http://git-scm.com/">git</a>.  Type in the following,
32
+after replacing the <b>bold text</b> with your information:</p>
33
+
34
+<blockquote><pre>git config --global user.name '<b>Your Name</b>'
35
+git config --global user.email '<b>youremail</b>@<b>example.com</b>'</pre></blockquote>
36
+
37
+<p>Then, simply make the changes you like.  When you are done, type:</p>
38
+
39
+<blockquote><pre>git status</pre></blockquote>
40
+
41
+<p>This will tell you which files you have modified, and which ones you may
42
+have added.  If you add files and you want them to be included, simply do a:</p>
43
+
44
+<blockquote><pre>git add file1 file2...</pre></blockquote>
45
+
46
+<p>You can also do a <code>git diff</code> to see if there are any changes
47
+which you made that you don't want included.  I can't tell you how many
48
+debug print statements I have caught this way.</p>
49
+
50
+<p>Next, type:</p>
51
+
52
+<blockquote><pre>git commit -a</pre></blockquote>
53
+
54
+<p>This will allow you to enter a comment describing your change.  If your
55
+repository is already on your web server, simply let others know where they
56
+can find it.  If not, consider using <a href="http://github.com/">github</a> to host your
57
+<a href="http://help.github.com/forking/">fork</a> of Venus.</p>
58
+
59
+<h3>Telling others</h3>
60
+<p>Once you have a change worth sharing, post a message on the
61
+<a href="http://lists.planetplanet.org/mailman/listinfo/devel">mailing
62
+list</a>, or use github to send a <a
63
+href="http://github.com/guides/pull-requests">pull request</a>.</p>
64
+</body>
65
+</html>

+ 100
- 0
docs/docs.css View File

@@ -0,0 +1,100 @@
1
+body {
2
+  background-color: #fff;
3
+  color: #333;
4
+  font-family: 'Lucida Grande', Verdana, Geneva, Lucida, Helvetica, sans-serif;
5
+  font-size: small;
6
+  margin: 40px;
7
+  padding: 0;
8
+}
9
+
10
+a:link, a:visited { 
11
+  background-color: transparent;
12
+  color: #333;
13
+  text-decoration: none !important;
14
+  border-bottom: 1px dotted #333 !important;
15
+}
16
+
17
+a:hover {
18
+  background-color: transparent;
19
+  color: #934;
20
+  text-decoration: none !important;
21
+  border-bottom: 1px dotted #993344 !important;
22
+}
23
+
24
+pre, code {
25
+  background-color: #FFF;
26
+  color: #00F;
27
+  font-size: large
28
+}
29
+
30
+h1 {
31
+  margin: 8px 0 10px 20px;
32
+  padding: 0;
33
+  font-variant: small-caps;
34
+  letter-spacing: 0.1em;
35
+  font-family: "Book Antiqua", Georgia, Palatino, Times, "Times New Roman", serif;
36
+}
37
+
38
+h2 {
39
+  clear: both;
40
+}
41
+
42
+ul, ul.outer > li {
43
+  margin: 14px 0 10px 0;
44
+}
45
+
46
+.z {
47
+  float:left;
48
+  background: url(img/shadowAlpha.png) no-repeat bottom right !important;
49
+  margin: -15px 0 20px -15px !important;
50
+}
51
+
52
+.z .logo {
53
+ color: magenta;
54
+}
55
+
56
+.z p {
57
+  margin: 14px 0 10px 15px !important;
58
+}
59
+
60
+.z .sectionInner {
61
+  width: 730px;
62
+  background: none !important;
63
+  padding: 0 !important;
64
+  }
65
+
66
+.z .sectionInner .sectionInner2 {
67
+  border: 1px solid #a9a9a9;
68
+  padding: 4px;
69
+  margin: -6px 6px 6px -6px !important;
70
+}
71
+
72
+ins {
73
+  background-color: #FFF;
74
+  color: #F0F;
75
+  text-decoration: none;
76
+}
77
+
78
+dl.compact {
79
+  margin-bottom: 1em;
80
+  margin-top: 1em;
81
+}
82
+
83
+dl.compact > dt {
84
+  clear: left;
85
+  float: left;
86
+  margin-bottom: 0;
87
+  padding-right: 8px;
88
+  margin-top: 0;
89
+  list-style-type: none;
90
+}
91
+
92
+dl.compact > dd {
93
+  margin-bottom: 0;
94
+  margin-top: 0;
95
+  margin-left: 10em;
96
+}
97
+
98
+th, td {
99
+  font-size: small;
100
+}

+ 54
- 0
docs/docs.js View File

@@ -0,0 +1,54 @@
1
+window.onload=function() {
2
+  var vindex = document.URL.lastIndexOf('venus/');
3
+  if (vindex<0) vindex = document.URL.lastIndexOf('planet/');
4
+  var base = document.URL.substring(0,vindex+6);
5
+
6
+  var body = document.getElementsByTagName('body')[0];
7
+  var div = document.createElement('div');
8
+  div.setAttribute('class','z');
9
+  var h1 = document.createElement('h1');
10
+  var span = document.createElement('span');
11
+  span.appendChild(document.createTextNode('\u2640'));
12
+  span.setAttribute('class','logo');
13
+  h1.appendChild(span);
14
+  h1.appendChild(document.createTextNode(' Planet Venus'));
15
+
16
+  var inner2=document.createElement('div');
17
+  inner2.setAttribute('class','sectionInner2');
18
+  inner2.appendChild(h1);
19
+
20
+  var p = document.createElement('p');
21
+  p.appendChild(document.createTextNode("Planet Venus is an awesome \u2018river of news\u2019 feed reader. It downloads news feeds published by web sites and aggregates their content together into a single combined feed, latest news first."));
22
+  inner2.appendChild(p);
23
+
24
+  p = document.createElement('p');
25
+  var a = document.createElement('a');
26
+  a.setAttribute('href',base);
27
+  a.appendChild(document.createTextNode('Download'));
28
+  p.appendChild(a);
29
+  p.appendChild(document.createTextNode(" \u00b7 "));
30
+  a = document.createElement('a');
31
+  a.setAttribute('href',base+'docs/index.html');
32
+  a.appendChild(document.createTextNode('Documentation'));
33
+  p.appendChild(a);
34
+  p.appendChild(document.createTextNode(" \u00b7 "));
35
+  a = document.createElement('a');
36
+  a.setAttribute('href',base+'tests/');
37
+  a.appendChild(document.createTextNode('Unit tests'));
38
+  p.appendChild(a);
39
+  p.appendChild(document.createTextNode(" \u00b7 "));
40
+  a = document.createElement('a');
41
+  a.setAttribute('href','http://lists.planetplanet.org/mailman/listinfo/devel');
42
+  a.appendChild(document.createTextNode('Mailing list'));
43
+  p.appendChild(a);
44
+  inner2.appendChild(p);
45
+
46
+  var inner1=document.createElement('div');
47
+  inner1.setAttribute('class','sectionInner');
48
+  inner1.setAttribute('id','inner1');
49
+  inner1.appendChild(inner2);
50
+
51
+  div.appendChild(inner1);
52
+
53
+  body.insertBefore(div, body.firstChild);
54
+}

+ 48
- 0
docs/etiquette.html View File

@@ -0,0 +1,48 @@
1
+<!DOCTYPE html PUBLIC
2
+    "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
3
+    "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">
4
+<html xmlns="http://www.w3.org/1999/xhtml">
5
+<head>
6
+<script type="text/javascript" src="docs.js"></script>
7
+<link rel="stylesheet" type="text/css" href="docs.css"/>
8
+<title>Etiquette</title>
9
+</head>
10
+<body>
11
+<h2>Etiquette</h2>
12
+<p>You would think that people who publish syndication feeds do it with the
13
+intent to be syndicated.  But the truth is that we live in a world where
14
+<a href="http://en.wikipedia.org/wiki/Deep_linking">deep linking</a> can
15
+cause people to complain.  Nothing is safe.  But that doesn&#8217;t
16
+stop us from doing links.</p>
17
+
18
+<p>These concerns tend to increase when you profit, either directly via ads or
19
+indirectly via search engine rankings, from the content of others.</p>
20
+
21
+<p>While there are no hard and fast rules that apply here, here are a
22
+few things you can do to mitigate the concern:</p>
23
+
24
+<ul>
25
+<li>Aggressively use robots.txt, meta tags, and the google/livejournal
26
+atom namespace to mark your pages as not to be indexed by search
27
+engines.</li>
28
+<blockquote><p><dl>
29
+<dt><a href="http://www.robotstxt.org/">robots.txt</a>:</dt>
30
+<dd><p><code>User-agent: *<br/>
31
+Disallow: /</code></p></dd>
32
+<dt>index.html:</dt>
33
+<dd><p><code>&lt;<a href="http://www.robotstxt.org/wc/meta-user.html">meta name="robots"</a> content="noindex,nofollow"/&gt;</code></p></dd>
34
+<dt>atom.xml:</dt>
35
+<dd><p><code>&lt;feed xmlns:indexing="<a href="http://community.livejournal.com/lj_dev/696793.html">urn:atom-extension:indexing</a>" indexing:index="no"&gt;</code></p>
36
+<p><code>&lt;access:restriction xmlns:access="<a href="http://www.bloglines.com/about/specs/fac-1.0">http://www.bloglines.com/about/specs/fac-1.0</a>" relationship="deny"/&gt;</code></p></dd>
37
+</dl></p></blockquote>
38
+<li><p>Ensure that all <a href="http://nightly.feedparser.org/docs/reference-entry-source.html#reference.entry.source.rights">copyright</a> and <a href="http://nightly.feedparser.org/docs/reference-entry-license.html">licensing</a> information is propagated to the
39
+combined feed(s) that you produce.</p></li>
40
+
41
+<li><p>Add no advertising.  Consider filtering out ads, lest you
42
+be accused of using someone&#8217;s content to help your friends profit.</p></li>
43
+
44
+<li><p>Most importantly, if anyone does object to their content being included,
45
+quickly and without any complaint, remove them.</p></li>
46
+</ul>
47
+</body>
48
+</html>

+ 105
- 0
docs/filters.html View File

@@ -0,0 +1,105 @@
1
+<!DOCTYPE html PUBLIC
2
+    "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
3
+    "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">
4
+<html xmlns="http://www.w3.org/1999/xhtml">
5
+<head>
6
+<script type="text/javascript" src="docs.js"></script>
7
+<link rel="stylesheet" type="text/css" href="docs.css"/>
8
+<title>Venus Filters</title>
9
+</head>
10
+<body>
11
+<h2>Filters and Plugins</h2>
12
+<p>Filters and plugins are simple Unix pipes.  Input comes in
13
+<code>stdin</code>, parameters come from the config file, and output goes to
14
+<code>stdout</code>.  Anything written to <code>stderr</code> is logged as an
15
+ERROR message.  If no <code>stdout</code> is produced, the entry is not written
16
+to the cache or processed further; in fact, if the entry had previously been
17
+written to the cache, it will be removed.</p>
18
+
19
+<p>There are two types of filters supported by Venus, input and template.</p>
20
+<p>Input to an input filter is an aggressively
21
+<a href="normalization.html">normalized</a> entry.  For
22
+example, if a feed is RSS 1.0 with 10 items, the filter will be called ten
23
+times, each with a single Atom 1.0 entry, with all textConstructs
24
+expressed as XHTML, and everything encoded as UTF-8.</p>
25
+<p>Input to a template filter will be the output produced by the template.</p>
26
+
27
+<p>You will find a small set of example filters in the <a
28
+href="../filters">filters</a> directory.  The <a
29
+href="../filters/coral_cdn_filter.py">coral cdn filter</a> will change links
30
+to images in the entry itself.  The filters in the <a
31
+href="../filters/stripAd/">stripAd</a> subdirectory will strip specific
32
+types of advertisements that you may find in feeds.</p>
33
+
34
+<p>The <a href="../filters/excerpt.py">excerpt</a> filter adds metadata (in
35
+the form of a <code>planet:excerpt</code> element) to the feed itself.  You
36
+can see examples of how parameters are passed to this program in either
37
+<a href="../tests/data/filter/excerpt-images.ini">excerpt-images</a> or 
38
+<a href="../examples/opml-top100.ini">opml-top100.ini</a>.
39
+Alternately parameters may be passed
40
+<abbr title="Uniform Resource Identifier">URI</abbr> style, for example: 
41
+<a href="../tests/data/filter/excerpt-images2.ini">excerpt-images2</a>.
42
+</p>
43
+
44
+<p>The <a href="../filters/xpath_sifter.py">xpath sifter</a> is a variation of
45
+the above, including or excluding feeds based on the presence (or absence) of
46
+data specified by <a href="http://www.w3.org/TR/xpath20/">xpath</a>
47
+expressions.  Again, parameters can be passed as
48
+<a href="../tests/data/filter/xpath-sifter.ini">config options</a> or 
49
+<a href="../tests/data/filter/xpath-sifter2.ini">URI style</a>.
50
+</p>
51
+
52
+<p>The <a href="../filters/regexp_sifter.py">regexp sifter</a> operates just
53
+like the xpath sifter, except it uses
54
+<a href="http://docs.python.org/lib/re-syntax.html">regular expressions</a>
55
+instead of XPath expressions.</p>
56
+
57
+<h3>Notes</h3>
58
+
59
+<ul>
60
+<li>Any filters listed in the <code>[planet]</code> section of your config.ini
61
+will be invoked on all feeds.  Filters listed in individual
62
+<code>[feed]</code> sections will only be invoked on those feeds.
63
+Filters listed in <code>[template]</code> sections will be invoked on the
64
+output of that template.</li>
65
+
66
+<li>Input filters are executed when a feed is fetched, and the results are
67
+placed into the cache.  Changing a configuration file alone is not sufficient to
68
+change the contents of the cache &mdash; typically that only occurs after
69
+a feed is modified.</li>
70
+
71
+<li>Filters are simply invoked in the order they are listed in the
72
+configuration file (think unix pipes). Planet wide filters are executed before
73
+feed specific filters.</li>
74
+
75
+<li>The file extension of the filter is significant.  <code>.py</code> invokes
76
+python. <code>.xslt</code> invokes XSLT.  <code>.sed</code> and
77
+<code>.tmpl</code> (a.k.a. htmltmpl) are also options. Other languages, like
78
+perl or ruby or class/jar (java), aren't supported at the moment, but these
79
+would be easy to add.</li>
80
+
81
+<li>If the filter name contains a redirection character (<code>&gt;</code>),
82
+then the output stream is
83
+<a href="http://en.wikipedia.org/wiki/Tee_(Unix)">tee</a>d; one branch flows
84
+through the specified filter and the output is placed into the named file; the
85
+other unmodified branch continues onto the next filter, if any.
86
+One use case for this function is to use
87
+<a href="../filters/xhtml2html.plugin">xhtml2html</a> to produce both an XHTML
88
+and an HTML output stream from one source.</li>
89
+
90
+<li>Templates written using htmltmpl or django currently only have access to a
91
+fixed set of fields, whereas XSLT and genshi templates have access to
92
+everything.</li>
93
+
94
+<li>Plugins differ from filters in that while filters are forked, plugins are
95
+<a href="http://docs.python.org/lib/module-imp.html">imported</a>.  This
96
+means that plugins are limited to Python and are run in-process.  Plugins
97
+therefore have direct access to planet internals like configuration and
98
+logging facilities, as well as access to the bundled libraries like the
99
+<a href="http://feedparser.org/docs/">Universal Feed Parser</a> and
100
+<a href="http://code.google.com/p/html5lib/">html5lib</a>; but it also
101
+means that functions like <code>os.abort()</code> can't be recovered
102
+from.</li>
103
+</ul>
104
+</body>
105
+</html>

BIN
docs/img/shadowAlpha.png View File


+ 56
- 0
docs/index.html View File

@@ -0,0 +1,56 @@
1
+<!DOCTYPE html PUBLIC
2
+    "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
3
+    "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">
4
+<html xmlns="http://www.w3.org/1999/xhtml">
5
+<head>
6
+<script type="text/javascript" src="docs.js"></script>
7
+<link rel="stylesheet" type="text/css" href="docs.css"/>
8
+<title>Venus Documentation</title>
9
+</head>
10
+<body>
11
+<h2>Table of Contents</h2>
12
+<ul class="outer">
13
+<li><a href="installation.html">Getting started</a></li>
14
+<li>Basic Features
15
+<ul>
16
+<li><a href="config.html">Configuration</a></li>
17
+<li><a href="templates.html">Templates</a></li>
18
+</ul>
19
+</li>
20
+<li>Advanced Features
21
+<ul>
22
+<li><a href="venus.svg">Architecture</a></li>
23
+<li><a href="normalization.html">Normalization</a></li>
24
+<li><a href="filters.html">Filters and Plugins</a></li>
25
+<li><a href="admin.html">Administration interface</a></li>
26
+</ul>
27
+</li>
28
+<li>Other
29
+<ul>
30
+<li><a href="migration.html">Migration from Planet 2.0</a></li>
31
+<li><a href="contributing.html">Contributing</a></li>
32
+<li><a href="etiquette.html">Etiquette</a></li>
33
+</ul>
34
+</li>
35
+<li>Reference
36
+<ul>
37
+<li><a href="http://www.planetplanet.org/">Planet</a></li>
38
+<li><a href="http://feedparser.org/docs/">Universal Feed Parser</a></li>
39
+<li><a href="http://code.google.com/p/html5lib/">html5lib</a></li>
40
+<li><a href="http://htmltmpl.sourceforge.net/">htmltmpl</a></li>
41
+<li><a href="http://bitworking.org/projects/httplib2/">httplib2</a></li>
42
+<li><a href="http://www.w3.org/TR/xslt">XSLT</a></li>
43
+<li><a href="http://www.gnu.org/software/sed/manual/html_mono/sed.html">sed</a></li>
44
+<li><a href="http://www.djangoproject.com/documentation/templates/">Django templates</a></li>
45
+</ul>
46
+</li>
47
+<li>Credits and License
48
+<ul>
49
+<li><a href="../AUTHORS">Authors</a></li>
50
+<li><a href="../THANKS">Contributors</a></li>
51
+<li><a href="../LICENCE">License</a></li>
52
+</ul>
53
+</li>
54
+</ul>
55
+</body>
56
+</html>

+ 129
- 0
docs/installation.html View File

@@ -0,0 +1,129 @@
1
+<!DOCTYPE html PUBLIC
2
+    "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
3
+    "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">
4
+<html xmlns="http://www.w3.org/1999/xhtml">
5
+<head>
6
+<script type="text/javascript" src="docs.js"></script>
7
+<link rel="stylesheet" type="text/css" href="docs.css"/>
8
+<title>Venus Installation</title>
9
+</head>
10
+<body>
11
+<h2>Installation</h2>
12
+<p>Venus has been tested on Linux, Mac OS X, and Windows.</p>
13
+
14
+<p>You'll need at least Python 2.2 installed on your system, we recommend
15
+Python 2.4 though as there may be bugs with the earlier libraries.</p>
16
+
17
+<p>Everything Pythonesque Planet needs to provide basic operation should be
18
+included in the distribution.  Some optional features may require
19
+additional libraries, for example:</p>
20
+<ul>
21
+<li>Usage of XSLT requires either
22
+<a href="http://xmlsoft.org/XSLT/xsltproc2.html">xsltproc</a>
23
+or <a href="http://xmlsoft.org/XSLT/python.html">python-libxslt</a>.</li>
24
+<li>The current interface to filters written in non-templating languages
25
+(e.g., python) uses the
26
+<a href="http://docs.python.org/lib/module-subprocess.html">subprocess</a>
27
+module which was introduced in Python 2.4.</li>
28
+<li>Usage of FOAF as a reading list requires
29
+<a href="http://librdf.org/">librdf</a>.</li>
30
+</ul>
31
+
32
+<h3>General Instructions</h3>
33
+
34
+<p>
35
+These instructions apply to any platform.  Check the instructions
36
+below for more specific instructions for your platform.
37
+</p>
38
+
39
+<ol>
40
+<li><p>If you are reading this online, you will need to
41
+<a href="../index.html">download</a> and extract the files into a folder somewhere.
42
+You can place this wherever you like, <code>~/planet</code>
43
+and <code>~/venus</code> are good
44
+choices, but so's anywhere else you prefer.</p></li>
45
+<li><p>This is very important: from within that directory, type the following
46
+command:</p>
47
+<blockquote><code>python runtests.py</code></blockquote>
48
+<p>This should take anywhere from one to ten seconds to execute.  No network
49
+connection is required, and the script cleans up after itself.  If the
50
+script completes with an "OK", you are good to go.  Otherwise stopping here
51
+and inquiring on the
52
+<a href="http://lists.planetplanet.org/mailman/listinfo/devel">mailing list</a>
53
+ is a good idea as it can save you lots of frustration down the road.</p></li>
54
+<li><p>Make a copy of one of the <code>ini</code> files in the
55
+<a href="../examples">examples</a> subdirectory,
56
+and put it wherever you like; I like to use the Planet's name (so
57
+<code>~/planet/debian</code>), but it's really up to you.</p></li>
58
+<li><p>Edit the <code>config.ini</code> file in this directory to taste,
59
+it's pretty well documented so you shouldn't have any problems here.  Pay
60
+particular attention to the <code>output_dir</code> option, which should be
61
+readable by your web server.  If the directory you specify in your
62
+<code>cache_directory</code> exists, make sure that it is empty.</p></li>
63
+<li><p>Run it: <code>python planet.py pathto/config.ini</code></p>
64
+<p>You'll want to add this to cron, make sure you run it from the
65
+right directory.</p></li>
66
+<li><p>(Optional)</p>
67
+<p>Tell us about it! We'd love to link to you on planetplanet.org :-)</p></li>
68
+<li><p>(Optional)</p>
69
+<p>Build your own themes, templates, or filters!  And share!</p></li>
70
+</ol>
71
+
72
+<h3 id="macosx">Mac OS X and Fink Instructions</h3>
73
+
74
+<p>
75
+The <a href="http://fink.sourceforge.net/">Fink Project</a> packages
76
+various open source software for MacOS.  This makes it a little easier
77
+to get started with projects like Planet Venus.
78
+</p>
79
+
80
+<p>
81
+Note: in the following, we recommend explicitly
82
+using <code>python2.4</code>.  As of this writing, Fink is starting to
83
+support <code>python2.5</code> but the XML libraries, for example, are
84
+not yet ported to the newer python so Venus will be less featureful.
85
+</p>
86
+
87
+<ol>
88
+ <li><p>Install the XCode development tools from your Mac OS X install
89
+        disks</p></li>
90
+ <li><p><a href="http://fink.sourceforge.net/download/">Download</a>
91
+        and install Fink</p></li>
92
+ <li><p>Tell fink to install the Planet Venus prerequisites:<br />
93
+        <code>fink install python24 celementtree-py24 bzr-py24 libxslt-py24
94
+        libxml2-py24</code></p></li>
95
+ <li><p><a href="../index.html">Download</a> and extract the Venus files into a
96
+        folder somewhere</p></li>
97
+ <li><p>Run the tests: <code>python2.4 runtests.py</code><br /> This
98
+        will warn you that the RDF library is missing, but that's
99
+        OK.</p></li>
100
+ <li><p>Continue with the general steps above, starting with Step 3.  You
101
+        may want to explicitly specify <code>python2.4</code>.</p></li>
102
+</ol>
103
+
104
+<h3 id="ubuntu">Ubuntu Linux (Edgy Eft) instructions</h3>
105
+
106
+<p>Before starting, issue the following command:</p>
107
+
108
+<blockquote><pre>sudo apt-get install bzr python2.4-librdf</pre></blockquote>
109
+
110
+<h3 id="windows">Windows instructions</h3>
111
+
112
+<p>
113
+  htmltmpl templates (and Django too, since it currently piggybacks on
114
+  the htmltmpl implementation) on Windows require
115
+  the <a href="http://sourceforge.net/projects/pywin32/">pywin32</a>
116
+  module.
117
+</p>
118
+
119
+<h3 id="python22">Python 2.2 instructions</h3>
120
+
121
+<p>If you are running Python 2.2, you may also need to install <a href="http://pyxml.sourceforge.net/">pyxml</a>.  If the
122
+following runs without error, you do <b>not</b> have the problem.</p>
123
+<blockquote><pre>python -c "__import__('xml.dom.minidom').dom.minidom.parseString('&lt;entry xml:lang=\"en\"/&gt;')"</pre></blockquote>
124
+<p>Installation of pyxml varies by platform.  For Ubuntu Linux (Dapper Drake), issue the following command:</p>
125
+
126
+<blockquote><pre>sudo apt-get install python2.2-xml</pre></blockquote>
127
+
128
+</body>
129
+</html>

+ 42
- 0
docs/migration.html View File

@@ -0,0 +1,42 @@
1
+<!DOCTYPE html PUBLIC
2
+    "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
3
+    "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">
4
+<html xmlns="http://www.w3.org/1999/xhtml">
5
+<head>
6
+<script type="text/javascript" src="docs.js"></script>
7
+<link rel="stylesheet" type="text/css" href="docs.css"/>
8
+<title>Venus Migration</title>
9
+</head>
10
+<body>
11
+<h2>Migration from Planet 2.0</h2>
12
+<p>The intent is that existing Planet 2.0 users should be able to reuse
13
+their existing <code>config.ini</code> and <code>.tmpl</code> files,
14
+but the reality is that users will need to be aware of the following:</p>
15
+<ul>
16
+<li>You will need to start over with a new cache directory as the format
17
+of the cache has changed dramatically.</li>
18
+<li>Existing <code>.tmpl</code> and <code>.ini</code> files should work,
19
+though some <a href="config.html">configuration</a> options (e.g.,
20
+<code>days_per_page</code>) have not yet been implemented</li>
21
+<li>No testing has been done on Python 2.1, and it is presumed not to work.</li>
22
+<li>To take advantage of all features, you should install the optional
23
+XML and RDF libraries described on
24
+the <a href="installation.html">Installation</a> page.</li>
25
+</ul>
26
+
27
+<p>
28
+Common changes to config.ini include:
29
+</p>
30
+<ul>
31
+ <li><p>Filename changes:</p>
32
+<pre>
33
+examples/fancy/index.html.tmpl => themes/classic_fancy/index.html.tmpl
34
+examples/atom.xml.tmpl         => themes/common/atom.xml.xslt
35
+examples/rss20.xml.tmpl        => themes/common/rss20.xml.tmpl
36
+examples/rss10.xml.tmpl        => themes/common/rss10.xml.tmpl
37
+examples/opml.xml.tmpl         => themes/common/opml.xml.xslt
38
+examples/foafroll.xml.tmpl     => themes/common/foafroll.xml.xslt
39
+</pre></li>
40
+</ul>
41
+</body>
42
+</html>

+ 107
- 0
docs/normalization.html View File

@@ -0,0 +1,107 @@
1
+<!DOCTYPE html PUBLIC
2
+    "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
3
+    "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">
4
+<html xmlns="http://www.w3.org/1999/xhtml">
5
+<head>
6
+<script type="text/javascript" src="docs.js"></script>
7
+<link rel="stylesheet" type="text/css" href="docs.css"/>
8
+<title>Venus Normalization</title>
9
+</head>
10
+<body>
11
+<h2>Normalization</h2>
12
+<p>Venus builds on, and extends, the <a
13
+href="http://www.feedparser.org/">Universal Feed Parser</a> and <a
14
+href="http://code.google.com/p/html5lib/">html5lib</a> to
15
+convert all feeds into Atom 1.0, with well formed XHTML, and encoded as UTF-8,
16
+meaning that you don't have to worry about funky feeds, tag soup, or character
17
+encoding.</p>
18
+<h3>Encoding</h3>
19
+<p>Input data in feeds may be encoded in a variety of formats, most commonly
20
+ASCII, ISO-8859-1, WIN-1252, and UTF-8.  Additionally, many feeds make use of
21
+the wide range of
22
+<a href="http://www.w3.org/TR/html401/sgml/entities.html">character entity
23
+references</a> provided by HTML.  Each is converted to UTF-8, an encoding
24
+which is a proper superset of ASCII, supports the entire range of Unicode
25
+characters, and is one of 
26
+<a href="http://www.w3.org/TR/2006/REC-xml-20060816/#charsets">only two</a>
27
+encodings required to be supported by all conformant XML processors.</p>
28
+<p>Encoding problems are one of the more common feed errors, and every
29
+attempt is made to correct common errors, such as the inclusion of
30
+the so-called
31
+<a href="http://www.fourmilab.ch/webtools/demoroniser/">moronic</a> versions
32
+of smart-quotes.  In rare cases where individual characters can not be
33
+converted to valid UTF-8 or into
34
+<a href="http://www.w3.org/TR/xml/#charsets">characters allowed in XML 1.0
35
+documents</a>, such characters will be replaced with the Unicode
36
+<a href="http://www.fileformat.info/info/unicode/char/fffd/index.htm">Replacement character</a>, with a title that describes the original character whenever possible.</p>
37
+<p>In order to support the widest range of inputs, use of Python 2.3 or later,
38
+as well as the installation of the python <code>iconvcodec</code>, is
39
+recommended.</p>
40
+<h3>HTML</h3>
41
+<p>A number of different normalizations of HTML are performed.  For starters,
42
+the HTML is
43
+<a href="http://www.feedparser.org/docs/html-sanitization.html">sanitized</a>,
44
+meaning that HTML tags and attributes that could introduce javascript or
45
+other security risks are removed.</p>
46
+<p>Then,
47
+<a href="http://www.feedparser.org/docs/resolving-relative-links.html">relative
48
+links are resolved</a> within the HTML.  This is also done for links
49
+in other areas in the feed too.</p>
50
+<p>Finally, unmatched tags are closed.  This is done with a
51
+<a href="http://code.google.com/p/html5lib/">knowledge of the semantics of HTML</a>.  Additionally, a
52
+<a href="http://golem.ph.utexas.edu/~distler/blog/archives/000165.html#sanitizespec">large
53
+subset of MathML</a>, as well as a
54
+<a href="http://www.w3.org/TR/SVGMobile/">tiny profile of SVG</a>
55
+is also supported.</p>
56
+<h3>Atom 1.0</h3>
57
+<p>The Universal Feed Parser also
58
+<a href="http://www.feedparser.org/docs/content-normalization.html">normalizes the content of feeds</a>.  This involves a
59
+<a href="http://www.feedparser.org/docs/reference.html">large number of elements</a>; the best place to start is to look at
60
+<a href="http://www.feedparser.org/docs/annotated-examples.html">annotated examples</a>.  Among other things a wide variety of
61
+<a href="http://www.feedparser.org/docs/date-parsing.html">date formats</a>
62
+are converted into
63
+<a href="http://www.ietf.org/rfc/rfc3339.txt">RFC 3339</a> formatted dates.</p>
64
+<p>If no <a href="http://www.feedparser.org/docs/reference-entry-id.html">ids</a> are found in entries, attempts are made to synthesize one using (in order):</p>
65
+<ul>
66
+<li><a href="http://www.feedparser.org/docs/reference-entry-link.html">link</a></li>
67
+<li><a href="http://www.feedparser.org/docs/reference-entry-title.html">title</a></li>
68
+<li><a href="http://www.feedparser.org/docs/reference-entry-summary.html">summary</a></li>
69
+<li><a href="http://www.feedparser.org/docs/reference-entry-content.html">content</a></li>
70
+</ul>
71
+<p>If no <a
72
+href="http://www.feedparser.org/docs/reference-feed-updated.html">updated</a> dates are found in an entry, the updated date from
73
+the feed is used.  If no updated date is found in either the feed or
74
+the entry, the current time is substituted.</p>
75
+<h3 id="overrides">Overrides</h3>
76
+<p>All of the above describes what Venus does automatically, either directly
77
+or through its dependencies.  There are a number of errors which can not
78
+be corrected automatically, and for these, there are configuration parameters
79
+that can be used to help.</p>
80
+<ul>
81
+<li><code>ignore_in_feed</code> allows you to list any number of elements
82
+or attributes which are to be ignored in feeds.  This is often handy in the
83
+case of feeds where the <code>author</code>, <code>id</code>,
84
+<code>updated</code> or <code>xml:lang</code> values can't be trusted.</li>
85
+<li><code>title_type</code>, <code>summary_type</code>,
86
+<code>content_type</code> allow you to override the 
87
+<a href="http://www.feedparser.org/docs/reference-entry-title_detail.html#reference.entry.title_detail.type"><code>type</code></a>
88
+attributes on these elements.</li>
89
+<li><code>name_type</code> does something similar for
90
+<a href="http://www.feedparser.org/docs/reference-entry-author_detail.html#reference.entry.author_detail.name">author names</a></li>
91
+<li><code>future_dates</code> allows you to specify how to deal with dates which are in the future.
92
+<ul style="margin:0">
93
+<li><code>ignore_date</code> will cause the date to be ignored (and will therefore default to the time the entry was first seen) until the feed is updated and the time indicated is past, at which point the entry will be updated with the new date.</li>
94
+<li><code>ignore_entry</code> will cause the entire entry containing the future date to be ignored until the date is past.</li>
95
+<li>Anything else (i.e., the default) will leave the date as is, causing the entries that contain these dates to sort to the top of the planet until the time passes.</li>
96
+</ul>
97
+</li>
98
+<li><code>xml_base</code> will adjust the <code>xml:base</code> values in effect for each of the text constructs in the feed (things like <code>title</code>, <code>summary</code>, and <code>content</code>).  Other elements in the feed (most notably, <code>link</code>) are not affected by this value.
99
+<ul style="margin:0">
100
+<li><code>feed_alternate</code> will replace the <code>xml:base</code> in effect with the value of the <code>alternate</code> <code>link</code> found either in the enclosed <code>source</code> or enclosing <code>feed</code> element.</li>
101
+<li><code>entry_alternate</code> will replace the <code>xml:base</code> in effect with the value of the <code>alternate</code> <code>link</code> found in this entry.</li>
102
+<li>Any other value will be treated as a <a href="http://www.ietf.org/rfc/rfc3986.txt">URI reference</a>.  These values may be relative or absolute.  If relative, the <code>xml:base</code> values in each text construct will each be adjusted separately using the specified value.</li>
103
+</ul>
104
+</li>
105
+</ul>
106
+</body>
107
+</html>

+ 190
- 0
docs/templates.html View File

@@ -0,0 +1,190 @@
1
+<!DOCTYPE html PUBLIC
2
+    "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
3
+    "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">
4
+<html xmlns="http://www.w3.org/1999/xhtml">
5
+<head>
6
+<script type="text/javascript" src="docs.js"></script>
7
+<link rel="stylesheet" type="text/css" href="docs.css"/>
8
+<title>Venus Templates</title>
9
+</head>
10
+<body>
11
+<h2>Templates</h2>
12
+<p>Template names take the form
13
+<em>name</em><code>.</code><em>ext</em><code>.</code><em>type</em>, where
14
+<em>name</em><code>.</code><em>ext</em> identifies the name of the output file
15
+to be created in the <code>output_directory</code>, and <em>type</em>
16
+indicates which language processor to use for the template.</p>
17
+<p>Like with <a href="filters.html">filters</a>, templates may be written
18
+in a variety of languages and are based on the standard Unix pipe convention
19
+of producing <code>stdout</code> from <code>stdin</code>, but in practice
20
+two languages are used more than others:</p>
21
+<h3>htmltmpl</h3>
22
+<p>Many find <a href="http://htmltmpl.sourceforge.net/">htmltmpl</a>
23
+easier to get started with as you can take a simple example of your
24
+output file, sprinkle in a few <code>&lt;TMPL_VAR&gt;</code>s and
25
+<code>&lt;TMPL_LOOP&gt;</code>s and you are done.  Eventually, however,
26
+you may find that your template involves <code>&lt;TMPL_IF&gt;</code>
27
+blocks inside of attribute values, and you may find the result difficult
28
+to read and create correctly.</p>
29
+<p>It is also important to note that htmltmpl based templates do not
30
+have access to the full set of information available in the feed, just
31
+the following (rather substantial) subset:</p>
32
+
33
+<blockquote>
34
+<table border="1" cellpadding="5" cellspacing="0">
35
+<tr><th>VAR</th><th>type</th><th>source</th></tr>
36
+<tr><td>author</td><td>String</td><td><a href="http://feedparser.org/docs/reference-feed-author.html">author</a></td></tr>
37
+<tr><td>author_name</td><td>String</td><td><a href="http://feedparser.org/docs/reference-feed-author_detail.html#reference.feed.author_detail.name">author_detail.name</a></td></tr>
38
+<tr><td>generator</td><td>String</td><td><a href="http://feedparser.org/docs/reference-feed-generator.html">generator</a></td></tr>
39
+<tr><td>id</td><td>String</td><td><a href="http://feedparser.org/docs/reference-feed-id.html">id</a></td></tr>
40
+<tr><td>icon</td><td>String</td><td><a href="http://feedparser.org/docs/reference-feed-icon.html">icon</a></td></tr>
41
+<tr><td>last_updated_822</td><td>Rfc822</td><td><a href="http://feedparser.org/docs/reference-feed-updated_parsed.html">updated_parsed</a></td></tr>
42
+<tr><td>last_updated_iso</td><td>Rfc3399</td><td><a href="http://feedparser.org/docs/reference-feed-updated_parsed.html">updated_parsed</a></td></tr>
43
+<tr><td>last_updated</td><td>PlanetDate</td><td><a href="http://feedparser.org/docs/reference-feed-updated_parsed.html">updated_parsed</a></td></tr>
44
+<tr><td>link</td><td>String</td><td><a href="http://feedparser.org/docs/reference-feed-link.html">link</a></td></tr>
45
+<tr><td>logo</td><td>String</td><td><a href="http://feedparser.org/docs/reference-feed-logo.html">logo</a></td></tr>
46
+<tr><td>rights</td><td>String</td><td><a href="http://feedparser.org/docs/reference-feed-rights_detail.html#reference.feed.rights_detail.value">rights_detail.value</a></td></tr>
47
+<tr><td>subtitle</td><td>String</td><td><a href="http://feedparser.org/docs/reference-feed-subtitle_detail.html#reference.feed.subtitle_detail.value">subtitle_detail.value</a></td></tr>
48
+<tr><td>title</td><td>String</td><td><a href="http://feedparser.org/docs/reference-feed-title_detail.html#reference.feed.title_detail.value">title_detail.value</a></td></tr>
49
+<tr><td>title_plain</td><td>Plain</td><td><a href="http://feedparser.org/docs/reference-feed-title_detail.html#reference.feed.title_detail.value">title_detail.value</a></td></tr>
50
+<tr><td rowspan="2">url</td><td rowspan="2">String</td><td><a href="http://feedparser.org/docs/reference-feed-links.html#reference.feed.links.href">links[rel='self'].href</a></td></tr>
51
+<tr><td><a href="http://feedparser.org/docs/reference-headers.html">headers['location']</a></td></tr>
52
+</table>
53
+</blockquote>
54
+
55
+<p>Note: when multiple sources are listed, the last one wins</p>
56
+<p>In addition to these variables, Planet Venus makes available two
57
+arrays, <code>Channels</code> and <code>Items</code>, with one entry
58
+per subscription and per output entry respectively.  The data values
59
+within the <code>Channels</code> array exactly match the above list.
60
+The data values within the <code>Items</code> array are as follows:</p>
61
+
62
+<blockquote>
63
+<table border="1" cellpadding="5" cellspacing="0">
64
+<tr><th>VAR</th><th>type</th><th>source</th></tr>
65
+<tr><td>author</td><td>String</td><td><a href="http://feedparser.org/docs/reference-entry-author.html">author</a></td></tr>
66
+<tr><td>author_email</td><td>String</td><td><a href="http://feedparser.org/docs/reference-entry-author_detail.html#reference.entry.author_detail.email">author_detail.email</a></td></tr>
67
+<tr><td>author_name</td><td>String</td><td><a href="http://feedparser.org/docs/reference-entry-author_detail.html#reference.entry.author_detail.name">author_detail.name</a></td></tr>
68
+<tr><td>author_uri</td><td>String</td><td><a href="http://feedparser.org/docs/reference-entry-author_detail.html#reference.entry.author_detail.href">author_detail.href</a></td></tr>
69
+<tr><td>content_language</td><td>String</td><td><a href="http://feedparser.org/docs/reference-entry-content.html#reference.entry.content.language">content[0].language</a></td></tr>
70
+<tr><td rowspan="2">content</td><td rowspan="2">String</td><td><a href="http://feedparser.org/docs/reference-entry-summary_detail.html#reference.entry.summary_detail.value">summary_detail.value</a></td></tr>
71
+<tr><td><a href="http://feedparser.org/docs/reference-entry-content.html#reference.entry.content.value">content[0].value</a></td></tr>
72
+<tr><td rowspan="2">date</td><td rowspan="2">PlanetDate</td><td><a href="http://feedparser.org/docs/reference-entry-published_parsed.html">published_parsed</a></td></tr>
73
+<tr><td><a href="http://feedparser.org/docs/reference-entry-updated_parsed.html">updated_parsed</a></td></tr>
74
+<tr><td rowspan="2">date_822</td><td rowspan="2">Rfc822</td><td><a href="http://feedparser.org/docs/reference-entry-published_parsed.html">published_parsed</a></td></tr>
75
+<tr><td><a href="http://feedparser.org/docs/reference-entry-updated_parsed.html">updated_parsed</a></td></tr>
76
+<tr><td rowspan="2">date_iso</td><td rowspan="2">Rfc3399</td><td><a href="http://feedparser.org/docs/reference-entry-published_parsed.html">published_parsed</a></td></tr>
77
+<tr><td><a href="http://feedparser.org/docs/reference-entry-updated_parsed.html">updated_parsed</a></td></tr>
78
+<tr><td><ins>enclosure_href</ins></td><td>String</td><td><a href="http://feedparser.org/docs/reference-entry-enclosures.html#reference.entry.enclosures.href">enclosures[0].href</a></td></tr>
79
+<tr><td><ins>enclosure_length</ins></td><td>String</td><td><a href="http://feedparser.org/docs/reference-entry-enclosures.html#reference.entry.enclosures.length">enclosures[0].length</a></td></tr>
80
+<tr><td><ins>enclosure_type</ins></td><td>String</td><td><a href="http://feedparser.org/docs/reference-entry-enclosures.html#reference.entry.enclosures.type">enclosures[0].type</a></td></tr>
81
+<tr><td><ins>guid_isPermaLink</ins></td><td>String</td><td><a href="http://blogs.law.harvard.edu/tech/rss#ltguidgtSubelementOfLtitemgt">isPermaLink</a></td></tr>
82
+<tr><td>id</td><td>String</td><td><a href="http://feedparser.org/docs/reference-entry-id.html">id</a></td></tr>
83
+<tr><td>link</td><td>String</td><td><a href="http://feedparser.org/docs/reference-entry-links.html#reference.entry.links.href">links[rel='alternate'].href</a></td></tr>
84
+<tr><td>new_channel</td><td>String</td><td><a href="http://feedparser.org/docs/reference-entry-id.html">id</a></td></tr>
85
+<tr><td rowspan="2">new_date</td><td rowspan="2">NewDate</td><td><a href="http://feedparser.org/docs/reference-entry-published_parsed.html">published_parsed</a></td></tr>
86
+<tr><td><a href="http://feedparser.org/docs/reference-entry-updated_parsed.html">updated_parsed</a></td></tr>
87
+<tr><td>rights</td><td>String</td><td><a href="http://feedparser.org/docs/reference-entry-rights_detail.html#reference.entry.rights_detail.value">rights_detail.value</a></td></tr>
88
+<tr><td>title_language</td><td>String</td><td><a href="http://feedparser.org/docs/reference-entry-title_detail.html#reference.entry.title_detail.language">title_detail.language</a></td></tr>
89
+<tr><td>title_plain</td><td>Plain</td><td><a href="http://feedparser.org/docs/reference-entry-title_detail.html#reference.entry.title_detail.value">title_detail.value</a></td></tr>
90
+<tr><td>title</td><td>String</td><td><a href="http://feedparser.org/docs/reference-entry-title_detail.html#reference.entry.title_detail.value">title_detail.value</a></td></tr>
91
+<tr><td>summary_language</td><td>String</td><td><a href="http://feedparser.org/docs/reference-entry-summary_detail.html#reference.entry.summary_detail.language">summary_detail.language</a></td></tr>
92
+<tr><td>updated</td><td>PlanetDate</td><td><a href="http://feedparser.org/docs/reference-entry-updated_parsed.html">updated_parsed</a></td></tr>
93
+<tr><td>updated_822</td><td>Rfc822</td><td><a href="http://feedparser.org/docs/reference-entry-updated_parsed.html">updated_parsed</a></td></tr>
94
+<tr><td>updated_iso</td><td>Rfc3399</td><td><a href="http://feedparser.org/docs/reference-entry-updated_parsed.html">updated_parsed</a></td></tr>
95
+<tr><td>published</td><td>PlanetDate</td><td><a href="http://feedparser.org/docs/reference-entry-published_parsed.html">published_parsed</a></td></tr>
96
+<tr><td>published_822</td><td>Rfc822</td><td><a href="http://feedparser.org/docs/reference-entry-published_parsed.html">published_parsed</a></td></tr>
97
+<tr><td>published_iso</td><td>Rfc3399</td><td><a href="http://feedparser.org/docs/reference-entry-published_parsed.html">published_parsed</a></td></tr>
98
+</table>
99
+</blockquote>
100
+<p>Note: variables above which start with
101
+<code>new_</code> are only set if their values differ from the previous
102
+Item.</p>
103
+
104
+<h3>django</h3>
105
+
106
+<p>
107
+  If you have the <a href="http://www.djangoproject.com/">Django</a>
108
+  framework installed, 
109
+  <a href="http://www.djangoproject.com/documentation/templates/"
110
+  >Django templates</a> are automatically available to Venus
111
+  projects. You will have to save them with a <code>.html.dj</code>
112
+  extension in your themes. The variable set is the same as the one
113
+  from htmltmpl, above. In the Django template context you'll have
114
+  access to <code>Channels</code> and <code>Items</code> and you'll be
115
+  able to iterate through them.
116
+</p>
117
+
118
+<p>
119
+  You also have access to the <code>Config</code> dictionary, which contains
120
+  the Venus configuration variables from your <code>.ini</code> file.
121
+</p>
122
+
123
+<p>
124
+  If you lose your way and want to introspect all the variables in the 
125
+  context, there's the useful <code>{% debug %}</code> template tag. 
126
+</p>
127
+
128
+<p>
129
+  In the <code>themes/django/</code> you'll find a sample Venus theme
130
+  that uses the Django templates that might be a starting point for
131
+  your own custom themes.
132
+</p>
133
+
134
+<p>
135
+  All the standard Django template tags and filters are supposed to
136
+  work, with the notable exception of the <code>date</code> filter on
137
+  the updated and published dates of an item (it works on the main 
138
+  <code>{{ date }}</code> variable).
139
+</p>
140
+
141
+<p>
142
+  Please note that Django, and therefore Venus' Django support,
143
+  requires at least Python 2.3.
144
+</p>
145
+
146
+<p>
147
+  The <a href="config.html#django_autoescape">django_autoescape</a> config
148
+  option may be used to globally set the default value for
149
+   <a href="http://docs.djangoproject.com/en/dev/ref/templates/builtins/#autoescape">auto-escaping</a>.
150
+</p>
151
+
152
+<h3>xslt</h3>
153
+<p><a href="http://www.w3.org/TR/xslt">XSLT</a> is a paradox: it actually
154
+makes some simple things easier to do than htmltmpl, and certainly can
155
+make more difficult things possible; but it is fair to say that many
156
+find XSLT less approachable than htmltmpl.</p>
157
+<p>But in any case, the XSLT support is easier to document as the
158
+input is a <a href="normalization.html">highly normalized</a> feed,
159
+with a few extension elements.</p>
160
+<ul>
161
+<li><code>atom:feed</code> will have the following child elements:
162
+<ul>
163
+<li>A <code>planet:source</code> element per subscription, with the same child elements as <a href="http://www.atomenabled.org/developers/syndication/atom-format-spec.php#element.source"><code>atom:source</code></a>, as well as
164
+an additional child element in the planet namespace for each
165
+<a href="config.html#subscription">configuration parameter</a> that applies to
166
+this subscription.</li>
167
+<li><a href="http://www.feedparser.org/docs/reference-version.html"><code>planet:format</code></a> indicating the format and version of the source feed.</li>
168
+<li><a href="http://www.feedparser.org/docs/reference-bozo.html"><code>planet:bozo</code></a> which is either <code>true</code> or <code>false</code>.</li>
169
+</ul>
170
+</li>
171
+<li><code>atom:updated</code> and <code>atom:published</code> will have
172
+a <code>planet:format</code> attribute containing the referenced date
173
+formatted according to the <code>[planet] date_format</code> specified
174
+in the configuration</li>
175
+</ul>
176
+
177
+<h3>genshi</h3>
178
+<p>Genshi approaches the power of XSLT, but with a syntax that many Python
179
+programmers find more natural, succinct and expressive.  Genshi templates
180
+have access to the full range of <a href="http://feedparser.org/docs/reference.html">feedparser</a> values, with the following additions:</p>
181
+<ul>
182
+<li>In addition to a <code>feed</code> element which describes the feed
183
+for your planet, there is also a <code>feeds</code> element which contains
184
+the description for each subscription.</li>
185
+<li>All <code>feed</code>, <code>feeds</code>, and <code>source</code> elements have a child <code>config</code> element which contains the config.ini entries associated with that feed.</li>
186
+<li>All text construct detail elements (<code>subtitle</code>, <code>rights</code>, <code>title</code>, <code>summary</code>, <code>content</code>) also contain a <code>stream</code> element which contains the value as a Genshi stream.</li>
187
+<li>Each of the <code>entries</code> has a <code>new_date</code> and <code>new_feed</code> value which indicates if this entry's date or feed differs from the preceding entry.</li>
188
+</ul>
189
+</body>
190
+</html>

+ 109
- 0
docs/venus.svg View File

@@ -0,0 +1,109 @@
1
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1280 1024"  xmlns:xlink="http://www.w3.org/1999/xlink">
2
+  <defs>
3
+    <g id="feed">
4
+      <path d="M10,15l75,0l0,75l-75,0z" fill="#F80"
5
+        stroke-linejoin="round" stroke-width="20" stroke="#F80"/>
6
+      <circle cx="15" cy="82" r="6" fill="#FFF"/>
7
+      <path d="M35,82s0-20-20-20 M55,82s0-40-40-40 M75,82s0-60-60-60"
8
+        stroke-linecap="round" stroke-width="12" stroke="#FFF" fill="none"/>
9
+    </g>
10
+
11
+    <g id="entry">
12
+      <g fill="none">
13
+       <ellipse stroke="#689" rx="3" ry="22"/>
14
+       <ellipse stroke="#eb4" rx="3" ry="22" transform="rotate(-66)"/>
15
+       <ellipse stroke="#8ac" rx="3" ry="22" transform="rotate(66)"/>
16
+       <circle  stroke="#451" r="22"/>
17
+      </g>
18
+      <g fill="#689" stroke="#FFF">
19
+       <circle fill="#8ac" r="6.5"/>
20
+       <circle cy="-22" r="4.5"/>
21
+       <circle cx="-20" cy="9" r="4.5"/>
22
+       <circle cx="20" cy="9" r="4.5"/>
23
+      </g>
24
+    </g>
25
+    <g id="node" stroke="none">
26
+      <circle r="18" fill="#049"/>
27
+      <path d="M-14,7a16,16,0,0,1,22-21a15,15,0,0,0-14,2a3,3,0,1,1-5,5
28
+        a15,15,0,0,0-3,14" fill="#FFF"/>
29
+    </g>
30
+    <path d="M-14-6a44,62,0,0,0,28,0l0,12a44,62,0,0,0-28,0z"
31
+      fill="#049" id="arc"/>
32
+  </defs>
33
+
34
+  <rect height="1024" width="1280" fill="#0D0"/>
35
+
36
+  <use xlink:href="#feed" x="220" y="30"/>
37
+  <use xlink:href="#feed" x="150" y="60"/>
38
+  <use xlink:href="#feed" x="100" y="100"/>
39
+  <use xlink:href="#feed" x="60" y="150"/>
40
+  <use xlink:href="#feed" x="30" y="220"/>
41
+
42
+  <g fill="#F00" stroke-linejoin="round" stroke-width="12" stroke="#F88">
43
+    <path d="M50,800l0,180l1000,0l0-180z" fill="#FFF"/>
44
+    <path d="M150,330l400,0l0,300l-400,0z"/>
45
+    <path d="M750,200l200,0 l0,110l100,0l0,60l-100,0 l0,40l100,0l0,60l-100,0
46
+      l0,40l100,0l0,60l-100,0 l0,130l70,70l-340,0l70,-70z"/> 
47
+  </g>
48
+
49
+  <path d="M1080,360l100,0l0,-70l-30,-30l-70,0z" fill="#FFF"/>
50
+  <path d="M1180,290l-30,0l0,-30" fill="none" stroke="#000"/>
51
+  <use xlink:href="#feed" x="1080" y="380"/>
52
+
53
+  <g transform="translate(1080,500)">
54
+  <use xlink:href="#arc" transform="translate(76,50) rotate(90)"/>
55
+  <use xlink:href="#arc" transform="translate(50,35) rotate(-30)"/>
56
+  <use xlink:href="#arc" transform="translate(50,65) rotate(30)"/>
57
+  <use xlink:href="#node" transform="translate(24,50)"/>
58
+  <use xlink:href="#node" transform="translate(76,80)"/>
59
+  <use xlink:href="#node" transform="translate(76,20)"/>
60
+  </g>
61
+
62
+  <path d="M260,150s100,60,90,280 M170,270s150,0,180,120
63
+           M200,200s150,0,150,200l0,450m-100,-70l100,70l100,-70
64
+           M850,807l0,-200m-70,70l70,-70l70,70"
65
+    stroke="#000" fill="none" stroke-width="40"/>
66
+
67
+  <ellipse cx="350" cy="368" fill="#FFF" rx="80" ry="30"/>
68
+  <ellipse cx="850" cy="238" fill="#FFF" rx="80" ry="30"/>
69
+  <g font-size="32" fill="#FFF" text-anchor="middle">
70
+    <text x="350" y="380" fill="#F00">Spider</text>
71
+    <text x="350" y="460">Universal Feed Parser</text>
72
+    <text x="350" y="530">html5lib</text>
73
+    <text x="350" y="600">Reconstitute</text>
74
+    <text x="350" y="750">Filter(s)</text>
75
+    <text x="850" y="250" fill="#F00">Splice</text>
76
+    <text x="950" y="350">Template</text>
77
+    <text x="950" y="450">Template</text>
78
+    <text x="950" y="550">Template</text>
79
+    <text x="1126" y="330" fill="#000">HTML</text>
80
+  </g>
81
+
82
+
83
+  <use xlink:href="#entry" x="100" y="900"/>
84
+  <use xlink:href="#entry" x="180" y="950"/>
85
+  <use xlink:href="#entry" x="200" y="850"/>
86
+  <use xlink:href="#entry" x="290" y="920"/>
87
+  <use xlink:href="#entry" x="400" y="900"/>
88
+  <use xlink:href="#entry" x="470" y="840"/>
89
+  <use xlink:href="#entry" x="500" y="930"/>
90
+  <use xlink:href="#entry" x="570" y="870"/>
91
+  <use xlink:href="#entry" x="620" y="935"/>
92
+  <use xlink:href="#entry" x="650" y="835"/>
93
+  <use xlink:href="#entry" x="690" y="900"/>
94
+  <use xlink:href="#entry" x="720" y="835"/>
95
+  <use xlink:href="#entry" x="730" y="950"/>
96
+  <use xlink:href="#entry" x="760" y="900"/>
97
+  <use xlink:href="#entry" x="790" y="835"/>
98
+  <use xlink:href="#entry" x="800" y="950"/>
99
+  <use xlink:href="#entry" x="830" y="900"/>
100
+  <use xlink:href="#entry" x="860" y="835"/>
101
+  <use xlink:href="#entry" x="870" y="950"/>
102
+  <use xlink:href="#entry" x="900" y="900"/>
103
+  <use xlink:href="#entry" x="930" y="835"/>
104
+  <use xlink:href="#entry" x="940" y="950"/>
105
+  <use xlink:href="#entry" x="970" y="900"/>
106
+  <use xlink:href="#entry" x="1000" y="835"/>
107
+  <use xlink:href="#entry" x="1010" y="950"/>
108
+
109
+</svg>

+ 23
- 0
emacses.ini View File

@@ -0,0 +1,23 @@
1
+[Planet]
2
+name = Planet Emacses
3
+link = http://planet.emacs-es.org
4
+owner_name = Nobody
5
+owner_email = nobody@nbd.com
6
+cache_directory = /var/www/emacs-es.org/cache
7
+log_level = INFO
8
+feed_timeout = 20
9
+output_theme = emacsen
10
+output_dir = /var/www/emacs-es.org/output/
11
+items_per_page = 60
12
+bill_of_materials:
13
+  images/#{face}
14
+activity_threshold = 90
15
+new_feed_items = 5
16
+locale = "es_ES"
17
+
18
+[DEFAULT]
19
+[http://daemons.cf/categories/emacs.xml]
20
+name = Bad Daemons
21
+
22
+[http://www.maxxcan.com/category/emacs/feed/]
23
+name = Maxxcan's Site

+ 82
- 0
examples/filters/categories/categories.xslt View File

@@ -0,0 +1,82 @@
1
+<?xml version="1.0" encoding="UTF-8"?>
2
+<!DOCTYPE xsl:stylesheet [
3
+<!ENTITY categoryTerm "WebSemantique">
4
+]>
5
+<!-- 
6
+
7
+  This transformation is released under the same licence as Python
8
+  see http://www.intertwingly.net/code/venus/LICENCE.
9
+
10
+  Author: Eric van der Vlist <vdv@dyomedea.com>
11
+  
12
+  This transformation is meant to be used as a filter that determines if
13
+  Atom entries are relevant to a specific topic and adds the corresponding
14
+  <category/> element when it is the case.
15
+  
16
+  This is done by a simple keyword matching mechanism.
17
+  
18
+  To customize this filter to your needs:
19
+  
20
+    1) Replace WebSemantique by your own category name in the definition of
21
+        the categoryTerm entity above.
22
+    2) Review the "upper" and "lower" variables that are used to convert text
23
+        nodes to lower case and replace common punctuation signs with spaces
24
+        to check that they meet your needs.
25
+    3) Define your own list of keywords in <d:keyword/> elements. Note that 
26
+        the leading and trailing spaces are significant: "> rdf <" will match rdf
27
+        as an entire word while ">rdf<" would match the substring "rdf" and
28
+        "> rdf<" would match words starting by rdf. Also note that the test is done
29
+        after conversion to lowercase.
30
+
31
+  To use it with venus, just add this filter to the list of filters, for instance:
32
+  
33
+filters= categories.xslt guess_language.py
34
+  
35
+-->
36
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
37
+  xmlns:atom="http://www.w3.org/2005/Atom" xmlns="http://www.w3.org/2005/Atom"
38
+  xmlns:d="http://ns.websemantique.org/data/" exclude-result-prefixes="d atom" version="1.0">
39
+  <xsl:variable name="upper"
40
+    >,.;AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZzÀàÁáÂâÃãÄäÅåÆæÇçÈèÉéÊêËëÌìÍíÎîÏïÐðÑñÒòÓóÔôÕõÖöØøÙùÚúÛûÜüÝýÞþ</xsl:variable>
41
+  <xsl:variable name="lower"
42
+    >   aabbccddeeffgghhiijjkkllmmnnooppqqrrssttuuvvwwxxyyzzaaaaaaaaaaaaææcceeeeeeeeiiiiiiiiððnnooooooooooøøuuuuuuuuyyþþ</xsl:variable>
43
+  <d:keywords>
44
+    <d:keyword> wiki semantique </d:keyword>
45
+    <d:keyword> wikis semantiques </d:keyword>
46
+    <d:keyword> web semantique </d:keyword>
47
+    <d:keyword> websemantique </d:keyword>
48
+    <d:keyword> semantic web</d:keyword>
49
+    <d:keyword> semweb</d:keyword>
50
+    <d:keyword> rdf</d:keyword>
51
+    <d:keyword> owl </d:keyword>
52
+    <d:keyword> sparql </d:keyword>
53
+    <d:keyword> topic map</d:keyword>
54
+    <d:keyword> doap </d:keyword>
55
+    <d:keyword> foaf </d:keyword>
56
+    <d:keyword> sioc </d:keyword>
57
+    <d:keyword> ontology </d:keyword>
58
+    <d:keyword> ontologie</d:keyword>
59
+    <d:keyword> dublin core </d:keyword>
60
+  </d:keywords>
61
+  <xsl:template match="@*|node()">
62
+    <xsl:copy>
63
+      <xsl:apply-templates select="@*|node()"/>
64
+    </xsl:copy>
65
+  </xsl:template>
66
+  <xsl:template match="atom:entry/atom:updated">
67
+    <xsl:copy>
68
+      <xsl:apply-templates select="@*|node()"/>
69
+    </xsl:copy>
70
+    <xsl:variable name="concatenatedText">
71
+      <xsl:for-each select="../atom:title|../atom:summary|../atom:content|../atom:category/@term">
72
+        <xsl:text> </xsl:text>
73
+        <xsl:value-of select="translate(., $upper, $lower)"/>
74
+      </xsl:for-each>
75
+      <xsl:text> </xsl:text>
76
+    </xsl:variable>
77
+    <xsl:if test="document('')/*/d:keywords/d:keyword[contains($concatenatedText, .)]">
78
+      <category term="WebSemantique"/>
79
+    </xsl:if>
80
+  </xsl:template>
81
+  <xsl:template match="atom:category[@term='&categoryTerm;']"/>
82
+</xsl:stylesheet>

+ 37
- 0
examples/filters/guess-language/README View File

@@ -0,0 +1,37 @@
1
+This filter is released under the same licence as Python
2
+see http://www.intertwingly.net/code/venus/LICENCE.
3
+
4
+Author: Eric van der Vlist <vdv@dyomedea.com>
5
+  
6
+This filter guesses whether an Atom entry is written
7
+in English or French. It should be trivial to choose between
8
+two other languages, easy to extend to more than two languages
9
+and useful to pass these languages as Venus configuration
10
+parameters.
11
+
12
+The code used to guess the language is the one that has been
13
+described by Douglas Bagnall as the Python recipe titled
14
+"Language detection using character trigrams"
15
+http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/326576.
16
+
17
+To add support for a new language, this language must first be
18
+"learned" using learn-language.py. This learning phase is nothing
19
+more than saving a pickled version of the Trigram object for this
20
+language. 
21
+
22
+To learn Finnish, you would execute:
23
+
24
+$ ./learn-language.py http://gutenberg.net/dirs/1/0/4/9/10492/10492-8.txt fi.data
25
+
26
+where http://gutenberg.net/dirs/1/0/4/9/10492/10492-8.txt is a text
27
+representative of the Finnish language and "fi.data" is the name of the
28
+data file for "fi" (ISO code for Finnish).
29
+
30
+To install this filter, copy this directory under the Venus
31
+filter directory and declare it in your filters list, for instance:
32
+
33
+filters= categories.xslt guess-language/guess-language.py
34
+
35
+NOTE: this filter depends on Amara 
36
+(http://uche.ogbuji.net/tech/4suite/amara/)
37
+

+ 15131
- 0
examples/filters/guess-language/en.data
File diff suppressed because it is too large
View File


+ 22710
- 0
examples/filters/guess-language/fr.data
File diff suppressed because it is too large
View File


+ 58
- 0
examples/filters/guess-language/guess-language.py View File

@@ -0,0 +1,58 @@
1
+#!/usr/bin/env python
2
+"""A filter to guess languages.
3
+
4
+This filter guesses whether an Atom entry is written
5
+in English or French. It should be trivial to choose between
6
+two other languages, easy to extend to more than two languages
7
+and useful to pass these languages as Venus configuration
8
+parameters.
9
+
10
+(See the README file for more details).
11
+
12
+Requires Python 2.1, recommends 2.4.
13
+"""
14
+__authors__ = [ "Eric van der Vlist <vdv@dyomedea.com>"]
15
+__license__ = "Python"
16
+
17
+import amara
18
+from sys import stdin, stdout
19
+from trigram import Trigram
20
+from xml.dom import XML_NAMESPACE as XML_NS
21
+import cPickle
22
+
23
+ATOM_NSS = {
24
+    u'atom': u'http://www.w3.org/2005/Atom',
25
+    u'xml': XML_NS
26
+}
27
+
28
+langs = {}
29
+
30
+def tri(lang):
31
+    if not langs.has_key(lang):
32
+	f = open('filters/guess-language/%s.data' % lang, 'r')
33
+	t = cPickle.load(f)
34
+	f.close()
35
+	langs[lang] = t
36
+    return langs[lang]
37
+    
38
+
39
+def guess_language(entry):
40
+    text = u'';
41
+    for child in entry.xml_xpath(u'atom:title|atom:summary|atom:content'):
42
+	text = text + u' '+ child.__unicode__()
43
+    t = Trigram()
44
+    t.parseString(text)
45
+    if tri('fr') - t > tri('en') - t:
46
+	lang=u'en'
47
+    else:
48
+	lang=u'fr'
49
+    entry.xml_set_attribute((u'xml:lang', XML_NS), lang)
50
+
51
+def main():
52
+    feed = amara.parse(stdin, prefixes=ATOM_NSS)
53
+    for entry in feed.xml_xpath(u'//atom:entry[not(@xml:lang)]'):
54
+	guess_language(entry)
55
+    feed.xml(stdout)
56
+
57
+if __name__ == '__main__':
58
+    main()

+ 25
- 0
examples/filters/guess-language/learn-language.py View File

@@ -0,0 +1,25 @@
1
+#!/usr/bin/env python
2
+"""A filter to guess languages.
3
+
4
+This utility saves a Trigram object on file.
5
+
6
+(See the README file for more details).
7
+
8
+Requires Python 2.1, recommends 2.4.
9
+"""
10
+__authors__ = [ "Eric van der Vlist <vdv@dyomedea.com>"]
11
+__license__ = "Python"
12
+
13
+from trigram import Trigram
14
+from sys import argv
15
+from cPickle import dump
16
+
17
+
18
+def main():
19
+    tri = Trigram(argv[1])
20
+    out = open(argv[2], 'w')
21
+    dump(tri, out)
22
+    out.close()
23
+
24
+if __name__ == '__main__':
25
+    main()

+ 188
- 0
examples/filters/guess-language/trigram.py View File

@@ -0,0 +1,188 @@
1
+#!/usr/bin/python
2
+# -*- coding: UTF-8 -*-
3
+"""
4
+    This class is based on the Python recipe titled
5
+    "Language detection using character trigrams"
6
+    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/326576
7
+    by Douglas Bagnall.
8
+    It has been (slightly) adapted by Eric van der Vlist to support
9
+    Unicode and accept a method to parse strings.
10
+"""
11
+__authors__ = [ "Douglas Bagnall", "Eric van der Vlist <vdv@dyomedea.com>"]
12
+__license__ = "Python"
13
+
14
+import random
15
+from urllib import urlopen
16
+
17
+class Trigram:
18
+    """
19
+    From one or more text files, the frequency of three character
20
+    sequences is calculated.  When treated as a vector, this information
21
+    can be compared to other trigrams, and the difference between them
22
+    seen as an angle.  The cosine of this angle varies between 1 for
23
+    complete similarity, and 0 for utter difference.  Since letter
24
+    combinations are characteristic to a language, this can be used to
25
+    determine the language of a body of text. For example:
26
+
27
+        >>> reference_en = Trigram('/path/to/reference/text/english')
28
+        >>> reference_de = Trigram('/path/to/reference/text/german')
29
+        >>> unknown = Trigram('url://pointing/to/unknown/text')
30
+        >>> unknown.similarity(reference_de)
31
+        0.4
32
+        >>> unknown.similarity(reference_en)
33
+        0.95
34
+
35
+    would indicate the unknown text is almost certainly English.  As
36
+    syntax sugar, the minus sign is overloaded to return the difference
37
+    between texts, so the above objects would give you:
38
+
39
+        >>> unknown - reference_de
40
+        0.6
41
+        >>> reference_en - unknown    # order doesn't matter.
42
+        0.05
43
+
44
+    As it stands, the Trigram ignores character set information, which
45
+    means you can only accurately compare within a single encoding
46
+    (iso-8859-1 in the examples).  A more complete implementation might
47
+    convert to unicode first.
48
+
49
+    As an extra bonus, there is a method to make up nonsense words in the
50
+    style of the Trigram's text.
51
+
52
+        >>> reference_en.makeWords(30)
53
+        My withillonquiver and ald, by now wittlectionsurper, may sequia,
54
+        tory, I ad my notter. Marriusbabilly She lady for rachalle spen
55
+        hat knong al elf
56
+
57
+    Beware when using urls: HTML won't be parsed out.
58
+
59
+    Most methods chatter away to standard output, to let you know they're
60
+    still there.
61
+    """
62
+
63
+    length = 0
64
+
65
+    def __init__(self, fn=None):
66
+        self.lut = {}
67
+        if fn is not None:
68
+            self.parseFile(fn)
69
+
70
+    def _parseAFragment(self, line, pair='  '):
71
+	for letter in line:
72
+	    d = self.lut.setdefault(pair, {})
73
+            d[letter] = d.get(letter, 0) + 1
74
+            pair = pair[1] + letter
75
+	return pair
76
+
77
+    def parseString(self, string):
78
+	self._parseAFragment(string)
79
+        self.measure()
80
+    
81
+    def parseFile(self, fn, encoding="iso-8859-1"):
82
+        pair = '  '
83
+        if '://' in fn:
84
+            #print "trying to fetch url, may take time..."
85
+            f = urlopen(fn)
86
+        else:
87
+            f = open(fn)
88
+        for z, line in enumerate(f):
89
+            #if not z % 1000:
90
+            #    print "line %s" % z
91
+            # \n's are spurious in a prose context
92
+            pair = self._parseAFragment(line.strip().decode(encoding) + ' ')
93
+        f.close()
94
+        self.measure()
95
+
96
+
97
+    def measure(self):
98
+        """calculates the scalar length of the trigram vector and
99
+        stores it in self.length."""
100
+        total = 0
101
+        for y in self.lut.values():
102
+            total += sum([ x * x for x in y.values() ])
103
+        self.length = total ** 0.5
104
+
105
+    def similarity(self, other):
106
+        """returns a number between 0 and 1 indicating similarity.
107
+        1 means an identical ratio of trigrams;
108
+        0 means no trigrams in common.
109
+        """
110
+        if not isinstance(other, Trigram):
111
+            raise TypeError("can't compare Trigram with non-Trigram")
112
+        lut1 = self.lut
113
+        lut2 = other.lut
114
+        total = 0
115
+        for k in lut1.keys():
116
+            if k in lut2:
117
+                a = lut1[k]
118
+                b = lut2[k]
119
+                for x in a:
120
+                    if x in b:
121
+                        total += a[x] * b[x]
122
+
123
+        return float(total) / (self.length * other.length)
124
+
125
+    def __sub__(self, other):
126
+        """indicates difference between trigram sets; 1 is entirely
127
+        different, 0 is entirely the same."""
128
+        return 1 - self.similarity(other)
129
+
130
+
131
+    def makeWords(self, count):
132
+        """returns a string of made-up words based on the known text."""
133
+        text = []
134
+        k = '  '
135
+        while count:
136
+            n = self.likely(k)
137
+            text.append(n)
138
+            k = k[1] + n
139
+            if n in ' \t':
140
+                count -= 1
141
+        return ''.join(text)
142
+
143
+
144
+    def likely(self, k):
145
+        """Returns a character likely to follow the given
146
+        two character string, or a space if nothing is found."""
147
+        if k not in self.lut:
148
+            return ' '
149
+        # if you were using this a lot, caching would be a good idea.
150
+        letters = []
151
+        for k, v in self.lut[k].items():
152
+            letters.append(k * v)
153
+        letters = ''.join(letters)
154
+        return random.choice(letters)
155
+
156
+
157
+def test():
158
+    en = Trigram('http://gutenberg.net/dirs/etext97/lsusn11.txt')
159
+   #NB fr and some others have English license text.
160
+    #   no has english excerpts.
161
+    fr = Trigram('http://gutenberg.net/dirs/etext03/candi10.txt')
162
+    fi = Trigram('http://gutenberg.net/dirs/1/0/4/9/10492/10492-8.txt')
163
+    no = Trigram('http://gutenberg.net/dirs/1/2/8/4/12844/12844-8.txt')
164
+    se = Trigram('http://gutenberg.net/dirs/1/0/1/1/10117/10117-8.txt')
165
+    no2 = Trigram('http://gutenberg.net/dirs/1/3/0/4/13041/13041-8.txt')
166
+    en2 = Trigram('http://gutenberg.net/dirs/etext05/cfgsh10.txt')
167
+    fr2 = Trigram('http://gutenberg.net/dirs/1/3/7/0/13704/13704-8.txt')
168
+    print "calculating difference:"
169
+    print "en - fr is %s" % (en - fr)
170
+    print "fr - en is %s" % (fr - en)
171
+    print "en - en2 is %s" % (en - en2)
172
+    print "en - fr2 is %s" % (en - fr2)
173
+    print "fr - en2 is %s" % (fr - en2)
174
+    print "fr - fr2 is %s" % (fr - fr2)
175
+    print "fr2 - en2 is %s" % (fr2 - en2)
176
+    print "fi - fr  is %s" % (fi - fr)
177
+    print "fi - en  is %s" % (fi - en)
178
+    print "fi - se  is %s" % (fi - se)
179
+    print "no - se  is %s" % (no - se)
180
+    print "en - no  is %s" % (en - no)
181
+    print "no - no2  is %s" % (no - no2)
182
+    print "se - no2  is %s" % (se - no2)
183
+    print "en - no2  is %s" % (en - no2)
184
+    print "fr - no2  is %s" % (fr - no2)
185
+
186
+
187
+if __name__ == '__main__':
188
+    test()

+ 55
- 0
examples/filters/xpath-sifter/xpath-sifter.ini View File

@@ -0,0 +1,55 @@
1
+# The xpath_sifter filter allows you to stop entries from a feed being displayed
2
+# if they do not match a particular pattern.
3
+
4
+# It is useful for things like only displaying entries in a particular category
5
+# even if the site does not provide per category feeds, and displaying only entries
6
+# that contain a particular string in their title.
7
+
8
+# The xpath_sifter filter applies only after all feeds are normalised to Atom 1.0.
9
+# Look in your cache to see what entries look like.
10
+
11
+[Planet]
12
+# we are only applying the filter to certain feeds, so we do not configure it in the
13
+# [Planet] section
14
+
15
+### FIRST FEED: FILTER ON CATEGORY ###
16
+
17
+# We are only interested in entries in the category "two" from this blogger, but
18
+# he does not provide a per-category feed.
19
+# The Atom for categories looks like this: <category term="two"/>, so here
20
+# we filter the http://example.com/uncategorised.xml file for entries with a
21
+# category tag with the term attribute equal to 'two'
22
+[http://example.com/uncategorised.xml]
23
+name = Category 'two' (from Site Without a Categorised Feed)
24
+
25
+# This first version is the readable way to do it, but you'll run into trouble
26
+# if you have any special characters, like spaces, in your require string
27
+# filters = xpath_sifter.py?require=//atom:category[@term='two']
28
+
29
+# Here's a URL quoted version:
30
+filters = xpath_sifter.py?require=//atom%3Acategory%5B%40term%3D%27two%27%5D
31
+
32
+# Here's a way to get the URL quoted version on the command line:
33
+# python -c "import urllib; print urllib.quote('STRING');"
34
+# eg
35
+# python -c "import urllib; print urllib.quote('atom:category[@term=\'two\']');"
36
+
37
+### SECOND FEED: FILTER ON TITLE ###
38
+
39
+# The verbose blogger whose feed is below blogs about many subjects but we are
40
+# only interested in entries about Venus. She does not use categories but
41
+# fortunately her titles are very consistent, so we search within the title
42
+# tag's text for the text 'Venus'
43
+[http://example.com/verbose.xml]
44
+name = Venus (from Verbose Site)
45
+
46
+# Non-quoted version
47
+# filters = xpath_sifter.py?require=//atom:title[contains(.,'Venus')]
48
+# Quoted version
49
+filters = xpath_sifter.py?require=//atom%3Atitle%5Bcontains%28.%2C%27Venus%27%29%5D
50
+
51
+### THIRD FEED: NO FILTER ###
52
+
53
+# We can include other feeds that do not have the filter applied
54
+[http://example.com/normal.xml]
55
+name = No filter applied

+ 47
- 0
examples/foaf-based.ini View File

@@ -0,0 +1,47 @@
1
+# Planet configuration file
2
+
3
+# Every planet needs a [Planet] section
4
+[Planet]
5
+# name: Your planet's name
6
+# link: Link to the main page
7
+# owner_name: Your name
8
+# owner_email: Your e-mail address
9
+name = Elias' Planet
10
+link = http://torrez.us/planet/
11
+owner_name = Elias Torres
12
+owner_email = elias@torrez.us
13
+
14
+# cache_directory: Where cached feeds are stored
15
+# log_level: One of DEBUG, INFO, WARNING, ERROR or CRITICAL
16
+cache_directory = /tmp/venus/
17
+log_level = DEBUG
18
+
19
+# The following provide defaults for each template:
20
+# output_theme: "theme" of the output
21
+# output_dir: Directory to place output files
22
+# items_per_page: How many items to put on each page
23
+output_theme = mobile
24
+output_dir = /var/www/emacses
25
+items_per_page = 60
26
+
27
+# If non-zero, all feeds which have not been updated in the indicated
28
+# number of days will be marked as inactive
29
+activity_threshold = 90
30
+
31
+# filters to be run
32
+filters = excerpt.py
33
+
34
+# filter parameters
35
+[excerpt.py]
36
+omit = img p br
37
+width = 500
38
+
39
+# subscription list
40
+[http://torrez.us/who#elias]
41
+content_type = foaf
42
+[http://daemons.cf/categories/emacs.xml]
43
+name = Bad Daemons
44
+
45
+online_accounts = 
46
+  http://del.icio.us/|http://del.icio.us/rss/{foaf:accountName}
47
+  http://flickr.com/|http://api.flickr.com/services/feeds/photos_public.gne?id={foaf:accountName}

BIN
examples/images/edd.png View File


BIN
examples/images/jdub.png View File


BIN
examples/images/keybuk.png View File


BIN
examples/images/thom.png View File


+ 57
- 0
examples/opml-top100.ini View File

@@ -0,0 +1,57 @@
1
+# Planet configuration file
2
+
3
+# Every planet needs a [Planet] section
4
+[Planet]
5
+# name: Your planet's name
6
+# link: Link to the main page
7
+# owner_name: Your name
8
+# owner_email: Your e-mail address
9
+name = Techmeme Leaderboard
10
+link = http://planet.intertwingly.net/top100/
11
+owner_name = Sam Ruby
12
+owner_email = rubys@intertwingly.net
13
+
14
+# cache_directory: Where cached feeds are stored
15
+# log_level: One of DEBUG, INFO, WARNING, ERROR or CRITICAL
16
+cache_directory = /home/rubys/planet/top100
17
+log_level = INFO
18
+
19
+# The following provide defaults for each template:
20
+# output_theme: "theme" of the output
21
+# output_dir: Directory to place output files
22
+# items_per_page: How many items to put on each page
23
+output_theme = mobile
24
+output_dir = /home/rubys/public_html/top100
25
+items_per_page = 60
26
+
27
+# If non-zero, all feeds which have not been updated in the indicated
28
+# number of days will be marked as inactive
29
+activity_threshold = 90
30
+
31
+# filters to be run
32
+filters = excerpt.py
33
+
34
+# Don't let any one feed monopolize the output (symptom often occurs when
35
+# somebody 'migrates' their weblog).
36
+new_feed_items = 4
37
+
38
+bill_of_materials:
39
+  .htaccess
40
+  favicon.ico
41
+  robots.txt
42
+
43
+# filter parameters
44
+[excerpt.py]
45
+omit = img p br
46
+width = 500
47
+
48
+# add memes to output
49
+[index.html.xslt]
50
+filters = mememe.plugin
51
+
52
+[mememe.plugin]
53
+sidebar = //*[@id="footer"]
54
+
55
+# subscription list
56
+[http://www.techmeme.com/lb.opml]
57
+content_type = opml

+ 78
- 0
examples/planet-schmanet.ini View File

@@ -0,0 +1,78 @@
1
+# Planet configuration file based on the 'fancy' Planet 2.0 example.
2
+#
3
+# This illustrates some of Planet's fancier features with example.
4
+
5
+# Every planet needs a [Planet] section
6
+[Planet]
7
+# name: Your planet's name
8
+# link: Link to the main page
9
+# owner_name: Your name
10
+# owner_email: Your e-mail address
11
+name = Planet Schmanet
12
+link = http://planet.schmanet.janet/
13
+owner_name = Janet Weiss
14
+owner_email = janet@slut.sex
15
+
16
+# cache_directory: Where cached feeds are stored
17
+# log_level: One of DEBUG, INFO, WARNING, ERROR or CRITICAL
18
+# feed_timeout: number of seconds to wait for any given feed
19
+cache_directory = /home/rubys/planet/pscache
20
+log_level = DEBUG
21
+feed_timeout = 20
22
+
23
+# output_theme: "theme" of the output
24
+# output_dir: Directory to place output files
25
+# items_per_page: How many items to put on each page
26
+output_theme = classic_fancy
27
+output_dir = /home/rubys/public_html/fancy
28
+items_per_page = 60
29
+
30
+# additional files to copy (note the wildcards!)
31
+bill_of_materials:
32
+  images/#{face} 
33
+
34
+# Options placed in the [DEFAULT] section provide defaults for the feed
35
+# sections.  Placing a default here means you only need to override the
36
+# special cases later.
37
+[DEFAULT]
38
+# Hackergotchi default size.
39
+# If we want to put a face alongside a feed, and it's this size, we
40
+# can omit these variables.
41
+facewidth = 65
42
+faceheight = 85
43
+
44
+
45
+# Any other section defines a feed to subscribe to.  The section title
46
+# (in the []s) is the URI of the feed itself.  A section can also
47
+# have any of the following options:
48
+# 
49
+# name: Name of the feed (defaults to the title found in the feed)
50
+#
51
+# Additionally any other option placed here will be available in
52
+# the template (prefixed with channel_ for the Items loop).  We use
53
+# this trick to make the faces work -- this isn't something Planet
54
+# "natively" knows about.  Look at fancy-examples/index.html.tmpl
55
+# for the flip-side of this.
56
+
57
+[http://www.netsplit.com/blog/index.rss]
58
+name = Scott James Remnant
59
+face = keybuk.png
60
+# pick up the default facewidth and faceheight
61
+
62
+[http://www.gnome.org/~jdub/blog/?flav=rss]
63
+name = Jeff Waugh
64
+face = jdub.png
65
+facewidth = 70
66
+faceheight = 74
67
+
68
+[http://usefulinc.com/edd/blog/rss91]
69
+name = Edd Dumbill
70
+face = edd.png
71
+facewidth = 62
72
+faceheight = 80
73
+
74
+[http://blog.clearairturbulence.org/?flav=rss]
75
+name = Thom May
76
+face = thom.png
77
+# pick up the default faceheight only
78
+facewidth = 59

+ 17
- 0
expunge.py View File

@@ -0,0 +1,17 @@
1
+#!/usr/bin/env python
2
+"""
3
+Main program to run just the expunge portion of planet
4
+"""
5
+
6
+import os.path
7
+import sys
8
+from planet import expunge, config
9
+
10
+if __name__ == '__main__':
11
+
12
+    if len(sys.argv) == 2 and os.path.isfile(sys.argv[1]):
13
+        config.load(sys.argv[1])
14
+        expunge.expungeCache()
15
+    else:
16
+        print "Usage:"
17
+        print "  python %s config.ini" % sys.argv[0]

+ 79
- 0
favicon.py View File

@@ -0,0 +1,79 @@
1
+import sys, socket
2
+from planet import config, feedparser
3
+from planet.spider import filename
4
+from urllib2 import urlopen
5
+from urlparse import urljoin
6
+from html5lib import html5parser, treebuilders
7
+from ConfigParser import ConfigParser
8
+
9
+# load config files (default: config.ini)
10
+for arg in sys.argv[1:]:
11
+  config.load(arg)
12
+if len(sys.argv) == 1:
13
+  config.load('config.ini')
14
+
15
+from Queue import Queue
16
+from threading import Thread
17
+
18
+# determine which subscriptions have no icon but do have a html page
19
+fetch_queue = Queue()
20
+html = ['text/html', 'application/xhtml+xml']
21
+sources = config.cache_sources_directory()
22
+for sub in config.subscriptions():
23
+  data=feedparser.parse(filename(sources,sub))
24
+  if data.feed.get('icon'): continue
25
+  if not data.feed.get('links'): continue
26
+  for link in data.feed.links:
27
+    if link.rel=='alternate' and link.type in html:
28
+      fetch_queue.put((sub, link.href))
29
+      break
30
+
31
+# find the favicon for a given webpage
32
+def favicon(page):
33
+  parser=html5parser.HTMLParser(tree=treebuilders.getTreeBuilder('dom'))
34
+  doc=parser.parse(urlopen(page))
35
+  favicon = urljoin(page, '/favicon.ico')
36
+  for link in doc.getElementsByTagName('link'):
37
+    if link.hasAttribute('rel') and link.hasAttribute('href'):
38
+      if 'icon' in link.attributes['rel'].value.lower().split(' '):
39
+        favicon = urljoin(page, link.attributes['href'].value)
40
+  if urlopen(favicon).info()['content-length'] != '0':
41
+    return favicon
42
+
43
+# thread worker that fills in the dictionary which maps subs to favicon
44
+icons = {}
45
+def fetch(thread_index, fetch_queue, icons):
46
+  while 1: 
47
+    sub, html = fetch_queue.get()
48
+    if not html: break
49
+    try:
50
+      icon = favicon(html)
51
+      if icon: icons[sub] = icon
52
+    except:
53
+      pass
54
+
55
+# set timeout
56
+try:
57
+  socket.setdefaulttimeout(float(config.feed_timeout()))
58
+except:
59
+  pass
60
+
61
+# (optionally) spawn threads, fetch pages
62
+threads = {}
63
+if int(config.spider_threads()):
64
+  for i in range(int(config.spider_threads())):
65
+    threads[i] = Thread(target=fetch, args=(i, fetch_queue, icons))
66
+    fetch_queue.put((None, None))
67
+    threads[i].start()
68
+  for i in range(int(config.spider_threads())):
69
+    threads[i].join()
70
+else:
71
+  fetch_queue.put((None, None))
72
+  fetch(0, fetch_queue, icons)
73
+
74
+# produce config file
75
+config = ConfigParser()
76
+for sub, icon in icons.items():
77
+  config.add_section(sub)
78
+  config.set(sub, 'favicon', icon)
79
+config.write(sys.stdout)

+ 30
- 0
filters/addsearch.genshi View File

@@ -0,0