XML News from Friday, January 8, 2010

I didn't really like the format of yesterday's Twitter dump so today I opened another can of XQuery ass-kicking to improve it. First, let's group by date:

xquery version "1.0";
declare namespace atom="http://www.w3.org/2005/Atom";

(: Renders the stored Atom feed as one <div> per calendar date: an <h3>
   heading with the date, followed by the <p> of every tweet whose
   atom:updated value falls on that date. :)

(: Step 1: walk the feed entries in reversed order and build, for each one,
   an intermediate <div> pairing the date part of atom:updated with the
   paragraph that will be displayed. :)
let $tweets :=
  for $entry in reverse(document("/db/twitter/elharo")/atom:feed/atom:entry)
  let $stamp := $entry/atom:updated/text()
  (: everything before the "T" separator is the date :)
  let $day := substring-before($stamp, "T")
  (: between "T" and the "+" of the zone offset is the time of day :)
  let $clock := substring-before(substring-after($stamp, "T"), "+")
  (: drop the leading "elharo:" prefix from the title :)
  let $message := substring-after($entry/atom:title/text(), "elharo:")
  return <div><date>{$day}</date> <p> <span>{$clock} UTC</span> {$message}</p></div>

(: Step 2: emit one group per distinct date, keeping the tweets in the
   order they have in $tweets. :)
return
  for $groupDate in distinct-values($tweets/date)
  return <div><h3>{$groupDate}</h3>
   {
   for $tweet in $tweets[date = $groupDate]
   return $tweet/p
  }</div>

Now let's hyperlink the URLs:

xquery version "1.0";
declare namespace atom="http://www.w3.org/2005/Atom";

(: Groups the stored tweets by date (one <div> with an <h3> heading per
   date) and wraps every http:// URL in the tweet text in <a> markup. :)

let $tweets :=
  for $entry in reverse(document("/db/twitter/elharo")/atom:feed/atom:entry)
  let $stamp := $entry/atom:updated/text()
  (: date = part before "T"; time = part between "T" and the "+" offset :)
  let $day := substring-before($stamp, "T")
  let $clock := substring-before(substring-after($stamp, "T"), "+")
  (: tweet text without the leading "elharo:" prefix :)
  let $message := substring-after($entry/atom:title/text(), "elharo:")
  (: The capture group already contains the "http://" scheme, so the
     replacement must use $1 by itself; prefixing it with "http://" again
     would produce "http://http://..." links.
     NOTE: replace() returns a string, so the <a> markup is placed in the
     paragraph as text; whether it ends up as a live link depends on how
     the result is serialized or post-processed, which is not shown here. :)
  let $linked := replace($message,
                         "(http://[^\s]+)",
                         "<a href='$1'>$1</a>")
  return <div><date>{$day}</date> <p> <span>{$clock} </span>
{$linked}</p></div>

(: One output <div> per distinct date, tweets kept in $tweets order. :)
return
  for $date in distinct-values($tweets/date)
  return <div><h3>{$date}</h3>
   {
   for $tweet in $tweets[date = $date]
   return $tweet/p
  }</div>

Let's do the same for @names:

xquery version "1.0";
declare namespace atom="http://www.w3.org/2005/Atom";

(: Groups the stored tweets by date (one <div> with an <h3> heading per
   date) and wraps http:// URLs and @names in the tweet text in <a>
   markup. :)

let $tweets :=
  for $entry in reverse(document("/db/twitter/elharo")/atom:feed/atom:entry)
  let $stamp := $entry/atom:updated/text()
  (: date = part before "T"; time = part between "T" and the "+" offset :)
  let $day := substring-before($stamp, "T")
  let $clock := substring-before(substring-after($stamp, "T"), "+")
  (: tweet text without the leading "elharo:" prefix :)
  let $message := substring-after($entry/atom:title/text(), "elharo:")
  (: Pass 1: URLs. The capture group includes the scheme, so $1 is the
     complete link target. :)
  let $withUrls := replace($message,
                           "(http://[^\s]+)",
                           "<a href='$1'>$1</a>")
  (: Pass 2: @names. Screen names may contain digits and underscores as
     well as letters; matching letters only would cut "@foo_bar2" down to
     "@foo" and link to the wrong account. The leading space keeps the
     pattern from matching inside URLs or e-mail-like text. :)
  let $withNames := replace($withUrls,
                            " @([a-zA-Z0-9_]+)",
                            " <a href='http://twitter.com/$1'>@$1</a>")
  return <div><date>{$day}</date> <p> <span>{$clock} </span>
{$withNames}</p></div>

(: One output <div> per distinct date, tweets kept in $tweets order. :)
return
  for $date in distinct-values($tweets/date)
  return <div><h3>{$date}</h3>
   {
   for $tweet in $tweets[date = $date]
   return $tweet/p
  }</div>

And one more time for hash tags:

xquery version "1.0";
declare namespace atom="http://www.w3.org/2005/Atom";

(: Groups the stored tweets by date (one <div> with an <h3> heading per
   date) and wraps http:// URLs, @names and #hashtags in the tweet text in
   <a> markup. :)

let $tweets :=
  for $entry in reverse(document("/db/twitter/elharo")/atom:feed/atom:entry)
  let $stamp := $entry/atom:updated/text()
  (: date = part before "T"; time = part between "T" and the "+" offset :)
  let $day := substring-before($stamp, "T")
  let $clock := substring-before(substring-after($stamp, "T"), "+")
  (: tweet text without the leading "elharo:" prefix :)
  let $message := substring-after($entry/atom:title/text(), "elharo:")
  (: Pass 1: URLs. The capture group includes the scheme, so $1 is the
     complete link target. :)
  let $withUrls := replace($message,
                           "(http://[^\s]+)",
                           "<a href='$1'>$1</a>")
  (: Pass 2: @names. Digits and underscores are accepted so that names
     such as "@foo_bar2" are linked in full rather than truncated. :)
  let $withNames := replace($withUrls,
                            " @([a-zA-Z0-9_]+)",
                            " <a href='http://twitter.com/$1'>@$1</a>")
  (: Pass 3: #hashtags. In the href the "#" is written as %23: a literal
     "#" would start the URI fragment, leaving the q= parameter empty.
     The visible link text keeps the plain "#". Digits and underscores are
     accepted so that tags such as "#xml2010" are linked in full. :)
  let $withTags := replace($withNames,
                           " #([a-zA-Z0-9_]+)",
                           " <a href='http://twitter.com/search?q=%23$1'>#$1</a>")
  return <div><date>{$day}</date> <p> <span>{$clock} </span>
{$withTags}</p></div>

(: One output <div> per distinct date, tweets kept in $tweets order. :)
return
  for $date in distinct-values($tweets/date)
  return <div><h3>{$date}</h3>
   {
   for $tweet in $tweets[date = $date]
   return $tweet/p
  }</div>

And here's the finished result.