add bti --shrink-urls
authorBart Trojanowski <bart@jukie.net>
Sat, 21 Mar 2009 00:48:40 +0000 (20:48 -0400)
committerBart Trojanowski <bart@jukie.net>
Sun, 22 Mar 2009 16:24:47 +0000 (12:24 -0400)
This patch adds a bti-shrink-urls script which uses http://2tu.us/ to
convert any URLs to something more sensible for a tweet.

bti, with the --shrink-urls option, now uses the above to convert any
URLs it finds.

'shrink-urls' can also be set to 'true' or 'yes' in the config file.

Signed-off-by: Bart Trojanowski <bart@jukie.net>
Makefile
bti-shrink-urls [new file with mode: 0755]
bti.c

index d037908..e11ee91 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -19,6 +19,7 @@
 VERSION = 015
 
 PROGRAM = bti
+SCRIPTS = bti-shrink-urls
 
 CORE_OBJS = \
        bti.o
@@ -35,7 +36,8 @@ LD = $(CROSS_COMPILE)gcc
 AR = $(CROSS_COMPILE)ar
 
 XML2_CFLAGS = `xml2-config --cflags`
-override CFLAGS        += -g -Wall -pipe -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -O2 $(XML2_CFLAGS)
+PCRE_CFLAGS = `pcre-config --cflags`
+override CFLAGS        += -g -Wall -pipe -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -O2 $(XML2_CFLAGS) $(PCRE_CFLAGS)
 
 WARNINGS       = -Wstrict-prototypes -Wsign-compare -Wshadow \
                  -Wchar-subscripts -Wmissing-declarations -Wnested-externs \
@@ -68,14 +70,14 @@ export E Q
 #LIB_OBJS = -lcurl -lnsl -lssl -lcrypto
 LIB_OBJS = -lcurl -lnsl -lreadline
 LIB_XML2 = `xml2-config --libs`
+LIB_PCRE = `pcre-config --libs`
 
 all:   $(PROGRAM) $(MAN_PAGES)
 
 # "Static Pattern Rule" to build all programs
 bti: %: $(HEADERS) $(GEN_HEADERS) $(CORE_OBJS)
        $(E) "  LD      " $@
-       $(Q) $(LD) $(LDFLAGS) $(CORE_OBJS) -o $@ $(LIB_OBJS) $(LIB_XML2)
-
+       $(Q) $(LD) $(LDFLAGS) $(CORE_OBJS) -o $@ $(LIB_OBJS) $(LIB_XML2) $(LIB_PCRE)
 
 # build the objects
 %.o: %.c $(HEADERS) $(GEN_HEADERS)
@@ -119,6 +121,6 @@ release:
 
 install: all
        $(E) "  INSTALL  " ${DEST}${PREFIX}
-       ${Q} ${INSTALL_BIN} ${PROGRAM}
+       ${Q} ${INSTALL_BIN} ${PROGRAM} ${SCRIPTS}
        ${Q} ${INSTALL_MAN} ${MAN_PAGES}
 .PHONY: install
diff --git a/bti-shrink-urls b/bti-shrink-urls
new file mode 100755 (executable)
index 0000000..ee85b4c
--- /dev/null
@@ -0,0 +1,96 @@
+#!/bin/bash
+# Copyright (C) 2009 Bart Trojanowski <bart@jukie.net>
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+needs_escape=true
+
+while test -n "$1" ; do
+        word="$1"
+        shift
+        case "$word" in
+            --escaped)
+                needs_escape=
+                ;;
+            --help|-h)
+                cat <<END
+bti-shrink-urls - convert URLs to a shorter form using a web service
+
+    $0 [--escaped] [<url>]
+
+Currently only http://2tu.us/ is supported.
+END
+                exit 0
+                ;;
+            *)
+                URL=$word
+                ;;
+        esac
+done
+
+function convert_url() {
+        local url=$1
+        test -n "$url" || return 1
+        test "${url%:*}" = 'http' || return 1
+
+        local urllen="${#url}"
+
+        # http://en.wikipedia.org/wiki/Percent-encoding
+        if test -n "$needs_escape" ; then
+                url=$(echo "$url" | sed -e 's/\%/%25/g' \
+                                        -e 's/!/%21/g' \
+                                        -e 's/*/%2A/g' \
+                                        -e "s/'/%27/g" \
+                                        -e 's/(/%28/g' \
+                                        -e 's/)/%29/g' \
+                                        -e 's/;/%3B/g' \
+                                        -e 's/:/%3A/g' \
+                                        -e 's/@/%40/g' \
+                                        -e 's/&/%26/g' \
+                                        -e 's/=/%3D/g' \
+                                        -e 's/+/%2B/g' \
+                                        -e 's/\$/%24/g' \
+                                        -e 's/,/%2C/g' \
+                                        -e 's,/,%2F,g' \
+                                        -e 's/?/%3F/g' \
+                                        -e 's/#/%23/g' \
+                                        -e 's/\[/%5B/g' \
+                                        -e 's/]/%5D/g')
+        fi
+
+        # http://2tu.us/
+        local submit="http://2tu.us/?save=y&url=$url"
+
+        local res=$(wget -q -O - "$submit" | awk -F"'" '/Your tight URL is:/ { print $2 }')
+        if test "${res%:*}" = 'http' -a "${#res}" -lt "$urllen" ; then
+                echo $res
+                return 0
+        fi
+        return 1
+}
+
+function die() {
+        echo >&2 $@
+        exit 1
+}
+
+if test -n "$URL" ; then
+        convert_url "$URL" || die "Failed to shrink '$URL'"
+        exit $?
+fi
+
+test -t 0 && echo >&2 "Type in some urls and I'll try to shrink them for you..."
+while read line ; do
+        convert_url "$line" || echo $line
+done
diff --git a/bti.c b/bti.c
index 8f09356..70b0437 100644 (file)
--- a/bti.c
+++ b/bti.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2008 Greg Kroah-Hartman <greg@kroah.com>
+ * Copyright (C) 2009 Bart Trojanowski <bart@jukie.net>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
 #include <unistd.h>
 #include <time.h>
 #include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
 #include <curl/curl.h>
 #include <readline/readline.h>
 #include <libxml/xmlmemory.h>
 #include <libxml/parser.h>
 #include <libxml/tree.h>
+#include <pcre.h>
 #include "bti_version.h"
 
 
@@ -69,6 +73,7 @@ struct session {
        char *logfile;
        char *user;
        int bash;
+       int shrink_urls;
        enum host host;
        enum action action;
 };
@@ -94,6 +99,7 @@ static void display_help(void)
        fprintf(stdout, "  --proxy PROXY:PORT\n");
        fprintf(stdout, "  --host HOST\n");
        fprintf(stdout, "  --logfile logfile\n");
+       fprintf(stdout, "  --shrink-urls\n");
        fprintf(stdout, "  --bash\n");
        fprintf(stdout, "  --debug\n");
        fprintf(stdout, "  --version\n");
@@ -427,6 +433,7 @@ static void parse_configfile(struct session *session)
        char *action = NULL;
        char *user = NULL;
        char *file;
+       int shrink_urls = 0;
 
        /* config file is ~/.bti  */
        file = alloca(strlen(session->homedir) + 7);
@@ -492,6 +499,12 @@ static void parse_configfile(struct session *session)
                        c += 5;
                        if (c[0] != '\0')
                                user = strdup(c);
+               } else if (!strncasecmp(c, "shrink-urls", 11) &&
+                               (c[11] == '=')) {
+                       c += 12;
+                       if (!strncasecmp(c, "true", 4) ||
+                                       !strncasecmp(c, "yes", 3))
+                               shrink_urls = 1;
                }
        } while (!feof(config_file));
 
@@ -531,6 +544,7 @@ static void parse_configfile(struct session *session)
        if (user) {
                session->user = user;
        }
+       session->shrink_urls = shrink_urls;
 
        /* Free buffer and close file.  */
        free(line);
@@ -615,6 +629,259 @@ static char *get_string_from_stdin(void)
        return string;
 }
 
+static int find_urls(const char *tweet, int **pranges)
+{
+       // magic obtained from http://www.geekpedia.com/KB65_How-to-validate-an-URL-using-RegEx-in-Csharp.html
+       static const char *re_magic =
+               "(([a-zA-Z][0-9a-zA-Z+\\-\\.]*:)/{1,3}"
+               "[0-9a-zA-Z;/~?:@&=+$\\.\\-_'()%]+)"
+               "(#[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%]+)?";
+       pcre *re;
+       const char *errptr;
+       int erroffset;
+       int ovector[10] = {0,};
+       const size_t ovsize = sizeof(ovector)/sizeof(*ovector);
+       int startoffset, tweetlen;
+       int i, rc;
+       int rbound = 10;
+       int rcount = 0;
+       int *ranges = malloc(sizeof(int) * rbound);
+
+       re = pcre_compile(re_magic,
+                       PCRE_NO_AUTO_CAPTURE,
+                       &errptr, &erroffset, NULL);
+       if (!re) {
+               fprintf(stderr, "pcre_compile @%u: %s\n", erroffset, errptr);
+               exit (1);
+       }
+
+       tweetlen = strlen(tweet);
+       for (startoffset=0; startoffset<tweetlen; ) {
+
+               rc = pcre_exec(re, NULL, tweet, strlen(tweet), startoffset, 0,
+                               ovector, ovsize);
+               if (rc == PCRE_ERROR_NOMATCH)
+                       break;
+
+               if (rc<0) {
+                       fprintf(stderr, "pcre_exec @%u: %s\n", erroffset, errptr);
+                       exit (1);
+               }
+
+               for (i=0; i<rc; i+=2) {
+                       if ((rcount+2) == rbound) {
+                               rbound *= 2;
+                               ranges = realloc(ranges, sizeof(int) * rbound);
+                       }
+
+                       ranges[rcount++] = ovector[i];
+                       ranges[rcount++] = ovector[i+1];
+               }
+
+               startoffset = ovector[1];
+       }
+
+       pcre_free(re);
+
+       *pranges = ranges;
+       return rcount;
+}
+
+/**
+ * bidirectional popen() call
+ *
+ * @param rwepipe - int array of size three
+ * @param exe - program to run
+ * @param argv - argument list
+ * @return pid or -1 on error
+ *
+ * The caller passes in an array of three integers (rwepipe), on successful
+ * execution it can then write to element 0 (stdin of exe), and read from
+ * element 1 (stdout) and 2 (stderr).
+ */
+static int popenRW(int *rwepipe, const char *exe, const char *const argv[])
+{
+       int in[2];
+       int out[2];
+       int err[2];
+       int pid;
+       int rc;
+
+       rc = pipe(in);
+       if (rc<0)
+               goto error_in;
+
+       rc = pipe(out);
+       if (rc<0)
+               goto error_out;
+
+       rc = pipe(err);
+       if (rc<0)
+               goto error_err;
+
+       pid = fork();
+       if (pid > 0) { // parent
+               close(in[0]);
+               close(out[1]);
+               close(err[1]);
+               rwepipe[0] = in[1];
+               rwepipe[1] = out[0];
+               rwepipe[2] = err[0];
+               return pid;
+       } else if (pid == 0) { // child
+               close(in[1]);
+               close(out[0]);
+               close(err[0]);
+               close(0);
+               dup(in[0]);
+               close(1);
+               dup(out[1]);
+               close(2);
+               dup(err[1]);
+
+               execvp(exe, (char**)argv);
+               exit(1);
+       } else
+               goto error_fork;
+
+       return pid;
+
+error_fork:
+       close(err[0]);
+       close(err[1]);
+error_err:
+       close(out[0]);
+       close(out[1]);
+error_out:
+       close(in[0]);
+       close(in[1]);
+error_in:
+       return -1;
+}
+
+static int pcloseRW(int pid, int *rwepipe)
+{
+       int rc, status;
+       close(rwepipe[0]);
+       close(rwepipe[1]);
+       close(rwepipe[2]);
+       rc = waitpid(pid, &status, 0);
+       return status;
+}
+
+static char *shrink_one_url(int *rwepipe, char *big)
+{
+       int biglen = strlen(big);
+       char *small;
+       int smalllen;
+       int rc;
+
+       rc = dprintf(rwepipe[0], "%s\n", big);
+       if (rc < 0)
+               return big;
+
+       smalllen = biglen + 128;
+       small = malloc(smalllen);
+       if (!small)
+               return big;
+
+       rc = read(rwepipe[1], small, smalllen);
+       if (rc < 0 || rc > biglen)
+               goto error_free_small;
+
+       if (strncmp(small, "http://", 7))
+               goto error_free_small;
+
+       smalllen = rc;
+       while (smalllen && isspace(small[smalllen-1]))
+                       small[--smalllen] = 0;
+
+       free (big);
+       return small;
+
+error_free_small:
+       free(small);
+       return big;
+}
+
+static char *shrink_urls(char *text)
+{
+       int *ranges;
+       int rcount;
+       int i;
+       int inofs = 0;
+       int outofs = 0;
+       const char *const shrink_args[] = {
+               "bti-shrink-urls",
+               NULL
+       };
+       int shrink_pid;
+       int shrink_pipe[2];
+       int inlen = strlen(text);
+
+       dbg("before len=%u\n", inlen);
+
+       shrink_pid = popenRW(shrink_pipe, shrink_args[0], shrink_args);
+       if (shrink_pid < 0)
+               return text;
+
+       rcount = find_urls(text, &ranges);
+
+       for (i=0; i<rcount; i+=2) {
+               int url_start = ranges[i];
+               int url_end = ranges[i+1];
+               int long_url_len = url_end - url_start;
+               char *url = strndup(text + url_start, long_url_len);
+               int short_url_len;
+               int not_url_len = url_start - inofs;
+
+               dbg("long  url[%u]: %s\n", long_url_len, url);
+               url = shrink_one_url(shrink_pipe, url);
+               short_url_len = url ? strlen(url) : 0;
+               dbg("short url[%u]: %s\n", short_url_len, url);
+
+               if (!url || short_url_len >= long_url_len) {
+                       // the short url ended up being too long or unavailable
+                       if (inofs) {
+                               strncpy(text + outofs, text + inofs,
+                                               not_url_len + long_url_len);
+                       }
+                       inofs += not_url_len + long_url_len;
+                       outofs += not_url_len + long_url_len;
+
+               } else {
+                       // copy the unmodified block
+                       strncpy(text + outofs, text + inofs, not_url_len);
+                       inofs += not_url_len;
+                       outofs += not_url_len;
+
+                       // copy the new url
+                       strncpy(text + outofs, url, short_url_len);
+                       inofs += long_url_len;
+                       outofs += short_url_len;
+               }
+
+               free (url);
+       }
+
+       // copy the last block after the last match
+       if (inofs) {
+               int tail = inlen - inofs;
+               if (tail) {
+                       strncpy(text + outofs, text + inofs, tail);
+                       outofs += tail;
+               }
+       }
+
+       free(ranges);
+
+       (void)pcloseRW(shrink_pid, shrink_pipe);
+
+       text[outofs] = 0;
+       dbg("after len=%u\n", outofs);
+       return text;
+}
+
 int main(int argc, char *argv[], char *envp[])
 {
        static const struct option options[] = {
@@ -626,6 +893,7 @@ int main(int argc, char *argv[], char *envp[])
                { "action", 1, NULL, 'A' },
                { "user", 1, NULL, 'u' },
                { "logfile", 1, NULL, 'L' },
+               { "shrink-urls", 0, NULL, 's' },
                { "help", 0, NULL, 'h' },
                { "bash", 0, NULL, 'b' },
                { "version", 0, NULL, 'v' },
@@ -723,6 +991,9 @@ int main(int argc, char *argv[], char *envp[])
                        session->logfile = strdup(optarg);
                        dbg("logfile = %s\n", session->logfile);
                        break;
+               case 's':
+                       session->shrink_urls = 1;
+                       break;
                case 'H':
                        if (strcasecmp(optarg, "twitter") == 0)
                                session->host = HOST_TWITTER;
@@ -771,6 +1042,9 @@ int main(int argc, char *argv[], char *envp[])
                        return -1;
                }
 
+               if (session->shrink_urls)
+                       tweet = shrink_urls(tweet);
+
                session->tweet = zalloc(strlen(tweet) + 10);
                if (session->bash)
                        sprintf(session->tweet, "$ %s", tweet);