From 3f148399e9109e25741fcd4a32e17317c573a5ea Mon Sep 17 00:00:00 2001 From: Bart Trojanowski Date: Fri, 20 Mar 2009 20:48:40 -0400 Subject: [PATCH] add bti --shrink-urls This patch adds a bti-shrink-urls script which uses http://2tu.us/ to convert any URLs to something more sensible for a tweet. bti, with the --shrink-urls option, now uses the above to convert any URLs it finds. 'shrink-urls' can also be set to 'true' or 'yes' in the config file. Signed-off-by: Bart Trojanowski --- Makefile | 10 +- bti-shrink-urls | 96 +++++++++++++++++ bti.c | 274 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 376 insertions(+), 4 deletions(-) create mode 100755 bti-shrink-urls diff --git a/Makefile b/Makefile index d037908..e11ee91 100644 --- a/Makefile +++ b/Makefile @@ -19,6 +19,7 @@ VERSION = 015 PROGRAM = bti +SCRIPTS = bti-shrink-urls CORE_OBJS = \ bti.o @@ -35,7 +36,8 @@ LD = $(CROSS_COMPILE)gcc AR = $(CROSS_COMPILE)ar XML2_CFLAGS = `xml2-config --cflags` -override CFLAGS += -g -Wall -pipe -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -O2 $(XML2_CFLAGS) +PCRE_CFLAGS = `pcre-config --cflags` +override CFLAGS += -g -Wall -pipe -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -O2 $(XML2_CFLAGS) $(PCRE_CFLAGS) WARNINGS = -Wstrict-prototypes -Wsign-compare -Wshadow \ -Wchar-subscripts -Wmissing-declarations -Wnested-externs \ @@ -68,14 +70,14 @@ export E Q #LIB_OBJS = -lcurl -lnsl -lssl -lcrypto LIB_OBJS = -lcurl -lnsl -lreadline LIB_XML2 = `xml2-config --libs` +LIB_PCRE = `pcre-config --libs` all: $(PROGRAM) $(MAN_PAGES) # "Static Pattern Rule" to build all programs bti: %: $(HEADERS) $(GEN_HEADERS) $(CORE_OBJS) $(E) " LD " $@ - $(Q) $(LD) $(LDFLAGS) $(CORE_OBJS) -o $@ $(LIB_OBJS) $(LIB_XML2) - + $(Q) $(LD) $(LDFLAGS) $(CORE_OBJS) -o $@ $(LIB_OBJS) $(LIB_XML2) $(LIB_PCRE) # build the objects %.o: %.c $(HEADERS) $(GEN_HEADERS) @@ -119,6 +121,6 @@ release: install: all $(E) " INSTALL " ${DEST}${PREFIX} - ${Q} ${INSTALL_BIN} ${PROGRAM} + ${Q} ${INSTALL_BIN} ${PROGRAM} ${SCRIPTS} 
${Q} ${INSTALL_MAN} ${MAN_PAGES} .PHONY: install diff --git a/bti-shrink-urls b/bti-shrink-urls new file mode 100755 index 0000000..ee85b4c --- /dev/null +++ b/bti-shrink-urls @@ -0,0 +1,96 @@ +#!/bin/bash +# Copyright (C) 2009 Bart Trojanowski +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation version 2 of the License. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +needs_escape=true + +while test -n "$1" ; do + word="$1" + shift + case "$word" in + --escaped) + needs_escape= + ;; + --help|-h) + cat <] + +Currently only http://2tu.us/ is supported. 
+END + exit 0 + ;; + *) + URL=$word + ;; + esac +done + +# convert_url URL: ask http://2tu.us/ for a shorter form of URL; print it and return 0, or return 1 when URL is empty, is not an http: URL, or the reply is not a shorter http: URL. +function convert_url() { + local url=$1 + test -n "$url" || return 1 + # %% strips from the FIRST colon so that URLs carrying a port or a later ':' still reduce to the bare scheme + test "${url%%:*}" = 'http' || return 1 + + local urllen="${#url}" + + # http://en.wikipedia.org/wiki/Percent-encoding + if test -n "$needs_escape" ; then + url=$(echo "$url" | sed -e 's/\%/%25/g' \ + -e 's/!/%21/g' \ + -e 's/*/%2A/g' \ + -e "s/'/%27/g" \ + -e 's/(/%28/g' \ + -e 's/)/%29/g' \ + -e 's/;/%3B/g' \ + -e 's/:/%3A/g' \ + -e 's/@/%40/g' \ + -e 's/&/%26/g' \ + -e 's/=/%3D/g' \ + -e 's/+/%2B/g' \ + -e 's/\$/%24/g' \ + -e 's/,/%2C/g' \ + -e 's,/,%2F,g' \ + -e 's/?/%3F/g' \ + -e 's/#/%23/g' \ + -e 's/\[/%5B/g' \ + -e 's/]/%5D/g') + fi + + # http://2tu.us/ + local submit="http://2tu.us/?save=y&url=$url" + + local res=$(wget -q -O - "$submit" | awk -F"'" '/Your tight URL is:/ { print $2 }') + # only accept the reply when it is an http: URL and actually shorter than the input + if test "${res%%:*}" = 'http' -a "${#res}" -lt "$urllen" ; then + echo "$res" + return 0 + fi + return 1 +} + +function die() { + echo >&2 $@ + exit 1 +} + +if test -n "$URL" ; then + convert_url "$URL" || die "Failed to shrink '$URL'" + exit $? +fi + +test -t 0 && echo >&2 "Type in some urls and I'll try to shrink them for you..." 
+while read line ; do + convert_url "$line" || echo $line +done diff --git a/bti.c b/bti.c index 8f09356..70b0437 100644 --- a/bti.c +++ b/bti.c @@ -1,5 +1,6 @@ /* * Copyright (C) 2008 Greg Kroah-Hartman + * Copyright (C) 2009 Bart Trojanowski * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -26,11 +27,14 @@ #include #include #include +#include +#include #include #include #include #include #include +#include #include "bti_version.h" @@ -69,6 +73,7 @@ struct session { char *logfile; char *user; int bash; + int shrink_urls; enum host host; enum action action; }; @@ -94,6 +99,7 @@ static void display_help(void) fprintf(stdout, " --proxy PROXY:PORT\n"); fprintf(stdout, " --host HOST\n"); fprintf(stdout, " --logfile logfile\n"); + fprintf(stdout, " --shrink-urls\n"); fprintf(stdout, " --bash\n"); fprintf(stdout, " --debug\n"); fprintf(stdout, " --version\n"); @@ -427,6 +433,7 @@ static void parse_configfile(struct session *session) char *action = NULL; char *user = NULL; char *file; + int shrink_urls = 0; /* config file is ~/.bti */ file = alloca(strlen(session->homedir) + 7); @@ -492,6 +499,12 @@ static void parse_configfile(struct session *session) c += 5; if (c[0] != '\0') user = strdup(c); + } else if (!strncasecmp(c, "shrink-urls", 11) && + (c[11] == '=')) { + c += 12; + if (!strncasecmp(c, "true", 4) || + !strncasecmp(c, "yes", 3)) + shrink_urls = 1; } } while (!feof(config_file)); @@ -531,6 +544,7 @@ static void parse_configfile(struct session *session) if (user) { session->user = user; } + session->shrink_urls = shrink_urls; /* Free buffer and close file. 
*/ free(line); @@ -615,6 +629,259 @@ static char *get_string_from_stdin(void) return string; } +static int find_urls(const char *tweet, int **pranges) +{ + // magic obtained from http://www.geekpedia.com/KB65_How-to-validate-an-URL-using-RegEx-in-Csharp.html + static const char *re_magic = + "(([a-zA-Z][0-9a-zA-Z+\\-\\.]*:)/{1,3}" + "[0-9a-zA-Z;/~?:@&=+$\\.\\-_'()%]+)" + "(#[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%]+)?"; + pcre *re; + const char *errptr; + int erroffset; + int ovector[10] = {0,}; + const size_t ovsize = sizeof(ovector)/sizeof(*ovector); + int startoffset, tweetlen; + int i, rc; + int rbound = 10; + int rcount = 0; + int *ranges = malloc(sizeof(int) * rbound); + + re = pcre_compile(re_magic, + PCRE_NO_AUTO_CAPTURE, + &errptr, &erroffset, NULL); + if (!re) { + fprintf(stderr, "pcre_compile @%u: %s\n", erroffset, errptr); + exit (1); + } + + tweetlen = strlen(tweet); + for (startoffset=0; startoffset 0) { // parent + close(in[0]); + close(out[1]); + close(err[1]); + rwepipe[0] = in[1]; + rwepipe[1] = out[0]; + rwepipe[2] = err[0]; + return pid; + } else if (pid == 0) { // child + close(in[1]); + close(out[0]); + close(err[0]); + close(0); + dup(in[0]); + close(1); + dup(out[1]); + close(2); + dup(err[1]); + + execvp(exe, (char**)argv); + exit(1); + } else + goto error_fork; + + return pid; + +error_fork: + close(err[0]); + close(err[1]); +error_err: + close(out[0]); + close(out[1]); +error_out: + close(in[0]); + close(in[1]); +error_in: + return -1; +} + +/* Close the three descriptors in rwepipe and reap child pid. Returns the waitpid() status word, or -1 when waitpid() itself fails. */ +static int pcloseRW(int pid, int *rwepipe) +{ + int rc, status; + close(rwepipe[0]); + close(rwepipe[1]); + close(rwepipe[2]); + rc = waitpid(pid, &status, 0); + /* status is only written on success; do not hand back an indeterminate value */ + if (rc < 0) + return -1; + return status; +} + +/* Write big and a newline to rwepipe[0], then read one reply from rwepipe[1]. When the reply is at most strlen(big) bytes and starts with "http://", big is freed and a newly malloc'd, whitespace-trimmed copy of the reply is returned; in every other case big is returned untouched. */ +static char *shrink_one_url(int *rwepipe, char *big) +{ + int biglen = strlen(big); + char *small; + int smalllen; + int rc; + + rc = dprintf(rwepipe[0], "%s\n", big); + if (rc < 0) + return big; + + smalllen = biglen + 128; + small = malloc(smalllen); + if (!small) + return big; + + rc = read(rwepipe[1], small, smalllen); + if 
(rc < 0 || rc > biglen) + goto error_free_small; + + /* read() does not NUL-terminate; rc <= biglen < smalllen keeps this store in bounds */ + small[rc] = 0; + + if (strncmp(small, "http://", 7)) + goto error_free_small; + + smalllen = rc; + while (smalllen && isspace((unsigned char)small[smalllen-1])) + small[--smalllen] = 0; + + free (big); + return small; + +error_free_small: + free(small); + return big; +} + +static char *shrink_urls(char *text) +{ + int *ranges; + int rcount; + int i; + int inofs = 0; + int outofs = 0; + const char *const shrink_args[] = { + "bti-shrink-urls", + NULL + }; + int shrink_pid; + /* popenRW() fills three slots (child stdin, stdout, stderr) and pcloseRW() closes all three */ + int shrink_pipe[3]; + int inlen = strlen(text); + + dbg("before len=%u\n", inlen); + + shrink_pid = popenRW(shrink_pipe, shrink_args[0], shrink_args); + if (shrink_pid < 0) + return text; + + rcount = find_urls(text, &ranges); + + for (i=0; i= long_url_len) { + // the short url ended up being too long or unavailable + if (inofs) { + // source and destination both lie inside text and may overlap, so memmove is required + memmove(text + outofs, text + inofs, + not_url_len + long_url_len); + } + inofs += not_url_len + long_url_len; + outofs += not_url_len + long_url_len; + + } else { + // copy the unmodified block + memmove(text + outofs, text + inofs, not_url_len); + inofs += not_url_len; + outofs += not_url_len; + + // copy the new url + strncpy(text + outofs, url, short_url_len); + inofs += long_url_len; + outofs += short_url_len; + } + + free (url); + } + + // copy the last block after the last match + if (inofs) { + int tail = inlen - inofs; + if (tail) { + memmove(text + outofs, text + inofs, tail); + outofs += tail; + } + } + + free(ranges); + + (void)pcloseRW(shrink_pid, shrink_pipe); + + text[outofs] = 0; + dbg("after len=%u\n", outofs); + return text; +} + int main(int argc, char *argv[], char *envp[]) { static const struct option options[] = { @@ -626,6 +893,7 @@ int main(int argc, char *argv[], char *envp[]) { "action", 1, NULL, 'A' }, { "user", 1, NULL, 'u' }, { "logfile", 1, NULL, 'L' }, + { "shrink-urls", 0, NULL, 's' }, { "help", 0, NULL, 'h' }, { "bash", 0, NULL, 'b' }, { "version", 0, NULL, 'v' }, @@ -723,6 +991,9 @@ int main(int argc, char *argv[], 
char *envp[]) session->logfile = strdup(optarg); dbg("logfile = %s\n", session->logfile); break; + case 's': + session->shrink_urls = 1; + break; case 'H': if (strcasecmp(optarg, "twitter") == 0) session->host = HOST_TWITTER; @@ -771,6 +1042,9 @@ int main(int argc, char *argv[], char *envp[]) return -1; } + if (session->shrink_urls) + tweet = shrink_urls(tweet); + session->tweet = zalloc(strlen(tweet) + 10); if (session->bash) sprintf(session->tweet, "$ %s", tweet); -- 2.39.5