Skip to content

Commit

Permalink
added tests
Browse files Browse the repository at this point in the history
  • Loading branch information
astockwell committed Jul 6, 2014
1 parent 763ab0a commit 8eaffcf
Show file tree
Hide file tree
Showing 3 changed files with 104 additions and 9 deletions.
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ __`serfix` corrects the character counts in PHP serialized string objects, withi
Build
-----

`go get github.com/astockwell/serfix`

-or-

1. Clone & `cd` into the repo
2. Run `go build serfix.go` (requires the Go compiler to be installed)
3. Copy the resulting `serfix` binary into your path
Expand Down Expand Up @@ -91,7 +95,7 @@ Benchmarks
Roadmap
-------

- Write tests (the regexp matching and char counting were extensively tested in the previous Python incarnation, need to port to Go)
- Expand test suite
- Remove the second regexp search that is called on every match to find the submatches (this seems to be necessary with the Go standard regexp package, but may not be in the future)
- Better yet, rewrite all regexp operations using a proper lexer

Expand Down
17 changes: 9 additions & 8 deletions serfix.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,15 @@ import (
// Flag description strings and the buffered-reader size used by this file.
const (
// helpFlagUsage is the usage text registered for the "help" flag.
helpFlagUsage = "Help and usage instructions"
// forceFlagUsage is the usage text registered for the "force" flag.
forceFlagUsage = "Force overwrite of destination file if it exists"
// readBuffer is the buffer size in bytes (2 MiB) passed to
// bufio.NewReaderSize for both the input file and os.Stdin.
readBuffer = 2 * 1024 * 1024
)

// helpPtr holds the value of the "help" flag; main prints the usage text and
// returns when it is set.
var helpPtr = flag.Bool("help", false, helpFlagUsage)
// forcePtr holds the value of the "force" flag.
var forcePtr = flag.Bool("force", false, forceFlagUsage)
// counter is a package-level integer initialised to 0.
// NOTE(review): no use of counter is visible in this excerpt — confirm it is
// still needed.
var counter int = 0
// lexer finds one serialized string token on a line: "s:", a run of digits,
// ":", an opening double quote, a lazily matched payload, a closing double
// quote and ";". Each quote may optionally be preceded by a backslash.
var lexer = regexp.MustCompile(`s:\d+:\\?\".*?\\?\";`)
// re matches the same token shape as lexer but with five capture groups:
// (1) "s:", (2) the declared length digits, (3) ":" plus the opening quote,
// (4) the payload, (5) the closing quote plus ";".
var re = regexp.MustCompile(`(s:)(\d+)(:\\?\")(.*?)(\\?\";)`)
var esc = regexp.MustCompile(`(\\"|\\'|\\\\|\\a|\\b|\\n|\\r|\\s|\\t|\\v)`)
var esc = regexp.MustCompile(`(\\"|\\'|\\\\|\\a|\\b|\\f|\\n|\\r|\\s|\\t|\\v)`)

func init() {
// Short flags too
Expand All @@ -39,7 +40,7 @@ func main() {
args := flag.Args()

if *helpPtr {
printUsage()
PrintUsage()
return
}

Expand Down Expand Up @@ -77,11 +78,11 @@ func main() {
// close out file
defer tempfile.Close()

r := bufio.NewReaderSize(infile, 2*1024*1024)
r := bufio.NewReaderSize(infile, readBuffer)

line, err := r.ReadString('\n')
for err == nil {
tempfile.WriteString(lexer.ReplaceAllStringFunc(string(line), replace))
tempfile.WriteString(lexer.ReplaceAllStringFunc(string(line), Replace))

line, err = r.ReadString('\n')
}
Expand Down Expand Up @@ -123,11 +124,11 @@ func main() {
}

} else {
r := bufio.NewReaderSize(os.Stdin, 2*1024*1024)
r := bufio.NewReaderSize(os.Stdin, readBuffer)

line, isPrefix, err := r.ReadLine()
for err == nil && !isPrefix {
fmt.Println(lexer.ReplaceAllStringFunc(string(line), replace))
fmt.Println(lexer.ReplaceAllStringFunc(string(line), Replace))

line, isPrefix, err = r.ReadLine()
}
Expand All @@ -142,13 +143,13 @@ func main() {
}
}

func replace(matches string) string {
// Replace rewrites the declared length of a single serialized string token
// (for example `s:9:"abc";` or the backslash-escaped-quote form
// `s:9:\"abc\";`) and returns the token with the recomputed count.
//
// The new count is the byte length of the payload (capture group 4 of re)
// minus one for every escape sequence found in the payload by esc. The
// prefix, quotes, payload and terminator are emitted exactly as captured.
//
// If matches does not fit the pattern in re, it is returned unchanged.
// FindStringSubmatch returns nil in that case, and indexing the nil slice
// would otherwise panic.
func Replace(matches string) string {
	parts := re.FindStringSubmatch(matches)
	if parts == nil {
		return matches
	}

	payload := parts[4]
	strLen := len(payload) - len(esc.FindAllString(payload, -1))

	return fmt.Sprintf("%s%d%s%s%s", parts[1], strLen, parts[3], payload, parts[5])
}

func printUsage() {
func PrintUsage() {
fmt.Println("Usage: serfix [flags] filename [outfilename]")
fmt.Println("Alt. Usage: cat filename | serfix")
fmt.Println("")
Expand Down
90 changes: 90 additions & 0 deletions serfix_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
package main

import (
"testing"
)

var serfixTests = []struct {
in string
out string
}{
{
// Empty string, no escaped quotes
in: `s:9:"";`,
out: `s:0:"";`,
},
{
// Empty string, with escaped quotes
in: `s:9:\"\";`,
out: `s:0:\"\";`,
},
{
// Easy string, no escaped quotes
in: `s:0:"0";`,
out: `s:1:"0";`,
},
{
// Easy string, with escaped quotes
in: `s:0:\"0\";`,
out: `s:1:\"0\";`,
},
{
// Complex string, no escaped quotes
in: `s:00:"http://example.com/image.jpg";`,
out: `s:28:"http://example.com/image.jpg";`,
},
{
// Complex string, with escaped quotes
in: `s:00:\"http://example.com/image.jpg\";`,
out: `s:28:\"http://example.com/image.jpg\";`,
},
{
// Escaped paths, with escaped quotes
in: `s:00:\".*wp-(atom|rdf|rss|rss2|feed|commentsrss2).php$\";`,
out: `s:47:\".*wp-(atom|rdf|rss|rss2|feed|commentsrss2).php$\";`,
},
{
// Escaped paths, with escaped quotes
in: `s:00:\".*wp-(atom|rdf|rss|rss2|feed|commentsrss2)\\.php$\";`,
out: `s:48:\".*wp-(atom|rdf|rss|rss2|feed|commentsrss2)\\.php$\";`,
},
{
// Complex string with escape sequences, with escaped quotes
in: `s:000:\"1234 N. Myspace Road, Suburbn, AL 12345 | (555) 555-5555\r\n<br /><br />\r\nPerson places things ideas are the fun part of all thisby Special People Town Foundation.\";`,
out: `s:163:\"1234 N. Myspace Road, Suburbn, AL 12345 | (555) 555-5555\r\n<br /><br />\r\nPerson places things ideas are the fun part of all thisby Special People Town Foundation.\";`,
},
{
// Spanish characters, with escaped quotes
in: `s:000:\"<br /><h2>Nuestro objetivo es servir a todos los niños del distrito escolar primario <br />sin ningún costo. </h2>\";`,
out: `s:116:\"<br /><h2>Nuestro objetivo es servir a todos los niños del distrito escolar primario <br />sin ningún costo. </h2>\";`,
},
{
// Timestamp
in: `s:00:\"Fri, 27 Jun 2014 18:45:31 +0000\";`,
out: `s:31:\"Fri, 27 Jun 2014 18:45:31 +0000\";`,
},
{
// Just ridiculous
in: `s:00:\"\n \n \n \n \n \n \n\";`,
out: `s:13:\"\n \n \n \n \n \n \n\";`,
},
{
// Just ridiculous
in: `s:000:\"<div id=\"v-q5P4Vemb-1\" class=\"video-player\">\n</div><br /> <a rel=\"nofollow\" href=\"http://feeds.wordpress.com/1.0/gocomments/wptv.wordpress.com/36028/\"><img alt=\"\" border=\"0\" src=\"http://feeds.wordpress.com/1.0/comments/wptv.wordpress.com/36028/\" /></a> <img alt=\"\" border=\"0\" src=\"http://stats.wordpress.com/b.gif?host=wordpress.tv&blog=5089392&post=36028&subd=wptv&ref=&feed=1\" width=\"1\" height=\"1\" /><div><a href=\"http://wordpress.tv/2014/06/27/carrie-dils-learning-to-troubleshoot-wordpress/\"><img alt=\"Carrie Dils: Learning to Troubleshoot WordPress\" src=\"http://videos.videopress.com/q5P4Vemb/video-55e3804ddb_scruberthumbnail_0.jpg\" width=\"160\" height=\"120\" /></a></div>\";`,
out: `s:677:\"<div id=\"v-q5P4Vemb-1\" class=\"video-player\">\n</div><br /> <a rel=\"nofollow\" href=\"http://feeds.wordpress.com/1.0/gocomments/wptv.wordpress.com/36028/\"><img alt=\"\" border=\"0\" src=\"http://feeds.wordpress.com/1.0/comments/wptv.wordpress.com/36028/\" /></a> <img alt=\"\" border=\"0\" src=\"http://stats.wordpress.com/b.gif?host=wordpress.tv&blog=5089392&post=36028&subd=wptv&ref=&feed=1\" width=\"1\" height=\"1\" /><div><a href=\"http://wordpress.tv/2014/06/27/carrie-dils-learning-to-troubleshoot-wordpress/\"><img alt=\"Carrie Dils: Learning to Troubleshoot WordPress\" src=\"http://videos.videopress.com/q5P4Vemb/video-55e3804ddb_scruberthumbnail_0.jpg\" width=\"160\" height=\"120\" /></a></div>\";`,
},
{
// Just ridiculous
in: `s:0000:\"<p>WordCamp Europe organizers <a href=\"http://2014.europe.wordcamp.org/2014/06/27/ticket-sales-open-for-wordcamp-europe/\" target=\"_blank\">announced</a> today that tickets are now on sale for this year&#8217;s event, which will be held in Sofia, Bulgaria, on September 27th – 29th. Last year&#8217;s event was by all accounts a smashing success and included diverse attendees from around the world. Approximately 70% of those in attendance flew in from outside the Netherlands.</p>\n<p>The organizers expect 900+ attendees this year, which would make it one of the largest WordPress events of the year. Fortunately, they were able to secure the <a href=\"http://www.ndk.bg/\" target=\"_blank\">National Palace of Culture</a> for the venue, the largest multifunctional congress, conference, convention and exhibition center in Southeastern Europe.</p>\n<p><a href=\"http://i0.wp.com/wptavern.com/wp-content/uploads/2014/03/npc.jpg\" rel=\"prettyphoto[25449]\"><img src=\"http://i0.wp.com/wptavern.com/wp-content/uploads/2014/03/npc.jpg?resize=789%2C379\" alt=\"npc\" class=\"aligncenter size-full wp-image-18647\" /></a></p>\n<p>Due to the success of the previous year, companies are rushing to <a href=\"http://2014.europe.wordcamp.org/sponsor-wordcamp-europe/\" target=\"_blank\">sponsor the event</a>, and the packages are even cheaper because of the lower cost of the location. The organizers reported that all the top tier sponsorship packages were sold out within 24 hours last year.</p>\n<p>In a recent <a href=\"http://joshspeaking.com/matt-mullenweg/\" target=\"_blank\">interview</a>, Matt Mullenweg noted that May 2014 marked the first time that non-English downloads of WordPress surpassed the number of English downloads. Although the software was created by English-speaking people, its user base is rapidly expanding to become more representative of the world&#8217;s population. 
WordCamp Europe is currently one of the few events that demonstrates the true diversity of the community by bringing together a massive multicultural, multilingual group of WordPress users and professionals.</p>\n<p>If you want to connect with the European WordPress community, Sofia is the place to be at the end of September. The deadline for speaker applications is July 5th, 2014. Last year&#8217;s featured presenters included Matt Mullenweg, Joost de Valk, and Vitaly Friedman, along with many other internationally renowned speakers. Tickets for this highly anticipated event cost 30 Euros and 100 tickets were <a href=\"https://twitter.com/WCEurope/status/482503859429183488\" target=\"_blank\">sold within the first hour</a>. If you plan on going, <a href=\"http://2014.europe.wordcamp.org/2014/06/27/ticket-sales-open-for-wordcamp-europe/\" target=\"_blank\">purchase yours</a> as soon as possible; WordCamp Europe is likely to sell out soon.</p>\";`,
out: `s:2816:\"<p>WordCamp Europe organizers <a href=\"http://2014.europe.wordcamp.org/2014/06/27/ticket-sales-open-for-wordcamp-europe/\" target=\"_blank\">announced</a> today that tickets are now on sale for this year&#8217;s event, which will be held in Sofia, Bulgaria, on September 27th – 29th. Last year&#8217;s event was by all accounts a smashing success and included diverse attendees from around the world. Approximately 70% of those in attendance flew in from outside the Netherlands.</p>\n<p>The organizers expect 900+ attendees this year, which would make it one of the largest WordPress events of the year. Fortunately, they were able to secure the <a href=\"http://www.ndk.bg/\" target=\"_blank\">National Palace of Culture</a> for the venue, the largest multifunctional congress, conference, convention and exhibition center in Southeastern Europe.</p>\n<p><a href=\"http://i0.wp.com/wptavern.com/wp-content/uploads/2014/03/npc.jpg\" rel=\"prettyphoto[25449]\"><img src=\"http://i0.wp.com/wptavern.com/wp-content/uploads/2014/03/npc.jpg?resize=789%2C379\" alt=\"npc\" class=\"aligncenter size-full wp-image-18647\" /></a></p>\n<p>Due to the success of the previous year, companies are rushing to <a href=\"http://2014.europe.wordcamp.org/sponsor-wordcamp-europe/\" target=\"_blank\">sponsor the event</a>, and the packages are even cheaper because of the lower cost of the location. The organizers reported that all the top tier sponsorship packages were sold out within 24 hours last year.</p>\n<p>In a recent <a href=\"http://joshspeaking.com/matt-mullenweg/\" target=\"_blank\">interview</a>, Matt Mullenweg noted that May 2014 marked the first time that non-English downloads of WordPress surpassed the number of English downloads. Although the software was created by English-speaking people, its user base is rapidly expanding to become more representative of the world&#8217;s population. 
WordCamp Europe is currently one of the few events that demonstrates the true diversity of the community by bringing together a massive multicultural, multilingual group of WordPress users and professionals.</p>\n<p>If you want to connect with the European WordPress community, Sofia is the place to be at the end of September. The deadline for speaker applications is July 5th, 2014. Last year&#8217;s featured presenters included Matt Mullenweg, Joost de Valk, and Vitaly Friedman, along with many other internationally renowned speakers. Tickets for this highly anticipated event cost 30 Euros and 100 tickets were <a href=\"https://twitter.com/WCEurope/status/482503859429183488\" target=\"_blank\">sold within the first hour</a>. If you plan on going, <a href=\"http://2014.europe.wordcamp.org/2014/06/27/ticket-sales-open-for-wordcamp-europe/\" target=\"_blank\">purchase yours</a> as soon as possible; WordCamp Europe is likely to sell out soon.</p>\";`,
},
}

// TestSerfixCharacterCounts feeds the input of every case in serfixTests to
// Replace and reports an error for each case whose result differs from the
// expected output.
func TestSerfixCharacterCounts(t *testing.T) {
	for i, test := range serfixTests {
		// %q keeps backslashes, quotes and whitespace visible in the failure
		// output, which is where these cases differ from one another.
		if actual := Replace(test.in); actual != test.out {
			t.Errorf("test %d: Replace(%q) = %q, want %q", i, test.in, actual, test.out)
		}
	}
}

0 comments on commit 8eaffcf

Please sign in to comment.