For some reason there is no method to search for a specific text in a ByteArray. At least i could not find one. I tried a very straightforward search by simply scanning from every position. Although it works, it is very slow with bigger ByteArrays.
A quick look on the internet resulted in an algorithm that suited my needs: the Boyer-Moore-Horspool algorithm. Unfortunately there was no ActionScript alternative available for it yet. So i decided to port it myself and with great result, because it greatly improved the performance of the searches for my use case.
package nl.vanhulzenonline.utils
{
import flash.utils.ByteArray;
public class ByteArrayUtils
{
public static function getIndexOf(text:String, data:ByteArray, start:int = 0, end:int = -1):int
{
var pattern:ByteArray = new ByteArray();
pattern.writeUTFBytes(text);
pattern.position = 0;
if (end == -1)
end = data.length - 1;
var i:int;
var badCharSkip:Array = new Array();
// initialize the table to default value
// when a character is encountered that does not occur
// in the pattern, we can safely skip ahead for the whole
// length of the pattern.
for (i = 0; i <= 255; i++)
badCharSkip[i] = pattern.length;
// then populate it with the analysis of the pattern
var endOfPattern:int = pattern.length - 1;
for (i = 0; i < endOfPattern; i = i + 1)
badCharSkip[pattern.readUnsignedByte()] = endOfPattern - i;
// do the matching
// search the data, while the pattern can still be within it.
var dataPart:int;
var endOfData:int = end;
var dataPosition:int = start;
while (endOfData >= endOfPattern)
{
// scan from the end of the pattern
i = endOfPattern;
while(true)
{
data.position = dataPosition + i;
pattern.position = i;
if (data.readUnsignedByte() == pattern.readUnsignedByte())
{
// if the first byte matches, we've found it.
if (i == 0)
return dataPosition;
i--;
}
else
break;
}
// otherwise, we need to skip some bytes and start again.
// note that here we are getting the skip value based on
// the last byte of pattern, no matter where we didn't
// match. so if pattern is: "abcd" then we are skipping
// based on 'd' and that value will be 4, and for "abcdd"
// we again skip on 'd' but the value will be only 1.
// the alternative of pretending that the mismatched
// character was the last character is slower in the normal
// case (eg. finding "abcd" in "...azcd..." gives 4 by
// using 'd' but only 4-2==2 using 'z'.
data.position = dataPosition + endOfPattern;
dataPart = data.readUnsignedByte();
endOfData -= badCharSkip[dataPart];
dataPosition += badCharSkip[dataPart];
}
return -1;
}
}
}