ABOUT ME

-

Today
-
Yesterday
-
Total
-
  • FAST SEARCHING IN A BYTEARRAY
    공부방/Flex 2012. 2. 13. 13:35
    For some reason there is no method to search for a specific text in a ByteArray. At least i could not find one. I tried a very straightforward search by simply scanning from every position. Although it works, it is very slow with bigger ByteArrays. A quick look on the internet resulted in an algorithm that suited my needs: the Boyer-Moore-Horspool algorithm. Unfortunately there was no ActionScript alternative available for it yet. So i decided to port it myself and with great result, because it greatly improved the performance of the searches for my use case.
    package nl.vanhulzenonline.utils
    {
    	import flash.utils.ByteArray;
    
    	public class ByteArrayUtils
    	{
    		public static function getIndexOf(text:String, data:ByteArray, start:int = 0, end:int = -1):int
    		{
    			var pattern:ByteArray = new ByteArray();
    			pattern.writeUTFBytes(text);
    			pattern.position = 0;
    
    			if (end == -1)
    				end = data.length - 1;
    
    			var i:int;
    			var badCharSkip:Array = new Array();
    
    			// initialize the table to default value
    			// when a character is encountered that does not occur
    			// in the pattern, we can safely skip ahead for the whole
    			// length of the pattern.
    			for (i = 0; i <= 255; i++)
    				badCharSkip[i] = pattern.length;
    
    			// then populate it with the analysis of the pattern
    			var endOfPattern:int = pattern.length - 1;
    			for (i = 0; i < endOfPattern; i = i + 1)
    				badCharSkip[pattern.readUnsignedByte()] = endOfPattern - i;
    
    			// do the matching
    
    			// search the data, while the pattern can still be within it.
    			var dataPart:int;
    			var endOfData:int = end;
    			var dataPosition:int = start;
    			while (endOfData >= endOfPattern)
    			{
    				// scan from the end of the pattern
    				i = endOfPattern;
    				while(true)
    				{
    					data.position = dataPosition + i;
    					pattern.position = i;
    					if (data.readUnsignedByte() == pattern.readUnsignedByte())
    					{
    						// if the first byte matches, we've found it.
    						if (i == 0)
    							return dataPosition;
    						i--;
    					}
    					else
    						break;
    				}
    
    				// otherwise, we need to skip some bytes and start again.
    				// note that here we are getting the skip value based on
    				// the last byte of pattern, no matter where we didn't
    				// match. so if pattern is: "abcd" then we are skipping
    				// based on 'd' and that value will be 4, and for "abcdd"
    				// we again skip on 'd' but the value will be only 1.
    				// the alternative of pretending that the mismatched
    				// character was the last character is slower in the normal
     				// case (eg. finding "abcd" in "...azcd..." gives 4 by
    				// using 'd' but only 4-2==2 using 'z'.
    				data.position = dataPosition + endOfPattern;
    				dataPart = data.readUnsignedByte();
    				endOfData -= badCharSkip[dataPart];
    				dataPosition += badCharSkip[dataPart];
    			}
    			return -1;
    		}
    	}
    }
Designed by Tistory.