function [Record DuplicatedGrams] = FourFiveGramLocator(STR)
% [Record DuplicatedGrams] = FourFiveGramLocator(STR)
%
% Locates every repeated 4-gram and 5-gram in a ciphertext sample and
% reports, for each one, where it first occurs, how often it occurs, and
% the gcd of the distances between its occurrences.
%
% Input:   STR = a string of ciphertext in upper-case letters.
%
% Outputs: Record = a matrix containing numerical data corresponding to
%             all repeated 4-grams and 5-grams in the ciphertext sample.
%             Each row corresponds to one duplicated gram in STR and has
%             4 columns:
%               column 1: the length of the gram (4 or 5),
%               column 2: the index in STR of the first character of the
%                         first instance of the gram,
%               column 3: the number of occurrences of the gram in STR,
%               column 4: the gcd of the separation distances between the
%                         first instance and each later instance.
%             Rows for 4-grams come first, then rows for 5-grams; within
%             each group rows are ordered by first occurrence.
%             If no gram is duplicated, Record is the single placeholder
%             row [0 0 0 0].
%          DuplicatedGrams = a character matrix with 5 columns whose rows
%             are the duplicated grams corresponding to the rows of
%             Record (4-grams are padded with one trailing blank). It is
%             an empty 0-by-5 character array if no gram is duplicated.

%Accept string objects as well as character arrays:
STR = char(STR);

%Initialize outputs and row counter. DuplicatedGrams is initialized here so
%that it is always assigned, even when no duplicated gram is found.
Record = zeros(1,4);
DuplicatedGrams = char(zeros(0,5));
row = 1;
numChars = length(STR);
maxGramLen = 5; %width of the DuplicatedGrams matrix

for gramLen = [4 5]
    numGrams = numChars - gramLen + 1;
    if numGrams < 2
        continue %fewer than two grams of this length: nothing can repeat
    end

    %First form the matrix of grams (one gram per row):
    Grams = repmat(' ', numGrams, gramLen);
    for i = 1:numGrams
        Grams(i,:) = STR(i:i+gramLen-1);
    end

    %alreadyCounted(k) is true once the gram starting at k has been
    %identified as a repeat of an earlier gram, so it is not reported again.
    alreadyCounted = false(1, numGrams);

    %The last gram cannot have a later duplicate, so stop one short.
    for i = 1:numGrams-1
        if alreadyCounted(i)
            continue
        end

        %This is a new gram to be looked at. All per-gram state is reset
        %here, so nothing from a previous gram can leak into this one.
        gram = Grams(i,:);
        dupCounter = 0;
        sepdistGCD = 0;
        for j = i+1:numGrams
            if isequal(Grams(j,:), gram) %duplication found
                alreadyCounted(j) = true;
                dupCounter = dupCounter + 1;
                sepdist = j - i;
                if dupCounter == 1
                    sepdistGCD = sepdist;
                else
                    sepdistGCD = gcd(sepdistGCD, sepdist);
                end
            end
        end

        %Record the gram only when it was actually examined in this
        %iteration and at least one duplicate was found.
        if dupCounter > 0
            Record(row,:) = [gramLen i dupCounter+1 sepdistGCD];
            DuplicatedGrams(row,:) = [gram, blanks(maxGramLen - gramLen)];
            row = row + 1;
        end
    end
end