Muhtemelen girdideki sözcükleri bir HashSet'te saklar, sonra dizi üzerinde yineleyip dizideki her sözcüğün kümede bulunup bulunmadığını .contains ile kontrol ederdim.
İşte kod... girdi olarak "Around the world in 80 days" metni kullanıldı.
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Scanner;
import java.util.Set;
/**
 * Micro-benchmark comparing four ways of counting how many of a set of
 * search words occur in a text file.
 *
 * <p>The file named by the first command-line argument is used both as the
 * source of the search words (its distinct whitespace-separated tokens) and
 * as the text that is searched. Each strategy prints the number of matches
 * followed by the elapsed time in nanoseconds, labelled {@code a:} to
 * {@code d:}.
 */
public class Main
{
    /**
     * Runs the four strategies in order against the file named by
     * {@code argv[0]}.
     *
     * @param argv command-line arguments; {@code argv[0]} is the path of the text file
     * @throws FileNotFoundException if the file cannot be opened for reading
     * @throws IllegalArgumentException if no file path was supplied
     */
    public static void main(final String[] argv)
        throws FileNotFoundException
    {
        // Fail with a readable message instead of an index/null error.
        if(argv == null || argv.length < 1)
        {
            throw new IllegalArgumentException("usage: java Main <text-file>");
        }

        final File file = new File(argv[0]);
        final String[] wordsToFind = getWordsToFind(file);

        a(file, wordsToFind);
        b(file, wordsToFind);
        c(file, wordsToFind);
        d(file, wordsToFind);
    }

    /**
     * Reads every whitespace-separated token of {@code file} into
     * {@code target} and closes the underlying scanner, even when reading
     * fails part-way.
     *
     * @param file   the file to tokenize
     * @param target the collection that receives the tokens, in file order
     * @return {@code target}, to allow the call to be used as an expression
     * @throws FileNotFoundException if the file cannot be opened for reading
     */
    private static <C extends Collection<String>> C readWords(final File file,
                                                              final C target)
        throws FileNotFoundException
    {
        final Scanner scanner = new Scanner(file);

        try
        {
            while(scanner.hasNext())
            {
                target.add(scanner.next());
            }
        }
        finally
        {
            // The scanner owns the file handle; release it on every path.
            scanner.close();
        }

        return target;
    }

    /**
     * Returns the distinct tokens of {@code file}; these are the words the
     * benchmarks search for.
     *
     * <p>NOTE(review): the original author described this step as reading
     * the file into the disk cache, i.e. a warm-up read before the
     * measured runs -- that intent is not verifiable from the code itself.
     *
     * @param file the file to tokenize
     * @return the distinct tokens, in no particular order
     * @throws FileNotFoundException if the file cannot be opened for reading
     */
    private static String[] getWordsToFind(final File file)
        throws FileNotFoundException
    {
        final Set<String> words = readWords(file, new HashSet<String>());

        return words.toArray(new String[words.size()]);
    }

    /**
     * Strategy "a" (the bad way): reads the tokens into a list and, for
     * every search word, scans the list until the first equal token.
     *
     * @param file        the file whose tokens are searched
     * @param wordsToFind the words to look for
     * @throws FileNotFoundException if the file cannot be opened for reading
     */
    private static void a(final File file,
                          final String[] wordsToFind)
        throws FileNotFoundException
    {
        final List<String> words = readWords(file, new ArrayList<String>());
        int matches = 0;

        // Only the search (and printing the count) is timed, not the file read.
        final long start = System.nanoTime();

        for(final String wordToFind : wordsToFind)
        {
            for(final String word : words)
            {
                if(word.equals(wordToFind))
                {
                    matches++;
                    break;
                }
            }
        }

        System.out.println(matches);

        final long total = System.nanoTime() - start;

        System.out.println("a: " + total);
    }

    /**
     * Strategy "b" (slightly better): reads the tokens into a set, which
     * collapses duplicates, but still scans that set element by element
     * until the first equal token.
     *
     * @param file        the file whose tokens are searched
     * @param wordsToFind the words to look for
     * @throws FileNotFoundException if the file cannot be opened for reading
     */
    private static void b(final File file,
                          final String[] wordsToFind)
        throws FileNotFoundException
    {
        final Set<String> words = readWords(file, new HashSet<String>());
        int matches = 0;

        // Only the search (and printing the count) is timed, not the file read.
        final long start = System.nanoTime();

        for(final String wordToFind : wordsToFind)
        {
            for(final String word : words)
            {
                if(word.equals(wordToFind))
                {
                    matches++;
                    break;
                }
            }
        }

        System.out.println(matches);

        final long total = System.nanoTime() - start;

        System.out.println("b: " + total);
    }

    /**
     * Strategy "c" (the answer author's way): reads the tokens into a set
     * and asks the set directly, via {@code contains}, for each search word.
     *
     * @param file        the file whose tokens are searched
     * @param wordsToFind the words to look for
     * @throws FileNotFoundException if the file cannot be opened for reading
     */
    private static void c(final File file,
                          final String[] wordsToFind)
        throws FileNotFoundException
    {
        final Set<String> words = readWords(file, new HashSet<String>());
        int matches = 0;

        // Only the search (and printing the count) is timed, not the file read.
        final long start = System.nanoTime();

        for(final String wordToFind : wordsToFind)
        {
            if(words.contains(wordToFind))
            {
                matches++;
            }
        }

        System.out.println(matches);

        final long total = System.nanoTime() - start;

        System.out.println("c: " + total);
    }

    /**
     * Strategy "d" (Nikita Rybak's way): reads the tokens into a set and
     * intersects it with the search words using {@code retainAll}; the
     * size of what remains is the number of matches.
     *
     * @param file        the file whose tokens are searched
     * @param wordsToFind the words to look for
     * @throws FileNotFoundException if the file cannot be opened for reading
     */
    private static void d(final File file,
                          final String[] wordsToFind)
        throws FileNotFoundException
    {
        final Set<String> words = readWords(file, new HashSet<String>());

        // Building the set of search words is deliberately inside the timed
        // region, as in the original measurement.
        final long start = System.nanoTime();

        words.retainAll(new HashSet<String>(Arrays.asList(wordsToFind)));

        final int matches = words.size();

        System.out.println(matches);

        final long total = System.nanoTime() - start;

        System.out.println("d: " + total);
    }
}
Sonuçlar (birkaç çalıştırmadan sonra; gerçi her çalıştırmada hemen hemen aynı çıkıyor):
12596
a: 2440699000
12596
b: 2531635000
12596
c: 4507000
12596
d: 5597000
Bunu hiçbir sözcük bulunmayacak şekilde değiştirirseniz (getWordsToFind içinde sözcüklerin her birine "XXX" ekleyerek) şu sonuçları alırsınız:
0
a: 7415291000
0
b: 4688973000
0
c: 2849000
0
d: 7981000
Ayrıca yalnızca "Ben" sözcüğünü aramayı da denedim; sonuçlar şunlardır:
1
a: 235000
1
b: 351000
1
c: 75000
1
d: 10725000
İlginç bir soru; naif algoritmadan daha iyi bir şey bulabilir miyim, bakalım. – quantumSoup
Tekrarlar hakkında ne dersiniz? Cevaplar, "a ve" verisi için 3, "a a a" verisi için ise 1 sonucunu verecek şekilde kurulmuş. İstenen davranış bu mu, yoksa her ikisinin de 3 bildirmesi mi gerekiyor? – Chadwick