// Translation of [Peter Norvig's spell checker](http://norvig.com/spell-correct.html) into Swift.
// Sample input corpus [here](http://norvig.com/big.txt)
import Foundation
extension SequenceType {
func mapSome<T>(@noescape transform: (Self.Generator.Element) -> T?) -> [T] {
return Array(self.map(transform).filter { $0 != nil }.map { $0! })
}
}
struct SpellingCorrector {
var knownWords: [String:Int] = [:]
init?(contentsOfFile file: String) {
do {
let text = try String(contentsOfFile: file, encoding: NSUTF8StringEncoding)
print("Loaded file \(file)")
let words = text.lowercaseString.characters.split { !("a"..."z").contains($0) }.map(String.init)
print("Split string into \(words.count) words")
for word in words { self.train(word) }
print("Trained \(knownWords.count) unique words")
} catch _ as NSError {
return nil
}
}
mutating func train(word: String) {
knownWords[word] = knownWords[word]?.successor() ?? 1
}
// Given a word, produce a set of possible alternatives with
// letters transposed, deleted, replaced or rogue characters inserted
func edits1(word: String) -> Set<String> {
if word.isEmpty { return [] }
let splits = word.characters.indices.map {
return (word[word.startIndex..<$0], word[$0..<word.endIndex])
}
let deletes = splits.map { (left, right) -> String in
return left + right.substringFromIndex(right.startIndex.advancedBy(1))
}
let transposes = (splits + [(word, "")]).mapSome { (left, right) -> String? in
if let fst = right.characters.first {
let drop1 = right.substringFromIndex(right.startIndex.advancedBy(1))
if let snd = String(drop1).characters.first {
let drop2 = drop1.substringFromIndex(drop1.startIndex.advancedBy(1))
return left + String(snd) + String(fst) + drop2
}
}
return nil
}
let alphabet = "abcdefghijklmnopqrstuvwxyz"
let replaces = splits.flatMap { left, right in
alphabet.characters.map {
left + String($0) + right.substringFromIndex(right.startIndex.advancedBy(1))
}
}
let inserts = (splits + [(word, "")]).flatMap { left, right -> [String] in
alphabet.characters.map {
return left + String($0) + right
}
}
return Set(deletes + transposes + replaces + inserts)
}
func knownEdits2(word: String) -> Set<String>? {
var known_edits: Set<String> = []
for edit in edits1(word) {
if let k = known(edits1(edit)) {
known_edits.unionInPlace(k)
}
}
return known_edits.isEmpty ? nil : known_edits
}
func known<S: SequenceType where S.Generator.Element == String>(words: S) -> Set<String>? {
let s = Set(words.filter { self.knownWords.indexForKey($0) != nil })
return s.isEmpty ? nil : s
}
func correct(word: String) -> String {
let candidates = known([word]) ?? known(edits1(word)) ?? knownEdits2(word)
return (candidates ?? []).reduce(word, combine: {
(knownWords[$0] ?? 1) < (knownWords[$1] ?? 1) ? $1 : $0
})
}
}
let tests1 = ["access": "acess", "accessing": "accesing", "accommodation":
// .../...
"whether": "wether", "wrote": "rote wote"]
let tests2 = ["forbidden": "forbiden", "decisions": "deciscions descisions",
// .../...
"together": "togehter", "profits": "proffits"]
func spelltest(tests: [String:String], bias: Int? = nil, verbose: Bool = false) -> [String:Any]{
var n = 0, bad = 0, unknown = 0, start = NSDate()
var checker = SpellingCorrector(contentsOfFile: "big.txt")!
if let bias = bias {
for target in tests.keys {
checker.knownWords[target]! += bias
}
}
for (target, wrongs) in tests {
let strings = wrongs.characters.split{$0 == " "}.map(String.init)
for wrong in strings {
n += 1
let w = checker.correct(wrong)
if w != target {
bad += 1
unknown += (checker.knownWords[target] != nil ? 1 : 0)
if verbose {
print ("\(wrong) => \(w) (\(checker.knownWords[w])); expected \(target) (\(checker.knownWords[target]))")
}
}
}
}
let pct = Int(100.0 - 100.0*Double(bad)/Double(n))
let end = NSDate()
return ["bad": bad, "n": n, "bias": bias, "pct": pct, "unknown": unknown, "secs": end.timeIntervalSinceDate(start)]
}
print(spelltest(tests1, bias: nil, verbose: true))
print(spelltest(tests2))
Gördüğüm düşünüyorum, ama ([düzenlemeler fonksiyonuna] içine örnek kullanımlarını fiş çalıştığınızda https://gist.github.com/airspeedswift/6d8c9d95f0d812f58be3 # file-spelling-swift-L36) dönüş olarak (kelime [RangeStart (bit: $ 0)], kelime [RangeStart (başlangıç: $ 0)]) 'hala bana derleyici hatası veriyor. –
@DougSmith Bağladığınız satır, yukarıda anlattığınız "Array" uzantısından ziyade "String" uzantısından faydalanıyor gibi görünüyor. Ayrıca, bir 'String' örneğinde bu şekilde kullanılan indeksleri hiç görmedim. ("şu anda kullanılamıyor" CollectionType "nesneleri için" endeksler "eski küresel fonksiyonudur). – dfri
Bunu nasıl kullanırdım? Ayrıca 'endeksler' sadece .characters'ın eskiden olduğu şeydir. –