# cache.tcl --
#
#       FIXME: This file needs a description here.
#
# Copyright (c) 1998-2002 The Regents of the University of California.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# A. Redistributions of source code must retain the above copyright notice,
#    this list of conditions and the following disclaimer.
# B. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
# C. Neither the names of the copyright holders nor the names of its
#    contributors may be used to endorse or promote products derived from this
#    software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import WebCacheControl WC_Recorder WC_Player

#
# A web cache object. It takes care of storing web contents
# on local disk index by the associated urls. It is also
# responsible for fetching the contents directly from the
# origin server if the data is not in the local cache.
#
Class WebCacheApplication -superclass Application -configuration {
	cacheDir ~/.mash/srmcache/
	cacheSize 1000000
	playFilename none
	recordFilename none
	d1 1.0
	d2 2.0
	e1 2.0
	e2 3.0
}

#
# The web cache constructor.
#
WebCacheApplication public init { argv } {
	$self next cache

	set netspec [$self init_argv $argv]
	if { $netspec == "" } {
		$self usage
		$self fatal "unknown/invalid address"
	}

	# srmv2 related variables
	$self instvar session_ source_
	set ab [new AddressBlock $netspec]
	set addr [$ab addr]
	set port [$ab sport]
	set ttl  [$ab ttl]
	delete $ab

	set session_ [srm_create_session $addr $port $port $ttl]
	set source_ [srm_create_source $session_]
	srm_callbacks $session_ srm_recv srm_should_recover srm_read_adu \
		srm_source_update srm_recv_cid

	# cache control object
	$self instvar control_ d1_ d2_ e1_ e2_
	set e1_ [$self get_option e1]
	set e2_ [$self get_option e2]
	set d1_ [$self get_option d1]
	set d2_ [$self get_option d2]
	set control_ [new WebCacheControl $d1_ $d2_ $e1_ $e2_ \
			$addr/[expr $port-1]/$ttl]
	$control_ set cache_ $self

	# cache related variables
	$self instvar cache_dir_ index_ index_filename_ used_ total_ lru_
	$self create_dir [$self get_option cacheDir]
	set cache_dir_ [glob [$self get_option cacheDir]]

	set index_filename_ [file join $cache_dir_ index.db]
	if {! [catch {set f [open $index_filename_]}] } {
		while 1 {
			set line [gets $f]
			if [eof $f] {
				close $f
				break
			}
			set index_([lindex $line 0]) [lindex $line 1]
		}
	}

	# keep track of space limitations in cache
	set lru_ {}
	set used_ 0
	set total_ [$self get_option cacheSize]

	# check if we're recording and possibly set up record file.
	if { [$self get_option record] != "" } {
		$self instvar recorder_
		set rf [$self get_option recordFilename]
		if { $rf == "none" } {
			set hn [info hostname]
			set hn [lindex [split $hn .] 0]
			set rf $hn.rlog
		}
		set recorder_ [new WC_Recorder $rf]
        }

	# check if we're playing and possibly set up a player.
	if { [$self get_option play] != "" } {
		$self instvar player_
		set rf [$self get_option playFilename]
		if { $rf == "none" } {
			set hn [info hostname]
			set hn [lindex [split $hn .] 0]
			set rf $hn.rlog
		}
		set player_ [new WC_Player $rf $self 1]
		# wait for 2 seconds for everything to settle down
		after 2000 $player_ start
	}
}

WebCacheApplication public usage { } {
	set o [$self options]

	puts  "cache: \[-layer|-lazy|-base\] \[other-options-listed-below] address"
	$o usage
}

WebCacheApplication private init_argv { argv } {
	set o [$self options]

	$o register_option -cacheDir cacheDir
	$o register_option -cacheSize cacheSize
	$o register_option -playFilename playFilename
	$o register_option -recordFilename recordFilename

	$o register_option -d1 d1
	$o register_option -d2 d2
	$o register_option -e1 e1
	$o register_option -e2 e2

	$o register_boolean_option -record record
	$o register_boolean_option -play play
	$o register_boolean_option -playRealtime realtime

	return [$o parse_args $argv]
}


#
# Called by srm if another cache needs to recover the specified data.
#
WebCacheApplication public read_data { source cid seqno } {
	$self instvar index_

	set url [$self cid_2_name $source $cid]

	if { $url == "" || ![info exists index_($url)] } {
		return ""
	} else {
		return $index_($url)
	}
}


#
# Called by the cache control after winning the response timer
# war. If the data is in the local cache, get it from there.
# Otherwise, fetch it from the origin server.
#
WebCacheApplication public send_data { url } {
	$self instvar sockets_ source_ index_ cid_names_ proxy_

	mtrace trcWC  "cache: send_data $url"

	if ![info exists index_($url)] {
		# need to fetch data from origin server first
		$self fetch $url
		return
	}

	# allocate a cid for this url if one does not exist
	set p [split [$self name_2_cid $url] ,]
	set source [lindex $p 0]
	set cid [lindex $p 1]
	if { $cid == "" || $source != $source_} {
		set cid [srm_calloc $source_ 0 $url]
		set cid_names_($source_,$cid) $url
	}

	set f [open $index_($url)]
	fconfigure $f -translation binary
	set buffer ""
	while { ![eof $f] } {
		append buffer [read $f 4096]
	}
	close $f

	# send the data to the session
	srm_send $source_ $cid $buffer

	ztrace "done [gettimeofday] $url fetch"

	# callback to proxy to hand data to browser
	if [info exists sockets_($url)] {
		$proxy_ done_fetch $url $sockets_($url) $index_($url)
		unset sockets_($url)
	}
}


#
# Called when received data from the cache session. The
# data is stored in the filename <i>fn</i>.
#
WebCacheApplication public recv_data { source cid seqno fn } {
	$self instvar proxy_ control_ sockets_ index_

	# do a reverse mapping lookup to get url
	set url [$self cid_2_name $source $cid]

	ztrace "done [gettimeofday] $url srm"
	mtrace trcWC  "cache: recv_data $url"

	# store the data onto local cache
	$self put $url $fn $seqno

	# tell cache control to cancel timers related for this url
	$control_ cancel_all_timers $url

	# callback to proxy to hand data to browser
	if [info exists sockets_($url)] {
		$proxy_ done_fetch $url $sockets_($url) $index_($url)
		unset sockets_($url)
	}
}


#
# Default behavior of cache is to recovery everything.
# Subclasses of this object should define their recovery strategies.
#
WebCacheApplication public should_recover { source cid sseq eseq } {
	return 1
}


#
# Receive an URL to SRM mapping information. If this is a duplicate
# mapping for the same URL for different SRM names, we still remember
# this, but make sure we don't store the same data repeatedly.
#
WebCacheApplication public recv_cid { source cid parent_cid name } {
	$self instvar cid_names_

	mtrace trcWC  "cache: recv_cid $name"
	set cid_names_($source,$cid) $name
}


#
# Given a cid, this method returns the application level
# name (url) associated with it.
#
WebCacheApplication public cid_2_name { source cid } {
	$self instvar cid_names_

	if [info exists cid_names_($source,$cid)] {
		set url $cid_names_($source,$cid)
	} else {
		set url [srm_get_container_name $source $cid]
	}
	return $url
}

#
# Give a name (url), this method returns the (source,cid) pair
# associated with it or an empty if it does not exist.
#
WebCacheApplication public name_2_cid { url } {
	$self instvar source_ cid_names_

	foreach p [array names cid_names_] {
		if { "$cid_names_($p)" == "$url" } {
			return [split $p ,]
		}
	}
	return ""
}

#
# Returns a filename if the url is local in the cache.
# Otherwise, an empty string is returned.
#
WebCacheApplication public hit { url } {
	$self instvar index_
	if [info exists index_($url)] {
		return $index_($url)
	} else {
		return ""
	}
}

#
# Called to initiate the loop to access the cache. For example,
# from casting director in mashcast, or full page request from
# infocaster, or from the browser proxy.
#
WebCacheApplication public get { url { socket "" } } {
	$self instvar index_ sockets_ proxy_ control_

	ztrace "get [gettimeofday] $url"
	mtrace trcWC  "cache: get $url"

	# record this url request into a file if we're recording
	if { [$self get_option record] != "" } {
		$self instvar recorder_
		$recorder_ record $url
	}

	# remove url from stack and put on top
	$self push_lru $url

	if { $socket != "" } {
		set sockets_($url) $socket
	}

	if [info exists index_($url)] {
		# cache has data on local disk
		ztrace "done [gettimeofday] $url disk"
		mtrace trcWC  "cache: get from disk"

		# callback to proxy if it is requested from there
		if [info exists sockets_($url)] {
			$proxy_ done_fetch $url $sockets_($url) $index_($url)
			unset sockets_($url)
		}
	} else {
		# cache needs to get data from others
		mtrace trcWC  "cache: get from others"

		set m [$self name_2_cid $url]
		if { $m != "" } {
			# if a mapping exists already, this means the
			# data is already in some other cache, so
			# we do a srm repair request on this cid.
			# note: we assume there is always only one adu
			# in any container

			srm_recover [lindex $m 0] [lindex $m 1] 0 0
		} else {
			# the data is not in local disk nor other caches
			# so start a timer loop

			$control_ create_get_timer $url
		}
	}
}

#
# Put the contents of url into the cache. The data is stored
# in the filename <i>fn</i>. Even though the data might not
# have originated from local clients, we still count it as
# the most recently accessed in the LRU algorithm.
#
WebCacheApplication public put { url fn { seqno 0 } } {
	$self instvar index_ cache_dir_ index_filename_ sockets_ \
		used_ total_ control_

	mtrace trcWC  "cache: put $url $fn"

	# do lru business
	$self push_lru $url

	# if the data is already on disk, don't need to store a
	# duplicate. this situation can arise if we have to different
	# srm names for the same url.
	if [info exists index_($url)] {
		return
	}

	# check whether there is enough space in cache
	# if not need to make room

	set fs [file size $fn]

	if { [expr $fs + $used_] > $total_ } {
		# need to make room in cache
		$self make_room [expr $fs - ($total_ - $used_)]
	}

	set update_index_file 1

	set name cache[clock clicks]
	set index_($url) [file join $cache_dir_ $name[file extension $url]]

	file rename $fn $index_($url)

	# write the index file
	if [catch {set f [open $index_filename_ a]}] {
		set f [open $index_filename_ w 0644]
	}

	puts $f [list $url $index_($url)]
	close $f

	# tell cache control to cancel timers related for this url
	#$control_ cancel_all_timers $url
}


#
# Adjust LRU stack
#
WebCacheApplication private push_lru { url } {
	$self instvar lru_

	# this url is the most recently used so move it to the
	# top of the lru stack
	set idx [lsearch $lru_ $url]
	if { $idx != -1 } {
		set lru_ [lreplace $lru_ $idx $idx]
	}
	lappend lru_ $url
}


#
# Fetch the contents of <i>url</i> from the origin server.
# This is non-blocking and calls done_fetch when the
# transactionis finished.
#
WebCacheApplication private fetch { url } {
	mtrace trcWC  "cache: fetching... $url"
	set token [::http::geturl $url -command "$self done_fetch"]
}


#
# Called when start_fetch is finished. This puts the contents
# fetched into the cache by passing the data to the put method.
# Do a callback to cache control to notify the data is here.
#
WebCacheApplication private done_fetch { token } {
	$self instvar proxy_ control_ sockets_ index_ cache_dir_

	upvar #0 $token state
	set url $state(url)

	mtrace trcWC  "cache: done_fetch $url"

	# if we received the data via the srm session while we
	# were fetching the data from the origin server, we can
	# just return because recv_data would have put the data in
	# our cache and passed the data to the browser, if necessary.
	if [info exists index_($url)] {
		return
	}

	set fn [file join $cache_dir_ tmp[clock clicks]]
	set f [open $fn w 0644]
	fconfigure $f -translation binary
	puts -nonewline $f "$state(http)\r\n"
	foreach {name value} $state(meta) {
		puts -nonewline $f "$name: $value\r\n"
	}
	puts -nonewline $f "\r\n"
	puts -nonewline $f $state(body)
	close $f

	# tell cache control to cancel timers related for this url
	$control_ cancel_all_timers $url

	# put the data onto disk
	$self put $url $fn

	# send the data to session
	$self send_data $url
}

#
# Uses LRU algorithm to determine which object to evict from the
# cache. Subclasses should define their own strategies. <i>needed</i>
# denotes the amount of space needed in B.
#
WebCacheApplication public make_room { needed } {
	$self instvar index_ lru_ used_

	set cleared 0

	while { $cleared < needed } {

		# find the lru object in cache
		set url [lindex $lru_ 0]
		set fn $index_($url)
		set fs [file size $fn]

		set cleared [expr $cleared + $fs]

		$self flush_url $url
	}

}

WebCacheApplication private flush_url { url } {
	$self instvar index_ lru_ used_

	# remove the file and clean up cache index
	file delete $fn
	unset index_($url)
	set lru_ [lrange $lru_ 1 end]

	set used_ [expr $used_ - $fs]
}

WebCacheApplication public flush_all { } {
	$self instvar index_ cache_dir_
	file delete -force -- [glob -nocomplain [file join $cache_dir_ *]]
	catch {unset index_}
}

WebCacheApplication private create_dir { path } {
	if { ![file isdirectory $path] } {
		set dir ""
		foreach split [file split $path] {
			set dir [file join $dir $split]
			if { ![file exists $dir] } {
				# this command will cause an error
				# if it is not possible to create the dir
				file mkdir $dir
			}
		}
	}
}

#
# SRMv2 tcl callbacks
#

# FIXME
set fn {/tmp/recv.dat}

proc srm_recv { src cid seqno data } {
	global cache fn

	set f [open $fn w 0644]
	fconfigure $f -translation binary
	puts -nonewline $f $data
	close $f

	$cache recv_data $src $cid $seqno $fn
}

proc srm_read_adu { src cid seqno } {
	global cache

	set fn [$cache read_data $src $cid $seqno]

	if { $fn == "" } {
		return ""
	} else {
		set f [open $fn]
		fconfigure $f -translation binary
		set buffer ""
		while { ![eof $f] } {
			append buffer [read $f 4096]
		}
		close $f

		return $buffer
	}
}

proc srm_should_recover { src cid sseq eseq } {
	global cache
	$cache should_recover $src $cid $sseq $eseq
}


proc srm_recv_cid { src cid parent_cid name } {
	global cache
	$cache recv_cid $src $cid $parent_cid $name
}

proc srm_source_update { src info } {
	# do nothing now
}

